|
@@ -127,7 +127,7 @@ T cheb_approx(T* fn_v, int cheb_deg, int dof, T* out, mem::MemoryManager* mem_mg
|
|
|
// Create work buffers
|
|
|
size_t buff_size=dof*d*d*d;
|
|
|
Y* buff=(Y*)(mem_mgr?mem_mgr->malloc(2*buff_size*sizeof(Y)):
|
|
|
- malloc(2*buff_size*sizeof(Y)));
|
|
|
+ mem::aligned_malloc<char>(2*buff_size*sizeof(Y)));
|
|
|
Y* buff1=buff+buff_size*0;
|
|
|
Y* buff2=buff+buff_size*1;
|
|
|
|
|
@@ -189,7 +189,8 @@ T cheb_approx(T* fn_v, int cheb_deg, int dof, T* out, mem::MemoryManager* mem_mg
|
|
|
}
|
|
|
|
|
|
// Free memory
|
|
|
- if(mem_mgr )mem_mgr->free(buff);
|
|
|
+ if(mem_mgr) mem_mgr->free(buff);
|
|
|
+ else mem::aligned_free((char*)buff);
|
|
|
|
|
|
return cheb_err(out,cheb_deg,dof);
|
|
|
}
|
|
@@ -437,7 +438,7 @@ void cheb_eval(const Vector<T>& coeff_, int cheb_deg, const std::vector<T>& in_x
|
|
|
// Create work buffers
|
|
|
size_t buff_size=std::max(d,n1)*std::max(d,n2)*std::max(d,n3)*dof;
|
|
|
T* buff=(T*)(mem_mgr?mem_mgr->malloc(2*buff_size*sizeof(T)):
|
|
|
- malloc(2*buff_size*sizeof(T)));
|
|
|
+ mem::aligned_malloc<char>(2*buff_size*sizeof(T)));
|
|
|
Vector<T> v1(buff_size,buff+buff_size*0,false);
|
|
|
Vector<T> v2(buff_size,buff+buff_size*1,false);
|
|
|
|
|
@@ -461,7 +462,7 @@ void cheb_eval(const Vector<T>& coeff_, int cheb_deg, const std::vector<T>& in_x
|
|
|
{ // Apply Mp1
|
|
|
Matrix<T> Mi ( d* d*dof, d,&v1[0],false);
|
|
|
Matrix<T> Mo ( d* d*dof,n1,&v2[0],false);
|
|
|
- Matrix<T>::DGEMM(Mo, Mi, Mp1, 0);
|
|
|
+ Matrix<T>::DGEMM(Mo, Mi, Mp1);
|
|
|
|
|
|
Matrix<T> Mo_t(n1, d* d*dof,&v1[0],false);
|
|
|
for(size_t i=0;i<Mo.Dim(0);i++)
|
|
@@ -472,7 +473,7 @@ void cheb_eval(const Vector<T>& coeff_, int cheb_deg, const std::vector<T>& in_x
|
|
|
{ // Apply Mp2
|
|
|
Matrix<T> Mi (n1* d*dof, d,&v1[0],false);
|
|
|
Matrix<T> Mo (n1* d*dof,n2,&v2[0],false);
|
|
|
- Matrix<T>::DGEMM(Mo, Mi, Mp2, 0);
|
|
|
+ Matrix<T>::DGEMM(Mo, Mi, Mp2);
|
|
|
|
|
|
Matrix<T> Mo_t(n2,n1* d*dof,&v1[0],false);
|
|
|
for(size_t i=0;i<Mo.Dim(0);i++)
|
|
@@ -483,7 +484,7 @@ void cheb_eval(const Vector<T>& coeff_, int cheb_deg, const std::vector<T>& in_x
|
|
|
{ // Apply Mp3
|
|
|
Matrix<T> Mi (n2*n1*dof, d,&v1[0],false);
|
|
|
Matrix<T> Mo (n2*n1*dof,n3,&v2[0],false);
|
|
|
- Matrix<T>::DGEMM(Mo, Mi, Mp3, 0);
|
|
|
+ Matrix<T>::DGEMM(Mo, Mi, Mp3);
|
|
|
|
|
|
Matrix<T> Mo_t(n3,n2*n1*dof,&v1[0],false);
|
|
|
for(size_t i=0;i<Mo.Dim(0);i++)
|
|
@@ -502,8 +503,8 @@ void cheb_eval(const Vector<T>& coeff_, int cheb_deg, const std::vector<T>& in_x
|
|
|
}
|
|
|
|
|
|
// Free memory
|
|
|
- if(mem_mgr )mem_mgr->free(buff);
|
|
|
- else free(buff);
|
|
|
+ if(mem_mgr) mem_mgr->free(buff);
|
|
|
+ else mem::aligned_free((char*)buff);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -1169,7 +1170,7 @@ void cheb_diff(const Vector<T>& A, int deg, int diff_dim, Vector<T>& B, mem::Mem
|
|
|
// Create work buffers
|
|
|
size_t buff_size=A.Dim();
|
|
|
T* buff=(T*)(mem_mgr?mem_mgr->malloc(2*buff_size*sizeof(T)):
|
|
|
- malloc(2*buff_size*sizeof(T)));
|
|
|
+ mem::aligned_malloc<char>(2*buff_size*sizeof(T)));
|
|
|
T* buff1=buff+buff_size*0;
|
|
|
T* buff2=buff+buff_size*1;
|
|
|
|
|
@@ -1188,7 +1189,7 @@ void cheb_diff(const Vector<T>& A, int deg, int diff_dim, Vector<T>& B, mem::Mem
|
|
|
{ // Apply M
|
|
|
Matrix<T> Mi(d,A.Dim()/d,&buff1[0],false);
|
|
|
Matrix<T> Mo(d,A.Dim()/d,&buff2[0],false);
|
|
|
- Matrix<T>::DGEMM(Mo, M, Mi, 0);
|
|
|
+ Matrix<T>::DGEMM(Mo, M, Mi);
|
|
|
}
|
|
|
|
|
|
for(size_t k=0;k<n2;k++){ // Rearrange and write output to B
|
|
@@ -1201,8 +1202,8 @@ void cheb_diff(const Vector<T>& A, int deg, int diff_dim, Vector<T>& B, mem::Mem
|
|
|
}
|
|
|
|
|
|
// Free memory
|
|
|
- if(mem_mgr )mem_mgr->free(buff);
|
|
|
- else free(buff);
|
|
|
+ if(mem_mgr) mem_mgr->free(buff);
|
|
|
+ else mem::aligned_free((char*)buff);
|
|
|
}
|
|
|
|
|
|
template <class T>
|
|
@@ -1215,7 +1216,7 @@ void cheb_grad(const Vector<T>& A, int deg, Vector<T>& B, mem::MemoryManager* me
|
|
|
|
|
|
// Create work buffers
|
|
|
T* buff=(T*)(mem_mgr?mem_mgr->malloc(2*n_coeff_*dof*sizeof(T)):
|
|
|
- malloc(2*n_coeff_*dof*sizeof(T)));
|
|
|
+ mem::aligned_malloc<char>(2*n_coeff_*dof*sizeof(T)));
|
|
|
Vector<T> A_(n_coeff_*dof,buff+n_coeff_*0); A_.SetZero();
|
|
|
Vector<T> B_(n_coeff_*dof,buff+n_coeff_*1); B_.SetZero();
|
|
|
|
|
@@ -1254,8 +1255,8 @@ void cheb_grad(const Vector<T>& A, int deg, Vector<T>& B, mem::MemoryManager* me
|
|
|
}
|
|
|
|
|
|
// Free memory
|
|
|
- if(mem_mgr )mem_mgr->free(buff);
|
|
|
- else free(buff);
|
|
|
+ if(mem_mgr) mem_mgr->free(buff);
|
|
|
+ else mem::aligned_free((char*)buff);
|
|
|
}
|
|
|
|
|
|
template <class T>
|
|
@@ -1352,8 +1353,8 @@ void cheb_laplacian(T* A, int deg, T* B){
|
|
|
int d=deg+1;
|
|
|
int n1=(int)(pow((T)d,dim)+0.5);
|
|
|
|
|
|
- T* C1=new T[n1];
|
|
|
- T* C2=new T[n1];
|
|
|
+ T* C1=mem::aligned_malloc<T>(n1);
|
|
|
+ T* C2=mem::aligned_malloc<T>(n1);
|
|
|
|
|
|
Matrix<T> M_(1,n1,C2,false);
|
|
|
for(int i=0;i<3;i++){
|
|
@@ -1366,8 +1367,8 @@ void cheb_laplacian(T* A, int deg, T* B){
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- delete[] C1;
|
|
|
- delete[] C2;
|
|
|
+ mem::aligned_free<T>(C1);
|
|
|
+ mem::aligned_free<T>(C2);
|
|
|
}
|
|
|
|
|
|
/*
|