소스 검색

Create wrappers for floating-point utilities

- Create templated wrapper functions in pvfmm namespace for
  floating-point utility functions, since QuadReal_t versions of these
  functions were confusing the compiler about which version to use.

- Fix multiple definitions. Move specialized templates to cpp files.
Dhairya Malhotra 10 년 전
부모
커밋
704cc02970

+ 3 - 2
Makefile.am

@@ -72,7 +72,7 @@ lib_libfmm_a_HEADERS = \
 									include/parUtils.h \
 									include/precomp_mat.hpp \
 									include/profile.hpp \
-									include/quad_utils.hpp \
+									include/math_utils.hpp \
 									include/stacktrace.h \
 									include/tree.hpp \
 									include/tree_node.hpp \
@@ -95,7 +95,6 @@ lib_libfmm_a_HEADERS = \
 									include/ompUtils.txx \
 									include/parUtils.txx \
 									include/precomp_mat.txx \
-									include/quad_utils.txx \
 									include/tree.txx \
 									include/vector.txx
 
@@ -105,10 +104,12 @@ lib_libfmm_a_HEADERS = \
 # the sources to add to the library and to add to the source distribution
 lib_libpvfmm_a_SOURCES = \
 									$(lib_libpvfmm_a_HEADERS) \
+									src/cheb_utils.cpp \
 									src/device_wrapper.cpp \
 									src/fmm_gll.cpp \
 									src/legendre_rule.cpp \
 									src/mat_utils.cpp \
+									src/math_utils.cpp \
 									src/mem_mgr.cpp \
 									src/mortonid.cpp \
 									src/profile.cpp \

+ 8 - 8
examples/include/utils.txx

@@ -131,9 +131,9 @@ void CheckChebOutput(FMMTree_t* mytree, typename TestFn<typename FMMTree_t::Real
   }
 
   int cheb_deg=nodes[0]->ChebDeg();
-  std::vector<Real_t> cheb_nds=pvfmm::cheb_nodes<Real_t>(cheb_deg/3+1, 1);
+  std::vector<Real_t> cheb_nds=pvfmm::cheb_nodes<Real_t>(cheb_deg+1, 1);
   for(size_t i=0;i<cheb_nds.size();i++) cheb_nds[i]=2.0*cheb_nds[i]-1.0;
-  std::vector<Real_t> cheb_pts=pvfmm::cheb_nodes<Real_t>(cheb_deg/3+1, COORD_DIM);
+  std::vector<Real_t> cheb_pts=pvfmm::cheb_nodes<Real_t>(cheb_deg+1, COORD_DIM);
   int n_pts=cheb_pts.size()/COORD_DIM;
   int omp_p=omp_get_max_threads();
 
@@ -172,7 +172,7 @@ void CheckChebOutput(FMMTree_t* mytree, typename TestFn<typename FMMTree_t::Real
         err_avg[k]+=err_avg[tid*dof*fn_dof+k];
     MPI_Allreduce(&err_avg[0], &glb_err_avg[0], dof*fn_dof, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), c1);
   }
-  { // Write error to file.
+  if(0){ // Write error to file.
     int nn_x=1;
     int nn_y=201;
     int nn_z=201;
@@ -213,11 +213,11 @@ void CheckChebOutput(FMMTree_t* mytree, typename TestFn<typename FMMTree_t::Real
     MPI_Reduce(&M_out    [0][0], &M_global    [0][0], nn_x*nn_y*nn_z*fn_dof*dof, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), 0, c1);
     MPI_Reduce(&M_out_err[0][0], &M_global_err[0][0], nn_x*nn_y*nn_z*fn_dof*dof, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), 0, c1);
 
-    //std::string fname;
-    //fname=std::string("result/"    )+t_name+std::string(".mat");
-    //if(!myrank) M_global    .Write(fname.c_str());
-    //fname=std::string("result/err_")+t_name+std::string(".mat");
-    //if(!myrank) M_global_err.Write(fname.c_str());
+    std::string fname;
+    fname=std::string("result/"    )+t_name+std::string(".mat");
+    if(!myrank) M_global    .Write(fname.c_str());
+    fname=std::string("result/err_")+t_name+std::string(".mat");
+    if(!myrank) M_global_err.Write(fname.c_str());
   }
   std::vector<Real_t> max    (omp_p,0), l2    (omp_p,0);
   std::vector<Real_t> max_err(omp_p,0), l2_err(omp_p,0);

+ 11 - 11
include/cheb_node.txx

@@ -39,8 +39,8 @@ void Cheb_Node<Real_t>::Initialize(TreeNode* parent_, int path2node_, TreeNode::
   //Compute Chebyshev approximation.
   if(this->IsLeaf() && !this->IsGhost()){
     if(!input_fn.IsEmpty() && data_dof>0){
-      Real_t s=pow(0.5,this->Depth());
-      int n1=(int)(pow((Real_t)(cheb_deg+1),this->Dim())+0.5);
+      Real_t s=pvfmm::pow<Real_t>(0.5,this->Depth());
+      int n1=pvfmm::pow<unsigned int>(cheb_deg+1,this->Dim());
       std::vector<Real_t> coord=cheb_nodes<Real_t>(cheb_deg,this->Dim());
       for(int i=0;i<n1;i++){
         coord[i*3+0]=coord[i*3+0]*s+this->Coord()[0];
@@ -103,7 +103,7 @@ bool Cheb_Node<Real_t>::SubdivCond(){
     std::vector<Real_t> x=cheb_nodes<Real_t>(cheb_deg,1);
     std::vector<Real_t> y=x;
     std::vector<Real_t> z=x;
-    Real_t s=pow(0.5,this->Depth());
+    Real_t s=pvfmm::pow<Real_t>(0.5,this->Depth());
     Real_t* coord=this->Coord();
     for(size_t i=0;i<x.size();i++){
       x[i]=x[i]*s+coord[0];
@@ -132,7 +132,7 @@ void Cheb_Node<Real_t>::Subdivide() {
   std::vector<Real_t> y(cheb_deg+1);
   std::vector<Real_t> z(cheb_deg+1);
   Vector<Real_t> cheb_node=cheb_nodes<Real_t>(cheb_deg,1);
-  Vector<Real_t> val((size_t)pow(cheb_deg+1,this->Dim())*data_dof);
+  Vector<Real_t> val(pvfmm::pow<unsigned int>(cheb_deg+1,this->Dim())*data_dof);
   Vector<Real_t> child_cheb_coeff[8];
   int n=(1UL<<this->Dim());
   for(int i=0;i<n;i++){
@@ -145,7 +145,7 @@ void Cheb_Node<Real_t>::Subdivide() {
       z[j]=cheb_node[j]+coord[2];
     }
     cheb_eval(cheb_coeff, cheb_deg, x, y, z, val);
-    assert(val.Dim()==pow(cheb_deg+1,this->Dim())*data_dof);
+    assert(val.Dim()==pvfmm::pow<unsigned int>(cheb_deg+1,this->Dim())*data_dof);
     child_cheb_coeff[i].Resize(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
     cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&(child_cheb_coeff[i][0]));
 
@@ -166,7 +166,7 @@ void Cheb_Node<Real_t>::Truncate() {
   std::vector<Real_t> y=cheb_node;
   std::vector<Real_t> z=cheb_node;
   std::vector<Real_t> val((cheb_deg+1)*(cheb_deg+1)*(cheb_deg+1)*data_dof);
-  Real_t s=pow(0.5,this->Depth());
+  Real_t s=pvfmm::pow<Real_t>(0.5,this->Depth());
   Real_t* coord=this->Coord();
   for(size_t i=0;i<x.size();i++){
     x[i]=x[i]*s+coord[0];
@@ -225,7 +225,7 @@ void Cheb_Node<Real_t>::VTU_Data(VTUData_t& vtu_data, std::vector<Node_t*>& node
 
       size_t point_cnt=coord.size()/COORD_DIM;
       Real_t* c=n->Coord();
-      Real_t s=pow(0.5,n->Depth());
+      Real_t s=pvfmm::pow<Real_t>(0.5,n->Depth());
       for(int i0=0;i0<gridpt_cnt;i0++)
       for(int i1=0;i1<gridpt_cnt;i1++)
       for(int i2=0;i2<gridpt_cnt;i2++){
@@ -272,7 +272,7 @@ template <class Real_t>
 void Cheb_Node<Real_t>::Gradient(){
   int dim=3;//this->Dim();
   if(this->IsLeaf() && ChebData().Dim()>0){
-    Real_t scale=pow(2,this->depth);
+    Real_t scale=pvfmm::pow<Real_t>(2,this->depth);
     for(size_t i=0;i<ChebData().Dim();i++)
       ChebData()[i]*=scale;
 
@@ -294,7 +294,7 @@ void Cheb_Node<Real_t>::Divergence(){
       cheb_div(&(ChebData()[n3*i]),cheb_deg,&coeff[n3*(i/dim)]);
     ChebData().Swap(coeff);
 
-    Real_t scale=pow(2,this->depth);
+    Real_t scale=pvfmm::pow<Real_t>(2,this->depth);
     for(size_t i=0;i<ChebData().Dim();i++)
       ChebData()[i]*=scale;
   }
@@ -312,7 +312,7 @@ void Cheb_Node<Real_t>::Curl(){
       cheb_curl(&(ChebData()[n3*i]),cheb_deg,&coeff[n3*i]);
     ChebData().Swap(coeff);
 
-    Real_t scale=pow(2,this->depth);
+    Real_t scale=pvfmm::pow<Real_t>(2,this->depth);
     for(size_t i=0;i<ChebData().Dim();i++)
       ChebData()[i]*=scale;
   }
@@ -321,7 +321,7 @@ void Cheb_Node<Real_t>::Curl(){
 template <class Real_t>
 void Cheb_Node<Real_t>::read_val(std::vector<Real_t> x,std::vector<Real_t> y, std::vector<Real_t> z, int nx, int ny, int nz, Real_t* val, bool show_ghost/*=true*/){
   if(cheb_deg<0) return;
-  Real_t s=0.5*pow(0.5,this->Depth());
+  Real_t s=0.5*pvfmm::pow<Real_t>(0.5,this->Depth());
   Real_t s_inv=1/s;
   if(this->IsLeaf()){
     if(cheb_coeff.Dim()!=(size_t)((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6*data_dof

+ 45 - 84
include/cheb_utils.txx

@@ -36,16 +36,16 @@ template <class T>
 inline void cheb_poly(int d, const T* in, int n, T* out){
   if(d==0){
     for(int i=0;i<n;i++)
-      out[i]=(fabs(in[i])<=1?1.0:0);
+      out[i]=(pvfmm::fabs<T>(in[i])<=1?1.0:0);
   }else if(d==1){
     for(int i=0;i<n;i++){
-      out[i]=(fabs(in[i])<=1?1.0:0);
-      out[i+n]=(fabs(in[i])<=1?in[i]:0);
+      out[i]=(pvfmm::fabs<T>(in[i])<=1?1.0:0);
+      out[i+n]=(pvfmm::fabs<T>(in[i])<=1?in[i]:0);
     }
   }else{
     for(int j=0;j<n;j++){
-      T x=(fabs(in[j])<=1?in[j]:0);
-      T y0=(fabs(in[j])<=1?1.0:0);
+      T x=(pvfmm::fabs<T>(in[j])<=1?in[j]:0);
+      T y0=(pvfmm::fabs<T>(in[j])<=1?1.0:0);
       out[j]=y0;
       out[j+n]=x;
 
@@ -74,7 +74,7 @@ T cheb_err(T* cheb_coeff, int deg, int dof){
   for(int i=0;i<=deg;i++)
   for(int j=0;i+j<=deg;j++)
   for(int k=0;i+j+k<=deg;k++){
-    if(i+j+k==deg) err+=fabs(cheb_coeff[indx]);
+    if(i+j+k==deg) err+=pvfmm::fabs<T>(cheb_coeff[indx]);
     indx++;
   }
   return err;
@@ -108,7 +108,7 @@ T cheb_approx(T* fn_v, int cheb_deg, int dof, T* out, mem::MemoryManager* mem_mg
     if(precomp [d].Dim(0)==0 && precomp [d].Dim(1)==0){
       std::vector<Y> x(d);
       for(int i=0;i<d;i++)
-        x[i]=-cos((i+(T)0.5)*const_pi<T>()/d);
+        x[i]=-pvfmm::cos<Y>((i+(T)0.5)*const_pi<T>()/d);
 
       std::vector<Y> p(d*d);
       cheb_poly(cheb_deg,&x[0],d,&p[0]);
@@ -201,16 +201,16 @@ template <class T>
 inline void legn_poly(int d, T* in, int n, T* out){
   if(d==0){
     for(int i=0;i<n;i++)
-      out[i]=(fabs(in[i])<=1?1.0:0);
+      out[i]=(pvfmm::fabs<T>(in[i])<=1?1.0:0);
   }else if(d==1){
     for(int i=0;i<n;i++){
-      out[i]=(fabs(in[i])<=1?1.0:0);
-      out[i+n]=(fabs(in[i])<=1?in[i]:0);
+      out[i]=(pvfmm::fabs<T>(in[i])<=1?1.0:0);
+      out[i+n]=(pvfmm::fabs<T>(in[i])<=1?in[i]:0);
     }
   }else{
     for(int j=0;j<n;j++){
-      T x=(fabs(in[j])<=1?in[j]:0);
-      T y0=(fabs(in[j])<=1?1.0:0);
+      T x=(pvfmm::fabs<T>(in[j])<=1?in[j]:0);
+      T y0=(pvfmm::fabs<T>(in[j])<=1?1.0:0);
       out[j]=y0;
       out[j+n]=x;
 
@@ -238,7 +238,7 @@ void gll_quadrature(int deg, T* x_, T* w){//*
 
   Vector<T> x(d,x_,false);
   for(int i=0;i<d;i++)
-    x[i]=-cos((const_pi<T>()*i)/N);
+    x[i]=-pvfmm::cos<T>((const_pi<T>()*i)/N);
   Matrix<T> P(d,d); P.SetZero();
 
   T err=1;
@@ -255,7 +255,7 @@ void gll_quadrature(int deg, T* x_, T* w){//*
     err=0;
     for(int i=0;i<d;i++){
       T dx=-( x[i]*P[i][N]-P[i][N-1] )/( d*P[i][N] );
-      err=(err<fabs(dx)?fabs(dx):err);
+      err=(err<pvfmm::fabs<T>(dx)?pvfmm::fabs<T>(dx):err);
       x[i]=xold[i]+dx;
     }
   }
@@ -283,7 +283,7 @@ T gll2cheb(T* fn_v, int deg, int dof, T* out){//*
 
       std::vector<Y> x(d); //Cheb nodes.
       for(int i=0;i<d;i++)
-        x[i]=-cos((i+(T)0.5)*const_pi<Y>()/d);
+        x[i]=-pvfmm::cos<Y>((i+(T)0.5)*const_pi<Y>()/d);
 
       Vector<T> w(d);
       Vector<T> x_legn(d); // GLL nodes.
@@ -352,13 +352,13 @@ T gll2cheb(T* fn_v, int deg, int dof, T* out){//*
     for(int i=0;i<d;i++)
     for(int j=0;i+j<d;j++)
     for(int k=0;i+j+k<d;k++){
-      sum+=fabs(fn_v3[k+(j+i*d)*d]);
+      sum+=pvfmm::fabs<T>(fn_v3[k+(j+i*d)*d]);
     }
     for(int i=0;i<d;i++)
     for(int j=0;i+j<d;j++)
     for(int k=0;i+j+k<d;k++){
       out[indx]=fn_v3[k+(j+i*d)*d];
-      //if(fabs(out[indx])<eps*sum) out[indx]=0;
+      //if(pvfmm::fabs<T>(out[indx])<eps*sum) out[indx]=0;
       indx++;
     }
   }
@@ -374,7 +374,7 @@ T cheb_approx(T (*fn)(T,T,T), int cheb_deg, T* coord, T s, std::vector<T>& out){
   int d=cheb_deg+1;
   std::vector<T> x(d);
   for(int i=0;i<d;i++)
-    x[i]=cos((i+(T)0.5)*const_pi<T>()/d);
+    x[i]=pvfmm::cos<T>((i+(T)0.5)*const_pi<T>()/d);
 
   std::vector<T> p;
   cheb_poly(cheb_deg,&x[0],d,&p[0]);
@@ -609,7 +609,7 @@ inline void cheb_eval(int cheb_deg, T* coord, T* coeff0,T* buff){
 template <class T>
 void points2cheb(int deg, T* coord, T* val, int n, int dim, T* node_coord, T node_size, Vector<T>& cheb_coeff){
   if(n==0) return;
-  int deg_=((int)(pow((T)n*6,1.0/3.0)+0.5))/2;
+  int deg_=((int)(pvfmm::pow<T>(n*6,1.0/3.0)+0.5))/2;
   deg_=(deg_>deg?deg:deg_);
   deg_=(deg_>0?deg_:1);
   int deg3=((deg_+1)*(deg_+2)*(deg_+3))/6;
@@ -676,8 +676,8 @@ void quad_rule(int n, T* x, T* w){
 
   { //Chebyshev quadrature nodes and weights
     for(int i=0;i<n;i++){
-      x_[i]=-cos((T)(2.0*i+1.0)/(2.0*n)*const_pi<T>());
-      w_[i]=0;//sqrt(1.0-x_[i]*x_[i])*const_pi<T>()/n;
+      x_[i]=-pvfmm::cos<T>((T)(2.0*i+1.0)/(2.0*n)*const_pi<T>());
+      w_[i]=0;//pvfmm::sqrt<T>(1.0-x_[i]*x_[i])*const_pi<T>()/n;
     }
     Matrix<T> M(n,n);
     cheb_poly(n-1, &x_[0], n, &M[0][0]);
@@ -725,7 +725,7 @@ void quad_rule(int n, T* x, T* w){
     //    for(size_t i=1;i<n;i+=2) w_sample[i]=0.0;
 
     //    err=0;
-    //    for(size_t i=0;i<n;i++) err+=fabs(w_sample[i]-w_prev[i]);
+    //    for(size_t i=0;i<n;i++) err+=pvfmm::fabs<T>(w_sample[i]-w_prev[i]);
     //  }
     //}
 
@@ -754,45 +754,6 @@ void quad_rule(int n, T* x, T* w){
   quad_rule(n, x, w);
 }
 
-template <>
-void quad_rule<double>(int n, double* x, double* w){
-  static std::vector<Vector<double> > x_lst(10000);
-  static std::vector<Vector<double> > w_lst(10000);
-  assert(n<10000);
-
-  bool done=false;
-  #pragma omp critical (QUAD_RULE)
-  if(x_lst[n].Dim()>0){
-    Vector<double>& x_=x_lst[n];
-    Vector<double>& w_=w_lst[n];
-    for(int i=0;i<n;i++){
-      x[i]=x_[i];
-      w[i]=w_[i];
-    }
-    done=true;
-  }
-  if(done) return;
-
-  Vector<double> x_(n);
-  Vector<double> w_(n);
-
-  { //Gauss-Legendre quadrature nodes and weights
-    double alpha=0.0;
-    double beta=0.0;
-    double a=-1.0;
-    double b= 1.0;
-    int kind = 1;
-    cgqf ( n, kind, (double)alpha, (double)beta, (double)a, (double)b, &x_[0], &w_[0] );
-  }
-
-  #pragma omp critical (QUAD_RULE)
-  { // Set x_lst, w_lst
-    x_lst[n]=x_;
-    w_lst[n]=w_;
-  }
-  quad_rule(n, x, w);
-}
-
 template <class T>
 std::vector<T> integ_pyramid(int m, T* s, T r, int nx, const Kernel<T>& kernel, int* perm){//*
   static mem::MemoryManager mem_mgr(16*1024*1024*sizeof(T));
@@ -812,11 +773,11 @@ std::vector<T> integ_pyramid(int m, T* s, T r, int nx, const Kernel<T>& kernel,
   std::vector<T> x_;
   { //  Build stack along X-axis.
     x_.push_back(s[0]);
-    x_.push_back(fabs(1.0-s[0])+s[0]);
-    x_.push_back(fabs(1.0-s[1])+s[0]);
-    x_.push_back(fabs(1.0+s[1])+s[0]);
-    x_.push_back(fabs(1.0-s[2])+s[0]);
-    x_.push_back(fabs(1.0+s[2])+s[0]);
+    x_.push_back(pvfmm::fabs<T>(1.0-s[0])+s[0]);
+    x_.push_back(pvfmm::fabs<T>(1.0-s[1])+s[0]);
+    x_.push_back(pvfmm::fabs<T>(1.0+s[1])+s[0]);
+    x_.push_back(pvfmm::fabs<T>(1.0-s[2])+s[0]);
+    x_.push_back(pvfmm::fabs<T>(1.0+s[2])+s[0]);
     std::sort(x_.begin(),x_.end());
     for(int i=0;i<x_.size();i++){
       if(x_[i]<-1.0) x_[i]=-1.0;
@@ -824,11 +785,11 @@ std::vector<T> integ_pyramid(int m, T* s, T r, int nx, const Kernel<T>& kernel,
     }
 
     std::vector<T> x_new;
-    T x_jump=fabs(1.0-s[0]);
-    if(fabs(1.0-s[1])>eps) x_jump=std::min(x_jump,(T)fabs(1.0-s[1]));
-    if(fabs(1.0+s[1])>eps) x_jump=std::min(x_jump,(T)fabs(1.0+s[1]));
-    if(fabs(1.0-s[2])>eps) x_jump=std::min(x_jump,(T)fabs(1.0-s[2]));
-    if(fabs(1.0+s[2])>eps) x_jump=std::min(x_jump,(T)fabs(1.0+s[2]));
+    T x_jump=pvfmm::fabs<T>(1.0-s[0]);
+    if(pvfmm::fabs<T>(1.0-s[1])>eps) x_jump=std::min(x_jump,(T)pvfmm::fabs<T>(1.0-s[1]));
+    if(pvfmm::fabs<T>(1.0+s[1])>eps) x_jump=std::min(x_jump,(T)pvfmm::fabs<T>(1.0+s[1]));
+    if(pvfmm::fabs<T>(1.0-s[2])>eps) x_jump=std::min(x_jump,(T)pvfmm::fabs<T>(1.0-s[2]));
+    if(pvfmm::fabs<T>(1.0+s[2])>eps) x_jump=std::min(x_jump,(T)pvfmm::fabs<T>(1.0+s[2]));
     for(int k=0; k<x_.size()-1; k++){
       T x0=x_[k];
       T x1=x_[k+1];
@@ -1103,8 +1064,8 @@ std::vector<T> cheb_integ(int m, T* s_, T r_, const Kernel<T>& kernel){
     U_=integ<T>(m+1,s,r,n,kernel);
     err=0;
     for(int i=0;i<(m+1)*(m+1)*(m+1)*k_dim;i++)
-      if(fabs(U[i]-U_[i])>err)
-        err=fabs(U[i]-U_[i]);
+      if(pvfmm::fabs<T>(U[i]-U_[i])>err)
+        err=pvfmm::fabs<T>(U[i]-U_[i]);
     U=U_;
   }
 
@@ -1127,16 +1088,16 @@ std::vector<T> cheb_integ(int m, T* s_, T r_, const Kernel<T>& kernel){
 
 template <class T>
 std::vector<T> cheb_nodes(int deg, int dim){
-  int d=deg+1;
+  unsigned int d=deg+1;
   std::vector<T> x(d);
   for(int i=0;i<d;i++)
-    x[i]=-cos((i+(T)0.5)*const_pi<T>()/d)*0.5+0.5;
+    x[i]=-pvfmm::cos<T>((i+(T)0.5)*const_pi<T>()/d)*0.5+0.5;
   if(dim==1) return x;
 
-  int n1=(int)(pow((T)d,dim)+0.5);
+  unsigned int n1=pvfmm::pow<unsigned int>(d,dim);
   std::vector<T> y(n1*dim);
   for(int i=0;i<dim;i++){
-    int n2=(int)(pow((T)d,i)+0.5);
+    unsigned int n2=pvfmm::pow<unsigned int>(d,i);
     for(int j=0;j<n1;j++){
       y[j*dim+i]=x[(j/n2)%d];
     }
@@ -1169,7 +1130,7 @@ void cheb_diff(const Vector<T>& A, int deg, int diff_dim, Vector<T>& B, mem::Mem
   T* buff1=buff+buff_size*0;
   T* buff2=buff+buff_size*1;
 
-  size_t n1=(size_t)(pow((T)d,diff_dim)+0.5);
+  size_t n1=pvfmm::pow<unsigned int>(d,diff_dim);
   size_t n2=A.Dim()/(n1*d);
 
   for(size_t k=0;k<n2;k++){ // Rearrange A to make diff_dim the last array dimension
@@ -1205,7 +1166,7 @@ void cheb_grad(const Vector<T>& A, int deg, Vector<T>& B, mem::MemoryManager* me
   size_t dim=3;
   size_t d=(size_t)deg+1;
   size_t n_coeff =(d*(d+1)*(d+2))/6;
-  size_t n_coeff_=(size_t)(pow((T)d,dim)+0.5);
+  size_t n_coeff_=pvfmm::pow<unsigned int>(d,dim);
   size_t dof=A.Dim()/n_coeff;
 
   // Create work buffers
@@ -1255,7 +1216,7 @@ template <class T>
 void cheb_div(T* A_, int deg, T* B_){
   int dim=3;
   int d=deg+1;
-  int n1 =(int)(pow((T)d,dim)+0.5);
+  int n1 =pvfmm::pow<unsigned int>(d,dim);
   Vector<T> A(n1*dim); A.SetZero();
   Vector<T> B(n1    ); B.SetZero();
 
@@ -1294,7 +1255,7 @@ template <class T>
 void cheb_curl(T* A_, int deg, T* B_){
   int dim=3;
   int d=deg+1;
-  int n1 =(int)(pow((T)d,dim)+0.5);
+  int n1 =pvfmm::pow<unsigned int>(d,dim);
   Vector<T> A(n1*dim); A.SetZero();
   Vector<T> B(n1*dim); B.SetZero();
 
@@ -1343,7 +1304,7 @@ template <class T>
 void cheb_laplacian(T* A, int deg, T* B){
   int dim=3;
   int d=deg+1;
-  int n1=(int)(pow((T)d,dim)+0.5);
+  int n1=pvfmm::pow<unsigned int>(d,dim);
 
   T* C1=mem::aligned_new<T>(n1);
   T* C2=mem::aligned_new<T>(n1);
@@ -1369,8 +1330,8 @@ void cheb_laplacian(T* A, int deg, T* B){
 template <class T>
 void cheb_img(T* A, T* B, int deg, int dir, bool neg_){
   int d=deg+1;
-  int n1=(int)(pow((T)d,3-dir)+0.5);
-  int n2=(int)(pow((T)d,  dir)+0.5);
+  int n1=pvfmm::pow<unsigned int>(d,3-dir);
+  int n2=pvfmm::pow<unsigned int>(d,  dir);
   int indx;
   T sgn,neg;
   neg=(T)(neg_?-1.0:1.0);

+ 8 - 8
include/fft_wrapper.hpp

@@ -182,8 +182,8 @@ struct FFTW_t{
     Matrix<T> M(N1,2*N2);
     for(size_t j=0;j<N1;j++)
     for(size_t i=0;i<N2;i++){
-      M[j][2*i+0]=cos(j*i*(1.0/N1)*2.0*const_pi<T>());
-      M[j][2*i+1]=sin(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[j][2*i+0]=pvfmm::cos<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[j][2*i+1]=pvfmm::sin<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
     }
     return M;
   }
@@ -192,10 +192,10 @@ struct FFTW_t{
     Matrix<T> M(2*N1,2*N1);
     for(size_t i=0;i<N1;i++)
     for(size_t j=0;j<N1;j++){
-      M[2*i+0][2*j+0]=cos(j*i*(1.0/N1)*2.0*const_pi<T>());
-      M[2*i+1][2*j+0]=sin(j*i*(1.0/N1)*2.0*const_pi<T>());
-      M[2*i+0][2*j+1]=-sin(j*i*(1.0/N1)*2.0*const_pi<T>());
-      M[2*i+1][2*j+1]= cos(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[2*i+0][2*j+0]=pvfmm::cos<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[2*i+1][2*j+0]=pvfmm::sin<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[2*i+0][2*j+1]=-pvfmm::sin<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[2*i+1][2*j+1]= pvfmm::cos<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
     }
     return M;
   }
@@ -205,8 +205,8 @@ struct FFTW_t{
     Matrix<T> M(2*N2,N1);
     for(size_t i=0;i<N2;i++)
     for(size_t j=0;j<N1;j++){
-      M[2*i+0][j]=2*cos(j*i*(1.0/N1)*2.0*const_pi<T>());
-      M[2*i+1][j]=2*sin(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[2*i+0][j]=2*pvfmm::cos<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
+      M[2*i+1][j]=2*pvfmm::sin<T>(j*i*(1.0/N1)*2.0*const_pi<T>());
     }
     if(N2>0){
       for(size_t j=0;j<N1;j++){

+ 16 - 16
include/fmm_cheb.txx

@@ -219,14 +219,14 @@ Permutation<Real_t> cheb_perm(size_t q, size_t p_indx, const Permutation<Real_t>
   int dof=ker_perm.Dim();
 
   int coeff_cnt=((q+1)*(q+2)*(q+3))/6;
-  int n3=(int)pow((Real_t)(q+1),dim);
+  int n3=pvfmm::pow<unsigned int>(q+1,dim);
 
   Permutation<Real_t> P0(n3*dof);
   if(scal_exp && p_indx==Scaling){ // Set level-by-level scaling
     assert(dof==scal_exp->Dim());
     Vector<Real_t> scal(scal_exp->Dim());
     for(size_t i=0;i<scal.Dim();i++){
-      scal[i]=pow(2.0,(*scal_exp)[i]);
+      scal[i]=pvfmm::pow<Real_t>(2.0,(*scal_exp)[i]);
     }
     for(int j=0;j<dof;j++){
       for(int i=0;i<n3;i++){
@@ -325,7 +325,7 @@ Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type t
         Permutation<Real_t> ker_perm0=this->kernel->k_s2m->perm_vec[C_Perm+p_indx];
         Permutation<Real_t> ker_perm1=this->kernel->k_m2m->perm_vec[C_Perm+p_indx];
         assert(ker_perm0.Dim()==ker_perm1.Dim());
-        if(ker_perm0.Dim()>0 && fabs(ker_perm0.scal[0]-ker_perm1.scal[0])>eps){
+        if(ker_perm0.Dim()>0 && pvfmm::fabs<Real_t>(ker_perm0.scal[0]-ker_perm1.scal[0])>eps){
           for(size_t i=0;i<ker_perm0.Dim();i++){
             ker_perm0.scal[i]*=-1.0;
           }
@@ -334,8 +334,8 @@ Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type t
           }
         }
         for(size_t i=0;i<ker_perm0.Dim();i++){
-          assert(    (ker_perm0.perm[i]-ker_perm1.perm[i])== 0);
-          assert(fabs(ker_perm0.scal[i]-ker_perm1.scal[i])<eps);
+          assert(                   (ker_perm0.perm[i]-ker_perm1.perm[i])== 0);
+          assert(pvfmm::fabs<Real_t>(ker_perm0.scal[i]-ker_perm1.scal[i])<eps);
         }
 
         Real_t s=0;
@@ -346,7 +346,7 @@ Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type t
         if(scal_exp0.Dim()>0){
           s=(scal_exp0[0]-scal_exp1[0]);
           for(size_t i=1;i<scal_exp0.Dim();i++){
-            assert(fabs(s-(scal_exp0[i]-scal_exp1[i]))<eps);
+            assert(pvfmm::fabs<Real_t>(s-(scal_exp0[i]-scal_exp1[i]))<eps);
             // In general this is not necessary, but to allow this, we must
             // also change src_scal accordingly.
           }
@@ -371,7 +371,7 @@ Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type t
         Permutation<Real_t> ker_perm0=this->kernel->k_l2t->perm_vec[0     +p_indx];
         Permutation<Real_t> ker_perm1=this->kernel->k_l2l->perm_vec[0     +p_indx];
         assert(ker_perm0.Dim()==ker_perm1.Dim());
-        if(ker_perm0.Dim()>0 && fabs(ker_perm0.scal[0]-ker_perm1.scal[0])>eps){
+        if(ker_perm0.Dim()>0 && pvfmm::fabs<Real_t>(ker_perm0.scal[0]-ker_perm1.scal[0])>eps){
           for(size_t i=0;i<ker_perm0.Dim();i++){
             ker_perm0.scal[i]*=-1.0;
           }
@@ -380,8 +380,8 @@ Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type t
           }
         }
         for(size_t i=0;i<ker_perm0.Dim();i++){
-          assert(    (ker_perm0.perm[i]-ker_perm1.perm[i])== 0);
-          assert(fabs(ker_perm0.scal[i]-ker_perm1.scal[i])<eps);
+          assert(                   (ker_perm0.perm[i]-ker_perm1.perm[i])== 0);
+          assert(pvfmm::fabs<Real_t>(ker_perm0.scal[i]-ker_perm1.scal[i])<eps);
         }
 
         Real_t s=0;
@@ -392,7 +392,7 @@ Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type t
         if(scal_exp0.Dim()>0){
           s=(scal_exp0[0]-scal_exp1[0]);
           for(size_t i=1;i<scal_exp0.Dim();i++){
-            assert(fabs(s-(scal_exp0[i]-scal_exp1[i]))<eps);
+            assert(pvfmm::fabs<Real_t>(s-(scal_exp0[i]-scal_exp1[i]))<eps);
             // In general this is not necessary, but to allow this, we must
             // also change trg_scal accordingly.
           }
@@ -519,7 +519,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
     case S2U_Type:
     {
       if(this->MultipoleOrder()==0) break;
-      Real_t r=pow(0.5,level);
+      Real_t r=pvfmm::pow<Real_t>(0.5,level);
       Real_t c[3]={0,0,0};
 
       // Coord of upward check surface
@@ -579,7 +579,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
     case U0_Type:
     {
       // Coord of target points
-      Real_t s=pow(0.5,level);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level);
       int* coord=this->interac_list.RelativeCoord(type,mat_indx);
       Real_t coord_diff[3]={(Real_t)((coord[0]-1)*s*0.5),(Real_t)((coord[1]-1.0)*s*0.5),(Real_t)((coord[2]-1.0)*s*0.5)};
       std::vector<Real_t>& rel_trg_coord = this->mat->RelativeTrgCoord();
@@ -629,7 +629,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
     case U1_Type:
     {
       // Coord of target points
-      Real_t s=pow(0.5,level);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level);
       int* coord=this->interac_list.RelativeCoord(type,mat_indx);
       Real_t coord_diff[3]={coord[0]*s,coord[1]*s,coord[2]*s};
       std::vector<Real_t>& rel_trg_coord = this->mat->RelativeTrgCoord();
@@ -679,7 +679,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
     case U2_Type:
     {
       // Coord of target points
-      Real_t s=pow(0.5,level);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level);
       int* coord=this->interac_list.RelativeCoord(type,mat_indx);
       Real_t coord_diff[3]={(Real_t)((coord[0]+1)*s*0.25),(Real_t)((coord[1]+1)*s*0.25),(Real_t)((coord[2]+1)*s*0.25)};
       std::vector<Real_t>& rel_trg_coord = this->mat->RelativeTrgCoord();
@@ -750,7 +750,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
     {
       if(this->MultipoleOrder()==0) break;
       // Coord of target points
-      Real_t s=pow(0.5,level-1);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level-1);
       int* coord=this->interac_list.RelativeCoord(type,mat_indx);
       Real_t c[3]={-(Real_t)((coord[0]-1)*s*0.25),-(Real_t)((coord[1]-1)*s*0.25),-(Real_t)((coord[2]-1)*s*0.25)};
       std::vector<Real_t> trg_coord=d_check_surf(this->MultipoleOrder(),c,level);
@@ -1079,7 +1079,7 @@ void FMM_Cheb<FMMNode>::PostProcessing(std::vector<FMMNode_t*>& nodes){
       //Sample local expansion at target points.
       if(trg_cnt>0 && cheb_out.Dim()>0){
         Real_t* c=nodes[i]->Coord();
-        Real_t scale=pow(2.0,nodes[i]->Depth()+1);
+        Real_t scale=pvfmm::pow<Real_t>(2.0,nodes[i]->Depth()+1);
         std::vector<Real_t> rel_coord(COORD_DIM*trg_cnt);
         for(size_t j=0;j<trg_cnt;j++) for(int k=0;k<COORD_DIM;k++)
           rel_coord[j*COORD_DIM+k]=(trg_coord[j*COORD_DIM+k]-c[k])*scale-1.0;

+ 34 - 34
include/fmm_pts.txx

@@ -77,7 +77,7 @@ std::vector<Real_t> surface(int p, Real_t* c, Real_t alpha, int depth){
   for(size_t i=0;i<(n_/2)*3;i++)
     coord[cnt*3+i]=-coord[i];
 
-  Real_t r = 0.5*pow(0.5,depth);
+  Real_t r = 0.5*pvfmm::pow<Real_t>(0.5,depth);
   Real_t b = alpha*r;
   for(size_t i=0;i<n_;i++){
     coord[i*3+0]=(coord[i*3+0]+1.0)*b+c[0];
@@ -93,7 +93,7 @@ std::vector<Real_t> surface(int p, Real_t* c, Real_t alpha, int depth){
  */
 template <class Real_t>
 std::vector<Real_t> u_check_surf(int p, Real_t* c, int depth){
-  Real_t r=0.5*pow(0.5,depth);
+  Real_t r=0.5*pvfmm::pow<Real_t>(0.5,depth);
   Real_t coord[3]={(Real_t)(c[0]-r*(RAD1-1.0)),(Real_t)(c[1]-r*(RAD1-1.0)),(Real_t)(c[2]-r*(RAD1-1.0))};
   return surface(p,coord,(Real_t)RAD1,depth);
 }
@@ -104,7 +104,7 @@ std::vector<Real_t> u_check_surf(int p, Real_t* c, int depth){
  */
 template <class Real_t>
 std::vector<Real_t> u_equiv_surf(int p, Real_t* c, int depth){
-  Real_t r=0.5*pow(0.5,depth);
+  Real_t r=0.5*pvfmm::pow<Real_t>(0.5,depth);
   Real_t coord[3]={(Real_t)(c[0]-r*(RAD0-1.0)),(Real_t)(c[1]-r*(RAD0-1.0)),(Real_t)(c[2]-r*(RAD0-1.0))};
   return surface(p,coord,(Real_t)RAD0,depth);
 }
@@ -115,7 +115,7 @@ std::vector<Real_t> u_equiv_surf(int p, Real_t* c, int depth){
  */
 template <class Real_t>
 std::vector<Real_t> d_check_surf(int p, Real_t* c, int depth){
-  Real_t r=0.5*pow(0.5,depth);
+  Real_t r=0.5*pvfmm::pow<Real_t>(0.5,depth);
   Real_t coord[3]={(Real_t)(c[0]-r*(RAD0-1.0)),(Real_t)(c[1]-r*(RAD0-1.0)),(Real_t)(c[2]-r*(RAD0-1.0))};
   return surface(p,coord,(Real_t)RAD0,depth);
 }
@@ -126,7 +126,7 @@ std::vector<Real_t> d_check_surf(int p, Real_t* c, int depth){
  */
 template <class Real_t>
 std::vector<Real_t> d_equiv_surf(int p, Real_t* c, int depth){
-  Real_t r=0.5*pow(0.5,depth);
+  Real_t r=0.5*pvfmm::pow<Real_t>(0.5,depth);
   Real_t coord[3]={(Real_t)(c[0]-r*(RAD1-1.0)),(Real_t)(c[1]-r*(RAD1-1.0)),(Real_t)(c[2]-r*(RAD1-1.0))};
   return surface(p,coord,(Real_t)RAD1,depth);
 }
@@ -137,12 +137,12 @@ std::vector<Real_t> d_equiv_surf(int p, Real_t* c, int depth){
  */
 template <class Real_t>
 std::vector<Real_t> conv_grid(int p, Real_t* c, int depth){
-  Real_t r=pow(0.5,depth);
+  Real_t r=pvfmm::pow<Real_t>(0.5,depth);
   Real_t a=r*RAD0;
   Real_t coord[3]={c[0],c[1],c[2]};
   int n1=p*2;
-  int n2=(int)pow((Real_t)n1,2);
-  int n3=(int)pow((Real_t)n1,3);
+  int n2=pvfmm::pow<int>((Real_t)n1,2);
+  int n3=pvfmm::pow<int>((Real_t)n1,3);
   std::vector<Real_t> grid(n3*3);
   for(int i=0;i<n1;i++)
   for(int j=0;j<n1;j++)
@@ -332,9 +332,9 @@ Permutation<Real_t> equiv_surf_perm(size_t m, size_t p_indx, const Permutation<R
   if(p_indx==ReflecX || p_indx==ReflecY || p_indx==ReflecZ){ // Set P.perm
     for(int i=0;i<n_trg;i++)
     for(int j=0;j<n_trg;j++){
-      if(fabs(trg_coord[i*3+0]-trg_coord[j*3+0]*(p_indx==ReflecX?-1.0:1.0))<eps)
-      if(fabs(trg_coord[i*3+1]-trg_coord[j*3+1]*(p_indx==ReflecY?-1.0:1.0))<eps)
-      if(fabs(trg_coord[i*3+2]-trg_coord[j*3+2]*(p_indx==ReflecZ?-1.0:1.0))<eps){
+      if(pvfmm::fabs<Real_t>(trg_coord[i*3+0]-trg_coord[j*3+0]*(p_indx==ReflecX?-1.0:1.0))<eps)
+      if(pvfmm::fabs<Real_t>(trg_coord[i*3+1]-trg_coord[j*3+1]*(p_indx==ReflecY?-1.0:1.0))<eps)
+      if(pvfmm::fabs<Real_t>(trg_coord[i*3+2]-trg_coord[j*3+2]*(p_indx==ReflecZ?-1.0:1.0))<eps){
         for(int k=0;k<dof;k++){
           P.perm[j*dof+k]=i*dof+ker_perm.perm[k];
         }
@@ -343,9 +343,9 @@ Permutation<Real_t> equiv_surf_perm(size_t m, size_t p_indx, const Permutation<R
   }else if(p_indx==SwapXY || p_indx==SwapXZ){
     for(int i=0;i<n_trg;i++)
     for(int j=0;j<n_trg;j++){
-      if(fabs(trg_coord[i*3+0]-trg_coord[j*3+(p_indx==SwapXY?1:2)])<eps)
-      if(fabs(trg_coord[i*3+1]-trg_coord[j*3+(p_indx==SwapXY?0:1)])<eps)
-      if(fabs(trg_coord[i*3+2]-trg_coord[j*3+(p_indx==SwapXY?2:0)])<eps){
+      if(pvfmm::fabs<Real_t>(trg_coord[i*3+0]-trg_coord[j*3+(p_indx==SwapXY?1:2)])<eps)
+      if(pvfmm::fabs<Real_t>(trg_coord[i*3+1]-trg_coord[j*3+(p_indx==SwapXY?0:1)])<eps)
+      if(pvfmm::fabs<Real_t>(trg_coord[i*3+2]-trg_coord[j*3+(p_indx==SwapXY?2:0)])<eps){
         for(int k=0;k<dof;k++){
           P.perm[j*dof+k]=i*dof+ker_perm.perm[k];
         }
@@ -363,7 +363,7 @@ Permutation<Real_t> equiv_surf_perm(size_t m, size_t p_indx, const Permutation<R
     assert(dof==scal_exp->Dim());
     Vector<Real_t> scal(scal_exp->Dim());
     for(size_t i=0;i<scal.Dim();i++){
-      scal[i]=pow(2.0,(*scal_exp)[i]);
+      scal[i]=pvfmm::pow<Real_t>(2.0,(*scal_exp)[i]);
     }
     for(int j=0;j<n_trg;j++){
       for(int i=0;i<dof;i++){
@@ -487,7 +487,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       Real_t eps=1, max_S=0;
       while(eps*(Real_t)0.5+(Real_t)1.0>1.0) eps*=0.5;
       for(size_t i=0;i<std::min(S.Dim(0),S.Dim(1));i++){
-        if(fabs(S[i][i])>max_S) max_S=fabs(S[i][i]);
+        if(pvfmm::fabs<Real_t>(S[i][i])>max_S) max_S=pvfmm::fabs<Real_t>(S[i][i]);
       }
       for(size_t i=0;i<S.Dim(0);i++) S[i][i]=(S[i][i]>eps*max_S*4?1.0/S[i][i]:0.0);
       M=V.Transpose()*S;//*U.Transpose();
@@ -539,7 +539,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       Real_t eps=1, max_S=0;
       while(eps*(Real_t)0.5+(Real_t)1.0>1.0) eps*=0.5;
       for(size_t i=0;i<std::min(S.Dim(0),S.Dim(1));i++){
-        if(fabs(S[i][i])>max_S) max_S=fabs(S[i][i]);
+        if(pvfmm::fabs<Real_t>(S[i][i])>max_S) max_S=pvfmm::fabs<Real_t>(S[i][i]);
       }
       for(size_t i=0;i<S.Dim(0);i++) S[i][i]=(S[i][i]>eps*max_S*4?1.0/S[i][i]:0.0);
       M=V.Transpose()*S;//*U.Transpose();
@@ -578,7 +578,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       size_t n_uc=check_surf.size()/3;
 
       // Coord of child's upward equivalent surface
-      Real_t s=pow(0.5,(level+2));
+      Real_t s=pvfmm::pow<Real_t>(0.5,(level+2));
       int* coord=interac_list.RelativeCoord(type,mat_indx);
       Real_t child_coord[3]={(coord[0]+1)*s,(coord[1]+1)*s,(coord[2]+1)*s};
       std::vector<Real_t> equiv_surf=u_equiv_surf(MultipoleOrder(),child_coord,level+1);
@@ -598,7 +598,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       if(MultipoleOrder()==0) break;
       const int* ker_dim=kernel->k_l2l->ker_dim;
       // Coord of downward check surface
-      Real_t s=pow(0.5,level+1);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level+1);
       int* coord=interac_list.RelativeCoord(type,mat_indx);
       Real_t c[3]={(coord[0]+1)*s,(coord[1]+1)*s,(coord[2]+1)*s};
       std::vector<Real_t> check_surf=d_check_surf(MultipoleOrder(),c,level);
@@ -637,7 +637,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       std::vector<Real_t>& rel_trg_coord=mat->RelativeTrgCoord();
 
       // Coord of target points
-      Real_t r=pow(0.5,level);
+      Real_t r=pvfmm::pow<Real_t>(0.5,level);
       size_t n_trg=rel_trg_coord.size()/3;
       std::vector<Real_t> trg_coord(n_trg*3);
       for(size_t i=0;i<n_trg*COORD_DIM;i++) trg_coord[i]=rel_trg_coord[i]*r;
@@ -666,7 +666,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       int n3_=n1*n1*(n1/2+1);
 
       //Compute the matrix.
-      Real_t s=pow(0.5,level);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level);
       int* coord2=interac_list.RelativeCoord(type,mat_indx);
       Real_t coord_diff[3]={coord2[0]*s,coord2[1]*s,coord2[2]*s};
 
@@ -758,7 +758,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
       std::vector<Real_t>& rel_trg_coord=mat->RelativeTrgCoord();
 
       // Coord of target points
-      Real_t s=pow(0.5,level);
+      Real_t s=pvfmm::pow<Real_t>(0.5,level);
       size_t n_trg=rel_trg_coord.size()/3;
       std::vector<Real_t> trg_coord(n_trg*3);
       for(size_t j=0;j<n_trg*COORD_DIM;j++) trg_coord[j]=rel_trg_coord[j]*s;
@@ -902,7 +902,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
                 Real_t pt_coord[3]={corner_pts[k*COORD_DIM+0]-j0,
                                     corner_pts[k*COORD_DIM+1]-j1,
                                     corner_pts[k*COORD_DIM+2]-j2};
-                if(fabs(pt_coord[0]-0.5)>1.0 || fabs(pt_coord[1]-0.5)>1.0 || fabs(pt_coord[2]-0.5)>1.0){
+                if(pvfmm::fabs<Real_t>(pt_coord[0]-0.5)>1.0 || pvfmm::fabs<Real_t>(pt_coord[1]-0.5)>1.0 || pvfmm::fabs<Real_t>(pt_coord[2]-0.5)>1.0){
                   Matrix<Real_t> M_e2pt(n_surf*ker_dim[0],ker_dim[1]);
                   kernel->k_m2l->BuildMatrix(&up_equiv_surf[0], n_surf,
                                                   &pt_coord[0], 1, &(M_e2pt[0][0]));
@@ -2173,7 +2173,7 @@ void FMM_Pts<FMMNode>::Source2UpSetup(SetupData<Real_t>&  setup_data, FMMTree_t*
         Vector<Real_t>& scal_exp=ker->trg_scal;
         scal.ReInit(scal_exp.Dim());
         for(size_t i=0;i<scal.Dim();i++){
-          scal[i]=std::pow(2.0,-scal_exp[i]*l);
+          scal[i]=pvfmm::pow<Real_t>(2.0,-scal_exp[i]*l);
         }
       }
       for(size_t l=0;l<MAX_DEPTH;l++){ // scal[l*4+3]
@@ -2181,7 +2181,7 @@ void FMM_Pts<FMMNode>::Source2UpSetup(SetupData<Real_t>&  setup_data, FMMTree_t*
         Vector<Real_t>& scal_exp=ker->src_scal;
         scal.ReInit(scal_exp.Dim());
         for(size_t i=0;i<scal.Dim();i++){
-          scal[i]=std::pow(2.0,-scal_exp[i]*l);
+          scal[i]=pvfmm::pow<Real_t>(2.0,-scal_exp[i]*l);
         }
       }
     }
@@ -2197,7 +2197,7 @@ void FMM_Pts<FMMNode>::Source2UpSetup(SetupData<Real_t>&  setup_data, FMMTree_t*
       size_t b=(nodes_out.size()*(tid+1))/omp_p;
       for(size_t i=a;i<b;i++){
         FMMNode_t* tnode=(FMMNode_t*)nodes_out[i];
-        Real_t s=std::pow(0.5,tnode->Depth());
+        Real_t s=pvfmm::pow<Real_t>(0.5,tnode->Depth());
 
         size_t interac_cnt_=0;
         { // S2U_Type
@@ -3183,13 +3183,13 @@ void FMM_Pts<FMMNode>::V_ListSetup(SetupData<Real_t>&  setup_data, FMMTree_t* tr
           for(size_t i=0;i<nodes_in_ .size();i++){
             size_t depth=nodes_in_[i]->Depth()+1;
             for(size_t j=0;j<scal_dim0;j++){
-              fft_scl[blk0][i*scal_dim0+j]=pow(2.0, src_scal[j]*depth);
+              fft_scl[blk0][i*scal_dim0+j]=pvfmm::pow<Real_t>(2.0, src_scal[j]*depth);
             }
           }
           for(size_t i=0;i<nodes_out_.size();i++){
             size_t depth=nodes_out_[i]->Depth()+1;
             for(size_t j=0;j<scal_dim1;j++){
-              ifft_scl[blk0][i*scal_dim1+j]=pow(2.0, trg_scal[j]*depth);
+              ifft_scl[blk0][i*scal_dim1+j]=pvfmm::pow<Real_t>(2.0, trg_scal[j]*depth);
             }
           }
         }
@@ -4327,7 +4327,7 @@ void FMM_Pts<FMMNode>::X_ListSetup(SetupData<Real_t>&  setup_data, FMMTree_t* tr
           interac_cnt.push_back(0);
           continue;
         }
-        Real_t s=std::pow(0.5,tnode->Depth());
+        Real_t s=pvfmm::pow<Real_t>(0.5,tnode->Depth());
 
         size_t interac_cnt_=0;
         { // X_Type
@@ -4611,7 +4611,7 @@ void FMM_Pts<FMMNode>::W_ListSetup(SetupData<Real_t>&  setup_data, FMMTree_t* tr
       size_t b=(nodes_out.size()*(tid+1))/omp_p;
       for(size_t i=a;i<b;i++){
         FMMNode_t* tnode=(FMMNode_t*)nodes_out[i];
-        Real_t s=std::pow(0.5,tnode->Depth());
+        Real_t s=pvfmm::pow<Real_t>(0.5,tnode->Depth());
 
         size_t interac_cnt_=0;
         { // W_Type
@@ -4910,7 +4910,7 @@ void FMM_Pts<FMMNode>::U_ListSetup(SetupData<Real_t>& setup_data, FMMTree_t* tre
       size_t b=(nodes_out.size()*(tid+1))/omp_p;
       for(size_t i=a;i<b;i++){
         FMMNode_t* tnode=(FMMNode_t*)nodes_out[i];
-        Real_t s=std::pow(0.5,tnode->Depth());
+        Real_t s=pvfmm::pow<Real_t>(0.5,tnode->Depth());
 
         size_t interac_cnt_=0;
         { // U0_Type
@@ -5285,7 +5285,7 @@ void FMM_Pts<FMMNode>::Down2TargetSetup(SetupData<Real_t>&  setup_data, FMMTree_
         Vector<Real_t>& scal_exp=ker->trg_scal;
         scal.ReInit(scal_exp.Dim());
         for(size_t i=0;i<scal.Dim();i++){
-          scal[i]=std::pow(2.0,-scal_exp[i]*l);
+          scal[i]=pvfmm::pow<Real_t>(2.0,-scal_exp[i]*l);
         }
       }
       for(size_t l=0;l<MAX_DEPTH;l++){ // scal[l*4+1]
@@ -5293,7 +5293,7 @@ void FMM_Pts<FMMNode>::Down2TargetSetup(SetupData<Real_t>&  setup_data, FMMTree_
         Vector<Real_t>& scal_exp=ker->src_scal;
         scal.ReInit(scal_exp.Dim());
         for(size_t i=0;i<scal.Dim();i++){
-          scal[i]=std::pow(2.0,-scal_exp[i]*l);
+          scal[i]=pvfmm::pow<Real_t>(2.0,-scal_exp[i]*l);
         }
       }
     }
@@ -5309,7 +5309,7 @@ void FMM_Pts<FMMNode>::Down2TargetSetup(SetupData<Real_t>&  setup_data, FMMTree_
       size_t b=(nodes_out.size()*(tid+1))/omp_p;
       for(size_t i=a;i<b;i++){
         FMMNode_t* tnode=(FMMNode_t*)nodes_out[i];
-        Real_t s=std::pow(0.5,tnode->Depth());
+        Real_t s=pvfmm::pow<Real_t>(0.5,tnode->Depth());
 
         size_t interac_cnt_=0;
         { // D2T_Type

+ 4 - 4
include/fmm_pts_gpu.hpp

@@ -19,17 +19,17 @@ void  in_perm_gpu(char* precomp_data, Real_t*  input_data, char* buff_in , size_
 template <class Real_t>
 void out_perm_gpu(char* precomp_data, Real_t* output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream);
 
-template<> void  in_perm_gpu<float >(char* precomp_data, float *  input_data, char* buff_in , size_t*  input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream){
+template<> inline void  in_perm_gpu<float >(char* precomp_data, float *  input_data, char* buff_in , size_t*  input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream){
   in_perm_gpu_f (precomp_data,  input_data, buff_in ,  input_perm, vec_cnt, M_dim0, stream);
 }
-template<> void  in_perm_gpu<double>(char* precomp_data, double*  input_data, char* buff_in , size_t*  input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream){
+template<> inline void  in_perm_gpu<double>(char* precomp_data, double*  input_data, char* buff_in , size_t*  input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream){
   in_perm_gpu_d (precomp_data,  input_data, buff_in ,  input_perm, vec_cnt, M_dim0, stream);
 }
 
-template<> void out_perm_gpu<float >(char* precomp_data, float * output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream){
+template<> inline void out_perm_gpu<float >(char* precomp_data, float * output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream){
   out_perm_gpu_f(precomp_data, output_data, buff_out, output_perm, vec_cnt, M_dim1, stream);
 }
-template<> void out_perm_gpu<double>(char* precomp_data, double* output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream){
+template<> inline void out_perm_gpu<double>(char* precomp_data, double* output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream){
   out_perm_gpu_d(precomp_data, output_data, buff_out, output_perm, vec_cnt, M_dim1, stream);
 }
 

+ 4 - 3
include/interac_list.txx

@@ -100,8 +100,8 @@ void InteracList<Node_t>::BuildList(Node_t* n, Mat_Type t){
   if(interac_list.Dim()!=ListCount(t)) interac_list.ReInit(ListCount(t));
   interac_list.SetZero();
 
-  static const int n_collg=(int)pow(3.0,(int)dim);
-  static const int n_child=(int)pow(2.0,(int)dim);
+  static const int n_collg=pvfmm::pow<unsigned int>(3,dim);
+  static const int n_child=pvfmm::pow<unsigned int>(2,dim);
   int rel_coord[3];
 
   switch (t){
@@ -369,7 +369,8 @@ int InteracList<Node_t>::class_hash(int* c_){
  */
 template <class Node_t>
 void InteracList<Node_t>::InitList(int max_r, int min_r, int step, Mat_Type t){
-  size_t count=(size_t)(pow((max_r*2)/step+1.0,(int)dim)-(min_r>0?pow((min_r*2)/step-1.0,(int)dim):0));
+  size_t count=           pvfmm::pow<unsigned int>((max_r*2)/step+1,dim)
+                -(min_r>0?pvfmm::pow<unsigned int>((min_r*2)/step-1,dim):0);
   Matrix<int>& M=rel_coord[t];
   M.Resize(count,dim);
   hash_lut[t].assign(PVFMM_MAX_COORD_HASH, -1);

+ 12 - 12
include/intrin_wrapper.hpp

@@ -68,7 +68,7 @@ inline T sub_intrin(const T& a, const T& b){
 
 template <class T>
 inline T rinv_approx_intrin(const T& r2){
-  if(r2!=0) return 1.0/sqrt(r2);
+  if(r2!=0) return 1.0/pvfmm::sqrt<T>(r2);
   return 0;
 }
 
@@ -79,18 +79,18 @@ inline void rinv_newton_intrin(T& rinv, const T& r2, const Real_t& nwtn_const){
 
 template <class T>
 inline T rinv_single_intrin(const T& r2){
-  if(r2!=0) return 1.0/sqrt(r2);
+  if(r2!=0) return 1.0/pvfmm::sqrt<T>(r2);
   return 0;
 }
 
 template <class T>
 inline T sin_intrin(const T& t){
-  return sin(t);
+  return pvfmm::sin<T>(t);
 }
 
 template <class T>
 inline T cos_intrin(const T& t){
-  return cos(t);
+  return pvfmm::cos<T>(t);
 }
 
 
@@ -261,25 +261,25 @@ inline __m128d cos_intrin(const __m128d& t){
 template <>
 inline __m128 sin_intrin(const __m128& t_){
   union{float e[4];__m128 d;} t; store_intrin(t.e, t_);
-  return _mm_set_ps(sin(t.e[3]),sin(t.e[2]),sin(t.e[1]),sin(t.e[0]));
+  return _mm_set_ps(pvfmm::sin<float>(t.e[3]),pvfmm::sin<float>(t.e[2]),pvfmm::sin<float>(t.e[1]),pvfmm::sin<float>(t.e[0]));
 }
 
 template <>
 inline __m128 cos_intrin(const __m128& t_){
   union{float e[4];__m128 d;} t; store_intrin(t.e, t_);
-  return _mm_set_ps(cos(t.e[3]),cos(t.e[2]),cos(t.e[1]),cos(t.e[0]));
+  return _mm_set_ps(pvfmm::cos<float>(t.e[3]),pvfmm::cos<float>(t.e[2]),pvfmm::cos<float>(t.e[1]),pvfmm::cos<float>(t.e[0]));
 }
 
 template <>
 inline __m128d sin_intrin(const __m128d& t_){
   union{double e[2];__m128d d;} t; store_intrin(t.e, t_);
-  return _mm_set_pd(sin(t.e[1]),sin(t.e[0]));
+  return _mm_set_pd(pvfmm::sin<double>(t.e[1]),pvfmm::sin<double>(t.e[0]));
 }
 
 template <>
 inline __m128d cos_intrin(const __m128d& t_){
   union{double e[2];__m128d d;} t; store_intrin(t.e, t_);
-  return _mm_set_pd(cos(t.e[1]),cos(t.e[0]));
+  return _mm_set_pd(pvfmm::cos<double>(t.e[1]),pvfmm::cos<double>(t.e[0]));
 }
 #endif
 #endif
@@ -453,25 +453,25 @@ inline __m256d cos_intrin(const __m256d& t){
 template <>
 inline __m256 sin_intrin(const __m256& t_){
   union{float e[8];__m256 d;} t; store_intrin(t.e, t_);//t.d=t_;
-  return _mm256_set_ps(sin(t.e[7]),sin(t.e[6]),sin(t.e[5]),sin(t.e[4]),sin(t.e[3]),sin(t.e[2]),sin(t.e[1]),sin(t.e[0]));
+  return _mm256_set_ps(pvfmm::sin<float>(t.e[7]),pvfmm::sin<float>(t.e[6]),pvfmm::sin<float>(t.e[5]),pvfmm::sin<float>(t.e[4]),pvfmm::sin<float>(t.e[3]),pvfmm::sin<float>(t.e[2]),pvfmm::sin<float>(t.e[1]),pvfmm::sin<float>(t.e[0]));
 }
 
 template <>
 inline __m256 cos_intrin(const __m256& t_){
   union{float e[8];__m256 d;} t; store_intrin(t.e, t_);//t.d=t_;
-  return _mm256_set_ps(cos(t.e[7]),cos(t.e[6]),cos(t.e[5]),cos(t.e[4]),cos(t.e[3]),cos(t.e[2]),cos(t.e[1]),cos(t.e[0]));
+  return _mm256_set_ps(pvfmm::cos<float>(t.e[7]),pvfmm::cos<float>(t.e[6]),pvfmm::cos<float>(t.e[5]),pvfmm::cos<float>(t.e[4]),pvfmm::cos<float>(t.e[3]),pvfmm::cos<float>(t.e[2]),pvfmm::cos<float>(t.e[1]),pvfmm::cos<float>(t.e[0]));
 }
 
 template <>
 inline __m256d sin_intrin(const __m256d& t_){
   union{double e[4];__m256d d;} t; store_intrin(t.e, t_);//t.d=t_;
-  return _mm256_set_pd(sin(t.e[3]),sin(t.e[2]),sin(t.e[1]),sin(t.e[0]));
+  return _mm256_set_pd(pvfmm::sin<double>(t.e[3]),pvfmm::sin<double>(t.e[2]),pvfmm::sin<double>(t.e[1]),pvfmm::sin<double>(t.e[0]));
 }
 
 template <>
 inline __m256d cos_intrin(const __m256d& t_){
   union{double e[4];__m256d d;} t; store_intrin(t.e, t_);//t.d=t_;
-  return _mm256_set_pd(cos(t.e[3]),cos(t.e[2]),cos(t.e[1]),cos(t.e[0]));
+  return _mm256_set_pd(pvfmm::cos<double>(t.e[3]),pvfmm::cos<double>(t.e[2]),pvfmm::cos<double>(t.e[1]),pvfmm::cos<double>(t.e[0]));
 }
 #endif
 #endif

+ 37 - 37
include/kernel.txx

@@ -83,7 +83,7 @@ void Kernel<T>::Initialize(bool verbose) const{
           x=(drand48()-0.5);
           y=(drand48()-0.5);
           z=(drand48()-0.5);
-          r=sqrt(x*x+y*y+z*z);
+          r=pvfmm::sqrt<T>(x*x+y*y+z*z);
         }while(r<0.25);
         trg_coord1[i*COORD_DIM+0]=x*scal;
         trg_coord1[i*COORD_DIM+1]=y*scal;
@@ -95,7 +95,7 @@ void Kernel<T>::Initialize(bool verbose) const{
           x=(drand48()-0.5);
           y=(drand48()-0.5);
           z=(drand48()-0.5);
-          r=sqrt(x*x+y*y+z*z);
+          r=pvfmm::sqrt<T>(x*x+y*y+z*z);
         }while(r<0.25);
         trg_coord1[i*COORD_DIM+0]=x*1.0/scal;
         trg_coord1[i*COORD_DIM+1]=y*1.0/scal;
@@ -105,10 +105,10 @@ void Kernel<T>::Initialize(bool verbose) const{
         BuildMatrix(&src_coord [          0], 1,
                     &trg_coord1[i*COORD_DIM], 1, &(M1[i][0]));
         for(size_t j=0;j<ker_dim[0]*ker_dim[1];j++){
-          abs_sum+=fabs(M1[i][j]);
+          abs_sum+=pvfmm::fabs<T>(M1[i][j]);
         }
       }
-      if(abs_sum>sqrt(eps) || scal<eps) break;
+      if(abs_sum>pvfmm::sqrt<T>(eps) || scal<eps) break;
       scal=scal*0.5;
     }
 
@@ -139,8 +139,8 @@ void Kernel<T>::Initialize(bool verbose) const{
       if(dot11>max_val*max_val*eps_ &&
          dot22>max_val*max_val*eps_ ){
         T s=dot12/dot11;
-        M_scal[0][i]=log(s)/log(2.0);
-        T err=sqrt(0.5*(dot22/dot11)/(s*s)-0.5);
+        M_scal[0][i]=pvfmm::log<T>(s)/pvfmm::log<T>(2.0);
+        T err=pvfmm::sqrt<T>(0.5*(dot22/dot11)/(s*s)-0.5);
         if(err>eps_){
           scale_invar=false;
           M_scal[0][i]=0.0;
@@ -177,7 +177,7 @@ void Kernel<T>::Initialize(bool verbose) const{
       for(size_t i0=0;i0<ker_dim[0];i0++)
       for(size_t i1=0;i1<ker_dim[1];i1++){
         if(M_scal[i0][i1]>=0){
-          if(fabs(src_scal[i0]+trg_scal[i1]-M_scal[i0][i1])>eps_){
+          if(pvfmm::fabs<T>(src_scal[i0]+trg_scal[i1]-M_scal[i0][i1])>eps_){
             scale_invar=false;
           }
         }
@@ -202,7 +202,7 @@ void Kernel<T>::Initialize(bool verbose) const{
         x=(drand48()-0.5);
         y=(drand48()-0.5);
         z=(drand48()-0.5);
-        r=sqrt(x*x+y*y+z*z);
+        r=pvfmm::sqrt<T>(x*x+y*y+z*z);
       }while(r<0.25);
       trg_coord1[i*COORD_DIM+0]=x*scal;
       trg_coord1[i*COORD_DIM+1]=y*scal;
@@ -214,7 +214,7 @@ void Kernel<T>::Initialize(bool verbose) const{
         x=(drand48()-0.5);
         y=(drand48()-0.5);
         z=(drand48()-0.5);
-        r=sqrt(x*x+y*y+z*z);
+        r=pvfmm::sqrt<T>(x*x+y*y+z*z);
       }while(r<0.25);
       trg_coord1[i*COORD_DIM+0]=x*1.0/scal;
       trg_coord1[i*COORD_DIM+1]=y*1.0/scal;
@@ -292,8 +292,8 @@ void Kernel<T>::Initialize(bool verbose) const{
             dot22[i][j]+=M2[k][i]*M2[k][j];
           }
           for(size_t i=0;i<ker_dim[0]*ker_dim[1];i++){
-            norm1[i]=sqrt(dot11[i][i]);
-            norm2[i]=sqrt(dot22[i][i]);
+            norm1[i]=pvfmm::sqrt<T>(dot11[i][i]);
+            norm2[i]=pvfmm::sqrt<T>(dot22[i][i]);
           }
           for(size_t i=0;i<ker_dim[0]*ker_dim[1];i++)
           for(size_t j=0;j<ker_dim[0]*ker_dim[1];j++){
@@ -309,10 +309,10 @@ void Kernel<T>::Initialize(bool verbose) const{
         for(size_t i=0;i<ker_dim[0]*ker_dim[1];i++){
           if(norm1[i]>eps_ && M11[0][i]==0){
             for(size_t j=0;j<ker_dim[0]*ker_dim[1];j++){
-              if(fabs(norm1[i]-norm1[j])<eps_ && fabs(fabs(dot11[i][j])-1.0)<eps_){
+              if(pvfmm::fabs<T>(norm1[i]-norm1[j])<eps_ && pvfmm::fabs<T>(pvfmm::fabs<T>(dot11[i][j])-1.0)<eps_){
                 M11[0][j]=(dot11[i][j]>0?flag:-flag);
               }
-              if(fabs(norm1[i]-norm2[j])<eps_ && fabs(fabs(dot12[i][j])-1.0)<eps_){
+              if(pvfmm::fabs<T>(norm1[i]-norm2[j])<eps_ && pvfmm::fabs<T>(pvfmm::fabs<T>(dot12[i][j])-1.0)<eps_){
                 M22[0][j]=(dot12[i][j]>0?flag:-flag);
               }
             }
@@ -576,8 +576,8 @@ void Kernel<T>::Initialize(bool verbose) const{
       std::cout<<"Scaling Matrix :\n";
       Matrix<T> Src(ker_dim[0],1);
       Matrix<T> Trg(1,ker_dim[1]);
-      for(size_t i=0;i<ker_dim[0];i++) Src[i][0]=pow(2.0,src_scal[i]);
-      for(size_t i=0;i<ker_dim[1];i++) Trg[0][i]=pow(2.0,trg_scal[i]);
+      for(size_t i=0;i<ker_dim[0];i++) Src[i][0]=pvfmm::pow<T>(2.0,src_scal[i]);
+      for(size_t i=0;i<ker_dim[1];i++) Trg[0][i]=pvfmm::pow<T>(2.0,trg_scal[i]);
       std::cout<<Src*Trg;
     }
     if(ker_dim[0]*ker_dim[1]>0){ // Accuracy of multipole expansion
@@ -625,11 +625,11 @@ void Kernel<T>::Initialize(bool verbose) const{
             x=(drand48()-0.5);
             y=(drand48()-0.5);
             z=(drand48()-0.5);
-            r=sqrt(x*x+y*y+z*z);
+            r=pvfmm::sqrt<T>(x*x+y*y+z*z);
           }while(r==0.0);
-          trg_coord.push_back(x/r*sqrt((T)COORD_DIM)*rad);
-          trg_coord.push_back(y/r*sqrt((T)COORD_DIM)*rad);
-          trg_coord.push_back(z/r*sqrt((T)COORD_DIM)*rad);
+          trg_coord.push_back(x/r*pvfmm::sqrt<T>((T)COORD_DIM)*rad);
+          trg_coord.push_back(y/r*pvfmm::sqrt<T>((T)COORD_DIM)*rad);
+          trg_coord.push_back(z/r*pvfmm::sqrt<T>((T)COORD_DIM)*rad);
         }
 
         Matrix<T> M_s2c(n_src*ker_dim[0],n_check*ker_dim[1]);
@@ -646,7 +646,7 @@ void Kernel<T>::Initialize(bool verbose) const{
           T eps=1, max_S=0;
           while(eps*(T)0.5+(T)1.0>1.0) eps*=0.5;
           for(size_t i=0;i<std::min(S.Dim(0),S.Dim(1));i++){
-            if(fabs(S[i][i])>max_S) max_S=fabs(S[i][i]);
+            if(pvfmm::fabs<T>(S[i][i])>max_S) max_S=pvfmm::fabs<T>(S[i][i]);
           }
           for(size_t i=0;i<S.Dim(0);i++) S[i][i]=(S[i][i]>eps*max_S*4?1.0/S[i][i]:0.0);
           M_c2e0=V.Transpose()*S;
@@ -665,8 +665,8 @@ void Kernel<T>::Initialize(bool verbose) const{
         T max_error=0, max_value=0;
         for(size_t i=0;i<M.Dim(0);i++)
         for(size_t j=0;j<M.Dim(1);j++){
-          max_error=std::max<T>(max_error,fabs(M    [i][j]));
-          max_value=std::max<T>(max_value,fabs(M_s2t[i][j]));
+          max_error=std::max<T>(max_error,pvfmm::fabs<T>(M    [i][j]));
+          max_value=std::max<T>(max_value,pvfmm::fabs<T>(M_s2t[i][j]));
         }
 
         std::cout<<(double)(max_error/max_value)<<' ';
@@ -1183,12 +1183,12 @@ template<class T> const Kernel<T>& LaplaceKernel<T>::gradient(){
   return grad_ker;
 }
 
-template<> const Kernel<double>& LaplaceKernel<double>::potential(){
+template<> inline const Kernel<double>& LaplaceKernel<double>::potential(){
   typedef double T;
   static Kernel<T> potn_ker=BuildKernel<T, laplace_poten<T,2>, laplace_dbl_poten<T,2> >("laplace"     , 3, std::pair<int,int>(1,1));
   return potn_ker;
 }
-template<> const Kernel<double>& LaplaceKernel<double>::gradient(){
+template<> inline const Kernel<double>& LaplaceKernel<double>::gradient(){
   typedef double T;
   static Kernel<T> potn_ker=BuildKernel<T, laplace_poten<T,2>, laplace_dbl_poten<T,2> >("laplace"     , 3, std::pair<int,int>(1,1));
   static Kernel<T> grad_ker=BuildKernel<T, laplace_grad <T,2>                         >("laplace_grad", 3, std::pair<int,int>(1,3),
@@ -1345,7 +1345,7 @@ void stokes_sym_dip(T* r_src, int src_cnt, T* v_src, int dof, T* r_trg, int trg_
         T R = (dR[0]*dR[0]+dR[1]*dR[1]+dR[2]*dR[2]);
         if (R!=0){
           T invR2=1.0/R;
-          T invR=sqrt(invR2);
+          T invR=pvfmm::sqrt<T>(invR2);
           T invR3=invR2*invR;
 
           T* f=&v_src[(s*dof+i)*6+0];
@@ -1384,7 +1384,7 @@ void stokes_press(T* r_src, int src_cnt, T* v_src_, int dof, T* r_trg, int trg_c
         T R = (dR[0]*dR[0]+dR[1]*dR[1]+dR[2]*dR[2]);
         if (R!=0){
           T invR2=1.0/R;
-          T invR=sqrt(invR2);
+          T invR=pvfmm::sqrt<T>(invR2);
           T invR3=invR2*invR;
           T v_src[3]={v_src_[(s*dof+i)*3  ],
                       v_src_[(s*dof+i)*3+1],
@@ -1419,7 +1419,7 @@ void stokes_stress(T* r_src, int src_cnt, T* v_src_, int dof, T* r_trg, int trg_
         T R = (dR[0]*dR[0]+dR[1]*dR[1]+dR[2]*dR[2]);
         if (R!=0){
           T invR2=1.0/R;
-          T invR=sqrt(invR2);
+          T invR=pvfmm::sqrt<T>(invR2);
           T invR3=invR2*invR;
           T invR5=invR3*invR2;
           T v_src[3]={v_src_[(s*dof+i)*3  ],
@@ -1466,7 +1466,7 @@ void stokes_grad(T* r_src, int src_cnt, T* v_src_, int dof, T* r_trg, int trg_cn
         T R = (dR[0]*dR[0]+dR[1]*dR[1]+dR[2]*dR[2]);
         if (R!=0){
           T invR2=1.0/R;
-          T invR=sqrt(invR2);
+          T invR=pvfmm::sqrt<T>(invR2);
           T invR3=invR2*invR;
           T v_src[3]={v_src_[(s*dof+i)*3  ],
                       v_src_[(s*dof+i)*3+1],
@@ -1618,7 +1618,7 @@ namespace
         double y = ty[i] - sy[j];
         double z = tz[i] - sz[j];
         double r2 = x*x + y*y + z*z;
-        double r = sqrt(r2);
+        double r = pvfmm::sqrt<T>(r2);
         double invdr;
         if (r == 0)
           invdr = 0;
@@ -1793,7 +1793,7 @@ namespace
         double y = ty[i] - sy[j];
         double z = tz[i] - sz[j];
         double r2 = x*x + y*y + z*z;
-        double r = sqrt(r2);
+        double r = pvfmm::sqrt<T>(r2);
         double invdr;
         if (r == 0)
           invdr = 0;
@@ -1982,7 +1982,7 @@ namespace
         double y = ty[i] - sy[j];
         double z = tz[i] - sz[j];
         double r2 = x*x + y*y + z*z;
-        double r = sqrt(r2);
+        double r = pvfmm::sqrt<T>(r2);
         double invdr;
         if (r == 0)
           invdr = 0;
@@ -2102,7 +2102,7 @@ namespace
 }
 
 template <>
-void stokes_press<double>(double* r_src, int src_cnt, double* v_src_, int dof, double* r_trg, int trg_cnt, double* k_out, mem::MemoryManager* mem_mgr){
+inline void stokes_press<double>(double* r_src, int src_cnt, double* v_src_, int dof, double* r_trg, int trg_cnt, double* k_out, mem::MemoryManager* mem_mgr){
   Profile::Add_FLOP((long long)trg_cnt*(long long)src_cnt*(17*dof));
 
   stokesPressureSSEShuffle(src_cnt, trg_cnt, r_src, r_trg, v_src_, k_out, mem_mgr);
@@ -2110,14 +2110,14 @@ void stokes_press<double>(double* r_src, int src_cnt, double* v_src_, int dof, d
 }
 
 template <>
-void stokes_stress<double>(double* r_src, int src_cnt, double* v_src_, int dof, double* r_trg, int trg_cnt, double* k_out, mem::MemoryManager* mem_mgr){
+inline void stokes_stress<double>(double* r_src, int src_cnt, double* v_src_, int dof, double* r_trg, int trg_cnt, double* k_out, mem::MemoryManager* mem_mgr){
   Profile::Add_FLOP((long long)trg_cnt*(long long)src_cnt*(45*dof));
 
   stokesStressSSEShuffle(src_cnt, trg_cnt, r_src, r_trg, v_src_, k_out, mem_mgr);
 }
 
 template <>
-void stokes_grad<double>(double* r_src, int src_cnt, double* v_src_, int dof, double* r_trg, int trg_cnt, double* k_out, mem::MemoryManager* mem_mgr){
+inline void stokes_grad<double>(double* r_src, int src_cnt, double* v_src_, int dof, double* r_trg, int trg_cnt, double* k_out, mem::MemoryManager* mem_mgr){
   Profile::Add_FLOP((long long)trg_cnt*(long long)src_cnt*(89*dof));
 
   const double mu=1.0;
@@ -2143,7 +2143,7 @@ template<class T> const Kernel<T>& StokesKernel<T>::vel_grad(){
   return ker;
 }
 
-template<> const Kernel<double>& StokesKernel<double>::velocity(){
+template<> inline const Kernel<double>& StokesKernel<double>::velocity(){
   typedef double T;
   static Kernel<T> ker=BuildKernel<T, stokes_vel<T,2>, stokes_sym_dip>("stokes_vel"   , 3, std::pair<int,int>(3,3));
   return ker;
@@ -2273,7 +2273,7 @@ template<class T> const Kernel<T>& BiotSavartKernel<T>::potential(){
   static Kernel<T> ker=BuildKernel<T, biot_savart<T,1> >("biot_savart", 3, std::pair<int,int>(3,3));
   return ker;
 }
-template<> const Kernel<double>& BiotSavartKernel<double>::potential(){
+template<> inline const Kernel<double>& BiotSavartKernel<double>::potential(){
   typedef double T;
   static Kernel<T> ker=BuildKernel<T, biot_savart<T,2> >("biot_savart", 3, std::pair<int,int>(3,3));
   return ker;
@@ -2406,7 +2406,7 @@ template<class T> const Kernel<T>& HelmholtzKernel<T>::potential(){
   static Kernel<T> ker=BuildKernel<T, helmholtz_poten<T,1> >("helmholtz"     , 3, std::pair<int,int>(2,2));
   return ker;
 }
-template<> const Kernel<double>& HelmholtzKernel<double>::potential(){
+template<> inline const Kernel<double>& HelmholtzKernel<double>::potential(){
   typedef double T;
   static Kernel<T> ker=BuildKernel<T, helmholtz_poten<T,3> >("helmholtz"     , 3, std::pair<int,int>(2,2));
   return ker;

+ 16 - 16
include/mat_utils.txx

@@ -105,7 +105,7 @@ namespace mat{
 
   template <class T>
   static void GivensL(T* S_, const size_t dim[2], size_t m, T a, T b){
-    T r=sqrt(a*a+b*b);
+    T r=pvfmm::sqrt<T>(a*a+b*b);
     T c=a/r;
     T s=-b/r;
 
@@ -123,7 +123,7 @@ namespace mat{
 
   template <class T>
   static void GivensR(T* S_, const size_t dim[2], size_t m, T a, T b){
-    T r=sqrt(a*a+b*b);
+    T r=pvfmm::sqrt<T>(a*a+b*b);
     T c=a/r;
     T s=-b/r;
 
@@ -159,9 +159,9 @@ namespace mat{
           for(size_t j=i;j<dim[0];j++){
             x_inv_norm+=S(j,i)*S(j,i);
           }
-          if(x_inv_norm>0) x_inv_norm=1/sqrt(x_inv_norm);
+          if(x_inv_norm>0) x_inv_norm=1/pvfmm::sqrt<T>(x_inv_norm);
 
-          T alpha=sqrt(1+x1*x_inv_norm);
+          T alpha=pvfmm::sqrt<T>(1+x1*x_inv_norm);
           T beta=x_inv_norm/alpha;
 
           house_vec[i]=-alpha;
@@ -203,9 +203,9 @@ namespace mat{
           for(size_t j=i+1;j<dim[1];j++){
             x_inv_norm+=S(i,j)*S(i,j);
           }
-          if(x_inv_norm>0) x_inv_norm=1/sqrt(x_inv_norm);
+          if(x_inv_norm>0) x_inv_norm=1/pvfmm::sqrt<T>(x_inv_norm);
 
-          T alpha=sqrt(1+x1*x_inv_norm);
+          T alpha=pvfmm::sqrt<T>(1+x1*x_inv_norm);
           T beta=x_inv_norm/alpha;
 
           house_vec[i+1]=-alpha;
@@ -252,13 +252,13 @@ namespace mat{
       T S_max=0.0;
       for(size_t i=0;i<dim[1];i++) S_max=(S_max>S(i,i)?S_max:S(i,i));
 
-      //while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*(fabs(S(k0,k0))+fabs(S(k0+1,k0+1)))) k0++;
-      while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*S_max) k0++;
+      //while(k0<dim[1]-1 && pvfmm::fabs<T>(S(k0,k0+1))<=eps*(pvfmm::fabs<T>(S(k0,k0))+pvfmm::fabs<T>(S(k0+1,k0+1)))) k0++;
+      while(k0<dim[1]-1 && pvfmm::fabs<T>(S(k0,k0+1))<=eps*S_max) k0++;
       if(k0==dim[1]-1) continue;
 
       size_t n=k0+2;
-      //while(n<dim[1] && fabs(S(n-1,n))>eps*(fabs(S(n-1,n-1))+fabs(S(n,n)))) n++;
-      while(n<dim[1] && fabs(S(n-1,n))>eps*S_max) n++;
+      //while(n<dim[1] && pvfmm::fabs<T>(S(n-1,n))>eps*(pvfmm::fabs<T>(S(n-1,n-1))+pvfmm::fabs<T>(S(n,n)))) n++;
+      while(n<dim[1] && pvfmm::fabs<T>(S(n-1,n))>eps*S_max) n++;
 
       T alpha=0;
       T beta=0;
@@ -273,11 +273,11 @@ namespace mat{
         T b=-(C[0][0]+C[1][1])/2;
         T c=  C[0][0]*C[1][1] - C[0][1]*C[1][0];
         T d=0;
-        if(b*b-c>0) d=sqrt(b*b-c);
+        if(b*b-c>0) d=pvfmm::sqrt<T>(b*b-c);
         else{
           T b=(C[0][0]-C[1][1])/2;
           T c=-C[0][1]*C[1][0];
-          if(b*b-c>0) d=sqrt(b*b-c);
+          if(b*b-c>0) d=pvfmm::sqrt<T>(b*b-c);
         }
 
         T lambda1=-b+d;
@@ -318,7 +318,7 @@ namespace mat{
           }
         }
         for(size_t i=0;i<dim[1]-1;i++){
-          if(fabs(S(i,i+1))<=eps*S_max){
+          if(pvfmm::fabs<T>(S(i,i+1))<=eps*S_max){
             S(i,i+1)=0;
           }
         }
@@ -337,9 +337,9 @@ namespace mat{
       T max_nondiag1=0;
       for(size_t i=0;i<E.Dim(0);i++)
       for(size_t j=0;j<E.Dim(1);j++){
-        if(max_err<fabs(E[i][j])) max_err=fabs(E[i][j]);
-        if((i>j+0 || i+0<j) && max_nondiag0<fabs(S0[i][j])) max_nondiag0=fabs(S0[i][j]);
-        if((i>j+1 || i+1<j) && max_nondiag1<fabs(S0[i][j])) max_nondiag1=fabs(S0[i][j]);
+        if(max_err<pvfmm::fabs<T>(E[i][j])) max_err=pvfmm::fabs<T>(E[i][j]);
+        if((i>j+0 || i+0<j) && max_nondiag0<pvfmm::fabs<T>(S0[i][j])) max_nondiag0=pvfmm::fabs<T>(S0[i][j]);
+        if((i>j+1 || i+1<j) && max_nondiag1<pvfmm::fabs<T>(S0[i][j])) max_nondiag1=pvfmm::fabs<T>(S0[i][j]);
       }
       std::cout<<max_err<<'\n';
       std::cout<<max_nondiag0<<'\n';

+ 63 - 0
include/math_utils.hpp

@@ -0,0 +1,63 @@
+/**
+ * \file math_utils.hpp
+ * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
+ * \date 7-16-2014
+ * \brief This file contains wrappers for functions in math.h
+ */
+
+#include <cmath>
+#include <ostream>
+
+#ifndef _MATH_UTILS_
+#define _MATH_UTILS_
+
+namespace pvfmm{
+
+template <class Real_t>
+inline Real_t const_pi(){return 3.1415926535897932384626433832795028841;}
+
+template <class Real_t>
+inline Real_t const_e (){return 2.7182818284590452353602874713526624977;}
+
+//template <class Real_t>
+//inline std::ostream& operator<<(std::ostream& output, const Real_t q_);
+
+template <class Real_t>
+inline Real_t fabs(const Real_t f){return ::fabs(f);}
+
+template <class Real_t>
+inline Real_t sqrt(const Real_t a){return ::sqrt(a);}
+
+template <class Real_t>
+inline Real_t sin(const Real_t a){return ::sin(a);}
+
+template <class Real_t>
+inline Real_t cos(const Real_t a){return ::cos(a);}
+
+template <class Real_t>
+inline Real_t exp(const Real_t a){return ::exp(a);}
+
+template <class Real_t>
+inline Real_t log(const Real_t a){return ::log(a);}
+
+template <class Real_t>
+inline Real_t pow(const Real_t b, const Real_t e){return ::pow(b,e);}
+
+}//end namespace
+
+
+
+#ifdef PVFMM_QUAD_T
+
+typedef PVFMM_QUAD_T QuadReal_t;
+
+namespace pvfmm{
+inline QuadReal_t atoquad(const char* str);
+}
+
+inline std::ostream& operator<<(std::ostream& output, const QuadReal_t q_);
+
+#endif //PVFMM_QUAD_T
+
+#endif //_MATH_UTILS_HPP_
+

+ 2 - 2
include/matrix.txx

@@ -26,7 +26,7 @@ std::ostream& operator<<(std::ostream& output, const Matrix<T>& M){
   for(size_t i=0;i<M.Dim(0);i++){
     for(size_t j=0;j<M.Dim(1);j++){
       float f=((float)M(i,j));
-      if(fabs(f)<1e-25) f=0;
+      if(pvfmm::fabs<T>(f)<1e-25) f=0;
       output<<std::setw(10)<<((double)f)<<' ';
     }
     output<<";\n";
@@ -491,7 +491,7 @@ Matrix<T> Matrix<T>::pinv(T eps){
   if(eps<0){
     eps=1.0;
     while(eps+(T)1.0>1.0) eps*=0.5;
-    eps=sqrt(eps);
+    eps=pvfmm::sqrt<T>(eps);
   }
   Matrix<T> M_r(dim[1],dim[0]);
   mat::pinv(data_ptr,dim[0],dim[1],eps,M_r.data_ptr);

+ 3 - 3
include/mpi_node.txx

@@ -34,7 +34,7 @@ void MPI_Node<T>::Initialize(TreeNode* parent_,int path2node_, TreeNode::NodeDat
   }
 
   //Initialize colleagues array.
-  int n=(int)pow(3.0,Dim());
+  int n=pvfmm::pow<unsigned int>(3,Dim());
   for(int i=0;i<n;i++) colleague[i]=NULL;
 
   //Set MPI_Node specific data.
@@ -125,7 +125,7 @@ void MPI_Node<T>::Subdivide(){
     }
 
     Real_t* c=this->Coord();
-    Real_t s=pow(0.5,this->Depth()+1);
+    Real_t s=pvfmm::pow<Real_t>(0.5,this->Depth()+1);
     for(size_t j=0;j<pt_coord.size();j++){
       if(!pt_coord[j] || !pt_coord[j]->Dim()) continue;
       Vector<Real_t>& coord=*pt_coord[j];
@@ -526,7 +526,7 @@ void MPI_Node<T>::VTU_Data(VTUData_t& vtu_data, std::vector<Node_t*>& nodes, int
       if(n->IsGhost() || !n->IsLeaf()) continue;
 
       Real_t* c=n->Coord();
-      Real_t s=pow(0.5,n->Depth());
+      Real_t s=pvfmm::pow<Real_t>(0.5,n->Depth());
       for(int i=0;i<8;i++){
         coord.push_back(c[0]+(i&1?1:0)*s);
         coord.push_back(c[1]+(i&2?1:0)*s);

+ 7 - 7
include/mpi_tree.txx

@@ -1071,10 +1071,10 @@ void MPI_Tree<TreeNode>::Balance21_local(BoundaryType bndry){
     curr_node=this->PreorderNxt(curr_node);
   }
 
-  int n1=pow(3.0,this->Dim());
-  int n2=pow(2.0,this->Dim());
+  int n1=pvfmm::pow<unsigned int>(3,this->Dim());
+  int n2=pvfmm::pow<unsigned int>(2,this->Dim());
   for(int i=this->max_depth;i>0;i--){
-    Real_t s=pow(0.5,i);
+    Real_t s=pvfmm::pow<Real_t>(0.5,i);
     for(size_t j=0;j<node_lst[i].size();j++){
       curr_node=node_lst[i][j];
       Real_t* coord=curr_node->Coord();
@@ -1106,7 +1106,7 @@ void MPI_Tree<TreeNode>::Balance21_local(BoundaryType bndry){
                   }// */
                 }
               }
-              Real_t s1=pow(0.5,node->Depth()+1);
+              Real_t s1=pvfmm::pow<Real_t>(0.5,node->Depth()+1);
               Real_t* c1=node->Coord();
               int c_id=((c0[0]-c1[0])>s1?1:0)+
                        ((c0[1]-c1[1])>s1?2:0)+
@@ -1129,8 +1129,8 @@ void MPI_Tree<TreeNode>::Balance21_local(BoundaryType bndry){
 
 template <class TreeNode>
 void MPI_Tree<TreeNode>::SetColleagues(BoundaryType bndry, Node_t* node){
-  int n1=(int)pow(3.0,this->Dim());
-  int n2=(int)pow(2.0,this->Dim());
+  int n1=(int)pvfmm::pow<unsigned int>(3,this->Dim());
+  int n2=(int)pvfmm::pow<unsigned int>(2,this->Dim());
 
   if(node==NULL){
     Node_t* curr_node=this->PreorderFirst();
@@ -1163,7 +1163,7 @@ void MPI_Tree<TreeNode>::SetColleagues(BoundaryType bndry, Node_t* node){
     Node_t* root_node=this->RootNode();
     int d=node->Depth();
     Real_t* c0=node->Coord();
-    Real_t s=pow(0.5,d);
+    Real_t s=pvfmm::pow<Real_t>(0.5,d);
     Real_t c[COORD_DIM];
     int idx=0;
     for(int i=-1;i<=1;i++)

+ 1 - 1
include/parUtils.txx

@@ -438,7 +438,7 @@ namespace par{
 
           totSize_new=(myrank<=(npes-1)/2?*split_disp:totSize-*split_disp);
           //double err=(((double)*split_disp)/(totSize/2))-1.0;
-          //if(fabs(err)<0.01 || npes<=16) break;
+          //if(pvfmm::fabs<double>(err)<0.01 || npes<=16) break;
           //else if(!myrank) std::cout<<err<<'\n';
         }
 

+ 13 - 13
include/pvfmm.hpp

@@ -31,8 +31,8 @@ typedef FMM_Tree<ChebFMM>            ChebFMM_Tree;
 typedef ChebFMM_Node::NodeData       ChebFMM_Data;
 typedef ChebFMM_Node::Function_t<double> ChebFn;
 
-ChebFMM_Tree* ChebFMM_CreateTree(int cheb_deg, int data_dim, ChebFn fn_ptr, std::vector<double>& trg_coord, MPI_Comm& comm,
-                                 double tol=1e-6, int max_pts=100, BoundaryType bndry=FreeSpace, int init_depth=0){
+inline ChebFMM_Tree* ChebFMM_CreateTree(int cheb_deg, int data_dim, ChebFn fn_ptr, std::vector<double>& trg_coord, MPI_Comm& comm,
+                                        double tol=1e-6, int max_pts=100, BoundaryType bndry=FreeSpace, int init_depth=0){
   int np, myrank;
   MPI_Comm_size(comm, &np);
   MPI_Comm_rank(comm, &myrank);
@@ -50,9 +50,9 @@ ChebFMM_Tree* ChebFMM_CreateTree(int cheb_deg, int data_dim, ChebFn fn_ptr, std:
 
   { // Set points for initial tree.
     std::vector<double> coord;
-    size_t N=pow(8.0,init_depth);
+    size_t N=pvfmm::pow<unsigned int>(8,init_depth);
     N=(N<np?np:N)*max_pts;
-    size_t NN=ceil(pow((double)N,1.0/3.0));
+    size_t NN=ceil(pvfmm::pow<double>(N,1.0/3.0));
     size_t N_total=NN*NN*NN;
     size_t start= myrank   *N_total/np;
     size_t end  =(myrank+1)*N_total/np;
@@ -73,7 +73,7 @@ ChebFMM_Tree* ChebFMM_CreateTree(int cheb_deg, int data_dim, ChebFn fn_ptr, std:
   return tree;
 }
 
-void ChebFMM_Evaluate(ChebFMM_Tree* tree, std::vector<double>& trg_val, size_t loc_size=0){
+inline void ChebFMM_Evaluate(ChebFMM_Tree* tree, std::vector<double>& trg_val, size_t loc_size=0){
   tree->RunFMM();
   Vector<double> trg_value;
   Vector<size_t> trg_scatter;
@@ -104,10 +104,10 @@ typedef FMM_Pts<PtFMM_Node>         PtFMM;
 typedef FMM_Tree<PtFMM>             PtFMM_Tree;
 typedef PtFMM_Node::NodeData        PtFMM_Data;
 
-PtFMM_Tree* PtFMM_CreateTree(std::vector<double>&  src_coord, std::vector<double>&  src_value,
-                             std::vector<double>& surf_coord, std::vector<double>& surf_value,
-                             std::vector<double>& trg_coord, MPI_Comm& comm, int max_pts=100,
-                             BoundaryType bndry=FreeSpace, int init_depth=0){
+inline PtFMM_Tree* PtFMM_CreateTree(std::vector<double>&  src_coord, std::vector<double>&  src_value,
+                                    std::vector<double>& surf_coord, std::vector<double>& surf_value,
+                                    std::vector<double>& trg_coord, MPI_Comm& comm, int max_pts=100,
+                                    BoundaryType bndry=FreeSpace, int init_depth=0){
   int np, myrank;
   MPI_Comm_size(comm, &np);
   MPI_Comm_rank(comm, &myrank);
@@ -135,15 +135,15 @@ PtFMM_Tree* PtFMM_CreateTree(std::vector<double>&  src_coord, std::vector<double
   return tree;
 }
 
-PtFMM_Tree* PtFMM_CreateTree(std::vector<double>&  src_coord, std::vector<double>&  src_value,
-                             std::vector<double>& trg_coord, MPI_Comm& comm, int max_pts=100,
-                             BoundaryType bndry=FreeSpace, int init_depth=0){
+inline PtFMM_Tree* PtFMM_CreateTree(std::vector<double>&  src_coord, std::vector<double>&  src_value,
+                                    std::vector<double>& trg_coord, MPI_Comm& comm, int max_pts=100,
+                                    BoundaryType bndry=FreeSpace, int init_depth=0){
   std::vector<double> surf_coord;
   std::vector<double> surf_value;
   return PtFMM_CreateTree(src_coord, src_value, surf_coord,surf_value, trg_coord, comm, max_pts, bndry, init_depth);
 }
 
-void PtFMM_Evaluate(PtFMM_Tree* tree, std::vector<double>& trg_val, size_t loc_size=0, std::vector<double>* src_val=NULL, std::vector<double>* surf_val=NULL){
+inline void PtFMM_Evaluate(PtFMM_Tree* tree, std::vector<double>& trg_val, size_t loc_size=0, std::vector<double>* src_val=NULL, std::vector<double>* surf_val=NULL){
   if(src_val){
     std::vector<size_t> src_scatter_;
     std::vector<PtFMM_Node*>& nodes=tree->GetNodeList();

+ 4 - 16
include/pvfmm_common.hpp

@@ -16,9 +16,9 @@
 #endif
 
 //Disable assert checks.
-//#ifndef NDEBUG
-//#define NDEBUG
-//#endif
+#ifndef NDEBUG
+#define NDEBUG
+#endif
 
 //Enable profiling
 #define __PROFILE__ 5
@@ -59,18 +59,6 @@
 
 #include <stacktrace.h>
 
-#ifdef __INTEL_OFFLOAD
-#pragma offload_attribute(push,target(mic))
-#endif
-template <class T>
-inline T const_pi(){return 3.1415926535897932384626433832795028841;}
-
-template <class T>
-inline T const_e (){return 2.7182818284590452353602874713526624977;}
-#ifdef __INTEL_OFFLOAD
-#pragma offload_attribute(pop)
-#endif
-
-#include <quad_utils.hpp>
+#include <math_utils.hpp>
 
 #endif //_PVFMM_COMMON_HPP_

+ 0 - 59
include/quad_utils.hpp

@@ -1,59 +0,0 @@
-/**
- * \file quad_utils.hpp
- * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
- * \date 7-16-2014
- * \brief This file contains definition of QuadReal_t.
- */
-
-#include <cmath>
-#include <ostream>
-
-#ifndef _QUAD_UTILS_
-#define _QUAD_UTILS_
-
-#ifdef PVFMM_QUAD_T
-
-typedef PVFMM_QUAD_T QuadReal_t;
-
-inline std::ostream& operator<<(std::ostream& output, const QuadReal_t q_);
-
-#ifdef __INTEL_OFFLOAD
-#pragma offload_attribute(push,target(mic))
-#endif
-
-inline QuadReal_t atoquad(const char* str);
-
-inline QuadReal_t fabs(const QuadReal_t f);
-
-inline QuadReal_t sqrt(const QuadReal_t a);
-
-inline QuadReal_t sin(const QuadReal_t a);
-
-inline QuadReal_t cos(const QuadReal_t a);
-
-inline QuadReal_t exp(const QuadReal_t a);
-
-inline QuadReal_t log(const QuadReal_t a);
-
-template<>
-inline QuadReal_t const_pi<QuadReal_t>(){
-  static QuadReal_t pi=atoquad("3.1415926535897932384626433832795028841");
-  return pi;
-}
-
-template<>
-inline QuadReal_t const_e<QuadReal_t>(){
-  static QuadReal_t e =atoquad("2.7182818284590452353602874713526624977");
-  return e;
-}
-
-#ifdef __INTEL_OFFLOAD
-#pragma offload_attribute(pop)
-#endif
-
-#include <quad_utils.txx>
-
-#endif //PVFMM_QUAD_T
-
-#endif //_QUAD_UTILS_HPP_
-

+ 51 - 0
src/cheb_utils.cpp

@@ -0,0 +1,51 @@
+/**
+ * \file cheb_utils.cpp
+ * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
+ * \date 3-23-2015
+ * \brief This file contains implementation of Chebyshev functions.
+ */
+
+#include <cheb_utils.hpp>
+
+namespace pvfmm{
+
+template <>
+void quad_rule<double>(int n, double* x, double* w){
+  static std::vector<Vector<double> > x_lst(10000);
+  static std::vector<Vector<double> > w_lst(10000);
+  assert(n<10000);
+
+  bool done=false;
+  #pragma omp critical (QUAD_RULE)
+  if(x_lst[n].Dim()>0){
+    Vector<double>& x_=x_lst[n];
+    Vector<double>& w_=w_lst[n];
+    for(int i=0;i<n;i++){
+      x[i]=x_[i];
+      w[i]=w_[i];
+    }
+    done=true;
+  }
+  if(done) return;
+
+  Vector<double> x_(n);
+  Vector<double> w_(n);
+
+  { //Gauss-Legendre quadrature nodes and weights
+    double alpha=0.0;
+    double beta=0.0;
+    double a=-1.0;
+    double b= 1.0;
+    int kind = 1;
+    cgqf ( n, kind, (double)alpha, (double)beta, (double)a, (double)b, &x_[0], &w_[0] );
+  }
+
+  #pragma omp critical (QUAD_RULE)
+  { // Set x_lst, w_lst
+    x_lst[n]=x_;
+    w_lst[n]=w_;
+  }
+  quad_rule(n, x, w);
+}
+
+}//end namespace

+ 75 - 34
include/quad_utils.txx → src/math_utils.cpp

@@ -1,8 +1,9 @@
 /**
- * \file quad_utils.txx
+ * \file math_utils.cpp
  * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  * \date 7-16-2014
- * \brief This file contains quadruple-precision related functions.
+ * \brief This file contains quadruple-precision implementation for functions
+ * declared in math.h.
  */
 
 #include <omp.h>
@@ -11,49 +12,47 @@
 #include <iostream>
 #include <iomanip>
 #include <vector>
+#include <math_utils.hpp>
 
-QuadReal_t atoquad(const char* str){
-  int i=0;
-  QuadReal_t sign=1.0;
-  for(;str[i]!='\0';i++){
-    char c=str[i];
-    if(c=='-') sign=-sign;
-    if(c>='0' && c<='9') break;
-  }
+template <>
+unsigned int pvfmm::pow(const unsigned int b, const unsigned int e){
+  unsigned int r=1;
+  for(unsigned int i=0;i<e;i++) r*=b;
+  return r;
+}
 
-  QuadReal_t val=0.0;
-  for(;str[i]!='\0';i++){
-    char c=str[i];
-    if(c>='0' && c<='9') val=val*10+(c-'0');
-    else break;
-  }
 
-  if(str[i]=='.'){
-    i++;
-    QuadReal_t exp=1.0;exp/=10;
-    for(;str[i]!='\0';i++){
-      char c=str[i];
-      if(c>='0' && c<='9') val=val+(c-'0')*exp;
-      else break;
-      exp/=10;
-    }
-  }
+#ifdef PVFMM_QUAD_T
 
-  return sign*val;
+namespace pvfmm{
+
+template<>
+inline QuadReal_t const_pi<QuadReal_t>(){
+  static QuadReal_t pi=atoquad("3.1415926535897932384626433832795028841");
+  return pi;
 }
 
+template<>
+inline QuadReal_t const_e<QuadReal_t>(){
+  static QuadReal_t e =atoquad("2.7182818284590452353602874713526624977");
+  return e;
+}
+
+template <>
 QuadReal_t fabs(const QuadReal_t f){
   if(f>=0.0) return f;
   else return -f;
 }
 
+template <>
 QuadReal_t sqrt(const QuadReal_t a){
-  QuadReal_t b=sqrt((double)a);
+  QuadReal_t b=::sqrt((double)a);
   b=(b+a/b)*0.5;
   b=(b+a/b)*0.5;
   return b;
 }
 
+template <>
 QuadReal_t sin(const QuadReal_t a){
   const int N=200;
   static std::vector<QuadReal_t> theta;
@@ -76,7 +75,7 @@ QuadReal_t sin(const QuadReal_t a){
       cosval[N-1]=1.0-sinval[N-1]*sinval[N-1]/2;
       for(int i=N-2;i>=0;i--){
         sinval[i]=2.0*sinval[i+1]*cosval[i+1];
-        cosval[i]=sqrt(1.0-sinval[i]*sinval[i]);
+        cosval[i]=pvfmm::sqrt<QuadReal_t>(1.0-sinval[i]*sinval[i]);
       }
     }
   }
@@ -96,6 +95,7 @@ QuadReal_t sin(const QuadReal_t a){
   return (a<0.0?-sval:sval);
 }
 
+template <>
 QuadReal_t cos(const QuadReal_t a){
   const int N=200;
   static std::vector<QuadReal_t> theta;
@@ -118,7 +118,7 @@ QuadReal_t cos(const QuadReal_t a){
       cosval[N-1]=1.0-sinval[N-1]*sinval[N-1]/2;
       for(int i=N-2;i>=0;i--){
         sinval[i]=2.0*sinval[i+1]*cosval[i+1];
-        cosval[i]=sqrt(1.0-sinval[i]*sinval[i]);
+        cosval[i]=pvfmm::sqrt<QuadReal_t>(1.0-sinval[i]*sinval[i]);
       }
     }
   }
@@ -138,6 +138,7 @@ QuadReal_t cos(const QuadReal_t a){
   return cval;
 }
 
+template <>
 QuadReal_t exp(const QuadReal_t a){
   const int N=200;
   static std::vector<QuadReal_t> theta0;
@@ -159,7 +160,7 @@ QuadReal_t exp(const QuadReal_t a){
       for(int i=1;i<N;i++){
         theta0[i]=theta0[i-1]*0.5;
         theta1[i]=theta1[i-1]*2.0;
-        expval0[i]=sqrt(expval0[i-1]);
+        expval0[i]=pvfmm::sqrt<QuadReal_t>(expval0[i-1]);
         expval1[i]=expval1[i-1]*expval1[i-1];
       }
     }
@@ -183,13 +184,52 @@ QuadReal_t exp(const QuadReal_t a){
   return (a<0.0?1.0/eval:eval);
 }
 
+template <>
 QuadReal_t log(const QuadReal_t a){
-  QuadReal_t y0=log((double)a);
-  y0=y0+(a/exp(y0)-1.0);
-  y0=y0+(a/exp(y0)-1.0);
+  QuadReal_t y0=::log((double)a);
+  y0=y0+(a/pvfmm::exp<QuadReal_t>(y0)-1.0);
+  y0=y0+(a/pvfmm::exp<QuadReal_t>(y0)-1.0);
   return y0;
 }
 
+template <>
+QuadReal_t pow(const QuadReal_t b, const QuadReal_t e){
+  if(b==0) return 1;
+  return pvfmm::exp<QuadReal_t>(pvfmm::log<QuadReal_t>(b)*e);
+}
+
+QuadReal_t atoquad(const char* str){
+  int i=0;
+  QuadReal_t sign=1.0;
+  for(;str[i]!='\0';i++){
+    char c=str[i];
+    if(c=='-') sign=-sign;
+    if(c>='0' && c<='9') break;
+  }
+
+  QuadReal_t val=0.0;
+  for(;str[i]!='\0';i++){
+    char c=str[i];
+    if(c>='0' && c<='9') val=val*10+(c-'0');
+    else break;
+  }
+
+  if(str[i]=='.'){
+    i++;
+    QuadReal_t exp=1.0;exp/=10;
+    for(;str[i]!='\0';i++){
+      char c=str[i];
+      if(c>='0' && c<='9') val=val+(c-'0')*exp;
+      else break;
+      exp/=10;
+    }
+  }
+
+  return sign*val;
+}
+
+}//end namespace
+
 std::ostream& operator<<(std::ostream& output, const QuadReal_t q_){
   //int width=output.width();
   output<<std::setw(1);
@@ -233,3 +273,4 @@ std::ostream& operator<<(std::ostream& output, const QuadReal_t q_){
   return output;
 }
 
+#endif //PVFMM_QUAD_T

+ 2 - 2
src/mortonid.cpp

@@ -14,8 +14,8 @@ namespace pvfmm{
 
 void MortonId::NbrList(std::vector<MortonId>& nbrs, uint8_t level, int periodic) const{
   nbrs.clear();
-  static int dim=3;
-  static int nbr_cnt=(int)(pow(3.0,dim)+0.5);
+  static unsigned int dim=3;
+  static unsigned int nbr_cnt=pvfmm::pow<unsigned int>(3,dim);
   static UINT_T maxCoord=(((UINT_T)1)<<(MAX_DEPTH));
 
   UINT_T mask=maxCoord-(((UINT_T)1)<<(MAX_DEPTH-level));