11 éve · 15eeec9134
--- a/examples/src/example1.cpp
+++ b/examples/src/example1.cpp
@@ -40,7 +40,7 @@ void nbody(vec& src_coord, vec& src_value,
 
				       size_t a=( i   *n_trg_glb)/omp_p;
			
 
				       size_t b=((i+1)*n_trg_glb)/omp_p;
			
 
				       kernel_fn.ker_poten(&    src_coord[0]            , n_src, &src_value[0], 1,
			
 
				-                          &glb_trg_coord[0]+a*COORD_DIM,   b-a, &glb_trg_value_[0]+a*kernel_fn.ker_dim[1]);
			
 
				+                          &glb_trg_coord[0]+a*COORD_DIM,   b-a, &glb_trg_value_[0]+a*kernel_fn.ker_dim[1],NULL);
			
 
				     }
			
 
				     MPI_Allreduce(&glb_trg_value_[0], &glb_trg_value[0], glb_trg_value.size(), MPI_DOUBLE, MPI_SUM, comm);
			
 
				   }
			
--- a/include/cheb_node.txx
+++ b/include/cheb_node.txx
@@ -52,7 +52,7 @@ void Cheb_Node<Real_t>::Initialize(TreeNode* parent_, int path2node_, TreeNode::
 
				       M_val=M_val.Transpose();
			
 
				 
			
 
				       cheb_coeff.Resize(((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6*data_dof); cheb_coeff.SetZero();
			
 
				-      cheb_approx<Real_t,double>(&input_val[0], cheb_deg, data_dof, &cheb_coeff[0]);
			
 
				+      cheb_approx<Real_t,Real_t>(&input_val[0], cheb_deg, data_dof, &cheb_coeff[0]);
			
 
				     }else if(this->cheb_value.Dim()>0){
			
 
				       size_t n_ptr=this->cheb_coord.Dim()/this->Dim();
			
 
				       assert(n_ptr*data_dof==this->cheb_value.Dim());
			
@@ -111,7 +111,7 @@ bool Cheb_Node<Real_t>::SubdivCond(){
 
				     read_val(x,y,z, cheb_deg+1, cheb_deg+1, cheb_deg+1, &val[0]);
			
 
				 
			
 
				     std::vector<Real_t> tmp_coeff(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
			
 
				-    cheb_approx<Real_t,double>(&val[0],cheb_deg,data_dof,&tmp_coeff[0]);
			
 
				+    cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&tmp_coeff[0]);
			
 
				     return (cheb_err(&(tmp_coeff[0]),cheb_deg,data_dof)>tol);
			
 
				   }else{
			
 
				     assert(cheb_coeff.Dim()>0);
			
@@ -145,7 +145,7 @@ void Cheb_Node<Real_t>::Subdivide() {
 
				     cheb_eval(cheb_coeff, cheb_deg, x, y, z, val);
			
 
				     assert(val.Dim()==pow(cheb_deg+1,this->Dim())*data_dof);
			
 
				     child_cheb_coeff[i].Resize(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
			
 
				-    cheb_approx<Real_t,double>(&val[0],cheb_deg,data_dof,&(child_cheb_coeff[i][0]));
			
 
				+    cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&(child_cheb_coeff[i][0]));
			
 
				 
			
 
				     Cheb_Node<Real_t>* child=static_cast<Cheb_Node<Real_t>*>(this->Child(i));
			
 
				     child->cheb_coeff=child_cheb_coeff[i];
			
@@ -174,7 +174,7 @@ void Cheb_Node<Real_t>::Truncate() {
 
				   }
			
 
				   read_val(x,y,z, cheb_deg+1, cheb_deg+1, cheb_deg+1, &val[0]);
			
 
				   cheb_coeff.Resize(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
			
 
				-  cheb_approx<Real_t,double>(&val[0],cheb_deg,data_dof,&cheb_coeff[0]);
			
 
				+  cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&cheb_coeff[0]);
			
 
				   MPI_Node<Real_t>::Truncate();
			
 
				 }
			
 
				 
			
--- a/include/cheb_utils.txx
+++ b/include/cheb_utils.txx
@@ -94,12 +94,12 @@ T cheb_approx(T* fn_v, int cheb_deg, int dof, T* out){
 
				     if(precomp [d].Dim(0)==0 && precomp [d].Dim(1)==0){
			
 
				       std::vector<Y> x(d);
			
 
				       for(int i=0;i<d;i++)
			
 
				-        x[i]=-cos((i+0.5)*const_pi<T>()/d);
			
 
				+        x[i]=-cos((i+(T)0.5)*const_pi<T>()/d);
			
 
				 
			
 
				       std::vector<Y> p(d*d);
			
 
				       cheb_poly(d-1,&x[0],d,&p[0]);
			
 
				       for(int i=0;i<d*d;i++)
			
 
				-        p[i]=p[i]*(2.0/d);
			
 
				+        p[i]=p[i]*2.0/d;
			
 
				       Matrix<Y> Mp1(d,d,&p[0],false);
			
 
				       Matrix<Y> Mp1_=Mp1.Transpose();
			
 
				       precomp [d]=Mp1 ;
			
@@ -250,7 +250,7 @@ T gll2cheb(T* fn_v, int deg, int dof, T* out){//*
 
				 
			
 
				       std::vector<Y> x(d); //Cheb nodes.
			
 
				       for(int i=0;i<d;i++)
			
 
				-        x[i]=-cos((i+0.5)*const_pi<Y>()/d);
			
 
				+        x[i]=-cos((i+(T)0.5)*const_pi<Y>()/d);
			
 
				 
			
 
				       Vector<T> w(d);
			
 
				       Vector<T> x_legn(d); // GLL nodes.
			
@@ -271,7 +271,7 @@ T gll2cheb(T* fn_v, int deg, int dof, T* out){//*
 
				       std::vector<Y> p(d*d);
			
 
				       cheb_poly(d-1,&x[0],d,&p[0]);
			
 
				       for(int i=0;i<d*d;i++)
			
 
				-        p[i]=p[i]*(2.0/d);
			
 
				+        p[i]=p[i]*2.0/d;
			
 
				       Matrix<Y> Mp1(d,d,&p[0],false);
			
 
				       Mp1=Mp1*M_g2c;
			
 
				 
			
@@ -341,7 +341,7 @@ T cheb_approx(T (*fn)(T,T,T), int cheb_deg, T* coord, T s, std::vector<T>& out){
 
				   int d=cheb_deg+1;
			
 
				   std::vector<T> x(d);
			
 
				   for(int i=0;i<d;i++)
			
 
				-    x[i]=cos((i+0.5)*const_pi<T>()/d);
			
 
				+    x[i]=cos((i+(T)0.5)*const_pi<T>()/d);
			
 
				 
			
 
				   std::vector<T> p;
			
 
				   cheb_poly(d-1,&x[0],d,&p[0]);
			
@@ -1026,8 +1026,11 @@ std::vector<T> cheb_integ(int m, T* s_, T r_, Kernel<T>& kernel){
 
				   while(err>eps*n*n){
			
 
				     n=(int)round(n*1.3);
			
 
				     if(n>300){
			
 
				-      using ::operator<<;
			
 
				-      std::cout<<"Cheb_Integ::Failed to converge.["<<err<<","<<s[0]<<","<<s[1]<<","<<s[2]<<"]\n";
			
 
				+      std::cout<<"Cheb_Integ::Failed to converge.[";
			
 
				+      ::operator<<(std::cout,err); std::cout<<",";
			
 
				+      ::operator<<(std::cout,s[0]); std::cout<<",";
			
 
				+      ::operator<<(std::cout,s[1]); std::cout<<",";
			
 
				+      ::operator<<(std::cout,s[2]); std::cout<<"]\n";
			
 
				       break;
			
 
				     }
			
 
				     U_=integ<T>(m+1,s,r,n,kernel);
			
@@ -1060,7 +1063,7 @@ std::vector<T> cheb_nodes(int deg, int dim){
 
				   int d=deg+1;
			
 
				   std::vector<T> x(d);
			
 
				   for(int i=0;i<d;i++)
			
 
				-    x[i]=-cos((i+0.5)*const_pi<T>()/d)*0.5+0.5;
			
 
				+    x[i]=-cos((i+(T)0.5)*const_pi<T>()/d)*0.5+0.5;
			
 
				   if(dim==1) return x;
			
 
				 
			
 
				   int n1=(int)(pow((T)d,dim)+0.5);
			
--- a/include/fmm_cheb.txx
+++ b/include/fmm_cheb.txx
@@ -462,7 +462,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
 
				       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
			
 
				         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
			
 
				         M_trg=M_trg.Transpose();
			
 
				-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				       }
			
 
				       #pragma omp critical (PRECOMP_MATRIX_PTS)
			
 
				       {
			
@@ -516,7 +516,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
 
				       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
			
 
				         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
			
 
				         M_trg=M_trg.Transpose();
			
 
				-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				       }
			
 
				       break;
			
 
				     }
			
@@ -566,7 +566,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
 
				       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
			
 
				         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
			
 
				         M_trg=M_trg.Transpose();
			
 
				-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				       }
			
 
				       break;
			
 
				     }
			
@@ -616,7 +616,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
 
				       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
			
 
				         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
			
 
				         M_trg=M_trg.Transpose();
			
 
				-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				       }
			
 
				       break;
			
 
				     }
			
@@ -632,7 +632,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
 
				       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
			
 
				         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
			
 
				         M_trg=M_trg.Transpose();
			
 
				-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
			
 
				       }
			
 
				       #pragma omp critical (PRECOMP_MATRIX_PTS)
			
 
				       {
			
--- a/include/fmm_pts.txx
+++ b/include/fmm_pts.txx
@@ -477,7 +477,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
 
				                              &uc_coord[0], n_uc, &(M_e2c[0][0]));
			
 
				 
			
 
				       Real_t eps=1.0;
			
 
				-      while(eps+(T)1.0>1.0) eps*=0.5;
			
 
				+      while(eps+(Real_t)1.0>1.0) eps*=0.5;
			
 
				       M=M_e2c.pinv(sqrt(eps)); //check 2 equivalent
			
 
				       break;
			
 
				     }
			
@@ -499,7 +499,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
 
				                              &check_surf[0], n_ch, &(M_e2c[0][0]));
			
 
				 
			
 
				       Real_t eps=1.0;
			
 
				-      while(eps+(T)1.0>1.0) eps*=0.5;
			
 
				+      while(eps+(Real_t)1.0>1.0) eps*=0.5;
			
 
				       M=M_e2c.pinv(sqrt(eps)); //check 2 equivalent
			
 
				       break;
			
 
				     }
			
--- a/include/mat_utils.txx
+++ b/include/mat_utils.txx
@@ -26,7 +26,7 @@ namespace mat{
 
				             for(size_t k=0;k<K;k++){
			
 
				               AxB+=A[m+lda*k]*B[k+ldb*n];
			
 
				             }
			
 
				-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
			
 
				+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
			
 
				         }
			
 
				       }
			
 
				     }else if(TransA=='N' || TransA=='n'){
			
@@ -36,7 +36,7 @@ namespace mat{
 
				             for(size_t k=0;k<K;k++){
			
 
				               AxB+=A[m+lda*k]*B[n+ldb*k];
			
 
				             }
			
 
				-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
			
 
				+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
			
 
				         }
			
 
				       }
			
 
				     }else if(TransB=='N' || TransB=='n'){
			
@@ -46,7 +46,7 @@ namespace mat{
 
				             for(size_t k=0;k<K;k++){
			
 
				               AxB+=A[k+lda*m]*B[k+ldb*n];
			
 
				             }
			
 
				-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
			
 
				+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
			
 
				         }
			
 
				       }
			
 
				     }else{
			
@@ -56,7 +56,7 @@ namespace mat{
 
				             for(size_t k=0;k<K;k++){
			
 
				               AxB+=A[k+lda*m]*B[n+ldb*k];
			
 
				             }
			
 
				-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
			
 
				+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
			
 
				         }
			
 
				       }
			
 
				     }
			
@@ -223,13 +223,16 @@ namespace mat{
 
				     while(k0<dim[1]-1){ // Diagonalization
			
 
				       iter++;
			
 
				 
			
 
				-      while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*(fabs(S(k0,k0))+fabs(S(k0+1,k0+1)))) k0++;
			
 
				-      //while(k0<dim[1]-1 && fabs(S(k0,k0+1))<eps) k0++;
			
 
				+      T S_max=0.0;
			
 
				+      for(size_t i=0;i<dim[1];i++) S_max=(S_max>S(i,i)?S_max:S(i,i));
			
 
				+
			
 
				+      //while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*(fabs(S(k0,k0))+fabs(S(k0+1,k0+1)))) k0++;
			
 
				+      while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*S_max) k0++;
			
 
				       size_t k=k0;
			
 
				 
			
 
				       size_t n=k0+1;
			
 
				-      while(n<dim[1] && fabs(S(n-1,n))>eps*(fabs(S(n-1,n-1))+fabs(S(n,n)))) n++;
			
 
				-      //while(n<dim[1] && fabs(S(n-1,n))>eps) n++;
			
 
				+      //while(n<dim[1] && fabs(S(n-1,n))>eps*(fabs(S(n-1,n-1))+fabs(S(n,n)))) n++;
			
 
				+      while(n<dim[1] && fabs(S(n-1,n))>eps*S_max) n++;
			
 
				 
			
 
				       T mu=0;
			
 
				       { // Compute mu
			
--- a/include/matrix.txx
+++ b/include/matrix.txx
@@ -15,13 +15,14 @@ namespace pvfmm{
 
				 
			
 
				 template <class T>
			
 
				 std::ostream& operator<<(std::ostream& output, const Matrix<T>& M){
			
 
				-  using ::operator<<;
			
 
				   output<<std::fixed<<std::setprecision(4)<<std::setiosflags(std::ios::left);
			
 
				   for(size_t i=0;i<M.Dim(0);i++){
			
 
				     for(size_t j=0;j<M.Dim(1);j++){
			
 
				       float f=((float)M(i,j));
			
 
				       if(fabs(f)<1e-25) f=0;
			
 
				-      output<<std::setw(10)<<f<<' ';
			
 
				+      output<<std::setw(10);
			
 
				+      ::operator<<(output,f);
			
 
				+      output<<' ';
			
 
				     }
			
 
				     output<<";\n";
			
 
				   }
			
--- a/include/pvfmm_common.hpp
+++ b/include/pvfmm_common.hpp
@@ -53,6 +53,9 @@
 
				 template <class T>
			
 
				 inline T const_pi(){return 3.1415926535897932384626433832795028841;}
			
 
				 
			
 
				+template <class T>
			
 
				+inline T const_e(){return 2.71828182845904523536028747135266249775724709369995;}
			
 
				+
			
 
				 #include <quad_utils.hpp>
			
 
				 
			
 
				 #endif //_PVFMM_COMMON_HPP_
			
--- a/include/quad_utils.hpp
+++ b/include/quad_utils.hpp
@@ -30,11 +30,20 @@ inline QuadReal_t sin(const QuadReal_t& a);
 
				 
			
 
				 inline QuadReal_t cos(const QuadReal_t& a);
			
 
				 
			
 
				+inline QuadReal_t exp(const QuadReal_t& a);
			
 
				+
			
 
				 inline std::ostream& operator<<(std::ostream& output, const QuadReal_t& q_);
			
 
				 
			
 
				 template<>
			
 
				 inline QuadReal_t const_pi<QuadReal_t>(){
			
 
				-  return atoquad("3.1415926535897932384626433832795028841");
			
 
				+  static QuadReal_t pi=atoquad("3.1415926535897932384626433832795028841");
			
 
				+  return pi;
			
 
				+}
			
 
				+
			
 
				+template<>
			
 
				+inline QuadReal_t const_e<QuadReal_t>(){
			
 
				+  static QuadReal_t e=atoquad("2.71828182845904523536028747135266249775724709369995");
			
 
				+  return e;
			
 
				 }
			
 
				 
			
 
				 #include <quad_utils.txx>
			
--- a/include/quad_utils.txx
+++ b/include/quad_utils.txx
@@ -99,7 +99,7 @@ QuadReal_t cos(const QuadReal_t& a){
 
				   static std::vector<QuadReal_t> sinval;
			
 
				   static std::vector<QuadReal_t> cosval;
			
 
				   if(theta.size()==0){
			
 
				-    #pragma omp critical (QUAD_SIN)
			
 
				+    #pragma omp critical (QUAD_COS)
			
 
				     if(theta.size()==0){
			
 
				       theta.resize(N);
			
 
				       sinval.resize(N);
			
@@ -135,6 +135,52 @@ QuadReal_t cos(const QuadReal_t& a){
 
				   return cval;
			
 
				 }
			
 
				 
			
 
				+QuadReal_t exp(const QuadReal_t& a){
			
 
				+  const size_t N=200;
			
 
				+  static std::vector<QuadReal_t> theta0;
			
 
				+  static std::vector<QuadReal_t> theta1;
			
 
				+  static std::vector<QuadReal_t> expval0;
			
 
				+  static std::vector<QuadReal_t> expval1;
			
 
				+  if(theta0.size()==0){
			
 
				+    #pragma omp critical (QUAD_EXP)
			
 
				+    if(theta0.size()==0){
			
 
				+      theta0.resize(N);
			
 
				+      theta1.resize(N);
			
 
				+      expval0.resize(N);
			
 
				+      expval1.resize(N);
			
 
				+
			
 
				+      theta0[0]=1.0;
			
 
				+      theta1[0]=1.0;
			
 
				+      expval0[0]=const_e<QuadReal_t>();
			
 
				+      expval1[0]=const_e<QuadReal_t>();
			
 
				+      for(int i=1;i<N;i++){
			
 
				+        theta0[i]=theta0[i-1]*0.5;
			
 
				+        theta1[i]=theta1[i-1]*2.0;
			
 
				+        expval0[i]=sqrt(expval0[i-1]);
			
 
				+        expval1[i]=expval1[i-1]*expval1[i-1];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  QuadReal_t t=(a<0.0?-a:a);
			
 
				+  QuadReal_t eval=1.0;
			
 
				+  for(int i=N-1;i>0;i--){
			
 
				+    while(theta1[i]<=t){
			
 
				+      eval=eval*expval1[i];
			
 
				+      t=t-theta1[i];
			
 
				+    }
			
 
				+  }
			
 
				+  for(int i=0;i<N;i++){
			
 
				+    while(theta0[i]<=t){
			
 
				+      eval=eval*expval0[i];
			
 
				+      t=t-theta0[i];
			
 
				+    }
			
 
				+  }
			
 
				+  eval=eval*(1.0+t);
			
 
				+  return (a<0.0?1.0/eval:eval);
			
 
				+  return eval;
			
 
				+}
			
 
				+
			
 
				 std::ostream& operator<<(std::ostream& output, const QuadReal_t& q_){
			
 
				   int width=output.width();
			
 
				   output<<std::setw(1);