Browse Source

fix errors.

Dhairya Malhotra 10 năm trước cách đây
mục cha
commit
15eeec9134

+ 1 - 1
examples/src/example1.cpp

@@ -40,7 +40,7 @@ void nbody(vec& src_coord, vec& src_value,
       size_t a=( i   *n_trg_glb)/omp_p;
       size_t b=((i+1)*n_trg_glb)/omp_p;
       kernel_fn.ker_poten(&    src_coord[0]            , n_src, &src_value[0], 1,
-                          &glb_trg_coord[0]+a*COORD_DIM,   b-a, &glb_trg_value_[0]+a*kernel_fn.ker_dim[1]);
+                          &glb_trg_coord[0]+a*COORD_DIM,   b-a, &glb_trg_value_[0]+a*kernel_fn.ker_dim[1],NULL);
     }
     MPI_Allreduce(&glb_trg_value_[0], &glb_trg_value[0], glb_trg_value.size(), MPI_DOUBLE, MPI_SUM, comm);
   }

+ 4 - 4
include/cheb_node.txx

@@ -52,7 +52,7 @@ void Cheb_Node<Real_t>::Initialize(TreeNode* parent_, int path2node_, TreeNode::
       M_val=M_val.Transpose();
 
       cheb_coeff.Resize(((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6*data_dof); cheb_coeff.SetZero();
-      cheb_approx<Real_t,double>(&input_val[0], cheb_deg, data_dof, &cheb_coeff[0]);
+      cheb_approx<Real_t,Real_t>(&input_val[0], cheb_deg, data_dof, &cheb_coeff[0]);
     }else if(this->cheb_value.Dim()>0){
       size_t n_ptr=this->cheb_coord.Dim()/this->Dim();
       assert(n_ptr*data_dof==this->cheb_value.Dim());
@@ -111,7 +111,7 @@ bool Cheb_Node<Real_t>::SubdivCond(){
     read_val(x,y,z, cheb_deg+1, cheb_deg+1, cheb_deg+1, &val[0]);
 
     std::vector<Real_t> tmp_coeff(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
-    cheb_approx<Real_t,double>(&val[0],cheb_deg,data_dof,&tmp_coeff[0]);
+    cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&tmp_coeff[0]);
     return (cheb_err(&(tmp_coeff[0]),cheb_deg,data_dof)>tol);
   }else{
     assert(cheb_coeff.Dim()>0);
@@ -145,7 +145,7 @@ void Cheb_Node<Real_t>::Subdivide() {
     cheb_eval(cheb_coeff, cheb_deg, x, y, z, val);
     assert(val.Dim()==pow(cheb_deg+1,this->Dim())*data_dof);
     child_cheb_coeff[i].Resize(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
-    cheb_approx<Real_t,double>(&val[0],cheb_deg,data_dof,&(child_cheb_coeff[i][0]));
+    cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&(child_cheb_coeff[i][0]));
 
     Cheb_Node<Real_t>* child=static_cast<Cheb_Node<Real_t>*>(this->Child(i));
     child->cheb_coeff=child_cheb_coeff[i];
@@ -174,7 +174,7 @@ void Cheb_Node<Real_t>::Truncate() {
   }
   read_val(x,y,z, cheb_deg+1, cheb_deg+1, cheb_deg+1, &val[0]);
   cheb_coeff.Resize(data_dof*((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
-  cheb_approx<Real_t,double>(&val[0],cheb_deg,data_dof,&cheb_coeff[0]);
+  cheb_approx<Real_t,Real_t>(&val[0],cheb_deg,data_dof,&cheb_coeff[0]);
   MPI_Node<Real_t>::Truncate();
 }
 

+ 11 - 8
include/cheb_utils.txx

@@ -94,12 +94,12 @@ T cheb_approx(T* fn_v, int cheb_deg, int dof, T* out){
     if(precomp [d].Dim(0)==0 && precomp [d].Dim(1)==0){
       std::vector<Y> x(d);
       for(int i=0;i<d;i++)
-        x[i]=-cos((i+0.5)*const_pi<T>()/d);
+        x[i]=-cos((i+(T)0.5)*const_pi<T>()/d);
 
       std::vector<Y> p(d*d);
       cheb_poly(d-1,&x[0],d,&p[0]);
       for(int i=0;i<d*d;i++)
-        p[i]=p[i]*(2.0/d);
+        p[i]=p[i]*2.0/d;
       Matrix<Y> Mp1(d,d,&p[0],false);
       Matrix<Y> Mp1_=Mp1.Transpose();
       precomp [d]=Mp1 ;
@@ -250,7 +250,7 @@ T gll2cheb(T* fn_v, int deg, int dof, T* out){//*
 
       std::vector<Y> x(d); //Cheb nodes.
       for(int i=0;i<d;i++)
-        x[i]=-cos((i+0.5)*const_pi<Y>()/d);
+        x[i]=-cos((i+(T)0.5)*const_pi<Y>()/d);
 
       Vector<T> w(d);
       Vector<T> x_legn(d); // GLL nodes.
@@ -271,7 +271,7 @@ T gll2cheb(T* fn_v, int deg, int dof, T* out){//*
       std::vector<Y> p(d*d);
       cheb_poly(d-1,&x[0],d,&p[0]);
       for(int i=0;i<d*d;i++)
-        p[i]=p[i]*(2.0/d);
+        p[i]=p[i]*2.0/d;
       Matrix<Y> Mp1(d,d,&p[0],false);
       Mp1=Mp1*M_g2c;
 
@@ -341,7 +341,7 @@ T cheb_approx(T (*fn)(T,T,T), int cheb_deg, T* coord, T s, std::vector<T>& out){
   int d=cheb_deg+1;
   std::vector<T> x(d);
   for(int i=0;i<d;i++)
-    x[i]=cos((i+0.5)*const_pi<T>()/d);
+    x[i]=cos((i+(T)0.5)*const_pi<T>()/d);
 
   std::vector<T> p;
   cheb_poly(d-1,&x[0],d,&p[0]);
@@ -1026,8 +1026,11 @@ std::vector<T> cheb_integ(int m, T* s_, T r_, Kernel<T>& kernel){
   while(err>eps*n*n){
     n=(int)round(n*1.3);
     if(n>300){
-      using ::operator<<;
-      std::cout<<"Cheb_Integ::Failed to converge.["<<err<<","<<s[0]<<","<<s[1]<<","<<s[2]<<"]\n";
+      std::cout<<"Cheb_Integ::Failed to converge.[";
+      ::operator<<(std::cout,err); std::cout<<",";
+      ::operator<<(std::cout,s[0]); std::cout<<",";
+      ::operator<<(std::cout,s[1]); std::cout<<",";
+      ::operator<<(std::cout,s[2]); std::cout<<"]\n";
       break;
     }
     U_=integ<T>(m+1,s,r,n,kernel);
@@ -1060,7 +1063,7 @@ std::vector<T> cheb_nodes(int deg, int dim){
   int d=deg+1;
   std::vector<T> x(d);
   for(int i=0;i<d;i++)
-    x[i]=-cos((i+0.5)*const_pi<T>()/d)*0.5+0.5;
+    x[i]=-cos((i+(T)0.5)*const_pi<T>()/d)*0.5+0.5;
   if(dim==1) return x;
 
   int n1=(int)(pow((T)d,dim)+0.5);

+ 5 - 5
include/fmm_cheb.txx

@@ -462,7 +462,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
         M_trg=M_trg.Transpose();
-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
       }
       #pragma omp critical (PRECOMP_MATRIX_PTS)
       {
@@ -516,7 +516,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
         M_trg=M_trg.Transpose();
-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
       }
       break;
     }
@@ -566,7 +566,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
         M_trg=M_trg.Transpose();
-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
       }
       break;
     }
@@ -616,7 +616,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
         M_trg=M_trg.Transpose();
-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
       }
       break;
     }
@@ -632,7 +632,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type
       for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
         Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
         M_trg=M_trg.Transpose();
-        cheb_approx<Real_t,double>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
+        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
       }
       #pragma omp critical (PRECOMP_MATRIX_PTS)
       {

+ 2 - 2
include/fmm_pts.txx

@@ -477,7 +477,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
                              &uc_coord[0], n_uc, &(M_e2c[0][0]));
 
       Real_t eps=1.0;
-      while(eps+(T)1.0>1.0) eps*=0.5;
+      while(eps+(Real_t)1.0>1.0) eps*=0.5;
       M=M_e2c.pinv(sqrt(eps)); //check 2 equivalent
       break;
     }
@@ -499,7 +499,7 @@ Matrix<typename FMMNode::Real_t>& FMM_Pts<FMMNode>::Precomp(int level, Mat_Type
                              &check_surf[0], n_ch, &(M_e2c[0][0]));
 
       Real_t eps=1.0;
-      while(eps+(T)1.0>1.0) eps*=0.5;
+      while(eps+(Real_t)1.0>1.0) eps*=0.5;
       M=M_e2c.pinv(sqrt(eps)); //check 2 equivalent
       break;
     }

+ 11 - 8
include/mat_utils.txx

@@ -26,7 +26,7 @@ namespace mat{
             for(size_t k=0;k<K;k++){
               AxB+=A[m+lda*k]*B[k+ldb*n];
             }
-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
         }
       }
     }else if(TransA=='N' || TransA=='n'){
@@ -36,7 +36,7 @@ namespace mat{
             for(size_t k=0;k<K;k++){
               AxB+=A[m+lda*k]*B[n+ldb*k];
             }
-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
         }
       }
     }else if(TransB=='N' || TransB=='n'){
@@ -46,7 +46,7 @@ namespace mat{
             for(size_t k=0;k<K;k++){
               AxB+=A[k+lda*m]*B[k+ldb*n];
             }
-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
         }
       }
     }else{
@@ -56,7 +56,7 @@ namespace mat{
             for(size_t k=0;k<K;k++){
               AxB+=A[k+lda*m]*B[n+ldb*k];
             }
-            C[m+ldc*n]=alpha*AxB+beta*C[m+ldc*n];
+            C[m+ldc*n]=alpha*AxB+(beta==0?0:beta*C[m+ldc*n]);
         }
       }
     }
@@ -223,13 +223,16 @@ namespace mat{
     while(k0<dim[1]-1){ // Diagonalization
       iter++;
 
-      while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*(fabs(S(k0,k0))+fabs(S(k0+1,k0+1)))) k0++;
-      //while(k0<dim[1]-1 && fabs(S(k0,k0+1))<eps) k0++;
+      T S_max=0.0;
+      for(size_t i=0;i<dim[1];i++) S_max=(S_max>S(i,i)?S_max:S(i,i));
+
+      //while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*(fabs(S(k0,k0))+fabs(S(k0+1,k0+1)))) k0++;
+      while(k0<dim[1]-1 && fabs(S(k0,k0+1))<=eps*S_max) k0++;
       size_t k=k0;
 
       size_t n=k0+1;
-      while(n<dim[1] && fabs(S(n-1,n))>eps*(fabs(S(n-1,n-1))+fabs(S(n,n)))) n++;
-      //while(n<dim[1] && fabs(S(n-1,n))>eps) n++;
+      //while(n<dim[1] && fabs(S(n-1,n))>eps*(fabs(S(n-1,n-1))+fabs(S(n,n)))) n++;
+      while(n<dim[1] && fabs(S(n-1,n))>eps*S_max) n++;
 
       T mu=0;
       { // Compute mu

+ 3 - 2
include/matrix.txx

@@ -15,13 +15,14 @@ namespace pvfmm{
 
 template <class T>
 std::ostream& operator<<(std::ostream& output, const Matrix<T>& M){
-  using ::operator<<;
   output<<std::fixed<<std::setprecision(4)<<std::setiosflags(std::ios::left);
   for(size_t i=0;i<M.Dim(0);i++){
     for(size_t j=0;j<M.Dim(1);j++){
       float f=((float)M(i,j));
       if(fabs(f)<1e-25) f=0;
-      output<<std::setw(10)<<f<<' ';
+      output<<std::setw(10);
+      ::operator<<(output,f);
+      output<<' ';
     }
     output<<";\n";
   }

+ 3 - 0
include/pvfmm_common.hpp

@@ -53,6 +53,9 @@
 template <class T>
 inline T const_pi(){return 3.1415926535897932384626433832795028841;}
 
+template <class T>
+inline T const_e(){return 2.71828182845904523536028747135266249775724709369995;}
+
 #include <quad_utils.hpp>
 
 #endif //_PVFMM_COMMON_HPP_

+ 10 - 1
include/quad_utils.hpp

@@ -30,11 +30,20 @@ inline QuadReal_t sin(const QuadReal_t& a);
 
 inline QuadReal_t cos(const QuadReal_t& a);
 
+inline QuadReal_t exp(const QuadReal_t& a);
+
 inline std::ostream& operator<<(std::ostream& output, const QuadReal_t& q_);
 
 template<>
 inline QuadReal_t const_pi<QuadReal_t>(){
-  return atoquad("3.1415926535897932384626433832795028841");
+  static QuadReal_t pi=atoquad("3.1415926535897932384626433832795028841");
+  return pi;
+}
+
+template<>
+inline QuadReal_t const_e<QuadReal_t>(){
+  static QuadReal_t e=atoquad("2.71828182845904523536028747135266249775724709369995");
+  return e;
 }
 
 #include <quad_utils.txx>

+ 47 - 1
include/quad_utils.txx

@@ -99,7 +99,7 @@ QuadReal_t cos(const QuadReal_t& a){
   static std::vector<QuadReal_t> sinval;
   static std::vector<QuadReal_t> cosval;
   if(theta.size()==0){
-    #pragma omp critical (QUAD_SIN)
+    #pragma omp critical (QUAD_COS)
     if(theta.size()==0){
       theta.resize(N);
       sinval.resize(N);
@@ -135,6 +135,52 @@ QuadReal_t cos(const QuadReal_t& a){
   return cval;
 }
 
+QuadReal_t exp(const QuadReal_t& a){
+  const size_t N=200;
+  static std::vector<QuadReal_t> theta0;
+  static std::vector<QuadReal_t> theta1;
+  static std::vector<QuadReal_t> expval0;
+  static std::vector<QuadReal_t> expval1;
+  if(theta0.size()==0){
+    #pragma omp critical (QUAD_EXP)
+    if(theta0.size()==0){
+      theta0.resize(N);
+      theta1.resize(N);
+      expval0.resize(N);
+      expval1.resize(N);
+
+      theta0[0]=1.0;
+      theta1[0]=1.0;
+      expval0[0]=const_e<QuadReal_t>();
+      expval1[0]=const_e<QuadReal_t>();
+      for(int i=1;i<N;i++){
+        theta0[i]=theta0[i-1]*0.5;
+        theta1[i]=theta1[i-1]*2.0;
+        expval0[i]=sqrt(expval0[i-1]);
+        expval1[i]=expval1[i-1]*expval1[i-1];
+      }
+    }
+  }
+
+  QuadReal_t t=(a<0.0?-a:a);
+  QuadReal_t eval=1.0;
+  for(int i=N-1;i>0;i--){
+    while(theta1[i]<=t){
+      eval=eval*expval1[i];
+      t=t-theta1[i];
+    }
+  }
+  for(int i=0;i<N;i++){
+    while(theta0[i]<=t){
+      eval=eval*expval0[i];
+      t=t-theta0[i];
+    }
+  }
+  eval=eval*(1.0+t);
+  return (a<0.0?1.0/eval:eval);
+  return eval;
+}
+
 std::ostream& operator<<(std::ostream& output, const QuadReal_t& q_){
   int width=output.width();
   output<<std::setw(1);