/** * \file fmm_cheb.txx * \author Dhairya Malhotra, dhairya.malhotra@gmail.com * \date 3-07-2011 * \brief This file contains the implementation of the FMM_Cheb class. */ #include #include #include #include #include #ifdef PVFMM_HAVE_SYS_STAT_H #include #endif #include #include #include #include #include namespace pvfmm{ template FMM_Cheb::~FMM_Cheb() { if(this->mat!=NULL){ int rank; MPI_Comm_rank(this->comm,&rank); if(!rank){ FILE* f=fopen(this->mat_fname.c_str(),"r"); if(f==NULL) { //File does not exists. { // Delete easy to compute matrices. Mat_Type type=W_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=V_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=V1_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=U2U_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=D2D_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=D2T_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } } this->mat->Save2File(this->mat_fname.c_str()); }else fclose(f); } } } template void FMM_Cheb::Initialize(int mult_order, int cheb_deg_, const MPI_Comm& comm_, const Kernel* kernel_){ Profile::Tic("InitFMM_Cheb",&comm_,true);{ int rank; MPI_Comm_rank(comm_,&rank); int dim=3; //Only supporting 3D cheb_deg=cheb_deg_; if(this->mat_fname.size()==0){ std::stringstream st; st<ker_name.c_str()<<"_q"<mat_fname=st.str(); } if(!rank){ FILE* f=fopen(this->mat_fname.c_str(),"r"); if(f==NULL) { //File does not exists. std::cout<<"Could not find precomputed data file for "<ker_name.c_str()<<" kernel with q="<mat_fname<<'\n'; std::cout<<"This may take a while...\n"; }else fclose(f); } //this->mat->LoadFile(this->mat_fname.c_str(), this->comm); FMM_Pts::Initialize(mult_order, comm_, kernel_); this->mat->RelativeTrgCoord()=cheb_nodes(ChebDeg(),dim); Profile::Tic("PrecompD2T",&this->comm,false,4); this->PrecompAll(D2T_Type); Profile::Toc(); //Volume solver. Profile::Tic("PrecompS2M",&this->comm,false,4); this->PrecompAll(S2U_Type); Profile::Toc(); Profile::Tic("PrecompX",&this->comm,false,4); this->PrecompAll(X_Type); Profile::Toc(); Profile::Tic("PrecompW",&this->comm,false,4); this->PrecompAll(W_Type); Profile::Toc(); Profile::Tic("PrecompU0",&this->comm,false,4); this->PrecompAll(U0_Type); Profile::Toc(); Profile::Tic("PrecompU1",&this->comm,false,4); this->PrecompAll(U1_Type); Profile::Toc(); Profile::Tic("PrecompU2",&this->comm,false,4); this->PrecompAll(U2_Type); Profile::Toc(); Profile::Tic("Save2File",&this->comm,false,4); if(!rank){ FILE* f=fopen(this->mat_fname.c_str(),"r"); if(f==NULL) { //File does not exists. { // Delete easy to compute matrices. Mat_Type type=W_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=V_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=V1_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=U2U_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=D2D_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } type=D2T_Type; for(int l=-BC_LEVELS;linterac_list.ListCount(type);indx++){ Matrix& M=this->mat->Mat(l, type, indx); M.Resize(0,0); } } this->mat->Save2File(this->mat_fname.c_str()); }else fclose(f); } Profile::Toc(); Profile::Tic("Recompute",&this->comm,false,4); { // Recompute matrices. this->PrecompAll(W_Type); this->PrecompAll(V_Type); this->PrecompAll(V1_Type); this->PrecompAll(U2U_Type); this->PrecompAll(D2D_Type); this->PrecompAll(D2T_Type); } Profile::Toc(); }Profile::Toc(); } template Permutation cheb_perm(size_t q, size_t p_indx, const Permutation& ker_perm, const Vector* scal_exp=NULL){ int dim=3; //Only supporting 3D int dof=ker_perm.Dim(); int coeff_cnt=((q+1)*(q+2)*(q+3))/6; int n3=(int)pow((Real_t)(q+1),dim); Permutation P0(n3*dof); if(scal_exp && p_indx==Scaling){ // Set level-by-level scaling assert(dof==scal_exp->Dim()); Vector scal(scal_exp->Dim()); for(size_t i=0;i coeff_map(n3*dof,0); { int indx=0; for(int j=0;j P=Permutation(coeff_cnt*dof); { int indx=0; for(int j=0;j Permutation& FMM_Cheb::PrecompPerm(Mat_Type type, Perm_Type perm_indx){ int dim=3; //Only supporting 3D Real_t eps=1e-10; //Check if the matrix already exists. Permutation& P_ = FMM_Pts::PrecompPerm(type, perm_indx); if(P_.Dim()!=0) return P_; size_t q=cheb_deg; size_t m=this->MultipoleOrder(); size_t p_indx=perm_indx % C_Perm; //Compute the matrix. Permutation P; switch (type){ case UC2UE_Type: { break; } case DC2DE_Type: { break; } case S2U_Type: { Vector scal_exp; Permutation ker_perm; if(perm_indxkernel->k_s2m->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_s2m->src_scal; for(size_t i=0;iHomogen()?&scal_exp:NULL)); }else{ ker_perm=this->kernel->k_m2m->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_m2m->src_scal; for(size_t i=0;i ker_perm0=this->kernel->k_s2m->perm_vec[C_Perm+p_indx]; Permutation ker_perm1=this->kernel->k_m2m->perm_vec[C_Perm+p_indx]; assert(ker_perm0.Dim()>0 && ker_perm0.Dim()==ker_perm1.Dim()); if(fabs(ker_perm0.scal[0]-ker_perm1.scal[0])>eps){ for(size_t i=0;i& scal_exp0=this->kernel->k_s2m->trg_scal; const Vector& scal_exp1=this->kernel->k_m2m->trg_scal; assert(scal_exp0.Dim()>0 && scal_exp0.Dim()==scal_exp1.Dim()); Real_t s=(scal_exp0[0]-scal_exp1[0]); for(size_t i=1;iHomogen()?&scal_exp:NULL)); } break; } case U2U_Type: { break; } case D2D_Type: { break; } case D2T_Type: { Vector scal_exp; Permutation ker_perm; if(perm_indxkernel->k_l2t->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_l2t->src_scal; P=equiv_surf_perm(m, p_indx, ker_perm, (this->Homogen()?&scal_exp:NULL)); }else{ // Target permutation ker_perm=this->kernel->k_l2t->perm_vec[C_Perm+p_indx]; scal_exp=this->kernel->k_l2t->trg_scal; P=cheb_perm(q, p_indx, ker_perm, (this->Homogen()?&scal_exp:NULL)); } break; } case U0_Type: { Vector scal_exp; Permutation ker_perm; if(perm_indxkernel->k_s2t->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_s2t->src_scal; for(size_t i=0;ikernel->k_s2t->perm_vec[C_Perm+p_indx]; scal_exp=this->kernel->k_s2t->trg_scal; } P=cheb_perm(q, p_indx, ker_perm, (this->Homogen()?&scal_exp:NULL)); break; } case U1_Type: { P=PrecompPerm(U0_Type, perm_indx); break; } case U2_Type: { P=PrecompPerm(U0_Type, perm_indx); break; } case V_Type: { break; } case V1_Type: { break; } case W_Type: { Vector scal_exp; Permutation ker_perm; if(perm_indxkernel->k_m2t->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_m2t->src_scal; P=equiv_surf_perm(m, p_indx, ker_perm, (this->Homogen()?&scal_exp:NULL)); }else{ // Target permutation ker_perm=this->kernel->k_m2t->perm_vec[C_Perm+p_indx]; scal_exp=this->kernel->k_m2t->trg_scal; P=cheb_perm(q, p_indx, ker_perm, (this->Homogen()?&scal_exp:NULL)); } break; } case X_Type: { Vector scal_exp; Permutation ker_perm; if(perm_indxkernel->k_s2l->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_s2l->src_scal; for(size_t i=0;iHomogen()?&scal_exp:NULL)); }else{ ker_perm=this->kernel->k_l2l->perm_vec[0 +p_indx]; scal_exp=this->kernel->k_l2l->src_scal; for(size_t i=0;i ker_perm0=this->kernel->k_s2l->perm_vec[C_Perm+p_indx]; Permutation ker_perm1=this->kernel->k_l2l->perm_vec[C_Perm+p_indx]; assert(ker_perm0.Dim()>0 && ker_perm0.Dim()==ker_perm1.Dim()); if(fabs(ker_perm0.scal[0]-ker_perm1.scal[0])>eps){ for(size_t i=0;i& scal_exp0=this->kernel->k_s2l->trg_scal; const Vector& scal_exp1=this->kernel->k_l2l->trg_scal; assert(scal_exp0.Dim()>0 && scal_exp0.Dim()==scal_exp1.Dim()); Real_t s=(scal_exp0[0]-scal_exp1[0]); for(size_t i=1;iHomogen()?&scal_exp:NULL)); } break; } default: return FMM_Pts::PrecompPerm(type, perm_indx); break; } //Save the matrix for future use. #pragma omp critical (PRECOMP_MATRIX_PTS) if(P_.Dim()==0){ P_=P;} return P_; } template Matrix& FMM_Cheb::Precomp(int level, Mat_Type type, size_t mat_indx){ if(this->Homogen()) level=0; //Check if the matrix already exists. //Compute matrix from symmetry class (if possible). Matrix& M_ = this->mat->Mat(level, type, mat_indx); if(M_.Dim(0)!=0 && M_.Dim(1)!=0) return M_; else{ //Compute matrix from symmetry class (if possible). size_t class_indx = this->interac_list.InteracClass(type, mat_indx); if(class_indx!=mat_indx){ Matrix& M0 = this->Precomp(level, type, class_indx); Permutation& Pr = this->interac_list.Perm_R(level, type, mat_indx); Permutation& Pc = this->interac_list.Perm_C(level, type, mat_indx); if(Pr.Dim()>0 && Pc.Dim()>0 && M0.Dim(0)>0 && M0.Dim(1)>0) return M_; } } int myrank, np; MPI_Comm_rank(this->comm, &myrank); MPI_Comm_size(this->comm,&np); size_t progress=0, class_count=0; { // Determine precomputation progress. size_t mat_cnt=this->interac_list.ListCount((Mat_Type)type); for(size_t i=0; iinterac_list.InteracClass((Mat_Type)type,i); if(indx==i){ class_count++; if(i M; int n_src=((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6; switch (type){ case S2U_Type: { if(this->MultipoleOrder()==0) break; Real_t r=pow(0.5,level); Real_t c[3]={0,0,0}; // Coord of upward check surface std::vector uc_coord=u_check_surf(this->MultipoleOrder(),c,level); size_t n_uc=uc_coord.size()/3; // Evaluate potential at check surface. Matrix M_s2c(n_src*this->kernel->k_s2m->ker_dim[0],n_uc*this->kernel->k_s2m->ker_dim[1]); //source 2 check Matrix M_s2c_local(n_src*this->kernel->k_s2m->ker_dim[0],n_uc*this->kernel->k_s2m->ker_dim[1]); { M_s2c.SetZero(); M_s2c_local.SetZero(); size_t cnt_done=0; #pragma omp parallel for schedule(dynamic) for(size_t i=myrank;i M_=cheb_integ(cheb_deg, &uc_coord[i*3], r, *this->kernel->k_s2m); #ifdef __VERBOSE__ #pragma omp critical if(!myrank){ cnt_done++; std::cout<<"\r Progress: "<<(100*progress*n_uc+100*cnt_done*np)/(class_count*n_uc)<<"% "<kernel->k_s2m->ker_dim[1]; k++) for(size_t j=0; j<(size_t)M_s2c.Dim(0); j++) M_s2c_local[j][i*this->kernel->k_s2m->ker_dim[1]+k] = M_[j+k*M_s2c.Dim(0)]; } if(!myrank) std::cout<<"\r \r"<::value(), par::Mpi_datatype::sum(), this->comm); } Matrix& M_c2e = this->Precomp(level, UC2UE_Type, 0); M=M_s2c*M_c2e; break; } case D2T_Type: { if(this->MultipoleOrder()==0) break; Matrix& M_s2t=FMM_Pts::Precomp(level, type, mat_indx); int n_trg=M_s2t.Dim(1)/this->kernel->k_l2t->ker_dim[1]; // Compute Chebyshev approx from target potential. M.Resize(M_s2t.Dim(0), n_src*this->kernel->k_l2t->ker_dim [1]); #pragma omp parallel for schedule(dynamic) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){ Matrix M_trg(n_trg,this->kernel->k_l2t->ker_dim[1],M_s2t[j],false); M_trg=M_trg.Transpose(); cheb_approx(M_s2t[j],cheb_deg,this->kernel->k_l2t->ker_dim[1],M[j]); } #pragma omp critical (PRECOMP_MATRIX_PTS) { M_s2t.Resize(0,0); } break; } case U0_Type: { // Coord of target points Real_t s=pow(0.5,level); int* coord=this->interac_list.RelativeCoord(type,mat_indx); Real_t coord_diff[3]={(coord[0]-1)*s*0.5,(coord[1]-1)*s*0.5,(coord[2]-1)*s*0.5}; std::vector& rel_trg_coord = this->mat->RelativeTrgCoord(); size_t n_trg = rel_trg_coord.size()/3; std::vector trg_coord(n_trg*3); for(size_t j=0;j M_s2t(n_src*this->kernel->k_s2t->ker_dim [0], n_trg*this->kernel->k_s2t->ker_dim [1]); Matrix M_s2t_local(n_src*this->kernel->k_s2t->ker_dim [0], n_trg*this->kernel->k_s2t->ker_dim [1]); { M_s2t.SetZero(); M_s2t_local.SetZero(); size_t cnt_done=0; #pragma omp parallel for schedule(dynamic) for(size_t i=myrank;i s2t=cheb_integ(cheb_deg, &trg_coord[i*3], (Real_t)(s*2.0), *this->kernel->k_s2t); #ifdef __VERBOSE__ #pragma omp critical if(!myrank){ cnt_done++; std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<kernel->k_s2t->ker_dim[1]; k++) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++) M_s2t_local[j][i*this->kernel->k_s2t->ker_dim[1]+k] = s2t[j+k*M_s2t.Dim(0)]; } if(!myrank) std::cout<<"\r \r"<::value(), par::Mpi_datatype::sum(), this->comm); } // Compute Chebyshev approx from target potential. M.Resize(M_s2t.Dim(0), n_src*this->kernel->k_s2t->ker_dim [1]); #pragma omp parallel for schedule(dynamic) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){ Matrix M_trg(n_trg,this->kernel->k_s2t->ker_dim[1],M_s2t[j],false); M_trg=M_trg.Transpose(); cheb_approx(M_s2t[j],cheb_deg,this->kernel->k_s2t->ker_dim[1],M[j]); } break; } case U1_Type: { // Coord of target points Real_t s=pow(0.5,level); int* coord=this->interac_list.RelativeCoord(type,mat_indx); Real_t coord_diff[3]={coord[0]*s,coord[1]*s,coord[2]*s}; std::vector& rel_trg_coord = this->mat->RelativeTrgCoord(); size_t n_trg = rel_trg_coord.size()/3; std::vector trg_coord(n_trg*3); for(size_t j=0;j M_s2t(n_src*this->kernel->k_s2t->ker_dim [0], n_trg*this->kernel->k_s2t->ker_dim [1]); Matrix M_s2t_local(n_src*this->kernel->k_s2t->ker_dim [0], n_trg*this->kernel->k_s2t->ker_dim [1]); { M_s2t.SetZero(); M_s2t_local.SetZero(); size_t cnt_done=0; #pragma omp parallel for schedule(dynamic) for(size_t i=myrank;i s2t=cheb_integ(cheb_deg, &trg_coord[i*3], s, *this->kernel->k_s2t); #ifdef __VERBOSE__ #pragma omp critical if(!myrank){ cnt_done++; std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<kernel->k_s2t->ker_dim[1]; k++) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++) M_s2t_local[j][i*this->kernel->k_s2t->ker_dim[1]+k] = s2t[j+k*M_s2t.Dim(0)]; } if(!myrank) std::cout<<"\r \r"<::value(), par::Mpi_datatype::sum(), this->comm); } // Compute Chebyshev approx from target potential. M.Resize(M_s2t.Dim(0), n_src*this->kernel->k_s2t->ker_dim [1]); #pragma omp parallel for schedule(dynamic) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){ Matrix M_trg(n_trg,this->kernel->k_s2t->ker_dim[1],M_s2t[j],false); M_trg=M_trg.Transpose(); cheb_approx(M_s2t[j],cheb_deg,this->kernel->k_s2t->ker_dim[1],M[j]); } break; } case U2_Type: { // Coord of target points Real_t s=pow(0.5,level); int* coord=this->interac_list.RelativeCoord(type,mat_indx); Real_t coord_diff[3]={(coord[0]+1)*s*0.25,(coord[1]+1)*s*0.25,(coord[2]+1)*s*0.25}; std::vector& rel_trg_coord = this->mat->RelativeTrgCoord(); size_t n_trg = rel_trg_coord.size()/3; std::vector trg_coord(n_trg*3); for(size_t j=0;j M_s2t(n_src*this->kernel->k_s2t->ker_dim [0], n_trg*this->kernel->k_s2t->ker_dim [1]); Matrix M_s2t_local(n_src*this->kernel->k_s2t->ker_dim [0], n_trg*this->kernel->k_s2t->ker_dim [1]); { M_s2t.SetZero(); M_s2t_local.SetZero(); size_t cnt_done=0; #pragma omp parallel for schedule(dynamic) for(size_t i=myrank;i s2t=cheb_integ(cheb_deg, &trg_coord[i*3], (Real_t)(s*0.5), *this->kernel->k_s2t); #ifdef __VERBOSE__ #pragma omp critical if(!myrank){ cnt_done++; std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<kernel->k_s2t->ker_dim[1]; k++) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++) M_s2t_local[j][i*this->kernel->k_s2t->ker_dim[1]+k] = s2t[j+k*M_s2t.Dim(0)]; } if(!myrank) std::cout<<"\r \r"<::value(), par::Mpi_datatype::sum(), this->comm); } // Compute Chebyshev approx from target potential. M.Resize(M_s2t.Dim(0), n_src*this->kernel->k_s2t->ker_dim [1]); #pragma omp parallel for schedule(dynamic) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){ Matrix M_trg(n_trg,this->kernel->k_s2t->ker_dim[1],M_s2t[j],false); M_trg=M_trg.Transpose(); cheb_approx(M_s2t[j],cheb_deg,this->kernel->k_s2t->ker_dim[1],M[j]); } break; } case W_Type: { if(this->MultipoleOrder()==0) break; Matrix& M_s2t=FMM_Pts::Precomp(level, type, mat_indx); int n_trg=M_s2t.Dim(1)/this->kernel->k_m2t->ker_dim[1]; // Compute Chebyshev approx from target potential. M.Resize(M_s2t.Dim(0), n_src*this->kernel->k_m2t->ker_dim [1]); #pragma omp parallel for schedule(dynamic) for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){ Matrix M_trg(n_trg,this->kernel->k_m2t->ker_dim[1],M_s2t[j],false); M_trg=M_trg.Transpose(); cheb_approx(M_s2t[j],cheb_deg,this->kernel->k_m2t->ker_dim[1],M[j]); } #pragma omp critical (PRECOMP_MATRIX_PTS) { M_s2t.Resize(0,0); } break; } case X_Type: { if(this->MultipoleOrder()==0) break; // Coord of target points Real_t s=pow(0.5,level-1); int* coord=this->interac_list.RelativeCoord(type,mat_indx); Real_t c[3]={-(coord[0]-1)*s*0.25,-(coord[1]-1)*s*0.25,-(coord[2]-1)*s*0.25}; std::vector trg_coord=d_check_surf(this->MultipoleOrder(),c,level); size_t n_trg=trg_coord.size()/3; // Evaluate potential at target points. Matrix M_xs2c(n_src*this->kernel->k_s2l->ker_dim[0], n_trg*this->kernel->k_s2l->ker_dim[1]); Matrix M_xs2c_local(n_src*this->kernel->k_s2l->ker_dim[0], n_trg*this->kernel->k_s2l->ker_dim[1]); { M_xs2c.SetZero(); M_xs2c_local.SetZero(); size_t cnt_done=0; #pragma omp parallel for schedule(dynamic) for(size_t i=myrank;i M_=cheb_integ(cheb_deg, &trg_coord[i*3], s, *this->kernel->k_s2l); #ifdef __VERBOSE__ #pragma omp critical if(!myrank){ cnt_done++; std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<kernel->k_s2l->ker_dim[1]; k++) for(size_t j=0; j<(size_t)M_xs2c.Dim(0); j++) M_xs2c_local[j][i*this->kernel->k_s2l->ker_dim[1]+k] = M_[j+k*M_xs2c.Dim(0)]; } if(!myrank) std::cout<<"\r \r"<::value(), par::Mpi_datatype::sum(), this->comm); } Matrix& M_c2e = this->Precomp(level, DC2DE_Type, 0); M=M_xs2c*M_c2e; break; } default: { return FMM_Pts::Precomp(level, type, mat_indx); break; } } //Save the matrix for future use. #pragma omp critical (PRECOMP_MATRIX_PTS) if(M_.Dim(0)==0 && M_.Dim(1)==0){ M_=M;} return M_; } template void FMM_Cheb::CollectNodeData(FMMTree_t* tree, std::vector& node, std::vector >& buff, std::vector >& n_list, std::vector* > > vec_list){ if(vec_list.size()<6) vec_list.resize(6); size_t n_coeff=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6; if(node.size()==0) return; {// 4. cheb_in int indx=4; size_t vec_sz=this->kernel->ker_dim[0]*n_coeff; for(size_t i=0;iIsLeaf()){ Vector& data_vec=node[i]->ChebData(); if(data_vec.Dim()!=vec_sz) data_vec.ReInit(vec_sz); vec_list[indx].push_back(&data_vec); } } } {// 5. cheb_out int indx=5; size_t vec_sz=this->kernel->ker_dim[1]*n_coeff; for(size_t i=0;iIsLeaf() && !node[i]->IsGhost()){ Vector& data_vec=((FMMData*)node[i]->FMMData())->cheb_out; if(data_vec.Dim()!=vec_sz) data_vec.ReInit(vec_sz); vec_list[indx].push_back(&data_vec); } } } FMM_Pts::CollectNodeData(tree, node, buff, n_list, vec_list); } template void FMM_Cheb::Source2UpSetup(SetupData& setup_data, FMMTree_t* tree, std::vector >& buff, std::vector >& n_list, int level, bool device){ if(this->MultipoleOrder()==0) return; FMM_Pts::Source2UpSetup(setup_data, tree, buff, n_list, level, device); { // Set setup_data setup_data.level=level; setup_data.kernel=this->kernel->k_s2m; setup_data.interac_type.resize(1); setup_data.interac_type[0]=S2U_Type; setup_data. input_data=&buff[4]; setup_data.output_data=&buff[0]; Vector& nodes_in =n_list[4]; Vector& nodes_out=n_list[0]; setup_data.nodes_in .clear(); setup_data.nodes_out.clear(); for(size_t i=0;iDepth()==level || level==-1) setup_data.nodes_in .push_back(nodes_in [i]); for(size_t i=0;iDepth()==level || level==-1) setup_data.nodes_out.push_back(nodes_out[i]); } std::vector& nodes_in =setup_data.nodes_in ; std::vector& nodes_out=setup_data.nodes_out; std::vector*>& input_vector=setup_data. input_vector; input_vector.clear(); std::vector*>& output_vector=setup_data.output_vector; output_vector.clear(); for(size_t i=0;iChebData() ); for(size_t i=0;iFMMData())->upward_equiv); this->SetupInterac(setup_data,device); } template void FMM_Cheb::Source2Up (SetupData& setup_data, bool device){ //Add Source2Up contribution. this->EvalList(setup_data, device); } template void FMM_Cheb::X_ListSetup(SetupData& setup_data, FMMTree_t* tree, std::vector >& buff, std::vector >& n_list, int level, bool device){ if(this->MultipoleOrder()==0) return; FMM_Pts::X_ListSetup(setup_data, tree, buff, n_list, level, device); { // Set setup_data setup_data.level=level; setup_data.kernel=this->kernel->k_s2l; setup_data.interac_type.resize(1); setup_data.interac_type[0]=X_Type; setup_data. input_data=&buff[4]; setup_data.output_data=&buff[1]; Vector& nodes_in =n_list[4]; Vector& nodes_out=n_list[1]; setup_data.nodes_in .clear(); setup_data.nodes_out.clear(); for(size_t i=0;iDepth()==level-1 || level==-1) setup_data.nodes_in .push_back(nodes_in [i]); for(size_t i=0;iDepth()==level || level==-1) setup_data.nodes_out.push_back(nodes_out[i]); } std::vector& nodes_in =setup_data.nodes_in ; std::vector& nodes_out=setup_data.nodes_out; std::vector*>& input_vector=setup_data. input_vector; input_vector.clear(); std::vector*>& output_vector=setup_data.output_vector; output_vector.clear(); for(size_t i=0;iChebData() ); for(size_t i=0;iFMMData())->dnward_equiv); this->SetupInterac(setup_data,device); { // Resize device buffer size_t n=setup_data.output_data->Dim(0)*setup_data.output_data->Dim(1)*sizeof(Real_t); if(this->dev_buffer.Dim()dev_buffer.ReInit(n); } } template void FMM_Cheb::X_List (SetupData& setup_data, bool device){ //Add X_List contribution. FMM_Pts::X_List(setup_data, device); this->EvalList(setup_data, device); } template void FMM_Cheb::W_ListSetup(SetupData& setup_data, FMMTree_t* tree, std::vector >& buff, std::vector >& n_list, int level, bool device){ if(this->MultipoleOrder()==0) return; { // Set setup_data setup_data.level=level; setup_data.kernel=this->kernel->k_m2t; setup_data.interac_type.resize(1); setup_data.interac_type[0]=W_Type; setup_data. input_data=&buff[0]; setup_data.output_data=&buff[5]; Vector& nodes_in =n_list[0]; Vector& nodes_out=n_list[5]; setup_data.nodes_in .clear(); setup_data.nodes_out.clear(); for(size_t i=0;iDepth()==level+1 || level==-1) setup_data.nodes_in .push_back(nodes_in [i]); for(size_t i=0;iDepth()==level || level==-1) setup_data.nodes_out.push_back(nodes_out[i]); } std::vector& nodes_in =setup_data.nodes_in ; std::vector& nodes_out=setup_data.nodes_out; std::vector*>& input_vector=setup_data. input_vector; input_vector.clear(); std::vector*>& output_vector=setup_data.output_vector; output_vector.clear(); for(size_t i=0;iFMMData())->upward_equiv); for(size_t i=0;iFMMData())->cheb_out ); this->SetupInterac(setup_data,device); { // Resize device buffer size_t n=setup_data.output_data->Dim(0)*setup_data.output_data->Dim(1)*sizeof(Real_t); if(this->dev_buffer.Dim()dev_buffer.ReInit(n); } } template void FMM_Cheb::W_List (SetupData& setup_data, bool device){ //Add W_List contribution. this->EvalList(setup_data, device); } template void FMM_Cheb::U_ListSetup(SetupData& setup_data, FMMTree_t* tree, std::vector >& buff, std::vector >& n_list, int level, bool device){ FMM_Pts::U_ListSetup(setup_data, tree, buff, n_list, level, device); { // Set setup_data setup_data.level=level; setup_data.kernel=this->kernel->k_s2t; setup_data.interac_type.resize(3); setup_data.interac_type[0]=U0_Type; setup_data.interac_type[1]=U1_Type; setup_data.interac_type[2]=U2_Type; setup_data. input_data=&buff[4]; setup_data.output_data=&buff[5]; Vector& nodes_in =n_list[4]; Vector& nodes_out=n_list[5]; setup_data.nodes_in .clear(); setup_data.nodes_out.clear(); for(size_t i=0;iDepth() && nodes_in [i]->Depth()<=level+1) || level==-1) setup_data.nodes_in .push_back(nodes_in [i]); for(size_t i=0;iDepth()==level ) || level==-1) setup_data.nodes_out.push_back(nodes_out[i]); } std::vector& nodes_in =setup_data.nodes_in ; std::vector& nodes_out=setup_data.nodes_out; std::vector*>& input_vector=setup_data. input_vector; input_vector.clear(); std::vector*>& output_vector=setup_data.output_vector; output_vector.clear(); for(size_t i=0;iChebData()); for(size_t i=0;iFMMData())->cheb_out ); this->SetupInterac(setup_data,device); { // Resize device buffer size_t n=setup_data.output_data->Dim(0)*setup_data.output_data->Dim(1)*sizeof(Real_t); if(this->dev_buffer.Dim()dev_buffer.ReInit(n); } } template void FMM_Cheb::U_List (SetupData& setup_data, bool device){ //Add U_List contribution. FMM_Pts::U_List(setup_data, device); this->EvalList(setup_data, device); } template void FMM_Cheb::Down2TargetSetup(SetupData& setup_data, FMMTree_t* tree, std::vector >& buff, std::vector >& n_list, int level, bool device){ if(this->MultipoleOrder()==0) return; { // Set setup_data setup_data.level=level; setup_data.kernel=this->kernel->k_l2t; setup_data.interac_type.resize(1); setup_data.interac_type[0]=D2T_Type; setup_data. input_data=&buff[1]; setup_data.output_data=&buff[5]; Vector& nodes_in =n_list[1]; Vector& nodes_out=n_list[5]; setup_data.nodes_in .clear(); setup_data.nodes_out.clear(); for(size_t i=0;iDepth()==level || level==-1) setup_data.nodes_in .push_back(nodes_in [i]); for(size_t i=0;iDepth()==level || level==-1) setup_data.nodes_out.push_back(nodes_out[i]); } std::vector& nodes_in =setup_data.nodes_in ; std::vector& nodes_out=setup_data.nodes_out; std::vector*>& input_vector=setup_data. input_vector; input_vector.clear(); std::vector*>& output_vector=setup_data.output_vector; output_vector.clear(); for(size_t i=0;iFMMData())->dnward_equiv); for(size_t i=0;iFMMData())->cheb_out ); this->SetupInterac(setup_data,device); } template void FMM_Cheb::Down2Target (SetupData& setup_data, bool device){ //Add Down2Target contribution. this->EvalList(setup_data, device); } template void FMM_Cheb::PostProcessing(std::vector& nodes){ size_t n=nodes.size(); #pragma omp parallel { int omp_p=omp_get_num_threads(); int pid = omp_get_thread_num(); size_t a=(pid*n)/omp_p; size_t b=((pid+1)*n)/omp_p; //For each node, compute interaction from point sources. for(size_t i=a;i& trg_coord=nodes[i]->trg_coord; Vector& trg_value=nodes[i]->trg_value; Vector& cheb_out =((FMMData*)nodes[i]->FMMData())->cheb_out; //Initialize target potential. size_t trg_cnt=trg_coord.Dim()/COORD_DIM; //trg_value.assign(trg_cnt*dof*this->kernel->ker_dim[1],0); //Sample local expansion at target points. if(trg_cnt>0 && cheb_out.Dim()>0){ Real_t* c=nodes[i]->Coord(); Real_t scale=pow(2.0,nodes[i]->Depth()+1); std::vector rel_coord(COORD_DIM*trg_cnt); for(size_t j=0;j