123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421 |
- /**
- * \file mat_utils.txx
- * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
- * \date 1-1-2013
- */
- template <class FMM_Mat_t>
- void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, pvfmm::Kernel<typename FMM_Mat_t::Real_t>* mykernel){
- if(mykernel==NULL) return;
- // Find out number of OMP thereads.
- int np=omp_get_max_threads();
- // Find out my identity in the default communicator
- int myrank, p;
- MPI_Comm c1=MPI_COMM_WORLD;
- MPI_Comm_rank(c1, &myrank);
- MPI_Comm_size(c1,&p);
- typedef typename FMM_Mat_t::FMMData FMM_Data_t;
- typedef typename FMM_Mat_t::FMMNode_t FMMNode_t;
- typedef typename FMM_Mat_t::Real_t Real_t;
- // Read source data.
- std::vector<Real_t> src_coord;
- std::vector<Real_t> src_value;
- FMMNode_t* n=static_cast<FMMNode_t*>(mytree->PreorderFirst());
- while(n!=NULL){
- if(n->IsLeaf() && !n->IsGhost()){
- pvfmm::Vector<Real_t>& coord_vec=n->src_coord;
- pvfmm::Vector<Real_t>& value_vec=n->src_value;
- for(size_t i=0;i<coord_vec.Dim();i++) src_coord.push_back(coord_vec[i]);
- for(size_t i=0;i<value_vec.Dim();i++) src_value.push_back(value_vec[i]);
- }
- n=static_cast<FMMNode_t*>(mytree->PreorderNxt(n));
- }
- long long glb_src_cnt=0, src_cnt=src_coord.size()/3;
- MPI_Allreduce(&src_cnt, &glb_src_cnt, 1, MPI_LONG_LONG, MPI_SUM, c1);
- long long glb_val_cnt=0, val_cnt=src_value.size();
- MPI_Allreduce(&val_cnt, &glb_val_cnt, 1, MPI_LONG_LONG, MPI_SUM, c1);
- if(glb_src_cnt==0) return;
- int dof=glb_val_cnt/glb_src_cnt/mykernel->ker_dim[0];
- int trg_dof=dof*mykernel->ker_dim[1];
- // Read target data.
- std::vector<Real_t> trg_coord;
- std::vector<Real_t> trg_poten_fmm;
- long long trg_iter=0;
- size_t step_size=1+glb_src_cnt*glb_src_cnt*1e-9/p;
- n=static_cast<FMMNode_t*>(mytree->PreorderFirst());
- while(n!=NULL){
- if(n->IsLeaf() && !n->IsGhost()){
- pvfmm::Vector<Real_t>& coord_vec=n->trg_coord;
- pvfmm::Vector<Real_t>& poten_vec=n->trg_value;
- for(size_t i=0;i<coord_vec.Dim()/3 ;i++){
- if(trg_iter%step_size==0){
- for(int j=0;j<3 ;j++) trg_coord .push_back(coord_vec[i*3 +j]);
- for(int j=0;j<trg_dof ;j++) trg_poten_fmm.push_back(poten_vec[i*trg_dof +j]);
- }
- trg_iter++;
- }
- }
- n=static_cast<FMMNode_t*>(mytree->PreorderNxt(n));
- }
- int trg_cnt=trg_coord.size()/3;
- int send_cnt=trg_cnt*3;
- std::vector<int> recv_cnts(p), recv_disp(p,0);
- MPI_Allgather(&send_cnt , 1, MPI_INT,
- &recv_cnts[0], 1, MPI_INT, c1);
- pvfmm::omp_par::scan(&recv_cnts[0], &recv_disp[0], p);
- int glb_trg_cnt=(recv_disp[p-1]+recv_cnts[p-1])/3;
- std::vector<Real_t> glb_trg_coord(glb_trg_cnt*3);
- MPI_Allgatherv(&trg_coord[0] , send_cnt , pvfmm::par::Mpi_datatype<Real_t>::value(),
- &glb_trg_coord[0], &recv_cnts[0], &recv_disp[0], pvfmm::par::Mpi_datatype<Real_t>::value(), c1);
- if(glb_trg_cnt==0) return;
- //Direct N-Body.
- std::vector<Real_t> trg_poten_dir(glb_trg_cnt*trg_dof ,0);
- std::vector<Real_t> glb_trg_poten_dir(glb_trg_cnt*trg_dof ,0);
- pvfmm::Profile::Tic("N-Body Direct",&c1,false,1);
- #pragma omp parallel for
- for(int i=0;i<np;i++){
- size_t a=(i*glb_trg_cnt)/np;
- size_t b=((i+1)*glb_trg_cnt)/np;
- mykernel->ker_poten(&src_coord[0], src_cnt, &src_value[0], dof, &glb_trg_coord[a*3], b-a, &trg_poten_dir[a*trg_dof ],NULL);
- }
- MPI_Allreduce(&trg_poten_dir[0], &glb_trg_poten_dir[0], trg_poten_dir.size(), pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), c1);
- pvfmm::Profile::Toc();
- //Compute error.
- {
- Real_t max_=0;
- Real_t max_err=0;
- for(size_t i=0;i<trg_poten_fmm.size();i++){
- Real_t err=fabs(glb_trg_poten_dir[i+(recv_disp[myrank]/3)*trg_dof]-trg_poten_fmm[i]);
- Real_t max=fabs(glb_trg_poten_dir[i+(recv_disp[myrank]/3)*trg_dof]);
- if(err>max_err) max_err=err;
- if(max>max_) max_=max;
- }
- Real_t glb_max, glb_max_err;
- MPI_Reduce(&max_ , &glb_max , 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
- MPI_Reduce(&max_err, &glb_max_err, 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
- if(!myrank){
- std::cout<<"Maximum Absolute Error ["<<mykernel->ker_name<<"] : "<<std::scientific<<glb_max_err<<'\n';
- std::cout<<"Maximum Relative Error ["<<mykernel->ker_name<<"] : "<<std::scientific<<glb_max_err/glb_max<<'\n';
- }
- }
- }
- template <class FMMTree_t>
- void CheckChebOutput(FMMTree_t* mytree, typename TestFn<typename FMMTree_t::Real_t>::Fn_t fn_poten, int fn_dof, std::string t_name){
- typedef typename FMMTree_t::FMM_Node_t FMMNode_t;
- typedef typename FMMTree_t::Real_t Real_t;
- MPI_Comm c1=*mytree->Comm();
- pvfmm::Profile::Tic((std::string("Compute Error ")+t_name).c_str(),&c1,true,1);
- int myrank; MPI_Comm_rank(c1, &myrank);
- FMMNode_t* r_node=static_cast<FMMNode_t*>(mytree->RootNode());
- int dof=r_node->DataDOF()/fn_dof;
- std::vector<FMMNode_t*> nodes;
- {
- FMMNode_t* node=static_cast<FMMNode_t*>(mytree->PreorderFirst());
- while(node!=NULL){
- if(node->IsLeaf() && !node->IsGhost()) nodes.push_back(node);
- node=static_cast<FMMNode_t*>(mytree->PreorderNxt(node));
- }
- if(nodes.size()==0) return;
- }
- int cheb_deg=nodes[0]->ChebDeg();
- std::vector<Real_t> cheb_nds=pvfmm::cheb_nodes<Real_t>(cheb_deg/3+1, 1);
- for(size_t i=0;i<cheb_nds.size();i++) cheb_nds[i]=2.0*cheb_nds[i]-1.0;
- std::vector<Real_t> cheb_pts=pvfmm::cheb_nodes<Real_t>(cheb_deg/3+1, COORD_DIM);
- int n_pts=cheb_pts.size()/COORD_DIM;
- int omp_p=omp_get_max_threads();
- std::vector<Real_t> glb_err_avg(dof*fn_dof,0);
- { // Determine glb_err_avg.
- std::vector<Real_t> err_avg(omp_p*dof*fn_dof,0);
- #pragma omp parallel for
- for(size_t tid=0;tid<omp_p;tid++){
- pvfmm::Vector<Real_t> out; out.SetZero();
- pvfmm::Vector<Real_t> fn_out(dof*fn_dof);
- for(size_t i=(nodes.size()*tid)/omp_p;i<(nodes.size()*(tid+1))/omp_p;i++){
- pvfmm::Vector<Real_t>& cheb_coeff=nodes[i]->ChebData();
- cheb_eval(cheb_coeff, cheb_deg, cheb_nds, cheb_nds, cheb_nds, out);
- Real_t* c=nodes[i]->Coord();
- Real_t s=pow(2.0,-nodes[i]->Depth());
- Real_t s3=s*s*s;
- for(size_t j=0;j<n_pts;j++){
- Real_t coord[3]={c[0]+s*cheb_pts[j*COORD_DIM+0],
- c[1]+s*cheb_pts[j*COORD_DIM+1],
- c[2]+s*cheb_pts[j*COORD_DIM+2]};
- fn_out.SetZero();
- for(size_t k=0;k<dof;k++)
- fn_poten(coord,1,&fn_out[k*fn_dof]);
- for(size_t k=0;k<dof*fn_dof;k++){
- Real_t err=out[n_pts*k+j]-fn_out[k];
- err_avg[tid*dof*fn_dof+k]+=err*s3;
- }
- }
- }
- for(size_t k=0;k<dof*fn_dof;k++)
- err_avg[tid*dof*fn_dof+k]/=n_pts;
- }
- for(size_t tid=1;tid<omp_p;tid++)
- for(size_t k=0;k<dof*fn_dof;k++)
- err_avg[k]+=err_avg[tid*dof*fn_dof+k];
- MPI_Allreduce(&err_avg[0], &glb_err_avg[0], dof*fn_dof, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), c1);
- }
- { // Write error to file.
- int nn_x=1;
- int nn_y=201;
- int nn_z=201;
- std::vector<Real_t> x(nn_x,1.0/2.0);
- std::vector<Real_t> y(nn_y);
- std::vector<Real_t> z(nn_z);
- //for(int i=0;i<nn_x;i++)
- // x[i]=((Real_t)i)/(nn_x-1);
- for(int i=0;i<nn_y;i++)
- y[i]=((Real_t)i)/(nn_y-1);
- for(int i=0;i<nn_z;i++)
- z[i]=((Real_t)i)/(nn_z-1);
- FMMNode_t* r_node=static_cast<FMMNode_t*>(mytree->RootNode());
- int fn_dof=r_node->DataDOF()/dof;
- Real_t* fn_out=new Real_t[dof*fn_dof];
- pvfmm::Matrix<Real_t> M_out (nn_z*fn_dof*dof,nn_y*nn_x,NULL,true); M_out .SetZero();
- pvfmm::Matrix<Real_t> M_out_err(nn_z*fn_dof*dof,nn_y*nn_x,NULL,true); M_out_err.SetZero();
- r_node->ReadVal(x,y,z,M_out[0],false);
- for(int l0=0;l0<dof;l0++)
- for(int l1=0;l1<fn_dof;l1++)
- for(int i=0;i<nn_x;i++)
- for(int j=0;j<nn_y;j++)
- for(int k=0;k<nn_z;k++){
- int l=l0*fn_dof+l1;
- Real_t ch_coord[3]={x[i],y[j],z[k]};
- Real_t cheb_val=M_out[k+l*nn_z][i+j*nn_x];
- if(fabs(cheb_val)>1.0e-20){
- fn_poten(ch_coord,1,fn_out);
- Real_t err=(cheb_val-fn_out[l])-glb_err_avg[l];
- M_out_err[k+l*nn_z][i+j*nn_x]=err;
- }
- }
- delete[] fn_out;
- pvfmm::Matrix<Real_t> M_global (nn_z*fn_dof*dof,nn_y*nn_x,NULL,true);
- pvfmm::Matrix<Real_t> M_global_err(nn_z*fn_dof*dof,nn_y*nn_x,NULL,true);
- MPI_Reduce(&M_out [0][0], &M_global [0][0], nn_x*nn_y*nn_z*fn_dof*dof, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), 0, c1);
- MPI_Reduce(&M_out_err[0][0], &M_global_err[0][0], nn_x*nn_y*nn_z*fn_dof*dof, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), 0, c1);
- //std::string fname;
- //fname=std::string("result/" )+t_name+std::string(".mat");
- //if(!myrank) M_global .Write(fname.c_str());
- //fname=std::string("result/err_")+t_name+std::string(".mat");
- //if(!myrank) M_global_err.Write(fname.c_str());
- }
- std::vector<Real_t> max (omp_p,0), l2 (omp_p,0);
- std::vector<Real_t> max_err(omp_p,0), l2_err(omp_p,0);
- #pragma omp parallel for
- for(size_t tid=0;tid<omp_p;tid++){
- pvfmm::Vector<Real_t> out; out.SetZero();
- pvfmm::Vector<Real_t> fn_out(dof*fn_dof);
- for(size_t i=(nodes.size()*tid)/omp_p;i<(nodes.size()*(tid+1))/omp_p;i++){
- pvfmm::Vector<Real_t>& cheb_coeff=nodes[i]->ChebData();
- cheb_eval(cheb_coeff, cheb_deg, cheb_nds, cheb_nds, cheb_nds, out);
- Real_t* c=nodes[i]->Coord();
- Real_t s=pow(2.0,-nodes[i]->Depth());
- Real_t s3=s*s*s;
- for(size_t j=0;j<n_pts;j++){
- Real_t coord[3]={c[0]+s*cheb_pts[j*COORD_DIM+0],
- c[1]+s*cheb_pts[j*COORD_DIM+1],
- c[2]+s*cheb_pts[j*COORD_DIM+2]};
- fn_out.SetZero();
- for(size_t k=0;k<dof;k++)
- fn_poten(coord,1,&fn_out[k*fn_dof]);
- for(size_t k=0;k<dof*fn_dof;k++){
- Real_t err=out[n_pts*k+j]-fn_out[k]-glb_err_avg[k];
- if(fabs(fn_out[k])>max [tid]) max [tid]=fabs(fn_out[k]);
- if(fabs(err )>max_err[tid]) max_err[tid]=fabs(err );
- l2[tid]+=fn_out[k]*fn_out[k]*s3;
- l2_err[tid]+=err*err*s3;
- }
- }
- }
- l2 [tid]/=n_pts;
- l2_err[tid]/=n_pts;
- }
- for(size_t tid=1;tid<omp_p;tid++){
- if(max [tid]>max [0]) max [0]=max [tid];
- if(max_err[tid]>max_err[0]) max_err[0]=max_err[tid];
- l2 [0]+=l2 [tid];
- l2_err[0]+=l2_err[tid];
- }
- Real_t global_l2, global_l2_err;
- Real_t global_max, global_max_err;
- MPI_Reduce(&l2 [0], &global_l2 , 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), 0, c1);
- MPI_Reduce(&l2_err [0], &global_l2_err , 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::sum(), 0, c1);
- MPI_Reduce(&max [0], &global_max , 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
- MPI_Reduce(&max_err[0], &global_max_err, 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
- if(!myrank){
- std::cout<<"Absolute L2 Error ["<<t_name<<"] : "<<std::scientific<<sqrt(global_l2_err)<<'\n';
- std::cout<<"Relative L2 Error ["<<t_name<<"] : "<<std::scientific<<sqrt(global_l2_err/global_l2)<<'\n';
- std::cout<<"Maximum Absolute Error ["<<t_name<<"]: "<<std::scientific<<global_max_err<<'\n';
- std::cout<<"Maximum Relative Error ["<<t_name<<"]: "<<std::scientific<<global_max_err/global_max<<'\n';
- }
- pvfmm::Profile::Toc();
- }
- template <class Real_t>
- std::vector<Real_t> point_distrib(DistribType dist_type, size_t N, MPI_Comm comm){
- int np, myrank;
- MPI_Comm_size(comm, &np);
- MPI_Comm_rank(comm, &myrank);
- std::vector<Real_t> coord;
- srand48(myrank+1);
- switch(dist_type){
- case UnifGrid:
- {
- size_t NN=(size_t)round(pow((double)N,1.0/3.0));
- size_t N_total=NN*NN*NN;
- size_t start= myrank *N_total/np;
- size_t end =(myrank+1)*N_total/np;
- for(size_t i=start;i<end;i++){
- coord.push_back(((Real_t)((i/ 1 )%NN)+0.5)/NN);
- coord.push_back(((Real_t)((i/ NN )%NN)+0.5)/NN);
- coord.push_back(((Real_t)((i/(NN*NN))%NN)+0.5)/NN);
- }
- }
- break;
- case RandUnif:
- {
- size_t N_total=N;
- size_t start= myrank *N_total/np;
- size_t end =(myrank+1)*N_total/np;
- size_t N_local=end-start;
- coord.resize(N_local*3);
- for(size_t i=0;i<N_local*3;i++)
- coord[i]=((Real_t)drand48());
- }
- break;
- case RandGaus:
- {
- Real_t e=2.7182818284590452;
- Real_t log_e=log(e);
- size_t N_total=N;
- size_t start= myrank *N_total/np;
- size_t end =(myrank+1)*N_total/np;
- for(size_t i=start*3;i<end*3;i++){
- Real_t y=-1;
- while(y<=0 || y>=1){
- Real_t r1=sqrt(-2*log(drand48())/log_e)*cos(2*M_PI*drand48());
- Real_t r2=pow(0.5,i*10/N_total);
- y=0.5+r1*r2;
- }
- coord.push_back(y);
- }
- }
- break;
- case RandElps:
- {
- size_t N_total=N;
- size_t start= myrank *N_total/np;
- size_t end =(myrank+1)*N_total/np;
- size_t N_local=end-start;
- coord.resize(N_local*3);
- const Real_t r=0.25;
- const Real_t center[3]={0.5,0.5,0.5};
- for(size_t i=0;i<N_local;i++){
- Real_t* y=&coord[i*3];
- Real_t phi=2*M_PI*drand48();
- Real_t theta=M_PI*drand48();
- y[0]=center[0]+0.25*r*sin(theta)*cos(phi);
- y[1]=center[1]+0.25*r*sin(theta)*sin(phi);
- y[2]=center[2]+r*cos(theta);
- }
- }
- break;
- case RandSphr:
- {
- size_t N_total=N;
- size_t start= myrank *N_total/np;
- size_t end =(myrank+1)*N_total/np;
- size_t N_local=end-start;
- coord.resize(N_local*3);
- const Real_t center[3]={0.5,0.5,0.5};
- for(size_t i=0;i<N_local;i++){
- Real_t* y=&coord[i*3];
- Real_t r=1;
- while(r>0.5 || r==0){
- y[0]=drand48(); y[1]=drand48(); y[2]=drand48();
- r=sqrt((y[0]-center[0])*(y[0]-center[0])
- +(y[1]-center[1])*(y[1]-center[1])
- +(y[2]-center[2])*(y[2]-center[2]));
- y[0]=center[0]+0.1*(y[0]-center[0])/r;
- y[1]=center[1]+0.1*(y[1]-center[1])/r;
- y[2]=center[2]+0.1*(y[2]-center[2])/r;
- }
- }
- }
- break;
- default:
- break;
- }
- return coord;
- }
- void commandline_option_start(int argc, char** argv, const char* help_text){
- char help[]="--help";
- for(int i=0;i<argc;i++){
- if(!strcmp(argv[i],help)){
- if(help_text!=NULL) std::cout<<help_text<<'\n';
- std::cout<<"Usage:\n";
- std::cout<<" "<<argv[0]<<" [options]\n\n";
- }
- }
- }
- const char* commandline_option(int argc, char** argv, const char* opt, const char* def_val, bool required, const char* err_msg){
- char help[]="--help";
- for(int i=0;i<argc;i++){
- if(!strcmp(argv[i],help)){
- std::cout<<" "<<err_msg<<'\n';
- return def_val;
- }
- }
- for(int i=0;i<argc;i++){
- if(!strcmp(argv[i],opt)){
- return argv[(i+1)%argc];
- }
- }
- if(required){
- std::cout<<"Missing: required option\n"<<" "<<err_msg<<"\n\n";
- std::cout<<"To see usage options\n"<<" "<<argv[0]<<" --help\n\n";
- exit(0);
- }
- return def_val;
- }
- void commandline_option_end(int argc, char** argv){
- char help[]="--help";
- for(int i=0;i<argc;i++){
- if(!strcmp(argv[i],help)){
- std::cout<<"\n";
- exit(0);
- }
- }
- }
|