fmm_gll.cpp 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. #if 0
  2. #include <fmm_gll.hpp>
  3. #include <iostream>
  4. #include <mpi.h>
  5. #include <omp.h>
  6. #include <pvfmm_common.hpp>
  7. #include <fmm_cheb.hpp>
  8. #include <fmm_node.hpp>
  9. #include <fmm_tree.hpp>
  10. #include <cheb_utils.hpp>
  11. #include <vector.hpp>
  12. #include <cheb_node.hpp>
  13. typedef pvfmm::FMM_Node<pvfmm::Cheb_Node<double> > FMMNode_t;
  14. typedef pvfmm::FMM_Cheb<FMMNode_t> FMM_Mat_t;
  15. typedef pvfmm::FMM_Tree<FMM_Mat_t> FMM_Tree_t;
  16. typedef FMM_Mat_t::FMMData FMM_Data_t;
  17. extern "C" {
  18. void fmm_gll_init(FMM_GLL_t* fmm_data, int gll_order_, int cheb_order_, int multipole_order, MPI_Comm comm){
  19. int np, myrank;
  20. MPI_Comm_size(comm,&np);
  21. MPI_Comm_rank(comm, &myrank);
  22. fmm_data->comm=comm;
  23. fmm_data->gll_order=gll_order_-1;
  24. fmm_data->cheb_order=cheb_order_-1;
  25. fmm_data->multipole_order=multipole_order;
  26. int gll_order=fmm_data->gll_order;
  27. int cheb_order=fmm_data->cheb_order;
  28. { // Get GLL node points.
  29. fmm_data->gll_nodes=new std::vector<double>(gll_order+1);
  30. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  31. std::vector<double> w(gll_order+1);
  32. pvfmm::gll_quadrature(gll_order, &gll_nodes[0], &w[0]);
  33. }
  34. for(int ker_indx=0;ker_indx<2;ker_indx++)
  35. { // Initialize Matrices
  36. FMM_Mat_t* fmm_mat;
  37. pvfmm::Kernel<double>* mykernel;
  38. FMM_Tree_t* mytree;
  39. if(ker_indx==0){
  40. pvfmm::Profile::Tic("Init:Biot-Savart",&fmm_data->comm,true,3);
  41. fmm_data->fmm_mat_biotsavart=new FMM_Mat_t;
  42. fmm_mat=((FMM_Mat_t*)fmm_data->fmm_mat_biotsavart);
  43. fmm_data->kernel_biotsavart=&pvfmm::ker_biot_savart;
  44. mykernel=(pvfmm::Kernel<double>*)fmm_data->kernel_biotsavart;
  45. fmm_data->tree_biotsavart=new FMM_Tree_t(fmm_data->comm);
  46. mytree=((FMM_Tree_t*)fmm_data->tree_biotsavart);
  47. }else{
  48. pvfmm::Profile::Tic("Init:LaplaceGrad",&fmm_data->comm,true,3);
  49. fmm_data->fmm_mat_laplace_grad=new FMM_Mat_t;
  50. fmm_mat=((FMM_Mat_t*)fmm_data->fmm_mat_laplace_grad);
  51. fmm_data->kernel_laplace_grad=&pvfmm::LaplaceKernel<double>::grad_ker();
  52. mykernel=(pvfmm::Kernel<double>*)fmm_data->kernel_laplace_grad;
  53. fmm_data->tree_laplace_grad=new FMM_Tree_t(fmm_data->comm);
  54. mytree=((FMM_Tree_t*)fmm_data->tree_laplace_grad);
  55. }
  56. //Various parameters.
  57. FMMNode_t::NodeData mydata;
  58. mydata.dim=COORD_DIM;
  59. mydata.data_dof=mykernel->ker_dim[0];
  60. mydata.max_pts=1; // Points per octant.
  61. mydata.cheb_deg=cheb_order;
  62. mydata.tol=1e-10;
  63. //Set source coordinates and values.
  64. //mydata.pt_coord=point_distrib(UnifGrid,np*8,fmm_data->comm);
  65. {
  66. size_t N=np*8;
  67. size_t NN=(size_t)pow((double)N,1.0/3.0);
  68. size_t N_total=NN*NN*NN;
  69. size_t start= myrank *N_total/np;
  70. size_t end =(myrank+1)*N_total/np;
  71. mydata.pt_coord.Resize((end-start)*3);
  72. for(size_t i=start;i<end;i++){
  73. mydata.pt_coord[(i-start)*3+0]=(((double)((i/ 1 )%NN)+0.5)/NN);
  74. mydata.pt_coord[(i-start)*3+1]=(((double)((i/ NN )%NN)+0.5)/NN);
  75. mydata.pt_coord[(i-start)*3+2]=(((double)((i/(NN*NN))%NN)+0.5)/NN);
  76. }
  77. }
  78. mydata.pt_value.Resize((mydata.pt_coord.Dim()/3)*mydata.data_dof);
  79. mydata.cheb_coord=mydata.pt_coord;
  80. mydata.cheb_value=mydata.pt_value;
  81. mydata.input_fn=NULL;
  82. //Initialize FMM_Mat.
  83. pvfmm::Profile::Tic("InitMat",&fmm_data->comm,true,5);
  84. fmm_mat->Initialize(fmm_data->multipole_order,mydata.cheb_deg,fmm_data->comm,mykernel);
  85. pvfmm::Profile::Toc();
  86. //Create Tree and initialize with input data.
  87. pvfmm::Profile::Tic("Tree Construction",&fmm_data->comm,true,5);
  88. mytree->Initialize(&mydata);
  89. pvfmm::Profile::Toc();
  90. //Initialize FMM Tree
  91. pvfmm::Profile::Tic("Init FMM Tree",&fmm_data->comm,true,5);
  92. pvfmm::BoundaryType bndry=pvfmm::Periodic;//pvfmm::FreeSpace;
  93. mytree->InitFMM_Tree(false,bndry);
  94. pvfmm::Profile::Toc();
  95. #ifndef NDEBUG
  96. //Check Tree.
  97. pvfmm::Profile::Tic("Check Tree",&fmm_data->comm,true,5);
  98. mytree->CheckTree();
  99. pvfmm::Profile::Toc();
  100. pvfmm::Profile::Tic("FMM Eval",&fmm_data->comm,true,1);
  101. mytree->SetupFMM(fmm_mat);
  102. mytree->RunFMM();
  103. pvfmm::Profile::Toc();
  104. #endif
  105. pvfmm::Profile::Toc();
  106. }
  107. #ifndef NDEBUG
  108. pvfmm::Profile::print(&fmm_data->comm);
  109. #endif
  110. }
  111. void fmm_gll_free(FMM_GLL_t* fmm_data){
  112. fmm_data->gll_order=-1;
  113. fmm_data->cheb_order=-1;
  114. fmm_data->multipole_order=-1;
  115. fmm_data->kernel_biotsavart=NULL;
  116. delete (FMM_Mat_t*)fmm_data->fmm_mat_biotsavart; fmm_data->fmm_mat_biotsavart=NULL;
  117. delete (FMM_Tree_t*)fmm_data->tree_biotsavart; fmm_data->tree_biotsavart=NULL;
  118. fmm_data->kernel_laplace_grad=NULL;
  119. delete (FMM_Mat_t*)fmm_data->fmm_mat_laplace_grad; fmm_data->fmm_mat_laplace_grad=NULL;
  120. delete (FMM_Tree_t*)fmm_data->tree_laplace_grad; fmm_data->tree_laplace_grad=NULL;
  121. delete (std::vector<double>*)fmm_data->gll_nodes; fmm_data->gll_nodes=NULL;
  122. }
  123. void fmm_gll_run(FMM_GLL_t* fmm_data, size_t node_cnt, double* node_coord, unsigned char* node_depth, double** node_gll_data){
  124. int gll_order=fmm_data->gll_order;
  125. int cheb_order=fmm_data->cheb_order;
  126. FMM_Mat_t& fmm_mat=*((FMM_Mat_t*)fmm_data->fmm_mat_biotsavart);
  127. FMM_Tree_t& mytree=*((FMM_Tree_t*)fmm_data->tree_biotsavart);
  128. pvfmm::Kernel<double>* mykernel=((pvfmm::Kernel<double>*)fmm_data->kernel_biotsavart);
  129. pvfmm::BoundaryType bndry=pvfmm::Periodic;//pvfmm::FreeSpace;
  130. // Find out number of OMP thereads.
  131. int omp_p=omp_get_max_threads();
  132. pvfmm::Profile::Tic("Construct Tree",&fmm_data->comm,true,1);
  133. //Construct pvfmm::MortonId from node_coord and node_depth.
  134. std::vector<pvfmm::MortonId> nodes(node_cnt);
  135. double s=0.25/(1UL<<MAX_DEPTH);
  136. for(size_t i=0;i<node_cnt;i++)
  137. nodes[i]=pvfmm::MortonId(node_coord[i*3+0]+s,node_coord[i*3+1]+s,node_coord[i*3+2]+s,node_depth[i]);
  138. // Construct tree from morton id.
  139. size_t node_iter=0;
  140. std::vector<FMMNode_t*> tree_nodes;
  141. FMMNode_t* node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  142. while(node!=NULL && node_iter<node_cnt){
  143. pvfmm::MortonId mid=node->GetMortonId();
  144. if(mid.isAncestor(nodes[node_iter])){
  145. node->SetGhost(false);
  146. node->Subdivide();
  147. }else{
  148. node->Truncate();
  149. node->SetGhost(mid!=nodes[node_iter]);
  150. if(mid==nodes[node_iter]){
  151. tree_nodes.push_back(node);
  152. node_iter++;
  153. }
  154. }
  155. node->DataDOF()=mykernel->ker_dim[0];
  156. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  157. }
  158. assert(node_iter==node_cnt);
  159. while(node!=NULL){
  160. node->Truncate();
  161. node->SetGhost(true);
  162. node->DataDOF()=mykernel->ker_dim[0];
  163. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  164. }
  165. // Add GLL data
  166. size_t n_gll_coeff=(gll_order+1)*(gll_order+2)*(gll_order+3)/6;
  167. size_t n_cheb_coeff=(cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6;
  168. #pragma omp parallel for
  169. for(int i=0;i<omp_p;i++){
  170. pvfmm::Vector<double> buff;
  171. size_t start=i*node_cnt/omp_p;
  172. size_t end=(i+1)*node_cnt/omp_p;
  173. for(size_t j=start;j<end;j++){
  174. FMMNode_t* node=tree_nodes[j];
  175. node->DataDOF()=mykernel->ker_dim[0];
  176. buff.Resize(n_gll_coeff*node->DataDOF());
  177. pvfmm::gll2cheb<double,double>(&(node_gll_data[j][0]), gll_order, node->DataDOF(), &(buff[0]));
  178. node->ChebData().Resize(n_cheb_coeff*node->DataDOF());
  179. double* cheb_out=&(node->ChebData()[0]);
  180. int indx_in=0;
  181. int indx_out=0;
  182. for(int k_=0;k_<node->DataDOF();k_++)
  183. for(int k0=0;k0 <=gll_order;k0++)
  184. for(int k1=0;k0+k1 <=gll_order;k1++)
  185. for(int k2=0;k0+k1+k2<=gll_order;k2++){
  186. if(k0+k1+k2<=cheb_order){
  187. cheb_out[indx_out]=buff[indx_in];
  188. indx_out++;
  189. }
  190. indx_in++;
  191. }
  192. }
  193. }
  194. // Set avg to zero for Periodic boundary condition
  195. std::vector<pvfmm::Vector<double> > cheb_w(node_cnt); //Copy vorticity.
  196. if(bndry==pvfmm::Periodic){
  197. std::vector<double> avg_w(mykernel->ker_dim[0],0);
  198. for(size_t j=0;j<node_cnt;j++){
  199. FMMNode_t* node=tree_nodes[j];
  200. for(int k=0;k<node->DataDOF();k++)
  201. avg_w[k]+=node->ChebData()[k*n_cheb_coeff]*pow(0.5,node->Depth()*3);
  202. }
  203. std::vector<double> glb_avg_w(mykernel->ker_dim[0],0);
  204. MPI_Allreduce(&avg_w[0], &glb_avg_w[0], mykernel->ker_dim[0], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  205. for(size_t j=0;j<node_cnt;j++){
  206. FMMNode_t* node=tree_nodes[j];
  207. for(int k=0;k<node->DataDOF();k++)
  208. node->ChebData()[k*n_cheb_coeff]-=glb_avg_w[k];
  209. cheb_w[j]=node->ChebData();
  210. }
  211. }
  212. pvfmm::Profile::Toc();
  213. #ifndef NDEBUG
  214. double max_w=0; // |w|_inf
  215. int n_gll_pts=(gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[0];
  216. for(size_t j=0;j<node_cnt;j++)
  217. for(int k=0; k<n_gll_pts; k++)
  218. if(max_w<fabs(node_gll_data[j][k]))
  219. max_w=node_gll_data[j][k];
  220. //Check Tree.
  221. pvfmm::Profile::Tic("Check Tree",&fmm_data->comm,true,1);
  222. mytree.CheckTree();
  223. pvfmm::Profile::Toc();
  224. //Report divergence.
  225. gll_div(fmm_data, node_cnt, node_coord, node_depth, node_gll_data);
  226. #endif
  227. //FMM Evaluate.
  228. pvfmm::Profile::Tic("FMM Eval",&fmm_data->comm,true,1);
  229. mytree.SetupFMM(&fmm_mat);
  230. mytree.RunFMM();
  231. pvfmm::Profile::Toc();
  232. //Copy FMM output to tree Data.
  233. mytree.Copy_FMMOutput();
  234. //Set average velocity to zero.
  235. if(bndry==pvfmm::Periodic){
  236. std::vector<double> avg_v(mykernel->ker_dim[1],0);
  237. for(size_t j=0;j<node_cnt;j++){
  238. FMMNode_t* node=tree_nodes[j];
  239. for(int k=0;k<node->DataDOF();k++)
  240. avg_v[k]+=node->ChebData()[k*n_cheb_coeff]*pow(0.5,node->Depth()*3);
  241. }
  242. std::vector<double> glb_avg_v(mykernel->ker_dim[1],0);
  243. MPI_Allreduce(&avg_v[0], &glb_avg_v[0], mykernel->ker_dim[1], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  244. //std::cout<<glb_avg_v[0]<<' '<<glb_avg_v[1]<<' '<<glb_avg_v[2]<<'\n';
  245. for(size_t j=0;j<node_cnt;j++){
  246. FMMNode_t* node=tree_nodes[j];
  247. for(int k=0;k<node->DataDOF();k++)
  248. node->ChebData()[k*n_cheb_coeff]-=glb_avg_v[k];
  249. }
  250. }
  251. #ifndef NDEBUG
  252. // Confirm that 2:1 balance did not change the number of tree nodes.
  253. size_t new_cnt=0;
  254. node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  255. while(node!=NULL){
  256. if(node->IsLeaf() && !node->IsGhost()) new_cnt++;
  257. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  258. }
  259. assert(node_cnt==new_cnt);
  260. #endif
  261. // Construct GLL data from FMM output.
  262. pvfmm::Profile::Tic("Output",&fmm_data->comm,true,1);
  263. node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  264. std::vector<double> corner_val(8*mykernel->ker_dim[1],0);
  265. std::vector<double> corner_val_loc(8*mykernel->ker_dim[1],0);
  266. std::vector<double> corner_coord(2); corner_coord[0]=0.0; corner_coord[1]=1.0;
  267. node->ReadVal(corner_coord,corner_coord,corner_coord, &corner_val_loc[0], false);
  268. MPI_Allreduce(&corner_val_loc[0], &corner_val[0], 8*mykernel->ker_dim[1], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  269. std::vector<double> dx(3*mykernel->ker_dim[1],0);
  270. for(int i=0;i<mykernel->ker_dim[1];i++){
  271. dx[0+i*3]=corner_val[1+8*i]-corner_val[0+8*i];
  272. dx[1+i*3]=corner_val[2+8*i]-corner_val[0+8*i];
  273. dx[2+i*3]=corner_val[4+8*i]-corner_val[0+8*i];
  274. }
  275. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  276. #pragma omp parallel for
  277. for(int i=0;i<omp_p;i++){
  278. size_t start=i*node_cnt/omp_p;
  279. size_t end=(i+1)*node_cnt/omp_p;
  280. for(size_t j=start;j<end;j++){
  281. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[1],&node_gll_data[j][0],false);
  282. cheb_eval(tree_nodes[j]->ChebData(), cheb_order, gll_nodes, gll_nodes, gll_nodes, gll_data);
  283. size_t indx=0;
  284. double* coord=tree_nodes[j]->Coord();
  285. double s=pow(0.5,tree_nodes[j]->Depth()+1);
  286. for(int l=0;l<mykernel->ker_dim[1];l++)
  287. for(int k2=0;k2<=gll_order;k2++)
  288. for(int k1=0;k1<=gll_order;k1++)
  289. for(int k0=0;k0<=gll_order;k0++){
  290. gll_data[indx]-=(coord[0]+(1.0+gll_nodes[k0])*s-0.5)*dx[0+l*3]+
  291. (coord[1]+(1.0+gll_nodes[k1])*s-0.5)*dx[1+l*3]+
  292. (coord[2]+(1.0+gll_nodes[k2])*s-0.5)*dx[2+l*3];
  293. indx++;
  294. }
  295. }
  296. }
  297. pvfmm::Profile::Toc();
  298. #ifndef NDEBUG
  299. //Compute |v|_inf
  300. double max_v=0;
  301. for(size_t j=0;j<node_cnt;j++){
  302. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[1],&node_gll_data[j][0],false);
  303. for(size_t k=0;k<gll_data.Dim();k++)
  304. if(max_v<fabs(gll_data[k])) max_v=gll_data[k];
  305. }
  306. //Compute |v^2|_l1
  307. double l2=0;
  308. pvfmm::Profile::Tic("L2-norm",&fmm_data->comm,true,1);
  309. std::vector<double> cheb_pts=pvfmm::cheb_nodes<double>(cheb_order*2, 1);
  310. pvfmm::Vector<double> integ_data((2*cheb_order+1)*(2*cheb_order+1)*(2*cheb_order+1)*mykernel->ker_dim[1]);
  311. pvfmm::Vector<double> integ_coeff((2*cheb_order+1)*(2*cheb_order+1)*(2*cheb_order+1)*mykernel->ker_dim[1]);
  312. for(size_t j=0;j<node_cnt;j++){
  313. double s=pow(0.5,tree_nodes[j]->Depth()*3);
  314. cheb_eval(tree_nodes[j]->ChebData(), cheb_order, cheb_pts, cheb_pts, cheb_pts, integ_data);
  315. //Make average zero.
  316. {
  317. size_t indx=0;
  318. double* coord=tree_nodes[j]->Coord();
  319. double s=pow(0.5,tree_nodes[j]->Depth()+1);
  320. for(int l=0;l<mykernel->ker_dim[1];l++)
  321. for(int k2=0;k2<=2*cheb_order;k2++)
  322. for(int k1=0;k1<=2*cheb_order;k1++)
  323. for(int k0=0;k0<=2*cheb_order;k0++){
  324. integ_data[indx]-=(coord[0]+(1.0+cheb_pts[k0])*s-0.5)*dx[0+l*3]+
  325. (coord[1]+(1.0+cheb_pts[k1])*s-0.5)*dx[1+l*3]+
  326. (coord[2]+(1.0+cheb_pts[k2])*s-0.5)*dx[2+l*3];
  327. indx++;
  328. }
  329. }
  330. for(size_t i=0;i<integ_data.Dim();i++) integ_data[i]*=integ_data[i];
  331. pvfmm::cheb_approx<double,double>(&integ_data[0], 2*cheb_order, mykernel->ker_dim[1], &integ_coeff[0]);
  332. l2+=s*integ_coeff[0*(2*cheb_order+1)*(2*cheb_order+2)*(2*cheb_order+3)/6];
  333. l2+=s*integ_coeff[1*(2*cheb_order+1)*(2*cheb_order+2)*(2*cheb_order+3)/6];
  334. l2+=s*integ_coeff[2*(2*cheb_order+1)*(2*cheb_order+2)*(2*cheb_order+3)/6];
  335. }
  336. pvfmm::Profile::Toc();
  337. //Compute |w - curl v|_inf
  338. double w_err=0;
  339. pvfmm::Profile::Tic("Curl",&fmm_data->comm,true,1);
  340. pvfmm::Vector<double> buff0(n_cheb_coeff*mykernel->ker_dim[0]);
  341. pvfmm::Vector<double> buff1((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[0]);
  342. for(size_t j=0;j<node_cnt;j++){
  343. tree_nodes[j]->Curl();
  344. for(size_t k=0;k<cheb_w[j].Dim();k++)
  345. buff0[k]=tree_nodes[j]->ChebData()[k]-cheb_w[j][k];
  346. cheb_eval(buff0, cheb_order, gll_nodes, gll_nodes, gll_nodes, buff1);
  347. for(size_t k=0;k<buff1.Dim();k++)
  348. if(w_err<fabs(buff1[k])) w_err=fabs(buff1[k]);
  349. }
  350. pvfmm::Profile::Toc();
  351. //Display norms.
  352. double glb_l2=0;
  353. double glb_max_v=0;
  354. double glb_max_w=0;
  355. double glb_max_w_err=0;
  356. MPI_Allreduce(&l2, &glb_l2, 1, MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  357. MPI_Allreduce(&max_v, &glb_max_v, 1, MPI_DOUBLE, MPI_MAX, fmm_data->comm);
  358. MPI_Allreduce(&max_w, &glb_max_w, 1, MPI_DOUBLE, MPI_MAX, fmm_data->comm);
  359. MPI_Allreduce(&w_err, &glb_max_w_err, 1, MPI_DOUBLE, MPI_MAX, fmm_data->comm);
  360. int myrank;
  361. MPI_Comm_rank(fmm_data->comm, &myrank);
  362. if(!myrank) std::cout<<"[fmm] |v^2|_1 = "<<glb_l2<<'\n';;
  363. if(!myrank) std::cout<<"[fmm] |v|_inf = "<<glb_max_v<<'\n';;
  364. if(!myrank) std::cout<<"[fmm] |w|_inf = "<<glb_max_w<<'\n';;
  365. if(!myrank) std::cout<<"[fmm] |w - curl v|_inf = "<<glb_max_w_err<<'\n';
  366. #endif
  367. #ifndef NDEBUG
  368. pvfmm::Profile::print(&fmm_data->comm);
  369. #endif
  370. }
  371. void fmm_gll_laplace_grad(FMM_GLL_t* fmm_data, size_t node_cnt, double* node_coord, unsigned char* node_depth, double** node_gll_data){
  372. int gll_order=fmm_data->gll_order;
  373. int cheb_order=fmm_data->cheb_order;
  374. FMM_Mat_t& fmm_mat=*((FMM_Mat_t*)fmm_data->fmm_mat_laplace_grad);
  375. FMM_Tree_t& mytree=*((FMM_Tree_t*)fmm_data->tree_laplace_grad);
  376. pvfmm::Kernel<double>* mykernel=((pvfmm::Kernel<double>*)fmm_data->kernel_laplace_grad);
  377. pvfmm::BoundaryType bndry=pvfmm::Periodic;//pvfmm::FreeSpace;
  378. // Find out number of OMP thereads.
  379. int omp_p=omp_get_max_threads();
  380. pvfmm::Profile::Tic("Construct Tree",&fmm_data->comm,true,1);
  381. //Construct pvfmm::MortonId from node_coord and node_depth.
  382. std::vector<pvfmm::MortonId> nodes(node_cnt);
  383. double s=0.25/(1UL<<MAX_DEPTH);
  384. for(size_t i=0;i<node_cnt;i++)
  385. nodes[i]=pvfmm::MortonId(node_coord[i*3+0]+s,node_coord[i*3+1]+s,node_coord[i*3+2]+s,node_depth[i]);
  386. // Construct tree from morton id.
  387. size_t node_iter=0;
  388. std::vector<FMMNode_t*> tree_nodes;
  389. FMMNode_t* node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  390. while(node!=NULL && node_iter<node_cnt){
  391. pvfmm::MortonId mid=node->GetMortonId();
  392. if(mid.isAncestor(nodes[node_iter])){
  393. node->SetGhost(false);
  394. node->Subdivide();
  395. }else{
  396. node->Truncate();
  397. node->SetGhost(mid!=nodes[node_iter]);
  398. if(mid==nodes[node_iter]){
  399. tree_nodes.push_back(node);
  400. node_iter++;
  401. }
  402. }
  403. node->DataDOF()=mykernel->ker_dim[0];
  404. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  405. }
  406. assert(node_iter==node_cnt);
  407. while(node!=NULL){
  408. node->Truncate();
  409. node->SetGhost(true);
  410. node->DataDOF()=mykernel->ker_dim[0];
  411. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  412. }
  413. // Add GLL data
  414. size_t n_gll_coeff=(gll_order+1)*(gll_order+2)*(gll_order+3)/6;
  415. size_t n_cheb_coeff=(cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6;
  416. #pragma omp parallel for
  417. for(int i=0;i<omp_p;i++){
  418. pvfmm::Vector<double> buff;
  419. size_t start=i*node_cnt/omp_p;
  420. size_t end=(i+1)*node_cnt/omp_p;
  421. for(size_t j=start;j<end;j++){
  422. FMMNode_t* node=tree_nodes[j];
  423. node->DataDOF()=mykernel->ker_dim[0];
  424. buff.Resize(n_gll_coeff*node->DataDOF());
  425. pvfmm::gll2cheb<double,double>(&(node_gll_data[j][0]), gll_order, node->DataDOF(), &(buff[0]));
  426. node->ChebData().Resize(n_cheb_coeff*node->DataDOF());
  427. double* cheb_out=&(node->ChebData()[0]);
  428. int indx_in=0;
  429. int indx_out=0;
  430. for(int k_=0;k_<node->DataDOF();k_++)
  431. for(int k0=0;k0 <=gll_order;k0++)
  432. for(int k1=0;k0+k1 <=gll_order;k1++)
  433. for(int k2=0;k0+k1+k2<=gll_order;k2++){
  434. if(k0+k1+k2<=cheb_order){
  435. cheb_out[indx_out]=buff[indx_in];
  436. indx_out++;
  437. }
  438. indx_in++;
  439. }
  440. }
  441. }
  442. // Set avg to zero for Periodic boundary condition
  443. std::vector<pvfmm::Vector<double> > cheb_rho(node_cnt); //Copy vorticity.
  444. if(bndry==pvfmm::Periodic){
  445. std::vector<double> avg_rho(mykernel->ker_dim[0],0);
  446. for(size_t j=0;j<node_cnt;j++){
  447. FMMNode_t* node=tree_nodes[j];
  448. for(int k=0;k<node->DataDOF();k++)
  449. avg_rho[k]+=node->ChebData()[k*n_cheb_coeff]*pow(0.5,node->Depth()*3);
  450. }
  451. std::vector<double> glb_avg_rho(mykernel->ker_dim[0],0);
  452. MPI_Allreduce(&avg_rho[0], &glb_avg_rho[0], mykernel->ker_dim[0], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  453. for(size_t j=0;j<node_cnt;j++){
  454. FMMNode_t* node=tree_nodes[j];
  455. for(int k=0;k<node->DataDOF();k++)
  456. node->ChebData()[k*n_cheb_coeff]-=glb_avg_rho[k];
  457. cheb_rho[j]=node->ChebData();
  458. }
  459. }
  460. pvfmm::Profile::Toc();
  461. #ifndef NDEBUG
  462. //Check Tree.
  463. pvfmm::Profile::Tic("Check Tree",&fmm_data->comm,true,1);
  464. mytree.CheckTree();
  465. pvfmm::Profile::Toc();
  466. #endif
  467. //FMM Evaluate.
  468. pvfmm::Profile::Tic("FMM Eval",&fmm_data->comm,true,1);
  469. mytree.SetupFMM(&fmm_mat);
  470. mytree.RunFMM();
  471. pvfmm::Profile::Toc();
  472. //Copy FMM output to tree Data.
  473. mytree.Copy_FMMOutput();
  474. //Set average (grad phi) to zero. (Optional)
  475. if(bndry==pvfmm::Periodic){
  476. std::vector<double> avg_gradphi(mykernel->ker_dim[1],0);
  477. for(size_t j=0;j<node_cnt;j++){
  478. FMMNode_t* node=tree_nodes[j];
  479. for(int k=0;k<node->DataDOF();k++)
  480. avg_gradphi[k]+=node->ChebData()[k*n_cheb_coeff]*pow(0.5,node->Depth()*3);
  481. }
  482. std::vector<double> glb_avg_gradphi(mykernel->ker_dim[1],0);
  483. MPI_Allreduce(&avg_gradphi[0], &glb_avg_gradphi[0], mykernel->ker_dim[1], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  484. for(size_t j=0;j<node_cnt;j++){
  485. FMMNode_t* node=tree_nodes[j];
  486. for(int k=0;k<node->DataDOF();k++)
  487. node->ChebData()[k*n_cheb_coeff]-=glb_avg_gradphi[k];
  488. }
  489. }
  490. #ifndef NDEBUG
  491. // Confirm that 2:1 balance did not change the number of tree nodes.
  492. size_t new_cnt=0;
  493. node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  494. while(node!=NULL){
  495. if(node->IsLeaf() && !node->IsGhost()) new_cnt++;
  496. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  497. }
  498. assert(node_cnt==new_cnt);
  499. #endif
  500. // Construct GLL data from FMM output.
  501. pvfmm::Profile::Tic("Output",&fmm_data->comm,true,1);
  502. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  503. #pragma omp parallel for
  504. for(int i=0;i<omp_p;i++){
  505. pvfmm::Vector<double> buff;
  506. pvfmm::Vector<double> buff1;
  507. size_t start=i*node_cnt/omp_p;
  508. size_t end=(i+1)*node_cnt/omp_p;
  509. for(size_t j=start;j<end;j++){
  510. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[1],&node_gll_data[j][0],false);
  511. cheb_eval(tree_nodes[j]->ChebData(), cheb_order, gll_nodes, gll_nodes, gll_nodes, gll_data);
  512. }
  513. }
  514. pvfmm::Profile::Toc();
  515. #ifndef NDEBUG
  516. //Compute |rho - div grad phi|_inf
  517. double resid_err=0;
  518. pvfmm::Profile::Tic("Divergence",&fmm_data->comm,true,1);
  519. pvfmm::Vector<double> buff0(n_cheb_coeff*mykernel->ker_dim[0]);
  520. pvfmm::Vector<double> buff1((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[0]);
  521. for(size_t j=0;j<node_cnt;j++){
  522. tree_nodes[j]->Divergence();
  523. for(size_t k=0;k<cheb_rho[j].Dim();k++)
  524. buff0[k]=tree_nodes[j]->ChebData()[k]+cheb_rho[j][k];
  525. cheb_eval(buff0, cheb_order, gll_nodes, gll_nodes, gll_nodes, buff1);
  526. for(size_t k=0;k<buff1.Dim();k++)
  527. if(resid_err<fabs(buff1[k])) resid_err=fabs(buff1[k]);
  528. }
  529. pvfmm::Profile::Toc();
  530. //Display norms.
  531. double glb_max_resid_err=0;
  532. MPI_Allreduce(&resid_err, &glb_max_resid_err, 1, MPI_DOUBLE, MPI_MAX, fmm_data->comm);
  533. int myrank;
  534. MPI_Comm_rank(fmm_data->comm, &myrank);
  535. if(!myrank) std::cout<<"[fmm] |rho - div grad phi|_inf = "<<glb_max_resid_err<<'\n';
  536. #endif
  537. #ifndef NDEBUG
  538. pvfmm::Profile::print(&fmm_data->comm);
  539. #endif
  540. }
  541. void gll_div(FMM_GLL_t* fmm_data, size_t node_cnt, double* node_coord, unsigned char* node_depth, double** node_gll_data){
  542. int myrank;
  543. MPI_Comm_rank(fmm_data->comm, &myrank);
  544. int dim=3;
  545. int gll_order=fmm_data->gll_order;
  546. int cheb_order=fmm_data->cheb_order;
  547. pvfmm::Kernel<double>* mykernel=((pvfmm::Kernel<double>*)fmm_data->kernel_biotsavart);
  548. // Find out number of OMP thereads.
  549. int omp_p=omp_get_max_threads();
  550. //Compute divergence.
  551. pvfmm::Profile::Tic("Div",&fmm_data->comm,true,1);
  552. size_t n_gll_coeff=(gll_order+1)*(gll_order+2)*(gll_order+3)/6;
  553. size_t n_cheb_coeff=(cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6;
  554. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  555. #pragma omp parallel for
  556. for(int i=0;i<omp_p;i++){
  557. size_t start=i*node_cnt/omp_p;
  558. size_t end=(i+1)*node_cnt/omp_p;
  559. pvfmm::Vector<double> buff(n_gll_coeff*mykernel->ker_dim[0]);
  560. pvfmm::Vector<double> cheb_coeff(n_cheb_coeff*mykernel->ker_dim[0]);
  561. pvfmm::Vector<double> cheb_div_coeff(n_cheb_coeff*mykernel->ker_dim[0]/dim);
  562. for(size_t j=start;j<end;j++){
  563. //Build Chebyshev approximation.
  564. pvfmm::gll2cheb<double,double>(&(node_gll_data[j][0]), gll_order, mykernel->ker_dim[0], &(buff[0]));
  565. //Truncate higher order terms.
  566. int indx_in=0;
  567. int indx_out=0;
  568. for(int k_=0;k_<mykernel->ker_dim[0];k_++)
  569. for(int k0=0;k0 <=gll_order;k0++)
  570. for(int k1=0;k0+k1 <=gll_order;k1++)
  571. for(int k2=0;k0+k1+k2<=gll_order;k2++){
  572. if(k0+k1+k2<=cheb_order){
  573. cheb_coeff[indx_out]=buff[indx_in];
  574. indx_out++;
  575. }
  576. indx_in++;
  577. }
  578. //Compute Divergence.
  579. double scale=(1<<node_depth[j]);
  580. for(int k=0;k<mykernel->ker_dim[0];k=k+dim)
  581. pvfmm::cheb_div<double>(&cheb_coeff[n_cheb_coeff*k], cheb_order, &cheb_div_coeff[n_cheb_coeff*(k/dim)]);
  582. for(size_t i=0;i<cheb_div_coeff.Dim();i++) cheb_div_coeff[i]*=scale;
  583. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*1,&node_gll_data[j][0],false);
  584. cheb_eval(cheb_div_coeff, cheb_order, gll_nodes, gll_nodes, gll_nodes, gll_data);
  585. }
  586. }
  587. pvfmm::Profile::Toc();
  588. #ifndef NDEBUG
  589. //Maximum divergence.
  590. double w_div=0;
  591. for(size_t j=0;j<node_cnt;j++){
  592. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*1,&node_gll_data[j][0],false);
  593. for(size_t k=0;k<gll_data.Dim();k++)
  594. if(w_div<fabs(gll_data[k])) w_div=fabs(gll_data[k]);
  595. }
  596. double glb_w_div=0;
  597. MPI_Allreduce(&w_div, &glb_w_div, 1, MPI_DOUBLE, MPI_MAX, fmm_data->comm);
  598. if(!myrank) std::cout<<"[fmm] |div w|_inf = "<<glb_w_div<<'\n';
  599. #endif
  600. }
  601. void gll_divfree(FMM_GLL_t* fmm_data, size_t node_cnt, double* node_coord, unsigned char* node_depth, double** node_gll_data){
  602. int gll_order=fmm_data->gll_order;
  603. int cheb_order=fmm_data->cheb_order;
  604. FMM_Mat_t& fmm_mat=*((FMM_Mat_t*)fmm_data->fmm_mat_biotsavart);
  605. FMM_Tree_t& mytree=*((FMM_Tree_t*)fmm_data->tree_biotsavart);
  606. pvfmm::Kernel<double>* mykernel=((pvfmm::Kernel<double>*)fmm_data->kernel_biotsavart);
  607. pvfmm::BoundaryType bndry=pvfmm::Periodic;//pvfmm::FreeSpace;
  608. // Find out number of OMP thereads.
  609. int omp_p=omp_get_max_threads();
  610. pvfmm::Profile::Tic("Construct Tree",&fmm_data->comm,true,1);
  611. //Construct pvfmm::MortonId from node_coord and node_depth.
  612. std::vector<pvfmm::MortonId> nodes(node_cnt);
  613. double s=0.25/(1UL<<MAX_DEPTH);
  614. for(size_t i=0;i<node_cnt;i++)
  615. nodes[i]=pvfmm::MortonId(node_coord[i*3+0]+s,node_coord[i*3+1]+s,node_coord[i*3+2]+s,node_depth[i]);
  616. // Construct tree from morton id.
  617. size_t node_iter=0;
  618. std::vector<FMMNode_t*> tree_nodes;
  619. FMMNode_t* node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  620. while(node!=NULL && node_iter<node_cnt){
  621. pvfmm::MortonId mid=node->GetMortonId();
  622. if(mid.isAncestor(nodes[node_iter])){
  623. node->SetGhost(false);
  624. node->Subdivide();
  625. }else{
  626. node->Truncate();
  627. node->SetGhost(mid!=nodes[node_iter]);
  628. if(mid==nodes[node_iter]){
  629. tree_nodes.push_back(node);
  630. node_iter++;
  631. }
  632. }
  633. node->DataDOF()=mykernel->ker_dim[0];
  634. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  635. }
  636. assert(node_iter==node_cnt);
  637. while(node!=NULL){
  638. node->Truncate();
  639. node->SetGhost(true);
  640. node->DataDOF()=mykernel->ker_dim[0];
  641. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  642. }
  643. // Add GLL data
  644. size_t n_gll_coeff=(gll_order+1)*(gll_order+2)*(gll_order+3)/6;
  645. size_t n_cheb_coeff=(cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6;
  646. #pragma omp parallel for
  647. for(int i=0;i<omp_p;i++){
  648. pvfmm::Vector<double> buff;
  649. size_t start=i*node_cnt/omp_p;
  650. size_t end=(i+1)*node_cnt/omp_p;
  651. for(size_t j=start;j<end;j++){
  652. FMMNode_t* node=tree_nodes[j];
  653. node->DataDOF()=mykernel->ker_dim[0];
  654. buff.Resize(n_gll_coeff*node->DataDOF());
  655. pvfmm::gll2cheb<double,double>(&(node_gll_data[j][0]), gll_order, node->DataDOF(), &(buff[0]));
  656. node->ChebData().Resize(n_cheb_coeff*node->DataDOF());
  657. double* cheb_out=&(node->ChebData()[0]);
  658. int indx_in=0;
  659. int indx_out=0;
  660. for(int k_=0;k_<node->DataDOF();k_++)
  661. for(int k0=0;k0 <=gll_order;k0++)
  662. for(int k1=0;k0+k1 <=gll_order;k1++)
  663. for(int k2=0;k0+k1+k2<=gll_order;k2++){
  664. if(k0+k1+k2<=cheb_order){
  665. cheb_out[indx_out]=buff[indx_in];
  666. indx_out++;
  667. }
  668. indx_in++;
  669. }
  670. }
  671. }
  672. // Set avg to zero for Periodic boundary condition
  673. if(bndry==pvfmm::Periodic){
  674. std::vector<double> avg_w(mykernel->ker_dim[0],0);
  675. for(size_t j=0;j<node_cnt;j++){
  676. FMMNode_t* node=tree_nodes[j];
  677. for(int k=0;k<node->DataDOF();k++)
  678. avg_w[k]+=node->ChebData()[k*n_cheb_coeff]*pow(0.5,node->Depth()*3);
  679. }
  680. std::vector<double> glb_avg_w(mykernel->ker_dim[0],0);
  681. MPI_Allreduce(&avg_w[0], &glb_avg_w[0], mykernel->ker_dim[0], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  682. for(size_t j=0;j<node_cnt;j++){
  683. FMMNode_t* node=tree_nodes[j];
  684. for(int k=0;k<node->DataDOF();k++)
  685. node->ChebData()[k*n_cheb_coeff]-=glb_avg_w[k];
  686. }
  687. }
  688. pvfmm::Profile::Toc();
  689. #ifndef NDEBUG
  690. //Check Tree.
  691. pvfmm::Profile::Tic("Check Tree",&fmm_data->comm,true,1);
  692. mytree.CheckTree();
  693. pvfmm::Profile::Toc();
  694. #endif
  695. //FMM Evaluate.
  696. pvfmm::Profile::Tic("FMM Eval",&fmm_data->comm,true,1);
  697. mytree.SetupFMM(&fmm_mat);
  698. mytree.RunFMM();
  699. pvfmm::Profile::Toc();
  700. //Copy FMM output to tree Data.
  701. mytree.Copy_FMMOutput();
  702. //Set average velocity to zero.
  703. if(bndry==pvfmm::Periodic){
  704. std::vector<double> avg_v(mykernel->ker_dim[1],0);
  705. for(size_t j=0;j<node_cnt;j++){
  706. FMMNode_t* node=tree_nodes[j];
  707. for(int k=0;k<node->DataDOF();k++)
  708. avg_v[k]+=node->ChebData()[k*n_cheb_coeff]*pow(0.5,node->Depth()*3);
  709. }
  710. std::vector<double> glb_avg_v(mykernel->ker_dim[1],0);
  711. MPI_Allreduce(&avg_v[0], &glb_avg_v[0], mykernel->ker_dim[1], MPI_DOUBLE, MPI_SUM, fmm_data->comm);
  712. //std::cout<<glb_avg_v[0]<<' '<<glb_avg_v[1]<<' '<<glb_avg_v[2]<<'\n';
  713. for(size_t j=0;j<node_cnt;j++){
  714. FMMNode_t* node=tree_nodes[j];
  715. for(int k=0;k<node->DataDOF();k++)
  716. node->ChebData()[k*n_cheb_coeff]-=glb_avg_v[k];
  717. }
  718. }
  719. #ifndef NDEBUG
  720. // Confirm that 2:1 balance did not change the number of tree nodes.
  721. size_t new_cnt=0;
  722. node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  723. while(node!=NULL){
  724. if(node->IsLeaf() && !node->IsGhost()) new_cnt++;
  725. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  726. }
  727. assert(node_cnt==new_cnt);
  728. #endif
  729. // Construct GLL data from FMM output.
  730. pvfmm::Profile::Tic("Output",&fmm_data->comm,true,1);
  731. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  732. #pragma omp parallel for
  733. for(int i=0;i<omp_p;i++){
  734. size_t start=i*node_cnt/omp_p;
  735. size_t end=(i+1)*node_cnt/omp_p;
  736. for(size_t j=start;j<end;j++){
  737. tree_nodes[j]->Curl();
  738. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[1],&node_gll_data[j][0],false);
  739. cheb_eval(tree_nodes[j]->ChebData(), cheb_order+1, gll_nodes, gll_nodes, gll_nodes, gll_data);
  740. }
  741. }
  742. pvfmm::Profile::Toc();
  743. #ifndef NDEBUG
  744. pvfmm::Profile::print(&fmm_data->comm);
  745. #endif
  746. }
  747. void gll_filter(FMM_GLL_t* fmm_data, int cheb_order_, size_t node_cnt, double** node_gll_data, double* err){
  748. int gll_order=fmm_data->gll_order;
  749. int cheb_order=cheb_order_-1;
  750. assert( gll_order>1);
  751. assert(cheb_order>1);
  752. size_t n_gll =( gll_order+1)*( gll_order+1)*( gll_order+1);
  753. size_t n_cheb_in =( gll_order+1)*( gll_order+2)*( gll_order+3)/6;
  754. size_t n_cheb_out=(cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6;
  755. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  756. //double max_err=0, min_rate=1e+10;
  757. // Find out number of OMP thereads.
  758. int omp_p=omp_get_max_threads();
  759. #pragma omp parallel for
  760. for(int i=0;i<omp_p;i++){
  761. pvfmm::Vector<double> cheb_in (n_cheb_in );
  762. pvfmm::Vector<double> cheb_out(n_cheb_out);
  763. pvfmm::Vector<double> error(gll_order+1);
  764. size_t start=i*node_cnt/omp_p;
  765. size_t end=(i+1)*node_cnt/omp_p;
  766. for(size_t j=start;j<end;j++){
  767. pvfmm::gll2cheb<double,double>(&(node_gll_data[j][0]), gll_order, 1, &(cheb_in[0]));
  768. error.SetZero();
  769. int indx_in=0;
  770. int indx_out=0;
  771. for(int k0=0;k0 <=gll_order;k0++)
  772. for(int k1=0;k0+k1 <=gll_order;k1++)
  773. for(int k2=0;k0+k1+k2<=gll_order;k2++){
  774. error[k0+k1+k2]+=fabs(cheb_in[indx_in]);
  775. if(k0+k1+k2<=cheb_order){
  776. cheb_out[indx_out]=cheb_in[indx_in];
  777. indx_out++;
  778. }
  779. indx_in++;
  780. }
  781. // Compute error.
  782. {
  783. // Fit: y ~ A exp(B.k), using last k0 error values.
  784. int k0=(cheb_order+1<4?cheb_order+1:4);
  785. double A,B;
  786. double k_=0, k2_=0;
  787. double lny_=0, klny_=0;
  788. for(int k=0;k<k0;k++){
  789. k_+=((double)k);
  790. k2_+=((double)k*k);
  791. lny_+=log(error[cheb_order-k]+1e-20);
  792. klny_+=k*log(error[cheb_order-k]+1e-20);
  793. }
  794. B=-(k0*klny_-k_*lny_)/(k0*k2_-k_*k_);
  795. double ye_=0, e2_=0;
  796. for(int k=0;k<k0;k++){
  797. ye_+=error[cheb_order-k]*exp(B*k);
  798. e2_+=exp(2*B*k);
  799. }
  800. A=(ye_/e2_);
  801. // err+= integ_{cheb_order}^{inf}
  802. err[j]+=error[cheb_order];
  803. err[j]+=(-B>1e-3?-A/B:A);
  804. //if(max_err<err[j]) max_err=err[j];
  805. //if(-B>1e-3 && -B<min_rate) min_rate=-B;
  806. //for(int k=0;k<k0;k++)
  807. // std::cout<<error[cheb_order-k-1]<<' ';
  808. //std::cout<<'\n';
  809. //std::cout<<A<<' '<<B<<'\n';
  810. }
  811. pvfmm::Vector<double> gll_data(n_gll,&node_gll_data[j][0],false);
  812. cheb_eval(cheb_out, cheb_order, gll_nodes, gll_nodes, gll_nodes, gll_data);
  813. }
  814. }
  815. //std::cout<<max_err<<' '<<min_rate<<'\n';
  816. }
  817. void gll_interpolate(FMM_GLL_t* fmm_data, size_t node_cnt, double* node_coord, unsigned char* node_depth, double** node_gll_data){
  818. int gll_order=fmm_data->gll_order;
  819. int cheb_order=fmm_data->cheb_order;
  820. FMM_Tree_t& mytree=*((FMM_Tree_t*)fmm_data->tree_biotsavart);
  821. pvfmm::Kernel<double>* mykernel=((pvfmm::Kernel<double>*)fmm_data->kernel_biotsavart);
  822. pvfmm::BoundaryType bndry=pvfmm::Periodic;//pvfmm::FreeSpace;
  823. // Find out number of OMP thereads.
  824. int omp_p=omp_get_max_threads();
  825. pvfmm::Profile::Tic("Construct Tree",&fmm_data->comm,true,1);
  826. //Construct pvfmm::MortonId from node_coord and node_depth.
  827. std::vector<pvfmm::MortonId> nodes(node_cnt);
  828. double s=0.25/(1UL<<MAX_DEPTH);
  829. for(size_t i=0;i<node_cnt;i++)
  830. nodes[i]=pvfmm::MortonId(node_coord[i*3+0]+s,node_coord[i*3+1]+s,node_coord[i*3+2]+s,node_depth[i]);
  831. // Construct tree from morton id.
  832. size_t node_iter=0;
  833. std::vector<FMMNode_t*> tree_nodes;
  834. FMMNode_t* node=static_cast<FMMNode_t*>(mytree.PreorderFirst());
  835. while(node!=NULL && node_iter<node_cnt){
  836. pvfmm::MortonId mid=node->GetMortonId();
  837. node->DataDOF()=mykernel->ker_dim[0];
  838. if(mid.isAncestor(nodes[node_iter])){
  839. node->SetGhost(false);
  840. node->Subdivide();
  841. }else{
  842. node->Truncate();
  843. node->SetGhost(mid!=nodes[node_iter]);
  844. if(mid==nodes[node_iter]){
  845. tree_nodes.push_back(node);
  846. node_iter++;
  847. }
  848. }
  849. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  850. }
  851. assert(node_iter==node_cnt);
  852. while(node!=NULL){
  853. node->Truncate();
  854. node->SetGhost(true);
  855. node->DataDOF()=mykernel->ker_dim[0];
  856. node=static_cast<FMMNode_t*>(mytree.PreorderNxt(node));
  857. }
  858. // Add GLL data
  859. size_t n_gll_coeff=(gll_order+1)*(gll_order+2)*(gll_order+3)/6;
  860. size_t n_cheb_coeff=(cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6;
  861. #pragma omp parallel for
  862. for(int i=0;i<omp_p;i++){
  863. pvfmm::Vector<double> buff;
  864. size_t start=i*node_cnt/omp_p;
  865. size_t end=(i+1)*node_cnt/omp_p;
  866. for(size_t j=start;j<end;j++){
  867. FMMNode_t* node=tree_nodes[j];
  868. buff.Resize(n_gll_coeff*node->DataDOF());
  869. pvfmm::gll2cheb<double,double>(&(node_gll_data[j][0]), gll_order, node->DataDOF(), &(buff[0]));
  870. node->ChebData().Resize(n_cheb_coeff*node->DataDOF());
  871. double* cheb_out=&(node->ChebData()[0]);
  872. int indx_in=0;
  873. int indx_out=0;
  874. for(int k_=0;k_<node->DataDOF();k_++)
  875. for(int k0=0;k0 <=gll_order;k0++)
  876. for(int k1=0;k0+k1 <=gll_order;k1++)
  877. for(int k2=0;k0+k1+k2<=gll_order;k2++){
  878. if(k0+k1+k2<=cheb_order){
  879. cheb_out[indx_out]=buff[indx_in];
  880. indx_out++;
  881. }
  882. indx_in++;
  883. }
  884. }
  885. }
  886. #ifndef NDEBUG
  887. //Check Tree.
  888. pvfmm::Profile::Tic("Check Tree",&fmm_data->comm,true,1);
  889. mytree.CheckTree();
  890. pvfmm::Profile::Toc();
  891. #endif
  892. // Get neighbour data.
  893. pvfmm::Profile::Tic("ConstructLET",&fmm_data->comm,true,1);
  894. mytree.ConstructLET(bndry);
  895. pvfmm::Profile::Toc();
  896. // Interpolate across nearby octants.
  897. pvfmm::Profile::Tic("Interpolate",&fmm_data->comm,true,1);
  898. double alpha=1.25;
  899. FMMNode_t* r_node=static_cast<FMMNode_t*>(mytree.RootNode());
  900. pvfmm::Matrix<double> cheb_data(node_cnt, n_cheb_coeff*mykernel->ker_dim[0]);
  901. #pragma omp parallel for
  902. for(int i=0;i<omp_p;i++){
  903. std::vector<double> x1(cheb_order+1);
  904. std::vector<double> cheb_pts1=pvfmm::cheb_nodes<double>(cheb_order+1,1);
  905. for(int k=0;k<=cheb_order;k++) x1[k]=(cheb_pts1[k]-0.5)*2.0/alpha;
  906. std::vector<double> x2(2*(cheb_order+1));
  907. std::vector<double> y2(2*(cheb_order+1));
  908. std::vector<double> z2(2*(cheb_order+1));
  909. std::vector<double> x2_(2*(cheb_order+1));
  910. std::vector<double> y2_(2*(cheb_order+1));
  911. std::vector<double> z2_(2*(cheb_order+1));
  912. std::vector<int> px(2*(cheb_order+1)), py(2*(cheb_order+1)), pz(2*(cheb_order+1));
  913. std::vector<double> cheb_pts2=pvfmm::cheb_nodes<double>(2*cheb_order,1);
  914. pvfmm::Vector<double> buff0 ( 2*(cheb_order+1)* 2*(cheb_order+1) * 2*(cheb_order+1) *mykernel->ker_dim[0]);
  915. pvfmm::Vector<double> buff0_( 2*(cheb_order+1)* 2*(cheb_order+1) * 2*(cheb_order+1) *mykernel->ker_dim[0]);
  916. pvfmm::Vector<double> buff1 ((2*(cheb_order+1)*(2*(cheb_order+1)+1)*(2*(cheb_order+1)+2)/6)*mykernel->ker_dim[0]);
  917. pvfmm::Vector<double> buff2( (cheb_order+1)*(cheb_order+1)*(cheb_order+1) *mykernel->ker_dim[0]);
  918. pvfmm::Vector<double> buff3(((cheb_order+1)*(cheb_order+2)*(cheb_order+3)/6)*mykernel->ker_dim[0]);
  919. size_t start=i*node_cnt/omp_p;
  920. size_t end=(i+1)*node_cnt/omp_p;
  921. for(size_t j=start;j<end;j++){
  922. FMMNode_t* node=tree_nodes[j];
  923. double s=pow(0.5,node->Depth());
  924. double* coord=tree_nodes[j]->Coord();
  925. for(int k=0;k<2*(cheb_order+1);k++){
  926. x2[k]=(coord[0]+0.5*s)+s*(cheb_pts2[k]-0.5)*alpha+1.0;
  927. y2[k]=(coord[1]+0.5*s)+s*(cheb_pts2[k]-0.5)*alpha+1.0;
  928. z2[k]=(coord[2]+0.5*s)+s*(cheb_pts2[k]-0.5)*alpha+1.0;
  929. if(x2[k]>=1.0) x2[k]-=1.0; if(x2[k]>=1.0) x2[k]-=1.0;
  930. if(y2[k]>=1.0) y2[k]-=1.0; if(y2[k]>=1.0) y2[k]-=1.0;
  931. if(z2[k]>=1.0) z2[k]-=1.0; if(z2[k]>=1.0) z2[k]-=1.0;
  932. }
  933. x2_=x2; y2_=y2; z2_=z2;
  934. std::sort(x2.begin(),x2.end());
  935. std::sort(y2.begin(),y2.end());
  936. std::sort(z2.begin(),z2.end());
  937. for(int k0=0;k0<2*(cheb_order+1);k0++)
  938. for(int k1=0;k1<2*(cheb_order+1);k1++){
  939. if(x2[k0]==x2_[k1]) px[k1]=k0;
  940. if(y2[k0]==y2_[k1]) py[k1]=k0;
  941. if(z2[k0]==z2_[k1]) pz[k1]=k0;
  942. }
  943. r_node->ReadVal(x2, y2, z2, &buff0[0]);
  944. for(int l=0;l<mykernel->ker_dim[0];l++)
  945. for(int k0=0;k0<2*(cheb_order+1);k0++)
  946. for(int k1=0;k1<2*(cheb_order+1);k1++)
  947. for(int k2=0;k2<2*(cheb_order+1);k2++){
  948. buff0_[k2+(k1+(k0+l*2*(cheb_order+1))*2*(cheb_order+1))*2*(cheb_order+1)]=
  949. buff0[px[k2]+(py[k1]+(pz[k0]+l*2*(cheb_order+1))*2*(cheb_order+1))*2*(cheb_order+1)];
  950. }
  951. pvfmm::cheb_approx<double,double>(&buff0_[0], 2*cheb_order+1, mykernel->ker_dim[0], &buff1[0]);
  952. //TODO: Truncate?
  953. cheb_eval(buff1, 2*cheb_order+1, x1, x1, x1, buff2);
  954. pvfmm::cheb_approx<double,double>(&buff2[0], cheb_order, mykernel->ker_dim[0], &buff3[0]);
  955. node->ChebData()=buff3;
  956. }
  957. }
  958. pvfmm::Profile::Toc();
  959. // Cheb2GLL.
  960. pvfmm::Profile::Tic("Cheb2GLL",&fmm_data->comm,true,1);
  961. std::vector<double>& gll_nodes=*((std::vector<double>*)fmm_data->gll_nodes);
  962. #pragma omp parallel for
  963. for(int i=0;i<omp_p;i++){
  964. size_t start=i*node_cnt/omp_p;
  965. size_t end=(i+1)*node_cnt/omp_p;
  966. for(size_t j=start;j<end;j++){
  967. pvfmm::Vector<double> gll_data((gll_order+1)*(gll_order+1)*(gll_order+1)*mykernel->ker_dim[1],&node_gll_data[j][0],false);
  968. cheb_eval(tree_nodes[j]->ChebData(), cheb_order, gll_nodes, gll_nodes, gll_nodes, gll_data);
  969. }
  970. }
  971. pvfmm::Profile::Toc();
  972. #ifndef NDEBUG
  973. pvfmm::Profile::print(&fmm_data->comm);
  974. #endif
  975. }
  976. }
  977. #endif