/**
 * \file fmm_cheb.txx
 * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
 * \date 3-07-2011
 * \brief This file contains the implementation of the FMM_Cheb class.
 */

#include <omp.h>
#include <sstream>
#include <iostream>
#include <cstdlib>
#include <cmath>

#ifdef PVFMM_HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif

#include <dtypes.h>
#include <parUtils.h>
#include <cheb_utils.hpp>
#include <mem_utils.hpp>
#include <profile.hpp>

namespace pvfmm{
template <class FMMNode>
FMM_Cheb<FMMNode>::~FMM_Cheb() {
  if(this->mat!=NULL){
    int rank;
    MPI_Comm_rank(this->comm,&rank);
    if(!rank){
      FILE* f=fopen(this->mat_fname.c_str(),"r");
      if(f==NULL) { // File does not exist.
        { // Delete matrices that are cheap to recompute before saving to file.
          Mat_Type mat_types[]={W_Type, V_Type, V1_Type, U2U_Type, D2D_Type, D2T_Type};
          for(size_t k=0;k<sizeof(mat_types)/sizeof(Mat_Type);k++){
            Mat_Type type=mat_types[k];
            for(int l=-BC_LEVELS;l<MAX_DEPTH;l++)
            for(size_t indx=0;indx<this->interac_list.ListCount(type);indx++){
              Matrix<Real_t>& M=this->mat->Mat(l, type, indx);
              M.Resize(0,0);
            }
          }
        }
        this->mat->Save2File(this->mat_fname.c_str());
      }else fclose(f);
    }
  }
}
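
// Initialize() sets up the Chebyshev volume FMM: it resolves the path of the
// precomputed-data file (PVFMM_PRECOMP_DATA_PATH, falling back to $PVFMM_DIR),
// initializes the underlying point FMM (FMM_Pts), and precomputes the volume
// interaction matrices (D2T, S2U, X, W, U0/U1/U2). If no precomputed file
// exists, rank 0 writes one, dropping the matrices that are cheap to
// recompute and rebuilding them afterwards.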
template <class FMMNode>
void FMM_Cheb<FMMNode>::Initialize(int mult_order, int cheb_deg_, const MPI_Comm& comm_, const Kernel<Real_t>* kernel_, const Kernel<Real_t>* aux_kernel_){
  Profile::Tic("InitFMM_Cheb",&comm_,true);{

  int rank;
  MPI_Comm_rank(comm_,&rank);

  int dim=3; //Only supporting 3D
  cheb_deg=cheb_deg_;
  if(this->mat_fname.size()==0){
    std::stringstream st;
    st<<PVFMM_PRECOMP_DATA_PATH;

    if(!st.str().size()){ // look in PVFMM_DIR
      char* pvfmm_dir = getenv("PVFMM_DIR");
      if(pvfmm_dir) st<<pvfmm_dir;
    }

#ifndef STAT_MACROS_BROKEN
    if(st.str().size()){ // check if the path is a directory
      struct stat stat_buff;
      if(stat(st.str().c_str(), &stat_buff) || !S_ISDIR(stat_buff.st_mode)){
        std::cout<<"error: path not found: "<<st.str()<<'\n';
        exit(0);
      }
    }
#endif

    if(st.str().size()) st<<'/';
    st<<"Precomp_"<<kernel_->ker_name.c_str()<<"_q"<<cheb_deg<<"_m"<<mult_order;
    if(sizeof(Real_t)==8) st<<"";
    else if(sizeof(Real_t)==4) st<<"_f";
    else st<<"_t"<<sizeof(Real_t);
    st<<".data";
    this->mat_fname=st.str();
  }
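  // For example (illustrative only; the kernel name comes from the Kernel
  // object passed in), a double-precision build with a kernel named "laplace",
  // q=14 and m=10 would use "Precomp_laplace_q14_m10.data", while the
  // single-precision (float) build of the same setup appends "_f".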
  if(!rank){
    FILE* f=fopen(this->mat_fname.c_str(),"r");
    if(f==NULL) { // File does not exist.
      std::cout<<"Could not find precomputed data file for "<<kernel_->ker_name.c_str()<<" kernel with q="<<cheb_deg<<" and m="<<mult_order<<".\n";
      std::cout<<"This data will be computed and stored for future use at:\n"<<this->mat_fname<<'\n';
      std::cout<<"This may take a while...\n";
    }else fclose(f);
  }
  //this->mat->LoadFile(this->mat_fname.c_str(), this->comm);
  FMM_Pts<FMMNode>::Initialize(mult_order, comm_, kernel_, aux_kernel_);
  this->mat->RelativeTrgCoord()=cheb_nodes<Real_t>(ChebDeg(),dim);

  Profile::Tic("PrecompD2T",&this->comm,false,4);
  this->PrecompAll(D2T_Type);
  Profile::Toc();

  //Volume solver.
  Profile::Tic("PrecompS2M",&this->comm,false,4);
  this->PrecompAll(S2U_Type);
  Profile::Toc();

  Profile::Tic("PrecompX",&this->comm,false,4);
  this->PrecompAll(X_Type);
  Profile::Toc();

  Profile::Tic("PrecompW",&this->comm,false,4);
  this->PrecompAll(W_Type);
  Profile::Toc();

  Profile::Tic("PrecompU0",&this->comm,false,4);
  this->PrecompAll(U0_Type);
  Profile::Toc();

  Profile::Tic("PrecompU1",&this->comm,false,4);
  this->PrecompAll(U1_Type);
  Profile::Toc();

  Profile::Tic("PrecompU2",&this->comm,false,4);
  this->PrecompAll(U2_Type);
  Profile::Toc();

  Profile::Tic("Save2File",&this->comm,false,4);
  if(!rank){
    FILE* f=fopen(this->mat_fname.c_str(),"r");
    if(f==NULL) { // File does not exist.
      { // Delete matrices that are cheap to recompute; they are rebuilt below after saving.
        Mat_Type mat_types[]={W_Type, V_Type, V1_Type, U2U_Type, D2D_Type, D2T_Type};
        for(size_t k=0;k<sizeof(mat_types)/sizeof(Mat_Type);k++){
          Mat_Type type=mat_types[k];
          for(int l=-BC_LEVELS;l<MAX_DEPTH;l++)
          for(size_t indx=0;indx<this->interac_list.ListCount(type);indx++){
            Matrix<Real_t>& M=this->mat->Mat(l, type, indx);
            M.Resize(0,0);
          }
        }
      }
      this->mat->Save2File(this->mat_fname.c_str());
    }else fclose(f);
  }
  Profile::Toc();

  Profile::Tic("Recompute",&this->comm,false,4);
  { // Recompute the matrices that were deleted before saving.
    this->PrecompAll(W_Type);
    this->PrecompAll(V_Type);
    this->PrecompAll(V1_Type);
    this->PrecompAll(U2U_Type);
    this->PrecompAll(D2D_Type);
    this->PrecompAll(D2T_Type);
  }
  Profile::Toc();

  }Profile::Toc();
}
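
// A minimal usage sketch (illustrative, not part of this file): the tree type,
// kernel objects and driver calls below are assumptions about the surrounding
// PVFMM API, shown only to indicate where Initialize() fits.
//
//   typedef FMM_Node<Cheb_Node<double> > FMMNode_t;
//   FMM_Cheb<FMMNode_t> fmm_mat;
//   fmm_mat.Initialize(mult_order, cheb_deg, MPI_COMM_WORLD,
//                      &kernel_fn, &aux_kernel_fn);
//   // ... build the Chebyshev octree and run the FMM evaluation driver ...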
template <class FMMNode>
Permutation<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::PrecompPerm(Mat_Type type, Perm_Type perm_indx){
  int dim=3; //Only supporting 3D

  //Check if the matrix already exists.
  Permutation<Real_t>& P_ = FMM_Pts<FMMNode>::PrecompPerm(type, perm_indx);
  if(P_.Dim()!=0) return P_;

  Matrix<size_t> swap_xy(10,9);
  Matrix<size_t> swap_xz(10,9);
  { // This is repeated from FMM_Pts::PrecompPerm, but I don't see any other way.
    for(int i=0;i<9;i++)
    for(int j=0;j<9;j++){
      swap_xy[i][j]=j;
      swap_xz[i][j]=j;
    }
    swap_xy[3][0]=1; swap_xy[3][1]=0; swap_xy[3][2]=2;
    swap_xz[3][0]=2; swap_xz[3][1]=1; swap_xz[3][2]=0;

    swap_xy[6][0]=1; swap_xy[6][1]=0; swap_xy[6][2]=2;
    swap_xy[6][3]=4; swap_xy[6][4]=3; swap_xy[6][5]=5;

    swap_xz[6][0]=2; swap_xz[6][1]=1; swap_xz[6][2]=0;
    swap_xz[6][3]=5; swap_xz[6][4]=4; swap_xz[6][5]=3;

    swap_xy[9][0]=4; swap_xy[9][1]=3; swap_xy[9][2]=5;
    swap_xy[9][3]=1; swap_xy[9][4]=0; swap_xy[9][5]=2;
    swap_xy[9][6]=7; swap_xy[9][7]=6; swap_xy[9][8]=8;

    swap_xz[9][0]=8; swap_xz[9][1]=7; swap_xz[9][2]=6;
    swap_xz[9][3]=5; swap_xz[9][4]=4; swap_xz[9][5]=3;
    swap_xz[9][6]=2; swap_xz[9][7]=1; swap_xz[9][8]=0;
  }
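  // Note on the tables above: row r of swap_xy/swap_xz gives, for a kernel
  // with r components per point (r=3 for vector kernels, r=6 and r=9 for the
  // tensor-valued kernels handled here), the component permutation induced by
  // swapping the x-y or x-z coordinate axes; all other rows stay the identity.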
  //Compute the matrix.
  Permutation<Real_t> P;
  switch (type){
    case UC2UE_Type:
    {
      break;
    }
    case DC2DE_Type:
    {
      break;
    }
    case S2U_Type:
    {
      break;
    }
    case U2U_Type:
    {
      break;
    }
    case D2D_Type:
    {
      break;
    }
    case D2T_Type:
    {
      break;
    }
    case U0_Type:
    {
      int coeff_cnt=((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6;
      int n3=(int)pow((Real_t)(cheb_deg+1),dim);
      int dof=(perm_indx<C_Perm?this->kernel.ker_dim[0]:this->kernel.ker_dim[1]);
      size_t p_indx=perm_indx % C_Perm;
      Permutation<Real_t> P0(n3*dof);
      if(dof%3==0 && this->kernel.ker_name.compare("biot_savart")==0) //biot_savart
      for(int j=0;j<dof;j++)
      for(int i=0;i<n3;i++)
        P0.scal[i+n3*j]*=(perm_indx<C_Perm?1:-1);
      if(p_indx==ReflecX || p_indx==ReflecY || p_indx==ReflecZ){
        for(int j=0;j<dof;j++)
        for(int i=0;i<n3;i++){
          int x[3]={i%(cheb_deg+1), (i/(cheb_deg+1))%(cheb_deg+1), i/(cheb_deg+1)/(cheb_deg+1)};
          P0.scal[i+n3*j]*=(x[p_indx]%2?-1.0:1.0);
          if(dof%3==0) //stokes_vel (and like kernels)
            P0.scal[i+n3*j]*=(j%3==p_indx?-1.0:1.0);
          if(dof%3==0 && (dof/3)%3==0)
            P0.scal[i+n3*j]*=((j/3)%3==p_indx?-1.0:1.0);
        }
      }else if(p_indx==SwapXY || p_indx==SwapXZ){
        int indx[3];
        if(p_indx==SwapXY) {indx[0]=1; indx[1]=0; indx[2]=2;}
        if(p_indx==SwapXZ) {indx[0]=2; indx[1]=1; indx[2]=0;}
        for(int j=0;j<dof;j++)
        for(int i=0;i<n3;i++){
          int x[3]={i%(cheb_deg+1), (i/(cheb_deg+1))%(cheb_deg+1), i/(cheb_deg+1)/(cheb_deg+1)};
          P0.perm[i+n3*j]=x[indx[0]]+(x[indx[1]]+x[indx[2]]*(cheb_deg+1))*(cheb_deg+1)
                         +n3*(p_indx==SwapXY?swap_xy[dof][j]:swap_xz[dof][j]);
        }
      }

      std::vector<size_t> coeff_map(n3*dof,0);
      {
        int indx=0;
        for(int j=0;j<dof;j++)
        for(int i=0;i<n3;i++){
          int x[3]={i%(cheb_deg+1), (i/(cheb_deg+1))%(cheb_deg+1), i/(cheb_deg+1)/(cheb_deg+1)};
          if(x[0]+x[1]+x[2]<=cheb_deg){
            coeff_map[i+n3*j]=indx;
            indx++;
          }
        }
      }
      P=Permutation<Real_t>(coeff_cnt*dof);
      {
        int indx=0;
        for(int j=0;j<dof;j++)
        for(int i=0;i<n3;i++){
          int x[3]={i%(cheb_deg+1), (i/(cheb_deg+1))%(cheb_deg+1), i/(cheb_deg+1)/(cheb_deg+1)};
          if(x[0]+x[1]+x[2]<=cheb_deg){
            P.perm[indx]=coeff_map[P0.perm[i+n3*j]];
            P.scal[indx]=          P0.scal[i+n3*j] ;
            indx++;
          }
        }
      }
      break;
    }
    case U1_Type:
    {
      P=PrecompPerm(U0_Type, perm_indx);
      break;
    }
    case U2_Type:
    {
      P=PrecompPerm(U0_Type, perm_indx);
      break;
    }
    case V_Type:
    {
      break;
    }
    case V1_Type:
    {
      break;
    }
    case W_Type:
    {
      if(perm_indx>=C_Perm) P=PrecompPerm(U0_Type, perm_indx);
      else P=PrecompPerm(D2D_Type, perm_indx);
      break;
    }
    case X_Type:
    {
      if(perm_indx<C_Perm) P=PrecompPerm(U0_Type, perm_indx);
      else P=PrecompPerm(D2D_Type, perm_indx);
      break;
    }
    default:
      return FMM_Pts<FMMNode>::PrecompPerm(type, perm_indx);
      break;
  }

  //Save the matrix for future use.
  #pragma omp critical (PRECOMP_MATRIX_PTS)
  if(P_.Dim()==0){ P_=P;}
  return P_;
}
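
// Number of Chebyshev coefficients per scalar component used below:
// a degree-q trivariate expansion keeps only terms with i+j+k <= q, i.e.
// n_coeff = (q+1)(q+2)(q+3)/6 coefficients (for example, q=2 gives 10).
// Precomp() builds the volume-FMM interaction matrices in this truncated
// coefficient basis, reusing FMM_Pts::Precomp() for the point-based parts.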
template <class FMMNode>
Matrix<typename FMMNode::Real_t>& FMM_Cheb<FMMNode>::Precomp(int level, Mat_Type type, size_t mat_indx){
  if(this->Homogen()) level=0;

  //Check if the matrix already exists.
  //Compute matrix from symmetry class (if possible).
  Matrix<Real_t>& M_ = this->mat->Mat(level, type, mat_indx);
  if(M_.Dim(0)!=0 && M_.Dim(1)!=0) return M_;
  else{ //Compute matrix from symmetry class (if possible).
    size_t class_indx = this->interac_list.InteracClass(type, mat_indx);
    if(class_indx!=mat_indx){
      Matrix<Real_t>& M0 = this->Precomp(level, type, class_indx);
      Permutation<Real_t>& Pr = this->interac_list.Perm_R(level, type, mat_indx);
      Permutation<Real_t>& Pc = this->interac_list.Perm_C(level, type, mat_indx);
      if(Pr.Dim()>0 && Pc.Dim()>0 && M0.Dim(0)>0 && M0.Dim(1)>0) return M_;
    }
  }

  int myrank, np;
  MPI_Comm_rank(this->comm, &myrank);
  MPI_Comm_size(this->comm,&np);

  size_t progress=0, class_count=0;
  { // Determine precomputation progress.
    size_t mat_cnt=this->interac_list.ListCount((Mat_Type)type);
    for(size_t i=0; i<mat_cnt; i++){
      size_t indx=this->interac_list.InteracClass((Mat_Type)type,i);
      if(indx==i){
        class_count++;
        if(i<mat_indx) progress++;
      }
    }
  }
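  // The near-interaction cases below (S2U, U0, U1, U2, X) all follow the same
  // pattern: targets are distributed round-robin over MPI ranks (i=myrank,
  // then i+=np), each rank fills its columns of a local matrix using
  // cheb_integ() (integrals of the kernel against the Chebyshev basis over the
  // source box), and MPI_Allreduce sums the disjoint per-rank contributions so
  // every rank ends up with the full matrix.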
  //Compute the matrix.
  Matrix<Real_t> M;
  int n_src=((cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6;
  switch (type){
    case S2U_Type:
    {
      if(this->MultipoleOrder()==0) break;
      Real_t r=pow(0.5,level);
      Real_t c[3]={0,0,0};

      // Coord of upward check surface
      std::vector<Real_t> uc_coord=u_check_surf(this->MultipoleOrder(),c,level);
      size_t n_uc=uc_coord.size()/3;

      // Evaluate potential at check surface.
      Matrix<Real_t> M_s2c(n_src*this->aux_kernel.ker_dim[0],n_uc*this->aux_kernel.ker_dim[1]); //source 2 check
      Matrix<Real_t> M_s2c_local(n_src*this->aux_kernel.ker_dim[0],n_uc*this->aux_kernel.ker_dim[1]);
      {
        M_s2c.SetZero();
        M_s2c_local.SetZero();
        size_t cnt_done=0;
        #pragma omp parallel for schedule(dynamic)
        for(size_t i=myrank;i<n_uc;i+=np){
          std::vector<Real_t> M_=cheb_integ(cheb_deg, &uc_coord[i*3], r, this->aux_kernel);
          #ifdef __VERBOSE__
          #pragma omp critical
          if(!myrank){
            cnt_done++;
            std::cout<<"\r Progress: "<<(100*progress*n_uc+100*cnt_done*np)/(class_count*n_uc)<<"% "<<std::flush;
          }
          #endif
          for(int k=0; k<this->aux_kernel.ker_dim[1]; k++)
          for(size_t j=0; j<(size_t)M_s2c.Dim(0); j++)
            M_s2c_local[j][i*this->aux_kernel.ker_dim[1]+k] = M_[j+k*M_s2c.Dim(0)];
        }
        if(!myrank) std::cout<<"\r                    \r"<<std::flush;
        MPI_Allreduce(M_s2c_local[0], M_s2c[0], M_s2c.Dim(0)*M_s2c.Dim(1), par::Mpi_datatype<Real_t>::value(), par::Mpi_datatype<Real_t>::sum(), this->comm);
      }
      Matrix<Real_t>& M_c2e = this->Precomp(level, UC2UE_Type, 0);
      M=M_s2c*M_c2e;
      break;
    }
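    // Note: M above is the source-to-multipole (S2M) operator, obtained by
    // composing the source-to-check matrix M_s2c with the check-to-equivalent
    // matrix M_c2e (UC2UE_Type) precomputed by the point FMM.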
    case D2T_Type:
    {
      if(this->MultipoleOrder()==0) break;
      Matrix<Real_t>& M_s2t=FMM_Pts<FMMNode>::Precomp(level, type, mat_indx);
      int n_trg=M_s2t.Dim(1)/this->kernel.ker_dim[1];

      // Compute Chebyshev approx from target potential.
      M.Resize(M_s2t.Dim(0), n_src*this->kernel.ker_dim[1]);
      #pragma omp parallel for schedule(dynamic)
      for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
        Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
        M_trg=M_trg.Transpose();
        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
      }
      #pragma omp critical (PRECOMP_MATRIX_PTS)
      {
        M_s2t.Resize(0,0);
      }
      break;
    }
    case U0_Type:
    {
      // Coord of target points
      Real_t s=pow(0.5,level);
      int* coord=this->interac_list.RelativeCoord(type,mat_indx);
      Real_t coord_diff[3]={(coord[0]-1)*s*0.5,(coord[1]-1)*s*0.5,(coord[2]-1)*s*0.5};
      std::vector<Real_t>& rel_trg_coord = this->mat->RelativeTrgCoord();
      size_t n_trg = rel_trg_coord.size()/3;
      std::vector<Real_t> trg_coord(n_trg*3);
      for(size_t j=0;j<n_trg;j++){
        trg_coord[j*3+0]=rel_trg_coord[j*3+0]*s-coord_diff[0];
        trg_coord[j*3+1]=rel_trg_coord[j*3+1]*s-coord_diff[1];
        trg_coord[j*3+2]=rel_trg_coord[j*3+2]*s-coord_diff[2];
      }

      // Evaluate potential at target points.
      Matrix<Real_t> M_s2t(n_src*this->kernel.ker_dim[0], n_trg*this->kernel.ker_dim[1]);
      Matrix<Real_t> M_s2t_local(n_src*this->kernel.ker_dim[0], n_trg*this->kernel.ker_dim[1]);
      {
        M_s2t.SetZero();
        M_s2t_local.SetZero();
        size_t cnt_done=0;
        #pragma omp parallel for schedule(dynamic)
        for(size_t i=myrank;i<n_trg;i+=np){
          std::vector<Real_t> s2t=cheb_integ(cheb_deg, &trg_coord[i*3], (Real_t)(s*2.0), this->kernel);
          #ifdef __VERBOSE__
          #pragma omp critical
          if(!myrank){
            cnt_done++;
            std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<<std::flush;
          }
          #endif
          for(int k=0; k<this->kernel.ker_dim[1]; k++)
          for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++)
            M_s2t_local[j][i*this->kernel.ker_dim[1]+k] = s2t[j+k*M_s2t.Dim(0)];
        }
        if(!myrank) std::cout<<"\r                    \r"<<std::flush;
        MPI_Allreduce(M_s2t_local[0], M_s2t[0], M_s2t.Dim(0)*M_s2t.Dim(1), par::Mpi_datatype<Real_t>::value(), par::Mpi_datatype<Real_t>::sum(), this->comm);
      }

      // Compute Chebyshev approx from target potential.
      M.Resize(M_s2t.Dim(0), n_src*this->kernel.ker_dim[1]);
      #pragma omp parallel for schedule(dynamic)
      for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
        Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
        M_trg=M_trg.Transpose();
        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
      }
      break;
    }
    case U1_Type:
    {
      // Coord of target points
      Real_t s=pow(0.5,level);
      int* coord=this->interac_list.RelativeCoord(type,mat_indx);
      Real_t coord_diff[3]={coord[0]*s,coord[1]*s,coord[2]*s};
      std::vector<Real_t>& rel_trg_coord = this->mat->RelativeTrgCoord();
      size_t n_trg = rel_trg_coord.size()/3;
      std::vector<Real_t> trg_coord(n_trg*3);
      for(size_t j=0;j<n_trg;j++){
        trg_coord[j*3+0]=rel_trg_coord[j*3+0]*s-coord_diff[0];
        trg_coord[j*3+1]=rel_trg_coord[j*3+1]*s-coord_diff[1];
        trg_coord[j*3+2]=rel_trg_coord[j*3+2]*s-coord_diff[2];
      }

      // Evaluate potential at target points.
      Matrix<Real_t> M_s2t(n_src*this->kernel.ker_dim[0], n_trg*this->kernel.ker_dim[1]);
      Matrix<Real_t> M_s2t_local(n_src*this->kernel.ker_dim[0], n_trg*this->kernel.ker_dim[1]);
      {
        M_s2t.SetZero();
        M_s2t_local.SetZero();
        size_t cnt_done=0;
        #pragma omp parallel for schedule(dynamic)
        for(size_t i=myrank;i<n_trg;i+=np){
          std::vector<Real_t> s2t=cheb_integ(cheb_deg, &trg_coord[i*3], s, this->kernel);
          #ifdef __VERBOSE__
          #pragma omp critical
          if(!myrank){
            cnt_done++;
            std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<<std::flush;
          }
          #endif
          for(int k=0; k<this->kernel.ker_dim[1]; k++)
          for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++)
            M_s2t_local[j][i*this->kernel.ker_dim[1]+k] = s2t[j+k*M_s2t.Dim(0)];
        }
        if(!myrank) std::cout<<"\r                    \r"<<std::flush;
        MPI_Allreduce(M_s2t_local[0], M_s2t[0], M_s2t.Dim(0)*M_s2t.Dim(1), par::Mpi_datatype<Real_t>::value(), par::Mpi_datatype<Real_t>::sum(), this->comm);
      }

      // Compute Chebyshev approx from target potential.
      M.Resize(M_s2t.Dim(0), n_src*this->kernel.ker_dim[1]);
      #pragma omp parallel for schedule(dynamic)
      for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
        Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
        M_trg=M_trg.Transpose();
        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
      }
      break;
    }
    case U2_Type:
    {
      // Coord of target points
      Real_t s=pow(0.5,level);
      int* coord=this->interac_list.RelativeCoord(type,mat_indx);
      Real_t coord_diff[3]={(coord[0]+1)*s*0.25,(coord[1]+1)*s*0.25,(coord[2]+1)*s*0.25};
      std::vector<Real_t>& rel_trg_coord = this->mat->RelativeTrgCoord();
      size_t n_trg = rel_trg_coord.size()/3;
      std::vector<Real_t> trg_coord(n_trg*3);
      for(size_t j=0;j<n_trg;j++){
        trg_coord[j*3+0]=rel_trg_coord[j*3+0]*s-coord_diff[0];
        trg_coord[j*3+1]=rel_trg_coord[j*3+1]*s-coord_diff[1];
        trg_coord[j*3+2]=rel_trg_coord[j*3+2]*s-coord_diff[2];
      }

      // Evaluate potential at target points.
      Matrix<Real_t> M_s2t(n_src*this->kernel.ker_dim[0], n_trg*this->kernel.ker_dim[1]);
      Matrix<Real_t> M_s2t_local(n_src*this->kernel.ker_dim[0], n_trg*this->kernel.ker_dim[1]);
      {
        M_s2t.SetZero();
        M_s2t_local.SetZero();
        size_t cnt_done=0;
        #pragma omp parallel for schedule(dynamic)
        for(size_t i=myrank;i<n_trg;i+=np){
          std::vector<Real_t> s2t=cheb_integ(cheb_deg, &trg_coord[i*3], (Real_t)(s*0.5), this->kernel);
          #ifdef __VERBOSE__
          #pragma omp critical
          if(!myrank){
            cnt_done++;
            std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<<std::flush;
          }
          #endif
          for(int k=0; k<this->kernel.ker_dim[1]; k++)
          for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++)
            M_s2t_local[j][i*this->kernel.ker_dim[1]+k] = s2t[j+k*M_s2t.Dim(0)];
        }
        if(!myrank) std::cout<<"\r                    \r"<<std::flush;
        MPI_Allreduce(M_s2t_local[0], M_s2t[0], M_s2t.Dim(0)*M_s2t.Dim(1), par::Mpi_datatype<Real_t>::value(), par::Mpi_datatype<Real_t>::sum(), this->comm);
      }

      // Compute Chebyshev approx from target potential.
      M.Resize(M_s2t.Dim(0), n_src*this->kernel.ker_dim[1]);
      #pragma omp parallel for schedule(dynamic)
      for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
        Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
        M_trg=M_trg.Transpose();
        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
      }
      break;
    }
    case W_Type:
    {
      if(this->MultipoleOrder()==0) break;
      Matrix<Real_t>& M_s2t=FMM_Pts<FMMNode>::Precomp(level, type, mat_indx);
      int n_trg=M_s2t.Dim(1)/this->kernel.ker_dim[1];

      // Compute Chebyshev approx from target potential.
      M.Resize(M_s2t.Dim(0), n_src*this->kernel.ker_dim[1]);
      #pragma omp parallel for schedule(dynamic)
      for(size_t j=0; j<(size_t)M_s2t.Dim(0); j++){
        Matrix<Real_t> M_trg(n_trg,this->kernel.ker_dim[1],M_s2t[j],false);
        M_trg=M_trg.Transpose();
        cheb_approx<Real_t,Real_t>(M_s2t[j],cheb_deg,this->kernel.ker_dim[1],M[j]);
      }
      #pragma omp critical (PRECOMP_MATRIX_PTS)
      {
        M_s2t.Resize(0,0);
      }
      break;
    }
    case X_Type:
    {
      if(this->MultipoleOrder()==0) break;

      // Coord of target points
      Real_t s=pow(0.5,level-1);
      int* coord=this->interac_list.RelativeCoord(type,mat_indx);
      Real_t c[3]={-(coord[0]-1)*s*0.25,-(coord[1]-1)*s*0.25,-(coord[2]-1)*s*0.25};
      std::vector<Real_t> trg_coord=d_check_surf(this->MultipoleOrder(),c,level);
      size_t n_trg=trg_coord.size()/3;

      // Evaluate potential at target points.
      Matrix<Real_t> M_xs2c(n_src*this->aux_kernel.ker_dim[0], n_trg*this->aux_kernel.ker_dim[1]);
      Matrix<Real_t> M_xs2c_local(n_src*this->aux_kernel.ker_dim[0], n_trg*this->aux_kernel.ker_dim[1]);
      {
        M_xs2c.SetZero();
        M_xs2c_local.SetZero();
        size_t cnt_done=0;
        #pragma omp parallel for schedule(dynamic)
        for(size_t i=myrank;i<n_trg;i+=np){
          std::vector<Real_t> M_=cheb_integ(cheb_deg, &trg_coord[i*3], s, this->aux_kernel);
          #ifdef __VERBOSE__
          #pragma omp critical
          if(!myrank){
            cnt_done++;
            std::cout<<"\r Progress: "<<(100*progress*n_trg+100*cnt_done*np)/(class_count*n_trg)<<"% "<<std::flush;
          }
          #endif
          for(int k=0; k<this->aux_kernel.ker_dim[1]; k++)
          for(size_t j=0; j<(size_t)M_xs2c.Dim(0); j++)
            M_xs2c_local[j][i*this->aux_kernel.ker_dim[1]+k] = M_[j+k*M_xs2c.Dim(0)];
        }
        if(!myrank) std::cout<<"\r                    \r"<<std::flush;
        MPI_Allreduce(M_xs2c_local[0], M_xs2c[0], M_xs2c.Dim(0)*M_xs2c.Dim(1), par::Mpi_datatype<Real_t>::value(), par::Mpi_datatype<Real_t>::sum(), this->comm);
      }
      Matrix<Real_t>& M_c2e = this->Precomp(level, DC2DE_Type, 0);
      M=M_xs2c*M_c2e;
      break;
    }
    default:
    {
      return FMM_Pts<FMMNode>::Precomp(level, type, mat_indx);
      break;
    }
  }

  //Save the matrix for future use.
  #pragma omp critical (PRECOMP_MATRIX_PTS)
  if(M_.Dim(0)==0 && M_.Dim(1)==0){ M_=M;}
  return M_;
}
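
// CollectNodeData() below extends the point-FMM buffer layout with two more
// per-leaf vectors: buff[4]/n_list[4] hold the input Chebyshev coefficients
// (ChebData, kernel.ker_dim[0]*n_coeff values per leaf) and buff[5]/n_list[5]
// hold the output coefficients (FMMData::cheb_out, kernel.ker_dim[1]*n_coeff
// values per non-ghost leaf), where n_coeff=(q+1)(q+2)(q+3)/6.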
template <class FMMNode>
void FMM_Cheb<FMMNode>::CollectNodeData(std::vector<FMMNode*>& node, std::vector<Matrix<Real_t> >& buff, std::vector<Vector<FMMNode_t*> >& n_list, std::vector<size_t> extra_size){
  if(      buff.size()<6)       buff.resize(6);
  if(    n_list.size()<6)     n_list.resize(6);
  if(extra_size.size()<6) extra_size.resize(6,0);
  size_t n_coeff=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6;
  if(node.size()==0) return;
  {// 4. cheb_in
    int indx=4;
    int dof=this->kernel.ker_dim[0];
    size_t vec_sz=dof*n_coeff;
    std::vector< FMMNode* > node_lst;
    for(size_t i=0;i<node.size();i++)
      if(node[i]->IsLeaf())
        node_lst.push_back(node[i]);
    n_list[indx]=node_lst;
    extra_size[indx]+=node_lst.size()*vec_sz;
    #pragma omp parallel for
    for(size_t i=0;i<node.size();i++){ // Move data before resizing buff[indx]
      Vector<Real_t>& cheb_in =node[i]->ChebData();
      Vector<Real_t> new_buff=cheb_in;
      cheb_in.Swap(new_buff);
    }
  }
  {// 5. cheb_out
    int indx=5;
    int dof=this->kernel.ker_dim[1];
    size_t vec_sz=dof*n_coeff;
    std::vector< FMMNode* > node_lst;
    for(size_t i=0;i<node.size();i++)
      if(node[i]->IsLeaf() && !node[i]->IsGhost())
        node_lst.push_back(node[i]);
    n_list[indx]=node_lst;
    extra_size[indx]+=node_lst.size()*vec_sz;
    #pragma omp parallel for
    for(size_t i=0;i<node.size();i++){ // Move data before resizing buff[indx]
      Vector<Real_t>& cheb_out=((FMMData*)node[i]->FMMData())->cheb_out;
      cheb_out.ReInit(0);
    }
  }
  FMM_Pts<FMMNode>::CollectNodeData(node, buff, n_list, extra_size);
  {// 4. cheb_in
    int indx=4;
    int dof=this->kernel.ker_dim[0];
    size_t vec_sz=dof*n_coeff;
    Vector< FMMNode* >& node_lst=n_list[indx];
    Real_t* buff_ptr=buff[indx][0]+buff[indx].Dim(0)*buff[indx].Dim(1)-extra_size[indx];
    #pragma omp parallel for
    for(size_t i=0;i<node_lst.Dim();i++){
      Vector<Real_t>& cheb_in =node_lst[i]->ChebData();
      mem::memcopy(buff_ptr+i*vec_sz, &cheb_in[0], cheb_in.Dim()*sizeof(Real_t));
      cheb_in.ReInit(vec_sz, buff_ptr+i*vec_sz, false);
      //if(node_lst[i]->IsGhost()) cheb_in.SetZero();
    }
    buff[indx].AllocDevice(true);
  }
  {// 5. cheb_out
    int indx=5;
    int dof=this->kernel.ker_dim[1];
    size_t vec_sz=dof*n_coeff;
    Vector< FMMNode* >& node_lst=n_list[indx];
    Real_t* buff_ptr=buff[indx][0]+buff[indx].Dim(0)*buff[indx].Dim(1)-extra_size[indx];
    #pragma omp parallel for
    for(size_t i=0;i<node_lst.Dim();i++){
      Vector<Real_t>& cheb_out=((FMMData*)node_lst[i]->FMMData())->cheb_out;
      cheb_out.ReInit(vec_sz, buff_ptr+i*vec_sz, false);
      cheb_out.SetZero();
    }
    buff[indx].AllocDevice(true);
  }
}
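
// The *Setup() routines below all follow the same pattern: select the nodes
// participating at this tree level, point setup_data.input_vector and
// output_vector at the per-node Chebyshev or equivalent-density vectors, and
// hand the interaction lists to SetupInterac(). The matching evaluation
// routines (Source2Up, X_List, W_List, U_List, Down2Target) then just call
// EvalList() on the prepared setup_data, in addition to the point-FMM
// contribution where the base-class call is also made.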
template <class FMMNode>
void FMM_Cheb<FMMNode>::Source2UpSetup(SetupData<Real_t>& setup_data, std::vector<Matrix<Real_t> >& buff, std::vector<Vector<FMMNode_t*> >& n_list, int level, bool device){
  if(this->MultipoleOrder()==0) return;
  FMM_Pts<FMMNode>::Source2UpSetup(setup_data, buff, n_list, level, device);
  { // Set setup_data
    setup_data.level=level;
    setup_data.kernel=&this->aux_kernel;
    setup_data.interac_type.resize(1);
    setup_data.interac_type[0]=S2U_Type;

    setup_data. input_data=&buff[4];
    setup_data.output_data=&buff[0];
    Vector<FMMNode_t*>& nodes_in =n_list[4];
    Vector<FMMNode_t*>& nodes_out=n_list[0];

    setup_data.nodes_in .clear();
    setup_data.nodes_out.clear();
    for(size_t i=0;i<nodes_in .Dim();i++) if(nodes_in [i]->Depth()==level || level==-1) setup_data.nodes_in .push_back(nodes_in [i]);
    for(size_t i=0;i<nodes_out.Dim();i++) if(nodes_out[i]->Depth()==level || level==-1) setup_data.nodes_out.push_back(nodes_out[i]);
  }

  std::vector<void*>& nodes_in =setup_data.nodes_in ;
  std::vector<void*>& nodes_out=setup_data.nodes_out;
  std::vector<Vector<Real_t>*>&  input_vector=setup_data. input_vector;  input_vector.clear();
  std::vector<Vector<Real_t>*>& output_vector=setup_data.output_vector; output_vector.clear();
  for(size_t i=0;i<nodes_in .size();i++)  input_vector.push_back(&( ((FMMNode*)nodes_in [i]) )->ChebData() );
  for(size_t i=0;i<nodes_out.size();i++) output_vector.push_back(&((FMMData*)((FMMNode*)nodes_out[i])->FMMData())->upward_equiv);
  this->SetupInterac(setup_data,device);
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::Source2Up(SetupData<Real_t>& setup_data, bool device){
  //Add Source2Up contribution.
  this->EvalList(setup_data, device);
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::X_ListSetup(SetupData<Real_t>& setup_data, std::vector<Matrix<Real_t> >& buff, std::vector<Vector<FMMNode_t*> >& n_list, int level, bool device){
  if(this->MultipoleOrder()==0) return;
  FMM_Pts<FMMNode>::X_ListSetup(setup_data, buff, n_list, level, device);
  { // Set setup_data
    setup_data.level=level;
    setup_data.kernel=&this->aux_kernel;
    setup_data.interac_type.resize(1);
    setup_data.interac_type[0]=X_Type;

    setup_data. input_data=&buff[4];
    setup_data.output_data=&buff[1];
    Vector<FMMNode_t*>& nodes_in =n_list[4];
    Vector<FMMNode_t*>& nodes_out=n_list[1];

    setup_data.nodes_in .clear();
    setup_data.nodes_out.clear();
    for(size_t i=0;i<nodes_in .Dim();i++) if(nodes_in [i]->Depth()==level-1 || level==-1) setup_data.nodes_in .push_back(nodes_in [i]);
    for(size_t i=0;i<nodes_out.Dim();i++) if(nodes_out[i]->Depth()==level   || level==-1) setup_data.nodes_out.push_back(nodes_out[i]);
  }

  std::vector<void*>& nodes_in =setup_data.nodes_in ;
  std::vector<void*>& nodes_out=setup_data.nodes_out;
  std::vector<Vector<Real_t>*>&  input_vector=setup_data. input_vector;  input_vector.clear();
  std::vector<Vector<Real_t>*>& output_vector=setup_data.output_vector; output_vector.clear();
  for(size_t i=0;i<nodes_in .size();i++)  input_vector.push_back(&( ((FMMNode*)nodes_in [i]) )->ChebData() );
  for(size_t i=0;i<nodes_out.size();i++) output_vector.push_back(&((FMMData*)((FMMNode*)nodes_out[i])->FMMData())->dnward_equiv);
  this->SetupInterac(setup_data,device);

  { // Resize device buffer
    size_t n=setup_data.output_data->Dim(0)*setup_data.output_data->Dim(1)*sizeof(Real_t);
    if(this->dev_buffer.Dim()<n) this->dev_buffer.Resize(n);
  }
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::X_List(SetupData<Real_t>& setup_data, bool device){
  //Add X_List contribution.
  FMM_Pts<FMMNode>::X_List(setup_data, device);
  this->EvalList(setup_data, device);
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::W_ListSetup(SetupData<Real_t>& setup_data, std::vector<Matrix<Real_t> >& buff, std::vector<Vector<FMMNode_t*> >& n_list, int level, bool device){
  if(this->MultipoleOrder()==0) return;
  { // Set setup_data
    setup_data.level=level;
    setup_data.kernel=&this->kernel;
    setup_data.interac_type.resize(1);
    setup_data.interac_type[0]=W_Type;

    setup_data. input_data=&buff[0];
    setup_data.output_data=&buff[5];
    Vector<FMMNode_t*>& nodes_in =n_list[0];
    Vector<FMMNode_t*>& nodes_out=n_list[5];

    setup_data.nodes_in .clear();
    setup_data.nodes_out.clear();
    for(size_t i=0;i<nodes_in .Dim();i++) if(nodes_in [i]->Depth()==level+1 || level==-1) setup_data.nodes_in .push_back(nodes_in [i]);
    for(size_t i=0;i<nodes_out.Dim();i++) if(nodes_out[i]->Depth()==level   || level==-1) setup_data.nodes_out.push_back(nodes_out[i]);
  }

  std::vector<void*>& nodes_in =setup_data.nodes_in ;
  std::vector<void*>& nodes_out=setup_data.nodes_out;
  std::vector<Vector<Real_t>*>&  input_vector=setup_data. input_vector;  input_vector.clear();
  std::vector<Vector<Real_t>*>& output_vector=setup_data.output_vector; output_vector.clear();
  for(size_t i=0;i<nodes_in .size();i++)  input_vector.push_back(&((FMMData*)((FMMNode*)nodes_in [i])->FMMData())->upward_equiv);
  for(size_t i=0;i<nodes_out.size();i++) output_vector.push_back(&((FMMData*)((FMMNode*)nodes_out[i])->FMMData())->cheb_out   );
  this->SetupInterac(setup_data,device);

  { // Resize device buffer
    size_t n=setup_data.output_data->Dim(0)*setup_data.output_data->Dim(1)*sizeof(Real_t);
    if(this->dev_buffer.Dim()<n) this->dev_buffer.Resize(n);
  }
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::W_List(SetupData<Real_t>& setup_data, bool device){
  //Add W_List contribution.
  this->EvalList(setup_data, device);
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::U_ListSetup(SetupData<Real_t>& setup_data, std::vector<Matrix<Real_t> >& buff, std::vector<Vector<FMMNode_t*> >& n_list, int level, bool device){
  FMM_Pts<FMMNode>::U_ListSetup(setup_data, buff, n_list, level, device);
  { // Set setup_data
    setup_data.level=level;
    setup_data.kernel=&this->kernel;
    setup_data.interac_type.resize(3);
    setup_data.interac_type[0]=U0_Type;
    setup_data.interac_type[1]=U1_Type;
    setup_data.interac_type[2]=U2_Type;

    setup_data. input_data=&buff[4];
    setup_data.output_data=&buff[5];
    Vector<FMMNode_t*>& nodes_in =n_list[4];
    Vector<FMMNode_t*>& nodes_out=n_list[5];

    setup_data.nodes_in .clear();
    setup_data.nodes_out.clear();
    for(size_t i=0;i<nodes_in .Dim();i++) if((level-1<=nodes_in [i]->Depth() && nodes_in [i]->Depth()<=level+1) || level==-1) setup_data.nodes_in .push_back(nodes_in [i]);
    for(size_t i=0;i<nodes_out.Dim();i++) if((nodes_out[i]->Depth()==level) || level==-1) setup_data.nodes_out.push_back(nodes_out[i]);
  }

  std::vector<void*>& nodes_in =setup_data.nodes_in ;
  std::vector<void*>& nodes_out=setup_data.nodes_out;
  std::vector<Vector<Real_t>*>&  input_vector=setup_data. input_vector;  input_vector.clear();
  std::vector<Vector<Real_t>*>& output_vector=setup_data.output_vector; output_vector.clear();
  for(size_t i=0;i<nodes_in .size();i++)  input_vector.push_back(&( ((FMMNode*)nodes_in [i]) )->ChebData());
  for(size_t i=0;i<nodes_out.size();i++) output_vector.push_back(&((FMMData*)((FMMNode*)nodes_out[i])->FMMData())->cheb_out  );
  this->SetupInterac(setup_data,device);

  { // Resize device buffer
    size_t n=setup_data.output_data->Dim(0)*setup_data.output_data->Dim(1)*sizeof(Real_t);
    if(this->dev_buffer.Dim()<n) this->dev_buffer.Resize(n);
  }
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::U_List(SetupData<Real_t>& setup_data, bool device){
  //Add U_List contribution.
  FMM_Pts<FMMNode>::U_List(setup_data, device);
  this->EvalList(setup_data, device);
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::Down2TargetSetup(SetupData<Real_t>& setup_data, std::vector<Matrix<Real_t> >& buff, std::vector<Vector<FMMNode_t*> >& n_list, int level, bool device){
  if(this->MultipoleOrder()==0) return;
  { // Set setup_data
    setup_data.level=level;
    setup_data.kernel=&this->kernel;
    setup_data.interac_type.resize(1);
    setup_data.interac_type[0]=D2T_Type;

    setup_data. input_data=&buff[1];
    setup_data.output_data=&buff[5];
    Vector<FMMNode_t*>& nodes_in =n_list[1];
    Vector<FMMNode_t*>& nodes_out=n_list[5];

    setup_data.nodes_in .clear();
    setup_data.nodes_out.clear();
    for(size_t i=0;i<nodes_in .Dim();i++) if(nodes_in [i]->Depth()==level || level==-1) setup_data.nodes_in .push_back(nodes_in [i]);
    for(size_t i=0;i<nodes_out.Dim();i++) if(nodes_out[i]->Depth()==level || level==-1) setup_data.nodes_out.push_back(nodes_out[i]);
  }

  std::vector<void*>& nodes_in =setup_data.nodes_in ;
  std::vector<void*>& nodes_out=setup_data.nodes_out;
  std::vector<Vector<Real_t>*>&  input_vector=setup_data. input_vector;  input_vector.clear();
  std::vector<Vector<Real_t>*>& output_vector=setup_data.output_vector; output_vector.clear();
  for(size_t i=0;i<nodes_in .size();i++)  input_vector.push_back(&((FMMData*)((FMMNode*)nodes_in [i])->FMMData())->dnward_equiv);
  for(size_t i=0;i<nodes_out.size();i++) output_vector.push_back(&((FMMData*)((FMMNode*)nodes_out[i])->FMMData())->cheb_out   );
  this->SetupInterac(setup_data,device);
}

template <class FMMNode>
void FMM_Cheb<FMMNode>::Down2Target(SetupData<Real_t>& setup_data, bool device){
  //Add Down2Target contribution.
  this->EvalList(setup_data, device);
}
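
// PostProcessing() evaluates each leaf's output Chebyshev expansion (cheb_out)
// at that leaf's target points: target coordinates are mapped into the
// reference cube [-1,1]^3 using the leaf corner c and scale 2^(depth+1), and
// cheb_eval() accumulates the result into trg_value.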
template <class FMMNode>
void FMM_Cheb<FMMNode>::PostProcessing(std::vector<FMMNode_t*>& nodes){
  size_t n=nodes.size();
  #pragma omp parallel
  {
    int omp_p=omp_get_num_threads();
    int pid = omp_get_thread_num();
    size_t a=(pid*n)/omp_p;
    size_t b=((pid+1)*n)/omp_p;

    //For each node, evaluate the output Chebyshev expansion at its target points.
    for(size_t i=a;i<b;i++){
      Vector<Real_t>& trg_coord=nodes[i]->trg_coord;
      Vector<Real_t>& trg_value=nodes[i]->trg_value;
      Vector<Real_t>& cheb_out =((FMMData*)nodes[i]->FMMData())->cheb_out;

      //Initialize target potential.
      size_t trg_cnt=trg_coord.Dim()/COORD_DIM;
      //trg_value.assign(trg_cnt*dof*this->kernel.ker_dim[1],0);

      //Sample local expansion at target points.
      if(trg_cnt>0 && cheb_out.Dim()>0){
        Real_t* c=nodes[i]->Coord();
        Real_t scale=pow(2.0,nodes[i]->Depth()+1);
        std::vector<Real_t> rel_coord(COORD_DIM*trg_cnt);
        for(size_t j=0;j<trg_cnt;j++) for(int k=0;k<COORD_DIM;k++)
          rel_coord[j*COORD_DIM+k]=(trg_coord[j*COORD_DIM+k]-c[k])*scale-1.0;
        cheb_eval(cheb_out, cheb_deg, rel_coord, trg_value);
      }
    }
  }
}

}//end namespace