  1. /**
  2. * \file mpi_tree.txx
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 12-11-2010
  5. * \brief This file contains the implementation of the class MPI_Tree.
  6. */
#include <assert.h>
#include <typeinfo>
#include <cstring>
#include <cstdint>
#include <cmath>
#include <fstream>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <vector>
#include <list>
#include <set>
#include <algorithm>
#include <omp.h>
#include <parUtils.h>
#include <ompUtils.h>
#include <profile.hpp>
  16. namespace pvfmm{
  17. /**
  18. * @author Dhairya Malhotra, dhairya.malhotra@gmail.com
  19. * @date 08 Feb 2011
  20. */
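/**
 * \brief Builds the local part of a linear octree. Given the sorted Morton ids
 * of the local points (nodes) and a seed range of octants (leaves), the routine
 * walks the space-filling curve and descends an octant only while it would
 * otherwise hold more than maxNumPts points (or until depth maxDepth-1), then
 * overwrites leaves with the resulting leaf octants. With complete==true the
 * gap up to the end of the original range is also filled, so that the union of
 * the output over all processes covers the whole domain.
 */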
  21. inline int p2oLocal(Vector<MortonId> & nodes, Vector<MortonId>& leaves,
  22. unsigned int maxNumPts, unsigned int maxDepth, bool complete) {
  23. assert(maxDepth<=MAX_DEPTH);
  24. std::vector<MortonId> leaves_lst;
  25. unsigned int init_size=leaves.Dim();
  26. unsigned int num_pts=nodes.Dim();
  27. MortonId curr_node=leaves[0];
  28. MortonId last_node=leaves[init_size-1].NextId();
  29. MortonId next_node;
  30. unsigned int curr_pt=0;
  31. unsigned int next_pt=curr_pt+maxNumPts;
  32. while(next_pt <= num_pts){
  33. next_node = curr_node.NextId();
  34. while( next_pt < num_pts && next_node > nodes[next_pt] && curr_node.GetDepth() < maxDepth-1 ){
  35. curr_node = curr_node.getDFD(curr_node.GetDepth()+1);
  36. next_node = curr_node.NextId();
  37. }
  38. leaves_lst.push_back(curr_node);
  39. curr_node = next_node;
  40. unsigned int inc=maxNumPts;
  41. while(next_pt < num_pts && curr_node > nodes[next_pt]){
  42. // We have more than maxNumPts points per octant because the node can
  43. // not be refined any further.
  44. inc=inc<<1;
  45. next_pt+=inc;
  46. if(next_pt > num_pts){
  47. next_pt = num_pts;
  48. break;
  49. }
  50. }
  51. curr_pt = std::lower_bound(&nodes[0]+curr_pt,&nodes[0]+next_pt,curr_node,std::less<MortonId>())-&nodes[0];
  52. if(curr_pt >= num_pts) break;
  53. next_pt = curr_pt + maxNumPts;
  54. if(next_pt > num_pts) next_pt = num_pts;
  55. }
  56. #ifndef NDEBUG
  57. for(size_t i=0;i<leaves_lst.size();i++){
  58. size_t a=std::lower_bound(&nodes[0],&nodes[0]+nodes.Dim(),leaves_lst[i],std::less<MortonId>())-&nodes[0];
  59. size_t b=std::lower_bound(&nodes[0],&nodes[0]+nodes.Dim(),leaves_lst[i].NextId(),std::less<MortonId>())-&nodes[0];
  60. assert(b-a<=maxNumPts || leaves_lst[i].GetDepth()==maxDepth-1);
  61. if(i==leaves_lst.size()-1) assert(b==nodes.Dim() && a<nodes.Dim());
  62. if(i==0) assert(a==0);
  63. }
  64. #endif
  65. if(complete)
  66. while(curr_node<last_node){
  67. while( curr_node.NextId() > last_node && curr_node.GetDepth() < maxDepth-1 )
  68. curr_node = curr_node.getDFD(curr_node.GetDepth()+1);
  69. leaves_lst.push_back(curr_node);
  70. curr_node = curr_node.NextId();
  71. }
  72. leaves=leaves_lst;
  73. return 0;
  74. }
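/**
 * \brief Distributed construction of a linear octree from point Morton ids:
 * the ids are sorted with par::HyperQuickSort, up to 2*maxNumPts boundary
 * points are borrowed from the next rank, the local tree is built with
 * p2oLocal, octants duplicated on adjacent ranks are removed, and the result
 * is repartitioned with par::partitionW. A minimal calling sketch (mirroring
 * the call in MPI_Tree::Initialize below):
 *
 *   Vector<MortonId> pt_mid;  // Morton ids of the local points.
 *   Vector<MortonId> lin_oct; // Output: local leaf octants.
 *   points2Octree(pt_mid, lin_oct, max_depth, max_pts, comm);
 */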
  75. inline int points2Octree(const Vector<MortonId>& pt_mid, Vector<MortonId>& nodes,
  76. unsigned int maxDepth, unsigned int maxNumPts, const MPI_Comm& comm ) {
  77. int myrank, np;
  78. MPI_Comm_rank(comm, &myrank);
  79. MPI_Comm_size(comm, &np);
  80. // Sort morton id of points.
  81. Profile::Tic("SortMortonId", &comm, true, 5);
  82. Vector<MortonId> pt_sorted;
  83. //par::partitionW<MortonId>(pt_mid, NULL, comm);
  84. par::HyperQuickSort(pt_mid, pt_sorted, comm);
  85. size_t pt_cnt=pt_sorted.Dim();
  86. Profile::Toc();
  87. // Add last few points from next process, to get the boundary octant right.
  88. Profile::Tic("Comm", &comm, true, 5);
  89. {
  90. { // Adjust maxNumPts
  91. size_t glb_pt_cnt=0;
  92. MPI_Allreduce(&pt_cnt, &glb_pt_cnt, 1, par::Mpi_datatype<size_t>::value(), MPI_SUM, comm);
  93. if(glb_pt_cnt<maxNumPts*np) maxNumPts=glb_pt_cnt/np;
  94. }
  95. size_t recv_size=0;
  96. size_t send_size=(2*maxNumPts<pt_cnt?2*maxNumPts:pt_cnt);
  97. {
  98. MPI_Request recvRequest;
  99. MPI_Request sendRequest;
  100. MPI_Status statusWait;
  101. if(myrank < (np-1)) MPI_Irecv (&recv_size, 1, par::Mpi_datatype<size_t>::value(), myrank+1, 1, comm, &recvRequest);
  102. if(myrank > 0 ) MPI_Issend(&send_size, 1, par::Mpi_datatype<size_t>::value(), myrank-1, 1, comm, &sendRequest);
  103. if(myrank < (np-1)) MPI_Wait(&recvRequest, &statusWait);
  104. if(myrank > 0 ) MPI_Wait(&sendRequest, &statusWait); //This can be done later.
  105. }
  106. if(recv_size>0){// Resize pt_sorted.
  107. Vector<MortonId> pt_sorted_(pt_cnt+recv_size);
  108. mem::memcopy(&pt_sorted_[0], &pt_sorted[0], pt_cnt*sizeof(MortonId));
  109. pt_sorted.Swap(pt_sorted_);
  110. }
  111. {// Exchange data.
  112. MPI_Request recvRequest;
  113. MPI_Request sendRequest;
  114. MPI_Status statusWait;
  115. if(myrank < (np-1)) MPI_Irecv (&pt_sorted[0]+pt_cnt, recv_size, par::Mpi_datatype<MortonId>::value(), myrank+1, 1, comm, &recvRequest);
  116. if(myrank > 0 ) MPI_Issend(&pt_sorted[0] , send_size, par::Mpi_datatype<MortonId>::value(), myrank-1, 1, comm, &sendRequest);
  117. if(myrank < (np-1)) MPI_Wait(&recvRequest, &statusWait);
  118. if(myrank > 0 ) MPI_Wait(&sendRequest, &statusWait); //This can be done later.
  119. }
  120. }
  121. Profile::Toc();
  122. // Construct local octree.
  123. Profile::Tic("p2o_local", &comm, false, 5);
  124. Vector<MortonId> nodes_local(1); nodes_local[0]=MortonId();
  125. p2oLocal(pt_sorted, nodes_local, maxNumPts, maxDepth, myrank==np-1);
  126. Profile::Toc();
  127. // Remove duplicate nodes on adjacent processors.
  128. Profile::Tic("RemoveDuplicates", &comm, true, 5);
  129. {
  130. size_t node_cnt=nodes_local.Dim();
  131. MortonId first_node;
  132. MortonId last_node=nodes_local[node_cnt-1];
  133. { // Send last_node to next process and get first_node from previous process.
  134. MPI_Request recvRequest;
  135. MPI_Request sendRequest;
  136. MPI_Status statusWait;
if(myrank < (np-1)) MPI_Issend(& last_node, 1, par::Mpi_datatype<MortonId>::value(), myrank+1, 1, comm, &sendRequest);
if(myrank > 0 ) MPI_Irecv (&first_node, 1, par::Mpi_datatype<MortonId>::value(), myrank-1, 1, comm, &recvRequest);
if(myrank < (np-1)) MPI_Wait(&sendRequest, &statusWait);
if(myrank > 0 ) MPI_Wait(&recvRequest, &statusWait); //This can be done later.
  141. }
  142. size_t i=0;
  143. std::vector<MortonId> node_lst;
  144. if(myrank){
  145. while(i<node_cnt && nodes_local[i].getDFD(maxDepth)<first_node) i++; assert(i);
  146. last_node=nodes_local[i>0?i-1:0].NextId(); // Next MortonId in the tree after first_node.
  147. while(first_node<last_node){ // Complete nodes between first_node and last_node.
  148. while(first_node.isAncestor(last_node))
  149. first_node=first_node.getDFD(first_node.GetDepth()+1);
  150. if(first_node==last_node) break;
  151. node_lst.push_back(first_node);
  152. first_node=first_node.NextId();
  153. }
  154. }
  155. for(;i<node_cnt-(myrank==np-1?0:1);i++) node_lst.push_back(nodes_local[i]);
  156. nodes=node_lst;
  157. }
  158. Profile::Toc();
  159. // Repartition nodes.
  160. Profile::Tic("partitionW", &comm, false, 5);
  161. par::partitionW<MortonId>(nodes, NULL , comm);
  162. Profile::Toc();
  163. return 0;
  164. }
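/**
 * \brief Initializes the distributed tree from point data: construct the root
 * node, compute the point Morton ids and the linear octree (points2Octree),
 * sort/scatter the point coordinates and values to their owning ranks, and
 * finally grow the pointer-based tree so that its non-ghost leaves match
 * lin_oct.
 */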
  165. template <class TreeNode>
  166. void MPI_Tree<TreeNode>::Initialize(typename Node_t::NodeData* init_data){
  167. //Initialize root node.
  168. Profile::Tic("InitRoot",Comm(),false,3);
  169. Tree<TreeNode>::Initialize(init_data);
  170. TreeNode* rnode=this->RootNode();
  171. assert(this->dim==COORD_DIM);
  172. Profile::Toc();
  173. Profile::Tic("Points2Octee",Comm(),true,3);
  174. Vector<MortonId> lin_oct;
  175. { //Get the linear tree.
  176. // Compute MortonId from pt_coord.
  177. Vector<MortonId> pt_mid;
  178. Vector<Real_t>& pt_coord=rnode->pt_coord;
  179. size_t pt_cnt=pt_coord.Dim()/this->dim;
  180. pt_mid.Resize(pt_cnt);
  181. #pragma omp parallel for
  182. for(size_t i=0;i<pt_cnt;i++){
  183. pt_mid[i]=MortonId(pt_coord[i*COORD_DIM+0],pt_coord[i*COORD_DIM+1],pt_coord[i*COORD_DIM+2],this->max_depth);
  184. }
  185. //Get the linear tree.
  186. points2Octree(pt_mid,lin_oct,this->max_depth,init_data->max_pts,*Comm());
  187. }
  188. Profile::Toc();
  189. Profile::Tic("ScatterPoints",Comm(),true,3);
  190. { // Sort and partition point coordinates and values.
  191. std::vector<Vector<Real_t>*> coord_lst;
  192. std::vector<Vector<Real_t>*> value_lst;
  193. std::vector<Vector<size_t>*> scatter_lst;
  194. rnode->NodeDataVec(coord_lst, value_lst, scatter_lst);
  195. assert(coord_lst.size()==value_lst.size());
  196. assert(coord_lst.size()==scatter_lst.size());
  197. Vector<MortonId> pt_mid;
  198. Vector<size_t> scatter_index;
  199. for(size_t i=0;i<coord_lst.size();i++){
  200. if(!coord_lst[i]) continue;
  201. Vector<Real_t>& pt_coord=*coord_lst[i];
  202. { // Compute MortonId from pt_coord.
  203. size_t pt_cnt=pt_coord.Dim()/this->dim;
  204. pt_mid.Resize(pt_cnt);
  205. #pragma omp parallel for
  206. for(size_t i=0;i<pt_cnt;i++){
  207. pt_mid[i]=MortonId(pt_coord[i*COORD_DIM+0],pt_coord[i*COORD_DIM+1],pt_coord[i*COORD_DIM+2],this->max_depth);
  208. }
  209. }
  210. par::SortScatterIndex(pt_mid , scatter_index, comm, &lin_oct[0]);
  211. par::ScatterForward (pt_coord, scatter_index, comm);
  212. if(value_lst[i]!=NULL){
  213. Vector<Real_t>& pt_value=*value_lst[i];
  214. par::ScatterForward(pt_value, scatter_index, comm);
  215. }
  216. if(scatter_lst[i]!=NULL){
  217. Vector<size_t>& pt_scatter=*scatter_lst[i];
  218. pt_scatter=scatter_index;
  219. }
  220. }
  221. }
  222. Profile::Toc();
  223. //Initialize the pointer based tree from the linear tree.
  224. Profile::Tic("PointerTree",Comm(),false,3);
  225. { // Construct the pointer tree from lin_oct
  226. int omp_p=omp_get_max_threads();
  227. // Partition nodes between threads
  228. rnode->SetGhost(false);
  229. for(int i=0;i<omp_p;i++){
  230. size_t idx=(lin_oct.Dim()*i)/omp_p;
  231. Node_t* n=FindNode(lin_oct[idx], true);
  232. assert(n->GetMortonId()==lin_oct[idx]);
  233. UNUSED(n);
  234. }
  235. #pragma omp parallel for
  236. for(int i=0;i<omp_p;i++){
  237. size_t a=(lin_oct.Dim()* i )/omp_p;
  238. size_t b=(lin_oct.Dim()*(i+1))/omp_p;
  239. size_t idx=a;
  240. Node_t* n=FindNode(lin_oct[idx], false);
  241. if(i==0) n=rnode;
  242. while(n!=NULL && (idx<b || i==omp_p-1)){
  243. n->SetGhost(false);
  244. MortonId dn=n->GetMortonId();
  245. if(idx<b && dn.isAncestor(lin_oct[idx])){
  246. if(n->IsLeaf()) n->Subdivide();
  247. }else if(idx<b && dn==lin_oct[idx]){
  248. if(!n->IsLeaf()) n->Truncate();
  249. assert(n->IsLeaf());
  250. idx++;
  251. }else{
  252. n->Truncate();
  253. n->SetGhost(true);
  254. }
  255. n=this->PreorderNxt(n);
  256. }
  257. assert(idx==b);
  258. }
  259. }
  260. Profile::Toc();
  261. #ifndef NDEBUG
  262. CheckTree();
  263. #endif
  264. }
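/**
 * \brief Coarsens the local tree. The partition is first shifted (RedistNodes
 * with a local minimum moved to a coarser boundary) so that octants being
 * merged do not straddle a process boundary; then, in parallel over threads,
 * internal octants for which SubdivCond() no longer holds are truncated,
 * with ancestors of the per-thread splitters handled in a final sequential
 * pass.
 */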
  265. template <class TreeNode>
  266. void MPI_Tree<TreeNode>::CoarsenTree(){
  267. //Redistribute.
  268. {
  269. Node_t* n=this->PostorderFirst();
  270. while(n){
  271. if(n->IsLeaf() && !n->IsGhost()) break;
  272. n=this->PostorderNxt(n);
  273. }
  274. while(true){
  275. Node_t* n_parent=(Node_t*)n->Parent();
  276. Node_t* n_ = n_parent;
  277. while(n_ && !n_->IsLeaf()){
  278. n_=this->PostorderNxt(n_);
  279. if(!n_) break;
  280. }
  281. if(!n_ || n_->IsGhost()) break;
  282. if(n->Depth()<=n_->Depth()) break;
  283. if(n_->Depth()<=1) break;
  284. n=n_;
  285. }
  286. MortonId loc_min=n->GetMortonId();
  287. RedistNodes(&loc_min);
  288. }
  289. //Truncate ghost nodes and build node list
  290. std::vector<Node_t*> leaf_nodes;
  291. {
  292. Node_t* n=this->PostorderFirst();
  293. while(n!=NULL){
  294. if(n->IsLeaf() && !n->IsGhost()) break;
  295. n->Truncate();
  296. n->SetGhost(true);
  297. n->ClearData();
  298. n=this->PostorderNxt(n);
  299. }
  300. while(n!=NULL){
  301. if(n->IsLeaf() && n->IsGhost()) break;
  302. if(n->IsLeaf()) leaf_nodes.push_back(n);
  303. n=this->PreorderNxt(n);
  304. }
  305. while(n!=NULL){
  306. n->Truncate();
  307. n->SetGhost(true);
  308. n->ClearData();
  309. n=this->PreorderNxt(n);
  310. }
  311. }
  312. size_t node_cnt=leaf_nodes.size();
  313. //Partition nodes between OpenMP threads.
  314. int omp_p=omp_get_max_threads();
  315. std::vector<MortonId> mid(omp_p);
  316. std::vector<MortonId> split_key(omp_p);
  317. #pragma omp parallel for
  318. for(int i=0;i<omp_p;i++){
  319. mid[i]=leaf_nodes[(i*node_cnt)/omp_p]->GetMortonId();
  320. }
  321. //Coarsen Tree.
  322. #pragma omp parallel for
  323. for(int i=0;i<omp_p;i++){
  324. Node_t* n_=leaf_nodes[i*node_cnt/omp_p];
  325. if(i*node_cnt/omp_p<(i+1)*node_cnt/omp_p)
  326. while(n_!=NULL){
  327. MortonId n_mid=n_->GetMortonId();
  328. if(!n_->IsLeaf() && !n_mid.isAncestor(mid[i].getDFD()))
  329. if(i<omp_p-1? !n_mid.isAncestor(mid[i+1].getDFD()):true)
  330. if(!n_->SubdivCond()) n_->Truncate();
  331. if(i<omp_p-1? n_mid==mid[i+1]: false) break;
  332. n_=this->PostorderNxt(n_);
  333. }
  334. }
  335. //Truncate nodes along ancestors of splitters.
  336. for(int i=0;i<omp_p;i++){
  337. Node_t* n_=FindNode(mid[i], false, this->RootNode());
  338. while(n_->Depth()>0){
  339. n_=(Node_t*)n_->Parent();
  340. if(!n_->SubdivCond()) n_->Truncate();
  341. else break;
  342. }
  343. }
  344. }
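/**
 * \brief Adaptive refinement with load balancing: after an initial
 * CoarsenTree(), leaves for which SubdivCond() holds are subdivided one level
 * per iteration. After each level the local node count is reduced over all
 * ranks and, if the largest count exceeds 4x the average, RedistNodes() is
 * called to rebalance before continuing. A final RedistNodes/CoarsenTree
 * sequence cleans up the partition.
 */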
  345. template <class TreeNode>
  346. void MPI_Tree<TreeNode>::RefineTree(){
  347. int np, myrank;
  348. MPI_Comm_size(*Comm(),&np);
  349. MPI_Comm_rank(*Comm(),&myrank);
  350. int omp_p=omp_get_max_threads();
  351. int n_child=1UL<<this->Dim();
  352. //Coarsen tree.
  353. MPI_Tree<TreeNode>::CoarsenTree();
  354. //Build node list.
  355. std::vector<Node_t*> leaf_nodes;
  356. {
  357. Node_t* n=this->PostorderFirst();
  358. while(n!=NULL){
  359. if(n->IsLeaf() && !n->IsGhost())
  360. leaf_nodes.push_back(n);
  361. n=this->PostorderNxt(n);
  362. }
  363. }
  364. size_t tree_node_cnt=leaf_nodes.size();
  365. //Adaptive subdivision of leaf nodes with load balancing.
  366. for(int l=0;l<this->max_depth;l++){
  367. //Subdivide nodes.
  368. std::vector<std::vector<Node_t*> > leaf_nodes_(omp_p);
  369. #pragma omp parallel for
  370. for(int i=0;i<omp_p;i++){
  371. size_t a=(leaf_nodes.size()* i )/omp_p;
  372. size_t b=(leaf_nodes.size()*(i+1))/omp_p;
  373. for(size_t j=a;j<b;j++){
  374. if(leaf_nodes[j]->IsLeaf() && !leaf_nodes[j]->IsGhost()){
  375. if(leaf_nodes[j]->SubdivCond()) leaf_nodes[j]->Subdivide();
  376. if(!leaf_nodes[j]->IsLeaf())
  377. for(int k=0;k<n_child;k++)
  378. leaf_nodes_[i].push_back((Node_t*)leaf_nodes[j]->Child(k));
  379. }
  380. }
  381. }
  382. for(int i=0;i<omp_p;i++)
  383. tree_node_cnt+=(leaf_nodes_[i].size()/n_child)*(n_child-1);
//Determine load imbalance.
long long loc_node_cnt=(long long)tree_node_cnt;
long long global_max, global_sum;
MPI_Allreduce(&loc_node_cnt, &global_max, 1, par::Mpi_datatype<long long>::value(), MPI_MAX, *Comm());
MPI_Allreduce(&loc_node_cnt, &global_sum, 1, par::Mpi_datatype<long long>::value(), MPI_SUM, *Comm());
  388. //RedistNodes if needed.
  389. if(global_max*np>4*global_sum){
  390. #ifndef NDEBUG
  391. Profile::Tic("RedistNodes",Comm(),true,4);
  392. #endif
  393. RedistNodes();
  394. #ifndef NDEBUG
  395. Profile::Toc();
  396. #endif
  397. //Rebuild node list.
  398. leaf_nodes.clear();
  399. Node_t* n=this->PostorderFirst();
  400. while(n!=NULL){
  401. if(n->IsLeaf() && !n->IsGhost())
  402. leaf_nodes.push_back(n);
  403. n=this->PostorderNxt(n);
  404. }
  405. tree_node_cnt=leaf_nodes.size();
  406. }else{
  407. //Combine partial list of nodes.
  408. int node_cnt=0;
  409. for(int j=0;j<omp_p;j++)
  410. node_cnt+=leaf_nodes_[j].size();
  411. leaf_nodes.resize(node_cnt);
  412. #pragma omp parallel for
  413. for(int i=0;i<omp_p;i++){
  414. int offset=0;
  415. for(int j=0;j<i;j++)
  416. offset+=leaf_nodes_[j].size();
  417. for(size_t j=0;j<leaf_nodes_[i].size();j++)
  418. leaf_nodes[offset+j]=leaf_nodes_[i][j];
  419. }
  420. }
  421. }
  422. RedistNodes();
  423. MPI_Tree<TreeNode>::CoarsenTree();
  424. RedistNodes();
  425. MPI_Tree<TreeNode>::CoarsenTree();
  426. RedistNodes();
  427. }
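/**
 * \brief Repartitions the leaf octants across processes. The new partition
 * minima are gathered from loc_min (if provided) or computed with
 * par::partitionW on the current leaves. Each leaf is packed as
 * (MortonId, length, data), exchanged with par::Mpi_Alltoallv_sparse, and
 * unpacked into the locally rebuilt tree; octants outside the new local range
 * are truncated and marked as ghosts.
 */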
  428. template <class TreeNode>
  429. void MPI_Tree<TreeNode>::RedistNodes(MortonId* loc_min) {
  430. int np, myrank;
  431. MPI_Comm_size(*Comm(),&np);
  432. MPI_Comm_rank(*Comm(),&myrank);
  433. if(np==1)return;
  434. //Create a linear tree in dendro format.
  435. Node_t* curr_node=this->PreorderFirst();
  436. std::vector<MortonId> in;
  437. std::vector<Node_t*> node_lst;
  438. while(curr_node!=NULL){
  439. if(curr_node->IsLeaf() && !curr_node->IsGhost()){
  440. node_lst.push_back(curr_node);
  441. in.push_back(curr_node->GetMortonId());
  442. //in.back().setWeight(curr_node->NodeCost()); //Using default weights.
  443. }
  444. curr_node=this->PreorderNxt(curr_node);
  445. }
  446. size_t leaf_cnt=in.size();
  447. //Get new mins.
  448. std::vector<MortonId> new_mins(np);
  449. if(loc_min==NULL){
  450. //Partition vector of MortonIds using par::partitionW
  451. std::vector<MortonId> out=in;
  452. par::partitionW<MortonId>(out,NULL,*Comm());
  453. MPI_Allgather(&out[0] , 1, par::Mpi_datatype<MortonId>::value(),
  454. &new_mins[0], 1, par::Mpi_datatype<MortonId>::value(), *Comm());
  455. }else{
  456. MPI_Allgather(loc_min , 1, par::Mpi_datatype<MortonId>::value(),
  457. &new_mins[0], 1, par::Mpi_datatype<MortonId>::value(), *Comm());
  458. }
  459. //Now exchange nodes according to new mins
  460. std::vector<PackedData> data(leaf_cnt);
  461. std::vector<int> send_cnts; send_cnts.assign(np,0);
  462. std::vector<int> send_size; send_size.assign(np,0);
  463. size_t sbuff_size=0;
  464. int omp_p=omp_get_max_threads();
  465. #pragma omp parallel for reduction(+:sbuff_size)
  466. for(int i=0;i<omp_p;i++){
  467. size_t a=( i *np)/omp_p;
  468. size_t b=((i+1)*np)/omp_p;
  469. if(b>a){
  470. size_t p_iter=a;
size_t node_iter=std::lower_bound(&in[0], &in[0]+in.size(), new_mins[a])-&in[0];
  472. for( ;node_iter<node_lst.size();node_iter++){
  473. while(p_iter+1u<(size_t)np? in[node_iter]>=new_mins[p_iter+1]: false) p_iter++;
  474. if(p_iter>=b) break;
  475. send_cnts[p_iter]++;
  476. data[node_iter]=node_lst[node_iter]->Pack();
  477. send_size[p_iter]+=data[node_iter].length+sizeof(size_t)+sizeof(MortonId);
  478. sbuff_size +=data[node_iter].length+sizeof(size_t)+sizeof(MortonId);
  479. }
  480. }
  481. }
  482. std::vector<int> recv_cnts(np);
  483. std::vector<int> recv_size(np);
  484. MPI_Alltoall(&send_cnts[0], 1, par::Mpi_datatype<int>::value(),
  485. &recv_cnts[0], 1, par::Mpi_datatype<int>::value(), *Comm());
  486. MPI_Alltoall(&send_size[0], 1, par::Mpi_datatype<int>::value(),
  487. &recv_size[0], 1, par::Mpi_datatype<int>::value(), *Comm());
  488. size_t recv_cnt=0;
  489. #pragma omp parallel for reduction(+:recv_cnt)
  490. for(int i=0;i<np;i++) recv_cnt+=recv_cnts[i];
  491. std::vector<MortonId> out(recv_cnt);
  492. std::vector<int> sdisp; sdisp.assign(np,0);
  493. std::vector<int> rdisp; rdisp.assign(np,0);
  494. omp_par::scan(&send_size[0],&sdisp[0],np); //TODO Don't need to do a full scan
  495. omp_par::scan(&recv_size[0],&rdisp[0],np); // as most entries will be 0.
  496. size_t rbuff_size=rdisp[np-1]+recv_size[np-1];
  497. char* send_buff=new char[sbuff_size];
  498. char* recv_buff=new char[rbuff_size];
  499. std::vector<char*> data_ptr(leaf_cnt);
  500. char* s_ptr=send_buff;
  501. for(size_t i=0;i<leaf_cnt;i++){
  502. *((MortonId*)s_ptr)=in [i] ; s_ptr+=sizeof(MortonId);
  503. *(( size_t*)s_ptr)=data[i].length; s_ptr+=sizeof(size_t);
  504. data_ptr[i]=s_ptr ; s_ptr+=data[i].length;
  505. }
  506. #pragma omp parallel for
  507. for(int p=0;p<omp_p;p++){
  508. size_t a=( p *leaf_cnt)/omp_p;
  509. size_t b=((p+1)*leaf_cnt)/omp_p;
  510. for(size_t i=a;i<b;i++)
  511. mem::memcopy(data_ptr[i], data[i].data, data[i].length);
  512. }
  513. par::Mpi_Alltoallv_sparse<char>(&send_buff[0], &send_size[0], &sdisp[0],
  514. &recv_buff[0], &recv_size[0], &rdisp[0], *Comm());
  515. char* r_ptr=recv_buff;
  516. std::vector<PackedData> r_data(recv_cnt);
  517. for(size_t i=0;i<recv_cnt;i++){
  518. out [i] =*(MortonId*)r_ptr; r_ptr+=sizeof(MortonId);
  519. r_data[i].length=*( size_t*)r_ptr; r_ptr+=sizeof(size_t);
  520. r_data[i].data = r_ptr; r_ptr+=r_data[i].length;
  521. }
  522. //Initialize all new nodes.
  523. int nchld=1UL<<this->Dim();
  524. size_t node_iter=0;
  525. MortonId dn;
  526. node_lst.resize(recv_cnt);
  527. Node_t* n=this->PreorderFirst();
  528. while(n!=NULL && node_iter<recv_cnt){
  529. n->SetGhost(false);
  530. dn=n->GetMortonId();
  531. if(dn.isAncestor(out[node_iter]) && dn!=out[node_iter]){
  532. if(n->IsLeaf()){
  533. {
  534. n->SetGhost(true);
  535. n->Subdivide();
  536. n->SetGhost(false);
  537. for(int j=0;j<nchld;j++){
  538. Node_t* ch_node=(Node_t*)n->Child(j);
  539. ch_node->SetGhost(false);
  540. }
  541. }
  542. }
  543. }else if(dn==out[node_iter]){
  544. if(!n->IsLeaf()){
  545. n->Truncate();
  546. n->SetGhost(false);
  547. }
  548. node_lst[node_iter]=n;
  549. node_iter++;
  550. }else{
  551. n->Truncate(); //This node does not belong to this process.
  552. n->SetGhost(true);
  553. }
  554. n=this->PreorderNxt(n);
  555. }
  556. while(n!=NULL){
  557. n->Truncate();
  558. n->SetGhost(true);
  559. n=this->PreorderNxt(n);
  560. }
  561. #pragma omp parallel for
  562. for(int p=0;p<omp_p;p++){
  563. size_t a=( p *recv_cnt)/omp_p;
  564. size_t b=((p+1)*recv_cnt)/omp_p;
  565. for(size_t i=a;i<b;i++)
  566. node_lst[i]->Unpack(r_data[i]);
  567. }
  568. //Free memory buffers.
  569. delete[] recv_buff;
  570. delete[] send_buff;
  571. }
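/**
 * \brief Descends from start (or the root if start==NULL) toward the octant
 * with Morton id key. With subdiv==true, non-ghost leaves on the path are
 * subdivided so that a node with Morton id exactly equal to key is created;
 * otherwise the deepest existing node whose range contains key is returned.
 * For example:
 *
 *   Node_t* n=FindNode(mid, true);   // creates ancestors of mid as needed
 *   assert(n->GetMortonId()==mid);   // holds unless the path hits a ghost
 */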
  572. template <class TreeNode>
  573. TreeNode* MPI_Tree<TreeNode>::FindNode(MortonId& key, bool subdiv, TreeNode* start){
  574. int num_child=1UL<<this->Dim();
  575. Node_t* n=start;
  576. if(n==NULL) n=this->RootNode();
  577. while(n->GetMortonId()<key && (!n->IsLeaf()||subdiv)){
  578. if(n->IsLeaf() && !n->IsGhost()) n->Subdivide();
  579. if(n->IsLeaf()) break;
  580. for(int j=0;j<num_child;j++){
  581. if(((Node_t*)n->Child(j))->GetMortonId().NextId()>key){
  582. n=(Node_t*)n->Child(j);
  583. break;
  584. }
  585. }
  586. }
  587. assert(!subdiv || n->IsGhost() || n->GetMortonId()==key);
  588. return n;
  589. }
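/**
 * \brief Removes duplicates and ancestors from a distributed, globally sorted
 * list of Morton ids so that the surviving octants form a valid linear octree.
 * Ranks with an empty list are first split into a separate communicator; each
 * remaining rank receives the last octant of the previous rank to resolve
 * duplicate/ancestor pairs that straddle a process boundary.
 */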
  590. //list must be sorted.
  591. inline int lineariseList(std::vector<MortonId> & list, MPI_Comm comm) {
  592. int rank,size;
  593. MPI_Comm_rank(comm,&rank);
  594. MPI_Comm_size(comm,&size);
  595. //Remove empty processors...
  596. int new_rank, new_size;
  597. MPI_Comm new_comm;
  598. MPI_Comm_split(comm, (list.empty()?0:1), rank, &new_comm);
  599. MPI_Comm_rank (new_comm, &new_rank);
  600. MPI_Comm_size (new_comm, &new_size);
  601. if(!list.empty()) {
  602. //Send the last octant to the next processor.
  603. MortonId lastOctant = list[list.size()-1];
  604. MortonId lastOnPrev;
  605. MPI_Request recvRequest;
  606. MPI_Request sendRequest;
  607. if(new_rank > 0) {
  608. MPI_Irecv(&lastOnPrev, 1, par::Mpi_datatype<MortonId>::value(), new_rank-1, 1, new_comm, &recvRequest);
  609. }
  610. if(new_rank < (new_size-1)) {
  611. MPI_Issend( &lastOctant, 1, par::Mpi_datatype<MortonId>::value(), new_rank+1, 1, new_comm, &sendRequest);
  612. }
  613. if(new_rank > 0) {
  614. std::vector<MortonId> tmp(list.size()+1);
  615. for(size_t i = 0; i < list.size(); i++) {
  616. tmp[i+1] = list[i];
  617. }
  618. MPI_Status statusWait;
  619. MPI_Wait(&recvRequest, &statusWait);
  620. tmp[0] = lastOnPrev;
  621. list = tmp;
  622. tmp.clear();
  623. }
  624. {// Remove duplicates and ancestors.
  625. std::vector<MortonId> tmp;
  626. if(!(list.empty())) {
  627. for(unsigned int i = 0; i < (list.size()-1); i++) {
  628. if( (!(list[i].isAncestor(list[i+1]))) && (list[i] != list[i+1]) ) {
  629. tmp.push_back(list[i]);
  630. }
  631. }
  632. if(new_rank == (new_size-1)) {
  633. tmp.push_back(list[list.size()-1]);
  634. }
  635. }
  636. list = tmp;
  637. tmp.clear();
  638. }
  639. if(new_rank < (new_size-1)) {
  640. MPI_Status statusWait;
  641. MPI_Wait(&sendRequest, &statusWait);
  642. }
  643. }//not empty procs only
  644. return 1;
  645. }//end fn.
  646. inline unsigned int balance_wt(const MortonId* n){
  647. return n->GetDepth();
  648. }
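/**
 * \brief Enforces the 2:1 balance constraint on a distributed linear octree
 * (leaves that touch may differ by at most one level). Working level by level
 * from maxDepth upwards, each thread adds, for every octant at level l, the
 * siblings of its level l-1 neighbours, then removes octants that became
 * non-leaves; the result is globally sorted, linearised with lineariseList and
 * repartitioned.
 */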
  649. inline int balanceOctree (std::vector<MortonId > &in, std::vector<MortonId > &out,
  650. unsigned int dim, unsigned int maxDepth, bool periodic, MPI_Comm comm) {
  651. int omp_p=omp_get_max_threads();
  652. int rank, size;
  653. MPI_Comm_size(comm,&size);
  654. MPI_Comm_rank(comm,&rank);
  655. #ifdef __VERBOSE__
  656. long long locInSize = in.size();
  657. #endif
  658. //////////////////////////////////////////////////////////////////////////////////////////////////
  659. //Redistribute.
  660. par::partitionW<MortonId>(in, balance_wt, comm);
  661. //Build level-by-level set of nodes.
  662. std::vector<std::set<MortonId> > nodes((maxDepth+1)*omp_p);
  663. #pragma omp parallel for
  664. for(int p=0;p<omp_p;p++){
  665. size_t a=( p *in.size())/omp_p;
  666. size_t b=((p+1)*in.size())/omp_p;
  667. for(size_t i=a;i<b;i++)
  668. nodes[in[i].GetDepth()+(maxDepth+1)*p].insert(in[i]);
  669. //Add new nodes level-by-level.
  670. std::vector<MortonId> nbrs;
  671. unsigned int num_chld=1UL<<dim;
  672. for(unsigned int l=maxDepth;l>1;l--){
  673. //Build set of parents of balancing nodes.
  674. std::set<MortonId> nbrs_parent;
  675. std::set<MortonId>::iterator start=nodes[l+(maxDepth+1)*p].begin();
  676. std::set<MortonId>::iterator end =nodes[l+(maxDepth+1)*p].end();
  677. for(std::set<MortonId>::iterator node=start; node != end;){
  678. node->NbrList(nbrs, l-1, periodic);
  679. int nbr_cnt=nbrs.size();
  680. for(int i=0;i<nbr_cnt;i++)
  681. nbrs_parent.insert(nbrs[i].getAncestor(nbrs[i].GetDepth()-1));
  682. //node++; //Optimize this by skipping siblings.
  683. MortonId pnode=node->getAncestor(node->GetDepth()-1);
  684. while(node != end && pnode==node->getAncestor(node->GetDepth()-1)) node++;
  685. }
  686. //Get the balancing nodes.
  687. std::set<MortonId>& ancestor_nodes=nodes[l-1+(maxDepth+1)*p];
  688. start=nbrs_parent.begin();
  689. end =nbrs_parent.end();
  690. for(std::set<MortonId>::iterator node=start; node != end; node++){
  691. std::vector<MortonId> children;
  692. children=node->Children();
  693. for(unsigned int j=0;j<num_chld;j++)
  694. ancestor_nodes.insert(children[j]);
  695. }
  696. }
  697. //Remove non-leaf nodes.
  698. for(unsigned int l=1;l<=maxDepth;l++){
  699. std::set<MortonId>::iterator start=nodes[l +(maxDepth+1)*p].begin();
  700. std::set<MortonId>::iterator end =nodes[l +(maxDepth+1)*p].end();
  701. std::set<MortonId>& ancestor_nodes=nodes[l-1+(maxDepth+1)*p];
  702. for(std::set<MortonId>::iterator node=start; node != end; node++){
  703. MortonId parent=node->getAncestor(node->GetDepth()-1);
  704. ancestor_nodes.erase(parent);
  705. }
  706. }
  707. }
  708. //Resize in.
  709. std::vector<size_t> node_cnt(omp_p,0);
  710. std::vector<size_t> node_dsp(omp_p,0);
  711. #pragma omp parallel for
  712. for(int i=0;i<omp_p;i++){
  713. for(unsigned int j=0;j<=maxDepth;j++)
  714. node_cnt[i]+=nodes[j+i*(maxDepth+1)].size();
  715. }
  716. omp_par::scan(&node_cnt[0],&node_dsp[0], omp_p);
  717. in.resize(node_cnt[omp_p-1]+node_dsp[omp_p-1]);
  718. //Copy leaf nodes to in.
  719. #pragma omp parallel for
  720. for(int p=0;p<omp_p;p++){
  721. size_t node_iter=node_dsp[p];
  722. for(unsigned int l=maxDepth;l>=1;l--){
  723. std::set<MortonId>::iterator start=nodes[l +(maxDepth+1)*p].begin();
  724. std::set<MortonId>::iterator end =nodes[l +(maxDepth+1)*p].end();
  725. for(std::set<MortonId>::iterator node=start; node != end; node++)
  726. in[node_iter++]=*node;
  727. }
  728. }
  729. #ifdef __VERBOSE__
  730. //Local size before removing duplicates and ancestors (linearise).
  731. long long locTmpSize = in.size();
  732. #endif
  733. //Sort, Linearise, Redistribute.
  734. //TODO Sort and linearize non-leaf nodes and then add leaf nodes.
  735. //TODO The following might work better as it reduces the comm bandwidth:
  736. //Split comm into sqrt(np) processes and sort, linearise for each comm group.
  737. //Then do the global sort, linearise with the original comm.
  738. par::HyperQuickSort(in, out, comm);
  739. lineariseList(out, comm);
  740. par::partitionW<MortonId>(out, NULL , comm);
  741. //////////////////////////////////////////////////////////////////////////////////////////////////
  742. #ifdef __VERBOSE__
  743. long long locOutSize = out.size();
  744. long long globInSize, globTmpSize, globOutSize;
  745. MPI_Allreduce(&locInSize , &globInSize , 1, par::Mpi_datatype<long long>::value(), MPI_SUM, comm);
  746. MPI_Allreduce(&locTmpSize, &globTmpSize, 1, par::Mpi_datatype<long long>::value(), MPI_SUM, comm);
  747. MPI_Allreduce(&locOutSize, &globOutSize, 1, par::Mpi_datatype<long long>::value(), MPI_SUM, comm);
  748. if(!rank) std::cout<<"Balance Octree. inpSize: "<<globInSize
  749. <<" tmpSize: "<<globTmpSize
  750. <<" outSize: "<<globOutSize
  751. <<" activeNpes: "<<size<<std::endl;
  752. #endif
  753. return 0;
  754. }//end function
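/**
 * \brief Globally 2:1 balances the tree: the non-ghost leaves are collected
 * into a linear octree, balanced with balanceOctree (Dendro-style), the tree
 * is repartitioned to the new splitters with RedistNodes, and the local
 * pointer tree is then subdivided or truncated so that its leaves match the
 * balanced octree.
 */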
  755. template <class TreeNode>
  756. void MPI_Tree<TreeNode>::Balance21(BoundaryType bndry) {
  757. int num_proc,myrank;
  758. MPI_Comm_rank(*Comm(),&myrank);
  759. MPI_Comm_size(*Comm(),&num_proc);
  760. //Using Dendro for balancing
  761. //Create a linear tree in dendro format.
  762. Node_t* curr_node=this->PreorderFirst();
  763. std::vector<MortonId> in;
  764. while(curr_node!=NULL){
  765. if(curr_node->IsLeaf() && !curr_node->IsGhost()){
  766. in.push_back(curr_node->GetMortonId());
  767. }
  768. curr_node=this->PreorderNxt(curr_node);
  769. }
  770. //2:1 balance
  771. Profile::Tic("ot::balanceOctree",Comm(),true,3);
  772. std::vector<MortonId> out;
  773. balanceOctree(in, out, this->Dim(), this->max_depth, (bndry==Periodic), *Comm());
  774. Profile::Toc();
  775. //Get new_mins.
  776. std::vector<MortonId> new_mins(num_proc);
  777. MPI_Allgather(&out[0] , 1, par::Mpi_datatype<MortonId>::value(),
  778. &new_mins[0], 1, par::Mpi_datatype<MortonId>::value(), *Comm());
  779. // Refine to new_mins in my range of octants
  780. // or else RedistNodes(...) will not work correctly.
  781. {
  782. int i=0;
  783. std::vector<MortonId> mins=GetMins();
while(i<num_proc && new_mins[i]<mins[myrank]) i++; //TODO: Use binary search.
  785. for(;i<num_proc;i++){
  786. Node_t* n=FindNode(new_mins[i], true);
  787. if(n->IsGhost()) break;
  788. else assert(n->GetMortonId()==new_mins[i]);
  789. }
  790. }
  791. //Redist nodes using new_mins.
  792. Profile::Tic("RedistNodes",Comm(),true,3);
  793. RedistNodes(&out[0]);
  794. #ifndef NDEBUG
  795. std::vector<MortonId> mins=GetMins();
  796. assert(mins[myrank].getDFD()==out[0].getDFD());
  797. #endif
  798. Profile::Toc();
  799. //Now subdivide the current tree as necessary to make it balanced.
  800. Profile::Tic("LocalSubdivide",Comm(),false,3);
  801. int omp_p=omp_get_max_threads();
  802. for(int i=0;i<omp_p;i++){
  803. size_t a=(out.size()*i)/omp_p;
  804. Node_t* n=FindNode(out[a], true);
  805. assert(n->GetMortonId()==out[a]);
  806. UNUSED(n);
  807. }
  808. #pragma omp parallel for
  809. for(int i=0;i<omp_p;i++){
  810. size_t a=(out.size()* i )/omp_p;
  811. size_t b=(out.size()*(i+1))/omp_p;
  812. MortonId dn;
  813. size_t node_iter=a;
  814. Node_t* n=FindNode(out[node_iter], false);
  815. while(n!=NULL && node_iter<b){
  816. n->SetGhost(false);
  817. dn=n->GetMortonId();
  818. if(dn.isAncestor(out[node_iter]) && dn!=out[node_iter]){
  819. if(n->IsLeaf()) n->Subdivide();
  820. }else if(dn==out[node_iter]){
  821. assert(n->IsLeaf());
  822. //if(!n->IsLeaf()){ //This should never happen
  823. // n->Truncate();
  824. // n->SetGhost(false);
  825. //}
  826. assert(n->IsLeaf());
  827. node_iter++;
  828. }else{
  829. n->Truncate(); //This node does not belong to this process.
  830. n->SetGhost(true);
  831. }
  832. n=this->PreorderNxt(n);
  833. }
  834. if(i==omp_p-1){
  835. while(n!=NULL){
  836. n->Truncate();
  837. n->SetGhost(true);
  838. n=this->PreorderNxt(n);
  839. }
  840. }
  841. }
  842. Profile::Toc();
  843. }
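/**
 * \brief Local 2:1 balance without communication: for every non-leaf octant
 * whose same-level colleague slot is empty, the tree is subdivided from the
 * root down to that colleague's position (with periodic wrap-around when
 * bndry==Periodic). Assumes colleague pointers have been set beforehand,
 * e.g. via SetColleagues.
 */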
  844. template <class TreeNode>
  845. void MPI_Tree<TreeNode>::Balance21_local(BoundaryType bndry){
  846. //SetColleagues(bndry);
  847. std::vector<std::vector<Node_t*> > node_lst(this->max_depth+1);
  848. Node_t* curr_node=this->PreorderFirst();
  849. while(curr_node!=NULL){
  850. node_lst[curr_node->Depth()].push_back(curr_node);
  851. curr_node=this->PreorderNxt(curr_node);
  852. }
int n1=(int)pow(3.0,this->Dim());
int n2=(int)pow(2.0,this->Dim());
  855. for(int i=this->max_depth;i>0;i--){
  856. Real_t s=pow(0.5,i);
  857. for(size_t j=0;j<node_lst[i].size();j++){
  858. curr_node=node_lst[i][j];
  859. Real_t* coord=curr_node->Coord();
  860. if(!curr_node->IsLeaf()) for(int k=0;k<n1;k++){
  861. if(curr_node->Colleague(k)==NULL){
  862. Real_t c0[3]={coord[0]+((k/1)%3-1)*s+s*0.5,
  863. coord[1]+((k/3)%3-1)*s+s*0.5,
  864. coord[2]+((k/9)%3-1)*s+s*0.5};
  865. if(bndry==Periodic){
  866. c0[0]=c0[0]-floor(c0[0]);
  867. c0[1]=c0[1]-floor(c0[1]);
  868. c0[2]=c0[2]-floor(c0[2]);
  869. }
  870. if(c0[0]>0 && c0[0]<1)
  871. if(c0[1]>0 && c0[1]<1)
  872. if(c0[2]>0 && c0[2]<1){
  873. Node_t* node=this->RootNode();
  874. while(node->Depth()<i){
  875. if(node->IsLeaf()){
  876. node->Subdivide();
  877. for(int l=0;l<n2;l++){
  878. node_lst[node->Depth()+1].push_back((Node_t*)node->Child(l));
  879. /*
  880. SetColleagues(bndry,(Node_t*)node->Child(l));
  881. for(int i_=0;i_<n1;i_++){
  882. Node_t* coll=(Node_t*)((Node_t*)node->Child(l))->Colleague(i_);
  883. if(coll!=NULL) SetColleagues(bndry,coll);
  884. }// */
  885. }
  886. }
  887. Real_t s1=pow(0.5,node->Depth()+1);
  888. Real_t* c1=node->Coord();
  889. int c_id=((c0[0]-c1[0])>s1?1:0)+
  890. ((c0[1]-c1[1])>s1?2:0)+
  891. ((c0[2]-c1[2])>s1?4:0);
  892. node=(Node_t*)node->Child(c_id);
  893. /*if(node->Depth()==i){
  894. c1=node->Coord();
  895. std::cout<<(c0[0]-c1[0])-s1/2<<' '
  896. std::cout<<(c0[1]-c1[1])-s1/2<<' '
  897. std::cout<<(c0[2]-c1[2])-s1/2<<'\n';
  898. }// */
  899. }
  900. }
  901. }
  902. }
  903. }
  904. }
  905. }
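/**
 * \brief Fills the colleague table of node: the 3^dim same-level octants
 * (including the node itself) that touch it. With node==NULL the whole tree is
 * processed level by level. For a single node the colleagues are found among
 * the children of the parent's colleagues, computing the per-dimension offset
 * from the base-3 colleague index and the base-2 child indices.
 */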
  906. template <class TreeNode>
  907. void MPI_Tree<TreeNode>::SetColleagues(BoundaryType bndry, Node_t* node){
  908. int n1=(int)pow(3.0,this->Dim());
  909. int n2=(int)pow(2.0,this->Dim());
  910. if(node==NULL){
  911. Node_t* curr_node=this->PreorderFirst();
  912. if(curr_node!=NULL){
  913. if(bndry==Periodic){
  914. for(int i=0;i<n1;i++)
  915. curr_node->SetColleague(curr_node,i);
  916. }else{
  917. curr_node->SetColleague(curr_node,(n1-1)/2);
  918. }
  919. curr_node=this->PreorderNxt(curr_node);
  920. }
  921. Vector<std::vector<Node_t*> > nodes(MAX_DEPTH);
  922. while(curr_node!=NULL){
  923. nodes[curr_node->Depth()].push_back(curr_node);
  924. curr_node=this->PreorderNxt(curr_node);
  925. }
  926. for(size_t i=0;i<MAX_DEPTH;i++){
  927. size_t j0=nodes[i].size();
  928. Node_t** nodes_=&nodes[i][0];
  929. #pragma omp parallel for
  930. for(size_t j=0;j<j0;j++){
  931. SetColleagues(bndry, nodes_[j]);
  932. }
  933. }
  934. }else{
  935. /* //This is slower
  936. Node_t* root_node=this->RootNode();
  937. int d=node->Depth();
  938. Real_t* c0=node->Coord();
  939. Real_t s=pow(0.5,d);
  940. Real_t c[COORD_DIM];
  941. int idx=0;
  942. for(int i=-1;i<=1;i++)
  943. for(int j=-1;j<=1;j++)
  944. for(int k=-1;k<=1;k++){
  945. c[0]=c0[0]+s*0.5+s*k;
  946. c[1]=c0[1]+s*0.5+s*j;
  947. c[2]=c0[2]+s*0.5+s*i;
  948. if(bndry==Periodic){
  949. if(c[0]<0.0) c[0]+=1.0;
  950. if(c[0]>1.0) c[0]-=1.0;
  951. if(c[1]<1.0) c[1]+=1.0;
  952. if(c[1]>1.0) c[1]-=1.0;
  953. if(c[2]<1.0) c[2]+=1.0;
  954. if(c[2]>1.0) c[2]-=1.0;
  955. }
  956. node->SetColleague(NULL,idx);
  957. if(c[0]<1.0 && c[0]>0.0)
  958. if(c[1]<1.0 && c[1]>0.0)
  959. if(c[2]<1.0 && c[2]>0.0){
  960. MortonId m(c,d);
  961. Node_t* nbr=FindNode(m,false,root_node);
  962. while(nbr->Depth()>d) nbr=(Node_t*)nbr->Parent();
  963. if(nbr->Depth()==d) node->SetColleague(nbr,idx);
  964. }
  965. idx++;
  966. }
  967. /*/
  968. Node_t* parent_node;
  969. Node_t* tmp_node1;
  970. Node_t* tmp_node2;
  971. for(int i=0;i<n1;i++)node->SetColleague(NULL,i);
  972. parent_node=(Node_t*)node->Parent();
  973. if(parent_node==NULL) return;
  974. int l=node->Path2Node();
  975. for(int i=0;i<n1;i++){ //For each coll of the parent
  976. tmp_node1=(Node_t*)parent_node->Colleague(i);
  977. if(tmp_node1!=NULL)
  978. if(!tmp_node1->IsLeaf()){
  979. for(int j=0;j<n2;j++){ //For each child
  980. tmp_node2=(Node_t*)tmp_node1->Child(j);
  981. if(tmp_node2!=NULL){
  982. bool flag=true;
  983. int a=1,b=1,new_indx=0;
  984. for(int k=0;k<this->Dim();k++){
  985. int indx_diff=(((i/b)%3)-1)*2+((j/a)%2)-((l/a)%2);
  986. if(-1>indx_diff || indx_diff>1) flag=false;
  987. new_indx+=(indx_diff+1)*b;
  988. a*=2;b*=3;
  989. }
  990. if(flag){
  991. node->SetColleague(tmp_node2,new_indx);
  992. }
  993. }
  994. }
  995. }
  996. }// */
  997. }
  998. }
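/**
 * \brief Debug check of the distributed-tree invariants: every non-ghost leaf
 * lies inside this rank's Morton-id range [mins[rank], mins[rank+1]), no ghost
 * leaf lies inside that range, and the parent of a non-ghost node is never a
 * ghost. Asserts on violation, otherwise returns true.
 */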
  999. template <class TreeNode>
  1000. bool MPI_Tree<TreeNode>::CheckTree(){
  1001. int myrank,np;
  1002. MPI_Comm_rank(*Comm(),&myrank);
  1003. MPI_Comm_size(*Comm(),&np);
  1004. std::vector<MortonId> mins=GetMins();
  1005. Node_t* n=this->PostorderFirst();
  1006. while(n!=NULL){
  1007. if(myrank<np-1) if(n->GetMortonId().getDFD()>=mins[myrank+1])break;
  1008. if(n->GetMortonId()>=mins[myrank] && n->IsLeaf() && n->IsGhost()){
  1009. std::cout<<n->GetMortonId()<<'\n';
  1010. std::cout<<mins[myrank]<<'\n';
  1011. if(myrank+1<np) std::cout<<mins[myrank+1]<<'\n';
  1012. std::cout<<myrank<<'\n';
  1013. assert(false);
  1014. }
  1015. if(n->GetMortonId()<mins[myrank] && n->IsLeaf() && !n->IsGhost()){
  1016. assert(false);
  1017. }
  1018. if(!n->IsGhost() && n->Depth()>0)
  1019. assert(!((Node_t*)n->Parent())->IsGhost());
  1020. n=this->PostorderNxt(n);
  1021. }
  1022. while(n!=NULL){
  1023. if(n->IsLeaf() && !n->IsGhost()){
  1024. assert(false);
  1025. }
  1026. n=this->PostorderNxt(n);
  1027. }
  1028. return true;
  1029. };
  1030. /**
  1031. * \brief Determines if node is used in the region between Morton Ids m1 and m2
  1032. * ( m1 <= m2 ).
  1033. */
  1034. template <class TreeNode>
  1035. void IsShared(std::vector<TreeNode*>& nodes, MortonId* m1, MortonId* m2, BoundaryType bndry, std::vector<char>& shared_flag){
  1036. MortonId mm1, mm2;
  1037. if(m1!=NULL) mm1=m1->getDFD();
  1038. if(m2!=NULL) mm2=m2->getDFD();
  1039. shared_flag.resize(nodes.size());
  1040. int omp_p=omp_get_max_threads();
  1041. #pragma omp parallel for
  1042. for(int j=0;j<omp_p;j++){
  1043. size_t a=((j )*nodes.size())/omp_p;
  1044. size_t b=((j+1)*nodes.size())/omp_p;
  1045. std::vector<MortonId> nbr_lst;
  1046. for(size_t i=a;i<b;i++){
  1047. shared_flag[i]=false;
  1048. TreeNode* node=nodes[i];
  1049. assert(node!=NULL);
  1050. if(node->Depth()<2){
  1051. shared_flag[i]=true;
  1052. continue;
  1053. }
  1054. node->GetMortonId().NbrList(nbr_lst, node->Depth()-1, bndry==Periodic);
  1055. for(size_t k=0;k<nbr_lst.size();k++){
  1056. MortonId n1=nbr_lst[k] .getDFD();
  1057. MortonId n2=nbr_lst[k].NextId().getDFD();
  1058. if(m1==NULL || n2>mm1)
  1059. if(m2==NULL || n1<mm2){
  1060. shared_flag[i]=true;
  1061. break;
  1062. }
  1063. }
  1064. }
  1065. }
  1066. }
  1067. inline void IsShared(std::vector<PackedData>& nodes, MortonId* m1, MortonId* m2, BoundaryType bndry, std::vector<char>& shared_flag){
  1068. MortonId mm1, mm2;
  1069. if(m1!=NULL) mm1=m1->getDFD();
  1070. if(m2!=NULL) mm2=m2->getDFD();
  1071. shared_flag.resize(nodes.size());
  1072. int omp_p=omp_get_max_threads();
  1073. #pragma omp parallel for
  1074. for(int j=0;j<omp_p;j++){
  1075. size_t a=((j )*nodes.size())/omp_p;
  1076. size_t b=((j+1)*nodes.size())/omp_p;
  1077. std::vector<MortonId> nbr_lst;
  1078. for(size_t i=a;i<b;i++){
  1079. shared_flag[i]=false;
  1080. MortonId* node=(MortonId*)nodes[i].data;
  1081. assert(node!=NULL);
  1082. if(node->GetDepth()<2){
  1083. shared_flag[i]=true;
  1084. continue;
  1085. }
  1086. node->NbrList(nbr_lst, node->GetDepth()-1, bndry==Periodic);
  1087. for(size_t k=0;k<nbr_lst.size();k++){
  1088. MortonId n1=nbr_lst[k] .getDFD();
  1089. MortonId n2=nbr_lst[k].NextId().getDFD();
  1090. if(m1==NULL || n2>mm1)
  1091. if(m2==NULL || n1<mm2){
  1092. shared_flag[i]=true;
  1093. break;
  1094. }
  1095. }
  1096. }
  1097. }
  1098. }
  1099. /**
  1100. * \brief Hypercube based scheme to exchange Ghost octants.
  1101. */
  1102. //#define PREFETCH_T0(addr,nrOfBytesAhead) _mm_prefetch(((char *)(addr))+nrOfBytesAhead,_MM_HINT_T0)
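/**
 * \brief ConstructLET exchanges ghost octants with a hypercube-style scheme:
 * the rank range is halved every round, each rank exchanges with a mirror
 * partner in the other half the packed data of all octants it shares with
 * that half (determined by IsShared against the partition mins), and received
 * octants that are still shared with the surviving half are forwarded in
 * later rounds. The received octants are finally inserted into the tree as
 * ghost nodes and unpacked.
 */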
  1103. template <class TreeNode>
  1104. void MPI_Tree<TreeNode>::ConstructLET(BoundaryType bndry){
  1105. int num_p,rank;
  1106. MPI_Comm_size(*Comm(),&num_p);
  1107. MPI_Comm_rank(*Comm(),&rank );
  1108. if(num_p==1) return;
  1109. int omp_p=omp_get_max_threads();
  1110. std::vector<MortonId> mins=GetMins();
  1111. // Build list of shared nodes.
  1112. std::vector<Node_t*> shared_nodes; shared_nodes.clear();
  1113. std::vector<Node_t*> node_lst; node_lst.clear();
  1114. Node_t* curr_node=this->PreorderFirst();
  1115. while(curr_node!=NULL){
  1116. if(curr_node->GetMortonId().getDFD()>=mins[rank]) break;
  1117. curr_node=this->PreorderNxt(curr_node);
  1118. }
  1119. while(curr_node!=NULL){
  1120. if(curr_node->IsGhost()) break;
  1121. node_lst.push_back(curr_node);
  1122. curr_node=this->PreorderNxt(curr_node);
  1123. }
  1124. std::vector<char> node_flag0; node_flag0.clear();
  1125. std::vector<char> node_flag1; node_flag1.clear();
  1126. IsShared(node_lst,&mins[0],&mins[rank],bndry,node_flag0);
  1127. if(rank<num_p-1) IsShared(node_lst,&mins[rank+1],NULL,bndry,node_flag1);
  1128. for(size_t i=0;i<node_lst.size();i++){
  1129. if(node_flag0[i] || (rank<num_p-1 && node_flag1[i]))
  1130. shared_nodes.push_back(node_lst[i]);
  1131. }
  1132. //std::cout<<"Shared = "<<shared_nodes.size()<<'\n';
  1133. // Pack shared nodes.
  1134. #ifdef NDEBUG
  1135. static std::vector<char> shrd_buff_vec0(omp_p*64l*1024l*1024l);
  1136. static std::vector<char> shrd_buff_vec1(omp_p*128l*1024l*1024l);
  1137. static std::vector<char> send_buff_vec(omp_p*64l*1024l*1024l); char* send_buff;
  1138. static std::vector<char> recv_buff_vec(omp_p*64l*1024l*1024l); char* recv_buff;
  1139. #else
  1140. std::vector<char> shrd_buff_vec0(omp_p*64l*1024l*1024l);
  1141. std::vector<char> shrd_buff_vec1(omp_p*128l*1024l*1024l);
  1142. std::vector<char> send_buff_vec(omp_p*64l*1024l*1024l); char* send_buff;
  1143. std::vector<char> recv_buff_vec(omp_p*64l*1024l*1024l); char* recv_buff;
  1144. #endif
  1145. std::vector<PackedData> shrd_data;
  1146. size_t max_data_size=0;
  1147. {
  1148. long max_data_size_lcl=0;
  1149. long max_data_size_glb=0;
  1150. char* data_ptr=&shrd_buff_vec0[0];
  1151. for(size_t i=0;i<shared_nodes.size();i++){
  1152. PackedData p=shared_nodes[i]->Pack(true,data_ptr,sizeof(MortonId));
  1153. ((MortonId*)data_ptr)[0]=shared_nodes[i]->GetMortonId();
  1154. p.length+=sizeof(MortonId);
  1155. shrd_data.push_back(p);
  1156. data_ptr+=p.length;
  1157. if(max_data_size_lcl<(long)p.length) max_data_size_lcl=p.length;
  1158. assert(data_ptr<=&(*shrd_buff_vec0.end())); //TODO: resize if needed.
  1159. }
  1160. MPI_Allreduce(&max_data_size_lcl, &max_data_size_glb, 1, MPI_LONG, MPI_MAX, *Comm());
  1161. max_data_size=max_data_size_glb;
  1162. }
  1163. // Memory slots for storing node data.
  1164. std::set<void*> mem_set;
  1165. size_t mem_set_size=0;
  1166. size_t range[2]={0,(size_t)num_p-1};
  1167. while(range[1]-range[0]>0){
  1168. size_t split_p=(range[0]+range[1])/2;
  1169. size_t new_range[2]={(size_t)rank<=split_p?range[0]:split_p+1,(size_t)rank<=split_p?split_p:range[1]};
  1170. size_t com_range[2]={(size_t)rank> split_p?range[0]:split_p+1,(size_t)rank> split_p?split_p:range[1]};
  1171. size_t partner=rank-new_range[0]+com_range[0];
  1172. if(partner>range[1]) partner--;
  1173. bool extra_partner=((size_t)rank==range[1] && ((range[1]-range[0])%2)==0);
  1174. int send_length=0;
  1175. std::vector<PackedData> shrd_data_new;
  1176. IsShared(shrd_data, &mins[com_range[0]], (com_range[1]==(size_t)num_p-1?NULL:&mins[com_range[1]+1]),bndry, node_flag0);
  1177. IsShared(shrd_data, &mins[new_range[0]], (new_range[1]==(size_t)num_p-1?NULL:&mins[new_range[1]+1]),bndry, node_flag1);
  1178. {
  1179. std::vector<void*> srctrg_ptr;
  1180. std::vector<size_t> mem_size;
  1181. for(size_t i=0;i<shrd_data.size();i++){
  1182. PackedData& p=shrd_data[i];
  1183. if( node_flag0[i]){ // Copy data to send buffer.
  1184. char* data_ptr=(char*)&send_buff_vec[send_length];
  1185. ((size_t*)data_ptr)[0]=p.length; data_ptr+=sizeof(size_t);
  1186. //mem::memcopy(data_ptr,p.data,p.length);
  1187. mem_size.push_back(p.length);
  1188. srctrg_ptr.push_back(p.data);
  1189. srctrg_ptr.push_back(data_ptr);
  1190. send_length+=p.length+sizeof(size_t);
  1191. assert(send_length<=(int)send_buff_vec.size()); //TODO: resize if needed.
  1192. }
  1193. if(!node_flag1[i]){ // Free memory slot.
  1194. //assert(node_flag0[0]);
  1195. if(p.data>=&shrd_buff_vec1[0] && p.data<&shrd_buff_vec1[0]+shrd_buff_vec1.size())
  1196. mem_set.insert(p.data);
  1197. } else shrd_data_new.push_back(p);
  1198. }
  1199. shrd_data=shrd_data_new;
  1200. #pragma omp parallel for
  1201. for(int k=0;k<omp_p;k++){
  1202. size_t i0=((k+0)*mem_size.size())/omp_p;
  1203. size_t i1=((k+1)*mem_size.size())/omp_p;
  1204. for(size_t i=i0;i<i1;i++){
  1205. mem::memcopy(srctrg_ptr[2*i+1],srctrg_ptr[2*i+0],mem_size[i]);
  1206. }
  1207. }
  1208. }
  1209. //Exchange send size.
  1210. int recv_length=0;
  1211. int extra_recv_length=0;
  1212. int extra_send_length=0;
  1213. MPI_Status status;
  1214. MPI_Sendrecv (& send_length,1,MPI_INT,partner,0, &recv_length,1,MPI_INT,partner,0,*Comm(),&status);
  1215. if(extra_partner) MPI_Sendrecv(&extra_send_length,1,MPI_INT,split_p,0,&extra_recv_length,1,MPI_INT,split_p,0,*Comm(),&status);
  1216. //SendRecv data.
  1217. assert(send_length <=(int)send_buff_vec.size()); send_buff=&send_buff_vec[0];
  1218. assert(recv_length+extra_recv_length<=(int)recv_buff_vec.size()); recv_buff=&recv_buff_vec[0];
  1219. MPI_Sendrecv (send_buff,send_length,MPI_BYTE,partner,0, recv_buff , recv_length,MPI_BYTE,partner,0,*Comm(),&status);
  1220. if(extra_partner) MPI_Sendrecv( NULL, 0,MPI_BYTE,split_p,0,&recv_buff[recv_length],extra_recv_length,MPI_BYTE,split_p,0,*Comm(),&status);
  1221. //Get nodes from received data.
  1222. {
  1223. std::vector<void*> srctrg_ptr;
  1224. std::vector<size_t> mem_size;
  1225. int buff_length=0;
  1226. while(buff_length<recv_length+extra_recv_length){
  1227. PackedData p0,p1;
  1228. p0.length=((size_t*)&recv_buff_vec[buff_length])[0];
  1229. p0.data=(char*)&recv_buff_vec[buff_length]+sizeof(size_t);
  1230. buff_length+=p0.length+sizeof(size_t);
  1231. p1.length=p0.length;
  1232. if(mem_set.size()==0){
  1233. assert(mem_set_size*max_data_size<shrd_buff_vec1.size());
  1234. p1.data=&shrd_buff_vec1[mem_set_size*max_data_size];
  1235. mem_set_size++;
  1236. }else{
  1237. p1.data=*mem_set.begin();
  1238. mem_set.erase(mem_set.begin());
  1239. }
  1240. //mem::memcopy(p1.data,p0.data,p0.length);
  1241. mem_size.push_back(p0.length);
  1242. srctrg_ptr.push_back(p0.data);
  1243. srctrg_ptr.push_back(p1.data);
  1244. shrd_data.push_back(p1);
  1245. }
  1246. #pragma omp parallel for
  1247. for(int k=0;k<omp_p;k++){
  1248. size_t i0=((k+0)*mem_size.size())/omp_p;
  1249. size_t i1=((k+1)*mem_size.size())/omp_p;
  1250. for(size_t i=i0;i<i1;i++){
  1251. mem::memcopy(srctrg_ptr[2*i+1],srctrg_ptr[2*i+0],mem_size[i]);
  1252. }
  1253. }
  1254. }
  1255. range[0]=new_range[0];
  1256. range[1]=new_range[1];
  1257. }
  1258. //Add shared_nodes to the tree.
  1259. //std::cout<<"Number of Ghost Nodes = "<<shrd_data.size()<<'\n';
  1260. int nchld=(1UL<<this->Dim()); // Number of children.
  1261. std::vector<Node_t*> shrd_nodes(shrd_data.size());
  1262. for(size_t i=0;i<shrd_data.size();i++){ // Find shared nodes.
  1263. MortonId& mid=*(MortonId*)shrd_data[i].data;
  1264. Node_t* srch_node=this->RootNode();
  1265. while(srch_node->GetMortonId()!=mid){
  1266. Node_t* ch_node;
  1267. if(srch_node->IsLeaf()){
  1268. srch_node->SetGhost(true);
  1269. srch_node->Subdivide();
  1270. }
  1271. for(int j=nchld-1;j>=0;j--){
  1272. ch_node=(Node_t*)srch_node->Child(j);
  1273. if(ch_node->GetMortonId()<=mid){
  1274. srch_node=ch_node;
  1275. break;
  1276. }
  1277. }
  1278. }
  1279. shrd_nodes[i]=srch_node;
  1280. }
  1281. #pragma omp parallel for
  1282. for(size_t i=0;i<shrd_data.size();i++){
  1283. if(shrd_nodes[i]->IsGhost()) { // Initialize ghost node.
  1284. PackedData p=shrd_data[i];
  1285. p.data=((char*)p.data)+sizeof(MortonId);
  1286. p.length-=sizeof(MortonId);
  1287. shrd_nodes[i]->Unpack(p);
  1288. }
  1289. }
  1290. //Now LET is complete.
  1291. #ifndef NDEBUG
  1292. CheckTree();
  1293. #endif
  1294. }
  1295. inline bool isLittleEndian(){
  1296. uint16_t number = 0x1;
  1297. uint8_t *numPtr = (uint8_t*)&number;
  1298. return (numPtr[0] == 1);
  1299. }
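/**
 * \brief Writes the non-ghost leaves to VTK unstructured-grid files: one
 * <fname>NNNNNN.vtu file per rank (with appended raw binary data) plus a
 * single <fname>.pvtu index written by rank 0. The lod argument is forwarded
 * to each leaf's VTU_Data() call. For example:
 *
 *   tree.Write2File("octree", 4); // octree000000.vtu, ... and octree.pvtu
 */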
  1300. template <class TreeNode>
  1301. void MPI_Tree<TreeNode>::Write2File(const char* fname, int lod){
  1302. int myrank, np;
  1303. MPI_Comm_size(*Comm(),&np);
  1304. MPI_Comm_rank(*Comm(),&myrank);
  1305. std::vector<Real_t> coord; //Coordinates of octant corners.
  1306. std::vector<Real_t> value; //Data value at points.
  1307. std::vector<int32_t> mpi_rank; //MPI_Rank at points.
  1308. std::vector<int32_t> connect; //Cell connectivity.
  1309. std::vector<int32_t> offset ; //Cell offset.
  1310. std::vector<uint8_t> types ; //Cell types.
  1311. //Build list of octant corner points.
  1312. Node_t* n=this->PreorderFirst();
  1313. while(n!=NULL){
  1314. if(!n->IsGhost() && n->IsLeaf())
  1315. n->VTU_Data(coord, value, connect, offset, types, lod);
  1316. n=this->PreorderNxt(n);
  1317. }
  1318. int pt_cnt=coord.size()/COORD_DIM;
  1319. int dof=(pt_cnt?value.size()/pt_cnt:0);
  1320. assert(value.size()==(size_t)pt_cnt*dof);
  1321. int cell_cnt=types.size();
  1322. mpi_rank.resize(pt_cnt);
  1323. int new_myrank=myrank;//rand();
  1324. for(int i=0;i<pt_cnt;i++) mpi_rank[i]=new_myrank;
  1325. //Open file for writing.
  1326. std::stringstream vtufname;
  1327. vtufname<<fname<<std::setfill('0')<<std::setw(6)<<myrank<<".vtu";
  1328. std::ofstream vtufile;
  1329. vtufile.open(vtufname.str().c_str());
  1330. if(vtufile.fail()) return;
  1331. //Proceed to write to file.
  1332. size_t data_size=0;
  1333. vtufile<<"<?xml version=\"1.0\"?>\n";
  1334. if(isLittleEndian()) vtufile<<"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  1335. else vtufile<<"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  1336. //===========================================================================
  1337. vtufile<<" <UnstructuredGrid>\n";
  1338. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfCells=\""<<cell_cnt<<"\">\n";
  1339. //---------------------------------------------------------------------------
  1340. vtufile<<" <Points>\n";
  1341. vtufile<<" <DataArray type=\"Float"<<sizeof(Real_t)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1342. data_size+=sizeof(uint32_t)+coord.size()*sizeof(Real_t);
  1343. vtufile<<" </Points>\n";
  1344. //---------------------------------------------------------------------------
  1345. vtufile<<" <PointData>\n";
  1346. vtufile<<" <DataArray type=\"Float"<<sizeof(Real_t)*8<<"\" NumberOfComponents=\""<<dof<<"\" Name=\"value\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1347. data_size+=sizeof(uint32_t)+value .size()*sizeof( Real_t);
  1348. vtufile<<" <DataArray type=\"Int32\" NumberOfComponents=\"1\" Name=\"mpi_rank\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1349. data_size+=sizeof(uint32_t)+mpi_rank.size()*sizeof(int32_t);
  1350. vtufile<<" </PointData>\n";
  1351. //---------------------------------------------------------------------------
  1352. //---------------------------------------------------------------------------
  1353. vtufile<<" <Cells>\n";
  1354. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1355. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  1356. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1357. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  1358. vtufile<<" <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1359. data_size+=sizeof(uint32_t)+types.size() *sizeof(uint8_t);
  1360. vtufile<<" </Cells>\n";
  1361. //---------------------------------------------------------------------------
  1362. //vtufile<<" <CellData>\n";
  1363. //vtufile<<" <DataArray type=\"Float"<<sizeof(Real_t)*8<<"\" Name=\"Velocity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1364. //vtufile<<" </CellData>\n";
  1365. //---------------------------------------------------------------------------
  1366. vtufile<<" </Piece>\n";
  1367. vtufile<<" </UnstructuredGrid>\n";
  1368. //===========================================================================
  1369. vtufile<<" <AppendedData encoding=\"raw\">\n";
  1370. vtufile<<" _";
  1371. int32_t block_size;
  1372. block_size=coord .size()*sizeof( Real_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord .size()*sizeof( Real_t));
  1373. block_size=value .size()*sizeof( Real_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value .size()*sizeof( Real_t));
  1374. block_size=mpi_rank.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&mpi_rank[0], mpi_rank.size()*sizeof(int32_t));
  1375. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  1376. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  1377. block_size=types .size()*sizeof(uint8_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&types [0], types .size()*sizeof(uint8_t));
  1378. vtufile<<"\n";
  1379. vtufile<<" </AppendedData>\n";
  1380. //===========================================================================
  1381. vtufile<<"</VTKFile>\n";
  1382. vtufile.close();
  1383. if(myrank) return;
  1384. std::stringstream pvtufname;
  1385. pvtufname<<fname<<".pvtu";
  1386. std::ofstream pvtufile;
  1387. pvtufile.open(pvtufname.str().c_str());
  1388. if(pvtufile.fail()) return;
  1389. pvtufile<<"<?xml version=\"1.0\"?>\n";
  1390. pvtufile<<"<VTKFile type=\"PUnstructuredGrid\">\n";
  1391. pvtufile<<" <PUnstructuredGrid GhostLevel=\"0\">\n";
  1392. pvtufile<<" <PPoints>\n";
  1393. pvtufile<<" <PDataArray type=\"Float"<<sizeof(Real_t)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  1394. pvtufile<<" </PPoints>\n";
  1395. pvtufile<<" <PPointData>\n";
  1396. pvtufile<<" <PDataArray type=\"Float"<<sizeof(Real_t)*8<<"\" NumberOfComponents=\""<<dof<<"\" Name=\"value\"/>\n";
  1397. pvtufile<<" <PDataArray type=\"Int32\" NumberOfComponents=\"1\" Name=\"mpi_rank\"/>\n";
  1398. pvtufile<<" </PPointData>\n";
  1399. {
  1400. // Extract filename from path.
std::stringstream vtupath;
vtupath<<'/'<<fname;
std::string vtupath_str=vtupath.str(); // Keep the string alive; str().c_str() on a temporary would dangle.
const char* fname_ = strrchr(vtupath_str.c_str(), '/') + 1;
  1404. //std::string fname_ = boost::filesystem::path(fname).filename().string().
  1405. for(int i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<std::setfill('0')<<std::setw(6)<<i<<".vtu\"/>\n";
  1406. }
  1407. pvtufile<<" </PUnstructuredGrid>\n";
  1408. pvtufile<<"</VTKFile>\n";
  1409. pvtufile.close();
  1410. }
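/**
 * \brief Returns the Morton id of the first non-ghost leaf on every rank,
 * gathered with MPI_Allgather into the member vector mins. These values act
 * as the partition splitters used by CheckTree, Balance21 and ConstructLET.
 */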
  1411. template <class TreeNode>
  1412. const std::vector<MortonId>& MPI_Tree<TreeNode>::GetMins(){
  1413. Node_t* n=this->PreorderFirst();
  1414. while(n!=NULL){
  1415. if(!n->IsGhost() && n->IsLeaf()) break;
  1416. n=this->PreorderNxt(n);
  1417. }
  1418. MortonId my_min;
  1419. if(n!=NULL)
  1420. my_min=n->GetMortonId();
  1421. int np;
  1422. MPI_Comm_size(*Comm(),&np);
  1423. mins.resize(np);
  1424. MPI_Allgather(&my_min , 1, par::Mpi_datatype<MortonId>::value(),
  1425. &mins[0], 1, par::Mpi_datatype<MortonId>::value(), *Comm());
  1426. return mins;
  1427. }
  1428. }//end namespace