/** * \file mpi_tree.txx * \author Dhairya Malhotra, dhairya.malhotra@gmail.com * \date 12-11-2010 * \brief This file contains the implementation of the class MPI_Tree. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace pvfmm{ /** * @author Dhairya Malhotra, dhairya.malhotra@gmail.com * @date 08 Feb 2011 */ inline int p2oLocal(Vector & nodes, Vector& leaves, unsigned int maxNumPts, unsigned int maxDepth, bool complete) { assert(maxDepth<=MAX_DEPTH); std::vector leaves_lst; unsigned int init_size=leaves.Dim(); unsigned int num_pts=nodes.Dim(); MortonId curr_node=leaves[0]; MortonId last_node=leaves[init_size-1].NextId(); MortonId next_node; unsigned int curr_pt=0; unsigned int next_pt=curr_pt+maxNumPts; while(next_pt <= num_pts){ next_node = curr_node.NextId(); while( next_pt < num_pts && next_node > nodes[next_pt] && curr_node.GetDepth() < maxDepth-1 ){ curr_node = curr_node.getDFD(curr_node.GetDepth()+1); next_node = curr_node.NextId(); } leaves_lst.push_back(curr_node); curr_node = next_node; unsigned int inc=maxNumPts; while(next_pt < num_pts && curr_node > nodes[next_pt]){ // We have more than maxNumPts points per octant because the node can // not be refined any further. inc=inc<<1; next_pt+=inc; if(next_pt > num_pts){ next_pt = num_pts; break; } } curr_pt = std::lower_bound(&nodes[0]+curr_pt,&nodes[0]+next_pt,curr_node,std::less())-&nodes[0]; if(curr_pt >= num_pts) break; next_pt = curr_pt + maxNumPts; if(next_pt > num_pts) next_pt = num_pts; } #ifndef NDEBUG for(size_t i=0;i())-&nodes[0]; size_t b=std::lower_bound(&nodes[0],&nodes[0]+nodes.Dim(),leaves_lst[i].NextId(),std::less())-&nodes[0]; assert(b-a<=maxNumPts || leaves_lst[i].GetDepth()==maxDepth-1); if(i==leaves_lst.size()-1) assert(b==nodes.Dim() && a last_node && curr_node.GetDepth() < maxDepth-1 ) curr_node = curr_node.getDFD(curr_node.GetDepth()+1); leaves_lst.push_back(curr_node); curr_node = curr_node.NextId(); } leaves=leaves_lst; return 0; } inline int points2Octree(const Vector& pt_mid, Vector& nodes, unsigned int maxDepth, unsigned int maxNumPts, const MPI_Comm& comm ) { int myrank, np; MPI_Comm_rank(comm, &myrank); MPI_Comm_size(comm, &np); // Sort morton id of points. Profile::Tic("SortMortonId", &comm, true, 5); Vector pt_sorted; //par::partitionW(pt_mid, NULL, comm); par::HyperQuickSort(pt_mid, pt_sorted, comm); size_t pt_cnt=pt_sorted.Dim(); Profile::Toc(); // Add last few points from next process, to get the boundary octant right. Profile::Tic("Comm", &comm, true, 5); { { // Adjust maxNumPts size_t glb_pt_cnt=0; MPI_Allreduce(&pt_cnt, &glb_pt_cnt, 1, par::Mpi_datatype::value(), par::Mpi_datatype::sum(), comm); if(glb_pt_cnt::value(), myrank+1, 1, comm, &recvRequest); if(myrank > 0 ) MPI_Issend(&send_size, 1, par::Mpi_datatype::value(), myrank-1, 1, comm, &sendRequest); if(myrank < (np-1)) MPI_Wait(&recvRequest, &statusWait); if(myrank > 0 ) MPI_Wait(&sendRequest, &statusWait); //This can be done later. } if(recv_size>0){// Resize pt_sorted. Vector pt_sorted_(pt_cnt+recv_size); mem::memcopy(&pt_sorted_[0], &pt_sorted[0], pt_cnt*sizeof(MortonId)); pt_sorted.Swap(pt_sorted_); } {// Exchange data. 
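      // Each rank appends recv_size MortonIds received from rank+1 to the end
      // of its sorted array and ships its own leading send_size MortonIds to
      // rank-1, so that p2oLocal below also sees the points that spill across
      // the processor boundary into the last local octant.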
MPI_Request recvRequest; MPI_Request sendRequest; MPI_Status statusWait; if(myrank < (np-1)) MPI_Irecv (&pt_sorted[0]+pt_cnt, recv_size, par::Mpi_datatype::value(), myrank+1, 1, comm, &recvRequest); if(myrank > 0 ) MPI_Issend(&pt_sorted[0] , send_size, par::Mpi_datatype::value(), myrank-1, 1, comm, &sendRequest); if(myrank < (np-1)) MPI_Wait(&recvRequest, &statusWait); if(myrank > 0 ) MPI_Wait(&sendRequest, &statusWait); //This can be done later. } } Profile::Toc(); // Construct local octree. Profile::Tic("p2o_local", &comm, false, 5); Vector nodes_local(1); nodes_local[0]=MortonId(); p2oLocal(pt_sorted, nodes_local, maxNumPts, maxDepth, myrank==np-1); Profile::Toc(); // Remove duplicate nodes on adjacent processors. Profile::Tic("RemoveDuplicates", &comm, true, 5); { size_t node_cnt=nodes_local.Dim(); MortonId first_node; MortonId last_node=nodes_local[node_cnt-1]; { // Send last_node to next process and get first_node from previous process. MPI_Request recvRequest; MPI_Request sendRequest; MPI_Status statusWait; if(myrank < (np-1)) MPI_Issend(& last_node, 1, par::Mpi_datatype::value(), myrank+1, 1, comm, &recvRequest); if(myrank > 0 ) MPI_Irecv (&first_node, 1, par::Mpi_datatype::value(), myrank-1, 1, comm, &sendRequest); if(myrank < (np-1)) MPI_Wait(&recvRequest, &statusWait); if(myrank > 0 ) MPI_Wait(&sendRequest, &statusWait); //This can be done later. } size_t i=0; std::vector node_lst; if(myrank){ while(i0?i-1:0].NextId(); // Next MortonId in the tree after first_node. while(first_node(nodes, NULL , comm); Profile::Toc(); return 0; } template void MPI_Tree::Initialize(typename Node_t::NodeData* init_data){ //Initialize root node. Profile::Tic("InitRoot",Comm(),false,3); Tree::Initialize(init_data); TreeNode* rnode=this->RootNode(); assert(this->dim==COORD_DIM); Profile::Toc(); Profile::Tic("Points2Octee",Comm(),true,3); Vector lin_oct; { //Get the linear tree. // Compute MortonId from pt_coord. Vector pt_mid; Vector& pt_coord=rnode->pt_coord; size_t pt_cnt=pt_coord.Dim()/this->dim; pt_mid.Resize(pt_cnt); #pragma omp parallel for for(size_t i=0;imax_depth); } //Get the linear tree. points2Octree(pt_mid,lin_oct,this->max_depth,init_data->max_pts,*Comm()); } Profile::Toc(); Profile::Tic("ScatterPoints",Comm(),true,3); { // Sort and partition point coordinates and values. std::vector*> coord_lst; std::vector*> value_lst; std::vector*> scatter_lst; rnode->NodeDataVec(coord_lst, value_lst, scatter_lst); assert(coord_lst.size()==value_lst.size()); assert(coord_lst.size()==scatter_lst.size()); Vector pt_mid; Vector scatter_index; for(size_t i=0;i& pt_coord=*coord_lst[i]; { // Compute MortonId from pt_coord. size_t pt_cnt=pt_coord.Dim()/this->dim; pt_mid.Resize(pt_cnt); #pragma omp parallel for for(size_t i=0;imax_depth); } } par::SortScatterIndex(pt_mid , scatter_index, comm, &lin_oct[0]); par::ScatterForward (pt_coord, scatter_index, comm); if(value_lst[i]!=NULL){ Vector& pt_value=*value_lst[i]; par::ScatterForward(pt_value, scatter_index, comm); } if(scatter_lst[i]!=NULL){ Vector& pt_scatter=*scatter_lst[i]; pt_scatter=scatter_index; } } } Profile::Toc(); //Initialize the pointer based tree from the linear tree. 
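  // The pointer tree is built from lin_oct in two passes: a serial pass uses
  // FindNode() to create the octant at each thread's first MortonId (so the
  // threads never subdivide a shared ancestor concurrently), then each thread
  // walks its Morton range in preorder, subdividing ancestors of the next
  // target id, truncating exact matches into leaves, and truncating and
  // ghost-flagging everything outside the local range.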
Profile::Tic("PointerTree",Comm(),false,3); { // Construct the pointer tree from lin_oct int omp_p=omp_get_max_threads(); // Partition nodes between threads rnode->SetGhost(false); for(int i=0;iGetMortonId()==lin_oct[idx]); UNUSED(n); } #pragma omp parallel for for(int i=0;iSetGhost(false); MortonId dn=n->GetMortonId(); if(idxIsLeaf()) n->Subdivide(); }else if(idxIsLeaf()) n->Truncate(); assert(n->IsLeaf()); idx++; }else{ n->Truncate(); n->SetGhost(true); } n=this->PreorderNxt(n); } assert(idx==b); } } Profile::Toc(); #ifndef NDEBUG CheckTree(); #endif } template void MPI_Tree::CoarsenTree(){ int myrank; MPI_Comm_rank(*Comm(),&myrank); //Redistribute. { Node_t* n=this->PostorderFirst(); while(n){ if(n->IsLeaf() && !n->IsGhost()) break; n=this->PostorderNxt(n); } while(myrank){ Node_t* n_parent=(Node_t*)n->Parent(); Node_t* n_ = n_parent; while(n_ && !n_->IsLeaf()){ n_=this->PostorderNxt(n_); if(!n_) break; } if(!n_ || n_->IsGhost()) break; if(n->Depth()<=n_->Depth()) break; if(n_->Depth()<=1) break; n=n_; } MortonId loc_min=n->GetMortonId(); RedistNodes(&loc_min); } //Truncate ghost nodes and build node list std::vector leaf_nodes; { Node_t* n=this->PostorderFirst(); while(n!=NULL){ if(n->IsLeaf() && !n->IsGhost()) break; n->Truncate(); n->SetGhost(true); n->ClearData(); n=this->PostorderNxt(n); } while(n!=NULL){ if(n->IsLeaf() && n->IsGhost()) break; if(n->IsLeaf()) leaf_nodes.push_back(n); n=this->PreorderNxt(n); } while(n!=NULL){ n->Truncate(); n->SetGhost(true); n->ClearData(); n=this->PreorderNxt(n); } } size_t node_cnt=leaf_nodes.size(); //Partition nodes between OpenMP threads. int omp_p=omp_get_max_threads(); std::vector mid(omp_p); std::vector split_key(omp_p); #pragma omp parallel for for(int i=0;iGetMortonId(); } //Coarsen Tree. #pragma omp parallel for for(int i=0;iGetMortonId(); if(!n_->IsLeaf() && !n_mid.isAncestor(mid[i].getDFD())) if(iSubdivCond()) n_->Truncate(); if(iPostorderNxt(n_); } } //Truncate nodes along ancestors of splitters. for(int i=0;iRootNode()); while(n_->Depth()>0){ n_=(Node_t*)n_->Parent(); if(!n_->SubdivCond()) n_->Truncate(); else break; } } } template void MPI_Tree::RefineTree(){ int np, myrank; MPI_Comm_size(*Comm(),&np); MPI_Comm_rank(*Comm(),&myrank); int omp_p=omp_get_max_threads(); int n_child=1UL<Dim(); //Coarsen tree. MPI_Tree::CoarsenTree(); //Build node list. std::vector leaf_nodes; { Node_t* n=this->PostorderFirst(); while(n!=NULL){ if(n->IsLeaf() && !n->IsGhost()) leaf_nodes.push_back(n); n=this->PostorderNxt(n); } } size_t tree_node_cnt=leaf_nodes.size(); //Adaptive subdivision of leaf nodes with load balancing. for(int l=0;lmax_depth;l++){ //Subdivide nodes. std::vector > leaf_nodes_(omp_p); #pragma omp parallel for for(int i=0;iIsLeaf() && !leaf_nodes[j]->IsGhost()){ if(leaf_nodes[j]->SubdivCond()) leaf_nodes[j]->Subdivide(); if(!leaf_nodes[j]->IsLeaf()) for(int k=0;kChild(k)); } } } for(int i=0;i::value(), par::Mpi_datatype::max(), *Comm()); MPI_Allreduce(&tree_node_cnt, &global_sum, 1, par::Mpi_datatype::value(), par::Mpi_datatype::sum(), *Comm()); //RedistNodes if needed. if(global_max*np>4*global_sum){ #ifndef NDEBUG Profile::Tic("RedistNodes",Comm(),true,4); #endif RedistNodes(); #ifndef NDEBUG Profile::Toc(); #endif //Rebuild node list. leaf_nodes.clear(); Node_t* n=this->PostorderFirst(); while(n!=NULL){ if(n->IsLeaf() && !n->IsGhost()) leaf_nodes.push_back(n); n=this->PostorderNxt(n); } tree_node_cnt=leaf_nodes.size(); }else{ //Combine partial list of nodes. 
int node_cnt=0; for(int j=0;j::CoarsenTree(); RedistNodes(); MPI_Tree::CoarsenTree(); RedistNodes(); } template void MPI_Tree::RedistNodes(MortonId* loc_min) { int np, myrank; MPI_Comm_size(*Comm(),&np); MPI_Comm_rank(*Comm(),&myrank); if(np==1)return; //Create a linear tree in dendro format. Node_t* curr_node=this->PreorderFirst(); std::vector in; std::vector node_lst; while(curr_node!=NULL){ if(curr_node->IsLeaf() && !curr_node->IsGhost()){ node_lst.push_back(curr_node); in.push_back(curr_node->GetMortonId()); } curr_node=this->PreorderNxt(curr_node); } size_t leaf_cnt=in.size(); //Get new mins. std::vector new_mins(np); if(loc_min==NULL){ //Partition vector of MortonIds using par::partitionW std::vector in_=in; std::vector wts(in_.size()); #pragma omp parallel for for(size_t i=0;iNodeCost(); } par::partitionW(in_,&wts[0],*Comm()); MPI_Allgather(&in_[0] , 1, par::Mpi_datatype::value(), &new_mins[0], 1, par::Mpi_datatype::value(), *Comm()); }else{ MPI_Allgather(loc_min , 1, par::Mpi_datatype::value(), &new_mins[0], 1, par::Mpi_datatype::value(), *Comm()); } //Now exchange nodes according to new mins std::vector data(leaf_cnt); std::vector send_cnts; send_cnts.assign(np,0); std::vector send_size; send_size.assign(np,0); size_t sbuff_size=0; int omp_p=omp_get_max_threads(); #pragma omp parallel for reduction(+:sbuff_size) for(int i=0;ia){ size_t p_iter=a; size_t node_iter=std::lower_bound(&in[0], &in[in.size()], new_mins[a])-&in[0]; for( ;node_iter=new_mins[p_iter+1]: false) p_iter++; if(p_iter>=b) break; send_cnts[p_iter]++; data[node_iter]=node_lst[node_iter]->Pack(); send_size[p_iter]+=data[node_iter].length+sizeof(size_t)+sizeof(MortonId); sbuff_size +=data[node_iter].length+sizeof(size_t)+sizeof(MortonId); } } } std::vector recv_cnts(np); std::vector recv_size(np); MPI_Alltoall(&send_cnts[0], 1, par::Mpi_datatype::value(), &recv_cnts[0], 1, par::Mpi_datatype::value(), *Comm()); MPI_Alltoall(&send_size[0], 1, par::Mpi_datatype::value(), &recv_size[0], 1, par::Mpi_datatype::value(), *Comm()); size_t recv_cnt=0; #pragma omp parallel for reduction(+:recv_cnt) for(int i=0;i out(recv_cnt); std::vector sdisp; sdisp.assign(np,0); std::vector rdisp; rdisp.assign(np,0); omp_par::scan(&send_size[0],&sdisp[0],np); //TODO Don't need to do a full scan omp_par::scan(&recv_size[0],&rdisp[0],np); // as most entries will be 0. size_t rbuff_size=rdisp[np-1]+recv_size[np-1]; char* send_buff=new char[sbuff_size]; char* recv_buff=new char[rbuff_size]; std::vector data_ptr(leaf_cnt); char* s_ptr=send_buff; for(size_t i=0;i(&send_buff[0], &send_size[0], &sdisp[0], &recv_buff[0], &recv_size[0], &rdisp[0], *Comm()); char* r_ptr=recv_buff; std::vector r_data(recv_cnt); for(size_t i=0;iDim(); size_t node_iter=0; MortonId dn; node_lst.resize(recv_cnt); Node_t* n=this->PreorderFirst(); while(n!=NULL && node_iterSetGhost(false); dn=n->GetMortonId(); if(dn.isAncestor(out[node_iter]) && dn!=out[node_iter]){ if(n->IsLeaf()){ { n->SetGhost(true); n->Subdivide(); n->SetGhost(false); for(int j=0;jChild(j); ch_node->SetGhost(false); } } } }else if(dn==out[node_iter]){ if(!n->IsLeaf()){ n->Truncate(); n->SetGhost(false); } node_lst[node_iter]=n; node_iter++; }else{ n->Truncate(); //This node does not belong to this process. n->SetGhost(true); } n=this->PreorderNxt(n); } while(n!=NULL){ n->Truncate(); n->SetGhost(true); n=this->PreorderNxt(n); } #pragma omp parallel for for(int p=0;pUnpack(r_data[i]); } //Free memory buffers. 
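  // Each packet exchanged above is laid out as
  //   [MortonId | size_t payload length | payload from Node_t::Pack()],
  // and has now been unpacked into the pointer tree, so the raw send and
  // receive buffers can be released.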
delete[] recv_buff; delete[] send_buff; } template TreeNode* MPI_Tree::FindNode(MortonId& key, bool subdiv, TreeNode* start){ int num_child=1UL<Dim(); Node_t* n=start; if(n==NULL) n=this->RootNode(); while(n->GetMortonId()IsLeaf()||subdiv)){ if(n->IsLeaf() && !n->IsGhost()) n->Subdivide(); if(n->IsLeaf()) break; for(int j=0;jChild(j))->GetMortonId().NextId()>key){ n=(Node_t*)n->Child(j); break; } } } assert(!subdiv || n->IsGhost() || n->GetMortonId()==key); return n; } //list must be sorted. inline int lineariseList(std::vector & list, MPI_Comm comm) { int rank,size; MPI_Comm_rank(comm,&rank); MPI_Comm_size(comm,&size); //Remove empty processors... int new_rank, new_size; MPI_Comm new_comm; MPI_Comm_split(comm, (list.empty()?0:1), rank, &new_comm); MPI_Comm_rank (new_comm, &new_rank); MPI_Comm_size (new_comm, &new_size); if(!list.empty()) { //Send the last octant to the next processor. MortonId lastOctant = list[list.size()-1]; MortonId lastOnPrev; MPI_Request recvRequest; MPI_Request sendRequest; if(new_rank > 0) { MPI_Irecv(&lastOnPrev, 1, par::Mpi_datatype::value(), new_rank-1, 1, new_comm, &recvRequest); } if(new_rank < (new_size-1)) { MPI_Issend( &lastOctant, 1, par::Mpi_datatype::value(), new_rank+1, 1, new_comm, &sendRequest); } if(new_rank > 0) { std::vector tmp(list.size()+1); for(size_t i = 0; i < list.size(); i++) { tmp[i+1] = list[i]; } MPI_Status statusWait; MPI_Wait(&recvRequest, &statusWait); tmp[0] = lastOnPrev; list.swap(tmp); } {// Remove duplicates and ancestors. std::vector tmp; if(!(list.empty())) { for(unsigned int i = 0; i < (list.size()-1); i++) { if( (!(list[i].isAncestor(list[i+1]))) && (list[i] != list[i+1]) ) { tmp.push_back(list[i]); } } if(new_rank == (new_size-1)) { tmp.push_back(list[list.size()-1]); } } list.swap(tmp); } if(new_rank < (new_size-1)) { MPI_Status statusWait; MPI_Wait(&sendRequest, &statusWait); } }//not empty procs only return 1; }//end fn. inline int balanceOctree (std::vector &in, std::vector &out, unsigned int dim, unsigned int maxDepth, bool periodic, MPI_Comm comm) { int omp_p=omp_get_max_threads(); int rank, size; MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); if(size==1 && in.size()==1){ out=in; return 0; } #ifdef __VERBOSE__ long long locInSize = in.size(); #endif ////////////////////////////////////////////////////////////////////////////////////////////////// { //Redistribute. //Vector balance_wt(size); //#pragma omp parallel for //for(size_t i=0;i(in, &balance_wt[0], comm); par::partitionW(in, NULL, comm); } //Build level-by-level set of nodes. std::vector > nodes((maxDepth+1)*omp_p); #pragma omp parallel for for(int p=0;p nbrs; unsigned int num_chld=1UL<=1;l--){ //Build set of parents of balancing nodes. std::set nbrs_parent; std::set::iterator start=nodes[l+(maxDepth+1)*p].begin(); std::set::iterator end =nodes[l+(maxDepth+1)*p].end(); for(std::set::iterator node=start; node != end;){ node->NbrList(nbrs, l, periodic); int nbr_cnt=nbrs.size(); for(int i=0;i& ancestor_nodes=nodes[l-1+(maxDepth+1)*p]; start=nbrs_parent.begin(); end =nbrs_parent.end(); ancestor_nodes.insert(start,end); } //Remove non-leaf nodes. (optional) for(unsigned int l=1;l<=maxDepth;l++){ std::set::iterator start=nodes[l +(maxDepth+1)*p].begin(); std::set::iterator end =nodes[l +(maxDepth+1)*p].end(); std::set& ancestor_nodes=nodes[l-1+(maxDepth+1)*p]; for(std::set::iterator node=start; node != end; node++){ MortonId parent=node->getAncestor(node->GetDepth()-1); ancestor_nodes.erase(parent); } } } //Resize in. 
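  // Count the balancing octants produced by each thread, prefix-scan the
  // counts into displacements, and flatten the per-thread, per-level sets
  // back into the single array `in` before the global sort and linearise.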
std::vector node_cnt(omp_p,0); std::vector node_dsp(omp_p,0); #pragma omp parallel for for(int i=0;i::iterator start=nodes[l +(maxDepth+1)*p].begin(); std::set::iterator end =nodes[l +(maxDepth+1)*p].end(); for(std::set::iterator node=start; node != end; node++) in[node_iter++]=*node; } } #ifdef __VERBOSE__ //Local size before removing duplicates and ancestors (linearise). long long locTmpSize = in.size(); #endif //Sort, Linearise, Redistribute. //TODO The following might work better as it reduces the comm bandwidth: //Split comm into sqrt(np) processes and sort, linearise for each comm group. //Then do the global sort, linearise with the original comm. par::HyperQuickSort(in, out, comm); lineariseList(out, comm); par::partitionW(out, NULL , comm); { // Add children //Remove empty processors... int new_rank, new_size; MPI_Comm new_comm; MPI_Comm_split(comm, (out.empty()?0:1), rank, &new_comm); MPI_Comm_rank (new_comm, &new_rank); MPI_Comm_size (new_comm, &new_size); if(!out.empty()) { MortonId nxt_mid(0,0,0,0); { // Get last octant from previous process. assert(out.size()); //Send the last octant to the next processor. MortonId lastOctant = out.back(); MortonId lastOnPrev; MPI_Request recvRequest; MPI_Request sendRequest; if(new_rank > 0) { MPI_Irecv(&lastOnPrev, 1, par::Mpi_datatype::value(), new_rank-1, 1, new_comm, &recvRequest); } if(new_rank < (new_size-1)) { MPI_Issend( &lastOctant, 1, par::Mpi_datatype::value(), new_rank+1, 1, new_comm, &sendRequest); } if(new_rank > 0) { MPI_Status statusWait; MPI_Wait(&recvRequest, &statusWait); nxt_mid = lastOnPrev.NextId(); } if(new_rank < (new_size-1)) { MPI_Status statusWait; MPI_Wait(&sendRequest, &statusWait); } } std::vector out1; std::vector children; for(size_t i=0;i0){ out1.push_back(nxt_mid); nxt_mid=nxt_mid.NextId(); } } out.swap(out1); } if(new_size(out, NULL , comm); } } ////////////////////////////////////////////////////////////////////////////////////////////////// #ifdef __VERBOSE__ long long locOutSize = out.size(); long long globInSize, globTmpSize, globOutSize; MPI_Allreduce(&locInSize , &globInSize , 1, par::Mpi_datatype::value(), par::Mpi_datatype::sum(), comm); MPI_Allreduce(&locTmpSize, &globTmpSize, 1, par::Mpi_datatype::value(), par::Mpi_datatype::sum(), comm); MPI_Allreduce(&locOutSize, &globOutSize, 1, par::Mpi_datatype::value(), par::Mpi_datatype::sum(), comm); if(!rank) std::cout<<"Balance Octree. inpSize: "< void MPI_Tree::Balance21(BoundaryType bndry) { int num_proc,myrank; MPI_Comm_rank(*Comm(),&myrank); MPI_Comm_size(*Comm(),&num_proc); //Using Dendro for balancing //Create a linear tree in dendro format. Node_t* curr_node=this->PreorderFirst(); std::vector in; while(curr_node!=NULL){ if(curr_node->IsLeaf() && !curr_node->IsGhost()){ in.push_back(curr_node->GetMortonId()); } curr_node=this->PreorderNxt(curr_node); } //2:1 balance Profile::Tic("ot::balanceOctree",Comm(),true,3); std::vector out; balanceOctree(in, out, this->Dim(), this->max_depth, (bndry==Periodic), *Comm()); Profile::Toc(); //Get new_mins. std::vector new_mins(num_proc); MPI_Allgather(&out[0] , 1, par::Mpi_datatype::value(), &new_mins[0], 1, par::Mpi_datatype::value(), *Comm()); // Refine to new_mins in my range of octants // or else RedistNodes(...) will not work correctly. { int i=0; std::vector mins=GetMins(); while(new_mins[i]IsGhost()) break; else assert(n->GetMortonId()==new_mins[i]); } } //Redist nodes using new_mins. 
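  // Redistribute ownership so that each process starts at its first balanced
  // octant out[0]; the local subdivision pass that follows then refines the
  // pointer tree to match the remaining balanced octants in out.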
Profile::Tic("RedistNodes",Comm(),true,3); RedistNodes(&out[0]); #ifndef NDEBUG std::vector mins=GetMins(); assert(mins[myrank].getDFD()==out[0].getDFD()); #endif Profile::Toc(); //Now subdivide the current tree as necessary to make it balanced. Profile::Tic("LocalSubdivide",Comm(),false,3); int omp_p=omp_get_max_threads(); for(int i=0;iGetMortonId()==out[a]); UNUSED(n); } #pragma omp parallel for for(int i=0;iSetGhost(false); dn=n->GetMortonId(); if(dn.isAncestor(out[node_iter]) && dn!=out[node_iter]){ if(n->IsLeaf()) n->Subdivide(); }else if(dn==out[node_iter]){ assert(n->IsLeaf()); //if(!n->IsLeaf()){ //This should never happen // n->Truncate(); // n->SetGhost(false); //} assert(n->IsLeaf()); node_iter++; }else{ n->Truncate(); //This node does not belong to this process. n->SetGhost(true); } n=this->PreorderNxt(n); } if(i==omp_p-1){ while(n!=NULL){ n->Truncate(); n->SetGhost(true); n=this->PreorderNxt(n); } } } Profile::Toc(); } template void MPI_Tree::Balance21_local(BoundaryType bndry){ //SetColleagues(bndry); std::vector > node_lst(this->max_depth+1); Node_t* curr_node=this->PreorderFirst(); while(curr_node!=NULL){ node_lst[curr_node->Depth()].push_back(curr_node); curr_node=this->PreorderNxt(curr_node); } int n1=pow(3.0,this->Dim()); int n2=pow(2.0,this->Dim()); for(int i=this->max_depth;i>0;i--){ Real_t s=pow(0.5,i); for(size_t j=0;jCoord(); if(!curr_node->IsLeaf()) for(int k=0;kColleague(k)==NULL){ Real_t c0[3]={coord[0]+((k/1)%3-1)*s+s*0.5, coord[1]+((k/3)%3-1)*s+s*0.5, coord[2]+((k/9)%3-1)*s+s*0.5}; if(bndry==Periodic){ c0[0]=c0[0]-floor(c0[0]); c0[1]=c0[1]-floor(c0[1]); c0[2]=c0[2]-floor(c0[2]); } if(c0[0]>0 && c0[0]<1) if(c0[1]>0 && c0[1]<1) if(c0[2]>0 && c0[2]<1){ Node_t* node=this->RootNode(); while(node->Depth()IsLeaf()){ node->Subdivide(); for(int l=0;lDepth()+1].push_back((Node_t*)node->Child(l)); /* SetColleagues(bndry,(Node_t*)node->Child(l)); for(int i_=0;i_Child(l))->Colleague(i_); if(coll!=NULL) SetColleagues(bndry,coll); }// */ } } Real_t s1=pow(0.5,node->Depth()+1); Real_t* c1=node->Coord(); int c_id=((c0[0]-c1[0])>s1?1:0)+ ((c0[1]-c1[1])>s1?2:0)+ ((c0[2]-c1[2])>s1?4:0); node=(Node_t*)node->Child(c_id); /*if(node->Depth()==i){ c1=node->Coord(); std::cout<<(c0[0]-c1[0])-s1/2<<' ' std::cout<<(c0[1]-c1[1])-s1/2<<' ' std::cout<<(c0[2]-c1[2])-s1/2<<'\n'; }// */ } } } } } } } template void MPI_Tree::SetColleagues(BoundaryType bndry, Node_t* node){ int n1=(int)pow(3.0,this->Dim()); int n2=(int)pow(2.0,this->Dim()); if(node==NULL){ Node_t* curr_node=this->PreorderFirst(); if(curr_node!=NULL){ if(bndry==Periodic){ for(int i=0;iSetColleague(curr_node,i); }else{ curr_node->SetColleague(curr_node,(n1-1)/2); } curr_node=this->PreorderNxt(curr_node); } Vector > nodes(MAX_DEPTH); while(curr_node!=NULL){ nodes[curr_node->Depth()].push_back(curr_node); curr_node=this->PreorderNxt(curr_node); } for(size_t i=0;iRootNode(); int d=node->Depth(); Real_t* c0=node->Coord(); Real_t s=pow(0.5,d); Real_t c[COORD_DIM]; int idx=0; for(int i=-1;i<=1;i++) for(int j=-1;j<=1;j++) for(int k=-1;k<=1;k++){ c[0]=c0[0]+s*0.5+s*k; c[1]=c0[1]+s*0.5+s*j; c[2]=c0[2]+s*0.5+s*i; if(bndry==Periodic){ if(c[0]<0.0) c[0]+=1.0; if(c[0]>1.0) c[0]-=1.0; if(c[1]<1.0) c[1]+=1.0; if(c[1]>1.0) c[1]-=1.0; if(c[2]<1.0) c[2]+=1.0; if(c[2]>1.0) c[2]-=1.0; } node->SetColleague(NULL,idx); if(c[0]<1.0 && c[0]>0.0) if(c[1]<1.0 && c[1]>0.0) if(c[2]<1.0 && c[2]>0.0){ MortonId m(c,d); Node_t* nbr=FindNode(m,false,root_node); while(nbr->Depth()>d) nbr=(Node_t*)nbr->Parent(); if(nbr->Depth()==d) node->SetColleague(nbr,idx); } 
idx++; } /*/ Node_t* parent_node; Node_t* tmp_node1; Node_t* tmp_node2; for(int i=0;iSetColleague(NULL,i); parent_node=(Node_t*)node->Parent(); if(parent_node==NULL) return; int l=node->Path2Node(); for(int i=0;iColleague(i); if(tmp_node1!=NULL) if(!tmp_node1->IsLeaf()){ for(int j=0;jChild(j); if(tmp_node2!=NULL){ bool flag=true; int a=1,b=1,new_indx=0; for(int k=0;kDim();k++){ int indx_diff=(((i/b)%3)-1)*2+((j/a)%2)-((l/a)%2); if(-1>indx_diff || indx_diff>1) flag=false; new_indx+=(indx_diff+1)*b; a*=2;b*=3; } if(flag){ node->SetColleague(tmp_node2,new_indx); } } } } }// */ } } template bool MPI_Tree::CheckTree(){ int myrank,np; MPI_Comm_rank(*Comm(),&myrank); MPI_Comm_size(*Comm(),&np); std::vector mins=GetMins(); std::stringstream st; st<<"PID_"<PostorderFirst(); while(n!=NULL){ if(myrankGetMortonId().getDFD()>=mins[myrank+1])break; if(n->GetMortonId()>=mins[myrank] && n->IsLeaf() && n->IsGhost()){ std::cout<GetMortonId()<<'\n'; std::cout<GetMortonId()IsLeaf() && !n->IsGhost()){ assert(false); } if(!n->IsGhost() && n->Depth()>0) assert(!((Node_t*)n->Parent())->IsGhost()); n=this->PostorderNxt(n); } while(n!=NULL){ if(n->IsLeaf() && !n->IsGhost()){ st<<"non-ghost leaf node "<GetMortonId()<<"; after last node."; str=st.str(); ASSERT_WITH_MSG(false,str.c_str()); } n=this->PostorderNxt(n); } return true; }; /** * \brief Determines if node is used in the region between Morton Ids m1 and m2 * ( m1 <= m2 ). */ template void IsShared(std::vector& nodes, MortonId* m1, MortonId* m2, BoundaryType bndry, std::vector& shared_flag){ MortonId mm1, mm2; if(m1!=NULL) mm1=m1->getDFD(); if(m2!=NULL) mm2=m2->getDFD(); shared_flag.resize(nodes.size()); int omp_p=omp_get_max_threads(); #pragma omp parallel for for(int j=0;j nbr_lst; for(size_t i=a;iDepth()<2){ shared_flag[i]=true; continue; } node->GetMortonId().NbrList(nbr_lst, node->Depth()-1, bndry==Periodic); for(size_t k=0;kmm1) if(m2==NULL || n1& nodes, MortonId* m1, MortonId* m2, BoundaryType bndry, std::vector& shared_flag){ MortonId mm1, mm2; if(m1!=NULL) mm1=m1->getDFD(); if(m2!=NULL) mm2=m2->getDFD(); shared_flag.resize(nodes.size()); int omp_p=omp_get_max_threads(); #pragma omp parallel for for(int j=0;j nbr_lst; for(size_t i=a;iGetDepth()<2){ shared_flag[i]=true; continue; } node->NbrList(nbr_lst, node->GetDepth()-1, bndry==Periodic); for(size_t k=0;kmm1) if(m2==NULL || n1 void MPI_Tree::ConstructLET(BoundaryType bndry){ //Profile::Tic("LET_Hypercube", &comm, true, 5); //ConstructLET_Hypercube(bndry); //Profile::Toc(); //Profile::Tic("LET_Sparse", &comm, true, 5); ConstructLET_Sparse(bndry); //Profile::Toc(); #ifndef NDEBUG CheckTree(); #endif } /** * \brief Hypercube based scheme to exchange Ghost octants. */ //#define PREFETCH_T0(addr,nrOfBytesAhead) _mm_prefetch(((char *)(addr))+nrOfBytesAhead,_MM_HINT_T0) template void MPI_Tree::ConstructLET_Hypercube(BoundaryType bndry){ int num_p,rank; MPI_Comm_size(*Comm(),&num_p); MPI_Comm_rank(*Comm(),&rank ); if(num_p==1) return; int omp_p=omp_get_max_threads(); std::vector mins=GetMins(); // Build list of shared nodes. 
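  // A local octant is "shared" if its coarse-level neighbour list reaches into
  // another process's Morton range (see IsShared above); only those octants
  // are packed and exchanged as ghost octants with neighbouring processes.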
std::vector shared_nodes; shared_nodes.clear(); std::vector node_lst; node_lst.clear(); Node_t* curr_node=this->PreorderFirst(); while(curr_node!=NULL){ if(curr_node->GetMortonId().getDFD()>=mins[rank]) break; curr_node=this->PreorderNxt(curr_node); } while(curr_node!=NULL){ if(curr_node->IsGhost()) break; node_lst.push_back(curr_node); curr_node=this->PreorderNxt(curr_node); } std::vector node_flag0; node_flag0.clear(); std::vector node_flag1; node_flag1.clear(); IsShared(node_lst,&mins[0],&mins[rank],bndry,node_flag0); if(rank shared_data; // CommData for shared nodes. std::vector > pid_node_pair; // list { // Set node_comm_data MortonId mins_r0=mins[ rank+0 ].getDFD(); MortonId mins_r1=mins[std::min(rank+1,num_p-1)].getDFD(); std::vector nodes=this->GetNodeList(); node_comm_data=(CommData*)this->memgr.malloc(sizeof(CommData)*nodes.size()); #pragma omp parallel for for(size_t tid=0;tid nbr_lst; size_t a=(nodes.size()* tid )/omp_p; size_t b=(nodes.size()*(tid+1))/omp_p; for(size_t i=a;iGetMortonId(); comm_data.usr_cnt=0; if(comm_data.node->IsGhost()) continue; if(comm_data.node->Depth()==0) continue; if(comm_data.mid.getDFD()Depth()-1, bndry==Periodic); comm_data.usr_cnt=nbr_lst.size(); for(size_t j=0;j=mins_r1)){ // Find the user pid. // size_t usr_pid=std::upper_bound(&mins[0],&mins[num_p],usr_mid_dfd)-&mins[0]-1; // comm_data.usr_pid[j]=usr_pid; // }else comm_data.usr_pid[j]=rank; if(!shared){ // Check if this node needs to be transferred during broadcast. if(comm_data.usr_pid[j]!=rank || (rank+1mins_r1) ){ shared=true; } } } #pragma omp critical (ADD_SHARED) if(shared){ for(size_t j=0;j p; p.key=comm_data.usr_pid[j]; p.data=shared_data.size(); pid_node_pair.push_back(p); } } shared_data.push_back(&comm_data); } } } omp_par::merge_sort(&pid_node_pair[0], &pid_node_pair[pid_node_pair.size()]); //std::cout<memgr.malloc(buff_length); size_t a=( tid *shared_data.size())/omp_p; size_t b=((tid+1)*shared_data.size())/omp_p; for(size_t i=a;iPack(true,buff); assert(p0.lengthmemgr.malloc(sizeof(CommData)+p0.length); CommData& new_comm_data=*(CommData*)shared_data[i]; new_comm_data=comm_data; new_comm_data.pkd_length=sizeof(CommData)+p0.length; mem::memcopy(((char*)shared_data[i])+sizeof(CommData),buff,p0.length); } this->memgr.free(buff); } // now CommData is stored in shared_data this->memgr.free(node_comm_data); node_comm_data=NULL; } //Profile::Toc(); //Profile::Tic("SendBuff", &comm, false, 5); std::vector send_size(num_p,0); std::vector send_disp(num_p,0); if(pid_node_pair.size()){ // Build send_buff. std::vector size(pid_node_pair.size(),0); std::vector disp(pid_node_pair.size(),0); #pragma omp parallel for for(size_t i=0;ipkd_length; } omp_par::scan(&size[0],&disp[0],pid_node_pair.size()); // Resize send_buff. if(send_buff.size()0 && a0 && b recv_size(num_p,0); std::vector recv_disp(num_p,0); MPI_Alltoall(&send_size[0], 1, par::Mpi_datatype::value(), &recv_size[0], 1, par::Mpi_datatype::value(), *Comm()); omp_par::scan(&recv_size[0],&recv_disp[0],num_p); recv_length=recv_size[num_p-1]+recv_disp[num_p-1]; if(recv_buff.size() recv_data; // CommData for received nodes. { // Unpack received octants. std::vector > mid_indx_pair; for(size_t i=0; i p; p.key=comm_data.mid; p.data=mid_indx_pair.size(); mid_indx_pair.push_back(p); } i+=comm_data.pkd_length; assert(comm_data.pkd_length>0); } std::vector recv_nodes(recv_data.size()); { // Find received octants in tree. 
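      // The received (MortonId, index) pairs are sorted and split across
      // threads; the subtree at each split point is created serially first,
      // so that concurrent threads never subdivide the same ancestor, then
      // every thread descends from the root, subdividing ghost octants as
      // needed, until the node for each received MortonId exists locally.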
omp_par::merge_sort(&mid_indx_pair[0], &mid_indx_pair[0]+mid_indx_pair.size()); std::vector indx(omp_p+1); for(size_t i=0;i<=omp_p;i++){ size_t j=(mid_indx_pair.size()*i)/omp_p; if(j>0) while(jDim()); // Number of children. if(mid_indx_pair.size()>0) for(size_t tid=1;tidRootNode(); while(srch_node->GetMortonId()!=mid){ Node_t* ch_node; if(srch_node->IsLeaf()){ srch_node->SetGhost(true); srch_node->Subdivide(); } for(int j=nchld-1;j>=0;j--){ ch_node=(Node_t*)srch_node->Child(j); if(ch_node->GetMortonId()<=mid){ srch_node=ch_node; break; } } } } #pragma omp parallel for for(size_t tid=0;tidRootNode(); while(srch_node->GetMortonId()!=mid){ Node_t* ch_node; if(srch_node->IsLeaf()){ srch_node->SetGhost(true); srch_node->Subdivide(); } for(int j=nchld-1;j>=0;j--){ ch_node=(Node_t*)srch_node->Child(j); if(ch_node->GetMortonId()<=mid){ srch_node=ch_node; break; } } } recv_nodes[i]=srch_node; } } } #pragma omp parallel for for(size_t i=0;iIsGhost()) continue; assert(recv_nodes[i]->IsGhost()); CommData& comm_data=*(CommData*)recv_data[i]; PackedData p; p.data=((char*)recv_data[i])+sizeof(CommData); p.length=comm_data.pkd_length-sizeof(CommData); recv_nodes[i]->Unpack(p); } } //Profile::Toc(); //Profile::Tic("Broadcast", &comm, true, 5); { // Broadcast octants. std::vector shrd_mid; if(rank+10 && m.getDFD()>=mins[rank+1]){ m=m.getAncestor(m.GetDepth()-1); } size_t d=m.GetDepth()+1; shrd_mid.resize(d); for(size_t i=0;i shrd_data; // CommData for shared nodes. { // Set shrd_data for(size_t i=0;i0); size_t d=comm_data.mid.GetDepth()-1; if(d=mins[rank]) for(size_t j=0;jmemgr.free(&comm_data); } for(size_t i=0;i0); size_t d=comm_data.mid.GetDepth()-1; if(d=mins[rank]) for(size_t j=0;jmemgr.malloc(comm_data.pkd_length); mem::memcopy(data_ptr, &comm_data, comm_data.pkd_length); shrd_data.push_back(data_ptr); break; } } } } size_t pid_shift=1; while(pid_shift=pid_shift?rank-pid_shift:rank); MPI_size_t send_pid=(rank+pid_shift send_data; std::vector send_size; for(size_t i=0; imins[send_pid].getDFD()); if(shared) for(size_t j=0;j send_disp(send_data.size(),0); omp_par::scan(&send_size[0],&send_disp[0],send_data.size()); if(send_data.size()>0) send_length=send_size.back()+send_disp.back(); // Resize send_buff. if(send_buff.size()::value(),recv_pid, 1, *Comm(), &request); if(send_pid!=rank) MPI_Send (&send_length, 1, par::Mpi_datatype::value(),send_pid, 1, *Comm()); if(recv_pid!=rank) MPI_Wait(&request, &status); // Resize recv_buff if(recv_buff.size()0) MPI_Irecv(&recv_buff[0], recv_length, par::Mpi_datatype::value(),recv_pid, 1, *Comm(), &request); if(send_length>0) MPI_Send (&send_buff[0], send_length, par::Mpi_datatype::value(),send_pid, 1, *Comm()); if(recv_length>0) MPI_Wait(&request, &status); } std::vector recv_data; // CommData for received nodes. { // Unpack received octants. std::vector > mid_indx_pair; for(size_t i=0; i p; p.key=comm_data.mid; p.data=mid_indx_pair.size(); mid_indx_pair.push_back(p); } i+=comm_data.pkd_length; assert(comm_data.pkd_length>0); } std::vector recv_nodes(recv_data.size()); int nchld=(1UL<Dim()); // Number of children. // for(size_t i=0;iRootNode(); // while(srch_node->GetMortonId()!=mid){ // Node_t* ch_node; // if(srch_node->IsLeaf()){ // srch_node->SetGhost(true); // srch_node->Subdivide(); // } // for(int j=nchld-1;j>=0;j--){ // ch_node=(Node_t*)srch_node->Child(j); // if(ch_node->GetMortonId()<=mid){ // srch_node=ch_node; // break; // } // } // } // recv_nodes[i]=srch_node; // } { // Find received octants in tree. 
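      // Same insertion procedure as in the point-to-point stage above: sort
      // the received (MortonId, index) pairs, create the split-point subtrees
      // serially, then locate or create each received octant in parallel
      // before unpacking it.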
omp_par::merge_sort(&mid_indx_pair[0], &mid_indx_pair[0]+mid_indx_pair.size()); std::vector indx(omp_p+1); for(size_t i=0;i<=omp_p;i++){ size_t j=(mid_indx_pair.size()*i)/omp_p; if(j>0) while(jDim()); // Number of children. if(mid_indx_pair.size()>0) for(size_t tid=1;tidRootNode(); while(srch_node->GetMortonId()!=mid){ Node_t* ch_node; if(srch_node->IsLeaf()){ srch_node->SetGhost(true); srch_node->Subdivide(); } for(int j=nchld-1;j>=0;j--){ ch_node=(Node_t*)srch_node->Child(j); if(ch_node->GetMortonId()<=mid){ srch_node=ch_node; break; } } } } #pragma omp parallel for for(size_t tid=0;tidRootNode(); while(srch_node->GetMortonId()!=mid){ Node_t* ch_node; if(srch_node->IsLeaf()){ srch_node->SetGhost(true); srch_node->Subdivide(); } for(int j=nchld-1;j>=0;j--){ ch_node=(Node_t*)srch_node->Child(j); if(ch_node->GetMortonId()<=mid){ srch_node=ch_node; break; } } } recv_nodes[i]=srch_node; } } } #pragma omp parallel for for(size_t i=0;iIsGhost()) continue; assert(recv_nodes[i]->IsGhost()); CommData& comm_data=*(CommData*)recv_data[i]; PackedData p; p.data=((char*)recv_data[i])+sizeof(CommData); p.length=comm_data.pkd_length-sizeof(CommData); recv_nodes[i]->Unpack(p); } } pid_shift<<=1; send_pid=(rank+pid_shift0); size_t d=comm_data.mid.GetDepth()-1; if(dmins[send_pid].getDFD()) for(size_t j=0;jmemgr.malloc(comm_data.pkd_length); mem::memcopy(data_ptr, &comm_data, comm_data.pkd_length); shrd_data.push_back(data_ptr); break; } } } } } // Free data //Profile::Tic("Free", &comm, false, 5); for(size_t i=0;imemgr.free(shrd_data[i]); //Profile::Toc(); } //Profile::Toc(); } inline bool isLittleEndian(){ uint16_t number = 0x1; uint8_t *numPtr = (uint8_t*)&number; return (numPtr[0] == 1); } template void MPI_Tree::Write2File(const char* fname, int lod){ typedef double VTKData_t; int myrank, np; MPI_Comm_size(*Comm(),&np); MPI_Comm_rank(*Comm(),&myrank); std::vector coord_; //Coordinates of octant corners. std::vector value_; //Data value at points. std::vector coord; //Coordinates of octant corners. std::vector value; //Data value at points. std::vector mpi_rank; //MPI_Rank at points. std::vector connect; //Cell connectivity. std::vector offset ; //Cell offset. std::vector types ; //Cell types. //Build list of octant corner points. 
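  // Every non-ghost leaf appends its corner coordinates, point values and
  // cell connectivity/offset/type arrays via VTU_Data(); the arrays are then
  // written in the VTK "appended" raw-binary layout, each preceded by its
  // byte count, and rank 0 additionally writes the .pvtu master file that
  // references the per-process .vtu pieces.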
Node_t* n=this->PreorderFirst(); while(n!=NULL){ if(!n->IsGhost() && n->IsLeaf()) n->VTU_Data(coord_, value_, connect, offset, types, lod); n=this->PreorderNxt(n); } for(size_t i=0;i\n"; if(isLittleEndian()) vtufile<<"\n"; else vtufile<<"\n"; //=========================================================================== vtufile<<" \n"; vtufile<<" \n"; //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKData_t); vtufile<<" \n"; //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKData_t); vtufile<<" \n"; data_size+=sizeof(uint32_t)+mpi_rank.size()*sizeof(int32_t); vtufile<<" \n"; //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t); vtufile<<" \n"; data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t); vtufile<<" \n"; data_size+=sizeof(uint32_t)+types.size() *sizeof(uint8_t); vtufile<<" \n"; //--------------------------------------------------------------------------- //vtufile<<" \n"; //vtufile<<" \n"; //vtufile<<" \n"; //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; //=========================================================================== vtufile<<" \n"; vtufile<<" _"; int32_t block_size; block_size=coord .size()*sizeof(VTKData_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord .size()*sizeof(VTKData_t)); block_size=value .size()*sizeof(VTKData_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value .size()*sizeof(VTKData_t)); block_size=mpi_rank.size()*sizeof( int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&mpi_rank[0], mpi_rank.size()*sizeof( int32_t)); block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t)); block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t)); block_size=types .size()*sizeof(uint8_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&types [0], types .size()*sizeof(uint8_t)); vtufile<<"\n"; vtufile<<" \n"; //=========================================================================== vtufile<<"\n"; vtufile.close(); if(myrank) return; std::stringstream pvtufname; pvtufname<\n"; pvtufile<<"\n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; { // Extract filename from path. std::stringstream vtupath; vtupath<<'/'<\n"; } pvtufile<<" \n"; pvtufile<<"\n"; pvtufile.close(); } template const std::vector& MPI_Tree::GetMins(){ Node_t* n=this->PreorderFirst(); while(n!=NULL){ if(!n->IsGhost() && n->IsLeaf()) break; n=this->PreorderNxt(n); } ASSERT_WITH_MSG(n!=NULL,"No non-ghost nodes found on this process."); MortonId my_min; my_min=n->GetMortonId(); int np; MPI_Comm_size(*Comm(),&np); mins.resize(np); MPI_Allgather(&my_min , 1, par::Mpi_datatype::value(), &mins[0], 1, par::Mpi_datatype::value(), *Comm()); return mins; } }//end namespace
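// A rough usage sketch (not part of the library): the concrete node type and
// the exact NodeData fields are assumptions here -- e.g. MPI_Node<double> and
// a FreeSpace boundary type -- and only the calls that appear in this file
// (Initialize, RefineTree, Balance21, ConstructLET, Write2File) are taken
// from it.
//
//   typedef pvfmm::MPI_Node<double> Node_t;
//   typedef pvfmm::MPI_Tree<Node_t> Tree_t;
//
//   Node_t::NodeData node_data;
//   node_data.dim       = COORD_DIM;      // spatial dimension
//   node_data.max_depth = MAX_DEPTH;      // maximum octree depth
//   node_data.max_pts   = 100;            // points per octant before refining
//   node_data.pt_coord  = my_points;      // Vector<double> of x,y,z triples
//
//   Tree_t tree(comm);
//   tree.Initialize(&node_data);          // sort points, build linear + pointer tree
//   tree.RefineTree();                    // adaptive refinement with load balancing
//   tree.Balance21(pvfmm::FreeSpace);     // enforce the 2:1 level constraint
//   tree.ConstructLET(pvfmm::FreeSpace);  // exchange ghost octants (local essential tree)
//   tree.Write2File("octree", 4);         // one .vtu per rank plus a .pvtu master file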