
bug fixes, everything works!

Dhairya Malhotra 11 years ago
parent
commit
c5edaefd39
1 changed file with 54 additions and 136 deletions

+ 54 - 136
include/mpi_tree.txx

@@ -1234,7 +1234,8 @@ inline void IsShared(std::vector<PackedData>& nodes, MortonId* m1, MortonId* m2,
 //#define PREFETCH_T0(addr,nrOfBytesAhead) _mm_prefetch(((char *)(addr))+nrOfBytesAhead,_MM_HINT_T0)
 template <class TreeNode>
 void MPI_Tree<TreeNode>::ConstructLET(BoundaryType bndry){
-#if 0
+#if 1
+  Profile::Tic("LET_Hypercube", &comm, true, 5); //----
   int num_p,rank;
   MPI_Comm_size(*Comm(),&num_p);
   MPI_Comm_rank(*Comm(),&rank );
@@ -1426,15 +1427,19 @@ void MPI_Tree<TreeNode>::ConstructLET(BoundaryType bndry){
     }
   }
   //Now LET is complete.
-
-#ifndef NDEBUG
-  CheckTree();
-#endif
+  Profile::Toc();
 #endif
 
   Profile::Tic("LET_Sparse", &comm, true, 5);
   ConstructLET_Sparse(bndry);
   Profile::Toc();
+
+  Profile::Tic("LET_Sparse_", &comm, true, 5);
+  ConstructLET_Sparse_(bndry);
+  Profile::Toc();
+#ifndef NDEBUG
+  CheckTree();
+#endif
 }
 
 template <class TreeNode>
@@ -1455,16 +1460,16 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
   MPI_Comm_rank(*Comm(),&rank );
   if(num_p==1) return;
 
-  int omp_p=omp_get_max_threads();
+  int omp_p=omp_get_max_threads(); omp_p=1;
   std::vector<MortonId> mins=GetMins();
 
   // Allocate Memory.
-  static std::vector<char> shrd_buff_vec0(omp_p*64l*1024l*1024l); // TODO: Build memory manager for such allocations.
-  static std::vector<char> shrd_buff_vec1(omp_p*64l*1024l*1024l); // TODO: Build memory manager for such allocations.
+  static std::vector<char> shrd_buff_vec0(16*64l*1024l*1024l); // TODO: Build memory manager for such allocations.
+  static std::vector<char> shrd_buff_vec1(16*64l*1024l*1024l); // TODO: Build memory manager for such allocations.
   static std::vector<char> send_buff;
   static std::vector<char> recv_buff;
 
-  Profile::Tic("SharedNodes", &comm, true, 5); //----
+  //Profile::Tic("SharedNodes", &comm, false, 5);
   CommData* node_comm_data=NULL; // CommData for all nodes.
   std::vector<void*> shared_data; // CommData for shared nodes.
   std::vector<par::SortPair<size_t,size_t> > pid_node_pair; // <pid, shared_data index> list
@@ -1488,7 +1493,10 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
 
         if(comm_data.node->IsGhost()) continue;
         if(comm_data.node->Depth()==0) continue;
-        //if(comm_data.mid.getDFD()<mins_r0 && comm_data.mid.NextId().getDFD()<mins_r1) continue;
+        if(comm_data.mid.getDFD()<mins_r0) continue;
+
+        MortonId mid0=comm_data.mid.         getDFD();
+        MortonId mid1=comm_data.mid.NextId().getDFD();
 
         comm_data.mid.NbrList(nbr_lst,comm_data.node->Depth()-1, bndry==Periodic);
         comm_data.usr_cnt=nbr_lst.size();
@@ -1496,16 +1504,9 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
           MortonId usr_mid=nbr_lst[j];
           MortonId usr_mid_dfd=usr_mid.getDFD();
           comm_data.usr_mid[j]=usr_mid;
-          if(usr_mid_dfd<mins_r0 || (rank+1<num_p && usr_mid_dfd>=mins[rank+1])){ // Find the user pid.
-            size_t usr_pid=std::upper_bound(&mins[0],&mins[num_p],usr_mid_dfd)-&mins[0]-1;
-            if(comm_data.mid.NextId().getDFD()>mins[usr_pid].getDFD() && (usr_pid+1==num_p || comm_data.mid.getDFD()<mins[usr_pid+1].getDFD())){
-              // This node is non-ghost on usr_pid, so we don't need to send this to usr_pid.
-              usr_pid=rank;
-            }
-            comm_data.usr_pid[j]=usr_pid;
-          }else comm_data.usr_pid[j]=rank;
-          if(!shared){ // Check if this node needs to be transferred.
-            if(comm_data.usr_pid[j]!=rank || (rank+1<num_p && usr_mid.NextId().getDFD()>mins_r1) ){
+          comm_data.usr_pid[j]=std::upper_bound(&mins[0],&mins[num_p],usr_mid_dfd)-&mins[0]-1;
+          if(!shared){ // Check if this node needs to be transferred during broadcast.
+            if(comm_data.usr_pid[j]!=rank || (rank+1<num_p && usr_mid.NextId()>mins_r1) ){
               shared=true;
             }
           }
@@ -1530,40 +1531,18 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
           }
           shared_data.push_back(&comm_data);
         }
-        {// old code - commented
-////        for(size_t j=0;j<nbr_lst.size();j++){
-////          if(nbr_lst[j]<mins[rank] || (rank+1<num_p && nbr_lst[j]>=mins[rank+1])){
-////            prc_lst.push_back(std::lower_bound(&mins[0],&mins[num_p],nbr_lst[j])-&mins[0]);
-////          }else prc_lst.push_back(rank);
-////          for(size_t k=0;k<prc_lst.size()-1;k++){
-////            if(prc_lst[k]==prc_lst.back()){
-////              prc_lst.pop_back();
-////              break;
-////            }
-////          }
-////        }
-////        for(size_t j=0;j<prc_lst.size();j++){
-////          par::SortPair<size_t, TreeNode*> proc_node_pair;
-////          par::SortPair<size_t, MortonId > proc_mort_pair;
-////          proc_node_pair.key=prc_lst[j]; proc_node_pair.data=nodes  [i];
-////          proc_mort_pair.key=prc_lst[j]; proc_mort_pair.data=mid_lst[j];
-////          #pragma omp critical (ADD_PAIR)
-////          {
-////            node_usr.push_back(p);
-////          }
-////        }
-        }
       }
     }
     omp_par::merge_sort(&pid_node_pair[0], &pid_node_pair[pid_node_pair.size()]);
     //std::cout<<rank<<' '<<shared_data.size()<<' '<<pid_node_pair.size()<<'\n';
   }
-  Profile::Toc();
+  //Profile::Toc();
 
-  Profile::Tic("PackNodes", &comm, true, 5); //----
+  //Profile::Tic("PackNodes", &comm, false, 5);
   std::vector<PackedData> pkd_data(shared_data.size()); // PackedData for all shared nodes.
   { // Pack shared nodes.
     char* data_ptr=&shrd_buff_vec0[0];
+    // TODO: Add OpenMP parallelism.
     for(size_t i=0;i<shared_data.size();i++){
       PackedData& p=pkd_data[i];
       CommData& comm_data=*(CommData*)shared_data[i];
@@ -1581,9 +1560,9 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
     delete[] node_comm_data;
     node_comm_data=NULL;
   }
-  Profile::Toc();
+  //Profile::Toc();
 
-  Profile::Tic("SendBuff", &comm, true, 5); //----
+  //Profile::Tic("SendBuff", &comm, false, 5);
   std::vector<MPI_size_t> send_size(num_p,0);
   std::vector<MPI_size_t> send_disp(num_p,0);
   if(pid_node_pair.size()){ // Build send_buff.
@@ -1608,39 +1587,6 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
     }
 
     // Compute send_size, send_disp.
-    #if 0 // This is too complicated
-    {
-      // Compute send_disp.
-      #pragma omp parallel for
-      for(size_t tid=0;tid<omp_p;tid++){
-        size_t a=(pid_node_pair.size()* tid   )/omp_p;
-        size_t b=(pid_node_pair.size()*(tid+1))/omp_p;
-        size_t p0=pid_node_pair[a  ].key;
-        size_t p1=pid_node_pair[b-1].key;
-        if(tid>    0) while(a<pid_node_pair.size() && p0==pid_node_pair[a].key) a++;
-        if(tid<omp_p) while(b<pid_node_pair.size() && p1==pid_node_pair[b].key) b++;
-        for(size_t i=a;i<b;i++){
-          send_size[pid_node_pair[i].key]+=size[i];
-          if(p0!=pid_node_pair[i].key){
-            for(size_t j=p0+1;j<=pid_node_pair[i].key;j++){
-              send_disp[j]=disp[i];
-            }
-            p0=pid_node_pair[i].key;
-          }
-        }
-      }
-      for(size_t i=pid_node_pair[pid_node_pair.size()-1].key+1;i<num_p;i++){
-        send_disp[i]=size[pid_node_pair.size()-1]+disp[pid_node_pair.size()-1];
-      }
-
-      // Compute send_size.
-      #pragma omp parallel for
-      for(size_t i=0;i<num_p-1;i++){
-        send_size[i]=send_disp[i+1]-send_disp[i];
-      }
-      send_size[num_p-1]=size[pid_node_pair.size()-1]+disp[pid_node_pair.size()-1]-send_disp[num_p-1];
-    }
-    #else
     {
       // Compute send_size.
       #pragma omp parallel for
@@ -1659,11 +1605,10 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
       // Compute send_disp.
       omp_par::scan(&send_size[0],&send_disp[0],num_p);
     }
-    #endif
   }
-  Profile::Toc();
+  //Profile::Toc();
 
-  Profile::Tic("A2A_Sparse", &comm, true, 5);
+  //Profile::Tic("A2A_Sparse", &comm, true, 5);
   size_t recv_length=0;
   { // Allocate recv_buff.
     std::vector<MPI_size_t> recv_size(num_p,0);
@@ -1678,15 +1623,16 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
     par::Mpi_Alltoallv_sparse(&send_buff[0], &send_size[0], &send_disp[0],
                               &recv_buff[0], &recv_size[0], &recv_disp[0], *Comm());
   }
-  Profile::Toc();
+  //Profile::Toc();
 
-  Profile::Tic("Unpack", &comm, true, 5); //----
+  //Profile::Tic("Unpack", &comm, false, 5);
   std::vector<void*> recv_data; // CommData for received nodes.
   { // Unpack received octants.
     for(size_t i=0; i<recv_length;){
       CommData& comm_data=*(CommData*)&recv_buff[i];
       recv_data.push_back(&comm_data);
       i+=comm_data.pkd_length;
+      assert(comm_data.pkd_length>0);
     }
 
     int nchld=(1UL<<this->Dim()); // Number of children.
@@ -1713,6 +1659,7 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
     }
     #pragma omp parallel for
     for(size_t i=0;i<recv_data.size();i++){
+      if(!recv_nodes[i]->IsGhost()) continue;
       assert(recv_nodes[i]->IsGhost());
       CommData& comm_data=*(CommData*)recv_data[i];
 
@@ -1722,9 +1669,9 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
       recv_nodes[i]->Unpack(p);
     }
   }
-  Profile::Toc();
+  //Profile::Toc();
 
-  Profile::Tic("Broadcast", &comm, true, 5); //----
+  //Profile::Tic("Broadcast", &comm, true, 5);
   { // Broadcast octants.
     std::vector<MortonId> shrd_mid;
     if(rank+1<num_p){ // Set shrd_mid.
@@ -1747,7 +1694,7 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
         CommData& comm_data=*(CommData*)shared_data[i];
         assert(comm_data.mid.GetDepth()>0);
         size_t d=comm_data.mid.GetDepth()-1;
-        if(d<shrd_mid.size())
+        if(d<shrd_mid.size() && shrd_mid[d].getDFD()>=mins[rank])
         for(size_t j=0;j<comm_data.usr_cnt;j++){
           if(comm_data.usr_mid[j]==shrd_mid[d]){
             assert(data_ptr+comm_data.pkd_length<=&(*shrd_buff_vec1.end())); //TODO: resize if needed.
@@ -1762,7 +1709,7 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
         CommData& comm_data=*(CommData*)recv_data[i];
         assert(comm_data.mid.GetDepth()>0);
         size_t d=comm_data.mid.GetDepth()-1;
-        if(d<shrd_mid.size())
+        if(d<shrd_mid.size() && shrd_mid[d].getDFD()>=mins[rank])
         for(size_t j=0;j<comm_data.usr_cnt;j++){
           if(comm_data.usr_mid[j]==shrd_mid[d]){
             assert(data_ptr+comm_data.pkd_length<=&(*shrd_buff_vec1.end())); //TODO: resize if needed.
@@ -1788,7 +1735,6 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
           CommData& comm_data=*(CommData*)shrd_data[i];
           size_t d=comm_data.mid.GetDepth()-1;
           bool shared=(d<shrd_mid.size() && shrd_mid[d].NextId().getDFD()>mins[send_pid].getDFD());
-          shared=shared && ((send_pid+1<num_p && comm_data.mid.getDFD()>=mins[send_pid+1].getDFD()) || comm_data.mid.NextId().getDFD()<=mins[send_pid].getDFD()); // this node is non-ghost on send_pid. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
           if(shared) for(size_t j=0;j<comm_data.usr_cnt;j++){ // if send_pid already has this node then skip
             if(comm_data.usr_pid[j]==send_pid){
               shared=false;
@@ -1841,6 +1787,7 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
           CommData& comm_data=*(CommData*)&recv_buff[i];
           recv_data.push_back(&comm_data);
           i+=comm_data.pkd_length;
+          assert(comm_data.pkd_length>0);
         }
 
         int nchld=(1UL<<this->Dim()); // Number of children.
@@ -1867,8 +1814,8 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
         }
         #pragma omp parallel for
         for(size_t i=0;i<recv_data.size();i++){
-          //assert(recv_nodes[i]->IsGhost()); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
           if(!recv_nodes[i]->IsGhost()) continue;
+          assert(recv_nodes[i]->IsGhost());
           CommData& comm_data=*(CommData*)recv_data[i];
 
           PackedData p;
@@ -1883,9 +1830,23 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
       if(send_pid!=rank){ // Copy data to shrd_buff_vec1 and Set shrd_data
         for(size_t i=0;i<recv_data.size();i++){
           CommData& comm_data=*(CommData*)recv_data[i];
+
+          //{ // Skip if this node already exists.
+          //  bool skip=false;
+          //  for(size_t k=0;k<shrd_data.size();k++){
+          //    CommData& comm_data_=*(CommData*)shrd_data[k];
+          //    if(comm_data_.mid==comm_data.mid){
+          //      assert(false);
+          //      skip=true;
+          //      break;
+          //    }
+          //  }
+          //  if(skip) continue;
+          //}
+
           assert(comm_data.mid.GetDepth()>0);
           size_t d=comm_data.mid.GetDepth()-1;
-          if(d<shrd_mid.size() && shrd_mid[d].NextId().getDFD()>mins[send_pid].getDFD())
+          if(d<shrd_mid.size() && shrd_mid[d].isAncestor(mins[rank]) && shrd_mid[d].NextId().getDFD()>mins[send_pid].getDFD())
           for(size_t j=0;j<comm_data.usr_cnt;j++){
             if(comm_data.usr_mid[j]==shrd_mid[d]){
               assert(data_ptr+comm_data.pkd_length<=&(*shrd_buff_vec1.end())); //TODO: resize if needed.
@@ -1899,50 +1860,7 @@ void MPI_Tree<TreeNode>::ConstructLET_Sparse(BoundaryType bndry){
       }
     }
   }
-  Profile::Toc();
-
-  {
-//  std::vector<TreeNode*> node_lst;
-//  Profile::Tic("LET_Sparse", &comm, true, 5);
-//  {
-//    // Constants.
-//    const size_t chld_cnt=1UL<<COORD_DIM;
-//
-//    std::vector<TreeNode*> nodes;
-//    std::vector<char> is_shared0;
-//    std::vector<char> is_shared1;
-//    MortonId* m1=&mins[rank];
-//    MortonId* m2=(rank+1<num_p?&mins[rank+1]:NULL);
-//
-//    nodes.push_back(this->RootNode());
-//    while(nodes.size()>0){
-//
-//      // Determine which nodes are shared.
-//      is_shared0.resize(nodes.size(),false);
-//      is_shared1.resize(nodes.size(),false);
-//      IsShared(nodes, NULL, m2, bndry, is_shared0);
-//      IsShared(nodes, m1, NULL, bndry, is_shared1);
-//
-//      // Add shared nodes to node_lst.
-//      for(size_t i=0;i<nodes.size();i++){
-//        if(is_shared0[i] || is_shared1[i]) node_lst.push_back(nodes[i]);
-//      }
-//
-//      // Initialize nodes vector for next round
-//      std::vector<TreeNode*> new_nodes;
-//      for(size_t i=0;i<nodes.size();i++){
-//        if(!nodes[i]->IsLeaf()){
-//          for(size_t j=0;j<chld_cnt;j++){
-//            TreeNode* chld=(TreeNode*)nodes[i]->Child(j);
-//            if(!chld->IsGhost()) new_nodes.push_back(chld);
-//          }
-//        }
-//      }
-//      nodes.swap(new_nodes);
-//    }
-//  }
-//  Profile::Toc();
-  }
+  //Profile::Toc();
 }
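
---

A minimal sketch (not part of the commit) of the owner-rank lookup pattern behind the simplified `usr_pid` computation in this diff: given the sorted per-rank minimum Morton IDs (`mins`, as returned by `GetMins()`), the owner of a key is the last rank whose minimum does not exceed that key. The helper name `find_owner_rank` and the use of `unsigned long` in place of `MortonId` are illustrative stand-ins, not pvfmm API.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Index of the last element in `mins` that is <= key, i.e. the rank whose
// half-open interval [mins[p], mins[p+1]) contains key. Assumes `mins` is
// sorted and mins[0] <= key.
size_t find_owner_rank(const std::vector<unsigned long>& mins, unsigned long key){
  return std::upper_bound(mins.begin(), mins.end(), key) - mins.begin() - 1;
}

int main(){
  // Example partition: rank 0 owns [0,100), rank 1 owns [100,200), rank 2 owns [200,...).
  std::vector<unsigned long> mins = {0, 100, 200};
  assert(find_owner_rank(mins, 150) == 1);
  assert(find_owner_rank(mins, 200) == 2);
  return 0;
}
```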