tree.hpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207
  1. #ifndef _SCTL_TREE_
  2. #define _SCTL_TREE_
  3. #include SCTL_INCLUDE(common.hpp)
  4. #include SCTL_INCLUDE(morton.hpp)
  5. #include SCTL_INCLUDE(comm.hpp)
  6. #include <fstream>
  7. #include <algorithm>
  8. namespace SCTL_NAMESPACE {
  9. struct VTUData {
  10. typedef float VTKReal;
  11. // Point data
  12. Vector<VTKReal> coord; // always 3D
  13. Vector<VTKReal> value;
  14. // Cell data
  15. Vector<int32_t> connect;
  16. Vector<int32_t> offset;
  17. Vector<uint8_t> types;
  18. void WriteVTK(const std::string& fname, Comm comm = Comm::Self()) const {
  19. typedef typename VTUData::VTKReal VTKReal;
  20. Integer rank = comm.Rank();
  21. Integer np = comm.Size();
  22. Long value_dof = 0;
  23. { // Write vtu file.
  24. std::ofstream vtufile;
  25. { // Open file for writing.
  26. std::stringstream vtufname;
  27. vtufname << fname << std::setfill('0') << std::setw(6) << rank << ".vtu";
  28. vtufile.open(vtufname.str().c_str());
  29. if (vtufile.fail()) return;
  30. }
  31. { // Write to file.
  32. Long pt_cnt = coord.Dim() / 3;
  33. Long cell_cnt = types.Dim();
  34. value_dof = (pt_cnt ? value.Dim() / pt_cnt : 0);
  35. Vector<int32_t> mpi_rank;
  36. { // Set mpi_rank
  37. Integer new_myrank = rank;
  38. mpi_rank.ReInit(pt_cnt);
  39. for (Long i = 0; i < mpi_rank.Dim(); i++) mpi_rank[i] = new_myrank;
  40. }
  41. bool isLittleEndian;
  42. { // Set isLittleEndian
  43. uint16_t number = 0x1;
  44. uint8_t *numPtr = (uint8_t *)&number;
  45. isLittleEndian = (numPtr[0] == 1);
  46. }
  47. Long data_size = 0;
  48. vtufile << "<?xml version=\"1.0\"?>\n";
  49. vtufile << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"" << (isLittleEndian ? "LittleEndian" : "BigEndian") << "\">\n";
  50. // ===========================================================================
  51. vtufile << " <UnstructuredGrid>\n";
  52. vtufile << " <Piece NumberOfPoints=\"" << pt_cnt << "\" NumberOfCells=\"" << cell_cnt << "\">\n";
  53. //---------------------------------------------------------------------------
  54. vtufile << " <Points>\n";
  55. vtufile << " <DataArray type=\"Float" << sizeof(VTKReal) * 8 << "\" NumberOfComponents=\"3\" Name=\"Position\" format=\"appended\" offset=\"" << data_size << "\" />\n";
  56. data_size += sizeof(uint32_t) + coord.Dim() * sizeof(VTKReal);
  57. vtufile << " </Points>\n";
  58. //---------------------------------------------------------------------------
  59. vtufile << " <PointData>\n";
  60. if (value_dof) { // value
  61. vtufile << " <DataArray type=\"Float" << sizeof(VTKReal) * 8 << "\" NumberOfComponents=\"" << value_dof << "\" Name=\"value\" format=\"appended\" offset=\"" << data_size << "\" />\n";
  62. data_size += sizeof(uint32_t) + value.Dim() * sizeof(VTKReal);
  63. }
  64. { // mpi_rank
  65. vtufile << " <DataArray type=\"Int32\" NumberOfComponents=\"1\" Name=\"mpi_rank\" format=\"appended\" offset=\"" << data_size << "\" />\n";
  66. data_size += sizeof(uint32_t) + pt_cnt * sizeof(int32_t);
  67. }
  68. vtufile << " </PointData>\n";
  69. //---------------------------------------------------------------------------
  70. //---------------------------------------------------------------------------
  71. vtufile << " <Cells>\n";
  72. vtufile << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << data_size << "\" />\n";
  73. data_size += sizeof(uint32_t) + connect.Dim() * sizeof(int32_t);
  74. vtufile << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << data_size << "\" />\n";
  75. data_size += sizeof(uint32_t) + offset.Dim() * sizeof(int32_t);
  76. vtufile << " <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\"" << data_size << "\" />\n";
  77. data_size += sizeof(uint32_t) + types.Dim() * sizeof(uint8_t);
  78. vtufile << " </Cells>\n";
  79. //---------------------------------------------------------------------------
  80. vtufile << " </Piece>\n";
  81. vtufile << " </UnstructuredGrid>\n";
  82. // ===========================================================================
  83. vtufile << " <AppendedData encoding=\"raw\">\n";
  84. vtufile << " _";
  85. int32_t block_size;
  86. { // coord
  87. block_size = coord.Dim() * sizeof(VTKReal);
  88. vtufile.write((char *)&block_size, sizeof(int32_t));
  89. if (coord.Dim()) vtufile.write((char *)&coord[0], coord.Dim() * sizeof(VTKReal));
  90. }
  91. if (value_dof) { // value
  92. block_size = value.Dim() * sizeof(VTKReal);
  93. vtufile.write((char *)&block_size, sizeof(int32_t));
  94. if (value.Dim()) vtufile.write((char *)&value[0], value.Dim() * sizeof(VTKReal));
  95. }
  96. { // mpi_rank
  97. block_size = mpi_rank.Dim() * sizeof(int32_t);
  98. vtufile.write((char *)&block_size, sizeof(int32_t));
  99. if (mpi_rank.Dim()) vtufile.write((char *)&mpi_rank[0], mpi_rank.Dim() * sizeof(int32_t));
  100. }
  101. { // block_size
  102. block_size = connect.Dim() * sizeof(int32_t);
  103. vtufile.write((char *)&block_size, sizeof(int32_t));
  104. if (connect.Dim()) vtufile.write((char *)&connect[0], connect.Dim() * sizeof(int32_t));
  105. }
  106. { // offset
  107. block_size = offset.Dim() * sizeof(int32_t);
  108. vtufile.write((char *)&block_size, sizeof(int32_t));
  109. if (offset.Dim()) vtufile.write((char *)&offset[0], offset.Dim() * sizeof(int32_t));
  110. }
  111. { // types
  112. block_size = types.Dim() * sizeof(uint8_t);
  113. vtufile.write((char *)&block_size, sizeof(int32_t));
  114. if (types.Dim()) vtufile.write((char *)&types[0], types.Dim() * sizeof(uint8_t));
  115. }
  116. vtufile << "\n";
  117. vtufile << " </AppendedData>\n";
  118. // ===========================================================================
  119. vtufile << "</VTKFile>\n";
  120. }
  121. vtufile.close(); // close file
  122. }
  123. if (!rank) { // Write pvtu file
  124. std::ofstream pvtufile;
  125. { // Open file for writing
  126. std::stringstream pvtufname;
  127. pvtufname << fname << ".pvtu";
  128. pvtufile.open(pvtufname.str().c_str());
  129. if (pvtufile.fail()) return;
  130. }
  131. { // Write to file.
  132. pvtufile << "<?xml version=\"1.0\"?>\n";
  133. pvtufile << "<VTKFile type=\"PUnstructuredGrid\">\n";
  134. pvtufile << " <PUnstructuredGrid GhostLevel=\"0\">\n";
  135. pvtufile << " <PPoints>\n";
  136. pvtufile << " <PDataArray type=\"Float" << sizeof(VTKReal) * 8 << "\" NumberOfComponents=\"3\" Name=\"Position\"/>\n";
  137. pvtufile << " </PPoints>\n";
  138. pvtufile << " <PPointData>\n";
  139. if (value_dof) { // value
  140. pvtufile << " <PDataArray type=\"Float" << sizeof(VTKReal) * 8 << "\" NumberOfComponents=\"" << value_dof << "\" Name=\"value\"/>\n";
  141. }
  142. { // mpi_rank
  143. pvtufile << " <PDataArray type=\"Int32\" NumberOfComponents=\"1\" Name=\"mpi_rank\"/>\n";
  144. }
  145. pvtufile << " </PPointData>\n";
  146. {
  147. // Extract filename from path.
  148. std::stringstream vtupath;
  149. vtupath << '/' << fname;
  150. std::string pathname = vtupath.str();
  151. std::string fname_ = pathname.substr(pathname.find_last_of("/\\") + 1);
  152. // char *fname_ = (char*)strrchr(vtupath.str().c_str(), '/') + 1;
  153. // std::string fname_ =
  154. // boost::filesystem::path(fname).filename().string().
  155. for (Integer i = 0; i < np; i++) pvtufile << " <Piece Source=\"" << fname_ << std::setfill('0') << std::setw(6) << i << ".vtu\"/>\n";
  156. }
  157. pvtufile << " </PUnstructuredGrid>\n";
  158. pvtufile << "</VTKFile>\n";
  159. }
  160. pvtufile.close(); // close file
  161. }
  162. };
  163. };
  164. template <Integer DIM> class Tree {
  165. public:
  // Per-node flags, stored as two single-bit fields.
  struct NodeAttr {
    unsigned char Leaf : 1;   // set when the next node in node_mid is not a descendant of this node
    unsigned char Ghost : 1;  // set when the node's Morton ID lies outside this rank's [min, next-min) range
  };
  169. struct NodeLists {
  170. Long p2n;
  171. Long parent;
  172. Long child[1 << DIM];
  173. Long nbr[sctl::pow<DIM,Integer>(3)];
  174. };
  175. static constexpr Integer Dim() {
  176. return DIM;
  177. }
  178. Tree(const Comm& comm_ = Comm::Self()) : comm(comm_) {
  179. Integer rank = comm.Rank();
  180. Integer np = comm.Size();
  181. Vector<double> coord;
  182. { // Set coord
  183. Long N0 = 1;
  184. while (sctl::pow<DIM,Long>(N0) < np) N0++;
  185. Long N = sctl::pow<DIM,Long>(N0);
  186. Long start = N * (rank+0) / np;
  187. Long end = N * (rank+1) / np;
  188. coord.ReInit((end-start)*DIM);
  189. for (Long i = start; i < end; i++) {
  190. Long idx = i;
  191. for (Integer k = 0; k < DIM; k++) {
  192. coord[(i-start)*DIM+k] = (idx % N0) / (double)N0;
  193. idx /= N0;
  194. }
  195. }
  196. }
  197. this->UpdateRefinement(coord);
  198. }
  199. ~Tree() {
  200. #ifdef SCTL_MEMDEBUG
  201. for (auto& pair : node_data) {
  202. SCTL_ASSERT(node_cnt.find(pair.first) != node_cnt.end());
  203. }
  204. #endif
  205. }
  206. const Vector<Morton<DIM>>& GetPartitionMID() const {
  207. return mins;
  208. }
  209. const Vector<Morton<DIM>>& GetNodeMID() const {
  210. return node_mid;
  211. }
  212. const Vector<NodeAttr>& GetNodeAttr() const {
  213. return node_attr;
  214. }
  215. const Vector<NodeLists>& GetNodeLists() const {
  216. return node_lst;
  217. }
  218. const Comm& GetComm() const {
  219. return comm;
  220. }
  221. template <class Real> void UpdateRefinement(const Vector<Real>& coord, Long M = 1, bool balance21 = 0, bool periodic = 0) {
  222. Integer np = comm.Size();
  223. Integer rank = comm.Rank();
  224. Vector<Morton<DIM>> node_mid_orig;
  225. Long start_idx_orig, end_idx_orig;
  226. if (mins.Dim()) { // Set start_idx_orig, end_idx_orig
  227. start_idx_orig = std::lower_bound(node_mid.begin(), node_mid.end(), mins[rank]) - node_mid.begin();
  228. end_idx_orig = std::lower_bound(node_mid.begin(), node_mid.end(), (rank+1==np ? Morton<DIM>().Next() : mins[rank+1])) - node_mid.begin();
  229. node_mid_orig.ReInit(end_idx_orig - start_idx_orig, node_mid.begin() + start_idx_orig, true);
  230. } else {
  231. start_idx_orig = 0;
  232. end_idx_orig = 0;
  233. }
  234. auto coarsest_ancestor_mid = [](const Morton<DIM>& m0) {
  235. Morton<DIM> md;
  236. Integer d0 = m0.Depth();
  237. for (Integer d = 0; d <= d0; d++) {
  238. md = m0.Ancestor(d);
  239. if (md.Ancestor(d0) == m0) break;
  240. }
  241. return md;
  242. };
  243. Morton<DIM> pt_mid0;
  244. Vector<Morton<DIM>> pt_mid;
  245. { // Construct sorted pt_mid
  246. Long Npt = coord.Dim() / DIM;
  247. pt_mid.ReInit(Npt);
  248. for (Long i = 0; i < Npt; i++) {
  249. pt_mid[i] = Morton<DIM>(coord.begin() + i*DIM);
  250. }
  251. Vector<Morton<DIM>> sorted_mid;
  252. comm.HyperQuickSort(pt_mid, sorted_mid);
  253. pt_mid.Swap(sorted_mid);
  254. SCTL_ASSERT(pt_mid.Dim());
  255. pt_mid0 = pt_mid[0];
  256. }
  257. { // Update M = global_min(pt_mid.Dim(), M)
  258. Long M0, M1, Npt = pt_mid.Dim();
  259. comm.Allreduce(Ptr2ConstItr<Long>(&M,1), Ptr2Itr<Long>(&M0,1), 1, Comm::CommOp::MIN);
  260. comm.Allreduce(Ptr2ConstItr<Long>(&Npt,1), Ptr2Itr<Long>(&M1,1), 1, Comm::CommOp::MIN);
  261. M = std::min(M0,M1);
  262. SCTL_ASSERT(M > 0);
  263. }
  264. { // pt_mid <-- [M points from rank-1; pt_mid; M points from rank+1]
  265. Long send_size0 = (rank+1<np ? M : 0);
  266. Long send_size1 = (rank > 0 ? M : 0);
  267. Long recv_size0 = (rank > 0 ? M : 0);
  268. Long recv_size1 = (rank+1<np ? M : 0);
  269. Vector<Morton<DIM>> pt_mid_(recv_size0 + pt_mid.Dim() + recv_size1);
  270. memcopy(pt_mid_.begin()+recv_size0, pt_mid.begin(), pt_mid.Dim());
  271. void* recv_req0 = comm.Irecv(pt_mid_.begin(), recv_size0, (rank+np-1)%np, 0);
  272. void* recv_req1 = comm.Irecv(pt_mid_.begin() + recv_size0 + pt_mid.Dim(), recv_size1, (rank+1)%np, 1);
  273. void* send_req0 = comm.Isend(pt_mid .begin() + pt_mid.Dim() - send_size0, send_size0, (rank+1)%np, 0);
  274. void* send_req1 = comm.Isend(pt_mid .begin(), send_size1, (rank+np-1)%np, 1);
  275. comm.Wait(recv_req0);
  276. comm.Wait(recv_req1);
  277. comm.Wait(send_req0);
  278. comm.Wait(send_req1);
  279. pt_mid.Swap(pt_mid_);
  280. }
  281. { // Build linear MortonID tree from pt_mid
  282. node_mid.ReInit(0);
  283. Long idx = 0;
  284. Morton<DIM> m0;
  285. Morton<DIM> mend = Morton<DIM>().Next();
  286. while (m0 < mend) {
  287. Integer d = m0.Depth();
  288. Morton<DIM> m1 = (idx + M < pt_mid.Dim() ? pt_mid[idx+M] : Morton<DIM>().Next());
  289. while (d < Morton<DIM>::MAX_DEPTH && m0.Ancestor(d) == m1.Ancestor(d)) {
  290. node_mid.PushBack(m0.Ancestor(d));
  291. d++;
  292. }
  293. m0 = m0.Ancestor(d);
  294. node_mid.PushBack(m0);
  295. m0 = m0.Next();
  296. idx = std::lower_bound(pt_mid.begin(), pt_mid.end(), m0) - pt_mid.begin();
  297. }
  298. }
  299. { // Set mins
  300. mins.ReInit(np);
  301. Long min_idx = std::lower_bound(node_mid.begin(), node_mid.end(), pt_mid0) - node_mid.begin() - 1;
  302. if (!rank || min_idx < 0) min_idx = 0;
  303. Morton<DIM> m0 = coarsest_ancestor_mid(node_mid[min_idx]);
  304. comm.Allgather(Ptr2ConstItr<Morton<DIM>>(&m0,1), 1, mins.begin(), 1);
  305. }
  306. if (balance21) { // 2:1 balance refinement // TODO: optimize
  307. Vector<Morton<DIM>> parent_mid;
  308. { // add balancing Morton IDs
  309. Vector<std::set<Morton<DIM>>> parent_mid_set(Morton<DIM>::MAX_DEPTH+1);
  310. Vector<Morton<DIM>> nlst;
  311. for (const auto& m0 : node_mid) {
  312. Integer d0 = m0.Depth();
  313. parent_mid_set[m0.Depth()].insert(m0.Ancestor(d0-1));
  314. }
  315. for (Integer d = Morton<DIM>::MAX_DEPTH; d > 0; d--) {
  316. for (const auto& m : parent_mid_set[d]) {
  317. m.NbrList(nlst, d-1, periodic);
  318. parent_mid_set[d-1].insert(nlst.begin(), nlst.end());
  319. parent_mid.PushBack(m);
  320. }
  321. }
  322. }
  323. Vector<Morton<DIM>> parent_mid_sorted;
  324. { // sort and repartition
  325. comm.HyperQuickSort(parent_mid, parent_mid_sorted);
  326. comm.PartitionS(parent_mid_sorted, mins[comm.Rank()]);
  327. }
  328. Vector<Morton<DIM>> tmp_mid;
  329. { // add children
  330. Vector<Morton<DIM>> clst;
  331. tmp_mid.PushBack(Morton<DIM>()); // include root node
  332. for (Long i = 0; i < parent_mid_sorted.Dim(); i++) {
  333. if (i+1 == parent_mid_sorted.Dim() || parent_mid_sorted[i] != parent_mid_sorted[i+1]) {
  334. const auto& m = parent_mid_sorted[i];
  335. tmp_mid.PushBack(m);
  336. m.Children(clst);
  337. for (const auto& c : clst) tmp_mid.PushBack(c);
  338. }
  339. }
  340. auto insert_ancestor_children = [](Vector<Morton<DIM>>& mvec, const Morton<DIM>& m0) {
  341. Integer d0 = m0.Depth();
  342. Vector<Morton<DIM>> clst;
  343. for (Integer d = 0; d < d0; d++) {
  344. m0.Ancestor(d).Children(clst);
  345. for (const auto& m : clst) mvec.PushBack(m);
  346. }
  347. };
  348. insert_ancestor_children(tmp_mid, mins[rank]);
  349. omp_par::merge_sort(tmp_mid.begin(), tmp_mid.end());
  350. }
  351. node_mid.ReInit(0);
  352. for (Long i = 0; i < tmp_mid.Dim(); i++) { // remove duplicates
  353. if (i+1 == tmp_mid.Dim() || tmp_mid[i] != tmp_mid[i+1]) {
  354. node_mid.PushBack(tmp_mid[i]);
  355. }
  356. }
  357. }
  358. { // Add place-holder for ghost nodes
  359. Long start_idx, end_idx;
  360. { // Set start_idx, end_idx
  361. start_idx = std::lower_bound(node_mid.begin(), node_mid.end(), mins[rank]) - node_mid.begin();
  362. end_idx = std::lower_bound(node_mid.begin(), node_mid.end(), (rank+1==np ? Morton<DIM>().Next() : mins[rank+1])) - node_mid.begin();
  363. }
  364. { // Set user_mid, user_cnt
  365. Vector<SortPair<Long,Morton<DIM>>> user_node_lst;
  366. Vector<Morton<DIM>> nlst;
  367. std::set<Long> user_procs;
  368. for (Long i = start_idx; i < end_idx; i++) {
  369. Morton<DIM> m0 = node_mid[i];
  370. Integer d0 = m0.Depth();
  371. m0.NbrList(nlst, std::max<Integer>(d0-2,0), periodic);
  372. user_procs.clear();
  373. for (const auto& m : nlst) {
  374. Morton<DIM> m_start = m.DFD();
  375. Morton<DIM> m_end = m.Next();
  376. Integer p_start = std::lower_bound(mins.begin(), mins.end(), m_start) - mins.begin() - 1;
  377. Integer p_end = std::lower_bound(mins.begin(), mins.end(), m_end ) - mins.begin();
  378. SCTL_ASSERT(0 <= p_start);
  379. SCTL_ASSERT(p_start < p_end);
  380. SCTL_ASSERT(p_end <= np);
  381. for (Long p = p_start; p < p_end; p++) {
  382. if (p != rank) user_procs.insert(p);
  383. }
  384. }
  385. for (const auto p : user_procs) {
  386. SortPair<Long,Morton<DIM>> pair;
  387. pair.key = p;
  388. pair.data = m0;
  389. user_node_lst.PushBack(pair);
  390. }
  391. }
  392. omp_par::merge_sort(user_node_lst.begin(), user_node_lst.end());
  393. user_cnt.ReInit(np);
  394. user_mid.ReInit(user_node_lst.Dim());
  395. for (Integer i = 0; i < np; i++) {
  396. SortPair<Long,Morton<DIM>> pair_start, pair_end;
  397. pair_start.key = i;
  398. pair_end.key = i+1;
  399. Long cnt_start = std::lower_bound(user_node_lst.begin(), user_node_lst.end(), pair_start) - user_node_lst.begin();
  400. Long cnt_end = std::lower_bound(user_node_lst.begin(), user_node_lst.end(), pair_end ) - user_node_lst.begin();
  401. user_cnt[i] = cnt_end - cnt_start;
  402. for (Long j = cnt_start; j < cnt_end; j++) {
  403. user_mid[j] = user_node_lst[j].data;
  404. }
  405. std::sort(user_mid.begin() + cnt_start, user_mid.begin() + cnt_end);
  406. }
  407. }
  408. Vector<Morton<DIM>> ghost_mid;
  409. { // SendRecv user_mid
  410. const Vector<Long>& send_cnt = user_cnt;
  411. Vector<Long> send_dsp(np);
  412. scan(send_dsp, send_cnt);
  413. Vector<Long> recv_cnt(np), recv_dsp(np);
  414. comm.Alltoall(send_cnt.begin(), 1, recv_cnt.begin(), 1);
  415. scan(recv_dsp, recv_cnt);
  416. const Vector<Morton<DIM>>& send_mid = user_mid;
  417. Long Nsend = send_dsp[np-1] + send_cnt[np-1];
  418. Long Nrecv = recv_dsp[np-1] + recv_cnt[np-1];
  419. SCTL_ASSERT(send_mid.Dim() == Nsend);
  420. ghost_mid.ReInit(Nrecv);
  421. comm.Alltoallv(send_mid.begin(), send_cnt.begin(), send_dsp.begin(), ghost_mid.begin(), recv_cnt.begin(), recv_dsp.begin());
  422. }
  423. { // Update node_mid <-- ghost_mid + node_mid
  424. Vector<Morton<DIM>> new_mid(end_idx-start_idx + ghost_mid.Dim());
  425. Long Nsplit = std::lower_bound(ghost_mid.begin(), ghost_mid.end(), mins[rank]) - ghost_mid.begin();
  426. for (Long i = 0; i < Nsplit; i++) {
  427. new_mid[i] = ghost_mid[i];
  428. }
  429. for (Long i = 0; i < end_idx - start_idx; i++) {
  430. new_mid[Nsplit + i] = node_mid[start_idx + i];
  431. }
  432. for (Long i = Nsplit; i < ghost_mid.Dim(); i++) {
  433. new_mid[end_idx - start_idx + i] = ghost_mid[i];
  434. }
  435. node_mid.Swap(new_mid);
  436. }
  437. }
  438. { // Set node_mid, node_attr
  439. Morton<DIM> m0 = (rank ? mins[rank] : Morton<DIM>() );
  440. Morton<DIM> m1 = (rank+1<np ? mins[rank+1] : Morton<DIM>().Next());
  441. Long Nnodes = node_mid.Dim();
  442. node_attr.ReInit(Nnodes);
  443. for (Long i = 0; i < Nnodes; i++) {
  444. node_attr[i].Leaf = !(i+1<Nnodes && node_mid[i].isAncestor(node_mid[i+1]));
  445. node_attr[i].Ghost = (node_mid[i] < m0 || node_mid[i] >= m1);
  446. }
  447. }
  448. { // Set node_lst
  449. static constexpr Integer MAX_CHILD = (1u << DIM);
  450. static constexpr Integer MAX_NBRS = sctl::pow<DIM,Integer>(3);
  451. Long Nnodes = node_mid.Dim();
  452. node_lst.ReInit(Nnodes);
  453. Vector<Long> ancestors(Morton<DIM>::MAX_DEPTH);
  454. Vector<Long> child_cnt(Morton<DIM>::MAX_DEPTH);
  455. #pragma omp parallel for schedule(static)
  456. for (Long i = 0; i < Nnodes; i++) {
  457. node_lst[i].p2n = -1;
  458. node_lst[i].parent = -1;
  459. for (Integer j = 0; j < MAX_CHILD; j++) node_lst[i].child[j] = -1;
  460. for (Integer j = 0; j < MAX_NBRS; j++) node_lst[i].nbr[j] = -1;
  461. }
  462. for (Long i = 0; i < Nnodes; i++) { // Set parent_lst, child_lst_
  463. Integer depth = node_mid[i].Depth();
  464. ancestors[depth] = i;
  465. child_cnt[depth] = 0;
  466. if (depth) {
  467. Long p = ancestors[depth-1];
  468. Long& c = child_cnt[depth-1];
  469. node_lst[i].parent = p;
  470. node_lst[p].child[c] = i;
  471. node_lst[p].p2n = c;
  472. c++;
  473. }
  474. }
  475. // TODO: add nbr-list
  476. }
  477. if (0) { // Check tree
  478. Morton<DIM> m0;
  479. SCTL_ASSERT(node_mid.Dim() && m0 == node_mid[0]);
  480. for (Long i = 1; i < node_mid.Dim(); i++) {
  481. const auto& m = node_mid[i];
  482. if (m0.isAncestor(m)) m0 = m0.Ancestor(m0.Depth()+1);
  483. else m0 = m0.Next();
  484. SCTL_ASSERT(m0 == m);
  485. }
  486. SCTL_ASSERT(m0.Next() == Morton<DIM>().Next());
  487. }
  488. { // Update node_data, node_cnt
  489. Long start_idx, end_idx;
  490. { // Set start_idx, end_idx
  491. start_idx = std::lower_bound(node_mid.begin(), node_mid.end(), mins[rank]) - node_mid.begin();
  492. end_idx = std::lower_bound(node_mid.begin(), node_mid.end(), (rank+1==np ? Morton<DIM>().Next() : mins[rank+1])) - node_mid.begin();
  493. }
  494. comm.PartitionS(node_mid_orig, mins[comm.Rank()]);
  495. Vector<Long> new_cnt_range0(node_mid.Dim()), new_cnt_range1(node_mid.Dim());
  496. { // Set new_cnt_range0, new_cnt_range1
  497. for (Long i = 0; i < start_idx; i++) {
  498. new_cnt_range0[i] = 0;
  499. new_cnt_range1[i] = 0;
  500. }
  501. for (Long i = start_idx; i < end_idx; i++) {
  502. auto m0 = (node_mid[i+0]);
  503. auto m1 = (i+1==end_idx ? Morton<DIM>().Next() : (node_mid[i+1]));
  504. new_cnt_range0[i] = std::lower_bound(node_mid_orig.begin(), node_mid_orig.begin() + node_mid_orig.Dim(), m0) - node_mid_orig.begin();
  505. new_cnt_range1[i] = std::lower_bound(node_mid_orig.begin(), node_mid_orig.begin() + node_mid_orig.Dim(), m1) - node_mid_orig.begin();
  506. }
  507. for (Long i = end_idx; i < node_mid.Dim(); i++) {
  508. new_cnt_range0[i] = 0;
  509. new_cnt_range1[i] = 0;
  510. }
  511. }
  512. Vector<Long> cnt_tmp;
  513. Vector<char> data_tmp;
  514. for (const auto& pair : node_data) {
  515. const std::string& data_name = pair.first;
  516. Long dof;
  517. Iterator<Vector<char>> data_;
  518. Iterator<Vector<Long>> cnt_;
  519. GetData_(data_, cnt_, data_name);
  520. { // Set dof
  521. StaticArray<Long,2> Nl, Ng;
  522. Nl[0] = data_->Dim();
  523. Nl[1] = omp_par::reduce(cnt_->begin(), cnt_->Dim());
  524. comm.Allreduce((ConstIterator<Long>)Nl, (Iterator<Long>)Ng, 2, Comm::CommOp::SUM);
  525. dof = Ng[0] / std::max<Long>(Ng[1],1);
  526. SCTL_ASSERT(Nl[0] == Nl[1] * dof);
  527. SCTL_ASSERT(Ng[0] == Ng[1] * dof);
  528. }
  529. Long data_dsp = omp_par::reduce(cnt_->begin(), start_idx_orig);
  530. Long data_cnt = omp_par::reduce(cnt_->begin() + start_idx_orig, end_idx_orig - start_idx_orig);
  531. data_tmp.ReInit(data_cnt * dof, data_->begin() + data_dsp * dof, true);
  532. cnt_tmp.ReInit(end_idx_orig - start_idx_orig, cnt_->begin() + start_idx_orig, true);
  533. comm.PartitionN(cnt_tmp, node_mid_orig.Dim());
  534. cnt_->ReInit(node_mid.Dim());
  535. for (Long i = 0; i < node_mid.Dim(); i++) {
  536. Long sum = 0;
  537. Long j0 = new_cnt_range0[i];
  538. Long j1 = new_cnt_range1[i];
  539. for (Long j = j0; j < j1; j++) sum += cnt_tmp[j];
  540. cnt_[0][i] = sum;
  541. }
  542. SCTL_ASSERT(omp_par::reduce(cnt_->begin(), cnt_->Dim()) == omp_par::reduce(cnt_tmp.begin(), cnt_tmp.Dim()));
  543. Long Ndata = omp_par::reduce(cnt_->begin(), cnt_->Dim()) * dof;
  544. comm.PartitionN(data_tmp, Ndata);
  545. SCTL_ASSERT(data_tmp.Dim() == Ndata);
  546. data_->Swap(data_tmp);
  547. }
  548. }
  549. }
  550. template <class ValueType> void AddData(const std::string& name, const Vector<ValueType>& data, const Vector<Long>& cnt) {
  551. Long dof;
  552. { // Check dof
  553. StaticArray<Long,2> Nl, Ng;
  554. Nl[0] = data.Dim();
  555. Nl[1] = omp_par::reduce(cnt.begin(), cnt.Dim());
  556. comm.Allreduce((ConstIterator<Long>)Nl, (Iterator<Long>)Ng, 2, Comm::CommOp::SUM);
  557. dof = Ng[0] / std::max<Long>(Ng[1],1);
  558. SCTL_ASSERT(Nl[0] == Nl[1] * dof);
  559. SCTL_ASSERT(Ng[0] == Ng[1] * dof);
  560. }
  561. if (dof) SCTL_ASSERT(cnt.Dim() == node_mid.Dim());
  562. SCTL_ASSERT(node_data.find(name) == node_data.end());
  563. node_data[name].ReInit(data.Dim()*sizeof(ValueType), (Iterator<char>)data.begin(), true);
  564. node_cnt [name] = cnt;
  565. }
  566. template <class ValueType> void GetData(Vector<ValueType>& data, Vector<Long>& cnt, const std::string& name) const {
  567. const auto data_ = node_data.find(name);
  568. const auto cnt_ = node_cnt.find(name);
  569. SCTL_ASSERT(data_ != node_data.end());
  570. SCTL_ASSERT( cnt_ != node_cnt .end());
  571. data.ReInit(data_->second.Dim()/sizeof(ValueType), (Iterator<ValueType>)data_->second.begin(), false);
  572. SCTL_ASSERT(data.Dim()*(Long)sizeof(ValueType) == data_->second.Dim());
  573. cnt .ReInit( cnt_->second.Dim(), (Iterator<Long>)cnt_->second.begin(), false);
  574. }
  575. template <class ValueType> void ReduceBroadcast(const std::string& name) {
  576. Integer np = comm.Size();
  577. Integer rank = comm.Rank();
  578. Vector<Long> dsp;
  579. Iterator<Vector<char>> data_;
  580. Iterator<Vector<Long>> cnt_;
  581. GetData_(data_, cnt_, name);
  582. Vector<ValueType> data(data_->Dim()/sizeof(ValueType), (Iterator<ValueType>)data_->begin(), false);
  583. Vector<Long>& cnt = *cnt_;
  584. scan(dsp, cnt);
  585. Long dof;
  586. { // Set dof
  587. StaticArray<Long,2> Nl, Ng;
  588. Nl[0] = data.Dim();
  589. Nl[1] = omp_par::reduce(cnt.begin(), cnt.Dim());
  590. comm.Allreduce((ConstIterator<Long>)Nl, (Iterator<Long>)Ng, 2, Comm::CommOp::SUM);
  591. dof = Ng[0] / std::max<Long>(Ng[1],1);
  592. SCTL_ASSERT(Nl[0] == Nl[1] * dof);
  593. SCTL_ASSERT(Ng[0] == Ng[1] * dof);
  594. }
  595. { // Reduce
  596. Vector<Morton<DIM>> send_mid, recv_mid;
  597. Vector<Long> send_node_cnt(np), send_node_dsp(np);
  598. Vector<Long> recv_node_cnt(np), recv_node_dsp(np);
  599. { // Set send_mid, send_node_cnt, send_node_dsp, recv_mid, recv_node_cnt, recv_node_dsp
  600. { // Set send_mid
  601. Morton<DIM> m0 = mins[rank];
  602. for (Integer d = 0; d < m0.Depth(); d++) {
  603. send_mid.PushBack(m0.Ancestor(d));
  604. }
  605. }
  606. for (Integer p = 0; p < np; p++) {
  607. Long start_idx = std::lower_bound(send_mid.begin(), send_mid.end(), mins[p]) - send_mid.begin();
  608. Long end_idx = std::lower_bound(send_mid.begin(), send_mid.end(), (p+1==np ? Morton<DIM>().Next() : mins[p+1])) - send_mid.begin();
  609. send_node_cnt[p] = end_idx - start_idx;
  610. }
  611. scan(send_node_dsp, send_node_cnt);
  612. SCTL_ASSERT(send_node_dsp[np-1]+send_node_cnt[np-1] == send_mid.Dim());
  613. comm.Alltoall(send_node_cnt.begin(), 1, recv_node_cnt.begin(), 1);
  614. scan(recv_node_dsp, recv_node_cnt);
  615. recv_mid.ReInit(recv_node_dsp[np-1] + recv_node_cnt[np-1]);
  616. comm.Alltoallv(send_mid.begin(), send_node_cnt.begin(), send_node_dsp.begin(), recv_mid.begin(), recv_node_cnt.begin(), recv_node_dsp.begin());
  617. }
  618. Vector<Long> send_data_cnt, send_data_dsp;
  619. Vector<Long> recv_data_cnt, recv_data_dsp;
  620. { // Set send_data_cnt, send_data_dsp
  621. send_data_cnt.ReInit(send_mid.Dim());
  622. recv_data_cnt.ReInit(recv_mid.Dim());
  623. for (Long i = 0; i < send_mid.Dim(); i++) {
  624. Long idx = std::lower_bound(node_mid.begin(), node_mid.end(), send_mid[i]) - node_mid.begin();
  625. SCTL_ASSERT(send_mid[i] == node_mid[idx]);
  626. send_data_cnt[i] = cnt[idx];
  627. }
  628. scan(send_data_dsp, send_data_cnt);
  629. comm.Alltoallv(send_data_cnt.begin(), send_node_cnt.begin(), send_node_dsp.begin(), recv_data_cnt.begin(), recv_node_cnt.begin(), recv_node_dsp.begin());
  630. scan(recv_data_dsp, recv_data_cnt);
  631. }
  632. Vector<ValueType> send_buff, recv_buff;
  633. Vector<Long> send_buff_cnt(np), send_buff_dsp(np);
  634. Vector<Long> recv_buff_cnt(np), recv_buff_dsp(np);
  635. { // Set send_buff, send_buff_cnt, send_buff_dsp, recv_buff, recv_buff_cnt, recv_buff_dsp
  636. Long N_send_nodes = send_mid.Dim();
  637. Long N_recv_nodes = recv_mid.Dim();
  638. if (N_send_nodes) send_buff.ReInit((send_data_dsp[N_send_nodes-1] + send_data_cnt[N_send_nodes-1]) * dof);
  639. if (N_recv_nodes) recv_buff.ReInit((recv_data_dsp[N_recv_nodes-1] + recv_data_cnt[N_recv_nodes-1]) * dof);
  640. for (Long i = 0; i < N_send_nodes; i++) {
  641. Long idx = std::lower_bound(node_mid.begin(), node_mid.end(), send_mid[i]) - node_mid.begin();
  642. SCTL_ASSERT(send_mid[i] == node_mid[idx]);
  643. Long dsp_ = dsp[idx] * dof;
  644. Long cnt_ = cnt[idx] * dof;
  645. Long send_data_dsp_ = send_data_dsp[i] * dof;
  646. Long send_data_cnt_ = send_data_cnt[i] * dof;
  647. SCTL_ASSERT(send_data_cnt_ == cnt_);
  648. for (Long j = 0; j < cnt_; j++) {
  649. send_buff[send_data_dsp_+j] = data[dsp_+j];
  650. }
  651. }
  652. for (Integer p = 0; p < np; p++) {
  653. Long send_buff_cnt_ = 0;
  654. Long recv_buff_cnt_ = 0;
  655. for (Long i = 0; i < send_node_cnt[p]; i++) {
  656. send_buff_cnt_ += send_data_cnt[send_node_dsp[p]+i];
  657. }
  658. for (Long i = 0; i < recv_node_cnt[p]; i++) {
  659. recv_buff_cnt_ += recv_data_cnt[recv_node_dsp[p]+i];
  660. }
  661. send_buff_cnt[p] = send_buff_cnt_ * dof;
  662. recv_buff_cnt[p] = recv_buff_cnt_ * dof;
  663. }
  664. scan(send_buff_dsp, send_buff_cnt);
  665. scan(recv_buff_dsp, recv_buff_cnt);
  666. comm.Alltoallv(send_buff.begin(), send_buff_cnt.begin(), send_buff_dsp.begin(), recv_buff.begin(), recv_buff_cnt.begin(), recv_buff_dsp.begin());
  667. }
  668. { // Reduction
  669. Long N_recv_nodes = recv_mid.Dim();
  670. for (Long i = 0; i < N_recv_nodes; i++) {
  671. Long idx = std::lower_bound(node_mid.begin(), node_mid.end(), recv_mid[i]) - node_mid.begin();
  672. Long dsp_ = dsp[idx] * dof;
  673. Long cnt_ = cnt[idx] * dof;
  674. Long recv_data_dsp_ = recv_data_dsp[i] * dof;
  675. Long recv_data_cnt_ = recv_data_cnt[i] * dof;
  676. if (recv_data_cnt_ == cnt_) {
  677. for (Long j = 0; j < cnt_; j++) {
  678. data[dsp_+j] += recv_buff[recv_data_dsp_+j];
  679. }
  680. }
  681. }
  682. }
  683. }
  684. { // Broadcast
  685. const Vector<Morton<DIM>>& send_mid = user_mid;
  686. const Vector<Long>& send_node_cnt = user_cnt;
  687. Vector<Long> send_node_dsp(np);
  688. { // Set send_dsp
  689. SCTL_ASSERT(send_node_cnt.Dim() == np);
  690. scan(send_node_dsp, send_node_cnt);
  691. SCTL_ASSERT(send_node_dsp[np-1] + send_node_cnt[np-1] == send_mid.Dim());
  692. }
  693. Vector<Morton<DIM>> recv_mid;
  694. Vector<Long> recv_node_cnt(np), recv_node_dsp(np);
  695. { // Set recv_mid, recv_node_cnt, recv_node_dsp
  696. comm.Alltoall(send_node_cnt.begin(), 1, recv_node_cnt.begin(), 1);
  697. scan(recv_node_dsp, recv_node_cnt);
  698. recv_mid.ReInit(recv_node_dsp[np-1] + recv_node_cnt[np-1]);
  699. comm.Alltoallv(send_mid.begin(), send_node_cnt.begin(), send_node_dsp.begin(), recv_mid.begin(), recv_node_cnt.begin(), recv_node_dsp.begin());
  700. }
  701. Vector<Long> send_data_cnt, send_data_dsp;
  702. Vector<Long> recv_data_cnt, recv_data_dsp;
  703. { // Set send_data_cnt, send_data_dsp
  704. send_data_cnt.ReInit(send_mid.Dim());
  705. recv_data_cnt.ReInit(recv_mid.Dim());
  706. for (Long i = 0; i < send_mid.Dim(); i++) {
  707. Long idx = std::lower_bound(node_mid.begin(), node_mid.end(), send_mid[i]) - node_mid.begin();
  708. SCTL_ASSERT(send_mid[i] == node_mid[idx]);
  709. send_data_cnt[i] = cnt[idx];
  710. }
  711. scan(send_data_dsp, send_data_cnt);
  712. comm.Alltoallv(send_data_cnt.begin(), send_node_cnt.begin(), send_node_dsp.begin(), recv_data_cnt.begin(), recv_node_cnt.begin(), recv_node_dsp.begin());
  713. scan(recv_data_dsp, recv_data_cnt);
  714. }
  715. Vector<ValueType> send_buff, recv_buff;
  716. Vector<Long> send_buff_cnt(np), send_buff_dsp(np);
  717. Vector<Long> recv_buff_cnt(np), recv_buff_dsp(np);
  718. { // Set send_buff, send_buff_cnt, send_buff_dsp, recv_buff, recv_buff_cnt, recv_buff_dsp
  719. Long N_send_nodes = send_mid.Dim();
  720. Long N_recv_nodes = recv_mid.Dim();
  721. if (N_send_nodes) send_buff.ReInit((send_data_dsp[N_send_nodes-1] + send_data_cnt[N_send_nodes-1]) * dof);
  722. if (N_recv_nodes) recv_buff.ReInit((recv_data_dsp[N_recv_nodes-1] + recv_data_cnt[N_recv_nodes-1]) * dof);
  723. for (Long i = 0; i < N_send_nodes; i++) {
  724. Long idx = std::lower_bound(node_mid.begin(), node_mid.end(), send_mid[i]) - node_mid.begin();
  725. SCTL_ASSERT(send_mid[i] == node_mid[idx]);
  726. Long dsp_ = dsp[idx] * dof;
  727. Long cnt_ = cnt[idx] * dof;
  728. Long send_data_dsp_ = send_data_dsp[i] * dof;
  729. Long send_data_cnt_ = send_data_cnt[i] * dof;
  730. SCTL_ASSERT(send_data_cnt_ == cnt_);
  731. for (Long j = 0; j < cnt_; j++) {
  732. send_buff[send_data_dsp_+j] = data[dsp_+j];
  733. }
  734. }
  735. for (Integer p = 0; p < np; p++) {
  736. Long send_buff_cnt_ = 0;
  737. Long recv_buff_cnt_ = 0;
  738. for (Long i = 0; i < send_node_cnt[p]; i++) {
  739. send_buff_cnt_ += send_data_cnt[send_node_dsp[p]+i];
  740. }
  741. for (Long i = 0; i < recv_node_cnt[p]; i++) {
  742. recv_buff_cnt_ += recv_data_cnt[recv_node_dsp[p]+i];
  743. }
  744. send_buff_cnt[p] = send_buff_cnt_ * dof;
  745. recv_buff_cnt[p] = recv_buff_cnt_ * dof;
  746. }
  747. scan(send_buff_dsp, send_buff_cnt);
  748. scan(recv_buff_dsp, recv_buff_cnt);
  749. comm.Alltoallv(send_buff.begin(), send_buff_cnt.begin(), send_buff_dsp.begin(), recv_buff.begin(), recv_buff_cnt.begin(), recv_buff_dsp.begin());
  750. }
  751. Long start_idx, end_idx;
  752. { // Set start_idx, end_idx
  753. start_idx = std::lower_bound(node_mid.begin(), node_mid.end(), mins[rank]) - node_mid.begin();
  754. end_idx = std::lower_bound(node_mid.begin(), node_mid.end(), (rank+1==np ? Morton<DIM>().Next() : mins[rank+1])) - node_mid.begin();
  755. SCTL_ASSERT(0 <= start_idx);
  756. SCTL_ASSERT(start_idx < end_idx);
  757. SCTL_ASSERT(end_idx <= node_mid.Dim());
  758. }
  759. { // Update data <-- data + recv_buff
  760. Long Nsplit = std::lower_bound(recv_mid.begin(), recv_mid.end(), mins[rank]) - recv_mid.begin();
  761. SCTL_ASSERT(recv_mid.Dim()-Nsplit == node_mid.Dim()-end_idx);
  762. SCTL_ASSERT(Nsplit == start_idx);
  763. Long N0 = (start_idx ? dsp[start_idx-1] + cnt[start_idx-1] : 0) * dof;
  764. Long N1 = (end_idx ? dsp[end_idx-1] + cnt[end_idx-1] : 0) * dof;
  765. Long Ns = (Nsplit ? recv_data_dsp[Nsplit-1] + recv_data_cnt[Nsplit-1] : 0) * dof;
  766. if (N0 != Ns || recv_buff.Dim() != N0+data.Dim()-N1) { // resize data and preserve non-ghost data
  767. Vector<char> data_new((recv_buff.Dim() + N1-N0) * sizeof(ValueType));
  768. memcopy(data_new.begin() + Ns * sizeof(ValueType), data_->begin() + N0 * sizeof(ValueType), (N1-N0) * sizeof(ValueType));
  769. data_->Swap(data_new);
  770. data.ReInit(data_->Dim()/sizeof(ValueType), (Iterator<ValueType>)data_->begin(), false);
  771. }
  772. memcopy(cnt.begin(), recv_data_cnt.begin(), start_idx);
  773. memcopy(cnt.begin()+end_idx, recv_data_cnt.begin()+Nsplit, node_mid.Dim()-end_idx);
  774. memcopy(data.begin(), recv_buff.begin(), Ns);
  775. memcopy(data.begin()+data.Dim()+Ns-recv_buff.Dim(), recv_buff.begin()+Ns, recv_buff.Dim()-Ns);
  776. }
  777. }
  778. }
  779. void DeleteData(const std::string& name) {
  780. SCTL_ASSERT(node_data.find(name) != node_data.end());
  781. SCTL_ASSERT(node_cnt .find(name) != node_cnt .end());
  782. node_data.erase(name);
  783. node_cnt .erase(name);
  784. }
  785. void WriteTreeVTK(std::string fname, bool show_ghost = false) const {
  786. typedef typename VTUData::VTKReal VTKReal;
  787. VTUData vtu_data;
  788. if (DIM <= 3) { // Set vtu data
  789. static const Integer Ncorner = (1u << DIM);
  790. Vector<VTKReal> &coord = vtu_data.coord;
  791. //Vector<VTKReal> &value = vtu_data.value;
  792. Vector<int32_t> &connect = vtu_data.connect;
  793. Vector<int32_t> &offset = vtu_data.offset;
  794. Vector<uint8_t> &types = vtu_data.types;
  795. StaticArray<VTKReal, DIM> c;
  796. Long point_cnt = coord.Dim() / 3;
  797. Long connect_cnt = connect.Dim();
  798. for (Long nid = 0; nid < node_mid.Dim(); nid++) {
  799. const Morton<DIM> &mid = node_mid[nid];
  800. const NodeAttr &attr = node_attr[nid];
  801. if (!show_ghost && attr.Ghost) continue;
  802. if (!attr.Leaf) continue;
  803. mid.Coord((Iterator<VTKReal>)c);
  804. VTKReal s = sctl::pow<VTKReal>(0.5, mid.Depth());
  805. for (Integer j = 0; j < Ncorner; j++) {
  806. for (Integer i = 0; i < DIM; i++) coord.PushBack(c[i] + (j & (1u << i) ? 1 : 0) * s);
  807. for (Integer i = DIM; i < 3; i++) coord.PushBack(0);
  808. connect.PushBack(point_cnt);
  809. connect_cnt++;
  810. point_cnt++;
  811. }
  812. offset.PushBack(connect_cnt);
  813. if (DIM == 2)
  814. types.PushBack(8);
  815. else if (DIM == 3)
  816. types.PushBack(11);
  817. else
  818. types.PushBack(4);
  819. }
  820. }
  821. vtu_data.WriteVTK(fname, comm);
  822. }
  823. protected:
  824. void GetData_(Iterator<Vector<char>>& data, Iterator<Vector<Long>>& cnt, const std::string& name) {
  825. auto data_ = node_data.find(name);
  826. const auto cnt_ = node_cnt.find(name);
  827. SCTL_ASSERT(data_ != node_data.end());
  828. SCTL_ASSERT( cnt_ != node_cnt .end());
  829. data = Ptr2Itr<Vector<char>>(&data_->second,1);
  830. cnt = Ptr2Itr<Vector<Long>>(& cnt_->second,1);
  831. }
  832. static void scan(Vector<Long>& dsp, const Vector<Long>& cnt) {
  833. dsp.ReInit(cnt.Dim());
  834. if (cnt.Dim()) dsp[0] = 0;
  835. omp_par::scan(cnt.begin(), dsp.begin(), cnt.Dim());
  836. }
  /**
   * Key/value pair that is ordered by its key only.
   *
   * Intended for sorting payloads (`data`) by an associated `key`; the payload
   * never takes part in the comparison.
   *
   * @tparam A  Key type; must provide operator<.
   * @tparam B  Payload type.
   */
  template <typename A, typename B> struct SortPair {
    /** Strict ordering on `key`; returns bool as expected of a comparison operator. */
    bool operator<(const SortPair<A, B>& other) const { return key < other.key; }

    A key;   // sort key
    B data;  // payload carried along with the key
  };
  842. private:
  843. Vector<Morton<DIM>> mins;
  844. Vector<Morton<DIM>> node_mid;
  845. Vector<NodeAttr> node_attr;
  846. Vector<NodeLists> node_lst;
  847. std::map<std::string, Vector<char>> node_data;
  848. std::map<std::string, Vector<Long>> node_cnt;
  849. Vector<Morton<DIM>> user_mid;
  850. Vector<Long> user_cnt;
  851. Comm comm;
  852. };
  853. template <class Real, Integer DIM, class BaseTree = Tree<DIM>> class PtTree : public BaseTree {
  854. public:
  855. PtTree(const Comm& comm = Comm::Self()) : BaseTree(comm) {}
  856. ~PtTree() {
  857. #ifdef SCTL_MEMDEBUG
  858. for (auto& pair : data_pt_name) {
  859. Vector<Real> data;
  860. Vector<Long> cnt;
  861. this->GetData(data, cnt, pair.second);
  862. SCTL_ASSERT(scatter_idx.find(pair.second) != scatter_idx.end());
  863. }
  864. #endif
  865. }
  866. void UpdateRefinement(const Vector<Real>& coord, Long M = 1, bool balance21 = 0, bool periodic = 0) {
  867. const auto& comm = this->GetComm();
  868. BaseTree::UpdateRefinement(coord, M, balance21, periodic);
  869. Long start_node_idx, end_node_idx;
  870. { // Set start_node_idx, end_node_idx
  871. const auto& mins = this->GetPartitionMID();
  872. const auto& node_mid = this->GetNodeMID();
  873. Integer np = comm.Size();
  874. Integer rank = comm.Rank();
  875. start_node_idx = std::lower_bound(node_mid.begin(), node_mid.end(), mins[rank]) - node_mid.begin();
  876. end_node_idx = std::lower_bound(node_mid.begin(), node_mid.end(), (rank+1==np ? Morton<DIM>().Next() : mins[rank+1])) - node_mid.begin();
  877. }
  878. const auto& mins = this->GetPartitionMID();
  879. const auto& node_mid = this->GetNodeMID();
  880. for (const auto& pair : pt_mid) {
  881. const auto& pt_name = pair.first;
  882. auto& pt_mid_ = pt_mid[pt_name];
  883. auto& scatter_idx_ = scatter_idx[pt_name];
  884. comm.PartitionS(pt_mid_, mins[comm.Rank()]);
  885. comm.PartitionN(scatter_idx_, pt_mid_.Dim());
  886. Vector<Long> pt_cnt(node_mid.Dim());
  887. for (Long i = 0; i < node_mid.Dim(); i++) { // Set pt_cnt
  888. Long start = std::lower_bound(pt_mid_.begin(), pt_mid_.end(), node_mid[i]) - pt_mid_.begin();
  889. Long end = std::lower_bound(pt_mid_.begin(), pt_mid_.end(), (i+1==node_mid.Dim() ? Morton<DIM>().Next() : node_mid[i+1])) - pt_mid_.begin();
  890. if (i == 0) SCTL_ASSERT(start == 0);
  891. if (i+1 == node_mid.Dim()) SCTL_ASSERT(end == pt_mid_.Dim());
  892. pt_cnt[i] = end - start;
  893. }
  894. for (const auto& pair : data_pt_name) {
  895. if (pair.second == pt_name) {
  896. const auto& data_name = pair.first;
  897. Iterator<Vector<char>> data;
  898. Iterator<Vector<Long>> cnt;
  899. this->GetData_(data, cnt, data_name);
  900. { // Update data
  901. Long dof = 0;
  902. { // Set dof
  903. StaticArray<Long,2> Nl = {0, 0}, Ng;
  904. Nl[0] = data->Dim();
  905. for (Long i = 0; i < cnt->Dim(); i++) Nl[1] += cnt[0][i];
  906. comm.Allreduce((ConstIterator<Long>)Nl, (Iterator<Long>)Ng, 2, Comm::CommOp::SUM);
  907. dof = Ng[0] / std::max<Long>(Ng[1],1);
  908. }
  909. Long offset = 0, count = 0;
  910. SCTL_ASSERT(0 <= start_node_idx);
  911. SCTL_ASSERT(start_node_idx <= end_node_idx);
  912. SCTL_ASSERT(end_node_idx <= cnt->Dim());
  913. for (Long i = 0; i < start_node_idx; i++) offset += cnt[0][i];
  914. for (Long i = start_node_idx; i < end_node_idx; i++) count += cnt[0][i];
  915. offset *= dof;
  916. count *= dof;
  917. Vector<char> data_(count, data->begin() + offset);
  918. comm.PartitionN(data_, pt_mid_.Dim());
  919. data->Swap(data_);
  920. }
  921. cnt[0] = pt_cnt;
  922. }
  923. }
  924. }
  925. }
  926. void AddParticles(const std::string& name, const Vector<Real>& coord) {
  927. const auto& mins = this->GetPartitionMID();
  928. const auto& node_mid = this->GetNodeMID();
  929. const auto& comm = this->GetComm();
  930. SCTL_ASSERT(scatter_idx.find(name) == scatter_idx.end());
  931. Vector<Long>& scatter_idx_ = scatter_idx[name];
  932. Long N = coord.Dim() / DIM;
  933. SCTL_ASSERT(coord.Dim() == N * DIM);
  934. Nlocal[name] = N;
  935. Vector<Morton<DIM>>& pt_mid_ = pt_mid[name];
  936. if (pt_mid_.Dim() != N) pt_mid_.ReInit(N);
  937. for (Long i = 0; i < N; i++) {
  938. pt_mid_[i] = Morton<DIM>(coord.begin() + i*DIM);
  939. }
  940. comm.SortScatterIndex(pt_mid_, scatter_idx_, &mins[comm.Rank()]);
  941. comm.ScatterForward(pt_mid_, scatter_idx_);
  942. AddParticleData(name, name, coord);
  943. { // Set node_cnt
  944. Iterator<Vector<char>> data_;
  945. Iterator<Vector<Long>> cnt_;
  946. this->GetData_(data_,cnt_,name);
  947. cnt_[0].ReInit(node_mid.Dim());
  948. for (Long i = 0; i < node_mid.Dim(); i++) {
  949. Long start = std::lower_bound(pt_mid_.begin(), pt_mid_.end(), node_mid[i]) - pt_mid_.begin();
  950. Long end = std::lower_bound(pt_mid_.begin(), pt_mid_.end(), (i+1==node_mid.Dim() ? Morton<DIM>().Next() : node_mid[i+1])) - pt_mid_.begin();
  951. if (i == 0) SCTL_ASSERT(start == 0);
  952. if (i+1 == node_mid.Dim()) SCTL_ASSERT(end == pt_mid_.Dim());
  953. cnt_[0][i] = end - start;
  954. }
  955. }
  956. }
  957. void AddParticleData(const std::string& data_name, const std::string& particle_name, const Vector<Real>& data) {
  958. SCTL_ASSERT(scatter_idx.find(particle_name) != scatter_idx.end());
  959. SCTL_ASSERT(data_pt_name.find(data_name) == data_pt_name.end());
  960. data_pt_name[data_name] = particle_name;
  961. Iterator<Vector<char>> data_;
  962. Iterator<Vector<Long>> cnt_;
  963. this->AddData(data_name, Vector<Real>(), Vector<Long>());
  964. this->GetData_(data_,cnt_,data_name);
  965. { // Set data_[0]
  966. data_[0].ReInit(data.Dim()*sizeof(Real), (Iterator<char>)data.begin(), true);
  967. this->GetComm().ScatterForward(data_[0], scatter_idx[particle_name]);
  968. }
  969. if (data_name != particle_name) { // Set cnt_[0]
  970. Vector<Real> pt_coord;
  971. Vector<Long> pt_cnt;
  972. this->GetData(pt_coord, pt_cnt, particle_name);
  973. cnt_[0] = pt_cnt;
  974. }
  975. }
  976. void GetParticleData(Vector<Real>& data, const std::string& data_name) const {
  977. SCTL_ASSERT(data_pt_name.find(data_name) != data_pt_name.end());
  978. const std::string& particle_name = data_pt_name.find(data_name)->second;
  979. SCTL_ASSERT(scatter_idx.find(particle_name) != scatter_idx.end());
  980. const auto& scatter_idx_ = scatter_idx.find(particle_name)->second;
  981. const Long Nlocal_ = Nlocal.find(particle_name)->second;
  982. const auto& mins = this->GetPartitionMID();
  983. const auto& node_mid = this->GetNodeMID();
  984. const auto& comm = this->GetComm();
  985. Long dof;
  986. Vector<Long> dsp;
  987. Vector<Long> cnt_;
  988. Vector<Real> data_;
  989. this->GetData(data_, cnt_, data_name);
  990. SCTL_ASSERT(cnt_.Dim() == node_mid.Dim());
  991. BaseTree::scan(dsp, cnt_);
  992. { // Set dof
  993. Long Nn = node_mid.Dim();
  994. StaticArray<Long,2> Ng, Nl = {data_.Dim(), dsp[Nn-1]+cnt_[Nn-1]};
  995. comm.Allreduce((ConstIterator<Long>)Nl, (Iterator<Long>)Ng, 2, Comm::CommOp::SUM);
  996. dof = Ng[0] / std::max<Long>(Ng[1],1);
  997. }
  998. { // Set data
  999. Integer np = comm.Size();
  1000. Integer rank = comm.Rank();
  1001. Long N0 = std::lower_bound(node_mid.begin(), node_mid.end(), mins[rank]) - node_mid.begin();
  1002. Long N1 = std::lower_bound(node_mid.begin(), node_mid.end(), (rank+1==np ? Morton<DIM>().Next() : mins[rank+1])) - node_mid.begin();
  1003. Long start = dsp[N0] * dof;
  1004. Long end = (N1<dsp.Dim() ? dsp[N1] : dsp[N1-1]+cnt_[N1-1]) * dof;
  1005. data.ReInit(end-start, data_.begin()+start, true);
  1006. comm.ScatterReverse(data, scatter_idx_, Nlocal_ * dof);
  1007. }
  1008. }
  1009. void DeleteParticleData(const std::string& data_name) {
  1010. SCTL_ASSERT(data_pt_name.find(data_name) != data_pt_name.end());
  1011. auto particle_name = data_pt_name[data_name];
  1012. if (data_name == particle_name) {
  1013. std::vector<std::string> data_name_lst;
  1014. for (auto& pair : data_pt_name) {
  1015. if (pair.second == particle_name) {
  1016. data_name_lst.push_back(pair.first);
  1017. }
  1018. }
  1019. for (auto x : data_name_lst) {
  1020. if (x != particle_name) {
  1021. DeleteParticleData(x);
  1022. }
  1023. }
  1024. Nlocal.erase(particle_name);
  1025. }
  1026. this->DeleteData(data_name);
  1027. data_pt_name.erase(data_name);
  1028. }
  1029. void WriteParticleVTK(std::string fname, std::string data_name, bool show_ghost = false) const {
  1030. typedef typename VTUData::VTKReal VTKReal;
  1031. const auto& node_mid = this->GetNodeMID();
  1032. const auto& node_attr = this->GetNodeAttr();
  1033. VTUData vtu_data;
  1034. if (DIM <= 3) { // Set vtu data
  1035. SCTL_ASSERT(data_pt_name.find(data_name) != data_pt_name.end());
  1036. std::string particle_name = data_pt_name.find(data_name)->second;
  1037. Vector<Real> pt_coord;
  1038. Vector<Real> pt_value;
  1039. Vector<Long> pt_cnt;
  1040. Vector<Long> pt_dsp;
  1041. Long value_dof = 0;
  1042. { // Set pt_coord, pt_cnt, pt_dsp
  1043. this->GetData(pt_coord, pt_cnt, particle_name);
  1044. Tree<DIM>::scan(pt_dsp, pt_cnt);
  1045. }
  1046. if (particle_name != data_name) { // Set pt_value, value_dof
  1047. Vector<Long> pt_cnt;
  1048. this->GetData(pt_value, pt_cnt, data_name);
  1049. Long Npt = omp_par::reduce(pt_cnt.begin(), pt_cnt.Dim());
  1050. value_dof = pt_value.Dim() / std::max<Long>(Npt,1);
  1051. }
  1052. Vector<VTKReal> &coord = vtu_data.coord;
  1053. Vector<VTKReal> &value = vtu_data.value;
  1054. Vector<int32_t> &connect = vtu_data.connect;
  1055. Vector<int32_t> &offset = vtu_data.offset;
  1056. Vector<uint8_t> &types = vtu_data.types;
  1057. Long point_cnt = coord.Dim() / DIM;
  1058. Long connect_cnt = connect.Dim();
  1059. value.ReInit(point_cnt * value_dof);
  1060. value.SetZero();
  1061. SCTL_ASSERT(node_mid.Dim() == node_attr.Dim());
  1062. SCTL_ASSERT(node_mid.Dim() == pt_cnt.Dim());
  1063. for (Long i = 0; i < node_mid.Dim(); i++) {
  1064. if (!show_ghost && node_attr[i].Ghost) continue;
  1065. if (!node_attr[i].Leaf) continue;
  1066. for (Long j = 0; j < pt_cnt[i]; j++) {
  1067. ConstIterator<Real> pt_coord_ = pt_coord.begin() + (pt_dsp[i] + j) * DIM;
  1068. ConstIterator<Real> pt_value_ = (value_dof ? pt_value.begin() + (pt_dsp[i] + j) * value_dof : NullIterator<Real>());
  1069. for (Integer k = 0; k < DIM; k++) coord.PushBack((VTKReal)pt_coord_[k]);
  1070. for (Integer k = DIM; k < 3; k++) coord.PushBack(0);
  1071. for (Integer k = 0; k < value_dof; k++) value.PushBack((VTKReal)pt_value_[k]);
  1072. connect.PushBack(point_cnt);
  1073. connect_cnt++;
  1074. point_cnt++;
  1075. offset.PushBack(connect_cnt);
  1076. types.PushBack(1);
  1077. }
  1078. }
  1079. }
  1080. vtu_data.WriteVTK(fname, this->GetComm());
  1081. }
  1082. private:
  1083. std::map<std::string, Long> Nlocal;
  1084. std::map<std::string, Vector<Morton<DIM>>> pt_mid;
  1085. std::map<std::string, Vector<Long>> scatter_idx;
  1086. std::map<std::string, std::string> data_pt_name;
  1087. };
  1088. }
  1089. #endif //_SCTL_TREE_