NearInteraction.txx 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. template <class Real, int DIM> template <class SrcObj, class TrgObj> void NearInteraction<Real, DIM>::SetupRepartition(const std::vector<SrcObj>& src_vec, const std::vector<TrgObj>& trg_vec) {
  2. sctl::StaticArray<Long, 2> Nloc, Rglb;
  3. { // Set Nloc, Rglb
  4. Nloc[0] = src_vec.size();
  5. Nloc[1] = trg_vec.size();
  6. comm_.Scan<Long>(Nloc, Rglb, 2, sctl::Comm::CommOp::SUM);
  7. Rglb[0] -= Nloc[0];
  8. Rglb[1] -= Nloc[1];
  9. }
  10. sctl::Vector<Real> SRad(Nloc[0]), TRad(Nloc[1]);
  11. sctl::Vector<Real> SCoord(Nloc[0] * DIM), TCoord(Nloc[1] * DIM);
  12. for (sctl::Long i = 0; i < Nloc[0]; i++) { // Set SCoord, SRad
  13. for (sctl::Integer k = 0; k < DIM; k++) {
  14. SCoord[i * DIM + k] = src_vec[i].Coord()[k];
  15. }
  16. SRad[i] = src_vec[i].Rad();
  17. }
  18. for (sctl::Long i = 0; i < Nloc[1]; i++) { // Set TCoord, TRad
  19. for (sctl::Integer k = 0; k < DIM; k++) {
  20. TCoord[i * DIM + k] = trg_vec[i].Coord()[k];
  21. }
  22. TRad[i] = trg_vec[i].Rad();
  23. }
  24. //---------------------------------------------------------------------
  25. struct {
  26. Real L;
  27. sctl::StaticArray<Real, DIM> X;
  28. // Bounding box : [X, X+L)
  29. } BBox;
  30. { // Determine bounding box: BBox.X, BBox.L
  31. sctl::StaticArray<Real, DIM> X0;
  32. sctl::StaticArray<Real, DIM> X1;
  33. { // determine local bounding box
  34. { // Init X0, X1 <-- Broadcast one source or target point to all processors
  35. sctl::Integer src_proc, src_proc_ = (SCoord.Dim() || TCoord.Dim() ? comm_.Rank() : 0);
  36. comm_.Allreduce<sctl::Integer>(sctl::Ptr2ConstItr<sctl::Integer>(&src_proc_,1), sctl::Ptr2Itr<sctl::Integer>(&src_proc,1), 1, sctl::Comm::CommOp::MAX);
  37. if (comm_.Rank() == src_proc) {
  38. sctl::Iterator<Real> X_ = SCoord.begin();
  39. if (TCoord.Dim()) X_ = TCoord.begin();
  40. SCTL_ASSERT(SCoord.Dim() || SCoord.Dim());
  41. comm_.Allreduce<Real>(X_, X0, DIM, sctl::Comm::CommOp::SUM);
  42. } else {
  43. sctl::StaticArray<Real, DIM> X_;
  44. for (sctl::Integer k = 0; k < DIM; k++) X_[k] = 0;
  45. comm_.Allreduce<Real>(X_, X0, DIM, sctl::Comm::CommOp::SUM);
  46. }
  47. for (sctl::Integer k = 0; k < DIM; k++) X1[k] = X0[k];
  48. }
  49. auto determine_bbox = [&](const sctl::Vector<Real>& coord, sctl::Iterator<Real> X0, sctl::Iterator<Real> X1) {
  50. Long N = coord.Dim() / DIM;
  51. for (sctl::Long i = 0; i < N; i++) {
  52. for (sctl::Integer k = 0; k < DIM; k++) {
  53. X0[k] = std::min(X0[k], coord[i * DIM + k]);
  54. X1[k] = std::max(X1[k], coord[i * DIM + k]);
  55. }
  56. }
  57. };
  58. determine_bbox(SCoord, X0, X1);
  59. determine_bbox(TCoord, X0, X1);
  60. }
  61. BBox.L = 0;
  62. auto& X0glb = BBox.X;
  63. sctl::StaticArray<Real, DIM> X1glb;
  64. { // determine global bounding box
  65. comm_.Allreduce<Real>(X0, X0glb, DIM, sctl::Comm::CommOp::MIN);
  66. comm_.Allreduce<Real>(X1, X1glb, DIM, sctl::Comm::CommOp::MAX);
  67. for (sctl::Integer k = 0; k < DIM; k++) { // Set BBox.L = bbox length
  68. BBox.L = std::max(BBox.L, X1glb[k] - X0glb[k]);
  69. }
  70. BBox.L *= 1.01; // so that points are in [0,L) instead of [0,L]
  71. }
  72. }
  73. { // determine tree depth
  74. depth = 0;
  75. Real max_rad_glb, max_rad = 0.0, oct_size = 1.0;
  76. for (sctl::Long i = 0; i < SRad.Dim(); i++) max_rad = std::max(max_rad, SRad[i]);
  77. for (sctl::Long i = 0; i < TRad.Dim(); i++) max_rad = std::max(max_rad, TRad[i]);
  78. comm_.Allreduce(sctl::Ptr2ConstItr<Real>(&max_rad,1), sctl::Ptr2Itr<Real>(&max_rad_glb,1), 1, sctl::Comm::CommOp::MAX);
  79. while (0.5 * oct_size * BBox.L > 2 * max_rad_glb) {
  80. oct_size *= 0.5;
  81. depth++;
  82. }
  83. }
  84. { // Set period_length0 <-- period_length / BBox.L
  85. Real s = sctl::pow<Real>(2, depth);
  86. for (sctl::Integer i = 0; i < DIM; i++) {
  87. period_length0[i] = std::floor((period_length[i] / BBox.L) * s) / s;
  88. if (period_length[i] > 0) SCTL_ASSERT(period_length0[i] > 0);
  89. }
  90. }
  91. sctl::Vector<ObjData> SData(Nloc[0]), TData(Nloc[1]);
  92. { // Set SData, TData
  93. sctl::StaticArray<Real,DIM> s;
  94. for (sctl::Integer d = 0; d < DIM; d++) {
  95. s[d] = 1.0 / BBox.L;
  96. if (period_length[d] > 0) {
  97. s[d] = period_length0[d] / period_length[d];
  98. }
  99. }
  100. sctl::StaticArray<Real, DIM> X;
  101. for (sctl::Long i = 0; i < SData.Dim(); i++) {
  102. SData[i].rad = SRad[i];
  103. for (sctl::Integer k = 0; k < DIM; k++) {
  104. Real XX = SCoord[i * DIM + k];
  105. SData[i].coord[k] = XX;
  106. if (period_length[k] > 0) { // Map XX to unit box
  107. sctl::Long q = (sctl::Long)std::floor(XX / period_length[k]);
  108. XX = (XX - q * period_length[k]) * s[k];
  109. } else {
  110. XX = (XX - BBox.X[k]) * s[k];
  111. }
  112. X[k] = XX;
  113. }
  114. SData[i].mid = MID((sctl::ConstIterator<Real>)X, depth);
  115. SData[i].Rglb = Rglb[0] + i;
  116. }
  117. for (sctl::Long i = 0; i < TData.Dim(); i++) {
  118. TData[i].rad = TRad[i];
  119. for (sctl::Integer k = 0; k < DIM; k++) {
  120. Real XX = TCoord[i * DIM + k];
  121. TData[i].coord[k] = XX;
  122. if (period_length[k] > 0) { // Map XX to unit box
  123. sctl::Long q = (sctl::Long)std::floor(XX / period_length[k]);
  124. XX = (XX - q * period_length[k]) * s[k];
  125. } else {
  126. XX = (XX - BBox.X[k]) * s[k];
  127. }
  128. X[k] = XX;
  129. }
  130. TData[i].mid = MID((sctl::ConstIterator<Real>)X, depth);
  131. TData[i].Rglb = Rglb[1] + i;
  132. }
  133. }
  134. { // Set SData_, TData_
  135. comm_.HyperQuickSort(TData, TData_);
  136. comm_.HyperQuickSort(SData, SData_);
  137. SCTL_ASSERT(TData_.Dim());
  138. auto m0 = TData_[0];
  139. comm_.PartitionS(SData_, m0);
  140. comm_.PartitionS(TData_, m0);
  141. }
  142. { // Set TRglb, SRglb
  143. TRglb.ReInit(TData_.Dim());
  144. SRglb.ReInit(SData_.Dim());
  145. for (Long i = 0; i < TData_.Dim(); i++) TRglb[i] = TData_[i].Rglb;
  146. for (Long i = 0; i < SData_.Dim(); i++) SRglb[i] = SData_[i].Rglb;
  147. }
  148. sctl::omp_par::merge_sort(TRglb.begin(), TRglb.end());
  149. sctl::omp_par::merge_sort(SRglb.begin(), SRglb.end());
  150. }
  151. template <class Real, sctl::Integer DIM> static void NbrList(sctl::Vector<sctl::Morton<DIM>>& nbrlst, const sctl::Morton<DIM>& m0, sctl::Integer depth, sctl::ConstIterator<Real> period_length) {
  152. typedef typename sctl::Morton<DIM>::UINT_T UINT_T;
  153. const UINT_T N0 = (((UINT_T)1) << depth);
  154. Real s = ((Real)1) / N0;
  155. sctl::StaticArray<Real, (1<<((2*DIM)))*DIM> c;
  156. m0.Coord((sctl::Iterator<Real>)c);
  157. sctl::Integer len = 1;
  158. for (sctl::Integer d = 0; d < DIM; d++) { // Set c
  159. Real c0 = c[d] + 0.5 * s;
  160. sctl::Integer new_len = 0;
  161. for (sctl::Integer k = 0; k < 3; k++) {
  162. Real c0_new = c0 + (k-1) * s;
  163. if (period_length[d] > 0) {
  164. sctl::Long q = (sctl::Long)std::floor(c0_new / period_length[d]);
  165. c0_new = (c0_new - q * period_length[d]);
  166. }
  167. if (c0_new >= 0 && c0_new < 1) {
  168. for (sctl::Integer i = 0; i < len * DIM; i++) c[(new_len ) * DIM + i] = c[i];
  169. for (sctl::Integer i = 0; i < len ; i++) c[(new_len + i) * DIM + d] = c0_new;
  170. new_len += len;
  171. }
  172. }
  173. len = new_len;
  174. }
  175. nbrlst.ReInit(len);
  176. for (sctl::Integer i = 0; i < len; i++) { // Set nbrlst
  177. nbrlst[i] = sctl::Morton<DIM>(c + i * DIM, depth);
  178. }
  179. }
  180. template <class Real, int DIM> template <class SrcObj, class TrgObj> void NearInteraction<Real, DIM>::SetupNearInterac(const std::vector<SrcObj>& src_vec, const std::vector<TrgObj>& trg_vec) {
  181. SetupRepartition<SrcObj, TrgObj>(src_vec, trg_vec);
  182. sctl::Vector<ObjData> SData__; // With ghost sources
  183. { // sctl::Communicate ghost source data
  184. auto& Data = SData_;
  185. Long N = Data.Dim();
  186. sctl::Integer rank = comm_.Rank();
  187. sctl::Integer np = comm_.Size();
  188. sctl::Vector<MID> mins(np);
  189. { // Set mins
  190. MID m = MID().Next();
  191. if (!rank) m = MID().DFD(depth);
  192. if (TData_.Dim()) m = std::min(m, TData_[0].mid);
  193. if (SData_.Dim()) m = std::min(m, SData_[0].mid);
  194. comm_.Allgather(sctl::Ptr2ConstItr<MID>(&m,1), 1, mins.begin(), 1);
  195. }
  196. sctl::Vector<Long> usr_cnt, usr_dsp, usr_proc;
  197. { // Determine source user-count and user-processes
  198. usr_cnt.ReInit(N);
  199. usr_dsp.ReInit(N);
  200. usr_cnt = 0;
  201. sctl::Vector<MID> nbrlst;
  202. sctl::Vector<Long> pusr_tmp;
  203. for (sctl::Long i = 0; i < N; i++) { // TODO: increment by blocks of points with same MID
  204. pusr_tmp.ReInit(0);
  205. NbrList(nbrlst, Data[i].mid, depth, (sctl::ConstIterator<Real>)period_length0);
  206. for (sctl::Integer k = 0; k < nbrlst.Dim(); k++) {
  207. Long puser = std::lower_bound(mins.begin(), mins.end(), nbrlst[k].DFD()) - mins.begin() - 1;
  208. SCTL_ASSERT(0<=puser);
  209. SCTL_ASSERT(puser<np);
  210. if (puser != rank) {
  211. bool insert_puser = true;
  212. for (sctl::Integer j = 0; j < pusr_tmp.Dim(); j++) {
  213. if (pusr_tmp[j] == puser) {
  214. insert_puser = false;
  215. break;
  216. }
  217. }
  218. if (insert_puser) {
  219. pusr_tmp.PushBack(puser);
  220. }
  221. }
  222. }
  223. usr_dsp[i] = usr_proc.Dim();
  224. usr_cnt[i] += pusr_tmp.Dim();
  225. for (auto p:pusr_tmp) usr_proc.PushBack(p);
  226. }
  227. }
  228. sctl::Vector<std::pair<Long,ObjData>> usr_data_pair;
  229. if (N) {
  230. usr_data_pair.ReInit(usr_dsp[N - 1] + usr_cnt[N - 1]);
  231. for (sctl::Long i = 0; i < N; i++) {
  232. for (sctl::Long j = 0; j < usr_cnt[i]; j++) {
  233. Long idx = usr_dsp[i] + j;
  234. usr_data_pair[idx].first = usr_proc[idx];
  235. usr_data_pair[idx].second = Data[i];
  236. }
  237. }
  238. sctl::omp_par::merge_sort(usr_data_pair.begin(), usr_data_pair.end());
  239. }
  240. { // Set SData__
  241. sctl::Vector<Long> scnt(np), sdsp(np);
  242. scnt = 0;
  243. sdsp = 0;
  244. sctl::Vector<ObjData> sbuff(usr_data_pair.Dim());
  245. for (sctl::Long i = 0; i < sbuff.Dim(); i++) {
  246. Long p = usr_data_pair[i].first;
  247. sbuff[i] = usr_data_pair[i].second;
  248. scnt[p]++;
  249. }
  250. sctl::omp_par::scan(scnt.begin(), sdsp.begin(), np);
  251. sctl::Vector<Long> rcnt(np), rdsp(np);
  252. rcnt = 0;
  253. rdsp = 0;
  254. comm_.Alltoall(scnt.begin(), 1, rcnt.begin(), 1);
  255. SCTL_ASSERT(rcnt[rank] == 0);
  256. rcnt[rank] = N;
  257. sctl::omp_par::scan(rcnt.begin(), rdsp.begin(), np);
  258. SData__.ReInit(rdsp[np - 1] + rcnt[np - 1]);
  259. rcnt[rank] = 0;
  260. comm_.Alltoallv(sbuff.begin(), scnt.begin(), sdsp.begin(), SData__.begin(), rcnt.begin(), rdsp.begin());
  261. for (sctl::Long i = 0; i < N; i++) SData__[rdsp[rank] + i] = Data[i];
  262. }
  263. }
  264. TRglb.ReInit(0);
  265. SRglb.ReInit(0);
  266. TSPair.ReInit(0);
  267. { // Determine source and target pairs
  268. sctl::Vector<bool> S_flag(SData__.Dim());
  269. sctl::Vector<bool> T_flag(TData_.Dim());
  270. S_flag = false;
  271. T_flag = false;
  272. sctl::Vector<MID> nbrlst;
  273. for (sctl::Long t0 = 0; t0 < TData_.Dim();) {
  274. Long t1 = t0;
  275. MID Tmid = TData_[t0].mid;
  276. NbrList(nbrlst, Tmid, depth, (sctl::ConstIterator<Real>)period_length0);
  277. while (t1 < TData_.Dim() && TData_[t1].mid == Tmid) t1++;
  278. for (sctl::Integer j = 0; j < nbrlst.Dim(); j++) {
  279. ObjData src_key;
  280. src_key.mid = nbrlst[j];
  281. Long s0 = std::lower_bound(SData__.begin(), SData__.end(), src_key) - SData__.begin();
  282. if (s0 < SData__.Dim() && SData__[s0].mid == src_key.mid) {
  283. Long s1 = s0;
  284. while (s1 < SData__.Dim() && SData__[s1].mid == src_key.mid) s1++;
  285. for (sctl::Long t = t0; t < t1; t++) {
  286. for (sctl::Long s = s0; s < s1; s++) {
  287. Real r2 = 0, Rnear = SData__[s].rad + TData_[t].rad;
  288. for (sctl::Integer k = 0; k < DIM; k++) {
  289. Real dx = SData__[s].coord[k] - TData_[t].coord[k];
  290. if (period_length[k] > 0 && fabs(dx) > 0.5 * period_length[k]) {
  291. dx -= round(dx / period_length[k]) * period_length[k];
  292. }
  293. r2 += dx * dx;
  294. }
  295. if (r2 > 0 && r2 < Rnear * Rnear) {
  296. Long trg_idx = TData_[t].Rglb;
  297. Long src_idx = SData__[s].Rglb;
  298. if (S_flag[s] == false) {
  299. SRglb.PushBack(src_idx);
  300. S_flag[s] = true;
  301. }
  302. if (T_flag[t] == false) {
  303. TRglb.PushBack(trg_idx);
  304. T_flag[t] = true;
  305. }
  306. TSPair.PushBack(std::pair<Long,Long>(trg_idx,src_idx));
  307. }
  308. }
  309. }
  310. }
  311. }
  312. t0 = t1;
  313. }
  314. }
  315. sctl::omp_par::merge_sort(TRglb.begin(), TRglb.end());
  316. sctl::omp_par::merge_sort(SRglb.begin(), SRglb.end());
  317. sctl::omp_par::merge_sort(TSPair.begin(), TSPair.end());
  318. //std::cout<<comm_.Rank()<<' '<<TRglb.Dim()<<' '<<SRglb.Dim()<<' '<<TSPair.Dim()<<'\n';
  319. trg_src_pair.resize(TSPair.Dim());
  320. for (sctl::Long i = 0; i < TSPair.Dim(); i++) { // Set trg_src_pair
  321. const auto& TS = TSPair[i];
  322. auto trg_idx = TS.first;
  323. auto src_idx = TS.second;
  324. Long t = std::lower_bound(TRglb.begin(), TRglb.end(), trg_idx) - TRglb.begin();
  325. Long s = std::lower_bound(SRglb.begin(), SRglb.end(), src_idx) - SRglb.begin();
  326. SCTL_ASSERT(t < TRglb.Dim() && TRglb[t] == trg_idx);
  327. SCTL_ASSERT(s < SRglb.Dim() && SRglb[s] == src_idx);
  328. trg_src_pair[i] = std::pair<Long, Long>(t, s);
  329. //Real r2 = 0;
  330. //for (sctl::Integer k = 0; k < DIM; k++) r2 += (Svec[s].Coord()[k] - Tvec[t].Coord()[k]) * (Svec[s].Coord()[k] - Tvec[t].Coord()[k]);
  331. //SCTL_ASSERT(sqrt(r2) <= Svec[s].Rad() + Tvec[t].Rad());
  332. }
  333. }
  334. template <class Real, int DIM> template <class ObjType> void NearInteraction<Real, DIM>::ForwardScatterSrc(const std::vector<ObjType>& in, std::vector<ObjType>& out) const {
  335. ForwardScatter<ObjType>(in, out, SRglb);
  336. }
  337. template <class Real, int DIM> template <class ObjType> void NearInteraction<Real, DIM>::ForwardScatterTrg(const std::vector<ObjType>& in, std::vector<ObjType>& out) const {
  338. ForwardScatter<ObjType>(in, out, TRglb);
  339. }
  340. template <class Real, int DIM> template <class ObjType> void NearInteraction<Real, DIM>::ReverseScatterTrg(const std::vector<ObjType>& in, std::vector<ObjType>& out) const {
  341. ReverseScatter<ObjType>(in, out, TRglb);
  342. }
  343. template <class Real, int DIM> template <class ObjType> void NearInteraction<Real, DIM>::ForwardScatter(const std::vector<ObjType>& in_vec, std::vector<ObjType>& out_vec, const sctl::Vector<Long>& recv_idx) const {
  344. sctl::Integer np = comm_.Size();
  345. sctl::Integer rank = comm_.Rank();
  346. sctl::Vector<Long> mins;
  347. { // Set mins
  348. mins.ReInit(np);
  349. Long Rglb, Nloc = in_vec.size();
  350. comm_.Scan<Long>(sctl::Ptr2ConstItr<Long>(&Nloc,1), sctl::Ptr2Itr<Long>(&Rglb,1), 1, sctl::Comm::CommOp::SUM);
  351. Rglb -= Nloc;
  352. comm_.Allgather<Long>(sctl::Ptr2ConstItr<Long>(&Rglb,1), 1, mins.begin(), 1);
  353. }
  354. sctl::Vector<Long> rcnt0(np), rdsp0(np);
  355. sctl::Vector<Long> scnt0(np), sdsp0(np);
  356. { // Set rcnt0, rdsp0, scnt0, sdsp0
  357. for (sctl::Integer i = 0; i < np; i++) {
  358. Long idx0 = (i + 0 < np ? std::lower_bound(recv_idx.begin(), recv_idx.end(), mins[i+0]) - recv_idx.begin() : recv_idx.Dim());
  359. Long idx1 = (i + 1 < np ? std::lower_bound(recv_idx.begin(), recv_idx.end(), mins[i+1]) - recv_idx.begin() : recv_idx.Dim());
  360. rcnt0[i] = idx1 - idx0;
  361. rdsp0[i] = idx0;
  362. }
  363. comm_.Alltoall(rcnt0.begin(), 1, scnt0.begin(), 1);
  364. sdsp0[0] = 0;
  365. sctl::omp_par::scan(scnt0.begin(), sdsp0.begin(), np);
  366. }
  367. sctl::Vector<Long> send_idx(sdsp0[np - 1] + scnt0[np - 1]);
  368. comm_.Alltoallv<Long>(recv_idx.begin(), rcnt0.begin(), rdsp0.begin(), send_idx.begin(), scnt0.begin(), sdsp0.begin());
  369. for (auto& idx : send_idx) { // Set send_idx <-- send_idx - mins[rank]
  370. idx = idx - mins[rank];
  371. SCTL_ASSERT(0 <= idx);
  372. SCTL_ASSERT(idx < (sctl::Long)in_vec.size());
  373. }
  374. sctl::Vector<char> send_buff;
  375. sctl::Vector<Long> ssize(send_idx.Dim());
  376. sctl::Vector<Long> rsize(recv_idx.Dim());
  377. { // Init send_buff, ssize, rsize
  378. std::vector<char> tmp_buff;
  379. for (sctl::Long i = 0; i < send_idx.Dim(); i++) { // Set send_buff, ssize
  380. tmp_buff.clear();
  381. in_vec[send_idx[i]].Pack(tmp_buff);
  382. for (sctl::Long j = 0; j < (sctl::Long)tmp_buff.size(); j++) send_buff.PushBack(tmp_buff[j]);
  383. ssize[i] = tmp_buff.size();
  384. }
  385. { // Set rsize
  386. comm_.Alltoallv<Long>(ssize.begin(), scnt0.begin(), sdsp0.begin(), rsize.begin(), rcnt0.begin(), rdsp0.begin());
  387. }
  388. }
  389. sctl::Vector<Long> scnt1(np), sdsp1(np);
  390. sctl::Vector<Long> rcnt1(np), rdsp1(np);
  391. { // Set scnt1, sdsp1
  392. sdsp1[0] = 0;
  393. for (sctl::Long i = 0; i < np; i++) {
  394. scnt1[i] = 0;
  395. for (sctl::Long j = 0; j < scnt0[i]; j++) {
  396. scnt1[i] += ssize[sdsp0[i] + j];
  397. }
  398. }
  399. sctl::omp_par::scan(scnt1.begin(), sdsp1.begin(), np);
  400. }
  401. { // Set rcnt1, rdsp1
  402. rdsp1[0] = 0;
  403. comm_.Alltoall(scnt1.begin(), 1, rcnt1.begin(), 1);
  404. sctl::omp_par::scan(rcnt1.begin(), rdsp1.begin(), np);
  405. }
  406. sctl::Vector<char> recv_buff(rdsp1[np - 1] + rcnt1[np - 1]);
  407. comm_.Alltoallv(send_buff.begin(), scnt1.begin(), sdsp1.begin(), recv_buff.begin(), rcnt1.begin(), rdsp1.begin());
  408. { // out_vec <-- Unpack(recv_buff)
  409. Long Nelem = recv_idx.Dim();
  410. sctl::Vector<Long> rdisp(Nelem);
  411. SCTL_ASSERT(rsize.Dim() == Nelem);
  412. if (Nelem) { // Set rdisp <-- scan(rsize)
  413. rdisp[0] = 0;
  414. sctl::omp_par::scan(rsize.begin(), rdisp.begin(), recv_idx.Dim());
  415. }
  416. out_vec.resize(Nelem);
  417. std::vector<char> tmp_buff;
  418. for (sctl::Long i = 0; i < Nelem; i++) { // Unpack recv_buff
  419. Long Nsize = rsize[i];
  420. tmp_buff.resize(Nsize);
  421. for (sctl::Long j = 0; j < Nsize; j++) tmp_buff[j] = recv_buff[rdisp[i] + j];
  422. out_vec[i].Unpack(tmp_buff);
  423. }
  424. }
  425. }
  426. template <class Real, int DIM> template <class ObjType> void NearInteraction<Real, DIM>::ReverseScatter(const std::vector<ObjType>& in_vec, std::vector<ObjType>& out_vec, const sctl::Vector<Long>& send_idx) const {
  427. SCTL_ASSERT((sctl::Long)in_vec.size() == send_idx.Dim());
  428. sctl::Integer np = comm_.Size();
  429. sctl::Integer rank = comm_.Rank();
  430. sctl::Vector<Long> mins;
  431. { // Set mins
  432. mins.ReInit(np);
  433. Long Rglb, Nloc = out_vec.size();
  434. comm_.Scan<Long>(sctl::Ptr2ConstItr<Long>(&Nloc,1), sctl::Ptr2Itr<Long>(&Rglb,1), 1, sctl::Comm::CommOp::SUM);
  435. Rglb -= Nloc;
  436. comm_.Allgather<Long>(sctl::Ptr2ConstItr<Long>(&Rglb,1), 1, mins.begin(), 1);
  437. }
  438. sctl::Vector<Long> scnt0(np), sdsp0(np);
  439. sctl::Vector<Long> rcnt0(np), rdsp0(np);
  440. { // Set scnt0, sdsp0, rcnt0, rdsp0
  441. for (sctl::Integer i = 0; i < np; i++) {
  442. Long idx0 = (i + 0 < np ? std::lower_bound(send_idx.begin(), send_idx.end(), mins[i+0]) - send_idx.begin() : send_idx.Dim());
  443. Long idx1 = (i + 1 < np ? std::lower_bound(send_idx.begin(), send_idx.end(), mins[i+1]) - send_idx.begin() : send_idx.Dim());
  444. scnt0[i] = idx1 - idx0;
  445. sdsp0[i] = idx0;
  446. }
  447. comm_.Alltoall(scnt0.begin(), 1, rcnt0.begin(), 1);
  448. rdsp0[0] = 0;
  449. sctl::omp_par::scan(rcnt0.begin(), rdsp0.begin(), np);
  450. }
  451. sctl::Vector<Long> recv_idx(rdsp0[np - 1] + rcnt0[np - 1]);
  452. comm_.Alltoallv<Long>(send_idx.begin(), scnt0.begin(), sdsp0.begin(), recv_idx.begin(), rcnt0.begin(), rdsp0.begin());
  453. for (auto& idx : recv_idx) { // Set recv_idx <-- recv_idx - mins[rank]
  454. idx = idx - mins[rank];
  455. SCTL_ASSERT(0 <= idx);
  456. SCTL_ASSERT(idx < (sctl::Long)out_vec.size());
  457. }
  458. sctl::Vector<char> send_buff;
  459. sctl::Vector<Long> ssize(sdsp0[np - 1] + scnt0[np - 1]);
  460. sctl::Vector<Long> rsize(rdsp0[np - 1] + rcnt0[np - 1]);
  461. { // Init send_buff, ssize, rsize
  462. std::vector<char> tmp_buff;
  463. for (sctl::Long i = 0; i < (sctl::Long)in_vec.size(); i++) { // Set send_buff, ssize
  464. tmp_buff.clear();
  465. in_vec[i].Pack(tmp_buff);
  466. for (sctl::Long j = 0; j < (sctl::Long)tmp_buff.size(); j++) send_buff.PushBack(tmp_buff[j]);
  467. ssize[i] = tmp_buff.size();
  468. }
  469. { // Set rsize
  470. comm_.Alltoallv<Long>(ssize.begin(), scnt0.begin(), sdsp0.begin(), rsize.begin(), rcnt0.begin(), rdsp0.begin());
  471. }
  472. }
  473. sctl::Vector<Long> scnt1(np), sdsp1(np);
  474. sctl::Vector<Long> rcnt1(np), rdsp1(np);
  475. { // Set scnt1, sdsp1
  476. sdsp1[0] = 0;
  477. for (sctl::Long i = 0; i < np; i++) {
  478. scnt1[i] = 0;
  479. for (sctl::Long j = 0; j < scnt0[i]; j++) {
  480. scnt1[i] += ssize[sdsp0[i] + j];
  481. }
  482. }
  483. sctl::omp_par::scan(scnt1.begin(), sdsp1.begin(), np);
  484. }
  485. { // Set rcnt1, rdsp1
  486. rdsp1[0] = 0;
  487. comm_.Alltoall(scnt1.begin(), 1, rcnt1.begin(), 1);
  488. sctl::omp_par::scan(rcnt1.begin(), rdsp1.begin(), np);
  489. }
  490. sctl::Vector<char> recv_buff(rdsp1[np - 1] + rcnt1[np - 1]);
  491. comm_.Alltoallv(send_buff.begin(), scnt1.begin(), sdsp1.begin(), recv_buff.begin(), rcnt1.begin(), rdsp1.begin());
  492. { // out_vec <-- Unpack(recv_buff)
  493. Long Nelem = recv_idx.Dim();
  494. sctl::Vector<Long> rdisp(Nelem);
  495. SCTL_ASSERT(rsize.Dim() == Nelem);
  496. if (Nelem) { // Set rdisp <-- scan(rsize)
  497. rdisp[0] = 0;
  498. sctl::omp_par::scan(rsize.begin(), rdisp.begin(), recv_idx.Dim());
  499. }
  500. std::vector<char> tmp_buff;
  501. for (sctl::Long i = 0; i < Nelem; i++) { // Unpack recv_buff
  502. Long Nsize = rsize[i];
  503. tmp_buff.resize(Nsize);
  504. for (sctl::Long j = 0; j < Nsize; j++) tmp_buff[j] = recv_buff[rdisp[i] + j];
  505. out_vec[recv_idx[i]].Unpack(tmp_buff);
  506. }
  507. }
  508. }