template template void NearInteraction::SetupRepartition(const std::vector& src_vec, const std::vector& trg_vec) { sctl::StaticArray Nloc, Rglb; { // Set Nloc, Rglb Nloc[0] = src_vec.size(); Nloc[1] = trg_vec.size(); comm_.Scan(Nloc, Rglb, 2, sctl::Comm::CommOp::SUM); Rglb[0] -= Nloc[0]; Rglb[1] -= Nloc[1]; } sctl::Vector SRad(Nloc[0]), TRad(Nloc[1]); sctl::Vector SCoord(Nloc[0] * DIM), TCoord(Nloc[1] * DIM); for (sctl::Long i = 0; i < Nloc[0]; i++) { // Set SCoord, SRad for (sctl::Integer k = 0; k < DIM; k++) { SCoord[i * DIM + k] = src_vec[i].Coord()[k]; } SRad[i] = src_vec[i].Rad(); } for (sctl::Long i = 0; i < Nloc[1]; i++) { // Set TCoord, TRad for (sctl::Integer k = 0; k < DIM; k++) { TCoord[i * DIM + k] = trg_vec[i].Coord()[k]; } TRad[i] = trg_vec[i].Rad(); } //--------------------------------------------------------------------- struct { Real L; sctl::StaticArray X; // Bounding box : [X, X+L) } BBox; { // Determine bounding box: BBox.X, BBox.L sctl::StaticArray X0; sctl::StaticArray X1; { // determine local bounding box { // Init X0, X1 <-- Broadcast one source or target point to all processors sctl::Integer src_proc, src_proc_ = (SCoord.Dim() || TCoord.Dim() ? comm_.Rank() : 0); comm_.Allreduce(sctl::Ptr2ConstItr(&src_proc_,1), sctl::Ptr2Itr(&src_proc,1), 1, sctl::Comm::CommOp::MAX); if (comm_.Rank() == src_proc) { sctl::Iterator X_ = SCoord.begin(); if (TCoord.Dim()) X_ = TCoord.begin(); SCTL_ASSERT(SCoord.Dim() || SCoord.Dim()); comm_.Allreduce(X_, X0, DIM, sctl::Comm::CommOp::SUM); } else { sctl::StaticArray X_; for (sctl::Integer k = 0; k < DIM; k++) X_[k] = 0; comm_.Allreduce(X_, X0, DIM, sctl::Comm::CommOp::SUM); } for (sctl::Integer k = 0; k < DIM; k++) X1[k] = X0[k]; } auto determine_bbox = [&](const sctl::Vector& coord, sctl::Iterator X0, sctl::Iterator X1) { Long N = coord.Dim() / DIM; for (sctl::Long i = 0; i < N; i++) { for (sctl::Integer k = 0; k < DIM; k++) { X0[k] = std::min(X0[k], coord[i * DIM + k]); X1[k] = std::max(X1[k], coord[i * DIM + k]); } } }; determine_bbox(SCoord, X0, X1); determine_bbox(TCoord, X0, X1); } BBox.L = 0; auto& X0glb = BBox.X; sctl::StaticArray X1glb; { // determine global bounding box comm_.Allreduce(X0, X0glb, DIM, sctl::Comm::CommOp::MIN); comm_.Allreduce(X1, X1glb, DIM, sctl::Comm::CommOp::MAX); for (sctl::Integer k = 0; k < DIM; k++) { // Set BBox.L = bbox length BBox.L = std::max(BBox.L, X1glb[k] - X0glb[k]); } BBox.L *= 1.01; // so that points are in [0,L) instead of [0,L] } } { // determine tree depth depth = 0; Real max_rad_glb, max_rad = 0.0, oct_size = 1.0; for (sctl::Long i = 0; i < SRad.Dim(); i++) max_rad = std::max(max_rad, SRad[i]); for (sctl::Long i = 0; i < TRad.Dim(); i++) max_rad = std::max(max_rad, TRad[i]); comm_.Allreduce(sctl::Ptr2ConstItr(&max_rad,1), sctl::Ptr2Itr(&max_rad_glb,1), 1, sctl::Comm::CommOp::MAX); while (0.5 * oct_size * BBox.L > 2 * max_rad_glb) { oct_size *= 0.5; depth++; } } { // Set period_length0 <-- period_length / BBox.L Real s = sctl::pow(2, depth); for (sctl::Integer i = 0; i < DIM; i++) { period_length0[i] = std::floor((period_length[i] / BBox.L) * s) / s; if (period_length[i] > 0) SCTL_ASSERT(period_length0[i] > 0); } } sctl::Vector SData(Nloc[0]), TData(Nloc[1]); { // Set SData, TData sctl::StaticArray s; for (sctl::Integer d = 0; d < DIM; d++) { s[d] = 1.0 / BBox.L; if (period_length[d] > 0) { s[d] = period_length0[d] / period_length[d]; } } sctl::StaticArray X; for (sctl::Long i = 0; i < SData.Dim(); i++) { SData[i].rad = SRad[i]; for (sctl::Integer k = 0; k < DIM; k++) { Real XX = SCoord[i * DIM + k]; SData[i].coord[k] = XX; if (period_length[k] > 0) { // Map XX to unit box sctl::Long q = (sctl::Long)std::floor(XX / period_length[k]); XX = (XX - q * period_length[k]) * s[k]; } else { XX = (XX - BBox.X[k]) * s[k]; } X[k] = XX; } SData[i].mid = MID((sctl::ConstIterator)X, depth); SData[i].Rglb = Rglb[0] + i; } for (sctl::Long i = 0; i < TData.Dim(); i++) { TData[i].rad = TRad[i]; for (sctl::Integer k = 0; k < DIM; k++) { Real XX = TCoord[i * DIM + k]; TData[i].coord[k] = XX; if (period_length[k] > 0) { // Map XX to unit box sctl::Long q = (sctl::Long)std::floor(XX / period_length[k]); XX = (XX - q * period_length[k]) * s[k]; } else { XX = (XX - BBox.X[k]) * s[k]; } X[k] = XX; } TData[i].mid = MID((sctl::ConstIterator)X, depth); TData[i].Rglb = Rglb[1] + i; } } { // Set SData_, TData_ comm_.HyperQuickSort(TData, TData_); comm_.HyperQuickSort(SData, SData_); SCTL_ASSERT(TData_.Dim()); auto m0 = TData_[0]; comm_.PartitionS(SData_, m0); comm_.PartitionS(TData_, m0); } { // Set TRglb, SRglb TRglb.ReInit(TData_.Dim()); SRglb.ReInit(SData_.Dim()); for (Long i = 0; i < TData_.Dim(); i++) TRglb[i] = TData_[i].Rglb; for (Long i = 0; i < SData_.Dim(); i++) SRglb[i] = SData_[i].Rglb; } sctl::omp_par::merge_sort(TRglb.begin(), TRglb.end()); sctl::omp_par::merge_sort(SRglb.begin(), SRglb.end()); } template static void NbrList(sctl::Vector>& nbrlst, const sctl::Morton& m0, sctl::Integer depth, sctl::ConstIterator period_length) { typedef typename sctl::Morton::UINT_T UINT_T; const UINT_T N0 = (((UINT_T)1) << depth); Real s = ((Real)1) / N0; sctl::StaticArray c; m0.Coord((sctl::Iterator)c); sctl::Integer len = 1; for (sctl::Integer d = 0; d < DIM; d++) { // Set c Real c0 = c[d] + 0.5 * s; sctl::Integer new_len = 0; for (sctl::Integer k = 0; k < 3; k++) { Real c0_new = c0 + (k-1) * s; if (period_length[d] > 0) { sctl::Long q = (sctl::Long)std::floor(c0_new / period_length[d]); c0_new = (c0_new - q * period_length[d]); } if (c0_new >= 0 && c0_new < 1) { for (sctl::Integer i = 0; i < len * DIM; i++) c[(new_len ) * DIM + i] = c[i]; for (sctl::Integer i = 0; i < len ; i++) c[(new_len + i) * DIM + d] = c0_new; new_len += len; } } len = new_len; } nbrlst.ReInit(len); for (sctl::Integer i = 0; i < len; i++) { // Set nbrlst nbrlst[i] = sctl::Morton(c + i * DIM, depth); } } template template void NearInteraction::SetupNearInterac(const std::vector& src_vec, const std::vector& trg_vec) { SetupRepartition(src_vec, trg_vec); sctl::Vector SData__; // With ghost sources { // sctl::Communicate ghost source data auto& Data = SData_; Long N = Data.Dim(); sctl::Integer rank = comm_.Rank(); sctl::Integer np = comm_.Size(); sctl::Vector mins(np); { // Set mins MID m = MID().Next(); if (!rank) m = MID().DFD(depth); if (TData_.Dim()) m = std::min(m, TData_[0].mid); if (SData_.Dim()) m = std::min(m, SData_[0].mid); comm_.Allgather(sctl::Ptr2ConstItr(&m,1), 1, mins.begin(), 1); } sctl::Vector usr_cnt, usr_dsp, usr_proc; { // Determine source user-count and user-processes usr_cnt.ReInit(N); usr_dsp.ReInit(N); usr_cnt = 0; sctl::Vector nbrlst; sctl::Vector pusr_tmp; for (sctl::Long i = 0; i < N; i++) { // TODO: increment by blocks of points with same MID pusr_tmp.ReInit(0); NbrList(nbrlst, Data[i].mid, depth, (sctl::ConstIterator)period_length0); for (sctl::Integer k = 0; k < nbrlst.Dim(); k++) { Long puser = std::lower_bound(mins.begin(), mins.end(), nbrlst[k].DFD()) - mins.begin() - 1; SCTL_ASSERT(0<=puser); SCTL_ASSERT(puser> usr_data_pair; if (N) { usr_data_pair.ReInit(usr_dsp[N - 1] + usr_cnt[N - 1]); for (sctl::Long i = 0; i < N; i++) { for (sctl::Long j = 0; j < usr_cnt[i]; j++) { Long idx = usr_dsp[i] + j; usr_data_pair[idx].first = usr_proc[idx]; usr_data_pair[idx].second = Data[i]; } } sctl::omp_par::merge_sort(usr_data_pair.begin(), usr_data_pair.end()); } { // Set SData__ sctl::Vector scnt(np), sdsp(np); scnt = 0; sdsp = 0; sctl::Vector sbuff(usr_data_pair.Dim()); for (sctl::Long i = 0; i < sbuff.Dim(); i++) { Long p = usr_data_pair[i].first; sbuff[i] = usr_data_pair[i].second; scnt[p]++; } sctl::omp_par::scan(scnt.begin(), sdsp.begin(), np); sctl::Vector rcnt(np), rdsp(np); rcnt = 0; rdsp = 0; comm_.Alltoall(scnt.begin(), 1, rcnt.begin(), 1); SCTL_ASSERT(rcnt[rank] == 0); rcnt[rank] = N; sctl::omp_par::scan(rcnt.begin(), rdsp.begin(), np); SData__.ReInit(rdsp[np - 1] + rcnt[np - 1]); rcnt[rank] = 0; comm_.Alltoallv(sbuff.begin(), scnt.begin(), sdsp.begin(), SData__.begin(), rcnt.begin(), rdsp.begin()); for (sctl::Long i = 0; i < N; i++) SData__[rdsp[rank] + i] = Data[i]; } } TRglb.ReInit(0); SRglb.ReInit(0); TSPair.ReInit(0); { // Determine source and target pairs sctl::Vector S_flag(SData__.Dim()); sctl::Vector T_flag(TData_.Dim()); S_flag = false; T_flag = false; sctl::Vector nbrlst; for (sctl::Long t0 = 0; t0 < TData_.Dim();) { Long t1 = t0; MID Tmid = TData_[t0].mid; NbrList(nbrlst, Tmid, depth, (sctl::ConstIterator)period_length0); while (t1 < TData_.Dim() && TData_[t1].mid == Tmid) t1++; for (sctl::Integer j = 0; j < nbrlst.Dim(); j++) { ObjData src_key; src_key.mid = nbrlst[j]; Long s0 = std::lower_bound(SData__.begin(), SData__.end(), src_key) - SData__.begin(); if (s0 < SData__.Dim() && SData__[s0].mid == src_key.mid) { Long s1 = s0; while (s1 < SData__.Dim() && SData__[s1].mid == src_key.mid) s1++; for (sctl::Long t = t0; t < t1; t++) { for (sctl::Long s = s0; s < s1; s++) { Real r2 = 0, Rnear = SData__[s].rad + TData_[t].rad; for (sctl::Integer k = 0; k < DIM; k++) { Real dx = SData__[s].coord[k] - TData_[t].coord[k]; if (period_length[k] > 0 && fabs(dx) > 0.5 * period_length[k]) { dx -= round(dx / period_length[k]) * period_length[k]; } r2 += dx * dx; } if (r2 > 0 && r2 < Rnear * Rnear) { Long trg_idx = TData_[t].Rglb; Long src_idx = SData__[s].Rglb; if (S_flag[s] == false) { SRglb.PushBack(src_idx); S_flag[s] = true; } if (T_flag[t] == false) { TRglb.PushBack(trg_idx); T_flag[t] = true; } TSPair.PushBack(std::pair(trg_idx,src_idx)); } } } } } t0 = t1; } } sctl::omp_par::merge_sort(TRglb.begin(), TRglb.end()); sctl::omp_par::merge_sort(SRglb.begin(), SRglb.end()); sctl::omp_par::merge_sort(TSPair.begin(), TSPair.end()); //std::cout<(t, s); //Real r2 = 0; //for (sctl::Integer k = 0; k < DIM; k++) r2 += (Svec[s].Coord()[k] - Tvec[t].Coord()[k]) * (Svec[s].Coord()[k] - Tvec[t].Coord()[k]); //SCTL_ASSERT(sqrt(r2) <= Svec[s].Rad() + Tvec[t].Rad()); } } template template void NearInteraction::ForwardScatterSrc(const std::vector& in, std::vector& out) const { ForwardScatter(in, out, SRglb); } template template void NearInteraction::ForwardScatterTrg(const std::vector& in, std::vector& out) const { ForwardScatter(in, out, TRglb); } template template void NearInteraction::ReverseScatterTrg(const std::vector& in, std::vector& out) const { ReverseScatter(in, out, TRglb); } template template void NearInteraction::ForwardScatter(const std::vector& in_vec, std::vector& out_vec, const sctl::Vector& recv_idx) const { sctl::Integer np = comm_.Size(); sctl::Integer rank = comm_.Rank(); sctl::Vector mins; { // Set mins mins.ReInit(np); Long Rglb, Nloc = in_vec.size(); comm_.Scan(sctl::Ptr2ConstItr(&Nloc,1), sctl::Ptr2Itr(&Rglb,1), 1, sctl::Comm::CommOp::SUM); Rglb -= Nloc; comm_.Allgather(sctl::Ptr2ConstItr(&Rglb,1), 1, mins.begin(), 1); } sctl::Vector rcnt0(np), rdsp0(np); sctl::Vector scnt0(np), sdsp0(np); { // Set rcnt0, rdsp0, scnt0, sdsp0 for (sctl::Integer i = 0; i < np; i++) { Long idx0 = (i + 0 < np ? std::lower_bound(recv_idx.begin(), recv_idx.end(), mins[i+0]) - recv_idx.begin() : recv_idx.Dim()); Long idx1 = (i + 1 < np ? std::lower_bound(recv_idx.begin(), recv_idx.end(), mins[i+1]) - recv_idx.begin() : recv_idx.Dim()); rcnt0[i] = idx1 - idx0; rdsp0[i] = idx0; } comm_.Alltoall(rcnt0.begin(), 1, scnt0.begin(), 1); sdsp0[0] = 0; sctl::omp_par::scan(scnt0.begin(), sdsp0.begin(), np); } sctl::Vector send_idx(sdsp0[np - 1] + scnt0[np - 1]); comm_.Alltoallv(recv_idx.begin(), rcnt0.begin(), rdsp0.begin(), send_idx.begin(), scnt0.begin(), sdsp0.begin()); for (auto& idx : send_idx) { // Set send_idx <-- send_idx - mins[rank] idx = idx - mins[rank]; SCTL_ASSERT(0 <= idx); SCTL_ASSERT(idx < (sctl::Long)in_vec.size()); } sctl::Vector send_buff; sctl::Vector ssize(send_idx.Dim()); sctl::Vector rsize(recv_idx.Dim()); { // Init send_buff, ssize, rsize std::vector tmp_buff; for (sctl::Long i = 0; i < send_idx.Dim(); i++) { // Set send_buff, ssize tmp_buff.clear(); in_vec[send_idx[i]].Pack(tmp_buff); for (sctl::Long j = 0; j < (sctl::Long)tmp_buff.size(); j++) send_buff.PushBack(tmp_buff[j]); ssize[i] = tmp_buff.size(); } { // Set rsize comm_.Alltoallv(ssize.begin(), scnt0.begin(), sdsp0.begin(), rsize.begin(), rcnt0.begin(), rdsp0.begin()); } } sctl::Vector scnt1(np), sdsp1(np); sctl::Vector rcnt1(np), rdsp1(np); { // Set scnt1, sdsp1 sdsp1[0] = 0; for (sctl::Long i = 0; i < np; i++) { scnt1[i] = 0; for (sctl::Long j = 0; j < scnt0[i]; j++) { scnt1[i] += ssize[sdsp0[i] + j]; } } sctl::omp_par::scan(scnt1.begin(), sdsp1.begin(), np); } { // Set rcnt1, rdsp1 rdsp1[0] = 0; comm_.Alltoall(scnt1.begin(), 1, rcnt1.begin(), 1); sctl::omp_par::scan(rcnt1.begin(), rdsp1.begin(), np); } sctl::Vector recv_buff(rdsp1[np - 1] + rcnt1[np - 1]); comm_.Alltoallv(send_buff.begin(), scnt1.begin(), sdsp1.begin(), recv_buff.begin(), rcnt1.begin(), rdsp1.begin()); { // out_vec <-- Unpack(recv_buff) Long Nelem = recv_idx.Dim(); sctl::Vector rdisp(Nelem); SCTL_ASSERT(rsize.Dim() == Nelem); if (Nelem) { // Set rdisp <-- scan(rsize) rdisp[0] = 0; sctl::omp_par::scan(rsize.begin(), rdisp.begin(), recv_idx.Dim()); } out_vec.resize(Nelem); std::vector tmp_buff; for (sctl::Long i = 0; i < Nelem; i++) { // Unpack recv_buff Long Nsize = rsize[i]; tmp_buff.resize(Nsize); for (sctl::Long j = 0; j < Nsize; j++) tmp_buff[j] = recv_buff[rdisp[i] + j]; out_vec[i].Unpack(tmp_buff); } } } template template void NearInteraction::ReverseScatter(const std::vector& in_vec, std::vector& out_vec, const sctl::Vector& send_idx) const { SCTL_ASSERT((sctl::Long)in_vec.size() == send_idx.Dim()); sctl::Integer np = comm_.Size(); sctl::Integer rank = comm_.Rank(); sctl::Vector mins; { // Set mins mins.ReInit(np); Long Rglb, Nloc = out_vec.size(); comm_.Scan(sctl::Ptr2ConstItr(&Nloc,1), sctl::Ptr2Itr(&Rglb,1), 1, sctl::Comm::CommOp::SUM); Rglb -= Nloc; comm_.Allgather(sctl::Ptr2ConstItr(&Rglb,1), 1, mins.begin(), 1); } sctl::Vector scnt0(np), sdsp0(np); sctl::Vector rcnt0(np), rdsp0(np); { // Set scnt0, sdsp0, rcnt0, rdsp0 for (sctl::Integer i = 0; i < np; i++) { Long idx0 = (i + 0 < np ? std::lower_bound(send_idx.begin(), send_idx.end(), mins[i+0]) - send_idx.begin() : send_idx.Dim()); Long idx1 = (i + 1 < np ? std::lower_bound(send_idx.begin(), send_idx.end(), mins[i+1]) - send_idx.begin() : send_idx.Dim()); scnt0[i] = idx1 - idx0; sdsp0[i] = idx0; } comm_.Alltoall(scnt0.begin(), 1, rcnt0.begin(), 1); rdsp0[0] = 0; sctl::omp_par::scan(rcnt0.begin(), rdsp0.begin(), np); } sctl::Vector recv_idx(rdsp0[np - 1] + rcnt0[np - 1]); comm_.Alltoallv(send_idx.begin(), scnt0.begin(), sdsp0.begin(), recv_idx.begin(), rcnt0.begin(), rdsp0.begin()); for (auto& idx : recv_idx) { // Set recv_idx <-- recv_idx - mins[rank] idx = idx - mins[rank]; SCTL_ASSERT(0 <= idx); SCTL_ASSERT(idx < (sctl::Long)out_vec.size()); } sctl::Vector send_buff; sctl::Vector ssize(sdsp0[np - 1] + scnt0[np - 1]); sctl::Vector rsize(rdsp0[np - 1] + rcnt0[np - 1]); { // Init send_buff, ssize, rsize std::vector tmp_buff; for (sctl::Long i = 0; i < (sctl::Long)in_vec.size(); i++) { // Set send_buff, ssize tmp_buff.clear(); in_vec[i].Pack(tmp_buff); for (sctl::Long j = 0; j < (sctl::Long)tmp_buff.size(); j++) send_buff.PushBack(tmp_buff[j]); ssize[i] = tmp_buff.size(); } { // Set rsize comm_.Alltoallv(ssize.begin(), scnt0.begin(), sdsp0.begin(), rsize.begin(), rcnt0.begin(), rdsp0.begin()); } } sctl::Vector scnt1(np), sdsp1(np); sctl::Vector rcnt1(np), rdsp1(np); { // Set scnt1, sdsp1 sdsp1[0] = 0; for (sctl::Long i = 0; i < np; i++) { scnt1[i] = 0; for (sctl::Long j = 0; j < scnt0[i]; j++) { scnt1[i] += ssize[sdsp0[i] + j]; } } sctl::omp_par::scan(scnt1.begin(), sdsp1.begin(), np); } { // Set rcnt1, rdsp1 rdsp1[0] = 0; comm_.Alltoall(scnt1.begin(), 1, rcnt1.begin(), 1); sctl::omp_par::scan(rcnt1.begin(), rdsp1.begin(), np); } sctl::Vector recv_buff(rdsp1[np - 1] + rcnt1[np - 1]); comm_.Alltoallv(send_buff.begin(), scnt1.begin(), sdsp1.begin(), recv_buff.begin(), rcnt1.begin(), rdsp1.begin()); { // out_vec <-- Unpack(recv_buff) Long Nelem = recv_idx.Dim(); sctl::Vector rdisp(Nelem); SCTL_ASSERT(rsize.Dim() == Nelem); if (Nelem) { // Set rdisp <-- scan(rsize) rdisp[0] = 0; sctl::omp_par::scan(rsize.begin(), rdisp.begin(), recv_idx.Dim()); } std::vector tmp_buff; for (sctl::Long i = 0; i < Nelem; i++) { // Unpack recv_buff Long Nsize = rsize[i]; tmp_buff.resize(Nsize); for (sctl::Long j = 0; j < Nsize; j++) tmp_buff[j] = recv_buff[rdisp[i] + j]; out_vec[recv_idx[i]].Unpack(tmp_buff); } } }