// boundary_integral.txx
  1. #include SCTL_INCLUDE(kernel_functions.hpp)
  2. #include SCTL_INCLUDE(matrix.hpp)
  3. #include SCTL_INCLUDE(ompUtils.hpp)
  4. #include SCTL_INCLUDE(morton.hpp)
  5. #include SCTL_INCLUDE(profile.hpp)
  6. #include SCTL_INCLUDE(vector.hpp)
  7. #include SCTL_INCLUDE(comm.hpp)
  8. #include SCTL_INCLUDE(common.hpp)
  9. #include <map>
  10. #include <set>
  11. namespace SCTL_NAMESPACE {
  12. template <class VType> static void concat_vecs(Vector<VType>& v, const Vector<Vector<VType>>& vec_lst) {
  13. const Long N = vec_lst.Dim();
  14. Vector<Long> dsp(N+1); dsp[0] = 0;
  15. for (Long i = 0; i < N; i++) {
  16. dsp[i+1] = dsp[i] + vec_lst[i].Dim();
  17. }
  18. if (v.Dim() != dsp[N]) v.ReInit(dsp[N]);
  19. for (Long i = 0; i < N; i++) {
  20. Vector<VType> v_(vec_lst[i].Dim(), v.begin()+dsp[i], false);
  21. v_ = vec_lst[i];
  22. }
  23. }
// Build, in parallel (MPI + OpenMP), the list of target points that lie inside the
// near-region of each source element (i.e. within src_radius of some source node of
// that element), using a Morton-ordered tree decomposition to avoid all-pairs tests.
//
// Input : Xtrg, Xn_trg (target coordinates/normals), Xsrc, src_radius (source nodes
//         and their near-region radii), src_elem_nds_cnt/dsp (nodes-per-element
//         counts/displacements), comm (communicator).
// Output: Xtrg_near, Xn_trg_near (near targets, grouped by element),
//         near_elem_cnt/dsp (per-element counts/displacements into Xtrg_near),
//         near_scatter_index, near_trg_cnt/dsp (to scatter per-element results back
//         to the original target ordering).
template <class Real, Integer COORD_DIM=3> static void BuildNearList(Vector<Real>& Xtrg_near, Vector<Real>& Xn_trg_near, Vector<Long>& near_elem_cnt, Vector<Long>& near_elem_dsp, Vector<Long>& near_scatter_index, Vector<Long>& near_trg_cnt, Vector<Long>& near_trg_dsp, const Vector<Real>& Xtrg, const Vector<Real>& Xn_trg, const Vector<Real>& Xsrc, const Vector<Real>& src_radius, const Vector<Long>& src_elem_nds_cnt, const Vector<Long>& src_elem_nds_dsp, const Comm& comm) {
  // Input: Xtrg, Xn_trg, Xsrc, src_radius, src_elem_nds_cnt, src_elem_nds_dsp, comm
  // Output: Xtrg_near, Xn_trg_near, near_elem_cnt, near_elem_dsp, near_scatter_index, near_trg_cnt, near_trg_dsp
  struct NodeData {
    Long idx;                       // global point index
    Real rad;                       // near-region radius (0 for targets)
    StaticArray<Real,COORD_DIM> X;  // coordinates
    StaticArray<Real,COORD_DIM> Xn; // target normal (0 when absent)
    Morton<COORD_DIM> mid;          // Morton ID of containing tree cell
    Long elem_idx;                  // global index of owning element
    Long pid;                       // owning MPI rank
  };
  // Comparators: by Morton ID, and by (element index, point index).
  auto comp_node_mid = [](const NodeData& A, const NodeData& B) {
    return A.mid < B.mid;
  };
  auto comp_node_eid_idx = [](const NodeData& A, const NodeData& B) {
    return A.elem_idx<B.elem_idx || (A.elem_idx==B.elem_idx && A.idx<B.idx);
  };
  // Squared Euclidean distance between two nodes.
  auto node_dist2 = [](const NodeData& A, const NodeData& B) {
    Real dist2 = 0;
    for (Long i = 0; i < COORD_DIM; i++) {
      Real dX = A.X[i] - B.X[i];
      dist2 += dX * dX;
    }
    return dist2;
  };
  bool have_trg_normal;
  { // Set have_trg_normal (true if any rank supplies target normals)
    StaticArray<Long,1> Nloc{Xn_trg.Dim()}, Nglb{0};
    comm.Allreduce<Long>(Nloc, Nglb, 1, Comm::CommOp::SUM);
    have_trg_normal = (Nglb[0] > 0);
    SCTL_ASSERT(!have_trg_normal || (Xn_trg.Dim() == Xtrg.Dim()));
  }
  const Long Ntrg = Xtrg.Dim()/COORD_DIM;
  const Long Nsrc = Xsrc.Dim()/COORD_DIM;
  const Long Nelem = src_elem_nds_cnt.Dim();
  // Ranks with no sources and no targets drop out of the sub-communicator.
  Comm comm_ = comm.Split(Nsrc || Ntrg);
  if (!Nsrc && !Ntrg) return;
  Long trg_offset, src_offset, elem_offset;
  { // set trg_offset, src_offset, elem_offset (exclusive prefix sums over ranks)
    StaticArray<Long,3> send_buff{Ntrg, Nsrc, Nelem}, recv_buff{0,0,0};
    comm_.Scan((ConstIterator<Long>)send_buff, (Iterator<Long>)recv_buff, 3, Comm::CommOp::SUM);
    trg_offset = recv_buff[0] - send_buff[0];
    src_offset = recv_buff[1] - send_buff[1];
    elem_offset = recv_buff[2] - send_buff[2];
  }
  Vector<NodeData> trg_nodes(Ntrg), src_nodes(Nsrc);
  { // Set trg_nodes, src_nodes
    Real BBlen_inv;
    StaticArray<Real,COORD_DIM> BBX0;
    { // Determine bounding-box (global min corner BBX0 and 1/side-length)
      StaticArray<Real,COORD_DIM> X0_local;
      if (Ntrg) { // Init X0_local
        for (Long k = 0; k < COORD_DIM; k++) {
          X0_local[k] = Xtrg[k];
        }
      } else if (Nsrc) {
        for (Long k = 0; k < COORD_DIM; k++) {
          X0_local[k] = Xsrc[k];
        }
      }
      for (Long i = 0; i < Ntrg; i++) {
        for (Long k = 0; k < COORD_DIM; k++) {
          X0_local[k] = std::min<Real>(X0_local[k], Xtrg[i*COORD_DIM+k]);
        }
      }
      for (Long i = 0; i < Nsrc; i++) {
        for (Long k = 0; k < COORD_DIM; k++) {
          X0_local[k] = std::min<Real>(X0_local[k], Xsrc[i*COORD_DIM+k]);
        }
      }
      comm_.Allreduce<Real>(X0_local, BBX0, COORD_DIM, Comm::CommOp::MIN);
      Real BBlen, len_local = 0;
      for (Long i = 0; i < Ntrg; i++) {
        for (Long k = 0; k < COORD_DIM; k++) {
          len_local = std::max<Real>(len_local, Xtrg[i*COORD_DIM+k]-BBX0[k]);
        }
      }
      for (Long i = 0; i < Nsrc; i++) {
        for (Long k = 0; k < COORD_DIM; k++) {
          len_local = std::max<Real>(len_local, Xsrc[i*COORD_DIM+k]-BBX0[k]);
        }
      }
      comm_.Allreduce<Real>(Ptr2ConstItr<Real>(&len_local,1), Ptr2Itr<Real>(&BBlen,1), 1, Comm::CommOp::MAX);
      BBlen_inv = 1/BBlen;
    }
    { // Expand bounding-box so that no points are on the boundary
      for (Long i = 0; i < COORD_DIM; i++) {
        BBX0[i] -= 0.05/BBlen_inv;
      }
      BBlen_inv /= 1.1;
    }
    for (Long i = 0; i < Ntrg; i++) { // Set trg_nodes (targets at max Morton depth)
      StaticArray<Real,COORD_DIM> Xmid;
      trg_nodes[i].idx = trg_offset + i;
      trg_nodes[i].rad = 0;
      for (Long k = 0; k < COORD_DIM; k++) {
        trg_nodes[i].X[k] = Xtrg[i*COORD_DIM+k];
        trg_nodes[i].Xn[k] = (have_trg_normal ? Xn_trg[i*COORD_DIM+k] : 0);
        Xmid[k] = (Xtrg[i*COORD_DIM+k]-BBX0[k]) * BBlen_inv;
      }
      trg_nodes[i].mid = Morton<COORD_DIM>((ConstIterator<Real>)Xmid);
      trg_nodes[i].elem_idx = 0;
      trg_nodes[i].pid = comm_.Rank();
    }
    for (Long i = 0; i < Nsrc; i++) { // Set src_nodes
      // Tree depth so that the cell size is comparable to the near radius.
      Integer depth = (Integer)(log(src_radius[i]*BBlen_inv)/log(0.5));
      depth = std::min(Morton<COORD_DIM>::MaxDepth(), std::max<Integer>(depth,0));
      StaticArray<Real,COORD_DIM> Xmid;
      src_nodes[i].idx = src_offset + i;
      src_nodes[i].rad = src_radius[i];
      for (Long k = 0; k < COORD_DIM; k++) {
        src_nodes[i].X[k] = Xsrc[i*COORD_DIM+k];
        Xmid[k] = (Xsrc[i*COORD_DIM+k]-BBX0[k]) * BBlen_inv;
      }
      src_nodes[i].mid = Morton<COORD_DIM>((ConstIterator<Real>)Xmid, depth);
      src_nodes[i].pid = comm_.Rank();
    }
    for (Long i = 0; i < Nelem; i++) { // Set src_nodes.elem_idx
      for (Long j = 0; j < src_elem_nds_cnt[i]; j++) {
        src_nodes[src_elem_nds_dsp[i]+j].elem_idx = elem_offset + i;
      }
    }
  }
  Vector<NodeData> trg_nodes0, src_nodes0, splitter_nodes(comm_.Size());
  { // Set trg_nodes0 <- sort(trg_nodes), src_nodes0 <- sort(src_nodes)
    comm_.HyperQuickSort(src_nodes, src_nodes0, comp_node_mid);
    comm_.HyperQuickSort(trg_nodes, trg_nodes0, comp_node_mid);
    SCTL_ASSERT(src_nodes0.Dim());
    // Repartition targets so that each rank's targets align with its source range.
    StaticArray<NodeData,1> splitter_node{src_nodes0[0]};
    if (!comm_.Rank()) splitter_node[0].mid = Morton<COORD_DIM>(); // rank 0 starts at the root
    comm_.Allgather(splitter_node+0, 1, splitter_nodes.begin(), 1);
    comm_.PartitionS(trg_nodes0, splitter_node[0], comp_node_mid);
  }
  Vector<NodeData> src_nodes1;
  { // Set src_nodes1 <- src_nodes0 + halo // TODO: replace allgather with halo-exchange // TODO
    const Long Np = comm_.Size();
    Vector<Long> cnt0(1), cnt(Np), dsp(Np);
    cnt0[0] = src_nodes0.Dim(); dsp[0] = 0;
    comm_.Allgather(cnt0.begin(), 1, cnt.begin(), 1);
    omp_par::scan(cnt.begin(), dsp.begin(), Np);
    src_nodes1.ReInit(dsp[Np-1] + cnt[Np-1]);
    comm_.Allgatherv(src_nodes0.begin(), src_nodes0.Dim(), src_nodes1.begin(), cnt.begin(), dsp.begin());
  }
  Vector<NodeData> near_lst;
  if (src_nodes1.Dim()) { // Set near_lst
    // sort by elem_idx and mid
    auto comp_elem_idx_mid = [](const NodeData& A, const NodeData& B) {
      return (A.elem_idx<B.elem_idx) || (A.elem_idx==B.elem_idx && A.mid<B.mid);
    };
    omp_par::merge_sort(src_nodes1.begin(), src_nodes1.end(), comp_elem_idx_mid);
    // Preallocate memory // TODO: parallelize
    Vector<Morton<COORD_DIM>> src_mid_lst, trg_mid_lst, nbr_lst;
    Vector<std::pair<Long,Long>> trg_src_near_mid;
    std::set<Morton<COORD_DIM>> trg_mid_set;
    Vector<Long> src_range, trg_range;
    Long eid0 = src_nodes1[0].elem_idx;
    Long eid1 = src_nodes1[src_nodes1.Dim()-1].elem_idx + 1;
    for (Long eid = eid0; eid < eid1; eid++) { // loop over all elements
      Long src_idx0, src_idx1;
      { // Set (src_idx0, src_idx1) the index range of nodes with elem_idx eid
        NodeData srch_node;
        srch_node.elem_idx = eid;
        src_idx0 = std::lower_bound(src_nodes1.begin(), src_nodes1.end(), srch_node, [](const NodeData& A, const NodeData& B){return A.elem_idx<B.elem_idx;}) - src_nodes1.begin();
        src_idx1 = std::upper_bound(src_nodes1.begin(), src_nodes1.end(), srch_node, [](const NodeData& A, const NodeData& B){return A.elem_idx<B.elem_idx;}) - src_nodes1.begin();
      }
      { // build near-list for element eid
        trg_src_near_mid.ReInit(0);
        src_mid_lst.ReInit(0);
        trg_mid_lst.ReInit(0);
        src_range.ReInit(0);
        trg_range.ReInit(0);
        trg_mid_set.clear();
        { // build src_mid_lst, src_range (distinct Morton cells of this element's sources)
          Long src_idx = src_idx0;
          while (src_idx < src_idx1) {
            NodeData nxt_node;
            nxt_node.mid = src_nodes1[src_idx].mid.Next();
            Long src_idx_new = std::lower_bound(src_nodes1.begin()+src_idx, src_nodes1.begin()+src_idx1, nxt_node, comp_node_mid) - src_nodes1.begin();
            src_mid_lst.PushBack(src_nodes1[src_idx].mid);
            src_range.PushBack(src_idx    );
            src_range.PushBack(src_idx_new);
            src_idx = src_idx_new;
          }
        }
        { // build trg_mid_lst, trg_range (candidate target cells = neighbors of source cells)
          Morton<COORD_DIM> nxt_node;
          for (const auto& src_mid : src_mid_lst) {
            src_mid.NbrList(nbr_lst, src_mid.Depth(), false);
            for (const auto& mid : nbr_lst) {
              trg_mid_set.insert(mid);
            }
          }
          for (const auto& trg_mid : trg_mid_set) {
            if (trg_mid >= nxt_node) { // skip cells already covered by a previous range
              nxt_node = trg_mid.Next();
              NodeData node0, node1;
              node0.mid = trg_mid;
              node1.mid = nxt_node;
              Long trg_range0 = std::lower_bound(trg_nodes0.begin(), trg_nodes0.end(), node0, comp_node_mid) - trg_nodes0.begin();
              Long trg_range1 = std::lower_bound(trg_nodes0.begin(), trg_nodes0.end(), node1, comp_node_mid) - trg_nodes0.begin();
              if (trg_range1 > trg_range0) { // keep only non-empty cells
                trg_range.PushBack(trg_range0);
                trg_range.PushBack(trg_range1);
                trg_mid_lst.PushBack(trg_mid);
              }
            }
          }
        }
        { // build interaction list trg_src_near_mid (pairs of target-cell, source-cell indices)
          for (Long i = 0; i < src_mid_lst.Dim(); i++) {
            src_mid_lst[i].NbrList(nbr_lst, src_mid_lst[i].Depth(), false);
            for (const auto& mid : nbr_lst) {
              Long j = std::upper_bound(trg_mid_lst.begin(), trg_mid_lst.end(), mid) - trg_mid_lst.begin() - 1;
              if (j>=0 && trg_mid_lst[j].isAncestor(mid.DFD())) {
                trg_src_near_mid.PushBack(std::pair<Long,Long>(j,i));
              }
            }
          }
          std::sort(trg_src_near_mid.begin(), trg_src_near_mid.end());
        }
        { // build near_lst (exact distance test within candidate cell pairs)
          for (Long i = 0; i < trg_mid_lst.Dim(); i++) { // loop over trg_mid
            Long j0 = std::lower_bound(trg_src_near_mid.begin(), trg_src_near_mid.end(), std::pair<Long,Long>(i+0,0)) - trg_src_near_mid.begin();
            Long j1 = std::lower_bound(trg_src_near_mid.begin(), trg_src_near_mid.end(), std::pair<Long,Long>(i+1,0)) - trg_src_near_mid.begin();
            for (Long ii = trg_range[2*i+0]; ii < trg_range[2*i+1]; ii++) { // loop over trg_nodes0
              const NodeData& trg_node = trg_nodes0[ii];
              bool is_near = false;
              for (Long j = j0; j < j1; j++) { // loop over near src_mid
                Long jj = trg_src_near_mid[j].second;
                if (j==j0 || trg_src_near_mid[j-1].second!=jj) { // skip duplicate source cells
                  for (Long jjj = src_range[jj*2+0]; jjj < src_range[jj*2+1]; jjj++) { // loop over src_nodes1
                    const NodeData& src_node = src_nodes1[jjj];
                    is_near = (node_dist2(src_node,trg_node) < src_node.rad*src_node.rad);
                    if (is_near) break;
                  }
                }
                if (is_near) break;
              }
              if (is_near) { // record (target, element) pair
                NodeData node = trg_node;
                node.elem_idx = eid;
                near_lst.PushBack(node);
              }
            }
          }
        }
      }
    }
  }
  { // sort and partition by elem-ID
    Vector<NodeData> near_lst0;
    { // near_lst0 <-- partition(dist_sort(near_lst), elem_offset)
      NodeData split_node;
      split_node.idx=0;
      split_node.elem_idx=elem_offset;
      comm_.HyperQuickSort(near_lst, near_lst0, comp_node_eid_idx);
      comm_.PartitionS(near_lst0, split_node, comp_node_eid_idx);
    }
    near_lst.Swap(near_lst0);
  }
  { // Set Xtrg_near
    Xtrg_near.ReInit(near_lst.Dim()*COORD_DIM);
    #pragma omp parallel for schedule(static)
    for (Long i = 0; i < near_lst.Dim(); i++) {
      for (Long k = 0; k < COORD_DIM; k++) {
        Xtrg_near[i*COORD_DIM+k] = near_lst[i].X[k];
      }
    }
  }
  if (have_trg_normal) { // Set Xn_trg_near
    Xn_trg_near.ReInit(near_lst.Dim()*COORD_DIM);
    #pragma omp parallel for schedule(static)
    for (Long i = 0; i < near_lst.Dim(); i++) {
      for (Long k = 0; k < COORD_DIM; k++) {
        Xn_trg_near[i*COORD_DIM+k] = near_lst[i].Xn[k];
      }
    }
  }
  { // Set near_elem_cnt, near_elem_dsp
    near_elem_cnt.ReInit(Nelem);
    near_elem_dsp.ReInit(Nelem);
    #pragma omp parallel
    { // Set near_elem_cnt, near_elem_dsp
      const Integer tid = omp_get_thread_num();
      const Integer omp_p = omp_get_num_threads();
      const Long elem_idx0 = Nelem*(tid+0)/omp_p;
      const Long elem_idx1 = Nelem*(tid+1)/omp_p;
      for (Long i = elem_idx0; i < elem_idx1; i++) {
        near_elem_cnt[i] = 0;
        near_elem_dsp[i] = 0;
      }
      Long idx0, idx1;
      { // Set index range [idx0, idx1] in near_lst for this thread
        NodeData srch_node0, srch_node1;
        srch_node0.elem_idx = elem_offset + elem_idx0; srch_node0.idx = 0;
        srch_node1.elem_idx = elem_offset + elem_idx1; srch_node1.idx = 0;
        idx0 = std::lower_bound(near_lst.begin(), near_lst.end(), srch_node0, comp_node_eid_idx) - near_lst.begin();
        idx1 = std::lower_bound(near_lst.begin(), near_lst.end(), srch_node1, comp_node_eid_idx) - near_lst.begin();
      }
      for (Long i = idx0; i < idx1;) {
        Long elem_idx_ = near_lst[i].elem_idx, cnt = 0; //, dsp = i;
        for (; i<idx1 && near_lst[i].elem_idx==elem_idx_; i++) cnt++;
        //near_elem_dsp[elem_idx_-elem_offset] = dsp; // skips for elements with cnt == 0
        near_elem_cnt[elem_idx_-elem_offset] = cnt;
      }
    }
    // Displacements recomputed by scan so that zero-count elements are handled.
    omp_par::scan(near_elem_cnt.begin(), near_elem_dsp.begin(), Nelem);
  }
  { // Set scatter_index, near_trg_cnt, near_trg_dsp
    Vector<Long> trg_idx(near_lst.Dim());
    #pragma omp parallel for schedule(static)
    for (Long i = 0; i < trg_idx.Dim(); i++) {
      trg_idx[i] = near_lst[i].idx;
    }
    comm_.SortScatterIndex(trg_idx, near_scatter_index, &trg_offset);
    comm_.ScatterForward(trg_idx, near_scatter_index);
    near_trg_cnt.ReInit(Ntrg);
    near_trg_dsp.ReInit(Ntrg);
    #pragma omp parallel
    { // Set near_trg_cnt, near_trg_dsp
      const Integer tid = omp_get_thread_num();
      const Integer omp_p = omp_get_num_threads();
      const Long trg_idx0 = Ntrg*(tid+0)/omp_p;
      const Long trg_idx1 = Ntrg*(tid+1)/omp_p;
      for (Long i = trg_idx0; i < trg_idx1; i++) {
        near_trg_cnt[i] = 0;
        near_trg_dsp[i] = 0;
      }
      Long idx0 = std::lower_bound(trg_idx.begin(), trg_idx.end(), trg_offset + trg_idx0) - trg_idx.begin();
      Long idx1 = std::lower_bound(trg_idx.begin(), trg_idx.end(), trg_offset + trg_idx1) - trg_idx.begin();
      for (Long i = idx0; i < idx1;) {
        Long trg_idx_ = trg_idx[i], dsp = i, cnt = 0;
        for (; i<idx1 && trg_idx[i]==trg_idx_; i++) cnt++;
        near_trg_dsp[trg_idx_-trg_offset] = dsp;
        near_trg_cnt[trg_idx_-trg_offset] = cnt;
      }
    }
  }
}
// Constructor: store the kernel, target-dot-normal flag and communicator, and wire
// the FMM with a single source/target pair named "Src"/"Trg" using the same kernel
// for all translation operators. Call order matters: SetKernels before AddSrc/AddTrg.
template <class Real, class Kernel> BoundaryIntegralOp<Real,Kernel>::BoundaryIntegralOp(const Kernel& ker, bool trg_normal_dot_prod, const Comm& comm) : tol_(1e-10), ker_(ker), trg_normal_dot_prod_(trg_normal_dot_prod), comm_(comm), fmm(comm) {
  // When dotting with target normals, the kernel's output dim must be a multiple of COORD_DIM.
  SCTL_ASSERT(!trg_normal_dot_prod_ || (KDIM1 % COORD_DIM == 0));
  ClearSetup();
  fmm.SetKernels(ker, ker, ker);
  fmm.AddSrc("Src", ker, ker);
  fmm.AddTrg("Trg", ker, ker);
  fmm.SetKernelS2T("Src", "Trg", ker);
}
  372. template <class Real, class Kernel> BoundaryIntegralOp<Real,Kernel>::~BoundaryIntegralOp() {
  373. Vector<std::string> elem_lst_name;
  374. for (auto& it : elem_lst_map) elem_lst_name.PushBack(it.first);
  375. for (const auto& name : elem_lst_name) DeleteElemList(name);
  376. }
// Set the quadrature/FMM accuracy. Invalidates far-field, self and near setup
// (but not the basic geometry setup, which does not depend on tol).
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetAccuracy(Real tol) {
  setup_far_flag = false;
  setup_self_flag = false;
  setup_near_flag = false;
  tol_ = tol;
  // FMM accuracy is given in digits: log(tol)/log(0.1) = -log10(tol).
  fmm.SetAccuracy((Integer)(log(tol)/log(0.1))+1);
}
// Replace all FMM translation kernels (source-to-multipole/local/target,
// multipole/local translations). Rebuilds the "Src"/"Trg" pair; call order
// matters: delete old entries, set translation kernels, then re-add.
template <class Real, class Kernel> template <class KerS2M, class KerS2L, class KerS2T, class KerM2M, class KerM2L, class KerM2T, class KerL2L, class KerL2T> void BoundaryIntegralOp<Real,Kernel>::SetFMMKer(const KerS2M& k_s2m, const KerS2L& k_s2l, const KerS2T& k_s2t, const KerM2M& k_m2m, const KerM2L& k_m2l, const KerM2T& k_m2t, const KerL2L& k_l2l, const KerL2T& k_l2t) {
  fmm.DeleteSrc("Src");
  fmm.DeleteTrg("Trg");
  fmm.SetKernels(k_m2m, k_m2l, k_l2l);
  fmm.AddSrc("Src", k_s2m, k_s2l);
  fmm.AddTrg("Trg", k_m2t, k_l2t);
  fmm.SetKernelS2T("Src", "Trg", k_s2t);
}
  392. template <class Real, class Kernel> template <class ElemLstType> void BoundaryIntegralOp<Real,Kernel>::AddElemList(const ElemLstType& elem_lst, const std::string& name) {
  393. static_assert(std::is_copy_constructible<ElemLstType>::value, "ElemeType must be copy-constructible.");
  394. SCTL_ASSERT_MSG(elem_lst_map.find(name) == elem_lst_map.end(), "Element list already exists.");
  395. elem_lst_map[name] = dynamic_cast<ElementListBase<Real>*>(new ElemLstType(elem_lst));
  396. elem_data_map[name].SelfInterac = ElemLstType::template SelfInterac<Kernel>;
  397. elem_data_map[name].NearInterac = ElemLstType::template NearInterac<Kernel>;
  398. ClearSetup();
  399. }
  400. template <class Real, class Kernel> template <class ElemLstType> const ElemLstType& BoundaryIntegralOp<Real,Kernel>::GetElemList(const std::string& name) const {
  401. SCTL_ASSERT_MSG(elem_lst_map.find(name) != elem_lst_map.end(), "Element list does not exist.");
  402. return *dynamic_cast<const ElemLstType*>(elem_lst_map.at(name));
  403. }
// Unregister and destroy the element list registered under name.
// Asserts if the name is unknown. Invalidates all cached setup data.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::DeleteElemList(const std::string& name) {
  SCTL_ASSERT_MSG(elem_lst_map.find(name) != elem_lst_map.end(), "Element list does not exist.");
  // NOTE(review): the cast looks redundant if elem_lst_map already stores
  // ElementListBase<Real>* (as AddElemList suggests) — confirm the map's value type.
  delete (ElementListBase<Real>*)elem_lst_map[name];
  elem_data_map.erase(name);
  elem_lst_map.erase(name);
  ClearSetup();
}
// Set off-surface target coordinates (empty vector reverts targets to the
// surface nodes). Invalidates basic, far-field and near setup.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetTargetCoord(const Vector<Real>& Xtrg) {
  Xt = Xtrg;
  setup_flag = false;
  setup_far_flag = false;
  setup_near_flag = false;
}
// Set target normal vectors (used when trg_normal_dot_prod_ is enabled).
// Invalidates basic and near setup; far-field setup does not use normals here.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetTargetNormal(const Vector<Real>& Xn_trg) {
  Xnt = Xn_trg;
  setup_flag = false;
  setup_near_flag = false;
}
// Convenience overload: delete the list that was registered under the
// type-derived default name (the hash of ElemLstType's typeid).
template <class Real, class Kernel> template <class ElemLstType> void BoundaryIntegralOp<Real,Kernel>::DeleteElemList() {
  DeleteElemList(std::to_string(typeid(ElemLstType).hash_code()));
}
  425. template <class Real, class Kernel> Long BoundaryIntegralOp<Real,Kernel>::Dim(Integer k) const {
  426. SetupBasic();
  427. if (k == 0) {
  428. const Long Nelem = elem_nds_cnt.Dim();
  429. return (Nelem ? (elem_nds_dsp[Nelem-1] + elem_nds_cnt[Nelem-1]) * KDIM0 : 0);
  430. }
  431. if (k == 1) {
  432. return (Xtrg.Dim()/COORD_DIM) * (trg_normal_dot_prod_ ? KDIM1/COORD_DIM : KDIM1);
  433. }
  434. SCTL_ASSERT(false);
  435. return -1;
  436. }
// Run all setup stages (basic geometry, far-field quadrature, singular
// self-interaction, near interaction) unless everything is already current.
// The stage order is significant: later stages depend on earlier ones.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::Setup() const {
  if (setup_flag && setup_far_flag && setup_self_flag && setup_near_flag) return;
  Profile::Tic("Setup", &comm_, true, 5);
  SetupBasic();
  SetupFar();
  SetupSelf();
  SetupNear();
  Profile::Toc();
}
  446. template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::ClearSetup() const {
  447. setup_flag = false;
  448. setup_far_flag = false;
  449. setup_self_flag = false;
  450. setup_near_flag = false;
  451. }
// Evaluate the layer potential U at the targets for the density F:
// far-field contribution via FMM plus corrected near/self interactions.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::ComputePotential(Vector<Real>& U, const Vector<Real>& F) const {
  Setup();
  Profile::Tic("Eval", &comm_, true, 5);
  ComputeFarField(U, F);
  ComputeNearInterac(U, F);
  Profile::Toc();
}
// Rebuild the basic geometry data from the registered element lists:
// element-list names, per-list element counts/displacements, per-element node
// counts/displacements, surface node coordinates/normals, and the effective
// target coordinates/normals (user-supplied Xt/Xnt, or the surface itself).
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetupBasic() const {
  if (setup_flag) return;
  elem_lst_name.ReInit(0);
  elem_lst_cnt.ReInit(0);
  elem_lst_dsp.ReInit(0);
  elem_nds_cnt.ReInit(0);
  elem_nds_dsp.ReInit(0);
  Xsurf.ReInit(0);
  Xtrg.ReInit(0);
  Xn_trg.ReInit(0);
  // NOTE(review): Xn_surf is rebuilt below but not cleared here like the others — confirm intended.
  const Long Nlst = elem_lst_map.size();
  { // Set elem_lst_name
    elem_lst_name.ReInit(0);
    for (const auto& x : elem_lst_map) {
      elem_lst_name.PushBack(x.first);
    }
  }
  { // Set elem_lst_cnt, elem_nds_cnt, Xsurf
    elem_lst_cnt .ReInit(Nlst);
    Vector<Vector<Real>> Xsurf_(Nlst);
    Vector<Vector<Real>> Xn_surf_(Nlst);
    Vector<Vector<Long>> elem_nds_cnt_(Nlst);
    for (Long i = 0; i < Nlst; i++) {
      elem_lst_map.at(elem_lst_name[i])->GetNodeCoord(&Xsurf_[i], &Xn_surf_[i], &elem_nds_cnt_[i]);
      elem_lst_cnt[i] = elem_nds_cnt_[i].Dim();
    }
    concat_vecs(Xsurf, Xsurf_);
    concat_vecs(Xn_surf, Xn_surf_);
    concat_vecs(elem_nds_cnt, elem_nds_cnt_);
  }
  { // Set elem_lst_dsp, elem_nds_dsp
    const Long Nelem = elem_nds_cnt.Dim();
    if (elem_lst_dsp.Dim() != Nlst ) elem_lst_dsp.ReInit(Nlst);
    if (elem_nds_dsp.Dim() != Nelem) elem_nds_dsp.ReInit(Nelem);
    if (Nlst ) elem_lst_dsp[0] = 0;
    if (Nelem) elem_nds_dsp[0] = 0;
    omp_par::scan(elem_lst_cnt.begin(), elem_lst_dsp.begin(), Nlst);
    omp_par::scan(elem_nds_cnt.begin(), elem_nds_dsp.begin(), Nelem);
    // Sanity check: element count must equal the sum of per-list counts.
    SCTL_ASSERT(Nelem == (Nlst ? elem_lst_dsp[Nlst-1] + elem_lst_cnt[Nlst-1] : 0));
  }
  if (Xt.Dim()) { // Set Xtrg (user-specified targets, else the surface nodes)
    Xtrg = Xt;
  } else {
    Xtrg = Xsurf;
  }
  if (trg_normal_dot_prod_) { // Set Xn_trg (only needed for normal-dotted output)
    if (Xnt.Dim()) {
      Xn_trg = Xnt;
      SCTL_ASSERT_MSG(Xn_trg.Dim() == Xtrg.Dim(), "Invalid normal vector at targets.");
    } else {
      Xn_trg = Xn_surf;
    }
  }
  setup_flag = true;
}
// Rebuild the far-field quadrature data: per-element far-field nodes, normals,
// quadrature weights and near-region distances (obtained from each element
// list at accuracy tol_), plus the per-element counts/displacements, and hand
// the source/target geometry to the FMM.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetupFar() const {
  if (setup_far_flag) return;
  X_far.ReInit(0);
  Xn_far.ReInit(0);
  wts_far.ReInit(0);
  dist_far.ReInit(0);
  elem_nds_cnt_far.ReInit(0);
  elem_nds_dsp_far.ReInit(0);
  SetupBasic();
  Profile::Tic("SetupFarField", &comm_, false, 6);
  const Long Nlst = elem_lst_map.size();
  Vector<Vector<Real>> X_far_(Nlst);
  Vector<Vector<Real>> Xn_far_(Nlst);
  Vector<Vector<Real>> wts_far_(Nlst);
  Vector<Vector<Real>> dist_far_(Nlst);
  Vector<Vector<Long>> elem_nds_cnt_far_(Nlst);
  for (Long i = 0; i < Nlst; i++) { // query each element list for its far-field nodes
    elem_lst_map.at(elem_lst_name[i])->GetFarFieldNodes(X_far_[i], Xn_far_[i], wts_far_[i], dist_far_[i], elem_nds_cnt_far_[i], tol_);
  }
  concat_vecs(X_far   , X_far_   );
  concat_vecs(Xn_far  , Xn_far_  );
  concat_vecs(wts_far , wts_far_ );
  concat_vecs(dist_far, dist_far_);
  concat_vecs(elem_nds_cnt_far, elem_nds_cnt_far_);
  { // Set elem_nds_dsp_far
    const Long Nelem = elem_nds_cnt.Dim();
    SCTL_ASSERT(elem_nds_cnt_far.Dim() == Nelem);
    if (elem_nds_dsp_far.Dim() != Nelem) elem_nds_dsp_far.ReInit(Nelem);
    if (Nelem) elem_nds_dsp_far[0] = 0;
    omp_par::scan(elem_nds_cnt_far.begin(), elem_nds_dsp_far.begin(), Nelem);
  }
  fmm.SetSrcCoord("Src", X_far, Xn_far);
  fmm.SetTrgCoord("Trg", Xtrg);
  Profile::Toc();
  setup_far_flag = true;
}
// Rebuild the singular self-interaction matrices: one matrix per element,
// computed by each element list's SelfInterac callback at accuracy tol_,
// concatenated into K_self in element order.
template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetupSelf() const {
  if (setup_self_flag) return;
  K_self.ReInit(0);
  SetupBasic();
  Profile::Tic("SetupSingular", &comm_, false, 6);
  const Long Nlst = elem_lst_map.size();
  Vector<Vector<Matrix<Real>>> K_self_(Nlst);
  for (Long i = 0; i < Nlst; i++) {
    const auto& name = elem_lst_name[i];
    const auto& elem_lst = elem_lst_map.at(name);
    const auto& elem_data = elem_data_map.at(name);
    elem_data.SelfInterac(K_self_[i], ker_, tol_, trg_normal_dot_prod_, elem_lst);
  }
  concat_vecs(K_self, K_self_);
  Profile::Toc();
  setup_self_flag = true;
  // TODO: skip SetupSelf when no on-surface targets.
}
  568. template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::SetupNear() const {
  569. if (setup_near_flag) return;
  570. Xtrg_near.ReInit(0);
  571. near_scatter_index.ReInit(0);
  572. near_trg_cnt.ReInit(0);
  573. near_trg_dsp.ReInit(0);
  574. near_elem_cnt.ReInit(0);
  575. near_elem_dsp.ReInit(0);
  576. K_near_cnt.ReInit(0);
  577. K_near_dsp.ReInit(0);
  578. K_near.ReInit(0);
  579. SetupBasic();
  580. SetupFar();
  581. SetupSelf();
  582. Profile::Tic("SetupNear", &comm_, true, 6);
  583. Profile::Tic("BuildNearLst", &comm_, true, 7);
  584. BuildNearList(Xtrg_near, Xn_trg_near, near_elem_cnt, near_elem_dsp, near_scatter_index, near_trg_cnt, near_trg_dsp, Xtrg, Xn_trg, X_far, dist_far, elem_nds_cnt_far, elem_nds_dsp_far, comm_);
  585. Profile::Toc();
  586. { // Set K_near_cnt, K_near_dsp, K_near
  587. const Integer KDIM1_ = (trg_normal_dot_prod_ ? KDIM1/COORD_DIM : KDIM1);
  588. const Long Nlst = elem_lst_map.size();
  589. const Long Nelem = near_elem_cnt.Dim();
  590. SCTL_ASSERT(Nelem == elem_nds_cnt.Dim());
  591. if (Nelem) { // Set K_near_cnt, K_near_dsp
  592. K_near_cnt.ReInit(Nelem);
  593. K_near_dsp.ReInit(Nelem);
  594. if (Nelem) K_near_dsp[0] = 0;
  595. for (Long i = 0; i < Nelem; i++) {
  596. K_near_cnt[i] = elem_nds_cnt[i]*near_elem_cnt[i];
  597. }
  598. omp_par::scan(K_near_cnt.begin(), K_near_dsp.begin(), Nelem);
  599. }
  600. if (Nelem) { // Set K_near
  601. K_near.ReInit((K_near_dsp[Nelem-1]+K_near_cnt[Nelem-1])*KDIM0*KDIM1_);
  602. for (Long i = 0; i < Nlst; i++) {
  603. const auto& name = elem_lst_name[i];
  604. const auto& elem_lst = elem_lst_map.at(name);
  605. const auto& elem_data = elem_data_map.at(name);
  606. //#pragma omp parallel for
  607. for (Long j = 0; j < elem_lst_cnt[i]; j++) {
  608. const Long elem_idx = elem_lst_dsp[i]+j;
  609. const Long Ntrg = near_elem_cnt[elem_idx];
  610. const Vector<Real> Xsurf_(elem_nds_cnt[elem_idx]*COORD_DIM, Xsurf.begin()+elem_nds_dsp[elem_idx]*COORD_DIM, false);
  611. Matrix<Real> K_near_(elem_nds_cnt[elem_idx]*KDIM0,near_elem_cnt[elem_idx]*KDIM1_, K_near.begin()+K_near_dsp[elem_idx]*KDIM0*KDIM1_, false);
  612. for (Long k = 0; k < Ntrg; k++) {
  613. Long min_Xt = -1, min_Xsurf = -1;
  614. const Vector<Real> Xt(COORD_DIM, Xtrg_near.begin()+(near_elem_dsp[elem_idx]+k)*COORD_DIM, false);
  615. const Vector<Real> Xn((trg_normal_dot_prod_ ? COORD_DIM : 0), Xn_trg_near.begin()+(near_elem_dsp[elem_idx]+k)*COORD_DIM, false);
  616. auto compute_min_dist2 = [](Long& min_idx, Long& min_idy, const Vector<Real>& X, const Vector<Real>& Y) {
  617. const Long Nx = X.Dim() / COORD_DIM;
  618. const Long Ny = Y.Dim() / COORD_DIM;
  619. Real min_r2 = -1;
  620. for (Long i = 0 ; i < Nx; i++) {
  621. for (Long j = 0 ; j < Ny; j++) {
  622. Real r2 = 0;
  623. for (Long k = 0; k < COORD_DIM; k++) {
  624. Real d = X[i*COORD_DIM+k] - Y[j*COORD_DIM+k];
  625. r2 += d*d;
  626. }
  627. if (min_r2<0 || r2<min_r2) {
  628. min_idx = i;
  629. min_idy = j;
  630. min_r2 = r2;
  631. }
  632. }
  633. }
  634. return min_r2;
  635. };
  636. Real trg_elem_dist2 = compute_min_dist2(min_Xt, min_Xsurf, Xt, Xsurf_);
  637. SCTL_ASSERT(min_Xt >= 0 && min_Xsurf >= 0);
  638. if (trg_elem_dist2 == 0) { // Set K_near0
  639. Matrix<Real> K_near0(K_self[elem_idx].Dim(0),K_self[elem_idx].Dim(1), K_self[elem_idx].begin(), false);
  640. for (Long l = 0; l < K_near0.Dim(0); l++) {
  641. for (Long k1 = 0; k1 < KDIM1_; k1++) {
  642. K_near_[l][k*KDIM1_+k1] = K_near0[l][min_Xsurf*KDIM1_+k1];
  643. }
  644. }
  645. } else {
  646. Matrix<Real> K_near0;
  647. elem_data.NearInterac(K_near0, Xt, Xn, ker_, tol_, j, elem_lst);
  648. for (Long l = 0; l < K_near0.Dim(0); l++) {
  649. for (Long k1 = 0; k1 < KDIM1_; k1++) {
  650. K_near_[l][k*KDIM1_+k1] = K_near0[l][k1];
  651. }
  652. }
  653. }
  654. }
  655. }
  656. }
  657. }
  658. for (Long i = 0; i < Nlst; i++) { // Subtract direct-interaction part from K_near
  659. const auto& elem_lst = elem_lst_map.at(elem_lst_name[i]);
  660. //#pragma omp parallel for
  661. for (Long j = 0; j < elem_lst_cnt[i]; j++) { // subtract direct sum
  662. const Long elem_idx = elem_lst_dsp[i]+j;
  663. const Long trg_cnt = near_elem_cnt[elem_idx];
  664. const Long trg_dsp = near_elem_dsp[elem_idx];
  665. const Vector<Real> Xtrg_near_(trg_cnt*COORD_DIM, Xtrg_near.begin()+trg_dsp*COORD_DIM, false);
  666. const Vector<Real> Xn_trg_near_((trg_normal_dot_prod_ ? trg_cnt*COORD_DIM : 0), Xn_trg_near.begin()+trg_dsp*COORD_DIM, false);
  667. if (!trg_cnt) continue;
  668. const Long far_src_cnt = elem_nds_cnt_far[elem_idx];
  669. const Long far_src_dsp = elem_nds_dsp_far[elem_idx];
  670. const Vector<Real> X (far_src_cnt*COORD_DIM, X_far.begin() + far_src_dsp*COORD_DIM, false);
  671. const Vector<Real> Xn(far_src_cnt*COORD_DIM, Xn_far.begin() + far_src_dsp*COORD_DIM, false);
  672. const Vector<Real> wts(far_src_cnt, wts_far.begin() + far_src_dsp, false);
  673. SCTL_ASSERT(K_near_cnt[elem_idx] == elem_nds_cnt[elem_idx]*trg_cnt);
  674. Matrix<Real> K_near_(elem_nds_cnt[elem_idx]*KDIM0, trg_cnt*KDIM1_, K_near.begin()+K_near_dsp[elem_idx]*KDIM0*KDIM1_, false);
  675. { // Set K_near_
  676. Matrix<Real> Mker(far_src_cnt*KDIM0, trg_cnt*KDIM1_);
  677. if (trg_normal_dot_prod_) {
  678. Matrix<Real> Mker_;
  679. constexpr Integer KDIM1_ = KDIM1/COORD_DIM;
  680. ker_.KernelMatrix(Mker_, Xtrg_near_, X, Xn);
  681. for (Long s = 0; s < far_src_cnt; s++) {
  682. for (Long k0 = 0; k0 < KDIM0; k0++) {
  683. for (Long t = 0; t < trg_cnt; t++) {
  684. for (Long k1 = 0; k1 < KDIM1_; k1++) {
  685. Mker[s*KDIM0+k0][t*KDIM1_+k1] = 0;
  686. for (Long l = 0; l < COORD_DIM; l++) {
  687. Mker[s*KDIM0+k0][t*KDIM1_+k1] += Mker_[s*KDIM0+k0][(t*KDIM1_+k1)*COORD_DIM+l] * wts[s] * Xn_trg_near_[t*COORD_DIM+l];
  688. }
  689. }
  690. }
  691. }
  692. }
  693. } else {
  694. ker_.KernelMatrix(Mker, Xtrg_near_, X, Xn);
  695. for (Long s = 0; s < far_src_cnt; s++) {
  696. for (Long k0 = 0; k0 < KDIM0; k0++) {
  697. for (Long t = 0; t < trg_cnt; t++) {
  698. for (Long k1 = 0; k1 < KDIM1; k1++) {
  699. Mker[s*KDIM0+k0][t*KDIM1+k1] *= wts[s];
  700. }
  701. }
  702. }
  703. }
  704. }
  705. Matrix<Real> K_direct;
  706. elem_lst->FarFieldDensityOperatorTranspose(K_direct, Mker, j);
  707. K_near_ -= K_direct;
  708. }
  709. }
  710. }
  711. }
  712. Profile::Toc();
  713. setup_near_flag = true;
  714. }
  715. template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::ComputeFarField(Vector<Real>& U, const Vector<Real>& F) const {
  716. Profile::Tic("EvalFar", &comm_, true, 6);
  717. const Long Nsrc = X_far.Dim()/COORD_DIM;
  718. const Long Ntrg = Xtrg.Dim()/COORD_DIM;
  719. { // Set F_far
  720. const Long Nlst = elem_lst_map.size();
  721. if (F_far.Dim() != Nsrc*KDIM0) F_far.ReInit(Nsrc*KDIM0);
  722. for (Long i = 0; i < Nlst; i++) { // Init F_far
  723. Long elem_idx0 = elem_lst_dsp[i];
  724. Long elem_idx1 = elem_lst_dsp[i]+elem_lst_cnt[i];
  725. Long offset0 = (!elem_lst_cnt[i] ? 0 : elem_nds_dsp[elem_idx0]);
  726. Long offset1 = (!elem_lst_cnt[i] ? 0 : elem_nds_dsp[elem_idx1-1] + elem_nds_cnt[elem_idx1-1]);
  727. const Vector<Real> F_((offset1-offset0)*KDIM0, (Iterator<Real>)F.begin() + offset0*KDIM0, false);
  728. Long offset0_far = (!elem_lst_cnt[i] ? 0 : elem_nds_dsp_far[elem_idx0]);
  729. Long offset1_far = (!elem_lst_cnt[i] ? 0 : elem_nds_dsp_far[elem_idx1-1] + elem_nds_cnt_far[elem_idx1-1]);
  730. Vector<Real> F_far_((offset1_far-offset0_far)*KDIM0, F_far.begin() + offset0_far*KDIM0, false);
  731. elem_lst_map.at(elem_lst_name[i])->GetFarFieldDensity(F_far_, F_);
  732. }
  733. for (Long i = 0; i < Nsrc; i++) { // apply wts_far
  734. for (Long j = 0; j < KDIM0; j++) {
  735. F_far[i*KDIM0+j] *= wts_far[i];
  736. }
  737. }
  738. }
  739. fmm.SetSrcDensity("Src", F_far);
  740. const Integer KDIM1_ = (trg_normal_dot_prod_ ? KDIM1/COORD_DIM : KDIM1);
  741. if (U.Dim() != Ntrg*KDIM1_) {
  742. U.ReInit(Ntrg*KDIM1_);
  743. U.SetZero();
  744. }
  745. if (trg_normal_dot_prod_) {
  746. constexpr Integer KDIM1_ = KDIM1/COORD_DIM;
  747. Vector<Real> U_(Ntrg * KDIM1); U_.SetZero();
  748. fmm.Eval(U_, "Trg");
  749. for (Long i = 0; i < Ntrg; i++) {
  750. for (Long k = 0; k < KDIM1_; k++) {
  751. for (Long l = 0; l < COORD_DIM; l++) {
  752. U[i*KDIM1_+k] += U_[(i*KDIM1_+k)*COORD_DIM+l] * Xn_trg[i*COORD_DIM+l];
  753. }
  754. }
  755. }
  756. } else {
  757. fmm.Eval(U, "Trg");
  758. }
  759. Profile::Toc();
  760. }
  761. template <class Real, class Kernel> void BoundaryIntegralOp<Real,Kernel>::ComputeNearInterac(Vector<Real>& U, const Vector<Real>& F) const {
  762. const Integer KDIM1_ = (trg_normal_dot_prod_ ? KDIM1/COORD_DIM : KDIM1);
  763. Profile::Tic("EvalNear", &comm_, true, 6);
  764. const Long Ntrg = Xtrg.Dim()/COORD_DIM;
  765. const Long Nelem = near_elem_cnt.Dim();
  766. if (U.Dim() != Ntrg*KDIM1_) {
  767. U.ReInit(Ntrg*KDIM1_);
  768. U.SetZero();
  769. }
  770. Vector<Real> U_near(Nelem ? (near_elem_dsp[Nelem-1]+near_elem_cnt[Nelem-1])*KDIM1_ : 0);
  771. for (Long elem_idx = 0; elem_idx < Nelem; elem_idx++) { // compute near-interactions
  772. const Long src_dof = elem_nds_cnt[elem_idx]*KDIM0;
  773. const Long trg_dof = near_elem_cnt[elem_idx]*KDIM1_;
  774. if (src_dof==0 || trg_dof == 0) continue;
  775. const Matrix<Real> K_near_(src_dof, trg_dof, K_near.begin() + K_near_dsp[elem_idx]*KDIM0*KDIM1_, false);
  776. const Matrix<Real> F_(1, src_dof, (Iterator<Real>)F.begin() + elem_nds_dsp[elem_idx]*KDIM0, false);
  777. Matrix<Real> U_(1, trg_dof, U_near.begin() + near_elem_dsp[elem_idx]*KDIM1_, false);
  778. Matrix<Real>::GEMM(U_, F_, K_near_);
  779. }
  780. SCTL_ASSERT(near_trg_cnt.Dim() == Ntrg);
  781. Profile::Tic("Comm", &comm_, true, 7);
  782. comm_.ScatterForward(U_near, near_scatter_index);
  783. Profile::Toc();
  784. //#pragma omp parallel for schedule(static)
  785. for (Long i = 0; i < Ntrg; i++) { // Accumulate result to U
  786. Long near_cnt = near_trg_cnt[i];
  787. Long near_dsp = near_trg_dsp[i];
  788. for (Long j = 0; j < near_cnt; j++) {
  789. for (Long k = 0; k < KDIM1_; k++) {
  790. U[i*KDIM1_+k] += U_near[(near_dsp+j)*KDIM1_+k];
  791. }
  792. }
  793. }
  794. Profile::Toc();
  795. }
  796. }