// sph_harm.txx
  1. #include SCTL_INCLUDE(legendre_rule.hpp)
  2. // TODO: Replace work vectors with dynamic-arrays
  3. namespace SCTL_NAMESPACE {
  4. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& S, SHCArrange arrange){
  5. const auto& Mf = OpFourierInv(Np);
  6. assert(Mf.Dim(0) == Np);
  7. const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
  8. assert((Long)Ml.size() == p1+1);
  9. Long N = X.Dim() / (Np*Nt);
  10. assert(X.Dim() == N*Np*Nt);
  11. Vector<Real> B0((2*p1+1) * N*Nt);
  12. #pragma omp parallel
  13. { // B0 <-- Transpose(FFT(X))
  14. Integer tid=omp_get_thread_num();
  15. Integer omp_p=omp_get_num_threads();
  16. Long a=(tid+0)*N*Nt/omp_p;
  17. Long b=(tid+1)*N*Nt/omp_p;
  18. Vector<Real> buff(Mf.Dim(1));
  19. Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
  20. Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
  21. const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
  22. for (Long i = a; i < b; i++) {
  23. { // buff <-- FFT(Xi)
  24. const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
  25. Mf.Execute(Xi, buff);
  26. }
  27. { // B0 <-- Transpose(buff)
  28. B0_[0][i] = buff[0]; // skipping buff[1] == 0
  29. for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
  30. for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
  31. }
  32. }
  33. }
  34. Vector<Real> B1(N*(p1+1)*(p1+1));
  35. #pragma omp parallel
  36. { // Evaluate Legendre polynomial
  37. Integer tid=omp_get_thread_num();
  38. Integer omp_p=omp_get_num_threads();
  39. Long offset0=0;
  40. Long offset1=0;
  41. for (Long i = 0; i < p1+1; i++) {
  42. Long N_ = (i==0 ? N : 2*N);
  43. Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
  44. Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
  45. { // Mout = Min * Ml[i] // split between threads
  46. Long a=(tid+0)*N_/omp_p;
  47. Long b=(tid+1)*N_/omp_p;
  48. if (a < b) {
  49. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  50. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  51. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  52. }
  53. }
  54. offset0+=Min .Dim(0)*Min .Dim(1);
  55. offset1+=Mout.Dim(0)*Mout.Dim(1);
  56. }
  57. assert(offset0 == B0.Dim());
  58. assert(offset1 == B1.Dim());
  59. }
  60. B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  61. if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
  62. Long M = 2*(p1+1)*(p1+1);
  63. if(S.Dim() != N * M) S.ReInit(N * M);
  64. #pragma omp parallel
  65. { // S <-- Rearrange(B1)
  66. Integer tid=omp_get_thread_num();
  67. Integer omp_p=omp_get_num_threads();
  68. Long a=(tid+0)*N/omp_p;
  69. Long b=(tid+1)*N/omp_p;
  70. for (Long i = a; i < b; i++) {
  71. Long offset = 0;
  72. for (Long j = 0; j < p1+1; j++) {
  73. Long len = p1+1 - j;
  74. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  75. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  76. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
  77. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  78. offset += len;
  79. }
  80. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  81. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  82. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  83. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  84. offset += len;
  85. } else {
  86. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  87. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
  88. }
  89. }
  90. }
  91. }
  92. }
  93. if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
  94. Long M = (p1+1)*(p1+2);
  95. if(S.Dim() != N * M) S.ReInit(N * M);
  96. #pragma omp parallel
  97. { // S <-- Rearrange(B1)
  98. Integer tid=omp_get_thread_num();
  99. Integer omp_p=omp_get_num_threads();
  100. Long a=(tid+0)*N/omp_p;
  101. Long b=(tid+1)*N/omp_p;
  102. for (Long i = a; i < b; i++) {
  103. Long offset = 0;
  104. for (Long j = 0; j < p1+1; j++) {
  105. Long len = p1+1 - j;
  106. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  107. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  108. Iterator<Real> S_ = S .begin() + i*M + 0;
  109. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  110. offset += len;
  111. }
  112. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  113. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  114. Iterator<Real> S_ = S .begin() + i*M + 1;
  115. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  116. offset += len;
  117. } else {
  118. Iterator<Real> S_ = S .begin() + i*M + 1;
  119. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
  120. }
  121. }
  122. }
  123. }
  124. }
  125. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
  126. Long M = (p1+1)*(p1+1);
  127. if(S.Dim() != N * M) S.ReInit(N * M);
  128. #pragma omp parallel
  129. { // S <-- Rearrange(B1)
  130. Integer tid=omp_get_thread_num();
  131. Integer omp_p=omp_get_num_threads();
  132. Long a=(tid+0)*N/omp_p;
  133. Long b=(tid+1)*N/omp_p;
  134. for (Long i = a; i < b; i++) {
  135. Long offset = 0;
  136. for (Long j = 0; j < p1+1; j++) {
  137. Long len = p1+1 - j;
  138. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  139. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  140. Iterator<Real> S_ = S .begin() + i*M + offset;
  141. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  142. offset += len;
  143. }
  144. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  145. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  146. Iterator<Real> S_ = S .begin() + i*M + offset;
  147. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  148. offset += len;
  149. }
  150. }
  151. }
  152. }
  153. }
  154. }
  155. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  156. const auto& Mf = OpFourier(Np);
  157. assert(Mf.Dim(1) == Np);
  158. const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
  159. const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
  160. assert((Long)Ml .size() == p0+1);
  161. assert((Long)Mdl.size() == p0+1);
  162. Long M, N;
  163. { // Set M, N
  164. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  165. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  166. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  167. N = S.Dim() / M;
  168. assert(S.Dim() == N * M);
  169. }
  170. Vector<Real> B0(N*(p0+1)*(p0+1));
  171. if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
  172. #pragma omp parallel
  173. { // B0 <-- Rearrange(S)
  174. Integer tid=omp_get_thread_num();
  175. Integer omp_p=omp_get_num_threads();
  176. Long a=(tid+0)*N/omp_p;
  177. Long b=(tid+1)*N/omp_p;
  178. for (Long i = a; i < b; i++) {
  179. Long offset = 0;
  180. for (Long j = 0; j < p0+1; j++) {
  181. Long len = p0+1 - j;
  182. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  183. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  184. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
  185. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  186. offset += len;
  187. }
  188. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  189. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  190. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
  191. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  192. offset += len;
  193. }
  194. }
  195. }
  196. }
  197. }
  198. if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
  199. #pragma omp parallel
  200. { // B0 <-- Rearrange(S)
  201. Integer tid=omp_get_thread_num();
  202. Integer omp_p=omp_get_num_threads();
  203. Long a=(tid+0)*N/omp_p;
  204. Long b=(tid+1)*N/omp_p;
  205. for (Long i = a; i < b; i++) {
  206. Long offset = 0;
  207. for (Long j = 0; j < p0+1; j++) {
  208. Long len = p0+1 - j;
  209. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  210. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  211. ConstIterator<Real> S_ = S .begin() + i*M + 0;
  212. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  213. offset += len;
  214. }
  215. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  216. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  217. ConstIterator<Real> S_ = S .begin() + i*M + 1;
  218. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  219. offset += len;
  220. }
  221. }
  222. }
  223. }
  224. }
  225. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
  226. #pragma omp parallel
  227. { // B0 <-- Rearrange(S)
  228. Integer tid=omp_get_thread_num();
  229. Integer omp_p=omp_get_num_threads();
  230. Long a=(tid+0)*N/omp_p;
  231. Long b=(tid+1)*N/omp_p;
  232. for (Long i = a; i < b; i++) {
  233. Long offset = 0;
  234. for (Long j = 0; j < p0+1; j++) {
  235. Long len = p0+1 - j;
  236. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  237. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  238. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  239. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  240. offset += len;
  241. }
  242. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  243. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  244. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  245. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  246. offset += len;
  247. }
  248. }
  249. }
  250. }
  251. }
  252. B0 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  253. if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
  254. if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
  255. if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
  256. Vector<Real> B1(N*(2*p0+1)*Nt);
  257. if(X || X_phi){
  258. #pragma omp parallel
  259. { // Evaluate Legendre polynomial
  260. Integer tid=omp_get_thread_num();
  261. Integer omp_p=omp_get_num_threads();
  262. Long offset0=0;
  263. Long offset1=0;
  264. for(Long i=0;i<p0+1;i++){
  265. Long N_ = (i==0 ? N : 2*N);
  266. Matrix<Real> Min (N_, p0+1-i, B0.begin()+offset0, false);
  267. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  268. { // Mout = Min * Ml[i] // split between threads
  269. Long a=(tid+0)*N_/omp_p;
  270. Long b=(tid+1)*N_/omp_p;
  271. if(a<b){
  272. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  273. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  274. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  275. }
  276. }
  277. offset0+=Min .Dim(0)*Min .Dim(1);
  278. offset1+=Mout.Dim(0)*Mout.Dim(1);
  279. }
  280. }
  281. #pragma omp parallel
  282. { // Transpose and evaluate Fourier
  283. Integer tid=omp_get_thread_num();
  284. Integer omp_p=omp_get_num_threads();
  285. Long a=(tid+0)*N*Nt/omp_p;
  286. Long b=(tid+1)*N*Nt/omp_p;
  287. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  288. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  289. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  290. for (Long i = a; i < b; i++) {
  291. { // buff <-- Transpose(B1)
  292. buff[0] = B1_[0][i];
  293. buff[1] = 0;
  294. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  295. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  296. }
  297. { // X <-- FFT(buff)
  298. Vector<Real> Xi(Np, X->begin() + Np * i, false);
  299. Mf.Execute(buff, Xi);
  300. }
  301. if(X_phi){ // Evaluate Fourier gradient
  302. { // buff <-- Transpose(B1)
  303. buff[0] = 0;
  304. buff[1] = 0;
  305. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  306. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  307. for (Long j = 1; j < buff.Dim()/2; j++) {
  308. Real x = buff[2*j+0];
  309. Real y = buff[2*j+1];
  310. buff[2*j+0] = -j*y;
  311. buff[2*j+1] = j*x;
  312. }
  313. }
  314. { // X_phi <-- FFT(buff)
  315. Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
  316. Mf.Execute(buff, Xi);
  317. }
  318. }
  319. }
  320. }
  321. }
  322. if(X_theta){
  323. #pragma omp parallel
  324. { // Evaluate Legendre gradient
  325. Integer tid=omp_get_thread_num();
  326. Integer omp_p=omp_get_num_threads();
  327. Long offset0=0;
  328. Long offset1=0;
  329. for(Long i=0;i<p0+1;i++){
  330. Long N_ = (i==0 ? N : 2*N);
  331. Matrix<Real> Min (N_, p0+1-i, B0.begin()+offset0, false);
  332. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  333. { // Mout = Min * Mdl[i] // split between threads
  334. Long a=(tid+0)*N_/omp_p;
  335. Long b=(tid+1)*N_/omp_p;
  336. if(a<b){
  337. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  338. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  339. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  340. }
  341. }
  342. offset0+=Min .Dim(0)*Min .Dim(1);
  343. offset1+=Mout.Dim(0)*Mout.Dim(1);
  344. }
  345. }
  346. #pragma omp parallel
  347. { // Transpose and evaluate Fourier
  348. Integer tid=omp_get_thread_num();
  349. Integer omp_p=omp_get_num_threads();
  350. Long a=(tid+0)*N*Nt/omp_p;
  351. Long b=(tid+1)*N*Nt/omp_p;
  352. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  353. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  354. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  355. for (Long i = a; i < b; i++) {
  356. { // buff <-- Transpose(B1)
  357. buff[0] = B1_[0][i];
  358. buff[1] = 0;
  359. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  360. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  361. }
  362. { // Xi <-- FFT(buff)
  363. Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
  364. Mf.Execute(buff, Xi);
  365. }
  366. }
  367. }
  368. }
  369. }
  370. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
  371. Vector<Real> QP[2];
  372. { // Set QP // TODO: store these weights
  373. Vector<Real> x(1), alp;
  374. const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
  375. for (Long i = 0; i < 2; i++) {
  376. x = (i ? -1 : 1);
  377. LegPoly(alp, x, p0);
  378. QP[i].ReInit(p0 + 1, alp.begin());
  379. QP[i] *= SQRT2PI;
  380. }
  381. }
  382. Long M, N;
  383. { // Set M, N
  384. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  385. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  386. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  387. N = S.Dim() / M;
  388. assert(S.Dim() == N * M);
  389. }
  390. if(P.Dim() != N * 2) P.ReInit(N * 2);
  391. if (arrange == SHCArrange::ALL) {
  392. #pragma omp parallel
  393. { // Compute pole
  394. Integer tid = omp_get_thread_num();
  395. Integer omp_p = omp_get_num_threads();
  396. Long a = (tid + 0) * N / omp_p;
  397. Long b = (tid + 1) * N / omp_p;
  398. for (Long i = a; i < b; i++) {
  399. Real P_[2] = {0, 0};
  400. for (Long j = 0; j < p0 + 1; j++) {
  401. P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
  402. P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
  403. }
  404. P[2*i+0] = P_[0];
  405. P[2*i+1] = P_[1];
  406. }
  407. }
  408. }
  409. if (arrange == SHCArrange::ROW_MAJOR) {
  410. #pragma omp parallel
  411. { // Compute pole
  412. Integer tid = omp_get_thread_num();
  413. Integer omp_p = omp_get_num_threads();
  414. Long a = (tid + 0) * N / omp_p;
  415. Long b = (tid + 1) * N / omp_p;
  416. for (Long i = a; i < b; i++) {
  417. Long idx = 0;
  418. Real P_[2] = {0, 0};
  419. for (Long j = 0; j < p0 + 1; j++) {
  420. P_[0] += S[i*M+idx] * QP[0][j];
  421. P_[1] += S[i*M+idx] * QP[1][j];
  422. idx += 2*(j+1);
  423. }
  424. P[2*i+0] = P_[0];
  425. P[2*i+1] = P_[1];
  426. }
  427. }
  428. }
  429. if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
  430. #pragma omp parallel
  431. { // Compute pole
  432. Integer tid = omp_get_thread_num();
  433. Integer omp_p = omp_get_num_threads();
  434. Long a = (tid + 0) * N / omp_p;
  435. Long b = (tid + 1) * N / omp_p;
  436. for (Long i = a; i < b; i++) {
  437. Real P_[2] = {0, 0};
  438. for (Long j = 0; j < p0 + 1; j++) {
  439. P_[0] += S[i*M+j] * QP[0][j];
  440. P_[1] += S[i*M+j] * QP[1][j];
  441. }
  442. P[2*i+0] = P_[0];
  443. P[2*i+1] = P_[1];
  444. }
  445. }
  446. }
  447. }
  448. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
  449. typedef double VTKReal;
  450. Vector<Real> SS;
  451. if (S == nullptr) {
  452. Integer p = 2;
  453. Integer Ncoeff = (p + 1) * (p + 1);
  454. Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
  455. SSS.SetZero();
  456. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  457. SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
  458. SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
  459. SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
  460. SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
  461. S = &SS;
  462. }
  463. Vector<Real> X, Xp, V, Vp;
  464. { // Upsample X
  465. const Vector<Real>& X0=*S;
  466. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
  467. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
  468. }
  469. if(v_ptr){ // Upsample V
  470. const Vector<Real>& X0=*v_ptr;
  471. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
  472. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
  473. }
  474. std::vector<VTKReal> point_coord;
  475. std::vector<VTKReal> point_value;
  476. std::vector<int32_t> poly_connect;
  477. std::vector<int32_t> poly_offset;
  478. { // Set point_coord, point_value, poly_connect
  479. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  480. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  481. for(Long k=0;k<N_ves;k++){ // Set point_coord
  482. Real C[COORD_DIM]={0,0,0};
  483. if(period>0){
  484. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  485. for(Long i=0;i<p1+1;i++){
  486. for(Long j=0;j<2*p1;j++){
  487. for(Integer l=0;l<COORD_DIM;l++){
  488. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  489. }
  490. }
  491. }
  492. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  493. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  494. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  495. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  496. }
  497. for(Long i=0;i<p1+1;i++){
  498. for(Long j=0;j<2*p1;j++){
  499. for(Integer l=0;l<COORD_DIM;l++){
  500. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  501. }
  502. }
  503. }
  504. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  505. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  506. }
  507. if(v_ptr) {
  508. Long data__dof = V.Dim() / (2*p1*(p1+1));
  509. for(Long k=0;k<N_ves;k++){ // Set point_value
  510. for(Long i=0;i<p1+1;i++){
  511. for(Long j=0;j<2*p1;j++){
  512. for(Long l=0;l<data__dof;l++){
  513. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  514. }
  515. }
  516. }
  517. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  518. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  519. }
  520. }
  521. for(Long k=0;k<N_ves;k++){
  522. for(Long j=0;j<2*p1;j++){
  523. Long i0= 0;
  524. Long i1=p1;
  525. Long j0=((j+0) );
  526. Long j1=((j+1)%(2*p1));
  527. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  528. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  529. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  530. poly_offset.push_back(poly_connect.size());
  531. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  532. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  533. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  534. poly_offset.push_back(poly_connect.size());
  535. }
  536. for(Long i=0;i<p1;i++){
  537. for(Long j=0;j<2*p1;j++){
  538. Long i0=((i+0) );
  539. Long i1=((i+1) );
  540. Long j0=((j+0) );
  541. Long j1=((j+1)%(2*p1));
  542. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  543. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  544. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  545. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  546. poly_offset.push_back(poly_connect.size());
  547. }
  548. }
  549. }
  550. }
  551. Integer np = comm.Size();
  552. Integer myrank = comm.Rank();
  553. std::vector<VTKReal>& coord=point_coord;
  554. std::vector<VTKReal>& value=point_value;
  555. std::vector<int32_t>& connect=poly_connect;
  556. std::vector<int32_t>& offset=poly_offset;
  557. Long pt_cnt=coord.size()/COORD_DIM;
  558. Long poly_cnt=poly_offset.size();
  559. // Open file for writing.
  560. std::stringstream vtufname;
  561. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  562. std::ofstream vtufile;
  563. vtufile.open(vtufname.str().c_str());
  564. if(vtufile.fail()) return;
  565. bool isLittleEndian;
  566. { // Set isLittleEndian
  567. uint16_t number = 0x1;
  568. uint8_t *numPtr = (uint8_t*)&number;
  569. isLittleEndian=(numPtr[0] == 1);
  570. }
  571. // Proceed to write to file.
  572. Long data_size=0;
  573. vtufile<<"<?xml version=\"1.0\"?>\n";
  574. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  575. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  576. //===========================================================================
  577. vtufile<<" <PolyData>\n";
  578. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  579. //---------------------------------------------------------------------------
  580. vtufile<<" <Points>\n";
  581. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  582. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  583. vtufile<<" </Points>\n";
  584. //---------------------------------------------------------------------------
  585. if(value.size()){ // value
  586. vtufile<<" <PointData>\n";
  587. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  588. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  589. vtufile<<" </PointData>\n";
  590. }
  591. //---------------------------------------------------------------------------
  592. vtufile<<" <Polys>\n";
  593. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  594. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  595. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  596. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  597. vtufile<<" </Polys>\n";
  598. //---------------------------------------------------------------------------
  599. vtufile<<" </Piece>\n";
  600. vtufile<<" </PolyData>\n";
  601. //===========================================================================
  602. vtufile<<" <AppendedData encoding=\"raw\">\n";
  603. vtufile<<" _";
  604. int32_t block_size;
  605. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  606. if(value.size()){ // value
  607. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  608. }
  609. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  610. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  611. vtufile<<"\n";
  612. vtufile<<" </AppendedData>\n";
  613. //===========================================================================
  614. vtufile<<"</VTKFile>\n";
  615. vtufile.close();
  616. if(myrank) return;
  617. std::stringstream pvtufname;
  618. pvtufname<<fname<<".pvtp";
  619. std::ofstream pvtufile;
  620. pvtufile.open(pvtufname.str().c_str());
  621. if(pvtufile.fail()) return;
  622. pvtufile<<"<?xml version=\"1.0\"?>\n";
  623. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  624. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  625. pvtufile<<" <PPoints>\n";
  626. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  627. pvtufile<<" </PPoints>\n";
  628. if(value.size()){ // value
  629. pvtufile<<" <PPointData>\n";
  630. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  631. pvtufile<<" </PPointData>\n";
  632. }
  633. {
  634. // Extract filename from path.
  635. std::stringstream vtupath;
  636. vtupath<<'/'<<fname;
  637. std::string pathname = vtupath.str();
  638. auto found = pathname.find_last_of("/\\");
  639. std::string fname_ = pathname.substr(found+1);
  640. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  641. }
  642. pvtufile<<" </PPolyData>\n";
  643. pvtufile<<"</VTKFile>\n";
  644. pvtufile.close();
  645. }
  646. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  647. Long N = X.Dim();
  648. Long Npoly = (degree + 1) * (degree + 2) / 2;
  649. if (poly_val.Dim() != N * Npoly) {
  650. poly_val.ReInit(N * Npoly);
  651. }
  652. Vector<Real> leg_poly(Npoly * N);
  653. LegPoly(leg_poly, X, degree);
  654. for(Long m=0;m<=degree;m++){
  655. for(Long n=0;n<=degree;n++) if(m<=n){
  656. const Real* Pn =&leg_poly[0];
  657. const Real* Pn_=&leg_poly[0];
  658. if((m+0)<=(n+0)) Pn =&leg_poly[N*(((degree*2-abs(m+0)+1)*abs(m+0))/2+(n+0))];
  659. if((m+1)<=(n+0)) Pn_=&leg_poly[N*(((degree*2-abs(m+1)+1)*abs(m+1))/2+(n+0))];
  660. Real* Hn =&poly_val[N*(((degree*2-abs(m+0)+1)*abs(m+0))/2+(n+0))];
  661. Real c1=(abs(m+0)<=(n+0)?1.0:0)*m;
  662. Real c2=(abs(m+1)<=(n+0)?1.0:0)*sqrt(n+m+1)*sqrt(n>m?n-m:1);
  663. for(Long i=0;i<N;i++){
  664. Hn[i]=-(c1*X[i]*Pn[i]+c2*sqrt(1-X[i]*X[i])*Pn_[i])/sqrt(1-X[i]*X[i]);
  665. }
  666. }
  667. }
  668. }
  669. template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  670. Long N = X.Dim();
  671. Long Npoly = (degree + 1) * (degree + 2) / 2;
  672. if (poly_val.Dim() != Npoly * N) {
  673. poly_val.ReInit(Npoly * N);
  674. }
  675. Real fact=1.0/(Real)sqrt(4*M_PI);
  676. std::vector<Real> u(N);
  677. for(Long n=0;n<N;n++){
  678. u[n]=sqrt(1-X[n]*X[n]);
  679. if(X[n]*X[n]>1.0) u[n]=0;
  680. poly_val[n]=fact;
  681. }
  682. Long idx = 0;
  683. Long idx_nxt = 0;
  684. for(Long i=1;i<=degree;i++){
  685. idx_nxt += N*(degree-i+2);
  686. Real c=(i==1?sqrt(3.0/2.0):1);
  687. if(i>1)c*=sqrt((Real)(2*i+1)/(2*i));
  688. for(Long n=0;n<N;n++){
  689. poly_val[idx_nxt+n]=-poly_val[idx+n]*u[n]*c;
  690. }
  691. idx = idx_nxt;
  692. }
  693. idx = 0;
  694. for(Long m=0;m<degree;m++){
  695. for(Long n=0;n<N;n++){
  696. Real pmm=0;
  697. Real pmmp1=poly_val[idx+n];
  698. Real pll;
  699. for(Long ll=m+1;ll<=degree;ll++){
  700. Real a=sqrt(((Real)(2*ll-1)*(2*ll+1))/((ll-m)*(ll+m)));
  701. Real b=sqrt(((Real)(2*ll+1)*(ll+m-1)*(ll-m-1))/((ll-m)*(ll+m)*(2*ll-3)));
  702. pll=X[n]*a*pmmp1-b*pmm;
  703. pmm=pmmp1;
  704. pmmp1=pll;
  705. poly_val[idx+N*(ll-m)+n]=pll;
  706. }
  707. }
  708. idx+=N*(degree-m+1);
  709. }
  710. }
  711. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p){
  712. assert(p<SCTL_SHMAXDEG);
  713. Vector<Real>& Qx=MatrixStore().Qx_[p];
  714. if(!Qx.Dim()){
  715. Vector<double> qx1(p+1);
  716. Vector<double> qw1(p+1);
  717. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  718. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  719. if (Qx.Dim() != p+1) Qx.ReInit(p+1);
  720. for (Long i = 0; i < p + 1; i++) Qx[i] = -qx1[i];
  721. }
  722. return Qx;
  723. }
  724. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p){
  725. assert(p<SCTL_SHMAXDEG);
  726. Vector<Real>& Qw=MatrixStore().Qw_[p];
  727. if(!Qw.Dim()){
  728. Vector<double> qx1(p+1);
  729. Vector<double> qw1(p+1);
  730. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  731. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  732. if (Qw.Dim() != p+1) Qw.ReInit(p+1);
  733. for (Long i = 0; i < p + 1; i++) Qw[i] = qw1[i];
  734. }
  735. return Qw;
  736. }
  737. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  738. assert(p1<SCTL_SHMAXDEG);
  739. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  740. if(!Sw.Dim()){
  741. const Vector<Real>& qx1 = LegendreNodes(p1);
  742. const Vector<Real>& qw1 = LegendreWeights(p1);
  743. std::vector<Real> Yf(p1+1,0);
  744. { // Set Yf
  745. Vector<Real> x0(1); x0=1.0;
  746. Vector<Real> alp0((p1+1)*(p1+2)/2);
  747. LegPoly(alp0, x0, p1);
  748. Vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  749. LegPoly(alp, qx1, p1);
  750. for(Long j=0;j<p1+1;j++){
  751. for(Long i=0;i<p1+1;i++){
  752. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  753. }
  754. }
  755. }
  756. Sw.ReInit(p1+1);
  757. for(Long i=0;i<p1+1;i++){
  758. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  759. }
  760. }
  761. return Sw;
  762. }
  763. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  764. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  765. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  766. if(!Mf.Dim(0)){
  767. const Real SQRT2PI=sqrt(2*M_PI);
  768. { // Set Mf
  769. Matrix<Real> M(2*p0,2*p1);
  770. for(Long j=0;j<2*p1;j++){
  771. M[0][j]=SQRT2PI*1.0;
  772. for(Long k=1;k<p0;k++){
  773. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  774. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  775. }
  776. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  777. }
  778. Mf=M;
  779. }
  780. }
  781. return Mf;
  782. }
  783. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  784. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  785. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  786. if(!Mf.Dim(0)){
  787. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  788. { // Set Mf
  789. Matrix<Real> M(2*p0,2*p1);
  790. M.SetZero();
  791. if(p1>p0) p1=p0;
  792. for(Long j=0;j<2*p0;j++){
  793. M[j][0]=INVSQRT2PI*0.5;
  794. for(Long k=1;k<p1;k++){
  795. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  796. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  797. }
  798. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  799. }
  800. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  801. Mf=M;
  802. }
  803. }
  804. return Mf;
  805. }
  806. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourier(Long Np){
  807. assert(Np<SCTL_SHMAXDEG);
  808. auto& Mf =MatrixStore().Mfftinv_ [Np];
  809. #pragma omp critical (SCTL_FFT_PLAN0)
  810. if(!Mf.Dim(0)){
  811. StaticArray<Long,1> fft_dim = {Np};
  812. Mf.Setup(FFT_Type::C2R, 1, Vector<Long>(1,fft_dim,false));
  813. }
  814. return Mf;
  815. }
  816. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourierInv(Long Np){
  817. assert(Np<SCTL_SHMAXDEG);
  818. auto& Mf =MatrixStore().Mfft_ [Np];
  819. #pragma omp critical (SCTL_FFT_PLAN1)
  820. if(!Mf.Dim(0)){
  821. StaticArray<Long,1> fft_dim = {Np};
  822. Mf.Setup(FFT_Type::R2C, 1, Vector<Long>(1,fft_dim,false));
  823. }
  824. return Mf;
  825. }
  826. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  827. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  828. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  829. if(!Mdf.Dim(0)){
  830. const Real SQRT2PI=sqrt(2*M_PI);
  831. { // Set Mdf_
  832. Matrix<Real> M(2*p0,2*p1);
  833. for(Long j=0;j<2*p1;j++){
  834. M[0][j]=SQRT2PI*0.0;
  835. for(Long k=1;k<p0;k++){
  836. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  837. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  838. }
  839. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  840. }
  841. Mdf=M;
  842. }
  843. }
  844. return Mdf;
  845. }
  846. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  847. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  848. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  849. if(!Ml.size()){
  850. const Vector<Real>& qx1 = LegendreNodes(p1);
  851. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  852. LegPoly(alp, qx1, p0);
  853. Ml.resize(p0+1);
  854. auto ptr = alp.begin();
  855. for(Long i=0;i<=p0;i++){
  856. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  857. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  858. }
  859. }
  860. return Ml;
  861. }
  862. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  863. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  864. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  865. if(!Ml.size()){
  866. const Vector<Real>& qx1 = LegendreNodes(p0);
  867. const Vector<Real>& qw1 = LegendreWeights(p0);
  868. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  869. LegPoly(alp, qx1, p1);
  870. Ml.resize(p1+1);
  871. auto ptr = alp.begin();
  872. for(Long i=0;i<=p1;i++){
  873. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  874. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  875. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  876. for(Long k=0;k<qx1.Dim();k++){
  877. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  878. }
  879. }
  880. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  881. }
  882. }
  883. return Ml;
  884. }
  885. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  886. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  887. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  888. if(!Mdl.size()){
  889. const Vector<Real>& qx1 = LegendreNodes(p1);
  890. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  891. LegPolyDeriv(alp, qx1, p0);
  892. Mdl.resize(p0+1);
  893. auto ptr = alp.begin();
  894. for(Long i=0;i<=p0;i++){
  895. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  896. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  897. }
  898. }
  899. return Mdl;
  900. }
  901. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  902. std::vector<std::vector<Long>> coeff_perm(p0+1);
  903. { // Set coeff_perm
  904. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  905. Long itr=0;
  906. for(Long i=0;i<2*p0;i++){
  907. Long m=(i+1)/2;
  908. for(Long n=m;n<=p0;n++){
  909. coeff_perm[n][i]=itr;
  910. itr++;
  911. }
  912. }
  913. }
  914. assert(p0<SCTL_SHMAXDEG);
  915. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  916. if(!Mr.size()){
  917. const Real SQRT2PI=sqrt(2*M_PI);
  918. Long Ncoef=p0*(p0+2);
  919. Long Ngrid=2*p0*(p0+1);
  920. Long Naleg=(p0+1)*(p0+2)/2;
  921. Matrix<Real> Mcoord0(3,Ngrid);
  922. const Vector<Real>& x=LegendreNodes(p0);
  923. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  924. for(Long j=0;j<2*p0;j++){
  925. Mcoord0[0][i*2*p0+j]=x[i];
  926. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  927. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  928. }
  929. }
  930. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  931. Matrix<Real> Mcoord1;
  932. { // Rotate coordinates
  933. Matrix<Real> M(COORD_DIM, COORD_DIM);
  934. Real cos_=-x[l];
  935. Real sin_=-sqrt(1.0-x[l]*x[l]);
  936. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  937. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  938. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  939. Mcoord1=M*Mcoord0;
  940. }
  941. Matrix<Real> Mleg(Naleg, Ngrid);
  942. { // Set Mleg
  943. const Vector<Real> Vcoord1(Mcoord1.Dim(0)*Mcoord1.Dim(1), Mcoord1.begin(), false);
  944. Vector<Real> Vleg(Mleg.Dim(0)*Mleg.Dim(1), Mleg.begin(), false);
  945. LegPoly(Vleg, Vcoord1, p0);
  946. }
  947. Vector<Real> theta(Ngrid);
  948. for(Long i=0;i<theta.Dim();i++){ // Set theta
  949. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]);
  950. }
  951. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  952. { // Build Mcoef2grid
  953. Long offset0=0;
  954. Long offset1=0;
  955. for(Long i=0;i<p0+1;i++){
  956. Long len=p0+1-i;
  957. { // P * cos
  958. for(Long j=0;j<len;j++){
  959. for(Long k=0;k<Ngrid;k++){
  960. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  961. }
  962. }
  963. offset1+=len;
  964. }
  965. if(i!=0 && i!=p0){ // P * sin
  966. for(Long j=0;j<len;j++){
  967. for(Long k=0;k<Ngrid;k++){
  968. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  969. }
  970. }
  971. offset1+=len;
  972. }
  973. offset0+=len;
  974. }
  975. assert(offset0==Naleg);
  976. assert(offset1==Ncoef);
  977. }
  978. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  979. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  980. Grid2SHC(Vcoef2grid, p0+1, 2*p0, p0, Vcoef2coef, SHCArrange::COL_MAJOR_NONZERO);
  981. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  982. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  983. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  984. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  985. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  986. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  987. }
  988. }
  989. Mr.push_back(M);
  990. }
  991. }
  992. }
  993. return Mr;
  994. }
  995. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  996. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  997. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  998. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  999. assert(p1==(Long)Ml.size()-1);
  1000. assert(p0==Mf.Dim(0)/2);
  1001. assert(p1==Mf.Dim(1)/2);
  1002. Long N=X.Dim()/(2*p0*(p0+1));
  1003. assert(N*2*p0*(p0+1)==X.Dim());
  1004. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  1005. Vector<Real> B0, B1;
  1006. B0.ReInit(N* p1*(p1+2));
  1007. B1.ReInit(N*2*p1*(p0+1));
  1008. #pragma omp parallel
  1009. { // Evaluate Fourier and transpose
  1010. Integer tid=omp_get_thread_num();
  1011. Integer omp_p=omp_get_num_threads();
  1012. Long a=(tid+0)*N*(p0+1)/omp_p;
  1013. Long b=(tid+1)*N*(p0+1)/omp_p;
  1014. const Long block_size=16;
  1015. Matrix<Real> B2(block_size,2*p1);
  1016. for(Long i0=a;i0<b;i0+=block_size){
  1017. Long i1=std::min(b,i0+block_size);
  1018. const Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  1019. Matrix<Real> Mout(i1-i0,2*p1, B2.begin(), false);
  1020. Matrix<Real>::GEMM(Mout, Min, Mf);
  1021. for(Long i=i0;i<i1;i++){
  1022. for(Long j=0;j<2*p1;j++){
  1023. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  1024. }
  1025. }
  1026. }
  1027. }
  1028. #pragma omp parallel
  1029. { // Evaluate Legendre polynomial
  1030. Integer tid=omp_get_thread_num();
  1031. Integer omp_p=omp_get_num_threads();
  1032. Long offset0=0;
  1033. Long offset1=0;
  1034. for(Long i=0;i<p1+1;i++){
  1035. Long N0=2*N;
  1036. if(i==0 || i==p1) N0=N;
  1037. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  1038. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  1039. { // Mout = Min * Ml[i] // split between threads
  1040. Long a=(tid+0)*N0/omp_p;
  1041. Long b=(tid+1)*N0/omp_p;
  1042. if(a<b){
  1043. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1044. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1045. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1046. }
  1047. }
  1048. offset0+=Min .Dim(0)*Min .Dim(1);
  1049. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1050. }
  1051. }
  1052. #pragma omp parallel
  1053. { // S <-- Rearrange(B0)
  1054. Integer tid=omp_get_thread_num();
  1055. Integer omp_p=omp_get_num_threads();
  1056. Long a=(tid+0)*N/omp_p;
  1057. Long b=(tid+1)*N/omp_p;
  1058. for(Long i=a;i<b;i++){
  1059. Long offset=0;
  1060. for(Long j=0;j<2*p1;j++){
  1061. Long len=p1+1-(j+1)/2;
  1062. Real* B_=&B0[i*len+N*offset];
  1063. Real* S_=&S[i*p1*(p1+2)+offset];
  1064. for(Long k=0;k<len;k++) S_[k]=B_[k];
  1065. offset+=len;
  1066. }
  1067. }
  1068. }
  1069. }
  1070. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  1071. const std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  1072. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1073. { // Set coeff_perm
  1074. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1075. Long itr=0;
  1076. for(Long i=0;i<2*p0;i++){
  1077. Long m=(i+1)/2;
  1078. for(Long n=m;n<=p0;n++){
  1079. coeff_perm[n][i]=itr;
  1080. itr++;
  1081. }
  1082. }
  1083. }
  1084. Long Ncoef=p0*(p0+2);
  1085. Long N=S.Dim()/Ncoef/dof;
  1086. assert(N*Ncoef*dof==S.Dim());
  1087. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  1088. const Matrix<Real> S0(N*dof, Ncoef, (Iterator<Real>)S.begin(), false);
  1089. Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, S_.begin(), false);
  1090. #pragma omp parallel
  1091. { // Construct all p0*(p0+1) rotations
  1092. Integer tid=omp_get_thread_num();
  1093. Integer omp_p=omp_get_num_threads();
  1094. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  1095. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  1096. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  1097. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  1098. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  1099. }
  1100. Long a=(tid+0)*N/omp_p;
  1101. Long b=(tid+1)*N/omp_p;
  1102. for(Long i=a;i<b;i++){
  1103. for(Long d=0;d<dof;d++){
  1104. for(Long j=0;j<p0;j++){
  1105. Long offset=0;
  1106. for(Long k=0;k<p0+1;k++){
  1107. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  1108. Long len=p0+1-k;
  1109. if(k!=0 && k!=p0){
  1110. for(Long l=0;l<len;l++){
  1111. Real x[2];
  1112. x[0]=S0[i*dof+d][offset+len*0+l];
  1113. x[1]=S0[i*dof+d][offset+len*1+l];
  1114. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  1115. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  1116. }
  1117. offset+=2*len;
  1118. }else{
  1119. for(Long l=0;l<len;l++){
  1120. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  1121. }
  1122. offset+=len;
  1123. }
  1124. }
  1125. assert(offset==Ncoef);
  1126. }
  1127. }
  1128. { // Fast rotation
  1129. for(Long k=0;k<dof*p0;k++){ // forward permutation
  1130. for(Long l=0;l<=p0;l++){
  1131. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1132. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  1133. }
  1134. }
  1135. }
  1136. for(Long t=0;t<=p0;t++){
  1137. for(Long l=0;l<=p0;l++){ // mat-vec
  1138. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  1139. }
  1140. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  1141. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  1142. for(Long l=0;l<=p0;l++){
  1143. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1144. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  1145. }
  1146. }
  1147. }
  1148. }
  1149. }
  1150. }
  1151. }
  1152. }
  1153. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  1154. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  1155. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  1156. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1157. { // Set coeff_perm
  1158. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1159. Long itr=0;
  1160. for(Long i=0;i<2*p0;i++){
  1161. Long m=(i+1)/2;
  1162. for(Long n=m;n<=p0;n++){
  1163. coeff_perm[n][i]=itr;
  1164. itr++;
  1165. }
  1166. }
  1167. }
  1168. Long Ncoef=p0*(p0+2);
  1169. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  1170. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  1171. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  1172. Matrix<Real> S0(N*dof*p0*(p0+1), Ncoef, S.begin(), false);
  1173. const Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, (Iterator<Real>)S_.begin(), false);
  1174. #pragma omp parallel
  1175. { // Transpose all p0*(p0+1) rotations
  1176. Integer tid=omp_get_thread_num();
  1177. Integer omp_p=omp_get_num_threads();
  1178. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  1179. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  1180. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  1181. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  1182. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  1183. }
  1184. Long a=(tid+0)*N/omp_p;
  1185. Long b=(tid+1)*N/omp_p;
  1186. for(Long i=a;i<b;i++){
  1187. for(Long t=0;t<p0+1;t++){
  1188. Long idx0=(i*(p0+1)+t)*p0*dof;
  1189. { // Fast rotation
  1190. const Matrix<Real> Min(p0*dof, Ncoef, (Iterator<Real>)S1[idx0], false);
  1191. for(Long k=0;k<dof*p0;k++){ // forward permutation
  1192. for(Long l=0;l<=p0;l++){
  1193. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1194. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  1195. }
  1196. }
  1197. }
  1198. for(Long l=0;l<=p0;l++){ // mat-vec
  1199. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  1200. }
  1201. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  1202. for(Long l=0;l<=p0;l++){
  1203. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1204. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  1205. }
  1206. }
  1207. }
  1208. }
  1209. for(Long j=0;j<p0;j++){
  1210. for(Long d=0;d<dof;d++){
  1211. Long idx1=idx0+j*dof+d;
  1212. Long offset=0;
  1213. for(Long k=0;k<p0+1;k++){
  1214. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  1215. Long len=p0+1-k;
  1216. if(k!=0 && k!=p0){
  1217. for(Long l=0;l<len;l++){
  1218. Real x[2];
  1219. x[0]=B0[j*dof+d][offset+len*0+l];
  1220. x[1]=B0[j*dof+d][offset+len*1+l];
  1221. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  1222. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  1223. }
  1224. offset+=2*len;
  1225. }else{
  1226. for(Long l=0;l<len;l++){
  1227. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  1228. }
  1229. offset+=len;
  1230. }
  1231. }
  1232. assert(offset==Ncoef);
  1233. }
  1234. }
  1235. }
  1236. }
  1237. }
  1238. }
  1239. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  1240. Long Ngrid=2*p0*(p0+1);
  1241. Long Ncoef= p0*(p0+2);
  1242. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  1243. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  1244. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  1245. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  1246. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  1247. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  1248. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  1249. Long b=std::min(a+BLOCK_SIZE, Nves);
  1250. Vector<Real> _SLMatrix, _DLMatrix;
  1251. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  1252. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  1253. const Vector<Real> _S ((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  1254. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1255. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1256. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1257. }
  1258. }
  1259. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  1260. Profile::Tic("Rotate");
  1261. Vector<Real> S0, S;
  1262. SphericalHarmonics<Real>::Grid2SHC(X0, p0+1, 2*p0, p0, S0, SHCArrange::COL_MAJOR_NONZERO);
  1263. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  1264. Profile::Toc();
  1265. Profile::Tic("Upsample");
  1266. Vector<Real> X, X_theta, X_phi, trg;
  1267. SphericalHarmonics<Real>::SHC2Grid(S, SHCArrange::COL_MAJOR_NONZERO, p0, p1+1, 2*p1, &X, &X_phi, &X_theta);
  1268. SphericalHarmonics<Real>::SHC2Pole(S, SHCArrange::COL_MAJOR_NONZERO, p0, trg);
  1269. Profile::Toc();
  1270. Profile::Tic("Stokes");
  1271. Vector<Real> SL0, DL0;
  1272. { // Stokes kernel
  1273. //Long M0=2*p0*(p0+1);
  1274. Long M1=2*p1*(p1+1);
  1275. Long N=trg.Dim()/(2*COORD_DIM);
  1276. assert(X.Dim()==M1*COORD_DIM*N);
  1277. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  1278. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  1279. const Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  1280. const Real scal_const_dl = 3.0/(4.0*M_PI);
  1281. const Real scal_const_sl = 1.0/(8.0*M_PI);
  1282. static Real eps=-1;
  1283. if(eps<0){
  1284. eps=1;
  1285. while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5;
  1286. }
  1287. #pragma omp parallel
  1288. {
  1289. Integer tid=omp_get_thread_num();
  1290. Integer omp_p=omp_get_num_threads();
  1291. Long a=(tid+0)*N/omp_p;
  1292. Long b=(tid+1)*N/omp_p;
  1293. for(Long i=a;i<b;i++){
  1294. for(Long t=0;t<2;t++){
  1295. Real tx, ty, tz;
  1296. { // Read target coordinates
  1297. tx=trg[i*2*COORD_DIM+0*2+t];
  1298. ty=trg[i*2*COORD_DIM+1*2+t];
  1299. tz=trg[i*2*COORD_DIM+2*2+t];
  1300. }
  1301. for(Long j0=0;j0<p1+1;j0++){
  1302. for(Long j1=0;j1<2*p1;j1++){
  1303. Long s=2*p1*j0+j1;
  1304. Real dx, dy, dz;
  1305. { // Compute dx, dy, dz
  1306. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  1307. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  1308. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  1309. }
  1310. Real nx, ny, nz;
  1311. { // Compute source normal
  1312. Real x_theta=X_phi[(i*COORD_DIM+0)*M1+s];
  1313. Real y_theta=X_phi[(i*COORD_DIM+1)*M1+s];
  1314. Real z_theta=X_phi[(i*COORD_DIM+2)*M1+s];
  1315. Real x_phi=X_theta[(i*COORD_DIM+0)*M1+s];
  1316. Real y_phi=X_theta[(i*COORD_DIM+1)*M1+s];
  1317. Real z_phi=X_theta[(i*COORD_DIM+2)*M1+s];
  1318. nx=(y_theta*z_phi-z_theta*y_phi);
  1319. ny=(z_theta*x_phi-x_theta*z_phi);
  1320. nz=(x_theta*y_phi-y_theta*x_phi);
  1321. }
  1322. Real area_elem=1.0;
  1323. if(SLayer){ // Compute area_elem
  1324. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  1325. }
  1326. Real rinv, rinv2;
  1327. { // Compute rinv, rinv2
  1328. Real r2=dx*dx+dy*dy+dz*dz;
  1329. rinv=1.0/sqrt(r2);
  1330. if(r2<=eps) rinv=0;
  1331. rinv2=rinv*rinv;
  1332. }
  1333. if(DLayer){
  1334. Real rinv5=rinv2*rinv2*rinv;
  1335. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  1336. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  1337. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  1338. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  1339. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  1340. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  1341. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  1342. }
  1343. if(SLayer){
  1344. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  1345. Real area_rinv2=area_rinv*rinv2;
  1346. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  1347. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  1348. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  1349. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  1350. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  1351. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  1352. }
  1353. }
  1354. }
  1355. }
  1356. }
  1357. }
  1358. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  1359. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  1360. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  1361. }
  1362. Profile::Toc();
  1363. Profile::Tic("UpsampleTranspose");
  1364. Vector<Real> SL1, DL1;
  1365. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  1366. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  1367. Profile::Toc();
  1368. Profile::Tic("RotateTranspose");
  1369. Vector<Real> SL2, DL2;
  1370. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  1371. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  1372. Profile::Toc();
  1373. Profile::Tic("Rearrange");
  1374. Vector<Real> SL3, DL3;
  1375. { // Transpose
  1376. Long Ncoef=p0*(p0+2);
  1377. Long Ngrid=2*p0*(p0+1);
  1378. { // Transpose SL2
  1379. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  1380. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  1381. #pragma omp parallel
  1382. {
  1383. Integer tid=omp_get_thread_num();
  1384. Integer omp_p=omp_get_num_threads();
  1385. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  1386. Long a=(tid+0)*N/omp_p;
  1387. Long b=(tid+1)*N/omp_p;
  1388. for(Long i=a;i<b;i++){
  1389. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  1390. for(Long k=0;k<Ncoef;k++){ // Transpose
  1391. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  1392. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  1393. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  1394. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  1395. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  1396. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  1397. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  1398. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  1399. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  1400. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  1401. }
  1402. }
  1403. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  1404. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  1405. for(Long j0=0;j0<COORD_DIM;j0++){
  1406. for(Long j1=0;j1<p0+1;j1++){
  1407. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  1408. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  1409. }
  1410. }
  1411. }
  1412. }
  1413. }
  1414. }
  1415. { // Transpose DL2
  1416. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  1417. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  1418. #pragma omp parallel
  1419. {
  1420. Integer tid=omp_get_thread_num();
  1421. Integer omp_p=omp_get_num_threads();
  1422. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  1423. Long a=(tid+0)*N/omp_p;
  1424. Long b=(tid+1)*N/omp_p;
  1425. for(Long i=a;i<b;i++){
  1426. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  1427. for(Long k=0;k<Ncoef;k++){ // Transpose
  1428. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  1429. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  1430. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  1431. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  1432. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  1433. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  1434. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  1435. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  1436. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  1437. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  1438. }
  1439. }
  1440. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  1441. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  1442. for(Long j0=0;j0<COORD_DIM;j0++){
  1443. for(Long j1=0;j1<p0+1;j1++){
  1444. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  1445. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  1446. }
  1447. }
  1448. }
  1449. }
  1450. }
  1451. }
  1452. }
  1453. Profile::Toc();
  1454. Profile::Tic("Grid2SHC");
  1455. SphericalHarmonics<Real>::Grid2SHC(SL3, p0+1, 2*p0, p0, SL, SHCArrange::COL_MAJOR_NONZERO);
  1456. SphericalHarmonics<Real>::Grid2SHC(DL3, p0+1, 2*p0, p0, DL, SHCArrange::COL_MAJOR_NONZERO);
  1457. Profile::Toc();
  1458. }
  1459. } // end namespace