sph_harm.txx 84 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428
  1. #include SCTL_INCLUDE(legendre_rule.hpp)
  2. // TODO: Replace work vectors with dynamic-arrays
  3. namespace SCTL_NAMESPACE {
  4. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& S, SHCArrange arrange){
  5. Long N = X.Dim() / (Np*Nt);
  6. assert(X.Dim() == N*Np*Nt);
  7. Vector<Real> B1(N*(p1+1)*(p1+1));
  8. Grid2SHC_(X, Nt, Np, p1, B1);
  9. SHCArrange0(B1, p1, S, arrange);
  10. }
  11. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_theta, Vector<Real>* X_phi){
  12. Vector<Real> B0;
  13. SHCArrange1(S, arrange, p0, B0);
  14. SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
  15. }
  16. template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
  17. Long M = (p0+1) * (p0+1);
  18. Long dof;
  19. Matrix<Real> B1;
  20. { // Set B1, dof
  21. Vector<Real> B0;
  22. SHCArrange1(S, arrange, p0, B0);
  23. dof = B0.Dim() / M;
  24. assert(B0.Dim() == dof * M);
  25. B1.ReInit(dof, M);
  26. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  27. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  28. }
  29. assert(B1.Dim(0) == dof);
  30. assert(B1.Dim(1) == M);
  31. Matrix<Real> SHBasis;
  32. SHBasisEval(p0, cos_theta_phi, SHBasis);
  33. assert(SHBasis.Dim(1) == M);
  34. Long N = SHBasis.Dim(0);
  35. { // Set X
  36. if (X.Dim() != N*dof) X.ReInit(N * dof);
  37. for (Long k0 = 0; k0 < N; k0++) {
  38. for (Long k1 = 0; k1 < dof; k1++) {
  39. Real X_ = 0;
  40. for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
  41. X[k0 * dof + k1] = X_;
  42. }
  43. }
  44. }
  45. }
  46. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
  47. Vector<Real> QP[2];
  48. { // Set QP // TODO: store these weights
  49. Vector<Real> x(1), alp;
  50. const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
  51. for (Long i = 0; i < 2; i++) {
  52. x = (i ? -1 : 1);
  53. LegPoly(alp, x, p0);
  54. QP[i].ReInit(p0 + 1, alp.begin());
  55. QP[i] *= SQRT2PI;
  56. }
  57. }
  58. Long M, N;
  59. { // Set M, N
  60. M = 0;
  61. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  62. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  63. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  64. if (M == 0) return;
  65. N = S.Dim() / M;
  66. assert(S.Dim() == N * M);
  67. }
  68. if(P.Dim() != N * 2) P.ReInit(N * 2);
  69. if (arrange == SHCArrange::ALL) {
  70. #pragma omp parallel
  71. { // Compute pole
  72. Integer tid = omp_get_thread_num();
  73. Integer omp_p = omp_get_num_threads();
  74. Long a = (tid + 0) * N / omp_p;
  75. Long b = (tid + 1) * N / omp_p;
  76. for (Long i = a; i < b; i++) {
  77. Real P_[2] = {0, 0};
  78. for (Long j = 0; j < p0 + 1; j++) {
  79. P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
  80. P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
  81. }
  82. P[2*i+0] = P_[0];
  83. P[2*i+1] = P_[1];
  84. }
  85. }
  86. }
  87. if (arrange == SHCArrange::ROW_MAJOR) {
  88. #pragma omp parallel
  89. { // Compute pole
  90. Integer tid = omp_get_thread_num();
  91. Integer omp_p = omp_get_num_threads();
  92. Long a = (tid + 0) * N / omp_p;
  93. Long b = (tid + 1) * N / omp_p;
  94. for (Long i = a; i < b; i++) {
  95. Long idx = 0;
  96. Real P_[2] = {0, 0};
  97. for (Long j = 0; j < p0 + 1; j++) {
  98. P_[0] += S[i*M+idx] * QP[0][j];
  99. P_[1] += S[i*M+idx] * QP[1][j];
  100. idx += 2*(j+1);
  101. }
  102. P[2*i+0] = P_[0];
  103. P[2*i+1] = P_[1];
  104. }
  105. }
  106. }
  107. if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
  108. #pragma omp parallel
  109. { // Compute pole
  110. Integer tid = omp_get_thread_num();
  111. Integer omp_p = omp_get_num_threads();
  112. Long a = (tid + 0) * N / omp_p;
  113. Long b = (tid + 1) * N / omp_p;
  114. for (Long i = a; i < b; i++) {
  115. Real P_[2] = {0, 0};
  116. for (Long j = 0; j < p0 + 1; j++) {
  117. P_[0] += S[i*M+j] * QP[0][j];
  118. P_[1] += S[i*M+j] * QP[1][j];
  119. }
  120. P[2*i+0] = P_[0];
  121. P[2*i+1] = P_[1];
  122. }
  123. }
  124. }
  125. }
  126. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
  127. typedef double VTKReal;
  128. Vector<Real> SS;
  129. if (S == nullptr) {
  130. Integer p = 2;
  131. Integer Ncoeff = (p + 1) * (p + 1);
  132. Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
  133. SSS.SetZero();
  134. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  135. SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
  136. SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
  137. SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
  138. SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
  139. S = &SS;
  140. }
  141. Vector<Real> X, Xp, V, Vp;
  142. { // Upsample X
  143. const Vector<Real>& X0=*S;
  144. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
  145. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
  146. }
  147. if(v_ptr){ // Upsample V
  148. const Vector<Real>& X0=*v_ptr;
  149. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
  150. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
  151. }
  152. std::vector<VTKReal> point_coord;
  153. std::vector<VTKReal> point_value;
  154. std::vector<int32_t> poly_connect;
  155. std::vector<int32_t> poly_offset;
  156. { // Set point_coord, point_value, poly_connect
  157. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  158. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  159. for(Long k=0;k<N_ves;k++){ // Set point_coord
  160. Real C[COORD_DIM]={0,0,0};
  161. if(period>0){
  162. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  163. for(Long i=0;i<p1+1;i++){
  164. for(Long j=0;j<2*p1;j++){
  165. for(Integer l=0;l<COORD_DIM;l++){
  166. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  167. }
  168. }
  169. }
  170. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  171. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  172. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  173. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  174. }
  175. for(Long i=0;i<p1+1;i++){
  176. for(Long j=0;j<2*p1;j++){
  177. for(Integer l=0;l<COORD_DIM;l++){
  178. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  179. }
  180. }
  181. }
  182. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  183. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  184. }
  185. if(v_ptr) {
  186. Long data__dof = V.Dim() / (2*p1*(p1+1));
  187. for(Long k=0;k<N_ves;k++){ // Set point_value
  188. for(Long i=0;i<p1+1;i++){
  189. for(Long j=0;j<2*p1;j++){
  190. for(Long l=0;l<data__dof;l++){
  191. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  192. }
  193. }
  194. }
  195. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  196. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  197. }
  198. }
  199. for(Long k=0;k<N_ves;k++){
  200. for(Long j=0;j<2*p1;j++){
  201. Long i0= 0;
  202. Long i1=p1;
  203. Long j0=((j+0) );
  204. Long j1=((j+1)%(2*p1));
  205. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  206. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  207. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  208. poly_offset.push_back(poly_connect.size());
  209. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  210. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  211. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  212. poly_offset.push_back(poly_connect.size());
  213. }
  214. for(Long i=0;i<p1;i++){
  215. for(Long j=0;j<2*p1;j++){
  216. Long i0=((i+0) );
  217. Long i1=((i+1) );
  218. Long j0=((j+0) );
  219. Long j1=((j+1)%(2*p1));
  220. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  221. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  222. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  223. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  224. poly_offset.push_back(poly_connect.size());
  225. }
  226. }
  227. }
  228. }
  229. Integer np = comm.Size();
  230. Integer myrank = comm.Rank();
  231. std::vector<VTKReal>& coord=point_coord;
  232. std::vector<VTKReal>& value=point_value;
  233. std::vector<int32_t>& connect=poly_connect;
  234. std::vector<int32_t>& offset=poly_offset;
  235. Long pt_cnt=coord.size()/COORD_DIM;
  236. Long poly_cnt=poly_offset.size();
  237. // Open file for writing.
  238. std::stringstream vtufname;
  239. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  240. std::ofstream vtufile;
  241. vtufile.open(vtufname.str().c_str());
  242. if(vtufile.fail()) return;
  243. bool isLittleEndian;
  244. { // Set isLittleEndian
  245. uint16_t number = 0x1;
  246. uint8_t *numPtr = (uint8_t*)&number;
  247. isLittleEndian=(numPtr[0] == 1);
  248. }
  249. // Proceed to write to file.
  250. Long data_size=0;
  251. vtufile<<"<?xml version=\"1.0\"?>\n";
  252. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  253. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  254. //===========================================================================
  255. vtufile<<" <PolyData>\n";
  256. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  257. //---------------------------------------------------------------------------
  258. vtufile<<" <Points>\n";
  259. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  260. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  261. vtufile<<" </Points>\n";
  262. //---------------------------------------------------------------------------
  263. if(value.size()){ // value
  264. vtufile<<" <PointData>\n";
  265. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  266. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  267. vtufile<<" </PointData>\n";
  268. }
  269. //---------------------------------------------------------------------------
  270. vtufile<<" <Polys>\n";
  271. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  272. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  273. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  274. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  275. vtufile<<" </Polys>\n";
  276. //---------------------------------------------------------------------------
  277. vtufile<<" </Piece>\n";
  278. vtufile<<" </PolyData>\n";
  279. //===========================================================================
  280. vtufile<<" <AppendedData encoding=\"raw\">\n";
  281. vtufile<<" _";
  282. int32_t block_size;
  283. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  284. if(value.size()){ // value
  285. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  286. }
  287. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  288. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  289. vtufile<<"\n";
  290. vtufile<<" </AppendedData>\n";
  291. //===========================================================================
  292. vtufile<<"</VTKFile>\n";
  293. vtufile.close();
  294. if(myrank) return;
  295. std::stringstream pvtufname;
  296. pvtufname<<fname<<".pvtp";
  297. std::ofstream pvtufile;
  298. pvtufile.open(pvtufname.str().c_str());
  299. if(pvtufile.fail()) return;
  300. pvtufile<<"<?xml version=\"1.0\"?>\n";
  301. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  302. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  303. pvtufile<<" <PPoints>\n";
  304. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  305. pvtufile<<" </PPoints>\n";
  306. if(value.size()){ // value
  307. pvtufile<<" <PPointData>\n";
  308. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  309. pvtufile<<" </PPointData>\n";
  310. }
  311. {
  312. // Extract filename from path.
  313. std::stringstream vtupath;
  314. vtupath<<'/'<<fname;
  315. std::string pathname = vtupath.str();
  316. auto found = pathname.find_last_of("/\\");
  317. std::string fname_ = pathname.substr(found+1);
  318. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  319. }
  320. pvtufile<<" </PPolyData>\n";
  321. pvtufile<<"</VTKFile>\n";
  322. pvtufile.close();
  323. }
  324. template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
  325. Long N = X.Dim() / (Np*Nt);
  326. assert(X.Dim() == N*Np*Nt);
  327. assert(N % COORD_DIM == 0);
  328. Vector<Real> B0(N*Nt*Np);
  329. { // Set B0
  330. Vector<Real> sin_phi(Np), cos_phi(Np);
  331. for (Long i = 0; i < Np; i++) {
  332. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  333. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  334. }
  335. const auto& Y = LegendreNodes(Nt - 1);
  336. assert(Y.Dim() == Nt);
  337. Long Ngrid = Nt * Np;
  338. for (Long k = 0; k < N; k+=COORD_DIM) {
  339. for (Long i = 0; i < Nt; i++) {
  340. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  341. Real cos_theta = Y[i];
  342. Real s = 1 / sin_theta;
  343. const auto X_ = X.begin() + (k*Nt+i)*Np;
  344. auto B0_ = B0.begin() + (k*Nt+i)*Np;
  345. for (Long j = 0; j < Np; j++) {
  346. StaticArray<Real,3> in;
  347. in[0] = X_[0*Ngrid+j];
  348. in[1] = X_[1*Ngrid+j];
  349. in[2] = X_[2*Ngrid+j];
  350. StaticArray<Real,9> M;
  351. M[0] = sin_theta*cos_phi[j]; M[1] = sin_theta*sin_phi[j]; M[2] = cos_theta;
  352. M[3] = cos_theta*cos_phi[j]; M[4] = cos_theta*sin_phi[j]; M[5] =-sin_theta;
  353. M[6] = -sin_phi[j]; M[7] = cos_phi[j]; M[8] = 0;
  354. B0_[0*Ngrid+j] = ( M[0] * in[0] + M[1] * in[1] + M[2] * in[2] );
  355. B0_[1*Ngrid+j] = ( M[3] * in[0] + M[4] * in[1] + M[5] * in[2] ) * s;
  356. B0_[2*Ngrid+j] = ( M[6] * in[0] + M[7] * in[1] + M[8] * in[2] ) * s;
  357. }
  358. }
  359. }
  360. }
  361. Long p_ = p0 + 1;
  362. Long M0 = (p0+1)*(p0+1);
  363. Long M_ = (p_+1)*(p_+1);
  364. Vector<Real> B1(N*M_);
  365. Grid2SHC_(B0, Nt, Np, p_, B1);
  366. Vector<Real> B2(N*M0);
  367. const Complex<Real> imag(0,1);
  368. for (Long i=0; i<N; i+=COORD_DIM) {
  369. for (Long m=0; m<=p0; m++) {
  370. for (Long n=m; n<=p0; n++) {
  371. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  372. Complex<Real> c;
  373. if (0<=m && m<=n && n<=p) {
  374. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  375. Long idx_imag = idx_real + (p+1-m)*N;
  376. c.real = coeff[idx_real];
  377. if (m) c.imag = coeff[idx_imag];
  378. }
  379. return c;
  380. };
  381. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  382. if (0<=m && m<=n && n<=p) {
  383. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  384. Long idx_imag = idx_real + (p+1-m)*N;
  385. coeff[idx_real] = c.real;
  386. if (m) coeff[idx_imag] = c.imag;
  387. }
  388. };
  389. auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
  390. auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
  391. auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
  392. Complex<Real> phiY, phiG, phiX;
  393. { // (phiG, phiX) <-- (gt, gp)
  394. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  395. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  396. phiY = gr(n,m);
  397. phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  398. phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  399. }
  400. auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
  401. auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
  402. if (n==0) {
  403. phiW = 0;
  404. phiX = 0;
  405. }
  406. write_coeff(phiV, B2, i+0, p0, n, m);
  407. write_coeff(phiW, B2, i+1, p0, n, m);
  408. write_coeff(phiX, B2, i+2, p0, n, m);
  409. }
  410. }
  411. }
  412. SHCArrange0(B2, p0, S, arrange);
  413. }
  414. template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>& X) {
  415. Vector<Real> B0;
  416. SHCArrange1(S, arrange, p0, B0);
  417. Long p_ = p0 + 1;
  418. Long M0 = (p0+1)*(p0+1);
  419. Long M_ = (p_+1)*(p_+1);
  420. Long N = B0.Dim() / M0;
  421. assert(B0.Dim() == N*M0);
  422. assert(N % COORD_DIM == 0);
  423. Vector<Real> B1(N*M_);
  424. const Complex<Real> imag(0,1);
  425. for (Long i=0; i<N; i+=COORD_DIM) {
  426. for (Long m=0; m<=p_; m++) {
  427. for (Long n=m; n<=p_; n++) {
  428. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  429. Complex<Real> c;
  430. if (0<=m && m<=n && n<=p) {
  431. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  432. Long idx_imag = idx_real + (p+1-m)*N;
  433. c.real = coeff[idx_real];
  434. if (m) c.imag = coeff[idx_imag];
  435. }
  436. return c;
  437. };
  438. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  439. if (0<=m && m<=n && n<=p) {
  440. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  441. Long idx_imag = idx_real + (p+1-m)*N;
  442. coeff[idx_real] = c.real;
  443. if (m) coeff[idx_imag] = c.imag;
  444. }
  445. };
  446. auto phiG = [&](Long n, Long m) {
  447. auto phiV = read_coeff(B0, i+0, p0, n, m);
  448. auto phiW = read_coeff(B0, i+1, p0, n, m);
  449. return phiV + phiW;
  450. };
  451. auto phiY = [&](Long n, Long m) {
  452. auto phiV = read_coeff(B0, i+0, p0, n, m);
  453. auto phiW = read_coeff(B0, i+1, p0, n, m);
  454. return phiW * n - phiV * (n + 1);
  455. };
  456. auto phiX = [&](Long n, Long m) {
  457. return read_coeff(B0, i+2, p0, n, m);
  458. };
  459. Complex<Real> gr, gt, gp;
  460. { // (gt, gp) <-- (phiG, phiX)
  461. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  462. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  463. gr = phiY(n,m);
  464. gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
  465. gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
  466. }
  467. write_coeff(gr, B1, i+0, p_, n, m);
  468. write_coeff(gt, B1, i+1, p_, n, m);
  469. write_coeff(gp, B1, i+2, p_, n, m);
  470. }
  471. }
  472. }
  473. { // Set X
  474. SHC2Grid_(B1, p_, Nt, Np, &X);
  475. Vector<Real> sin_phi(Np), cos_phi(Np);
  476. for (Long i = 0; i < Np; i++) {
  477. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  478. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  479. }
  480. const auto& Y = LegendreNodes(Nt - 1);
  481. assert(Y.Dim() == Nt);
  482. Long Ngrid = Nt * Np;
  483. for (Long k = 0; k < N; k+=COORD_DIM) {
  484. for (Long i = 0; i < Nt; i++) {
  485. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  486. Real cos_theta = Y[i];
  487. Real s = 1 / sin_theta;
  488. auto X_ = X.begin() + (k*Nt+i)*Np;
  489. for (Long j = 0; j < Np; j++) {
  490. StaticArray<Real,3> in;
  491. in[0] = X_[0*Ngrid+j];
  492. in[1] = X_[1*Ngrid+j] * s;
  493. in[2] = X_[2*Ngrid+j] * s;
  494. StaticArray<Real,9> M;
  495. M[0] = sin_theta*cos_phi[j]; M[1] = sin_theta*sin_phi[j]; M[2] = cos_theta;
  496. M[3] = cos_theta*cos_phi[j]; M[4] = cos_theta*sin_phi[j]; M[5] =-sin_theta;
  497. M[6] = -sin_phi[j]; M[7] = cos_phi[j]; M[8] = 0;
  498. X_[0*Ngrid+j] = ( M[0] * in[0] + M[3] * in[1] + M[6] * in[2] );
  499. X_[1*Ngrid+j] = ( M[1] * in[0] + M[4] * in[1] + M[7] * in[2] );
  500. X_[2*Ngrid+j] = ( M[2] * in[0] + M[5] * in[1] + M[8] * in[2] );
  501. }
  502. }
  503. }
  504. }
  505. }
  506. template <class Real> void SphericalHarmonics<Real>::VecSHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
  507. Long M = (p0+1) * (p0+1);
  508. Long dof;
  509. Matrix<Real> B1;
  510. { // Set B1, dof
  511. Vector<Real> B0;
  512. SHCArrange1(S, arrange, p0, B0);
  513. dof = B0.Dim() / M / COORD_DIM;
  514. assert(B0.Dim() == dof * COORD_DIM * M);
  515. B1.ReInit(dof, COORD_DIM * M);
  516. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  517. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  518. }
  519. assert(B1.Dim(1) == COORD_DIM * M);
  520. assert(B1.Dim(0) == dof);
  521. Matrix<Real> SHBasis;
  522. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  523. assert(SHBasis.Dim(1) == COORD_DIM * M);
  524. Long N = SHBasis.Dim(0) / COORD_DIM;
  525. { // Set X
  526. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  527. for (Long k0 = 0; k0 < N; k0++) {
  528. for (Long k1 = 0; k1 < dof; k1++) {
  529. StaticArray<Real,COORD_DIM> in;
  530. for (Long j = 0; j < COORD_DIM; j++) {
  531. in[j] = 0;
  532. for (Long i = 0; i < COORD_DIM * M; i++) {
  533. in[j] += B1[k1][i] * SHBasis[k0 * COORD_DIM + j][i];
  534. }
  535. }
  536. StaticArray<Real,9> M;
  537. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  538. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  539. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  540. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  541. M[0] = sin_theta*cos_phi; M[1] = sin_theta*sin_phi; M[2] = cos_theta;
  542. M[3] = cos_theta*cos_phi; M[4] = cos_theta*sin_phi; M[5] =-sin_theta;
  543. M[6] = -sin_phi; M[7] = cos_phi; M[8] = 0;
  544. X[(k0 * dof + k1) * COORD_DIM + 0] = M[0] * in[0] + M[3] * in[1] + M[6] * in[2];
  545. X[(k0 * dof + k1) * COORD_DIM + 1] = M[1] * in[0] + M[4] * in[1] + M[7] * in[2];
  546. X[(k0 * dof + k1) * COORD_DIM + 2] = M[2] * in[0] + M[5] * in[1] + M[8] * in[2];
  547. }
  548. }
  549. }
  550. }
  551. template <class Real> void SphericalHarmonics<Real>::StokesEvalSL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  552. Long M = (p0+1) * (p0+1);
  553. Long dof;
  554. Matrix<Real> B1;
  555. { // Set B1, dof
  556. Vector<Real> B0;
  557. SHCArrange1(S, arrange, p0, B0);
  558. dof = B0.Dim() / M / COORD_DIM;
  559. assert(B0.Dim() == dof * COORD_DIM * M);
  560. B1.ReInit(dof, COORD_DIM * M);
  561. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  562. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  563. }
  564. assert(B1.Dim(1) == COORD_DIM * M);
  565. assert(B1.Dim(0) == dof);
  566. Long N;
  567. Matrix<Real> SHBasis;
  568. Vector<Real> R, cos_theta_phi;
  569. { // Set N, R, SHBasis
  570. N = coord.Dim() / COORD_DIM;
  571. assert(coord.Dim() == N * COORD_DIM);
  572. R.ReInit(N);
  573. cos_theta_phi.ReInit(2 * N);
  574. for (Long i = 0; i < N; i++) { // Set R, cos_theta_phi
  575. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  576. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  577. cos_theta_phi[i * 2 + 0] = x[2] / R[i];
  578. cos_theta_phi[i * 2 + 1] = atan2(x[1], x[0]); // TODO: works only for float and double
  579. }
  580. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  581. assert(SHBasis.Dim(1) == COORD_DIM * M);
  582. assert(SHBasis.Dim(0) == N * COORD_DIM);
  583. }
  584. Matrix<Real> StokesOp(SHBasis.Dim(0), SHBasis.Dim(1));
  585. for (Long i = 0; i < N; i++) { // Set StokesOp
  586. for (Long m = 0; m <= p0; m++) {
  587. for (Long n = m; n <= p0; n++) {
  588. auto read_coeff = [&](Long n, Long m, Long k0, Long k1) {
  589. Complex<Real> c;
  590. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  591. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  592. c.real = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  593. if (m) {
  594. idx += (p0+1-m);
  595. c.imag = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  596. }
  597. }
  598. return c;
  599. };
  600. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  601. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  602. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  603. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  604. if (m) {
  605. idx += (p0+1-m);
  606. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  607. }
  608. }
  609. };
  610. auto Vr = read_coeff(n, m, 0, 0);
  611. auto Vt = read_coeff(n, m, 0, 1);
  612. auto Vp = read_coeff(n, m, 0, 2);
  613. auto Wr = read_coeff(n, m, 1, 0);
  614. auto Wt = read_coeff(n, m, 1, 1);
  615. auto Wp = read_coeff(n, m, 1, 2);
  616. auto Xr = read_coeff(n, m, 2, 0);
  617. auto Xt = read_coeff(n, m, 2, 1);
  618. auto Xp = read_coeff(n, m, 2, 2);
  619. Complex<Real> SVr, SVt, SVp;
  620. Complex<Real> SWr, SWt, SWp;
  621. Complex<Real> SXr, SXt, SXp;
  622. if (interior) {
  623. Real a,b;
  624. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  625. b = -(n+1) / (Real)(4*n+2) * (pow<Real>(R[i], n-1) - pow<Real>(R[i], n+1));
  626. SVr = a * Vr + b * Wr;
  627. SVt = a * Vt + b * Wt;
  628. SVp = a * Vp + b * Wp;
  629. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  630. SWr = a * Wr;
  631. SWt = a * Wt;
  632. SWp = a * Wp;
  633. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], n);
  634. SXr = a * Xr;
  635. SXt = a * Xt;
  636. SXp = a * Xp;
  637. } else {
  638. Real a,b;
  639. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  640. SVr = a * Vr;
  641. SVt = a * Vt;
  642. SVp = a * Vp;
  643. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  644. b = n / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  645. SWr = a * Wr + b * Vr;
  646. SWt = a * Wt + b * Vt;
  647. SWp = a * Wp + b * Vp;
  648. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  649. SXr = a * Xr;
  650. SXt = a * Xt;
  651. SXp = a * Xp;
  652. }
  653. write_coeff(SVr, n, m, 0, 0);
  654. write_coeff(SVt, n, m, 0, 1);
  655. write_coeff(SVp, n, m, 0, 2);
  656. write_coeff(SWr, n, m, 1, 0);
  657. write_coeff(SWt, n, m, 1, 1);
  658. write_coeff(SWp, n, m, 1, 2);
  659. write_coeff(SXr, n, m, 2, 0);
  660. write_coeff(SXt, n, m, 2, 1);
  661. write_coeff(SXp, n, m, 2, 2);
  662. }
  663. }
  664. }
  665. { // Set X <-- Q * StokesOp * B1
  666. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  667. for (Long k0 = 0; k0 < N; k0++) {
  668. StaticArray<Real,9> Q;
  669. { // Set Q
  670. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  671. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  672. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  673. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  674. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  675. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  676. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  677. }
  678. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  679. StaticArray<Real,COORD_DIM> in;
  680. for (Long j = 0; j < COORD_DIM; j++) {
  681. in[j] = 0;
  682. for (Long i = 0; i < COORD_DIM * M; i++) {
  683. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  684. }
  685. }
  686. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  687. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  688. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  689. }
  690. }
  691. }
  692. }
  693. template <class Real> void SphericalHarmonics<Real>::StokesEvalDL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  694. Long M = (p0+1) * (p0+1);
  695. Long dof;
  696. Matrix<Real> B1;
  697. { // Set B1, dof
  698. Vector<Real> B0;
  699. SHCArrange1(S, arrange, p0, B0);
  700. dof = B0.Dim() / M / COORD_DIM;
  701. assert(B0.Dim() == dof * COORD_DIM * M);
  702. B1.ReInit(dof, COORD_DIM * M);
  703. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  704. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  705. }
  706. assert(B1.Dim(1) == COORD_DIM * M);
  707. assert(B1.Dim(0) == dof);
  708. Long N;
  709. Matrix<Real> SHBasis;
  710. Vector<Real> R, cos_theta_phi;
  711. { // Set N, R, SHBasis
  712. N = coord.Dim() / COORD_DIM;
  713. assert(coord.Dim() == N * COORD_DIM);
  714. R.ReInit(N);
  715. cos_theta_phi.ReInit(2 * N);
  716. for (Long i = 0; i < N; i++) { // Set R, cos_theta_phi
  717. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  718. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  719. cos_theta_phi[i * 2 + 0] = x[2] / R[i];
  720. cos_theta_phi[i * 2 + 1] = atan2(x[1], x[0]); // TODO: works only for float and double
  721. }
  722. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  723. assert(SHBasis.Dim(1) == COORD_DIM * M);
  724. assert(SHBasis.Dim(0) == N * COORD_DIM);
  725. }
  726. Matrix<Real> StokesOp(SHBasis.Dim(0), SHBasis.Dim(1));
  727. for (Long i = 0; i < N; i++) { // Set StokesOp
  728. for (Long m = 0; m <= p0; m++) {
  729. for (Long n = m; n <= p0; n++) {
  730. auto read_coeff = [&](Long n, Long m, Long k0, Long k1) {
  731. Complex<Real> c;
  732. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  733. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  734. c.real = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  735. if (m) {
  736. idx += (p0+1-m);
  737. c.imag = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  738. }
  739. }
  740. return c;
  741. };
  742. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  743. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  744. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  745. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  746. if (m) {
  747. idx += (p0+1-m);
  748. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  749. }
  750. }
  751. };
  752. auto Vr = read_coeff(n, m, 0, 0);
  753. auto Vt = read_coeff(n, m, 0, 1);
  754. auto Vp = read_coeff(n, m, 0, 2);
  755. auto Wr = read_coeff(n, m, 1, 0);
  756. auto Wt = read_coeff(n, m, 1, 1);
  757. auto Wp = read_coeff(n, m, 1, 2);
  758. auto Xr = read_coeff(n, m, 2, 0);
  759. auto Xt = read_coeff(n, m, 2, 1);
  760. auto Xp = read_coeff(n, m, 2, 2);
  761. Complex<Real> SVr, SVt, SVp;
  762. Complex<Real> SWr, SWt, SWp;
  763. Complex<Real> SXr, SXt, SXp;
  764. if (interior) {
  765. Real a,b;
  766. a = -2*n*(n+2) / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  767. b = -(n+1)*(n+2) / (Real)(2*n+1) * (pow<Real>(R[i], n+1) - pow<Real>(R[i], n-1));
  768. SVr = a * Vr + b * Wr;
  769. SVt = a * Vt + b * Wt;
  770. SVp = a * Vp + b * Wp;
  771. a = -(2*n*n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  772. SWr = a * Wr;
  773. SWt = a * Wt;
  774. SWp = a * Wp;
  775. a = -(n+2) / (Real)(2*n+1) * pow<Real>(R[i], n);
  776. SXr = a * Xr;
  777. SXt = a * Xt;
  778. SXp = a * Xp;
  779. } else {
  780. Real a,b;
  781. a = (2*n*n+4*n+3) / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  782. SVr = a * Vr;
  783. SVt = a * Vt;
  784. SVp = a * Vp;
  785. a = 2*(n+1)*(n-1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  786. b = 2*n*(n-1) / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  787. SWr = a * Wr + b * Vr;
  788. SWt = a * Wt + b * Vt;
  789. SWp = a * Wp + b * Vp;
  790. a = (n-1) / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  791. SXr = a * Xr;
  792. SXt = a * Xt;
  793. SXp = a * Xp;
  794. }
  795. write_coeff(SVr, n, m, 0, 0);
  796. write_coeff(SVt, n, m, 0, 1);
  797. write_coeff(SVp, n, m, 0, 2);
  798. write_coeff(SWr, n, m, 1, 0);
  799. write_coeff(SWt, n, m, 1, 1);
  800. write_coeff(SWp, n, m, 1, 2);
  801. write_coeff(SXr, n, m, 2, 0);
  802. write_coeff(SXt, n, m, 2, 1);
  803. write_coeff(SXp, n, m, 2, 2);
  804. }
  805. }
  806. }
  807. { // Set X <-- Q * StokesOp * B1
  808. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  809. for (Long k0 = 0; k0 < N; k0++) {
  810. StaticArray<Real,9> Q;
  811. { // Set Q
  812. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  813. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  814. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  815. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  816. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  817. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  818. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  819. }
  820. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  821. StaticArray<Real,COORD_DIM> in;
  822. for (Long j = 0; j < COORD_DIM; j++) {
  823. in[j] = 0;
  824. for (Long i = 0; i < COORD_DIM * M; i++) {
  825. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  826. }
  827. }
  828. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  829. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  830. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  831. }
  832. }
  833. }
  834. }
  835. template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
  836. const auto& Mf = OpFourierInv(Np);
  837. assert(Mf.Dim(0) == Np);
  838. const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
  839. assert((Long)Ml.size() == p1+1);
  840. Long N = X.Dim() / (Np*Nt);
  841. assert(X.Dim() == N*Np*Nt);
  842. Vector<Real> B0((2*p1+1) * N*Nt);
  843. #pragma omp parallel
  844. { // B0 <-- Transpose(FFT(X))
  845. Integer tid=omp_get_thread_num();
  846. Integer omp_p=omp_get_num_threads();
  847. Long a=(tid+0)*N*Nt/omp_p;
  848. Long b=(tid+1)*N*Nt/omp_p;
  849. Vector<Real> buff(Mf.Dim(1));
  850. Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
  851. Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
  852. const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
  853. for (Long i = a; i < b; i++) {
  854. { // buff <-- FFT(Xi)
  855. const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
  856. Mf.Execute(Xi, buff);
  857. }
  858. { // B0 <-- Transpose(buff)
  859. B0_[0][i] = buff[0]; // skipping buff[1] == 0
  860. for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
  861. for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
  862. }
  863. }
  864. }
  865. if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
  866. #pragma omp parallel
  867. { // Evaluate Legendre polynomial
  868. Integer tid=omp_get_thread_num();
  869. Integer omp_p=omp_get_num_threads();
  870. Long offset0=0;
  871. Long offset1=0;
  872. for (Long i = 0; i < p1+1; i++) {
  873. Long N_ = (i==0 ? N : 2*N);
  874. Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
  875. Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
  876. { // Mout = Min * Ml[i] // split between threads
  877. Long a=(tid+0)*N_/omp_p;
  878. Long b=(tid+1)*N_/omp_p;
  879. if (a < b) {
  880. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  881. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  882. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  883. }
  884. }
  885. offset0+=Min .Dim(0)*Min .Dim(1);
  886. offset1+=Mout.Dim(0)*Mout.Dim(1);
  887. }
  888. assert(offset0 == B0.Dim());
  889. assert(offset1 == B1.Dim());
  890. }
  891. B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  892. }
  893. template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
  894. Long M = (p1+1)*(p1+1);
  895. Long N = B1.Dim() / M;
  896. assert(B1.Dim() == N*M);
  897. if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
  898. Long M = 2*(p1+1)*(p1+1);
  899. if(S.Dim() != N * M) S.ReInit(N * M);
  900. #pragma omp parallel
  901. { // S <-- Rearrange(B1)
  902. Integer tid=omp_get_thread_num();
  903. Integer omp_p=omp_get_num_threads();
  904. Long a=(tid+0)*N/omp_p;
  905. Long b=(tid+1)*N/omp_p;
  906. for (Long i = a; i < b; i++) {
  907. Long offset = 0;
  908. for (Long j = 0; j < p1+1; j++) {
  909. Long len = p1+1 - j;
  910. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  911. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  912. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
  913. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  914. offset += len;
  915. }
  916. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  917. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  918. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  919. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  920. offset += len;
  921. } else {
  922. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  923. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
  924. }
  925. }
  926. }
  927. }
  928. }
  929. if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
  930. Long M = (p1+1)*(p1+2);
  931. if(S.Dim() != N * M) S.ReInit(N * M);
  932. #pragma omp parallel
  933. { // S <-- Rearrange(B1)
  934. Integer tid=omp_get_thread_num();
  935. Integer omp_p=omp_get_num_threads();
  936. Long a=(tid+0)*N/omp_p;
  937. Long b=(tid+1)*N/omp_p;
  938. for (Long i = a; i < b; i++) {
  939. Long offset = 0;
  940. for (Long j = 0; j < p1+1; j++) {
  941. Long len = p1+1 - j;
  942. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  943. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  944. Iterator<Real> S_ = S .begin() + i*M + 0;
  945. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  946. offset += len;
  947. }
  948. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  949. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  950. Iterator<Real> S_ = S .begin() + i*M + 1;
  951. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  952. offset += len;
  953. } else {
  954. Iterator<Real> S_ = S .begin() + i*M + 1;
  955. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
  956. }
  957. }
  958. }
  959. }
  960. }
  961. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
  962. Long M = (p1+1)*(p1+1);
  963. if(S.Dim() != N * M) S.ReInit(N * M);
  964. #pragma omp parallel
  965. { // S <-- Rearrange(B1)
  966. Integer tid=omp_get_thread_num();
  967. Integer omp_p=omp_get_num_threads();
  968. Long a=(tid+0)*N/omp_p;
  969. Long b=(tid+1)*N/omp_p;
  970. for (Long i = a; i < b; i++) {
  971. Long offset = 0;
  972. for (Long j = 0; j < p1+1; j++) {
  973. Long len = p1+1 - j;
  974. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  975. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  976. Iterator<Real> S_ = S .begin() + i*M + offset;
  977. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  978. offset += len;
  979. }
  980. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  981. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  982. Iterator<Real> S_ = S .begin() + i*M + offset;
  983. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  984. offset += len;
  985. }
  986. }
  987. }
  988. }
  989. }
  990. }
  991. template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
  992. Long M, N;
  993. { // Set M, N
  994. M = 0;
  995. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  996. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  997. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  998. if (M == 0) return;
  999. N = S.Dim() / M;
  1000. assert(S.Dim() == N * M);
  1001. }
  1002. if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
  1003. if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
  1004. #pragma omp parallel
  1005. { // B0 <-- Rearrange(S)
  1006. Integer tid=omp_get_thread_num();
  1007. Integer omp_p=omp_get_num_threads();
  1008. Long a=(tid+0)*N/omp_p;
  1009. Long b=(tid+1)*N/omp_p;
  1010. for (Long i = a; i < b; i++) {
  1011. Long offset = 0;
  1012. for (Long j = 0; j < p0+1; j++) {
  1013. Long len = p0+1 - j;
  1014. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1015. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1016. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
  1017. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1018. offset += len;
  1019. }
  1020. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1021. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1022. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
  1023. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1024. offset += len;
  1025. }
  1026. }
  1027. }
  1028. }
  1029. }
  1030. if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
  1031. #pragma omp parallel
  1032. { // B0 <-- Rearrange(S)
  1033. Integer tid=omp_get_thread_num();
  1034. Integer omp_p=omp_get_num_threads();
  1035. Long a=(tid+0)*N/omp_p;
  1036. Long b=(tid+1)*N/omp_p;
  1037. for (Long i = a; i < b; i++) {
  1038. Long offset = 0;
  1039. for (Long j = 0; j < p0+1; j++) {
  1040. Long len = p0+1 - j;
  1041. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1042. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1043. ConstIterator<Real> S_ = S .begin() + i*M + 0;
  1044. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1045. offset += len;
  1046. }
  1047. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1048. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1049. ConstIterator<Real> S_ = S .begin() + i*M + 1;
  1050. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1051. offset += len;
  1052. }
  1053. }
  1054. }
  1055. }
  1056. }
  1057. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
  1058. #pragma omp parallel
  1059. { // B0 <-- Rearrange(S)
  1060. Integer tid=omp_get_thread_num();
  1061. Integer omp_p=omp_get_num_threads();
  1062. Long a=(tid+0)*N/omp_p;
  1063. Long b=(tid+1)*N/omp_p;
  1064. for (Long i = a; i < b; i++) {
  1065. Long offset = 0;
  1066. for (Long j = 0; j < p0+1; j++) {
  1067. Long len = p0+1 - j;
  1068. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1069. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1070. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1071. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1072. offset += len;
  1073. }
  1074. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1075. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1076. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1077. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1078. offset += len;
  1079. }
  1080. }
  1081. }
  1082. }
  1083. }
  1084. }
  1085. template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  1086. const auto& Mf = OpFourier(Np);
  1087. assert(Mf.Dim(1) == Np);
  1088. const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
  1089. const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
  1090. assert((Long)Ml .size() == p0+1);
  1091. assert((Long)Mdl.size() == p0+1);
  1092. Long N = B0.Dim() / ((p0+1)*(p0+1));
  1093. assert(B0.Dim() == N*(p0+1)*(p0+1));
  1094. if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
  1095. if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
  1096. if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
  1097. Vector<Real> B1(N*(2*p0+1)*Nt);
  1098. if(X || X_phi){
  1099. #pragma omp parallel
  1100. { // Evaluate Legendre polynomial
  1101. Integer tid=omp_get_thread_num();
  1102. Integer omp_p=omp_get_num_threads();
  1103. Long offset0=0;
  1104. Long offset1=0;
  1105. for(Long i=0;i<p0+1;i++){
  1106. Long N_ = (i==0 ? N : 2*N);
  1107. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1108. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1109. { // Mout = Min * Ml[i] // split between threads
  1110. Long a=(tid+0)*N_/omp_p;
  1111. Long b=(tid+1)*N_/omp_p;
  1112. if(a<b){
  1113. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1114. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1115. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1116. }
  1117. }
  1118. offset0+=Min .Dim(0)*Min .Dim(1);
  1119. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1120. }
  1121. }
  1122. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1123. #pragma omp parallel
  1124. { // Transpose and evaluate Fourier
  1125. Integer tid=omp_get_thread_num();
  1126. Integer omp_p=omp_get_num_threads();
  1127. Long a=(tid+0)*N*Nt/omp_p;
  1128. Long b=(tid+1)*N*Nt/omp_p;
  1129. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1130. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1131. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1132. for (Long i = a; i < b; i++) {
  1133. { // buff <-- Transpose(B1)
  1134. buff[0] = B1_[0][i];
  1135. buff[1] = 0;
  1136. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1137. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1138. }
  1139. { // X <-- FFT(buff)
  1140. Vector<Real> Xi(Np, X->begin() + Np * i, false);
  1141. Mf.Execute(buff, Xi);
  1142. }
  1143. if(X_phi){ // Evaluate Fourier gradient
  1144. { // buff <-- Transpose(B1)
  1145. buff[0] = 0;
  1146. buff[1] = 0;
  1147. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1148. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1149. for (Long j = 1; j < buff.Dim()/2; j++) {
  1150. Real x = buff[2*j+0];
  1151. Real y = buff[2*j+1];
  1152. buff[2*j+0] = -j*y;
  1153. buff[2*j+1] = j*x;
  1154. }
  1155. }
  1156. { // X_phi <-- FFT(buff)
  1157. Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
  1158. Mf.Execute(buff, Xi);
  1159. }
  1160. }
  1161. }
  1162. }
  1163. }
  1164. if(X_theta){
  1165. #pragma omp parallel
  1166. { // Evaluate Legendre gradient
  1167. Integer tid=omp_get_thread_num();
  1168. Integer omp_p=omp_get_num_threads();
  1169. Long offset0=0;
  1170. Long offset1=0;
  1171. for(Long i=0;i<p0+1;i++){
  1172. Long N_ = (i==0 ? N : 2*N);
  1173. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1174. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1175. { // Mout = Min * Mdl[i] // split between threads
  1176. Long a=(tid+0)*N_/omp_p;
  1177. Long b=(tid+1)*N_/omp_p;
  1178. if(a<b){
  1179. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1180. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1181. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  1182. }
  1183. }
  1184. offset0+=Min .Dim(0)*Min .Dim(1);
  1185. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1186. }
  1187. }
  1188. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1189. #pragma omp parallel
  1190. { // Transpose and evaluate Fourier
  1191. Integer tid=omp_get_thread_num();
  1192. Integer omp_p=omp_get_num_threads();
  1193. Long a=(tid+0)*N*Nt/omp_p;
  1194. Long b=(tid+1)*N*Nt/omp_p;
  1195. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1196. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1197. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1198. for (Long i = a; i < b; i++) {
  1199. { // buff <-- Transpose(B1)
  1200. buff[0] = B1_[0][i];
  1201. buff[1] = 0;
  1202. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1203. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1204. }
  1205. { // Xi <-- FFT(buff)
  1206. Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
  1207. Mf.Execute(buff, Xi);
  1208. }
  1209. }
  1210. }
  1211. }
  1212. }
  1213. template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1214. Long N = X.Dim();
  1215. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1216. if (poly_val.Dim() != Npoly * N) poly_val.ReInit(Npoly * N);
  1217. Real fact = 1 / sqrt<Real>(4 * const_pi<Real>());
  1218. Vector<Real> u(N);
  1219. for (Long n = 0; n < N; n++) {
  1220. u[n] = (X[n]*X[n]<1 ? sqrt<Real>(1-X[n]*X[n]) : 0);
  1221. poly_val[n] = fact;
  1222. }
  1223. Long idx = 0;
  1224. Long idx_nxt = 0;
  1225. for (Long i = 1; i <= degree; i++) {
  1226. idx_nxt += N*(degree-i+2);
  1227. Real c = sqrt<Real>((2*i+1)/(Real)(2*i));
  1228. for (Long n = 0; n < N; n++) {
  1229. poly_val[idx_nxt+n] = -poly_val[idx+n] * u[n] * c;
  1230. }
  1231. idx = idx_nxt;
  1232. }
  1233. idx = 0;
  1234. for (Long m = 0; m < degree; m++) {
  1235. for (Long n = 0; n < N; n++) {
  1236. Real pmm = 0;
  1237. Real pmmp1 = poly_val[idx+n];
  1238. for (Long ll = m + 1; ll <= degree; ll++) {
  1239. Real a = sqrt<Real>(((2*ll-1)*(2*ll+1) ) / (Real)((ll-m)*(ll+m) ));
  1240. Real b = sqrt<Real>(((2*ll+1)*(ll+m-1)*(ll-m-1)) / (Real)((ll-m)*(ll+m)*(2*ll-3)));
  1241. Real pll = X[n]*a*pmmp1 - b*pmm;
  1242. pmm = pmmp1;
  1243. pmmp1 = pll;
  1244. poly_val[idx + N*(ll-m) + n] = pll;
  1245. }
  1246. }
  1247. idx += N * (degree - m + 1);
  1248. }
  1249. }
  1250. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1251. Long N = X.Dim();
  1252. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1253. if (poly_val.Dim() != N * Npoly) poly_val.ReInit(N * Npoly);
  1254. Vector<Real> leg_poly(Npoly * N);
  1255. LegPoly(leg_poly, X, degree);
  1256. for (Long m = 0; m <= degree; m++) {
  1257. for (Long n = m; n <= degree; n++) {
  1258. ConstIterator<Real> Pn = leg_poly.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1259. ConstIterator<Real> Pn_ = leg_poly.begin() + N * ((degree * 2 - m + 0) * (m + 1) / 2 + n) * (m < n);
  1260. Iterator <Real> Hn = poly_val.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1261. Real c2 = sqrt<Real>(m<n ? (n+m+1)*(n-m) : 0);
  1262. for (Long i = 0; i < N; i++) {
  1263. Real c1 = (X[i]*X[i]<1 ? m/sqrt<Real>(1-X[i]*X[i]) : 0);
  1264. Hn[i] = c1*X[i]*Pn[i] + c2*Pn_[i];
  1265. }
  1266. }
  1267. }
  1268. }
  1269. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p){
  1270. assert(p<SCTL_SHMAXDEG);
  1271. Vector<Real>& Qx=MatrixStore().Qx_[p];
  1272. if(!Qx.Dim()){
  1273. Vector<double> qx1(p+1);
  1274. Vector<double> qw1(p+1);
  1275. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1276. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1277. if (Qx.Dim() != p+1) Qx.ReInit(p+1);
  1278. for (Long i = 0; i < p + 1; i++) Qx[i] = -qx1[i];
  1279. }
  1280. return Qx;
  1281. }
  1282. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p){
  1283. assert(p<SCTL_SHMAXDEG);
  1284. Vector<Real>& Qw=MatrixStore().Qw_[p];
  1285. if(!Qw.Dim()){
  1286. Vector<double> qx1(p+1);
  1287. Vector<double> qw1(p+1);
  1288. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1289. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1290. if (Qw.Dim() != p+1) Qw.ReInit(p+1);
  1291. for (Long i = 0; i < p + 1; i++) Qw[i] = qw1[i];
  1292. }
  1293. return Qw;
  1294. }
  1295. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  1296. assert(p1<SCTL_SHMAXDEG);
  1297. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  1298. if(!Sw.Dim()){
  1299. const Vector<Real>& qx1 = LegendreNodes(p1);
  1300. const Vector<Real>& qw1 = LegendreWeights(p1);
  1301. std::vector<Real> Yf(p1+1,0);
  1302. { // Set Yf
  1303. Vector<Real> x0(1); x0=1.0;
  1304. Vector<Real> alp0((p1+1)*(p1+2)/2);
  1305. LegPoly(alp0, x0, p1);
  1306. Vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  1307. LegPoly(alp, qx1, p1);
  1308. for(Long j=0;j<p1+1;j++){
  1309. for(Long i=0;i<p1+1;i++){
  1310. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  1311. }
  1312. }
  1313. }
  1314. Sw.ReInit(p1+1);
  1315. for(Long i=0;i<p1+1;i++){
  1316. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  1317. }
  1318. }
  1319. return Sw;
  1320. }
  1321. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  1322. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1323. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  1324. if(!Mf.Dim(0)){
  1325. const Real SQRT2PI=sqrt(2*M_PI);
  1326. { // Set Mf
  1327. Matrix<Real> M(2*p0,2*p1);
  1328. for(Long j=0;j<2*p1;j++){
  1329. M[0][j]=SQRT2PI*1.0;
  1330. for(Long k=1;k<p0;k++){
  1331. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  1332. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  1333. }
  1334. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  1335. }
  1336. Mf=M;
  1337. }
  1338. }
  1339. return Mf;
  1340. }
  1341. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  1342. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1343. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  1344. if(!Mf.Dim(0)){
  1345. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  1346. { // Set Mf
  1347. Matrix<Real> M(2*p0,2*p1);
  1348. M.SetZero();
  1349. if(p1>p0) p1=p0;
  1350. for(Long j=0;j<2*p0;j++){
  1351. M[j][0]=INVSQRT2PI*0.5;
  1352. for(Long k=1;k<p1;k++){
  1353. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  1354. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  1355. }
  1356. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  1357. }
  1358. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  1359. Mf=M;
  1360. }
  1361. }
  1362. return Mf;
  1363. }
  1364. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  1365. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1366. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  1367. if(!Mdf.Dim(0)){
  1368. const Real SQRT2PI=sqrt(2*M_PI);
  1369. { // Set Mdf_
  1370. Matrix<Real> M(2*p0,2*p1);
  1371. for(Long j=0;j<2*p1;j++){
  1372. M[0][j]=SQRT2PI*0.0;
  1373. for(Long k=1;k<p0;k++){
  1374. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  1375. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  1376. }
  1377. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  1378. }
  1379. Mdf=M;
  1380. }
  1381. }
  1382. return Mdf;
  1383. }
  1384. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourier(Long Np){
  1385. assert(Np<SCTL_SHMAXDEG);
  1386. auto& Mf =MatrixStore().Mfftinv_ [Np];
  1387. #pragma omp critical (SCTL_FFT_PLAN0)
  1388. if(!Mf.Dim(0)){
  1389. StaticArray<Long,1> fft_dim = {Np};
  1390. Mf.Setup(FFT_Type::C2R, 1, Vector<Long>(1,fft_dim,false));
  1391. }
  1392. return Mf;
  1393. }
  1394. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourierInv(Long Np){
  1395. assert(Np<SCTL_SHMAXDEG);
  1396. auto& Mf =MatrixStore().Mfft_ [Np];
  1397. #pragma omp critical (SCTL_FFT_PLAN1)
  1398. if(!Mf.Dim(0)){
  1399. StaticArray<Long,1> fft_dim = {Np};
  1400. Mf.Setup(FFT_Type::R2C, 1, Vector<Long>(1,fft_dim,false));
  1401. }
  1402. return Mf;
  1403. }
  1404. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  1405. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1406. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  1407. if(!Ml.size()){
  1408. const Vector<Real>& qx1 = LegendreNodes(p1);
  1409. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1410. LegPoly(alp, qx1, p0);
  1411. Ml.resize(p0+1);
  1412. auto ptr = alp.begin();
  1413. for(Long i=0;i<=p0;i++){
  1414. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1415. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1416. }
  1417. }
  1418. return Ml;
  1419. }
  1420. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  1421. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1422. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  1423. if(!Ml.size()){
  1424. const Vector<Real>& qx1 = LegendreNodes(p0);
  1425. const Vector<Real>& qw1 = LegendreWeights(p0);
  1426. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  1427. LegPoly(alp, qx1, p1);
  1428. Ml.resize(p1+1);
  1429. auto ptr = alp.begin();
  1430. for(Long i=0;i<=p1;i++){
  1431. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  1432. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  1433. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  1434. for(Long k=0;k<qx1.Dim();k++){
  1435. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  1436. }
  1437. }
  1438. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1439. }
  1440. }
  1441. return Ml;
  1442. }
  1443. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  1444. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1445. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  1446. if(!Mdl.size()){
  1447. const Vector<Real>& qx1 = LegendreNodes(p1);
  1448. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1449. LegPolyDeriv(alp, qx1, p0);
  1450. Mdl.resize(p0+1);
  1451. auto ptr = alp.begin();
  1452. for(Long i=0;i<=p0;i++){
  1453. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1454. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  1455. }
  1456. }
  1457. return Mdl;
  1458. }
  1459. template <class Real> void SphericalHarmonics<Real>::SHBasisEval(Long p0, const Vector<Real>& cos_theta_phi, Matrix<Real>& SHBasis) {
  1460. Long M = (p0+1) * (p0+1);
  1461. Long N = cos_theta_phi.Dim() / 2;
  1462. assert(cos_theta_phi.Dim() == N * 2);
  1463. Vector<Complex<Real>> exp_phi(N);
  1464. Matrix<Real> LegP((p0+1)*(p0+2)/2, N);
  1465. { // Set exp_phi, LegP
  1466. Vector<Real> cos_theta(N);
  1467. for (Long i = 0; i < N; i++) { // Set cos_theta, exp_phi
  1468. cos_theta[i] = cos_theta_phi[i*2+0];
  1469. exp_phi[i].real = cos<Real>(cos_theta_phi[i*2+1]);
  1470. exp_phi[i].imag = sin<Real>(cos_theta_phi[i*2+1]);
  1471. }
  1472. Vector<Real> alp(LegP.Dim(0) * LegP.Dim(1), LegP.begin(), false);
  1473. LegPoly(alp, cos_theta, p0);
  1474. }
  1475. { // Set SHBasis
  1476. SHBasis.ReInit(N, M);
  1477. Real s = 4 * sqrt<Real>(const_pi<Real>());
  1478. for (Long k0 = 0; k0 < N; k0++) {
  1479. Complex<Real> exp_phi_ = 1;
  1480. Complex<Real> exp_phi1 = exp_phi[k0];
  1481. for (Long m = 0; m <= p0; m++) {
  1482. for (Long n = m; n <= p0; n++) {
  1483. Long poly_idx = (2 * p0 - m + 1) * m / 2 + n;
  1484. Long basis_idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1485. SHBasis[k0][basis_idx] = LegP[poly_idx][k0] * exp_phi_.real * s;
  1486. if (m) { // imaginary part
  1487. basis_idx += (p0+1-m);
  1488. SHBasis[k0][basis_idx] = -LegP[poly_idx][k0] * exp_phi_.imag * s;
  1489. } else {
  1490. SHBasis[k0][basis_idx] = SHBasis[k0][basis_idx] * 0.5;
  1491. }
  1492. }
  1493. exp_phi_ = exp_phi_ * exp_phi1;
  1494. }
  1495. }
  1496. }
  1497. assert(SHBasis.Dim(0) == N);
  1498. assert(SHBasis.Dim(1) == M);
  1499. }
  1500. template <class Real> void SphericalHarmonics<Real>::VecSHBasisEval(Long p0, const Vector<Real>& cos_theta_phi, Matrix<Real>& SHBasis) {
  1501. Long M = (p0+1) * (p0+1);
  1502. Long N = cos_theta_phi.Dim() / 2;
  1503. assert(cos_theta_phi.Dim() == N * 2);
  1504. Long p_ = p0 + 1;
  1505. Long M_ = (p_+1) * (p_+1);
  1506. Matrix<Real> Ynm(N, M_);
  1507. SHBasisEval(p_, cos_theta_phi, Ynm);
  1508. Vector<Real> cos_theta(N);
  1509. for (Long i = 0; i < N; i++) { // Set cos_theta
  1510. cos_theta[i] = cos_theta_phi[i*2+0];
  1511. }
  1512. { // Set SHBasis
  1513. SHBasis.ReInit(N * COORD_DIM, COORD_DIM * M);
  1514. SHBasis = 0;
  1515. const Complex<Real> imag(0,1);
  1516. for (Long i = 0; i < N; i++) {
  1517. auto Y = [p_, &Ynm, i](Long n, Long m) {
  1518. Complex<Real> c;
  1519. if (0 <= m && m <= n && n <= p_) {
  1520. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  1521. c.real = Ynm[i][idx];
  1522. if (m) {
  1523. idx += (p_+1-m);
  1524. c.imag = Ynm[i][idx];
  1525. }
  1526. }
  1527. return c;
  1528. };
  1529. auto write_coeff = [p0, &SHBasis, i, M](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  1530. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  1531. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1532. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  1533. if (m) {
  1534. idx += (p0+1-m);
  1535. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  1536. }
  1537. }
  1538. };
  1539. auto A = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  1540. auto B = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  1541. Real s = 1 / sqrt<Real>(1 - cos_theta[i] * cos_theta[i]);
  1542. if (fabs(cos_theta[i]) < 1) {
  1543. for (Long m = 0; m <= p0; m++) {
  1544. for (Long n = m; n <= p0; n++) {
  1545. Complex<Real> AYBY = A(n,m) * Y(n+1,m) - B(n,m) * Y(n-1,m);
  1546. Complex<Real> Fv2r = Y(n,m) * (-n-1);
  1547. Complex<Real> Fw2r = Y(n,m) * n;
  1548. Complex<Real> Fx2r = 0;
  1549. Complex<Real> Fv2t = AYBY * s;
  1550. Complex<Real> Fw2t = AYBY * s;
  1551. Complex<Real> Fx2t = imag * m * Y(n,m) * s;
  1552. Complex<Real> Fv2p = -imag * m * Y(n,m) * s;
  1553. Complex<Real> Fw2p = -imag * m * Y(n,m) * s;
  1554. Complex<Real> Fx2p = AYBY * s;
  1555. write_coeff(Fv2r, n, m, 0, 0);
  1556. write_coeff(Fw2r, n, m, 1, 0);
  1557. write_coeff(Fx2r, n, m, 2, 0);
  1558. write_coeff(Fv2t, n, m, 0, 1);
  1559. write_coeff(Fw2t, n, m, 1, 1);
  1560. write_coeff(Fx2t, n, m, 2, 1);
  1561. write_coeff(Fv2p, n, m, 0, 2);
  1562. write_coeff(Fw2p, n, m, 1, 2);
  1563. write_coeff(Fx2p, n, m, 2, 2);
  1564. }
  1565. }
  1566. } else {
  1567. Complex<Real> exp_phi;
  1568. exp_phi.real = cos<Real>(cos_theta_phi[i*2+1]);
  1569. exp_phi.imag = -sin<Real>(cos_theta_phi[i*2+1]);
  1570. for (Long m = 0; m <= p0; m++) {
  1571. for (Long n = m; n <= p0; n++) {
  1572. Complex<Real> Fv2r = 0;
  1573. Complex<Real> Fw2r = 0;
  1574. Complex<Real> Fx2r = 0;
  1575. Complex<Real> Fv2t = 0;
  1576. Complex<Real> Fw2t = 0;
  1577. Complex<Real> Fx2t = 0;
  1578. Complex<Real> Fv2p = 0;
  1579. Complex<Real> Fw2p = 0;
  1580. Complex<Real> Fx2p = 0;
  1581. if (m == 0) {
  1582. Fv2r = Y(n,m) * (-n-1);
  1583. Fw2r = Y(n,m) * n;
  1584. Fx2r = 0;
  1585. }
  1586. if (m == 1) {
  1587. auto dY = [&cos_theta, i](Long n) { return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta[i]<0) ? -1 : 1); };
  1588. Complex<Real> AYBY = A(n,m) * dY(n+1) - B(n,m) * dY(n-1);
  1589. Fv2t = AYBY * exp_phi;
  1590. Fw2t = AYBY * exp_phi;
  1591. Fx2t = imag * m * dY(n) * exp_phi;
  1592. Fv2p =-imag * m * dY(n) * exp_phi;
  1593. Fw2p =-imag * m * dY(n) * exp_phi;
  1594. Fx2p = AYBY * exp_phi;
  1595. }
  1596. write_coeff(Fv2r, n, m, 0, 0);
  1597. write_coeff(Fw2r, n, m, 1, 0);
  1598. write_coeff(Fx2r, n, m, 2, 0);
  1599. write_coeff(Fv2t, n, m, 0, 1);
  1600. write_coeff(Fw2t, n, m, 1, 1);
  1601. write_coeff(Fx2t, n, m, 2, 1);
  1602. write_coeff(Fv2p, n, m, 0, 2);
  1603. write_coeff(Fw2p, n, m, 1, 2);
  1604. write_coeff(Fx2p, n, m, 2, 2);
  1605. }
  1606. }
  1607. }
  1608. }
  1609. }
  1610. assert(SHBasis.Dim(0) == N * COORD_DIM);
  1611. assert(SHBasis.Dim(1) == COORD_DIM * M);
  1612. }
  1613. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  1614. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1615. { // Set coeff_perm
  1616. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1617. Long itr=0;
  1618. for(Long i=0;i<2*p0;i++){
  1619. Long m=(i+1)/2;
  1620. for(Long n=m;n<=p0;n++){
  1621. coeff_perm[n][i]=itr;
  1622. itr++;
  1623. }
  1624. }
  1625. }
  1626. assert(p0<SCTL_SHMAXDEG);
  1627. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  1628. if(!Mr.size()){
  1629. const Real SQRT2PI=sqrt(2*M_PI);
  1630. Long Ncoef=p0*(p0+2);
  1631. Long Ngrid=2*p0*(p0+1);
  1632. Long Naleg=(p0+1)*(p0+2)/2;
  1633. Matrix<Real> Mcoord0(3,Ngrid);
  1634. const Vector<Real>& x=LegendreNodes(p0);
  1635. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  1636. for(Long j=0;j<2*p0;j++){
  1637. Mcoord0[0][i*2*p0+j]=x[i];
  1638. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  1639. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  1640. }
  1641. }
  1642. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  1643. Matrix<Real> Mcoord1;
  1644. { // Rotate coordinates
  1645. Matrix<Real> M(COORD_DIM, COORD_DIM);
  1646. Real cos_=-x[l];
  1647. Real sin_=-sqrt(1.0-x[l]*x[l]);
  1648. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  1649. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  1650. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  1651. Mcoord1=M*Mcoord0;
  1652. }
  1653. Matrix<Real> Mleg(Naleg, Ngrid);
  1654. { // Set Mleg
  1655. const Vector<Real> Vcoord1(Mcoord1.Dim(0)*Mcoord1.Dim(1), Mcoord1.begin(), false);
  1656. Vector<Real> Vleg(Mleg.Dim(0)*Mleg.Dim(1), Mleg.begin(), false);
  1657. LegPoly(Vleg, Vcoord1, p0);
  1658. }
  1659. Vector<Real> theta(Ngrid);
  1660. for(Long i=0;i<theta.Dim();i++){ // Set theta
  1661. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]); // TODO: works only for float and double
  1662. }
  1663. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  1664. { // Build Mcoef2grid
  1665. Long offset0=0;
  1666. Long offset1=0;
  1667. for(Long i=0;i<p0+1;i++){
  1668. Long len=p0+1-i;
  1669. { // P * cos
  1670. for(Long j=0;j<len;j++){
  1671. for(Long k=0;k<Ngrid;k++){
  1672. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  1673. }
  1674. }
  1675. offset1+=len;
  1676. }
  1677. if(i!=0 && i!=p0){ // P * sin
  1678. for(Long j=0;j<len;j++){
  1679. for(Long k=0;k<Ngrid;k++){
  1680. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  1681. }
  1682. }
  1683. offset1+=len;
  1684. }
  1685. offset0+=len;
  1686. }
  1687. assert(offset0==Naleg);
  1688. assert(offset1==Ncoef);
  1689. }
  1690. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  1691. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  1692. Grid2SHC(Vcoef2grid, p0+1, 2*p0, p0, Vcoef2coef, SHCArrange::COL_MAJOR_NONZERO);
  1693. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  1694. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  1695. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  1696. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  1697. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  1698. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  1699. }
  1700. }
  1701. Mr.push_back(M);
  1702. }
  1703. }
  1704. }
  1705. return Mr;
  1706. }
  1707. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  1708. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  1709. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  1710. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  1711. assert(p1==(Long)Ml.size()-1);
  1712. assert(p0==Mf.Dim(0)/2);
  1713. assert(p1==Mf.Dim(1)/2);
  1714. Long N=X.Dim()/(2*p0*(p0+1));
  1715. assert(N*2*p0*(p0+1)==X.Dim());
  1716. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  1717. Vector<Real> B0, B1;
  1718. B0.ReInit(N* p1*(p1+2));
  1719. B1.ReInit(N*2*p1*(p0+1));
  1720. #pragma omp parallel
  1721. { // Evaluate Fourier and transpose
  1722. Integer tid=omp_get_thread_num();
  1723. Integer omp_p=omp_get_num_threads();
  1724. Long a=(tid+0)*N*(p0+1)/omp_p;
  1725. Long b=(tid+1)*N*(p0+1)/omp_p;
  1726. const Long block_size=16;
  1727. Matrix<Real> B2(block_size,2*p1);
  1728. for(Long i0=a;i0<b;i0+=block_size){
  1729. Long i1=std::min(b,i0+block_size);
  1730. const Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  1731. Matrix<Real> Mout(i1-i0,2*p1, B2.begin(), false);
  1732. Matrix<Real>::GEMM(Mout, Min, Mf);
  1733. for(Long i=i0;i<i1;i++){
  1734. for(Long j=0;j<2*p1;j++){
  1735. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  1736. }
  1737. }
  1738. }
  1739. }
  1740. #pragma omp parallel
  1741. { // Evaluate Legendre polynomial
  1742. Integer tid=omp_get_thread_num();
  1743. Integer omp_p=omp_get_num_threads();
  1744. Long offset0=0;
  1745. Long offset1=0;
  1746. for(Long i=0;i<p1+1;i++){
  1747. Long N0=2*N;
  1748. if(i==0 || i==p1) N0=N;
  1749. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  1750. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  1751. { // Mout = Min * Ml[i] // split between threads
  1752. Long a=(tid+0)*N0/omp_p;
  1753. Long b=(tid+1)*N0/omp_p;
  1754. if(a<b){
  1755. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1756. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1757. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1758. }
  1759. }
  1760. offset0+=Min .Dim(0)*Min .Dim(1);
  1761. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1762. }
  1763. }
  1764. #pragma omp parallel
  1765. { // S <-- Rearrange(B0)
  1766. Integer tid=omp_get_thread_num();
  1767. Integer omp_p=omp_get_num_threads();
  1768. Long a=(tid+0)*N/omp_p;
  1769. Long b=(tid+1)*N/omp_p;
  1770. for(Long i=a;i<b;i++){
  1771. Long offset=0;
  1772. for(Long j=0;j<2*p1;j++){
  1773. Long len=p1+1-(j+1)/2;
  1774. Real* B_=&B0[i*len+N*offset];
  1775. Real* S_=&S[i*p1*(p1+2)+offset];
  1776. for(Long k=0;k<len;k++) S_[k]=B_[k];
  1777. offset+=len;
  1778. }
  1779. }
  1780. }
  1781. }
  1782. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  1783. const std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  1784. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1785. { // Set coeff_perm
  1786. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1787. Long itr=0;
  1788. for(Long i=0;i<2*p0;i++){
  1789. Long m=(i+1)/2;
  1790. for(Long n=m;n<=p0;n++){
  1791. coeff_perm[n][i]=itr;
  1792. itr++;
  1793. }
  1794. }
  1795. }
  1796. Long Ncoef=p0*(p0+2);
  1797. Long N=S.Dim()/Ncoef/dof;
  1798. assert(N*Ncoef*dof==S.Dim());
  1799. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  1800. const Matrix<Real> S0(N*dof, Ncoef, (Iterator<Real>)S.begin(), false);
  1801. Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, S_.begin(), false);
  1802. #pragma omp parallel
  1803. { // Construct all p0*(p0+1) rotations
  1804. Integer tid=omp_get_thread_num();
  1805. Integer omp_p=omp_get_num_threads();
  1806. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  1807. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  1808. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  1809. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  1810. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  1811. }
  1812. Long a=(tid+0)*N/omp_p;
  1813. Long b=(tid+1)*N/omp_p;
  1814. for(Long i=a;i<b;i++){
  1815. for(Long d=0;d<dof;d++){
  1816. for(Long j=0;j<p0;j++){
  1817. Long offset=0;
  1818. for(Long k=0;k<p0+1;k++){
  1819. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  1820. Long len=p0+1-k;
  1821. if(k!=0 && k!=p0){
  1822. for(Long l=0;l<len;l++){
  1823. Real x[2];
  1824. x[0]=S0[i*dof+d][offset+len*0+l];
  1825. x[1]=S0[i*dof+d][offset+len*1+l];
  1826. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  1827. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  1828. }
  1829. offset+=2*len;
  1830. }else{
  1831. for(Long l=0;l<len;l++){
  1832. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  1833. }
  1834. offset+=len;
  1835. }
  1836. }
  1837. assert(offset==Ncoef);
  1838. }
  1839. }
  1840. { // Fast rotation
  1841. for(Long k=0;k<dof*p0;k++){ // forward permutation
  1842. for(Long l=0;l<=p0;l++){
  1843. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1844. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  1845. }
  1846. }
  1847. }
  1848. for(Long t=0;t<=p0;t++){
  1849. for(Long l=0;l<=p0;l++){ // mat-vec
  1850. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  1851. }
  1852. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  1853. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  1854. for(Long l=0;l<=p0;l++){
  1855. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1856. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  1857. }
  1858. }
  1859. }
  1860. }
  1861. }
  1862. }
  1863. }
  1864. }
  1865. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  1866. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  1867. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  1868. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1869. { // Set coeff_perm
  1870. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1871. Long itr=0;
  1872. for(Long i=0;i<2*p0;i++){
  1873. Long m=(i+1)/2;
  1874. for(Long n=m;n<=p0;n++){
  1875. coeff_perm[n][i]=itr;
  1876. itr++;
  1877. }
  1878. }
  1879. }
  1880. Long Ncoef=p0*(p0+2);
  1881. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  1882. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  1883. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  1884. Matrix<Real> S0(N*dof*p0*(p0+1), Ncoef, S.begin(), false);
  1885. const Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, (Iterator<Real>)S_.begin(), false);
  1886. #pragma omp parallel
  1887. { // Transpose all p0*(p0+1) rotations
  1888. Integer tid=omp_get_thread_num();
  1889. Integer omp_p=omp_get_num_threads();
  1890. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  1891. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  1892. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  1893. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  1894. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  1895. }
  1896. Long a=(tid+0)*N/omp_p;
  1897. Long b=(tid+1)*N/omp_p;
  1898. for(Long i=a;i<b;i++){
  1899. for(Long t=0;t<p0+1;t++){
  1900. Long idx0=(i*(p0+1)+t)*p0*dof;
  1901. { // Fast rotation
  1902. const Matrix<Real> Min(p0*dof, Ncoef, (Iterator<Real>)S1[idx0], false);
  1903. for(Long k=0;k<dof*p0;k++){ // forward permutation
  1904. for(Long l=0;l<=p0;l++){
  1905. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1906. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  1907. }
  1908. }
  1909. }
  1910. for(Long l=0;l<=p0;l++){ // mat-vec
  1911. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  1912. }
  1913. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  1914. for(Long l=0;l<=p0;l++){
  1915. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1916. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  1917. }
  1918. }
  1919. }
  1920. }
  1921. for(Long j=0;j<p0;j++){
  1922. for(Long d=0;d<dof;d++){
  1923. Long idx1=idx0+j*dof+d;
  1924. Long offset=0;
  1925. for(Long k=0;k<p0+1;k++){
  1926. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  1927. Long len=p0+1-k;
  1928. if(k!=0 && k!=p0){
  1929. for(Long l=0;l<len;l++){
  1930. Real x[2];
  1931. x[0]=B0[j*dof+d][offset+len*0+l];
  1932. x[1]=B0[j*dof+d][offset+len*1+l];
  1933. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  1934. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  1935. }
  1936. offset+=2*len;
  1937. }else{
  1938. for(Long l=0;l<len;l++){
  1939. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  1940. }
  1941. offset+=len;
  1942. }
  1943. }
  1944. assert(offset==Ncoef);
  1945. }
  1946. }
  1947. }
  1948. }
  1949. }
  1950. }
  1951. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  1952. Long Ngrid=2*p0*(p0+1);
  1953. Long Ncoef= p0*(p0+2);
  1954. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  1955. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  1956. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  1957. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  1958. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  1959. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  1960. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  1961. Long b=std::min(a+BLOCK_SIZE, Nves);
  1962. Vector<Real> _SLMatrix, _DLMatrix;
  1963. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  1964. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  1965. const Vector<Real> _S ((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  1966. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1967. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1968. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1969. }
  1970. }
  1971. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  1972. Profile::Tic("Rotate");
  1973. Vector<Real> S0, S;
  1974. SphericalHarmonics<Real>::Grid2SHC(X0, p0+1, 2*p0, p0, S0, SHCArrange::COL_MAJOR_NONZERO);
  1975. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  1976. Profile::Toc();
  1977. Profile::Tic("Upsample");
  1978. Vector<Real> X, X_theta, X_phi, trg;
  1979. SphericalHarmonics<Real>::SHC2Grid(S, SHCArrange::COL_MAJOR_NONZERO, p0, p1+1, 2*p1, &X, &X_theta, &X_phi);
  1980. SphericalHarmonics<Real>::SHC2Pole(S, SHCArrange::COL_MAJOR_NONZERO, p0, trg);
  1981. Profile::Toc();
  1982. Profile::Tic("Stokes");
  1983. Vector<Real> SL0, DL0;
  1984. { // Stokes kernel
  1985. //Long M0=2*p0*(p0+1);
  1986. Long M1=2*p1*(p1+1);
  1987. Long N=trg.Dim()/(2*COORD_DIM);
  1988. assert(X.Dim()==M1*COORD_DIM*N);
  1989. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  1990. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  1991. const Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  1992. const Real scal_const_dl = 3.0/(4.0*M_PI);
  1993. const Real scal_const_sl = 1.0/(8.0*M_PI);
  1994. static Real eps=-1;
  1995. if(eps<0){
  1996. eps=1;
  1997. while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5;
  1998. }
  1999. #pragma omp parallel
  2000. {
  2001. Integer tid=omp_get_thread_num();
  2002. Integer omp_p=omp_get_num_threads();
  2003. Long a=(tid+0)*N/omp_p;
  2004. Long b=(tid+1)*N/omp_p;
  2005. for(Long i=a;i<b;i++){
  2006. for(Long t=0;t<2;t++){
  2007. Real tx, ty, tz;
  2008. { // Read target coordinates
  2009. tx=trg[i*2*COORD_DIM+0*2+t];
  2010. ty=trg[i*2*COORD_DIM+1*2+t];
  2011. tz=trg[i*2*COORD_DIM+2*2+t];
  2012. }
  2013. for(Long j0=0;j0<p1+1;j0++){
  2014. for(Long j1=0;j1<2*p1;j1++){
  2015. Long s=2*p1*j0+j1;
  2016. Real dx, dy, dz;
  2017. { // Compute dx, dy, dz
  2018. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  2019. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  2020. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  2021. }
  2022. Real nx, ny, nz;
  2023. { // Compute source normal
  2024. Real x_theta=X_phi[(i*COORD_DIM+0)*M1+s];
  2025. Real y_theta=X_phi[(i*COORD_DIM+1)*M1+s];
  2026. Real z_theta=X_phi[(i*COORD_DIM+2)*M1+s];
  2027. Real x_phi=X_theta[(i*COORD_DIM+0)*M1+s];
  2028. Real y_phi=X_theta[(i*COORD_DIM+1)*M1+s];
  2029. Real z_phi=X_theta[(i*COORD_DIM+2)*M1+s];
  2030. nx=(y_theta*z_phi-z_theta*y_phi);
  2031. ny=(z_theta*x_phi-x_theta*z_phi);
  2032. nz=(x_theta*y_phi-y_theta*x_phi);
  2033. }
  2034. Real area_elem=1.0;
  2035. if(SLayer){ // Compute area_elem
  2036. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  2037. }
  2038. Real rinv, rinv2;
  2039. { // Compute rinv, rinv2
  2040. Real r2=dx*dx+dy*dy+dz*dz;
  2041. rinv=1.0/sqrt(r2);
  2042. if(r2<=eps) rinv=0;
  2043. rinv2=rinv*rinv;
  2044. }
  2045. if(DLayer){
  2046. Real rinv5=rinv2*rinv2*rinv;
  2047. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  2048. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  2049. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  2050. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  2051. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  2052. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  2053. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  2054. }
  2055. if(SLayer){
  2056. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  2057. Real area_rinv2=area_rinv*rinv2;
  2058. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  2059. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  2060. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  2061. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  2062. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  2063. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  2064. }
  2065. }
  2066. }
  2067. }
  2068. }
  2069. }
  2070. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  2071. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  2072. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  2073. }
  2074. Profile::Toc();
  2075. Profile::Tic("UpsampleTranspose");
  2076. Vector<Real> SL1, DL1;
  2077. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  2078. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  2079. Profile::Toc();
  2080. Profile::Tic("RotateTranspose");
  2081. Vector<Real> SL2, DL2;
  2082. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  2083. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  2084. Profile::Toc();
  2085. Profile::Tic("Rearrange");
  2086. Vector<Real> SL3, DL3;
  2087. { // Transpose
  2088. Long Ncoef=p0*(p0+2);
  2089. Long Ngrid=2*p0*(p0+1);
  2090. { // Transpose SL2
  2091. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  2092. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2093. #pragma omp parallel
  2094. {
  2095. Integer tid=omp_get_thread_num();
  2096. Integer omp_p=omp_get_num_threads();
  2097. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2098. Long a=(tid+0)*N/omp_p;
  2099. Long b=(tid+1)*N/omp_p;
  2100. for(Long i=a;i<b;i++){
  2101. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  2102. for(Long k=0;k<Ncoef;k++){ // Transpose
  2103. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2104. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2105. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2106. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2107. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2108. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2109. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2110. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2111. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2112. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2113. }
  2114. }
  2115. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2116. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2117. for(Long j0=0;j0<COORD_DIM;j0++){
  2118. for(Long j1=0;j1<p0+1;j1++){
  2119. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2120. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2121. }
  2122. }
  2123. }
  2124. }
  2125. }
  2126. }
  2127. { // Transpose DL2
  2128. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  2129. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2130. #pragma omp parallel
  2131. {
  2132. Integer tid=omp_get_thread_num();
  2133. Integer omp_p=omp_get_num_threads();
  2134. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2135. Long a=(tid+0)*N/omp_p;
  2136. Long b=(tid+1)*N/omp_p;
  2137. for(Long i=a;i<b;i++){
  2138. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  2139. for(Long k=0;k<Ncoef;k++){ // Transpose
  2140. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2141. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2142. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2143. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2144. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2145. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2146. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2147. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2148. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2149. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2150. }
  2151. }
  2152. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2153. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2154. for(Long j0=0;j0<COORD_DIM;j0++){
  2155. for(Long j1=0;j1<p0+1;j1++){
  2156. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2157. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2158. }
  2159. }
  2160. }
  2161. }
  2162. }
  2163. }
  2164. }
  2165. Profile::Toc();
  2166. Profile::Tic("Grid2SHC");
  2167. SphericalHarmonics<Real>::Grid2SHC(SL3, p0+1, 2*p0, p0, SL, SHCArrange::COL_MAJOR_NONZERO);
  2168. SphericalHarmonics<Real>::Grid2SHC(DL3, p0+1, 2*p0, p0, DL, SHCArrange::COL_MAJOR_NONZERO);
  2169. Profile::Toc();
  2170. }
  2171. } // end namespace