sph_harm.txx 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967
  1. #include SCTL_INCLUDE(legendre_rule.hpp)
  2. // TODO: Replace work vectors with dynamic-arrays
  3. namespace SCTL_NAMESPACE {
  4. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& S, SHCArrange arrange){
  5. Long N = X.Dim() / (Np*Nt);
  6. assert(X.Dim() == N*Np*Nt);
  7. Vector<Real> B1(N*(p1+1)*(p1+1));
  8. Grid2SHC_(X, Nt, Np, p1, B1);
  9. SHCArrange0(B1, p1, S, arrange);
  10. }
  11. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  12. Vector<Real> B0;
  13. SHCArrange1(S, arrange, p0, B0);
  14. SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
  15. }
  16. template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
  17. Long M = (p0+1) * (p0+1);
  18. Long dof;
  19. Matrix<Real> B1;
  20. { // Set B1, dof
  21. Vector<Real> B0;
  22. SHCArrange1(S, arrange, p0, B0);
  23. dof = B0.Dim() / M;
  24. assert(B0.Dim() == dof * M);
  25. B1.ReInit(dof, M);
  26. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  27. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  28. }
  29. assert(B1.Dim(0) == dof);
  30. assert(B1.Dim(1) == M);
  31. Matrix<Real> SHBasis;
  32. SHBasisEval(p0, cos_theta_phi, SHBasis);
  33. assert(SHBasis.Dim(1) == M);
  34. Long N = SHBasis.Dim(0);
  35. { // Set X
  36. if (X.Dim() != N*dof) X.ReInit(N * dof);
  37. for (Long k0 = 0; k0 < N; k0++) {
  38. for (Long k1 = 0; k1 < dof; k1++) {
  39. Real X_ = 0;
  40. for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
  41. X[k0 * dof + k1] = X_;
  42. }
  43. }
  44. }
  45. }
  46. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
  47. Vector<Real> QP[2];
  48. { // Set QP // TODO: store these weights
  49. Vector<Real> x(1), alp;
  50. const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
  51. for (Long i = 0; i < 2; i++) {
  52. x = (i ? -1 : 1);
  53. LegPoly(alp, x, p0);
  54. QP[i].ReInit(p0 + 1, alp.begin());
  55. QP[i] *= SQRT2PI;
  56. }
  57. }
  58. Long M, N;
  59. { // Set M, N
  60. M = 0;
  61. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  62. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  63. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  64. if (M == 0) return;
  65. N = S.Dim() / M;
  66. assert(S.Dim() == N * M);
  67. }
  68. if(P.Dim() != N * 2) P.ReInit(N * 2);
  69. if (arrange == SHCArrange::ALL) {
  70. #pragma omp parallel
  71. { // Compute pole
  72. Integer tid = omp_get_thread_num();
  73. Integer omp_p = omp_get_num_threads();
  74. Long a = (tid + 0) * N / omp_p;
  75. Long b = (tid + 1) * N / omp_p;
  76. for (Long i = a; i < b; i++) {
  77. Real P_[2] = {0, 0};
  78. for (Long j = 0; j < p0 + 1; j++) {
  79. P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
  80. P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
  81. }
  82. P[2*i+0] = P_[0];
  83. P[2*i+1] = P_[1];
  84. }
  85. }
  86. }
  87. if (arrange == SHCArrange::ROW_MAJOR) {
  88. #pragma omp parallel
  89. { // Compute pole
  90. Integer tid = omp_get_thread_num();
  91. Integer omp_p = omp_get_num_threads();
  92. Long a = (tid + 0) * N / omp_p;
  93. Long b = (tid + 1) * N / omp_p;
  94. for (Long i = a; i < b; i++) {
  95. Long idx = 0;
  96. Real P_[2] = {0, 0};
  97. for (Long j = 0; j < p0 + 1; j++) {
  98. P_[0] += S[i*M+idx] * QP[0][j];
  99. P_[1] += S[i*M+idx] * QP[1][j];
  100. idx += 2*(j+1);
  101. }
  102. P[2*i+0] = P_[0];
  103. P[2*i+1] = P_[1];
  104. }
  105. }
  106. }
  107. if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
  108. #pragma omp parallel
  109. { // Compute pole
  110. Integer tid = omp_get_thread_num();
  111. Integer omp_p = omp_get_num_threads();
  112. Long a = (tid + 0) * N / omp_p;
  113. Long b = (tid + 1) * N / omp_p;
  114. for (Long i = a; i < b; i++) {
  115. Real P_[2] = {0, 0};
  116. for (Long j = 0; j < p0 + 1; j++) {
  117. P_[0] += S[i*M+j] * QP[0][j];
  118. P_[1] += S[i*M+j] * QP[1][j];
  119. }
  120. P[2*i+0] = P_[0];
  121. P[2*i+1] = P_[1];
  122. }
  123. }
  124. }
  125. }
  126. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
  127. typedef double VTKReal;
  128. Vector<Real> SS;
  129. if (S == nullptr) {
  130. Integer p = 2;
  131. Integer Ncoeff = (p + 1) * (p + 1);
  132. Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
  133. SSS.SetZero();
  134. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  135. SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
  136. SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
  137. SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
  138. SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
  139. S = &SS;
  140. }
  141. Vector<Real> X, Xp, V, Vp;
  142. { // Upsample X
  143. const Vector<Real>& X0=*S;
  144. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
  145. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
  146. }
  147. if(v_ptr){ // Upsample V
  148. const Vector<Real>& X0=*v_ptr;
  149. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
  150. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
  151. }
  152. std::vector<VTKReal> point_coord;
  153. std::vector<VTKReal> point_value;
  154. std::vector<int32_t> poly_connect;
  155. std::vector<int32_t> poly_offset;
  156. { // Set point_coord, point_value, poly_connect
  157. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  158. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  159. for(Long k=0;k<N_ves;k++){ // Set point_coord
  160. Real C[COORD_DIM]={0,0,0};
  161. if(period>0){
  162. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  163. for(Long i=0;i<p1+1;i++){
  164. for(Long j=0;j<2*p1;j++){
  165. for(Integer l=0;l<COORD_DIM;l++){
  166. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  167. }
  168. }
  169. }
  170. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  171. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  172. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  173. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  174. }
  175. for(Long i=0;i<p1+1;i++){
  176. for(Long j=0;j<2*p1;j++){
  177. for(Integer l=0;l<COORD_DIM;l++){
  178. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  179. }
  180. }
  181. }
  182. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  183. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  184. }
  185. if(v_ptr) {
  186. Long data__dof = V.Dim() / (2*p1*(p1+1));
  187. for(Long k=0;k<N_ves;k++){ // Set point_value
  188. for(Long i=0;i<p1+1;i++){
  189. for(Long j=0;j<2*p1;j++){
  190. for(Long l=0;l<data__dof;l++){
  191. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  192. }
  193. }
  194. }
  195. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  196. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  197. }
  198. }
  199. for(Long k=0;k<N_ves;k++){
  200. for(Long j=0;j<2*p1;j++){
  201. Long i0= 0;
  202. Long i1=p1;
  203. Long j0=((j+0) );
  204. Long j1=((j+1)%(2*p1));
  205. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  206. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  207. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  208. poly_offset.push_back(poly_connect.size());
  209. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  210. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  211. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  212. poly_offset.push_back(poly_connect.size());
  213. }
  214. for(Long i=0;i<p1;i++){
  215. for(Long j=0;j<2*p1;j++){
  216. Long i0=((i+0) );
  217. Long i1=((i+1) );
  218. Long j0=((j+0) );
  219. Long j1=((j+1)%(2*p1));
  220. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  221. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  222. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  223. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  224. poly_offset.push_back(poly_connect.size());
  225. }
  226. }
  227. }
  228. }
  229. Integer np = comm.Size();
  230. Integer myrank = comm.Rank();
  231. std::vector<VTKReal>& coord=point_coord;
  232. std::vector<VTKReal>& value=point_value;
  233. std::vector<int32_t>& connect=poly_connect;
  234. std::vector<int32_t>& offset=poly_offset;
  235. Long pt_cnt=coord.size()/COORD_DIM;
  236. Long poly_cnt=poly_offset.size();
  237. // Open file for writing.
  238. std::stringstream vtufname;
  239. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  240. std::ofstream vtufile;
  241. vtufile.open(vtufname.str().c_str());
  242. if(vtufile.fail()) return;
  243. bool isLittleEndian;
  244. { // Set isLittleEndian
  245. uint16_t number = 0x1;
  246. uint8_t *numPtr = (uint8_t*)&number;
  247. isLittleEndian=(numPtr[0] == 1);
  248. }
  249. // Proceed to write to file.
  250. Long data_size=0;
  251. vtufile<<"<?xml version=\"1.0\"?>\n";
  252. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  253. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  254. //===========================================================================
  255. vtufile<<" <PolyData>\n";
  256. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  257. //---------------------------------------------------------------------------
  258. vtufile<<" <Points>\n";
  259. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  260. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  261. vtufile<<" </Points>\n";
  262. //---------------------------------------------------------------------------
  263. if(value.size()){ // value
  264. vtufile<<" <PointData>\n";
  265. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  266. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  267. vtufile<<" </PointData>\n";
  268. }
  269. //---------------------------------------------------------------------------
  270. vtufile<<" <Polys>\n";
  271. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  272. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  273. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  274. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  275. vtufile<<" </Polys>\n";
  276. //---------------------------------------------------------------------------
  277. vtufile<<" </Piece>\n";
  278. vtufile<<" </PolyData>\n";
  279. //===========================================================================
  280. vtufile<<" <AppendedData encoding=\"raw\">\n";
  281. vtufile<<" _";
  282. int32_t block_size;
  283. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  284. if(value.size()){ // value
  285. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  286. }
  287. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  288. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  289. vtufile<<"\n";
  290. vtufile<<" </AppendedData>\n";
  291. //===========================================================================
  292. vtufile<<"</VTKFile>\n";
  293. vtufile.close();
  294. if(myrank) return;
  295. std::stringstream pvtufname;
  296. pvtufname<<fname<<".pvtp";
  297. std::ofstream pvtufile;
  298. pvtufile.open(pvtufname.str().c_str());
  299. if(pvtufile.fail()) return;
  300. pvtufile<<"<?xml version=\"1.0\"?>\n";
  301. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  302. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  303. pvtufile<<" <PPoints>\n";
  304. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  305. pvtufile<<" </PPoints>\n";
  306. if(value.size()){ // value
  307. pvtufile<<" <PPointData>\n";
  308. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  309. pvtufile<<" </PPointData>\n";
  310. }
  311. {
  312. // Extract filename from path.
  313. std::stringstream vtupath;
  314. vtupath<<'/'<<fname;
  315. std::string pathname = vtupath.str();
  316. auto found = pathname.find_last_of("/\\");
  317. std::string fname_ = pathname.substr(found+1);
  318. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  319. }
  320. pvtufile<<" </PPolyData>\n";
  321. pvtufile<<"</VTKFile>\n";
  322. pvtufile.close();
  323. }
  324. template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
  325. Long N = X.Dim() / (Np*Nt);
  326. assert(X.Dim() == N*Np*Nt);
  327. assert(N % COORD_DIM == 0);
  328. Vector<Real> B0(N*Nt*Np);
  329. { // Set B0
  330. B0 = X;
  331. const auto& Y = LegendreNodes(Nt - 1);
  332. assert(Y.Dim() == Nt);
  333. for (Long k = 0; k < N; k++) {
  334. if (k % COORD_DIM) {
  335. for (Long i = 0; i < Nt; i++) {
  336. Real s = 1/sqrt<Real>(1 - Y[i]*Y[i]);
  337. for (Long j = 0; j < Np; j++) {
  338. B0[(k*Nt+i)*Np+j] *= s;
  339. }
  340. }
  341. }
  342. }
  343. }
  344. Long p_ = p0 + 1;
  345. Long M0 = (p0+1)*(p0+1);
  346. Long M_ = (p_+1)*(p_+1);
  347. Vector<Real> B1(N*M_);
  348. Grid2SHC_(B0, Nt, Np, p_, B1);
  349. Vector<Real> B2(N*M0);
  350. const Complex<Real> imag(0,1);
  351. for (Long i=0; i<N; i+=COORD_DIM) {
  352. for (Long m=0; m<=p0; m++) {
  353. for (Long n=m; n<=p0; n++) {
  354. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  355. Complex<Real> c;
  356. if (0<=m && m<=n && n<=p) {
  357. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  358. Long idx_imag = idx_real + (p+1-m)*N;
  359. c.real = coeff[idx_real];
  360. if (m) c.imag = coeff[idx_imag];
  361. }
  362. return c;
  363. };
  364. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  365. if (0<=m && m<=n && n<=p) {
  366. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  367. Long idx_imag = idx_real + (p+1-m)*N;
  368. coeff[idx_real] = c.real;
  369. if (m) coeff[idx_imag] = c.imag;
  370. }
  371. };
  372. auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
  373. auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
  374. auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
  375. Complex<Real> phiY, phiG, phiX;
  376. { // (phiG, phiX) <-- (gt, gp)
  377. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  378. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  379. phiY = gr(n,m);
  380. phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  381. phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  382. }
  383. auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
  384. auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
  385. if (n==0) {
  386. phiW = 0;
  387. phiX = 0;
  388. }
  389. write_coeff(phiV, B2, i+0, p0, n, m);
  390. write_coeff(phiW, B2, i+1, p0, n, m);
  391. write_coeff(phiX, B2, i+2, p0, n, m);
  392. }
  393. }
  394. }
  395. SHCArrange0(B2, p0, S, arrange);
  396. }
  397. template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X) {
  398. Vector<Real> B0;
  399. SHCArrange1(S, arrange, p0, B0);
  400. Long p_ = p0 + 1;
  401. Long M0 = (p0+1)*(p0+1);
  402. Long M_ = (p_+1)*(p_+1);
  403. Long N = B0.Dim() / M0;
  404. assert(B0.Dim() == N*M0);
  405. assert(N % COORD_DIM == 0);
  406. Vector<Real> B1(N*M_);
  407. const Complex<Real> imag(0,1);
  408. for (Long i=0; i<N; i+=COORD_DIM) {
  409. for (Long m=0; m<=p_; m++) {
  410. for (Long n=m; n<=p_; n++) {
  411. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  412. Complex<Real> c;
  413. if (0<=m && m<=n && n<=p) {
  414. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  415. Long idx_imag = idx_real + (p+1-m)*N;
  416. c.real = coeff[idx_real];
  417. if (m) c.imag = coeff[idx_imag];
  418. }
  419. return c;
  420. };
  421. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  422. if (0<=m && m<=n && n<=p) {
  423. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  424. Long idx_imag = idx_real + (p+1-m)*N;
  425. coeff[idx_real] = c.real;
  426. if (m) coeff[idx_imag] = c.imag;
  427. }
  428. };
  429. auto phiG = [&](Long n, Long m) {
  430. auto phiV = read_coeff(B0, i+0, p0, n, m);
  431. auto phiW = read_coeff(B0, i+1, p0, n, m);
  432. return phiV + phiW;
  433. };
  434. auto phiY = [&](Long n, Long m) {
  435. auto phiV = read_coeff(B0, i+0, p0, n, m);
  436. auto phiW = read_coeff(B0, i+1, p0, n, m);
  437. return -phiV * (n + 1) + phiW * n;
  438. };
  439. auto phiX = [&](Long n, Long m) {
  440. return read_coeff(B0, i+2, p0, n, m);
  441. };
  442. Complex<Real> gr, gt, gp;
  443. { // (gt, gp) <-- (phiG, phiX)
  444. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  445. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  446. gr = phiY(n,m);
  447. gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
  448. gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
  449. }
  450. write_coeff(gr, B1, i+0, p_, n, m);
  451. write_coeff(gt, B1, i+1, p_, n, m);
  452. write_coeff(gp, B1, i+2, p_, n, m);
  453. }
  454. }
  455. }
  456. if (X) { // Set X
  457. SHC2Grid_(B1, p_, Nt, Np, X);
  458. const auto& Y = LegendreNodes(Nt - 1);
  459. assert(Y.Dim() == Nt);
  460. for (Long k = 0; k < N; k++) {
  461. if (k % COORD_DIM) {
  462. for (Long i = 0; i < Nt; i++) {
  463. Real s = 1/sqrt<Real>(1 - Y[i]*Y[i]);
  464. for (Long j = 0; j < Np; j++) {
  465. (*X)[(k*Nt+i)*Np+j] *= s;
  466. }
  467. }
  468. }
  469. }
  470. }
  471. }
  472. template <class Real> void SphericalHarmonics<Real>::VecSHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
  473. Long M = (p0+1) * (p0+1);
  474. Long dof;
  475. Matrix<Real> B1;
  476. { // Set B1, dof
  477. Vector<Real> B0;
  478. SHCArrange1(S, arrange, p0, B0);
  479. dof = B0.Dim() / M / COORD_DIM;
  480. assert(B0.Dim() == dof * COORD_DIM * M);
  481. B1.ReInit(dof, COORD_DIM * M);
  482. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  483. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  484. }
  485. assert(B1.Dim(1) == COORD_DIM * M);
  486. assert(B1.Dim(0) == dof);
  487. Matrix<Real> SHBasis;
  488. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  489. assert(SHBasis.Dim(1) == COORD_DIM * M);
  490. Long N = SHBasis.Dim(0);
  491. { // Set X
  492. if (X.Dim() != N*dof) X.ReInit(N * dof);
  493. for (Long k0 = 0; k0 < N; k0++) {
  494. for (Long k1 = 0; k1 < dof * COORD_DIM; k1++) {
  495. Real X_ = 0;
  496. for (Long i = 0; i < COORD_DIM * M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
  497. X[k0 * dof * COORD_DIM + k1] = X_;
  498. }
  499. }
  500. }
  501. }
  502. template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
  503. const auto& Mf = OpFourierInv(Np);
  504. assert(Mf.Dim(0) == Np);
  505. const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
  506. assert((Long)Ml.size() == p1+1);
  507. Long N = X.Dim() / (Np*Nt);
  508. assert(X.Dim() == N*Np*Nt);
  509. Vector<Real> B0((2*p1+1) * N*Nt);
  510. #pragma omp parallel
  511. { // B0 <-- Transpose(FFT(X))
  512. Integer tid=omp_get_thread_num();
  513. Integer omp_p=omp_get_num_threads();
  514. Long a=(tid+0)*N*Nt/omp_p;
  515. Long b=(tid+1)*N*Nt/omp_p;
  516. Vector<Real> buff(Mf.Dim(1));
  517. Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
  518. Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
  519. const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
  520. for (Long i = a; i < b; i++) {
  521. { // buff <-- FFT(Xi)
  522. const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
  523. Mf.Execute(Xi, buff);
  524. }
  525. { // B0 <-- Transpose(buff)
  526. B0_[0][i] = buff[0]; // skipping buff[1] == 0
  527. for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
  528. for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
  529. }
  530. }
  531. }
  532. if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
  533. #pragma omp parallel
  534. { // Evaluate Legendre polynomial
  535. Integer tid=omp_get_thread_num();
  536. Integer omp_p=omp_get_num_threads();
  537. Long offset0=0;
  538. Long offset1=0;
  539. for (Long i = 0; i < p1+1; i++) {
  540. Long N_ = (i==0 ? N : 2*N);
  541. Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
  542. Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
  543. { // Mout = Min * Ml[i] // split between threads
  544. Long a=(tid+0)*N_/omp_p;
  545. Long b=(tid+1)*N_/omp_p;
  546. if (a < b) {
  547. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  548. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  549. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  550. }
  551. }
  552. offset0+=Min .Dim(0)*Min .Dim(1);
  553. offset1+=Mout.Dim(0)*Mout.Dim(1);
  554. }
  555. assert(offset0 == B0.Dim());
  556. assert(offset1 == B1.Dim());
  557. }
  558. B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  559. }
  560. template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
  561. Long M = (p1+1)*(p1+1);
  562. Long N = B1.Dim() / M;
  563. assert(B1.Dim() == N*M);
  564. if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
  565. Long M = 2*(p1+1)*(p1+1);
  566. if(S.Dim() != N * M) S.ReInit(N * M);
  567. #pragma omp parallel
  568. { // S <-- Rearrange(B1)
  569. Integer tid=omp_get_thread_num();
  570. Integer omp_p=omp_get_num_threads();
  571. Long a=(tid+0)*N/omp_p;
  572. Long b=(tid+1)*N/omp_p;
  573. for (Long i = a; i < b; i++) {
  574. Long offset = 0;
  575. for (Long j = 0; j < p1+1; j++) {
  576. Long len = p1+1 - j;
  577. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  578. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  579. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
  580. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  581. offset += len;
  582. }
  583. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  584. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  585. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  586. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  587. offset += len;
  588. } else {
  589. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  590. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
  591. }
  592. }
  593. }
  594. }
  595. }
  596. if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
  597. Long M = (p1+1)*(p1+2);
  598. if(S.Dim() != N * M) S.ReInit(N * M);
  599. #pragma omp parallel
  600. { // S <-- Rearrange(B1)
  601. Integer tid=omp_get_thread_num();
  602. Integer omp_p=omp_get_num_threads();
  603. Long a=(tid+0)*N/omp_p;
  604. Long b=(tid+1)*N/omp_p;
  605. for (Long i = a; i < b; i++) {
  606. Long offset = 0;
  607. for (Long j = 0; j < p1+1; j++) {
  608. Long len = p1+1 - j;
  609. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  610. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  611. Iterator<Real> S_ = S .begin() + i*M + 0;
  612. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  613. offset += len;
  614. }
  615. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  616. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  617. Iterator<Real> S_ = S .begin() + i*M + 1;
  618. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  619. offset += len;
  620. } else {
  621. Iterator<Real> S_ = S .begin() + i*M + 1;
  622. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
  623. }
  624. }
  625. }
  626. }
  627. }
  628. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
  629. Long M = (p1+1)*(p1+1);
  630. if(S.Dim() != N * M) S.ReInit(N * M);
  631. #pragma omp parallel
  632. { // S <-- Rearrange(B1)
  633. Integer tid=omp_get_thread_num();
  634. Integer omp_p=omp_get_num_threads();
  635. Long a=(tid+0)*N/omp_p;
  636. Long b=(tid+1)*N/omp_p;
  637. for (Long i = a; i < b; i++) {
  638. Long offset = 0;
  639. for (Long j = 0; j < p1+1; j++) {
  640. Long len = p1+1 - j;
  641. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  642. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  643. Iterator<Real> S_ = S .begin() + i*M + offset;
  644. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  645. offset += len;
  646. }
  647. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  648. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  649. Iterator<Real> S_ = S .begin() + i*M + offset;
  650. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  651. offset += len;
  652. }
  653. }
  654. }
  655. }
  656. }
  657. }
  658. template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
  659. Long M, N;
  660. { // Set M, N
  661. M = 0;
  662. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  663. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  664. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  665. if (M == 0) return;
  666. N = S.Dim() / M;
  667. assert(S.Dim() == N * M);
  668. }
  669. if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
  670. if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
  671. #pragma omp parallel
  672. { // B0 <-- Rearrange(S)
  673. Integer tid=omp_get_thread_num();
  674. Integer omp_p=omp_get_num_threads();
  675. Long a=(tid+0)*N/omp_p;
  676. Long b=(tid+1)*N/omp_p;
  677. for (Long i = a; i < b; i++) {
  678. Long offset = 0;
  679. for (Long j = 0; j < p0+1; j++) {
  680. Long len = p0+1 - j;
  681. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  682. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  683. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
  684. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  685. offset += len;
  686. }
  687. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  688. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  689. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
  690. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  691. offset += len;
  692. }
  693. }
  694. }
  695. }
  696. }
  697. if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
  698. #pragma omp parallel
  699. { // B0 <-- Rearrange(S)
  700. Integer tid=omp_get_thread_num();
  701. Integer omp_p=omp_get_num_threads();
  702. Long a=(tid+0)*N/omp_p;
  703. Long b=(tid+1)*N/omp_p;
  704. for (Long i = a; i < b; i++) {
  705. Long offset = 0;
  706. for (Long j = 0; j < p0+1; j++) {
  707. Long len = p0+1 - j;
  708. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  709. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  710. ConstIterator<Real> S_ = S .begin() + i*M + 0;
  711. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  712. offset += len;
  713. }
  714. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  715. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  716. ConstIterator<Real> S_ = S .begin() + i*M + 1;
  717. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  718. offset += len;
  719. }
  720. }
  721. }
  722. }
  723. }
  724. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
  725. #pragma omp parallel
  726. { // B0 <-- Rearrange(S)
  727. Integer tid=omp_get_thread_num();
  728. Integer omp_p=omp_get_num_threads();
  729. Long a=(tid+0)*N/omp_p;
  730. Long b=(tid+1)*N/omp_p;
  731. for (Long i = a; i < b; i++) {
  732. Long offset = 0;
  733. for (Long j = 0; j < p0+1; j++) {
  734. Long len = p0+1 - j;
  735. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  736. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  737. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  738. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  739. offset += len;
  740. }
  741. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  742. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  743. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  744. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  745. offset += len;
  746. }
  747. }
  748. }
  749. }
  750. }
  751. }
  752. template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  753. const auto& Mf = OpFourier(Np);
  754. assert(Mf.Dim(1) == Np);
  755. const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
  756. const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
  757. assert((Long)Ml .size() == p0+1);
  758. assert((Long)Mdl.size() == p0+1);
  759. Long N = B0.Dim() / ((p0+1)*(p0+1));
  760. assert(B0.Dim() == N*(p0+1)*(p0+1));
  761. if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
  762. if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
  763. if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
  764. Vector<Real> B1(N*(2*p0+1)*Nt);
  765. if(X || X_phi){
  766. #pragma omp parallel
  767. { // Evaluate Legendre polynomial
  768. Integer tid=omp_get_thread_num();
  769. Integer omp_p=omp_get_num_threads();
  770. Long offset0=0;
  771. Long offset1=0;
  772. for(Long i=0;i<p0+1;i++){
  773. Long N_ = (i==0 ? N : 2*N);
  774. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  775. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  776. { // Mout = Min * Ml[i] // split between threads
  777. Long a=(tid+0)*N_/omp_p;
  778. Long b=(tid+1)*N_/omp_p;
  779. if(a<b){
  780. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  781. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  782. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  783. }
  784. }
  785. offset0+=Min .Dim(0)*Min .Dim(1);
  786. offset1+=Mout.Dim(0)*Mout.Dim(1);
  787. }
  788. }
  789. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  790. #pragma omp parallel
  791. { // Transpose and evaluate Fourier
  792. Integer tid=omp_get_thread_num();
  793. Integer omp_p=omp_get_num_threads();
  794. Long a=(tid+0)*N*Nt/omp_p;
  795. Long b=(tid+1)*N*Nt/omp_p;
  796. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  797. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  798. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  799. for (Long i = a; i < b; i++) {
  800. { // buff <-- Transpose(B1)
  801. buff[0] = B1_[0][i];
  802. buff[1] = 0;
  803. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  804. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  805. }
  806. { // X <-- FFT(buff)
  807. Vector<Real> Xi(Np, X->begin() + Np * i, false);
  808. Mf.Execute(buff, Xi);
  809. }
  810. if(X_phi){ // Evaluate Fourier gradient
  811. { // buff <-- Transpose(B1)
  812. buff[0] = 0;
  813. buff[1] = 0;
  814. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  815. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  816. for (Long j = 1; j < buff.Dim()/2; j++) {
  817. Real x = buff[2*j+0];
  818. Real y = buff[2*j+1];
  819. buff[2*j+0] = -j*y;
  820. buff[2*j+1] = j*x;
  821. }
  822. }
  823. { // X_phi <-- FFT(buff)
  824. Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
  825. Mf.Execute(buff, Xi);
  826. }
  827. }
  828. }
  829. }
  830. }
  831. if(X_theta){
  832. #pragma omp parallel
  833. { // Evaluate Legendre gradient
  834. Integer tid=omp_get_thread_num();
  835. Integer omp_p=omp_get_num_threads();
  836. Long offset0=0;
  837. Long offset1=0;
  838. for(Long i=0;i<p0+1;i++){
  839. Long N_ = (i==0 ? N : 2*N);
  840. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  841. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  842. { // Mout = Min * Mdl[i] // split between threads
  843. Long a=(tid+0)*N_/omp_p;
  844. Long b=(tid+1)*N_/omp_p;
  845. if(a<b){
  846. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  847. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  848. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  849. }
  850. }
  851. offset0+=Min .Dim(0)*Min .Dim(1);
  852. offset1+=Mout.Dim(0)*Mout.Dim(1);
  853. }
  854. }
  855. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  856. #pragma omp parallel
  857. { // Transpose and evaluate Fourier
  858. Integer tid=omp_get_thread_num();
  859. Integer omp_p=omp_get_num_threads();
  860. Long a=(tid+0)*N*Nt/omp_p;
  861. Long b=(tid+1)*N*Nt/omp_p;
  862. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  863. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  864. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  865. for (Long i = a; i < b; i++) {
  866. { // buff <-- Transpose(B1)
  867. buff[0] = B1_[0][i];
  868. buff[1] = 0;
  869. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  870. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  871. }
  872. { // Xi <-- FFT(buff)
  873. Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
  874. Mf.Execute(buff, Xi);
  875. }
  876. }
  877. }
  878. }
  879. }
  880. template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  881. Long N = X.Dim();
  882. Long Npoly = (degree + 1) * (degree + 2) / 2;
  883. if (poly_val.Dim() != Npoly * N) poly_val.ReInit(Npoly * N);
  884. Real fact = 1 / sqrt<Real>(4 * const_pi<Real>());
  885. Vector<Real> u(N);
  886. for (Long n = 0; n < N; n++) {
  887. u[n] = (X[n]*X[n]<1 ? sqrt<Real>(1-X[n]*X[n]) : 0);
  888. poly_val[n] = fact;
  889. }
  890. Long idx = 0;
  891. Long idx_nxt = 0;
  892. for (Long i = 1; i <= degree; i++) {
  893. idx_nxt += N*(degree-i+2);
  894. Real c = sqrt<Real>((2*i+1)/(Real)(2*i));
  895. for (Long n = 0; n < N; n++) {
  896. poly_val[idx_nxt+n] = -poly_val[idx+n] * u[n] * c;
  897. }
  898. idx = idx_nxt;
  899. }
  900. idx = 0;
  901. for (Long m = 0; m < degree; m++) {
  902. for (Long n = 0; n < N; n++) {
  903. Real pmm = 0;
  904. Real pmmp1 = poly_val[idx+n];
  905. for (Long ll = m + 1; ll <= degree; ll++) {
  906. Real a = sqrt<Real>(((2*ll-1)*(2*ll+1) ) / (Real)((ll-m)*(ll+m) ));
  907. Real b = sqrt<Real>(((2*ll+1)*(ll+m-1)*(ll-m-1)) / (Real)((ll-m)*(ll+m)*(2*ll-3)));
  908. Real pll = X[n]*a*pmmp1 - b*pmm;
  909. pmm = pmmp1;
  910. pmmp1 = pll;
  911. poly_val[idx + N*(ll-m) + n] = pll;
  912. }
  913. }
  914. idx += N * (degree - m + 1);
  915. }
  916. }
  917. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  918. Long N = X.Dim();
  919. Long Npoly = (degree + 1) * (degree + 2) / 2;
  920. if (poly_val.Dim() != N * Npoly) poly_val.ReInit(N * Npoly);
  921. Vector<Real> leg_poly(Npoly * N);
  922. LegPoly(leg_poly, X, degree);
  923. for (Long m = 0; m <= degree; m++) {
  924. for (Long n = m; n <= degree; n++) {
  925. ConstIterator<Real> Pn = leg_poly.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  926. ConstIterator<Real> Pn_ = leg_poly.begin() + N * ((degree * 2 - m + 0) * (m + 1) / 2 + n) * (m < n);
  927. Iterator <Real> Hn = poly_val.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  928. Real c2 = sqrt<Real>(m<n ? (n+m+1)*(n-m) : 0);
  929. for (Long i = 0; i < N; i++) {
  930. Real c1 = (X[i]*X[i]<1 ? m/sqrt<Real>(1-X[i]*X[i]) : 0);
  931. Hn[i] = -c1*X[i]*Pn[i] - c2*Pn_[i];
  932. }
  933. }
  934. }
  935. }
  936. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p){
  937. assert(p<SCTL_SHMAXDEG);
  938. Vector<Real>& Qx=MatrixStore().Qx_[p];
  939. if(!Qx.Dim()){
  940. Vector<double> qx1(p+1);
  941. Vector<double> qw1(p+1);
  942. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  943. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  944. if (Qx.Dim() != p+1) Qx.ReInit(p+1);
  945. for (Long i = 0; i < p + 1; i++) Qx[i] = -qx1[i];
  946. }
  947. return Qx;
  948. }
  949. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p){
  950. assert(p<SCTL_SHMAXDEG);
  951. Vector<Real>& Qw=MatrixStore().Qw_[p];
  952. if(!Qw.Dim()){
  953. Vector<double> qx1(p+1);
  954. Vector<double> qw1(p+1);
  955. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  956. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  957. if (Qw.Dim() != p+1) Qw.ReInit(p+1);
  958. for (Long i = 0; i < p + 1; i++) Qw[i] = qw1[i];
  959. }
  960. return Qw;
  961. }
  962. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  963. assert(p1<SCTL_SHMAXDEG);
  964. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  965. if(!Sw.Dim()){
  966. const Vector<Real>& qx1 = LegendreNodes(p1);
  967. const Vector<Real>& qw1 = LegendreWeights(p1);
  968. std::vector<Real> Yf(p1+1,0);
  969. { // Set Yf
  970. Vector<Real> x0(1); x0=1.0;
  971. Vector<Real> alp0((p1+1)*(p1+2)/2);
  972. LegPoly(alp0, x0, p1);
  973. Vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  974. LegPoly(alp, qx1, p1);
  975. for(Long j=0;j<p1+1;j++){
  976. for(Long i=0;i<p1+1;i++){
  977. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  978. }
  979. }
  980. }
  981. Sw.ReInit(p1+1);
  982. for(Long i=0;i<p1+1;i++){
  983. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  984. }
  985. }
  986. return Sw;
  987. }
  988. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  989. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  990. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  991. if(!Mf.Dim(0)){
  992. const Real SQRT2PI=sqrt(2*M_PI);
  993. { // Set Mf
  994. Matrix<Real> M(2*p0,2*p1);
  995. for(Long j=0;j<2*p1;j++){
  996. M[0][j]=SQRT2PI*1.0;
  997. for(Long k=1;k<p0;k++){
  998. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  999. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  1000. }
  1001. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  1002. }
  1003. Mf=M;
  1004. }
  1005. }
  1006. return Mf;
  1007. }
  1008. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  1009. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1010. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  1011. if(!Mf.Dim(0)){
  1012. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  1013. { // Set Mf
  1014. Matrix<Real> M(2*p0,2*p1);
  1015. M.SetZero();
  1016. if(p1>p0) p1=p0;
  1017. for(Long j=0;j<2*p0;j++){
  1018. M[j][0]=INVSQRT2PI*0.5;
  1019. for(Long k=1;k<p1;k++){
  1020. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  1021. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  1022. }
  1023. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  1024. }
  1025. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  1026. Mf=M;
  1027. }
  1028. }
  1029. return Mf;
  1030. }
  1031. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  1032. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1033. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  1034. if(!Mdf.Dim(0)){
  1035. const Real SQRT2PI=sqrt(2*M_PI);
  1036. { // Set Mdf_
  1037. Matrix<Real> M(2*p0,2*p1);
  1038. for(Long j=0;j<2*p1;j++){
  1039. M[0][j]=SQRT2PI*0.0;
  1040. for(Long k=1;k<p0;k++){
  1041. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  1042. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  1043. }
  1044. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  1045. }
  1046. Mdf=M;
  1047. }
  1048. }
  1049. return Mdf;
  1050. }
  1051. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourier(Long Np){
  1052. assert(Np<SCTL_SHMAXDEG);
  1053. auto& Mf =MatrixStore().Mfftinv_ [Np];
  1054. #pragma omp critical (SCTL_FFT_PLAN0)
  1055. if(!Mf.Dim(0)){
  1056. StaticArray<Long,1> fft_dim = {Np};
  1057. Mf.Setup(FFT_Type::C2R, 1, Vector<Long>(1,fft_dim,false));
  1058. }
  1059. return Mf;
  1060. }
  1061. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourierInv(Long Np){
  1062. assert(Np<SCTL_SHMAXDEG);
  1063. auto& Mf =MatrixStore().Mfft_ [Np];
  1064. #pragma omp critical (SCTL_FFT_PLAN1)
  1065. if(!Mf.Dim(0)){
  1066. StaticArray<Long,1> fft_dim = {Np};
  1067. Mf.Setup(FFT_Type::R2C, 1, Vector<Long>(1,fft_dim,false));
  1068. }
  1069. return Mf;
  1070. }
  1071. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  1072. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1073. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  1074. if(!Ml.size()){
  1075. const Vector<Real>& qx1 = LegendreNodes(p1);
  1076. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1077. LegPoly(alp, qx1, p0);
  1078. Ml.resize(p0+1);
  1079. auto ptr = alp.begin();
  1080. for(Long i=0;i<=p0;i++){
  1081. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1082. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1083. }
  1084. }
  1085. return Ml;
  1086. }
  1087. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  1088. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1089. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  1090. if(!Ml.size()){
  1091. const Vector<Real>& qx1 = LegendreNodes(p0);
  1092. const Vector<Real>& qw1 = LegendreWeights(p0);
  1093. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  1094. LegPoly(alp, qx1, p1);
  1095. Ml.resize(p1+1);
  1096. auto ptr = alp.begin();
  1097. for(Long i=0;i<=p1;i++){
  1098. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  1099. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  1100. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  1101. for(Long k=0;k<qx1.Dim();k++){
  1102. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  1103. }
  1104. }
  1105. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1106. }
  1107. }
  1108. return Ml;
  1109. }
  1110. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  1111. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1112. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  1113. if(!Mdl.size()){
  1114. const Vector<Real>& qx1 = LegendreNodes(p1);
  1115. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1116. LegPolyDeriv(alp, qx1, p0);
  1117. Mdl.resize(p0+1);
  1118. auto ptr = alp.begin();
  1119. for(Long i=0;i<=p0;i++){
  1120. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1121. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  1122. }
  1123. }
  1124. return Mdl;
  1125. }
  1126. template <class Real> void SphericalHarmonics<Real>::SHBasisEval(Long p0, const Vector<Real>& cos_theta_phi, Matrix<Real>& SHBasis) {
  1127. Long M = (p0+1) * (p0+1);
  1128. Long N = cos_theta_phi.Dim() / 2;
  1129. assert(cos_theta_phi.Dim() == N * 2);
  1130. Vector<Complex<Real>> exp_phi(N);
  1131. Matrix<Real> LegP((p0+1)*(p0+2)/2, N);
  1132. { // Set exp_phi, LegP
  1133. Vector<Real> cos_theta(N);
  1134. for (Long i = 0; i < N; i++) { // Set cos_theta, exp_phi
  1135. cos_theta[i] = cos_theta_phi[i*2+0];
  1136. exp_phi[i].real = cos(cos_theta_phi[i*2+1]);
  1137. exp_phi[i].imag = sin(cos_theta_phi[i*2+1]);
  1138. }
  1139. Vector<Real> alp(LegP.Dim(0) * LegP.Dim(1), LegP.begin(), false);
  1140. LegPoly(alp, cos_theta, p0);
  1141. }
  1142. { // Set SHBasis
  1143. SHBasis.ReInit(N, M);
  1144. Real s = 4 * sqrt<Real>(const_pi<Real>());
  1145. for (Long k0 = 0; k0 < N; k0++) {
  1146. Complex<Real> exp_phi_ = 1;
  1147. Complex<Real> exp_phi1 = exp_phi[k0];
  1148. for (Long m = 0; m <= p0; m++) {
  1149. for (Long n = m; n <= p0; n++) {
  1150. Long poly_idx = (2 * p0 - m + 1) * m / 2 + n;
  1151. Long basis_idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1152. SHBasis[k0][basis_idx] = LegP[poly_idx][k0] * exp_phi_.real * s;
  1153. if (m) { // imaginary part
  1154. basis_idx += (p0+1-m);
  1155. SHBasis[k0][basis_idx] = -LegP[poly_idx][k0] * exp_phi_.imag * s;
  1156. } else {
  1157. SHBasis[k0][basis_idx] = SHBasis[k0][basis_idx] * 0.5;
  1158. }
  1159. }
  1160. exp_phi_ = exp_phi_ * exp_phi1;
  1161. }
  1162. }
  1163. }
  1164. assert(SHBasis.Dim(0) == N);
  1165. assert(SHBasis.Dim(1) == M);
  1166. }
  1167. template <class Real> void SphericalHarmonics<Real>::VecSHBasisEval(Long p0, const Vector<Real>& cos_theta_phi, Matrix<Real>& SHBasis) {
  1168. Long M = (p0+1) * (p0+1);
  1169. Long N = cos_theta_phi.Dim() / 2;
  1170. assert(cos_theta_phi.Dim() == N * 2);
  1171. Vector<Complex<Real>> exp_phi(N);
  1172. Matrix<Real> LegP((p0+1)*(p0+2)/2, N);
  1173. { // Set exp_phi, LegP
  1174. Vector<Real> cos_theta(N);
  1175. for (Long i = 0; i < N; i++) { // Set cos_theta, exp_phi
  1176. cos_theta[i] = cos_theta_phi[i*2+0];
  1177. exp_phi[i].real = cos(cos_theta_phi[i*2+1]);
  1178. exp_phi[i].imag = sin(cos_theta_phi[i*2+1]);
  1179. }
  1180. Vector<Real> alp(LegP.Dim(0) * LegP.Dim(1), LegP.begin(), false);
  1181. LegPoly(alp, cos_theta, p0);
  1182. }
  1183. { // Set SHBasis
  1184. SHBasis.ReInit(N, M);
  1185. Real s = 4 * sqrt<Real>(const_pi<Real>());
  1186. for (Long k0 = 0; k0 < N; k0++) {
  1187. Complex<Real> exp_phi_ = 1;
  1188. Complex<Real> exp_phi1 = exp_phi[k0];
  1189. for (Long m = 0; m <= p0; m++) {
  1190. for (Long n = m; n <= p0; n++) {
  1191. Long poly_idx = (2 * p0 - m + 1) * m / 2 + n;
  1192. Long basis_idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1193. SHBasis[k0][basis_idx] = LegP[poly_idx][k0] * exp_phi_.real * s;
  1194. if (m) { // imaginary part
  1195. basis_idx += (p0+1-m);
  1196. SHBasis[k0][basis_idx] = -LegP[poly_idx][k0] * exp_phi_.imag * s;
  1197. } else {
  1198. SHBasis[k0][basis_idx] = SHBasis[k0][basis_idx] * 0.5;
  1199. }
  1200. }
  1201. exp_phi_ = exp_phi_ * exp_phi1;
  1202. }
  1203. }
  1204. }
  1205. assert(SHBasis.Dim(0) == N);
  1206. assert(SHBasis.Dim(1) == M);
  1207. }
  1208. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  1209. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1210. { // Set coeff_perm
  1211. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1212. Long itr=0;
  1213. for(Long i=0;i<2*p0;i++){
  1214. Long m=(i+1)/2;
  1215. for(Long n=m;n<=p0;n++){
  1216. coeff_perm[n][i]=itr;
  1217. itr++;
  1218. }
  1219. }
  1220. }
  1221. assert(p0<SCTL_SHMAXDEG);
  1222. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  1223. if(!Mr.size()){
  1224. const Real SQRT2PI=sqrt(2*M_PI);
  1225. Long Ncoef=p0*(p0+2);
  1226. Long Ngrid=2*p0*(p0+1);
  1227. Long Naleg=(p0+1)*(p0+2)/2;
  1228. Matrix<Real> Mcoord0(3,Ngrid);
  1229. const Vector<Real>& x=LegendreNodes(p0);
  1230. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  1231. for(Long j=0;j<2*p0;j++){
  1232. Mcoord0[0][i*2*p0+j]=x[i];
  1233. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  1234. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  1235. }
  1236. }
  1237. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  1238. Matrix<Real> Mcoord1;
  1239. { // Rotate coordinates
  1240. Matrix<Real> M(COORD_DIM, COORD_DIM);
  1241. Real cos_=-x[l];
  1242. Real sin_=-sqrt(1.0-x[l]*x[l]);
  1243. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  1244. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  1245. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  1246. Mcoord1=M*Mcoord0;
  1247. }
  1248. Matrix<Real> Mleg(Naleg, Ngrid);
  1249. { // Set Mleg
  1250. const Vector<Real> Vcoord1(Mcoord1.Dim(0)*Mcoord1.Dim(1), Mcoord1.begin(), false);
  1251. Vector<Real> Vleg(Mleg.Dim(0)*Mleg.Dim(1), Mleg.begin(), false);
  1252. LegPoly(Vleg, Vcoord1, p0);
  1253. }
  1254. Vector<Real> theta(Ngrid);
  1255. for(Long i=0;i<theta.Dim();i++){ // Set theta
  1256. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]);
  1257. }
  1258. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  1259. { // Build Mcoef2grid
  1260. Long offset0=0;
  1261. Long offset1=0;
  1262. for(Long i=0;i<p0+1;i++){
  1263. Long len=p0+1-i;
  1264. { // P * cos
  1265. for(Long j=0;j<len;j++){
  1266. for(Long k=0;k<Ngrid;k++){
  1267. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  1268. }
  1269. }
  1270. offset1+=len;
  1271. }
  1272. if(i!=0 && i!=p0){ // P * sin
  1273. for(Long j=0;j<len;j++){
  1274. for(Long k=0;k<Ngrid;k++){
  1275. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  1276. }
  1277. }
  1278. offset1+=len;
  1279. }
  1280. offset0+=len;
  1281. }
  1282. assert(offset0==Naleg);
  1283. assert(offset1==Ncoef);
  1284. }
  1285. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  1286. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  1287. Grid2SHC(Vcoef2grid, p0+1, 2*p0, p0, Vcoef2coef, SHCArrange::COL_MAJOR_NONZERO);
  1288. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  1289. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  1290. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  1291. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  1292. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  1293. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  1294. }
  1295. }
  1296. Mr.push_back(M);
  1297. }
  1298. }
  1299. }
  1300. return Mr;
  1301. }
  1302. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  1303. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  1304. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  1305. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  1306. assert(p1==(Long)Ml.size()-1);
  1307. assert(p0==Mf.Dim(0)/2);
  1308. assert(p1==Mf.Dim(1)/2);
  1309. Long N=X.Dim()/(2*p0*(p0+1));
  1310. assert(N*2*p0*(p0+1)==X.Dim());
  1311. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  1312. Vector<Real> B0, B1;
  1313. B0.ReInit(N* p1*(p1+2));
  1314. B1.ReInit(N*2*p1*(p0+1));
  1315. #pragma omp parallel
  1316. { // Evaluate Fourier and transpose
  1317. Integer tid=omp_get_thread_num();
  1318. Integer omp_p=omp_get_num_threads();
  1319. Long a=(tid+0)*N*(p0+1)/omp_p;
  1320. Long b=(tid+1)*N*(p0+1)/omp_p;
  1321. const Long block_size=16;
  1322. Matrix<Real> B2(block_size,2*p1);
  1323. for(Long i0=a;i0<b;i0+=block_size){
  1324. Long i1=std::min(b,i0+block_size);
  1325. const Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  1326. Matrix<Real> Mout(i1-i0,2*p1, B2.begin(), false);
  1327. Matrix<Real>::GEMM(Mout, Min, Mf);
  1328. for(Long i=i0;i<i1;i++){
  1329. for(Long j=0;j<2*p1;j++){
  1330. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  1331. }
  1332. }
  1333. }
  1334. }
  1335. #pragma omp parallel
  1336. { // Evaluate Legendre polynomial
  1337. Integer tid=omp_get_thread_num();
  1338. Integer omp_p=omp_get_num_threads();
  1339. Long offset0=0;
  1340. Long offset1=0;
  1341. for(Long i=0;i<p1+1;i++){
  1342. Long N0=2*N;
  1343. if(i==0 || i==p1) N0=N;
  1344. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  1345. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  1346. { // Mout = Min * Ml[i] // split between threads
  1347. Long a=(tid+0)*N0/omp_p;
  1348. Long b=(tid+1)*N0/omp_p;
  1349. if(a<b){
  1350. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1351. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1352. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1353. }
  1354. }
  1355. offset0+=Min .Dim(0)*Min .Dim(1);
  1356. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1357. }
  1358. }
  1359. #pragma omp parallel
  1360. { // S <-- Rearrange(B0)
  1361. Integer tid=omp_get_thread_num();
  1362. Integer omp_p=omp_get_num_threads();
  1363. Long a=(tid+0)*N/omp_p;
  1364. Long b=(tid+1)*N/omp_p;
  1365. for(Long i=a;i<b;i++){
  1366. Long offset=0;
  1367. for(Long j=0;j<2*p1;j++){
  1368. Long len=p1+1-(j+1)/2;
  1369. Real* B_=&B0[i*len+N*offset];
  1370. Real* S_=&S[i*p1*(p1+2)+offset];
  1371. for(Long k=0;k<len;k++) S_[k]=B_[k];
  1372. offset+=len;
  1373. }
  1374. }
  1375. }
  1376. }
  1377. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  1378. const std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  1379. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1380. { // Set coeff_perm
  1381. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1382. Long itr=0;
  1383. for(Long i=0;i<2*p0;i++){
  1384. Long m=(i+1)/2;
  1385. for(Long n=m;n<=p0;n++){
  1386. coeff_perm[n][i]=itr;
  1387. itr++;
  1388. }
  1389. }
  1390. }
  1391. Long Ncoef=p0*(p0+2);
  1392. Long N=S.Dim()/Ncoef/dof;
  1393. assert(N*Ncoef*dof==S.Dim());
  1394. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  1395. const Matrix<Real> S0(N*dof, Ncoef, (Iterator<Real>)S.begin(), false);
  1396. Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, S_.begin(), false);
  1397. #pragma omp parallel
  1398. { // Construct all p0*(p0+1) rotations
  1399. Integer tid=omp_get_thread_num();
  1400. Integer omp_p=omp_get_num_threads();
  1401. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  1402. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  1403. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  1404. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  1405. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  1406. }
  1407. Long a=(tid+0)*N/omp_p;
  1408. Long b=(tid+1)*N/omp_p;
  1409. for(Long i=a;i<b;i++){
  1410. for(Long d=0;d<dof;d++){
  1411. for(Long j=0;j<p0;j++){
  1412. Long offset=0;
  1413. for(Long k=0;k<p0+1;k++){
  1414. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  1415. Long len=p0+1-k;
  1416. if(k!=0 && k!=p0){
  1417. for(Long l=0;l<len;l++){
  1418. Real x[2];
  1419. x[0]=S0[i*dof+d][offset+len*0+l];
  1420. x[1]=S0[i*dof+d][offset+len*1+l];
  1421. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  1422. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  1423. }
  1424. offset+=2*len;
  1425. }else{
  1426. for(Long l=0;l<len;l++){
  1427. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  1428. }
  1429. offset+=len;
  1430. }
  1431. }
  1432. assert(offset==Ncoef);
  1433. }
  1434. }
  1435. { // Fast rotation
  1436. for(Long k=0;k<dof*p0;k++){ // forward permutation
  1437. for(Long l=0;l<=p0;l++){
  1438. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1439. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  1440. }
  1441. }
  1442. }
  1443. for(Long t=0;t<=p0;t++){
  1444. for(Long l=0;l<=p0;l++){ // mat-vec
  1445. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  1446. }
  1447. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  1448. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  1449. for(Long l=0;l<=p0;l++){
  1450. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1451. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  1452. }
  1453. }
  1454. }
  1455. }
  1456. }
  1457. }
  1458. }
  1459. }
  1460. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  1461. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  1462. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  1463. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1464. { // Set coeff_perm
  1465. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1466. Long itr=0;
  1467. for(Long i=0;i<2*p0;i++){
  1468. Long m=(i+1)/2;
  1469. for(Long n=m;n<=p0;n++){
  1470. coeff_perm[n][i]=itr;
  1471. itr++;
  1472. }
  1473. }
  1474. }
  1475. Long Ncoef=p0*(p0+2);
  1476. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  1477. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  1478. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  1479. Matrix<Real> S0(N*dof*p0*(p0+1), Ncoef, S.begin(), false);
  1480. const Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, (Iterator<Real>)S_.begin(), false);
  1481. #pragma omp parallel
  1482. { // Transpose all p0*(p0+1) rotations
  1483. Integer tid=omp_get_thread_num();
  1484. Integer omp_p=omp_get_num_threads();
  1485. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  1486. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  1487. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  1488. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  1489. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  1490. }
  1491. Long a=(tid+0)*N/omp_p;
  1492. Long b=(tid+1)*N/omp_p;
  1493. for(Long i=a;i<b;i++){
  1494. for(Long t=0;t<p0+1;t++){
  1495. Long idx0=(i*(p0+1)+t)*p0*dof;
  1496. { // Fast rotation
  1497. const Matrix<Real> Min(p0*dof, Ncoef, (Iterator<Real>)S1[idx0], false);
  1498. for(Long k=0;k<dof*p0;k++){ // forward permutation
  1499. for(Long l=0;l<=p0;l++){
  1500. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1501. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  1502. }
  1503. }
  1504. }
  1505. for(Long l=0;l<=p0;l++){ // mat-vec
  1506. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  1507. }
  1508. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  1509. for(Long l=0;l<=p0;l++){
  1510. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  1511. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  1512. }
  1513. }
  1514. }
  1515. }
  1516. for(Long j=0;j<p0;j++){
  1517. for(Long d=0;d<dof;d++){
  1518. Long idx1=idx0+j*dof+d;
  1519. Long offset=0;
  1520. for(Long k=0;k<p0+1;k++){
  1521. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  1522. Long len=p0+1-k;
  1523. if(k!=0 && k!=p0){
  1524. for(Long l=0;l<len;l++){
  1525. Real x[2];
  1526. x[0]=B0[j*dof+d][offset+len*0+l];
  1527. x[1]=B0[j*dof+d][offset+len*1+l];
  1528. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  1529. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  1530. }
  1531. offset+=2*len;
  1532. }else{
  1533. for(Long l=0;l<len;l++){
  1534. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  1535. }
  1536. offset+=len;
  1537. }
  1538. }
  1539. assert(offset==Ncoef);
  1540. }
  1541. }
  1542. }
  1543. }
  1544. }
  1545. }
  1546. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  1547. Long Ngrid=2*p0*(p0+1);
  1548. Long Ncoef= p0*(p0+2);
  1549. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  1550. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  1551. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  1552. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  1553. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  1554. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  1555. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  1556. Long b=std::min(a+BLOCK_SIZE, Nves);
  1557. Vector<Real> _SLMatrix, _DLMatrix;
  1558. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  1559. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  1560. const Vector<Real> _S ((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  1561. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1562. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1563. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  1564. }
  1565. }
  1566. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  1567. Profile::Tic("Rotate");
  1568. Vector<Real> S0, S;
  1569. SphericalHarmonics<Real>::Grid2SHC(X0, p0+1, 2*p0, p0, S0, SHCArrange::COL_MAJOR_NONZERO);
  1570. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  1571. Profile::Toc();
  1572. Profile::Tic("Upsample");
  1573. Vector<Real> X, X_theta, X_phi, trg;
  1574. SphericalHarmonics<Real>::SHC2Grid(S, SHCArrange::COL_MAJOR_NONZERO, p0, p1+1, 2*p1, &X, &X_phi, &X_theta);
  1575. SphericalHarmonics<Real>::SHC2Pole(S, SHCArrange::COL_MAJOR_NONZERO, p0, trg);
  1576. Profile::Toc();
  1577. Profile::Tic("Stokes");
  1578. Vector<Real> SL0, DL0;
  1579. { // Stokes kernel
  1580. //Long M0=2*p0*(p0+1);
  1581. Long M1=2*p1*(p1+1);
  1582. Long N=trg.Dim()/(2*COORD_DIM);
  1583. assert(X.Dim()==M1*COORD_DIM*N);
  1584. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  1585. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  1586. const Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  1587. const Real scal_const_dl = 3.0/(4.0*M_PI);
  1588. const Real scal_const_sl = 1.0/(8.0*M_PI);
  1589. static Real eps=-1;
  1590. if(eps<0){
  1591. eps=1;
  1592. while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5;
  1593. }
  1594. #pragma omp parallel
  1595. {
  1596. Integer tid=omp_get_thread_num();
  1597. Integer omp_p=omp_get_num_threads();
  1598. Long a=(tid+0)*N/omp_p;
  1599. Long b=(tid+1)*N/omp_p;
  1600. for(Long i=a;i<b;i++){
  1601. for(Long t=0;t<2;t++){
  1602. Real tx, ty, tz;
  1603. { // Read target coordinates
  1604. tx=trg[i*2*COORD_DIM+0*2+t];
  1605. ty=trg[i*2*COORD_DIM+1*2+t];
  1606. tz=trg[i*2*COORD_DIM+2*2+t];
  1607. }
  1608. for(Long j0=0;j0<p1+1;j0++){
  1609. for(Long j1=0;j1<2*p1;j1++){
  1610. Long s=2*p1*j0+j1;
  1611. Real dx, dy, dz;
  1612. { // Compute dx, dy, dz
  1613. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  1614. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  1615. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  1616. }
  1617. Real nx, ny, nz;
  1618. { // Compute source normal
  1619. Real x_theta=X_phi[(i*COORD_DIM+0)*M1+s];
  1620. Real y_theta=X_phi[(i*COORD_DIM+1)*M1+s];
  1621. Real z_theta=X_phi[(i*COORD_DIM+2)*M1+s];
  1622. Real x_phi=X_theta[(i*COORD_DIM+0)*M1+s];
  1623. Real y_phi=X_theta[(i*COORD_DIM+1)*M1+s];
  1624. Real z_phi=X_theta[(i*COORD_DIM+2)*M1+s];
  1625. nx=(y_theta*z_phi-z_theta*y_phi);
  1626. ny=(z_theta*x_phi-x_theta*z_phi);
  1627. nz=(x_theta*y_phi-y_theta*x_phi);
  1628. }
  1629. Real area_elem=1.0;
  1630. if(SLayer){ // Compute area_elem
  1631. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  1632. }
  1633. Real rinv, rinv2;
  1634. { // Compute rinv, rinv2
  1635. Real r2=dx*dx+dy*dy+dz*dz;
  1636. rinv=1.0/sqrt(r2);
  1637. if(r2<=eps) rinv=0;
  1638. rinv2=rinv*rinv;
  1639. }
  1640. if(DLayer){
  1641. Real rinv5=rinv2*rinv2*rinv;
  1642. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  1643. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  1644. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  1645. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  1646. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  1647. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  1648. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  1649. }
  1650. if(SLayer){
  1651. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  1652. Real area_rinv2=area_rinv*rinv2;
  1653. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  1654. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  1655. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  1656. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  1657. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  1658. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  1659. }
  1660. }
  1661. }
  1662. }
  1663. }
  1664. }
  1665. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  1666. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  1667. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  1668. }
  1669. Profile::Toc();
  1670. Profile::Tic("UpsampleTranspose");
  1671. Vector<Real> SL1, DL1;
  1672. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  1673. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  1674. Profile::Toc();
  1675. Profile::Tic("RotateTranspose");
  1676. Vector<Real> SL2, DL2;
  1677. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  1678. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  1679. Profile::Toc();
  1680. Profile::Tic("Rearrange");
  1681. Vector<Real> SL3, DL3;
  1682. { // Transpose
  1683. Long Ncoef=p0*(p0+2);
  1684. Long Ngrid=2*p0*(p0+1);
  1685. { // Transpose SL2
  1686. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  1687. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  1688. #pragma omp parallel
  1689. {
  1690. Integer tid=omp_get_thread_num();
  1691. Integer omp_p=omp_get_num_threads();
  1692. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  1693. Long a=(tid+0)*N/omp_p;
  1694. Long b=(tid+1)*N/omp_p;
  1695. for(Long i=a;i<b;i++){
  1696. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  1697. for(Long k=0;k<Ncoef;k++){ // Transpose
  1698. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  1699. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  1700. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  1701. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  1702. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  1703. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  1704. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  1705. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  1706. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  1707. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  1708. }
  1709. }
  1710. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  1711. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  1712. for(Long j0=0;j0<COORD_DIM;j0++){
  1713. for(Long j1=0;j1<p0+1;j1++){
  1714. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  1715. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  1716. }
  1717. }
  1718. }
  1719. }
  1720. }
  1721. }
  1722. { // Transpose DL2
  1723. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  1724. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  1725. #pragma omp parallel
  1726. {
  1727. Integer tid=omp_get_thread_num();
  1728. Integer omp_p=omp_get_num_threads();
  1729. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  1730. Long a=(tid+0)*N/omp_p;
  1731. Long b=(tid+1)*N/omp_p;
  1732. for(Long i=a;i<b;i++){
  1733. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  1734. for(Long k=0;k<Ncoef;k++){ // Transpose
  1735. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  1736. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  1737. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  1738. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  1739. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  1740. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  1741. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  1742. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  1743. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  1744. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  1745. }
  1746. }
  1747. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  1748. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  1749. for(Long j0=0;j0<COORD_DIM;j0++){
  1750. for(Long j1=0;j1<p0+1;j1++){
  1751. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  1752. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  1753. }
  1754. }
  1755. }
  1756. }
  1757. }
  1758. }
  1759. }
  1760. Profile::Toc();
  1761. Profile::Tic("Grid2SHC");
  1762. SphericalHarmonics<Real>::Grid2SHC(SL3, p0+1, 2*p0, p0, SL, SHCArrange::COL_MAJOR_NONZERO);
  1763. SphericalHarmonics<Real>::Grid2SHC(DL3, p0+1, 2*p0, p0, DL, SHCArrange::COL_MAJOR_NONZERO);
  1764. Profile::Toc();
  1765. }
  1766. } // end namespace