sph_harm.txx 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924
  1. #include SCTL_INCLUDE(legendre_rule.hpp)
  2. // TODO: Replace work vectors with dynamic-arrays
  3. namespace SCTL_NAMESPACE {
  4. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& S, SHCArrange arrange){
  5. Long N = X.Dim() / (Np*Nt);
  6. assert(X.Dim() == N*Np*Nt);
  7. Vector<Real> B1(N*(p1+1)*(p1+1));
  8. Grid2SHC_(X, Nt, Np, p1, B1);
  9. SHCArrange0(B1, p1, S, arrange);
  10. }
  11. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_theta, Vector<Real>* X_phi){
  12. Vector<Real> B0;
  13. SHCArrange1(S, arrange, p0, B0);
  14. SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
  15. }
  16. template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& theta_phi, Vector<Real>& X) {
  17. Long M = (p0+1) * (p0+1);
  18. Long dof;
  19. Matrix<Real> B1;
  20. { // Set B1, dof
  21. Vector<Real> B0;
  22. SHCArrange1(S, arrange, p0, B0);
  23. dof = B0.Dim() / M;
  24. assert(B0.Dim() == dof * M);
  25. B1.ReInit(dof, M);
  26. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  27. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  28. }
  29. assert(B1.Dim(0) == dof);
  30. assert(B1.Dim(1) == M);
  31. Matrix<Real> SHBasis;
  32. SHBasisEval(p0, theta_phi, SHBasis);
  33. assert(SHBasis.Dim(1) == M);
  34. Long N = SHBasis.Dim(0);
  35. { // Set X
  36. if (X.Dim() != N*dof) X.ReInit(N * dof);
  37. for (Long k0 = 0; k0 < N; k0++) {
  38. for (Long k1 = 0; k1 < dof; k1++) {
  39. Real X_ = 0;
  40. for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
  41. X[k0 * dof + k1] = X_;
  42. }
  43. }
  44. }
  45. }
  46. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
  47. Vector<Real> QP[2];
  48. { // Set QP // TODO: store these weights
  49. Vector<Real> x(1), alp;
  50. const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
  51. for (Long i = 0; i < 2; i++) {
  52. x = (i ? const_pi<Real>() : 0);
  53. LegPoly_(alp, x, p0);
  54. QP[i].ReInit(p0 + 1, alp.begin());
  55. QP[i] *= SQRT2PI;
  56. }
  57. }
  58. Long M, N;
  59. { // Set M, N
  60. M = 0;
  61. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  62. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  63. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  64. if (M == 0) return;
  65. N = S.Dim() / M;
  66. assert(S.Dim() == N * M);
  67. }
  68. if(P.Dim() != N * 2) P.ReInit(N * 2);
  69. if (arrange == SHCArrange::ALL) {
  70. #pragma omp parallel
  71. { // Compute pole
  72. Integer tid = omp_get_thread_num();
  73. Integer omp_p = omp_get_num_threads();
  74. Long a = (tid + 0) * N / omp_p;
  75. Long b = (tid + 1) * N / omp_p;
  76. for (Long i = a; i < b; i++) {
  77. Real P_[2] = {0, 0};
  78. for (Long j = 0; j < p0 + 1; j++) {
  79. P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
  80. P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
  81. }
  82. P[2*i+0] = P_[0];
  83. P[2*i+1] = P_[1];
  84. }
  85. }
  86. }
  87. if (arrange == SHCArrange::ROW_MAJOR) {
  88. #pragma omp parallel
  89. { // Compute pole
  90. Integer tid = omp_get_thread_num();
  91. Integer omp_p = omp_get_num_threads();
  92. Long a = (tid + 0) * N / omp_p;
  93. Long b = (tid + 1) * N / omp_p;
  94. for (Long i = a; i < b; i++) {
  95. Long idx = 0;
  96. Real P_[2] = {0, 0};
  97. for (Long j = 0; j < p0 + 1; j++) {
  98. P_[0] += S[i*M+idx] * QP[0][j];
  99. P_[1] += S[i*M+idx] * QP[1][j];
  100. idx += 2*(j+1);
  101. }
  102. P[2*i+0] = P_[0];
  103. P[2*i+1] = P_[1];
  104. }
  105. }
  106. }
  107. if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
  108. #pragma omp parallel
  109. { // Compute pole
  110. Integer tid = omp_get_thread_num();
  111. Integer omp_p = omp_get_num_threads();
  112. Long a = (tid + 0) * N / omp_p;
  113. Long b = (tid + 1) * N / omp_p;
  114. for (Long i = a; i < b; i++) {
  115. Real P_[2] = {0, 0};
  116. for (Long j = 0; j < p0 + 1; j++) {
  117. P_[0] += S[i*M+j] * QP[0][j];
  118. P_[1] += S[i*M+j] * QP[1][j];
  119. }
  120. P[2*i+0] = P_[0];
  121. P[2*i+1] = P_[1];
  122. }
  123. }
  124. }
  125. }
  126. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
  127. typedef double VTKReal;
  128. Vector<Real> SS;
  129. if (S == nullptr) {
  130. Integer p = 2;
  131. Integer Ncoeff = (p + 1) * (p + 1);
  132. Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
  133. SSS.SetZero();
  134. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  135. SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
  136. SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
  137. SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
  138. SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
  139. S = &SS;
  140. }
  141. Vector<Real> X, Xp, V, Vp;
  142. { // Upsample X
  143. const Vector<Real>& X0=*S;
  144. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
  145. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
  146. }
  147. if(v_ptr){ // Upsample V
  148. const Vector<Real>& X0=*v_ptr;
  149. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
  150. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
  151. }
  152. std::vector<VTKReal> point_coord;
  153. std::vector<VTKReal> point_value;
  154. std::vector<int32_t> poly_connect;
  155. std::vector<int32_t> poly_offset;
  156. { // Set point_coord, point_value, poly_connect
  157. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  158. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  159. for(Long k=0;k<N_ves;k++){ // Set point_coord
  160. Real C[COORD_DIM]={0,0,0};
  161. if(period>0){
  162. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  163. for(Long i=0;i<p1+1;i++){
  164. for(Long j=0;j<2*p1;j++){
  165. for(Integer l=0;l<COORD_DIM;l++){
  166. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  167. }
  168. }
  169. }
  170. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  171. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  172. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  173. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  174. }
  175. for(Long i=0;i<p1+1;i++){
  176. for(Long j=0;j<2*p1;j++){
  177. for(Integer l=0;l<COORD_DIM;l++){
  178. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  179. }
  180. }
  181. }
  182. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  183. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  184. }
  185. if(v_ptr) {
  186. Long data__dof = V.Dim() / (2*p1*(p1+1));
  187. for(Long k=0;k<N_ves;k++){ // Set point_value
  188. for(Long i=0;i<p1+1;i++){
  189. for(Long j=0;j<2*p1;j++){
  190. for(Long l=0;l<data__dof;l++){
  191. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  192. }
  193. }
  194. }
  195. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  196. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  197. }
  198. }
  199. for(Long k=0;k<N_ves;k++){
  200. for(Long j=0;j<2*p1;j++){
  201. Long i0= 0;
  202. Long i1=p1;
  203. Long j0=((j+0) );
  204. Long j1=((j+1)%(2*p1));
  205. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  206. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  207. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  208. poly_offset.push_back(poly_connect.size());
  209. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  210. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  211. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  212. poly_offset.push_back(poly_connect.size());
  213. }
  214. for(Long i=0;i<p1;i++){
  215. for(Long j=0;j<2*p1;j++){
  216. Long i0=((i+0) );
  217. Long i1=((i+1) );
  218. Long j0=((j+0) );
  219. Long j1=((j+1)%(2*p1));
  220. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  221. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  222. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  223. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  224. poly_offset.push_back(poly_connect.size());
  225. }
  226. }
  227. }
  228. }
  229. Integer np = comm.Size();
  230. Integer myrank = comm.Rank();
  231. std::vector<VTKReal>& coord=point_coord;
  232. std::vector<VTKReal>& value=point_value;
  233. std::vector<int32_t>& connect=poly_connect;
  234. std::vector<int32_t>& offset=poly_offset;
  235. Long pt_cnt=coord.size()/COORD_DIM;
  236. Long poly_cnt=poly_offset.size();
  237. // Open file for writing.
  238. std::stringstream vtufname;
  239. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  240. std::ofstream vtufile;
  241. vtufile.open(vtufname.str().c_str());
  242. if(vtufile.fail()) return;
  243. bool isLittleEndian;
  244. { // Set isLittleEndian
  245. uint16_t number = 0x1;
  246. uint8_t *numPtr = (uint8_t*)&number;
  247. isLittleEndian=(numPtr[0] == 1);
  248. }
  249. // Proceed to write to file.
  250. Long data_size=0;
  251. vtufile<<"<?xml version=\"1.0\"?>\n";
  252. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  253. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  254. //===========================================================================
  255. vtufile<<" <PolyData>\n";
  256. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  257. //---------------------------------------------------------------------------
  258. vtufile<<" <Points>\n";
  259. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  260. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  261. vtufile<<" </Points>\n";
  262. //---------------------------------------------------------------------------
  263. if(value.size()){ // value
  264. vtufile<<" <PointData>\n";
  265. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  266. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  267. vtufile<<" </PointData>\n";
  268. }
  269. //---------------------------------------------------------------------------
  270. vtufile<<" <Polys>\n";
  271. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  272. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  273. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  274. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  275. vtufile<<" </Polys>\n";
  276. //---------------------------------------------------------------------------
  277. vtufile<<" </Piece>\n";
  278. vtufile<<" </PolyData>\n";
  279. //===========================================================================
  280. vtufile<<" <AppendedData encoding=\"raw\">\n";
  281. vtufile<<" _";
  282. int32_t block_size;
  283. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  284. if(value.size()){ // value
  285. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  286. }
  287. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  288. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  289. vtufile<<"\n";
  290. vtufile<<" </AppendedData>\n";
  291. //===========================================================================
  292. vtufile<<"</VTKFile>\n";
  293. vtufile.close();
  294. if(myrank) return;
  295. std::stringstream pvtufname;
  296. pvtufname<<fname<<".pvtp";
  297. std::ofstream pvtufile;
  298. pvtufile.open(pvtufname.str().c_str());
  299. if(pvtufile.fail()) return;
  300. pvtufile<<"<?xml version=\"1.0\"?>\n";
  301. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  302. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  303. pvtufile<<" <PPoints>\n";
  304. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  305. pvtufile<<" </PPoints>\n";
  306. if(value.size()){ // value
  307. pvtufile<<" <PPointData>\n";
  308. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  309. pvtufile<<" </PPointData>\n";
  310. }
  311. {
  312. // Extract filename from path.
  313. std::stringstream vtupath;
  314. vtupath<<'/'<<fname;
  315. std::string pathname = vtupath.str();
  316. auto found = pathname.find_last_of("/\\");
  317. std::string fname_ = pathname.substr(found+1);
  318. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  319. }
  320. pvtufile<<" </PPolyData>\n";
  321. pvtufile<<"</VTKFile>\n";
  322. pvtufile.close();
  323. }
  324. template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
  325. Long N = X.Dim() / (Np*Nt);
  326. assert(X.Dim() == N*Np*Nt);
  327. assert(N % COORD_DIM == 0);
  328. Vector<Real> B0(N*Nt*Np);
  329. { // Set B0
  330. Vector<Real> sin_phi(Np), cos_phi(Np);
  331. for (Long i = 0; i < Np; i++) {
  332. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  333. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  334. }
  335. const auto& Y = LegendreNodes(Nt - 1);
  336. assert(Y.Dim() == Nt);
  337. Long Ngrid = Nt * Np;
  338. for (Long k = 0; k < N; k+=COORD_DIM) {
  339. for (Long i = 0; i < Nt; i++) {
  340. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  341. Real cos_theta = Y[i];
  342. Real csc_theta = 1 / sin_theta;
  343. const auto X_ = X.begin() + (k*Nt+i)*Np;
  344. auto B0_ = B0.begin() + (k*Nt+i)*Np;
  345. for (Long j = 0; j < Np; j++) {
  346. StaticArray<Real,3> in;
  347. in[0] = X_[0*Ngrid+j];
  348. in[1] = X_[1*Ngrid+j];
  349. in[2] = X_[2*Ngrid+j];
  350. StaticArray<Real,9> Q;
  351. { // Set Q
  352. Q[0] = sin_theta*cos_phi[j]; Q[1] = sin_theta*sin_phi[j]; Q[2] = cos_theta;
  353. Q[3] = cos_theta*cos_phi[j]; Q[4] = cos_theta*sin_phi[j]; Q[5] =-sin_theta;
  354. Q[6] = -sin_phi[j]; Q[7] = cos_phi[j]; Q[8] = 0;
  355. }
  356. B0_[0*Ngrid+j] = ( Q[0] * in[0] + Q[1] * in[1] + Q[2] * in[2] );
  357. B0_[1*Ngrid+j] = ( Q[3] * in[0] + Q[4] * in[1] + Q[5] * in[2] ) * csc_theta;
  358. B0_[2*Ngrid+j] = ( Q[6] * in[0] + Q[7] * in[1] + Q[8] * in[2] ) * csc_theta;
  359. }
  360. }
  361. }
  362. }
  363. Long p_ = p0 + 1;
  364. Long M0 = (p0+1)*(p0+1);
  365. Long M_ = (p_+1)*(p_+1);
  366. Vector<Real> B1(N*M_);
  367. Grid2SHC_(B0, Nt, Np, p_, B1);
  368. Vector<Real> B2(N*M0);
  369. const Complex<Real> imag(0,1);
  370. for (Long i=0; i<N; i+=COORD_DIM) {
  371. for (Long m=0; m<=p0; m++) {
  372. for (Long n=m; n<=p0; n++) {
  373. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  374. Complex<Real> c;
  375. if (0<=m && m<=n && n<=p) {
  376. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  377. Long idx_imag = idx_real + (p+1-m)*N;
  378. c.real = coeff[idx_real];
  379. if (m) c.imag = coeff[idx_imag];
  380. }
  381. return c;
  382. };
  383. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  384. if (0<=m && m<=n && n<=p) {
  385. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  386. Long idx_imag = idx_real + (p+1-m)*N;
  387. coeff[idx_real] = c.real;
  388. if (m) coeff[idx_imag] = c.imag;
  389. }
  390. };
  391. auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
  392. auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
  393. auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
  394. Complex<Real> phiY, phiG, phiX;
  395. { // (phiG, phiX) <-- (gt, gp)
  396. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  397. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  398. phiY = gr(n,m);
  399. phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  400. phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  401. }
  402. auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
  403. auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
  404. if (n==0) {
  405. phiW = 0;
  406. phiX = 0;
  407. }
  408. write_coeff(phiV, B2, i+0, p0, n, m);
  409. write_coeff(phiW, B2, i+1, p0, n, m);
  410. write_coeff(phiX, B2, i+2, p0, n, m);
  411. }
  412. }
  413. }
  414. SHCArrange0(B2, p0, S, arrange);
  415. }
  416. template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>& X) {
  417. Vector<Real> B0;
  418. SHCArrange1(S, arrange, p0, B0);
  419. Long p_ = p0 + 1;
  420. Long M0 = (p0+1)*(p0+1);
  421. Long M_ = (p_+1)*(p_+1);
  422. Long N = B0.Dim() / M0;
  423. assert(B0.Dim() == N*M0);
  424. assert(N % COORD_DIM == 0);
  425. Vector<Real> B1(N*M_);
  426. const Complex<Real> imag(0,1);
  427. for (Long i=0; i<N; i+=COORD_DIM) {
  428. for (Long m=0; m<=p_; m++) {
  429. for (Long n=m; n<=p_; n++) {
  430. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  431. Complex<Real> c;
  432. if (0<=m && m<=n && n<=p) {
  433. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  434. Long idx_imag = idx_real + (p+1-m)*N;
  435. c.real = coeff[idx_real];
  436. if (m) c.imag = coeff[idx_imag];
  437. }
  438. return c;
  439. };
  440. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  441. if (0<=m && m<=n && n<=p) {
  442. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  443. Long idx_imag = idx_real + (p+1-m)*N;
  444. coeff[idx_real] = c.real;
  445. if (m) coeff[idx_imag] = c.imag;
  446. }
  447. };
  448. auto phiG = [&](Long n, Long m) {
  449. auto phiV = read_coeff(B0, i+0, p0, n, m);
  450. auto phiW = read_coeff(B0, i+1, p0, n, m);
  451. return phiV + phiW;
  452. };
  453. auto phiY = [&](Long n, Long m) {
  454. auto phiV = read_coeff(B0, i+0, p0, n, m);
  455. auto phiW = read_coeff(B0, i+1, p0, n, m);
  456. return phiW * n - phiV * (n + 1);
  457. };
  458. auto phiX = [&](Long n, Long m) {
  459. return read_coeff(B0, i+2, p0, n, m);
  460. };
  461. Complex<Real> gr, gt, gp;
  462. { // (gt, gp) <-- (phiG, phiX)
  463. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  464. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  465. gr = phiY(n,m);
  466. gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
  467. gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
  468. }
  469. write_coeff(gr, B1, i+0, p_, n, m);
  470. write_coeff(gt, B1, i+1, p_, n, m);
  471. write_coeff(gp, B1, i+2, p_, n, m);
  472. }
  473. }
  474. }
  475. { // Set X
  476. SHC2Grid_(B1, p_, Nt, Np, &X);
  477. Vector<Real> sin_phi(Np), cos_phi(Np);
  478. for (Long i = 0; i < Np; i++) {
  479. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  480. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  481. }
  482. const auto& Y = LegendreNodes(Nt - 1);
  483. assert(Y.Dim() == Nt);
  484. Long Ngrid = Nt * Np;
  485. for (Long k = 0; k < N; k+=COORD_DIM) {
  486. for (Long i = 0; i < Nt; i++) {
  487. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  488. Real cos_theta = Y[i];
  489. Real csc_theta = 1 / sin_theta;
  490. auto X_ = X.begin() + (k*Nt+i)*Np;
  491. for (Long j = 0; j < Np; j++) {
  492. StaticArray<Real,3> in;
  493. in[0] = X_[0*Ngrid+j];
  494. in[1] = X_[1*Ngrid+j] * csc_theta;
  495. in[2] = X_[2*Ngrid+j] * csc_theta;
  496. StaticArray<Real,9> Q;
  497. { // Set Q
  498. Q[0] = sin_theta*cos_phi[j]; Q[1] = sin_theta*sin_phi[j]; Q[2] = cos_theta;
  499. Q[3] = cos_theta*cos_phi[j]; Q[4] = cos_theta*sin_phi[j]; Q[5] =-sin_theta;
  500. Q[6] = -sin_phi[j]; Q[7] = cos_phi[j]; Q[8] = 0;
  501. }
  502. X_[0*Ngrid+j] = ( Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2] );
  503. X_[1*Ngrid+j] = ( Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2] );
  504. X_[2*Ngrid+j] = ( Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2] );
  505. }
  506. }
  507. }
  508. }
  509. }
  510. template <class Real> void SphericalHarmonics<Real>::VecSHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& theta_phi, Vector<Real>& X) {
  511. Long M = (p0+1) * (p0+1);
  512. Long dof;
  513. Matrix<Real> B1;
  514. { // Set B1, dof
  515. Vector<Real> B0;
  516. SHCArrange1(S, arrange, p0, B0);
  517. dof = B0.Dim() / M / COORD_DIM;
  518. assert(B0.Dim() == dof * COORD_DIM * M);
  519. B1.ReInit(dof, COORD_DIM * M);
  520. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  521. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  522. }
  523. assert(B1.Dim(1) == COORD_DIM * M);
  524. assert(B1.Dim(0) == dof);
  525. Matrix<Real> SHBasis;
  526. VecSHBasisEval(p0, theta_phi, SHBasis);
  527. assert(SHBasis.Dim(1) == COORD_DIM * M);
  528. Long N = SHBasis.Dim(0) / COORD_DIM;
  529. { // Set X <-- Q * SHBasis * B1
  530. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  531. for (Long k0 = 0; k0 < N; k0++) {
  532. StaticArray<Real,9> Q;
  533. { // Set Q
  534. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  535. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  536. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  537. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  538. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  539. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  540. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  541. }
  542. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * SHBasis * B1
  543. StaticArray<Real,COORD_DIM> in;
  544. for (Long j = 0; j < COORD_DIM; j++) {
  545. in[j] = 0;
  546. for (Long i = 0; i < COORD_DIM * M; i++) {
  547. in[j] += B1[k1][i] * SHBasis[k0 * COORD_DIM + j][i];
  548. }
  549. }
  550. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  551. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  552. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  553. }
  554. }
  555. }
  556. }
  557. template <class Real> void SphericalHarmonics<Real>::StokesEvalSL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  558. Long M = (p0+1) * (p0+1);
  559. Long dof;
  560. Matrix<Real> B1;
  561. { // Set B1, dof
  562. Vector<Real> B0;
  563. SHCArrange1(S, arrange, p0, B0);
  564. dof = B0.Dim() / M / COORD_DIM;
  565. assert(B0.Dim() == dof * COORD_DIM * M);
  566. B1.ReInit(dof, COORD_DIM * M);
  567. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  568. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  569. }
  570. assert(B1.Dim(1) == COORD_DIM * M);
  571. assert(B1.Dim(0) == dof);
  572. Long N, p_;
  573. Matrix<Real> SHBasis;
  574. Vector<Real> R, theta_phi;
  575. { // Set N, p_, R, SHBasis
  576. p_ = p0 + 1;
  577. Real M_ = (p_+1) * (p_+1);
  578. N = coord.Dim() / COORD_DIM;
  579. assert(coord.Dim() == N * COORD_DIM);
  580. R.ReInit(N);
  581. theta_phi.ReInit(2 * N);
  582. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  583. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  584. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  585. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]), x[2]);
  586. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  587. }
  588. SHBasisEval(p_, theta_phi, SHBasis);
  589. assert(SHBasis.Dim(1) == M_);
  590. assert(SHBasis.Dim(0) == N);
  591. SCTL_UNUSED(M_);
  592. }
  593. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  594. for (Long i = 0; i < N; i++) { // Set StokesOp
  595. Real cos_theta, sin_theta, csc_theta, cos_phi, sin_phi;
  596. { // Set cos_theta, csc_theta, cos_phi, sin_phi
  597. cos_theta = cos(theta_phi[i * 2 + 0]);
  598. sin_theta = sin(theta_phi[i * 2 + 0]);
  599. csc_theta = 1 / sin_theta;
  600. cos_phi = cos(theta_phi[i * 2 + 1]);
  601. sin_phi = sin(theta_phi[i * 2 + 1]);
  602. }
  603. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  604. for (Long m = 0; m <= p0; m++) {
  605. for (Long n = m; n <= p0; n++) {
  606. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  607. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  608. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  609. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  610. if (m) {
  611. idx += (p0+1-m);
  612. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  613. }
  614. }
  615. };
  616. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  617. { // Set vector spherical harmonics
  618. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  619. Complex<Real> c;
  620. if (0 <= m && m <= n && n <= p_) {
  621. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  622. c.real = SHBasis[i][idx];
  623. if (m) {
  624. idx += (p_+1-m);
  625. c.imag = SHBasis[i][idx];
  626. }
  627. }
  628. return c;
  629. };
  630. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  631. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  632. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  633. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  634. };
  635. Complex<Real> Y_1 = Y(n + 0, m);
  636. Complex<Real> Y_1t = Yt(n + 0, m);
  637. Complex<Real> Ycsc_1 = Y_1 * csc_theta;
  638. if (fabs(sin_theta) == 0) {
  639. auto Y_csc0 = [exp_phi, cos_theta](Long n, Long m) {
  640. if (m == 1) return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta<0) ? -1 : 1) * exp_phi;
  641. return Complex<Real>(0, 0);
  642. };
  643. Ycsc_1 = Y_csc0(n + 0, m);
  644. }
  645. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  646. Vr = C0 * (-n-1);
  647. Vt = C2;
  648. Vp = -imag * m * C1;
  649. Wr = C0 * n;
  650. Wt = C2;
  651. Wp = -imag * m * C1;
  652. Xr = 0;
  653. Xt = imag * m * C1;
  654. Xp = C2;
  655. };
  656. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  657. auto C0 = Y_1;
  658. auto C1 = Ycsc_1;
  659. auto C2 = Y_1t * R[i];
  660. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  661. }
  662. }
  663. Complex<Real> SVr, SVt, SVp;
  664. Complex<Real> SWr, SWt, SWp;
  665. Complex<Real> SXr, SXt, SXp;
  666. if (interior) {
  667. Real a,b;
  668. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  669. b = -(n+1) / (Real)(4*n+2) * (pow<Real>(R[i], n-1) - pow<Real>(R[i], n+1));
  670. SVr = a * Vr + b * Wr;
  671. SVt = a * Vt + b * Wt;
  672. SVp = a * Vp + b * Wp;
  673. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  674. SWr = a * Wr;
  675. SWt = a * Wt;
  676. SWp = a * Wp;
  677. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], n);
  678. SXr = a * Xr;
  679. SXt = a * Xt;
  680. SXp = a * Xp;
  681. } else {
  682. Real a,b;
  683. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  684. SVr = a * Vr;
  685. SVt = a * Vt;
  686. SVp = a * Vp;
  687. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  688. b = n / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  689. SWr = a * Wr + b * Vr;
  690. SWt = a * Wt + b * Vt;
  691. SWp = a * Wp + b * Vp;
  692. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  693. SXr = a * Xr;
  694. SXt = a * Xt;
  695. SXp = a * Xp;
  696. }
  697. write_coeff(SVr, n, m, 0, 0);
  698. write_coeff(SVt, n, m, 0, 1);
  699. write_coeff(SVp, n, m, 0, 2);
  700. write_coeff(SWr, n, m, 1, 0);
  701. write_coeff(SWt, n, m, 1, 1);
  702. write_coeff(SWp, n, m, 1, 2);
  703. write_coeff(SXr, n, m, 2, 0);
  704. write_coeff(SXt, n, m, 2, 1);
  705. write_coeff(SXp, n, m, 2, 2);
  706. }
  707. }
  708. }
  709. { // Set X <-- Q * StokesOp * B1
  710. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  711. for (Long k0 = 0; k0 < N; k0++) {
  712. StaticArray<Real,9> Q;
  713. { // Set Q
  714. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  715. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  716. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  717. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  718. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  719. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  720. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  721. }
  722. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  723. StaticArray<Real,COORD_DIM> in;
  724. for (Long j = 0; j < COORD_DIM; j++) {
  725. in[j] = 0;
  726. for (Long i = 0; i < COORD_DIM * M; i++) {
  727. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  728. }
  729. }
  730. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  731. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  732. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  733. }
  734. }
  735. }
  736. }
  737. template <class Real> void SphericalHarmonics<Real>::StokesEvalDL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  738. Long M = (p0+1) * (p0+1);
  739. Long dof;
  740. Matrix<Real> B1;
  741. { // Set B1, dof
  742. Vector<Real> B0;
  743. SHCArrange1(S, arrange, p0, B0);
  744. dof = B0.Dim() / M / COORD_DIM;
  745. assert(B0.Dim() == dof * COORD_DIM * M);
  746. B1.ReInit(dof, COORD_DIM * M);
  747. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  748. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  749. }
  750. assert(B1.Dim(1) == COORD_DIM * M);
  751. assert(B1.Dim(0) == dof);
  752. Long N, p_;
  753. Matrix<Real> SHBasis;
  754. Vector<Real> R, theta_phi;
  755. { // Set N, p_, R, SHBasis
  756. p_ = p0 + 1;
  757. Real M_ = (p_+1) * (p_+1);
  758. N = coord.Dim() / COORD_DIM;
  759. assert(coord.Dim() == N * COORD_DIM);
  760. R.ReInit(N);
  761. theta_phi.ReInit(2 * N);
  762. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  763. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  764. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  765. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]), x[2]);
  766. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  767. }
  768. SHBasisEval(p_, theta_phi, SHBasis);
  769. assert(SHBasis.Dim(1) == M_);
  770. assert(SHBasis.Dim(0) == N);
  771. SCTL_UNUSED(M_);
  772. }
  773. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  774. for (Long i = 0; i < N; i++) { // Set StokesOp
  775. Real cos_theta, sin_theta, csc_theta, cos_phi, sin_phi;
  776. { // Set cos_theta, csc_theta, cos_phi, sin_phi
  777. cos_theta = cos(theta_phi[i * 2 + 0]);
  778. sin_theta = sin(theta_phi[i * 2 + 0]);
  779. csc_theta = 1 / sin_theta;
  780. cos_phi = cos(theta_phi[i * 2 + 1]);
  781. sin_phi = sin(theta_phi[i * 2 + 1]);
  782. }
  783. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  784. for (Long m = 0; m <= p0; m++) {
  785. for (Long n = m; n <= p0; n++) {
  786. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  787. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  788. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  789. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  790. if (m) {
  791. idx += (p0+1-m);
  792. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  793. }
  794. }
  795. };
  796. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  797. { // Set vector spherical harmonics
  798. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  799. Complex<Real> c;
  800. if (0 <= m && m <= n && n <= p_) {
  801. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  802. c.real = SHBasis[i][idx];
  803. if (m) {
  804. idx += (p_+1-m);
  805. c.imag = SHBasis[i][idx];
  806. }
  807. }
  808. return c;
  809. };
  810. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  811. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  812. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  813. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  814. };
  815. Complex<Real> Y_1 = Y(n + 0, m);
  816. Complex<Real> Y_1t = Yt(n + 0, m);
  817. Complex<Real> Ycsc_1 = Y_1 * csc_theta;
  818. if (fabs(sin_theta) == 0) {
  819. auto Y_csc0 = [exp_phi, cos_theta](Long n, Long m) {
  820. if (m == 1) return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta<0) ? -1 : 1) * exp_phi;
  821. return Complex<Real>(0, 0);
  822. };
  823. Ycsc_1 = Y_csc0(n + 0, m);
  824. }
  825. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  826. Vr = C0 * (-n-1);
  827. Vt = C2;
  828. Vp = -imag * m * C1;
  829. Wr = C0 * n;
  830. Wt = C2;
  831. Wp = -imag * m * C1;
  832. Xr = 0;
  833. Xt = imag * m * C1;
  834. Xp = C2;
  835. };
  836. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  837. auto C0 = Y_1;
  838. auto C1 = Ycsc_1;
  839. auto C2 = Y_1t * R[i];
  840. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  841. }
  842. }
  843. Complex<Real> SVr, SVt, SVp;
  844. Complex<Real> SWr, SWt, SWp;
  845. Complex<Real> SXr, SXt, SXp;
  846. if (interior) {
  847. Real a,b;
  848. a = -2*n*(n+2) / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  849. b = -(n+1)*(n+2) / (Real)(2*n+1) * (pow<Real>(R[i], n+1) - pow<Real>(R[i], n-1));
  850. SVr = a * Vr + b * Wr;
  851. SVt = a * Vt + b * Wt;
  852. SVp = a * Vp + b * Wp;
  853. a = -(2*n*n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  854. SWr = a * Wr;
  855. SWt = a * Wt;
  856. SWp = a * Wp;
  857. a = -(n+2) / (Real)(2*n+1) * pow<Real>(R[i], n);
  858. SXr = a * Xr;
  859. SXt = a * Xt;
  860. SXp = a * Xp;
  861. } else {
  862. Real a,b;
  863. a = (2*n*n+4*n+3) / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  864. SVr = a * Vr;
  865. SVt = a * Vt;
  866. SVp = a * Vp;
  867. a = 2*(n+1)*(n-1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  868. b = 2*n*(n-1) / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  869. SWr = a * Wr + b * Vr;
  870. SWt = a * Wt + b * Vt;
  871. SWp = a * Wp + b * Vp;
  872. a = (n-1) / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  873. SXr = a * Xr;
  874. SXt = a * Xt;
  875. SXp = a * Xp;
  876. }
  877. write_coeff(SVr, n, m, 0, 0);
  878. write_coeff(SVt, n, m, 0, 1);
  879. write_coeff(SVp, n, m, 0, 2);
  880. write_coeff(SWr, n, m, 1, 0);
  881. write_coeff(SWt, n, m, 1, 1);
  882. write_coeff(SWp, n, m, 1, 2);
  883. write_coeff(SXr, n, m, 2, 0);
  884. write_coeff(SXt, n, m, 2, 1);
  885. write_coeff(SXp, n, m, 2, 2);
  886. }
  887. }
  888. }
  889. { // Set X <-- Q * StokesOp * B1
  890. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  891. for (Long k0 = 0; k0 < N; k0++) {
  892. StaticArray<Real,9> Q;
  893. { // Set Q
  894. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  895. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  896. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  897. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  898. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  899. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  900. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  901. }
  902. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  903. StaticArray<Real,COORD_DIM> in;
  904. for (Long j = 0; j < COORD_DIM; j++) {
  905. in[j] = 0;
  906. for (Long i = 0; i < COORD_DIM * M; i++) {
  907. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  908. }
  909. }
  910. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  911. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  912. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  913. }
  914. }
  915. }
  916. }
  917. template <class Real> void SphericalHarmonics<Real>::StokesEvalKL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, const Vector<Real>& norm, bool interior, Vector<Real>& X) {
  918. Long M = (p0+1) * (p0+1);
  919. Long dof;
  920. Matrix<Real> B1;
  921. { // Set B1, dof
  922. Vector<Real> B0;
  923. SHCArrange1(S, arrange, p0, B0);
  924. dof = B0.Dim() / M / COORD_DIM;
  925. assert(B0.Dim() == dof * COORD_DIM * M);
  926. B1.ReInit(dof, COORD_DIM * M);
  927. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  928. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  929. }
  930. assert(B1.Dim(1) == COORD_DIM * M);
  931. assert(B1.Dim(0) == dof);
  932. Long N, p_;
  933. Matrix<Real> SHBasis;
  934. Vector<Real> R, theta_phi;
  935. { // Set N, p_, R, SHBasis
  936. p_ = p0 + 2;
  937. Real M_ = (p_+1) * (p_+1);
  938. N = coord.Dim() / COORD_DIM;
  939. assert(coord.Dim() == N * COORD_DIM);
  940. R.ReInit(N);
  941. theta_phi.ReInit(2 * N);
  942. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  943. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  944. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  945. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]) + 1e-50, x[2]);
  946. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  947. }
  948. SHBasisEval(p_, theta_phi, SHBasis);
  949. assert(SHBasis.Dim(1) == M_);
  950. assert(SHBasis.Dim(0) == N);
  951. SCTL_UNUSED(M_);
  952. }
  953. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  954. for (Long i = 0; i < N; i++) { // Set StokesOp
  955. Real cos_theta, sin_theta, csc_theta, cot_theta, cos_phi, sin_phi;
  956. { // Set cos_theta, sin_theta, cos_phi, sin_phi
  957. cos_theta = cos(theta_phi[i * 2 + 0]);
  958. sin_theta = sin(theta_phi[i * 2 + 0]);
  959. csc_theta = 1 / sin_theta;
  960. cot_theta = cos_theta * csc_theta;
  961. cos_phi = cos(theta_phi[i * 2 + 1]);
  962. sin_phi = sin(theta_phi[i * 2 + 1]);
  963. }
  964. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  965. StaticArray<Real, COORD_DIM> norm0;
  966. { // Set norm0 <-- Q^t * norm
  967. StaticArray<Real,9> Q;
  968. { // Set Q
  969. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  970. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  971. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  972. }
  973. StaticArray<Real,COORD_DIM> in;
  974. in[0] = norm[i * COORD_DIM + 0];
  975. in[1] = norm[i * COORD_DIM + 1];
  976. in[2] = norm[i * COORD_DIM + 2];
  977. norm0[0] = Q[0] * in[0] + Q[1] * in[1] + Q[2] * in[2];
  978. norm0[1] = Q[3] * in[0] + Q[4] * in[1] + Q[5] * in[2];
  979. norm0[2] = Q[6] * in[0] + Q[7] * in[1] + Q[8] * in[2];
  980. }
  981. for (Long m = 0; m <= p0; m++) {
  982. for (Long n = m; n <= p0; n++) {
  983. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  984. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  985. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  986. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  987. if (m) {
  988. idx += (p0+1-m);
  989. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  990. }
  991. }
  992. };
  993. Complex<Real> Ynm;
  994. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  995. Complex<Real> Vr_t, Vt_t, Vp_t, Wr_t, Wt_t, Wp_t, Xr_t, Xt_t, Xp_t;
  996. Complex<Real> Vr_p, Vt_p, Vp_p, Wr_p, Wt_p, Wp_p, Xr_p, Xt_p, Xp_p;
  997. { // Set vector spherical harmonics
  998. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  999. Complex<Real> c;
  1000. if (0 <= m && m <= n && n <= p_) {
  1001. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  1002. c.real = SHBasis[i][idx];
  1003. if (m) {
  1004. idx += (p_+1-m);
  1005. c.imag = SHBasis[i][idx];
  1006. }
  1007. }
  1008. return c;
  1009. };
  1010. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  1011. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  1012. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  1013. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  1014. };
  1015. auto Yp = [&Y, &imag, &R, i, csc_theta](Long n, Long m) {
  1016. return imag * m * Y(n, m) * csc_theta / R[i];
  1017. };
  1018. auto Ypt = [&Yt, &imag](Long n, Long m) {
  1019. return imag * m * Yt(n, m);
  1020. };
  1021. auto Ytt = [sin_theta, exp_phi, &Yt, &R, i](Long n, Long m) {
  1022. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  1023. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  1024. return (n==0 ? 0 : (B / exp_phi * Yt(n, m + 1) - A * exp_phi * Yt(n, m - 1)));
  1025. };
  1026. Complex<Real> Y_1 = Y(n + 0, m);
  1027. Complex<Real> Y_0t = Yt(n - 1, m);
  1028. Complex<Real> Y_1t = Yt(n + 0, m);
  1029. Complex<Real> Y_2t = Yt(n + 1, m);
  1030. //Complex<Real> Y_0p = Yp(n - 1, m);
  1031. Complex<Real> Y_1p = Yp(n + 0, m);
  1032. //Complex<Real> Y_2p = Yp(n + 1, m);
  1033. auto Anm = (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0);
  1034. auto Bnm = (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0);
  1035. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  1036. Vr = C0 * (-n-1);
  1037. Vt = C2;
  1038. Vp = -imag * m * C1;
  1039. Wr = C0 * n;
  1040. Wt = C2;
  1041. Wp = -imag * m * C1;
  1042. Xr = 0;
  1043. Xt = imag * m * C1;
  1044. Xp = C2;
  1045. };
  1046. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  1047. auto C0 = Y_1;
  1048. auto C1 = Y_1 * csc_theta;
  1049. auto C2 = Yt(n,m) * R[i];
  1050. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  1051. }
  1052. { // Set Vr_t, Vt_t, Vp_t, Wr_t, Wt_t, Wp_t, Xr_t, Xt_t, Xp_t
  1053. auto C0 = Y_1t;
  1054. auto C1 = (Y_1t - Y_1 * cot_theta / R[i]) * csc_theta;
  1055. if (fabs(cos_theta) == 1 && m == 1) C1 = 0; ///////////// TODO
  1056. auto C2 = Ytt(n,m);
  1057. if (!m) C2 = (Anm * Y_2t - Bnm * Y_0t) * csc_theta - Y_1t * cot_theta; ///////////// TODO
  1058. SetVecSH(Vr_t, Vt_t, Vp_t, Wr_t, Wt_t, Wp_t, Xr_t, Xt_t, Xp_t, C0, C1, C2);
  1059. Vr_t += (-Vt) / R[i];
  1060. Vt_t += ( Vr) / R[i];
  1061. Wr_t += (-Wt) / R[i];
  1062. Wt_t += ( Wr) / R[i];
  1063. Xr_t += (-Xt) / R[i];
  1064. Xt_t += ( Xr) / R[i];
  1065. }
  1066. { // Set Vr_p, Vt_p, Vp_p, Wr_p, Wt_p, Wp_p, Xr_p, Xt_p, Xp_p
  1067. auto C0 = -Y_1p;
  1068. auto C1 = -Y_1p * csc_theta;
  1069. auto C2 = -Ypt(n, m) * csc_theta;
  1070. //auto C2 = -(Anm * Y_2p - Bnm * Y_0p) * csc_theta;
  1071. SetVecSH(Vr_p, Vt_p, Vp_p, Wr_p, Wt_p, Wp_p, Xr_p, Xt_p, Xp_p, C0, C1, C2);
  1072. Vr_p += (-sin_theta * Vp ) * csc_theta / R[i];
  1073. Vt_p += (-cos_theta * Vp ) * csc_theta / R[i];
  1074. Vp_p += ( sin_theta * Vr + cos_theta * Vt) * csc_theta / R[i];
  1075. Wr_p += (-sin_theta * Wp ) * csc_theta / R[i];
  1076. Wt_p += (-cos_theta * Wp ) * csc_theta / R[i];
  1077. Wp_p += ( sin_theta * Wr + cos_theta * Wt) * csc_theta / R[i];
  1078. Xr_p += (-sin_theta * Xp ) * csc_theta / R[i];
  1079. Xt_p += (-cos_theta * Xp ) * csc_theta / R[i];
  1080. Xp_p += ( sin_theta * Xr + cos_theta * Xt) * csc_theta / R[i];
  1081. if (fabs(cos_theta) == 1 && m == 1) {
  1082. Vt_p = 0;
  1083. Vp_p = 0;
  1084. Wt_p = 0;
  1085. Wp_p = 0;
  1086. Xt_p = 0;
  1087. Xp_p = 0;
  1088. }
  1089. }
  1090. Ynm = Y_1;
  1091. }
  1092. if (fabs(cos_theta) == 1) {
  1093. if (m!=0) Vr = 0;
  1094. if (m!=1) Vt = 0;
  1095. if (m!=1) Vp = 0;
  1096. if (m!=0) Wr = 0;
  1097. if (m!=1) Wt = 0;
  1098. if (m!=1) Wp = 0;
  1099. Xr = 0;
  1100. if (m!=1) Xt = 0;
  1101. if (m!=1) Xp = 0;
  1102. if (m!=1 ) Vr_t = 0;
  1103. if (m!=0 && m!=2) Vt_t = 0;
  1104. if (m!=2 ) Vp_t = 0;
  1105. if (m!=1 ) Wr_t = 0;
  1106. if (m!=0 && m!=2) Wt_t = 0;
  1107. if (m!=2 ) Wp_t = 0;
  1108. if (m!=1 ) Xr_t = 0;
  1109. if (m!=2 ) Xt_t = 0;
  1110. if (m!=0 && m!=2) Xp_t = 0;
  1111. if (m!=1 ) Vr_p = 0;
  1112. if (m!=2 ) Vt_p = 0;
  1113. if (m!=0 && m!=2) Vp_p = 0;
  1114. if (m!=1 ) Wr_p = 0;
  1115. if (m!=2 ) Wt_p = 0;
  1116. if (m!=0 && m!=2) Wp_p = 0;
  1117. if (m!=1 ) Xr_p = 0;
  1118. if (m!=0 && m!=2) Xt_p = 0;
  1119. if (m!=2 ) Xp_p = 0;
  1120. }
  1121. Complex<Real> PV, PW, PX;
  1122. Complex<Real> SV[COORD_DIM][COORD_DIM];
  1123. Complex<Real> SW[COORD_DIM][COORD_DIM];
  1124. Complex<Real> SX[COORD_DIM][COORD_DIM];
  1125. if (interior) {
  1126. PV = (n+1) * pow<Real>(R[i],n) * Ynm;
  1127. PW = 0;
  1128. PX = 0;
  1129. Real a, b;
  1130. Real a_r, b_r;
  1131. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  1132. b = -(n+1) / (Real)(4*n+2) * (pow<Real>(R[i], n-1) - pow<Real>(R[i], n+1));
  1133. a_r = n / (Real)((2*n+1) * (2*n+3)) * (n+1) * pow<Real>(R[i], n);
  1134. b_r = -(n+1) / (Real)(4*n+2) * ((n-1) * pow<Real>(R[i], n-2) - (n+1) * pow<Real>(R[i], n));
  1135. SV[0][0] = a_r * Vr + b_r * Wr;
  1136. SV[1][0] = a_r * Vt + b_r * Wt;
  1137. SV[2][0] = a_r * Vp + b_r * Wp;
  1138. SV[0][1] = a * Vr_t + b * Wr_t;
  1139. SV[1][1] = a * Vt_t + b * Wt_t;
  1140. SV[2][1] = a * Vp_t + b * Wp_t;
  1141. SV[0][2] = a * Vr_p + b * Wr_p;
  1142. SV[1][2] = a * Vt_p + b * Wt_p;
  1143. SV[2][2] = a * Vp_p + b * Wp_p;
  1144. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  1145. a_r = (n+1) / (Real)((2*n+1) * (2*n-1)) * (n-1) * pow<Real>(R[i], n-2);
  1146. SW[0][0] = a_r * Wr;
  1147. SW[1][0] = a_r * Wt;
  1148. SW[2][0] = a_r * Wp;
  1149. SW[0][1] = a * Wr_t;
  1150. SW[1][1] = a * Wt_t;
  1151. SW[2][1] = a * Wp_t;
  1152. SW[0][2] = a * Wr_p;
  1153. SW[1][2] = a * Wt_p;
  1154. SW[2][2] = a * Wp_p;
  1155. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], n);
  1156. a_r = 1 / (Real)(2*n+1) * (n) * pow<Real>(R[i], n-1);
  1157. SX[0][0] = a_r * Xr;
  1158. SX[1][0] = a_r * Xt;
  1159. SX[2][0] = a_r * Xp;
  1160. SX[0][1] = a * Xr_t;
  1161. SX[1][1] = a * Xt_t;
  1162. SX[2][1] = a * Xp_t;
  1163. SX[0][2] = a * Xr_p;
  1164. SX[1][2] = a * Xt_p;
  1165. SX[2][2] = a * Xp_p;
  1166. } else {
  1167. PV = 0;
  1168. PW = n * pow<Real>(R[i],-n-1) * Ynm;
  1169. PX = 0;
  1170. Real a,b;
  1171. Real a_r, b_r;
  1172. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  1173. a_r = n / (Real)((2*n+1) * (2*n+3)) * (-n-2) * pow<Real>(R[i], -n-3);
  1174. SV[0][0] = a_r * Vr;
  1175. SV[1][0] = a_r * Vt;
  1176. SV[2][0] = a_r * Vp;
  1177. SV[0][1] = a * Vr_t;
  1178. SV[1][1] = a * Vt_t;
  1179. SV[2][1] = a * Vp_t;
  1180. SV[0][2] = a * Vr_p;
  1181. SV[1][2] = a * Vt_p;
  1182. SV[2][2] = a * Vp_p;
  1183. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  1184. b = n / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  1185. a_r = (n+1) / (Real)((2*n+1) * (2*n-1)) * (-n) * pow<Real>(R[i], -n-1);
  1186. b_r = n / (Real)(4*n+2) * ((-n-2)*pow<Real>(R[i], -n-3) - (-n)*pow<Real>(R[i], -n-1));
  1187. SW[0][0] = a_r * Wr + b_r * Vr;
  1188. SW[1][0] = a_r * Wt + b_r * Vt;
  1189. SW[2][0] = a_r * Wp + b_r * Vp;
  1190. SW[0][1] = a * Wr_t + b * Vr_t;
  1191. SW[1][1] = a * Wt_t + b * Vt_t;
  1192. SW[2][1] = a * Wp_t + b * Vp_t;
  1193. SW[0][2] = a * Wr_p + b * Vr_p;
  1194. SW[1][2] = a * Wt_p + b * Vt_p;
  1195. SW[2][2] = a * Wp_p + b * Vp_p;
  1196. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  1197. a_r = 1 / (Real)(2*n+1) * (-n-1) * pow<Real>(R[i], -n-2);
  1198. SX[0][0] = a_r * Xr;
  1199. SX[1][0] = a_r * Xt;
  1200. SX[2][0] = a_r * Xp;
  1201. SX[0][1] = a * Xr_t;
  1202. SX[1][1] = a * Xt_t;
  1203. SX[2][1] = a * Xp_t;
  1204. SX[0][2] = a * Xr_p;
  1205. SX[1][2] = a * Xt_p;
  1206. SX[2][2] = a * Xp_p;
  1207. }
  1208. Complex<Real> KV[COORD_DIM][COORD_DIM], KW[COORD_DIM][COORD_DIM], KX[COORD_DIM][COORD_DIM];
  1209. KV[0][0] = SV[0][0] + SV[0][0] - PV; KV[0][1] = SV[0][1] + SV[1][0] ; KV[0][2] = SV[0][2] + SV[2][0] ;
  1210. KV[1][0] = SV[1][0] + SV[0][1] ; KV[1][1] = SV[1][1] + SV[1][1] - PV; KV[1][2] = SV[1][2] + SV[2][1] ;
  1211. KV[2][0] = SV[2][0] + SV[0][2] ; KV[2][1] = SV[2][1] + SV[1][2] ; KV[2][2] = SV[2][2] + SV[2][2] - PV;
  1212. KW[0][0] = SW[0][0] + SW[0][0] - PW; KW[0][1] = SW[0][1] + SW[1][0] ; KW[0][2] = SW[0][2] + SW[2][0] ;
  1213. KW[1][0] = SW[1][0] + SW[0][1] ; KW[1][1] = SW[1][1] + SW[1][1] - PW; KW[1][2] = SW[1][2] + SW[2][1] ;
  1214. KW[2][0] = SW[2][0] + SW[0][2] ; KW[2][1] = SW[2][1] + SW[1][2] ; KW[2][2] = SW[2][2] + SW[2][2] - PW;
  1215. KX[0][0] = SX[0][0] + SX[0][0] - PX; KX[0][1] = SX[0][1] + SX[1][0] ; KX[0][2] = SX[0][2] + SX[2][0] ;
  1216. KX[1][0] = SX[1][0] + SX[0][1] ; KX[1][1] = SX[1][1] + SX[1][1] - PX; KX[1][2] = SX[1][2] + SX[2][1] ;
  1217. KX[2][0] = SX[2][0] + SX[0][2] ; KX[2][1] = SX[2][1] + SX[1][2] ; KX[2][2] = SX[2][2] + SX[2][2] - PX;
  1218. write_coeff(KV[0][0]*norm0[0] + KV[0][1]*norm0[1] + KV[0][2]*norm0[2], n, m, 0, 0);
  1219. write_coeff(KV[1][0]*norm0[0] + KV[1][1]*norm0[1] + KV[1][2]*norm0[2], n, m, 0, 1);
  1220. write_coeff(KV[2][0]*norm0[0] + KV[2][1]*norm0[1] + KV[2][2]*norm0[2], n, m, 0, 2);
  1221. write_coeff(KW[0][0]*norm0[0] + KW[0][1]*norm0[1] + KW[0][2]*norm0[2], n, m, 1, 0);
  1222. write_coeff(KW[1][0]*norm0[0] + KW[1][1]*norm0[1] + KW[1][2]*norm0[2], n, m, 1, 1);
  1223. write_coeff(KW[2][0]*norm0[0] + KW[2][1]*norm0[1] + KW[2][2]*norm0[2], n, m, 1, 2);
  1224. write_coeff(KX[0][0]*norm0[0] + KX[0][1]*norm0[1] + KX[0][2]*norm0[2], n, m, 2, 0);
  1225. write_coeff(KX[1][0]*norm0[0] + KX[1][1]*norm0[1] + KX[1][2]*norm0[2], n, m, 2, 1);
  1226. write_coeff(KX[2][0]*norm0[0] + KX[2][1]*norm0[1] + KX[2][2]*norm0[2], n, m, 2, 2);
  1227. }
  1228. }
  1229. }
  1230. { // Set X <-- Q * StokesOp * B1
  1231. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  1232. for (Long k0 = 0; k0 < N; k0++) {
  1233. StaticArray<Real,9> Q;
  1234. { // Set Q
  1235. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  1236. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  1237. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  1238. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  1239. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  1240. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  1241. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  1242. }
  1243. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  1244. StaticArray<Real,COORD_DIM> in;
  1245. for (Long j = 0; j < COORD_DIM; j++) {
  1246. in[j] = 0;
  1247. for (Long i = 0; i < COORD_DIM * M; i++) {
  1248. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  1249. }
  1250. }
  1251. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  1252. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  1253. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  1254. }
  1255. }
  1256. }
  1257. }
  1258. template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
  1259. const auto& Mf = OpFourierInv(Np);
  1260. assert(Mf.Dim(0) == Np);
  1261. const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
  1262. assert((Long)Ml.size() == p1+1);
  1263. Long N = X.Dim() / (Np*Nt);
  1264. assert(X.Dim() == N*Np*Nt);
  1265. Vector<Real> B0((2*p1+1) * N*Nt);
  1266. #pragma omp parallel
  1267. { // B0 <-- Transpose(FFT(X))
  1268. Integer tid=omp_get_thread_num();
  1269. Integer omp_p=omp_get_num_threads();
  1270. Long a=(tid+0)*N*Nt/omp_p;
  1271. Long b=(tid+1)*N*Nt/omp_p;
  1272. Vector<Real> buff(Mf.Dim(1));
  1273. Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
  1274. Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
  1275. const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
  1276. for (Long i = a; i < b; i++) {
  1277. { // buff <-- FFT(Xi)
  1278. const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
  1279. Mf.Execute(Xi, buff);
  1280. }
  1281. { // B0 <-- Transpose(buff)
  1282. B0_[0][i] = buff[0]; // skipping buff[1] == 0
  1283. for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
  1284. for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
  1285. }
  1286. }
  1287. }
  1288. if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
  1289. #pragma omp parallel
  1290. { // Evaluate Legendre polynomial
  1291. Integer tid=omp_get_thread_num();
  1292. Integer omp_p=omp_get_num_threads();
  1293. Long offset0=0;
  1294. Long offset1=0;
  1295. for (Long i = 0; i < p1+1; i++) {
  1296. Long N_ = (i==0 ? N : 2*N);
  1297. Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
  1298. Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
  1299. { // Mout = Min * Ml[i] // split between threads
  1300. Long a=(tid+0)*N_/omp_p;
  1301. Long b=(tid+1)*N_/omp_p;
  1302. if (a < b) {
  1303. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1304. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1305. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1306. }
  1307. }
  1308. offset0+=Min .Dim(0)*Min .Dim(1);
  1309. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1310. }
  1311. assert(offset0 == B0.Dim());
  1312. assert(offset1 == B1.Dim());
  1313. }
  1314. B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1315. }
  1316. template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
  1317. Long M = (p1+1)*(p1+1);
  1318. Long N = B1.Dim() / M;
  1319. assert(B1.Dim() == N*M);
  1320. if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
  1321. Long M = 2*(p1+1)*(p1+1);
  1322. if(S.Dim() != N * M) S.ReInit(N * M);
  1323. #pragma omp parallel
  1324. { // S <-- Rearrange(B1)
  1325. Integer tid=omp_get_thread_num();
  1326. Integer omp_p=omp_get_num_threads();
  1327. Long a=(tid+0)*N/omp_p;
  1328. Long b=(tid+1)*N/omp_p;
  1329. for (Long i = a; i < b; i++) {
  1330. Long offset = 0;
  1331. for (Long j = 0; j < p1+1; j++) {
  1332. Long len = p1+1 - j;
  1333. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1334. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1335. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
  1336. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  1337. offset += len;
  1338. }
  1339. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1340. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1341. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  1342. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  1343. offset += len;
  1344. } else {
  1345. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  1346. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
  1347. }
  1348. }
  1349. }
  1350. }
  1351. }
  1352. if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
  1353. Long M = (p1+1)*(p1+2);
  1354. if(S.Dim() != N * M) S.ReInit(N * M);
  1355. #pragma omp parallel
  1356. { // S <-- Rearrange(B1)
  1357. Integer tid=omp_get_thread_num();
  1358. Integer omp_p=omp_get_num_threads();
  1359. Long a=(tid+0)*N/omp_p;
  1360. Long b=(tid+1)*N/omp_p;
  1361. for (Long i = a; i < b; i++) {
  1362. Long offset = 0;
  1363. for (Long j = 0; j < p1+1; j++) {
  1364. Long len = p1+1 - j;
  1365. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1366. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1367. Iterator<Real> S_ = S .begin() + i*M + 0;
  1368. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  1369. offset += len;
  1370. }
  1371. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1372. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1373. Iterator<Real> S_ = S .begin() + i*M + 1;
  1374. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  1375. offset += len;
  1376. } else {
  1377. Iterator<Real> S_ = S .begin() + i*M + 1;
  1378. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
  1379. }
  1380. }
  1381. }
  1382. }
  1383. }
  1384. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
  1385. Long M = (p1+1)*(p1+1);
  1386. if(S.Dim() != N * M) S.ReInit(N * M);
  1387. #pragma omp parallel
  1388. { // S <-- Rearrange(B1)
  1389. Integer tid=omp_get_thread_num();
  1390. Integer omp_p=omp_get_num_threads();
  1391. Long a=(tid+0)*N/omp_p;
  1392. Long b=(tid+1)*N/omp_p;
  1393. for (Long i = a; i < b; i++) {
  1394. Long offset = 0;
  1395. for (Long j = 0; j < p1+1; j++) {
  1396. Long len = p1+1 - j;
  1397. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1398. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1399. Iterator<Real> S_ = S .begin() + i*M + offset;
  1400. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  1401. offset += len;
  1402. }
  1403. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1404. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1405. Iterator<Real> S_ = S .begin() + i*M + offset;
  1406. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  1407. offset += len;
  1408. }
  1409. }
  1410. }
  1411. }
  1412. }
  1413. }
  1414. template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
  1415. Long M, N;
  1416. { // Set M, N
  1417. M = 0;
  1418. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  1419. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  1420. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  1421. if (M == 0) return;
  1422. N = S.Dim() / M;
  1423. assert(S.Dim() == N * M);
  1424. }
  1425. if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
  1426. if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
  1427. #pragma omp parallel
  1428. { // B0 <-- Rearrange(S)
  1429. Integer tid=omp_get_thread_num();
  1430. Integer omp_p=omp_get_num_threads();
  1431. Long a=(tid+0)*N/omp_p;
  1432. Long b=(tid+1)*N/omp_p;
  1433. for (Long i = a; i < b; i++) {
  1434. Long offset = 0;
  1435. for (Long j = 0; j < p0+1; j++) {
  1436. Long len = p0+1 - j;
  1437. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1438. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1439. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
  1440. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1441. offset += len;
  1442. }
  1443. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1444. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1445. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
  1446. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1447. offset += len;
  1448. }
  1449. }
  1450. }
  1451. }
  1452. }
  1453. if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
  1454. #pragma omp parallel
  1455. { // B0 <-- Rearrange(S)
  1456. Integer tid=omp_get_thread_num();
  1457. Integer omp_p=omp_get_num_threads();
  1458. Long a=(tid+0)*N/omp_p;
  1459. Long b=(tid+1)*N/omp_p;
  1460. for (Long i = a; i < b; i++) {
  1461. Long offset = 0;
  1462. for (Long j = 0; j < p0+1; j++) {
  1463. Long len = p0+1 - j;
  1464. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1465. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1466. ConstIterator<Real> S_ = S .begin() + i*M + 0;
  1467. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1468. offset += len;
  1469. }
  1470. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1471. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1472. ConstIterator<Real> S_ = S .begin() + i*M + 1;
  1473. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1474. offset += len;
  1475. }
  1476. }
  1477. }
  1478. }
  1479. }
  1480. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
  1481. #pragma omp parallel
  1482. { // B0 <-- Rearrange(S)
  1483. Integer tid=omp_get_thread_num();
  1484. Integer omp_p=omp_get_num_threads();
  1485. Long a=(tid+0)*N/omp_p;
  1486. Long b=(tid+1)*N/omp_p;
  1487. for (Long i = a; i < b; i++) {
  1488. Long offset = 0;
  1489. for (Long j = 0; j < p0+1; j++) {
  1490. Long len = p0+1 - j;
  1491. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1492. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1493. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1494. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1495. offset += len;
  1496. }
  1497. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1498. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1499. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1500. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1501. offset += len;
  1502. }
  1503. }
  1504. }
  1505. }
  1506. }
  1507. }
  1508. template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  1509. const auto& Mf = OpFourier(Np);
  1510. assert(Mf.Dim(1) == Np);
  1511. const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
  1512. const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
  1513. assert((Long)Ml .size() == p0+1);
  1514. assert((Long)Mdl.size() == p0+1);
  1515. Long N = B0.Dim() / ((p0+1)*(p0+1));
  1516. assert(B0.Dim() == N*(p0+1)*(p0+1));
  1517. if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
  1518. if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
  1519. if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
  1520. Vector<Real> B1(N*(2*p0+1)*Nt);
  1521. if(X || X_phi){
  1522. #pragma omp parallel
  1523. { // Evaluate Legendre polynomial
  1524. Integer tid=omp_get_thread_num();
  1525. Integer omp_p=omp_get_num_threads();
  1526. Long offset0=0;
  1527. Long offset1=0;
  1528. for(Long i=0;i<p0+1;i++){
  1529. Long N_ = (i==0 ? N : 2*N);
  1530. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1531. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1532. { // Mout = Min * Ml[i] // split between threads
  1533. Long a=(tid+0)*N_/omp_p;
  1534. Long b=(tid+1)*N_/omp_p;
  1535. if(a<b){
  1536. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1537. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1538. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1539. }
  1540. }
  1541. offset0+=Min .Dim(0)*Min .Dim(1);
  1542. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1543. }
  1544. }
  1545. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1546. #pragma omp parallel
  1547. { // Transpose and evaluate Fourier
  1548. Integer tid=omp_get_thread_num();
  1549. Integer omp_p=omp_get_num_threads();
  1550. Long a=(tid+0)*N*Nt/omp_p;
  1551. Long b=(tid+1)*N*Nt/omp_p;
  1552. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1553. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1554. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1555. for (Long i = a; i < b; i++) {
  1556. { // buff <-- Transpose(B1)
  1557. buff[0] = B1_[0][i];
  1558. buff[1] = 0;
  1559. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1560. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1561. }
  1562. { // X <-- FFT(buff)
  1563. Vector<Real> Xi(Np, X->begin() + Np * i, false);
  1564. Mf.Execute(buff, Xi);
  1565. }
  1566. if(X_phi){ // Evaluate Fourier gradient
  1567. { // buff <-- Transpose(B1)
  1568. buff[0] = 0;
  1569. buff[1] = 0;
  1570. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1571. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1572. for (Long j = 1; j < buff.Dim()/2; j++) {
  1573. Real x = buff[2*j+0];
  1574. Real y = buff[2*j+1];
  1575. buff[2*j+0] = -j*y;
  1576. buff[2*j+1] = j*x;
  1577. }
  1578. }
  1579. { // X_phi <-- FFT(buff)
  1580. Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
  1581. Mf.Execute(buff, Xi);
  1582. }
  1583. }
  1584. }
  1585. }
  1586. }
  1587. if(X_theta){
  1588. #pragma omp parallel
  1589. { // Evaluate Legendre gradient
  1590. Integer tid=omp_get_thread_num();
  1591. Integer omp_p=omp_get_num_threads();
  1592. Long offset0=0;
  1593. Long offset1=0;
  1594. for(Long i=0;i<p0+1;i++){
  1595. Long N_ = (i==0 ? N : 2*N);
  1596. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1597. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1598. { // Mout = Min * Mdl[i] // split between threads
  1599. Long a=(tid+0)*N_/omp_p;
  1600. Long b=(tid+1)*N_/omp_p;
  1601. if(a<b){
  1602. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1603. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1604. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  1605. }
  1606. }
  1607. offset0+=Min .Dim(0)*Min .Dim(1);
  1608. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1609. }
  1610. }
  1611. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1612. #pragma omp parallel
  1613. { // Transpose and evaluate Fourier
  1614. Integer tid=omp_get_thread_num();
  1615. Integer omp_p=omp_get_num_threads();
  1616. Long a=(tid+0)*N*Nt/omp_p;
  1617. Long b=(tid+1)*N*Nt/omp_p;
  1618. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1619. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1620. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1621. for (Long i = a; i < b; i++) {
  1622. { // buff <-- Transpose(B1)
  1623. buff[0] = B1_[0][i];
  1624. buff[1] = 0;
  1625. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1626. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1627. }
  1628. { // Xi <-- FFT(buff)
  1629. Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
  1630. Mf.Execute(buff, Xi);
  1631. }
  1632. }
  1633. }
  1634. }
  1635. }
  1636. template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1637. Vector<Real> theta(X.Dim());
  1638. for (Long i = 0; i < X.Dim(); i++) theta[i] = acos(X[i]);
  1639. LegPoly_(poly_val, theta, degree);
  1640. }
  1641. template <class Real> void SphericalHarmonics<Real>::LegPoly_(Vector<Real>& poly_val, const Vector<Real>& theta, Long degree){
  1642. Long N = theta.Dim();
  1643. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1644. if (poly_val.Dim() != Npoly * N) poly_val.ReInit(Npoly * N);
  1645. Real fact = 1 / sqrt<Real>(4 * const_pi<Real>());
  1646. Vector<Real> cos_theta(N), sin_theta(N);
  1647. for (Long n = 0; n < N; n++) {
  1648. cos_theta[n] = cos(theta[n]);
  1649. sin_theta[n] = sin(theta[n]);
  1650. poly_val[n] = fact;
  1651. }
  1652. Long idx = 0;
  1653. Long idx_nxt = 0;
  1654. for (Long i = 1; i <= degree; i++) {
  1655. idx_nxt += N*(degree-i+2);
  1656. Real c = sqrt<Real>((2*i+1)/(Real)(2*i));
  1657. for (Long n = 0; n < N; n++) {
  1658. poly_val[idx_nxt+n] = -poly_val[idx+n] * sin_theta[n] * c;
  1659. }
  1660. idx = idx_nxt;
  1661. }
  1662. idx = 0;
  1663. for (Long m = 0; m < degree; m++) {
  1664. for (Long n = 0; n < N; n++) {
  1665. Real pmm = 0;
  1666. Real pmmp1 = poly_val[idx+n];
  1667. for (Long ll = m + 1; ll <= degree; ll++) {
  1668. Real a = sqrt<Real>(((2*ll-1)*(2*ll+1) ) / (Real)((ll-m)*(ll+m) ));
  1669. Real b = sqrt<Real>(((2*ll+1)*(ll+m-1)*(ll-m-1)) / (Real)((ll-m)*(ll+m)*(2*ll-3)));
  1670. Real pll = cos_theta[n]*a*pmmp1 - b*pmm;
  1671. pmm = pmmp1;
  1672. pmmp1 = pll;
  1673. poly_val[idx + N*(ll-m) + n] = pll;
  1674. }
  1675. }
  1676. idx += N * (degree - m + 1);
  1677. }
  1678. }
  1679. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1680. Vector<Real> theta(X.Dim());
  1681. for (Long i = 0; i < X.Dim(); i++) theta[i] = acos(X[i]);
  1682. LegPolyDeriv_(poly_val, theta, degree);
  1683. }
  1684. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv_(Vector<Real>& poly_val, const Vector<Real>& theta, Long degree){
  1685. Long N = theta.Dim();
  1686. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1687. if (poly_val.Dim() != N * Npoly) poly_val.ReInit(N * Npoly);
  1688. Vector<Real> cos_theta(N), sin_theta(N);
  1689. for (Long i = 0; i < N; i++) {
  1690. cos_theta[i] = cos(theta[i]);
  1691. sin_theta[i] = sin(theta[i]);
  1692. }
  1693. Vector<Real> leg_poly(Npoly * N);
  1694. LegPoly_(leg_poly, theta, degree);
  1695. for (Long m = 0; m <= degree; m++) {
  1696. for (Long n = m; n <= degree; n++) {
  1697. ConstIterator<Real> Pn = leg_poly.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1698. ConstIterator<Real> Pn_ = leg_poly.begin() + N * ((degree * 2 - m + 0) * (m + 1) / 2 + n) * (m < n);
  1699. Iterator <Real> Hn = poly_val.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1700. Real c2 = sqrt<Real>(m<n ? (n+m+1)*(n-m) : 0);
  1701. for (Long i = 0; i < N; i++) {
  1702. Real c1 = (sin_theta[i]>0 ? m/sin_theta[i] : 0);
  1703. Hn[i] = c1*cos_theta[i]*Pn[i] + c2*Pn_[i];
  1704. }
  1705. }
  1706. }
  1707. }
  1708. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p){
  1709. assert(p<SCTL_SHMAXDEG);
  1710. Vector<Real>& Qx=MatrixStore().Qx_[p];
  1711. #pragma omp critical (SCTL_LEGNODES)
  1712. if(!Qx.Dim()){
  1713. Vector<double> qx1(p+1);
  1714. Vector<double> qw1(p+1);
  1715. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1716. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1717. if (Qx.Dim() != p+1) Qx.ReInit(p+1);
  1718. for (Long i = 0; i < p + 1; i++) Qx[i] = -qx1[i];
  1719. }
  1720. return Qx;
  1721. }
  1722. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p){
  1723. assert(p<SCTL_SHMAXDEG);
  1724. Vector<Real>& Qw=MatrixStore().Qw_[p];
  1725. #pragma omp critical (SCTL_LEGWEIGHTS)
  1726. if(!Qw.Dim()){
  1727. Vector<double> qx1(p+1);
  1728. Vector<double> qw1(p+1);
  1729. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1730. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1731. if (Qw.Dim() != p+1) Qw.ReInit(p+1);
  1732. for (Long i = 0; i < p + 1; i++) Qw[i] = qw1[i];
  1733. }
  1734. return Qw;
  1735. }
  1736. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  1737. assert(p1<SCTL_SHMAXDEG);
  1738. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  1739. #pragma omp critical (SCTL_SINWEIGHTS)
  1740. if(!Sw.Dim()){
  1741. const Vector<Real>& qx1 = LegendreNodes(p1);
  1742. const Vector<Real>& qw1 = LegendreWeights(p1);
  1743. std::vector<Real> Yf(p1+1,0);
  1744. { // Set Yf
  1745. Vector<Real> x0(1); x0=1.0;
  1746. Vector<Real> alp0((p1+1)*(p1+2)/2);
  1747. LegPoly(alp0, x0, p1);
  1748. Vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  1749. LegPoly(alp, qx1, p1);
  1750. for(Long j=0;j<p1+1;j++){
  1751. for(Long i=0;i<p1+1;i++){
  1752. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  1753. }
  1754. }
  1755. }
  1756. Sw.ReInit(p1+1);
  1757. for(Long i=0;i<p1+1;i++){
  1758. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  1759. }
  1760. }
  1761. return Sw;
  1762. }
  1763. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  1764. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1765. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  1766. #pragma omp critical (SCTL_MATFOURIER)
  1767. if(!Mf.Dim(0)){
  1768. const Real SQRT2PI=sqrt(2*M_PI);
  1769. { // Set Mf
  1770. Matrix<Real> M(2*p0,2*p1);
  1771. for(Long j=0;j<2*p1;j++){
  1772. M[0][j]=SQRT2PI*1.0;
  1773. for(Long k=1;k<p0;k++){
  1774. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  1775. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  1776. }
  1777. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  1778. }
  1779. Mf=M;
  1780. }
  1781. }
  1782. return Mf;
  1783. }
  1784. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  1785. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1786. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  1787. #pragma omp critical (SCTL_MATFOURIERINV)
  1788. if(!Mf.Dim(0)){
  1789. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  1790. { // Set Mf
  1791. Matrix<Real> M(2*p0,2*p1);
  1792. M.SetZero();
  1793. if(p1>p0) p1=p0;
  1794. for(Long j=0;j<2*p0;j++){
  1795. M[j][0]=INVSQRT2PI*0.5;
  1796. for(Long k=1;k<p1;k++){
  1797. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  1798. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  1799. }
  1800. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  1801. }
  1802. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  1803. Mf=M;
  1804. }
  1805. }
  1806. return Mf;
  1807. }
  1808. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  1809. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1810. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  1811. #pragma omp critical (SCTL_MATFOURIERGRAD)
  1812. if(!Mdf.Dim(0)){
  1813. const Real SQRT2PI=sqrt(2*M_PI);
  1814. { // Set Mdf_
  1815. Matrix<Real> M(2*p0,2*p1);
  1816. for(Long j=0;j<2*p1;j++){
  1817. M[0][j]=SQRT2PI*0.0;
  1818. for(Long k=1;k<p0;k++){
  1819. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  1820. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  1821. }
  1822. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  1823. }
  1824. Mdf=M;
  1825. }
  1826. }
  1827. return Mdf;
  1828. }
  1829. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourier(Long Np){
  1830. assert(Np<SCTL_SHMAXDEG);
  1831. auto& Mf =MatrixStore().Mfftinv_ [Np];
  1832. #pragma omp critical (SCTL_FFT_PLAN0)
  1833. if(!Mf.Dim(0)){
  1834. StaticArray<Long,1> fft_dim = {Np};
  1835. Mf.Setup(FFT_Type::C2R, 1, Vector<Long>(1,fft_dim,false));
  1836. }
  1837. return Mf;
  1838. }
  1839. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourierInv(Long Np){
  1840. assert(Np<SCTL_SHMAXDEG);
  1841. auto& Mf =MatrixStore().Mfft_ [Np];
  1842. #pragma omp critical (SCTL_FFT_PLAN1)
  1843. if(!Mf.Dim(0)){
  1844. StaticArray<Long,1> fft_dim = {Np};
  1845. Mf.Setup(FFT_Type::R2C, 1, Vector<Long>(1,fft_dim,false));
  1846. }
  1847. return Mf;
  1848. }
  1849. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  1850. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1851. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  1852. #pragma omp critical (SCTL_MATLEG)
  1853. if(!Ml.size()){
  1854. const Vector<Real>& qx1 = LegendreNodes(p1);
  1855. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1856. LegPoly(alp, qx1, p0);
  1857. Ml.resize(p0+1);
  1858. auto ptr = alp.begin();
  1859. for(Long i=0;i<=p0;i++){
  1860. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1861. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1862. }
  1863. }
  1864. return Ml;
  1865. }
  1866. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  1867. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1868. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  1869. #pragma omp critical (SCTL_MATLEGINV)
  1870. if(!Ml.size()){
  1871. const Vector<Real>& qx1 = LegendreNodes(p0);
  1872. const Vector<Real>& qw1 = LegendreWeights(p0);
  1873. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  1874. LegPoly(alp, qx1, p1);
  1875. Ml.resize(p1+1);
  1876. auto ptr = alp.begin();
  1877. for(Long i=0;i<=p1;i++){
  1878. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  1879. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  1880. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  1881. for(Long k=0;k<qx1.Dim();k++){
  1882. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  1883. }
  1884. }
  1885. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1886. }
  1887. }
  1888. return Ml;
  1889. }
  1890. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  1891. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1892. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  1893. #pragma omp critical (SCTL_MATLEGGRAD)
  1894. if(!Mdl.size()){
  1895. const Vector<Real>& qx1 = LegendreNodes(p1);
  1896. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1897. LegPolyDeriv(alp, qx1, p0);
  1898. Mdl.resize(p0+1);
  1899. auto ptr = alp.begin();
  1900. for(Long i=0;i<=p0;i++){
  1901. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1902. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  1903. }
  1904. }
  1905. return Mdl;
  1906. }
  1907. template <class Real> void SphericalHarmonics<Real>::SHBasisEval(Long p0, const Vector<Real>& theta_phi, Matrix<Real>& SHBasis) {
  1908. Long M = (p0+1) * (p0+1);
  1909. Long N = theta_phi.Dim() / 2;
  1910. assert(theta_phi.Dim() == N * 2);
  1911. Vector<Complex<Real>> exp_phi(N);
  1912. Matrix<Real> LegP((p0+1)*(p0+2)/2, N);
  1913. { // Set exp_phi, LegP
  1914. Vector<Real> theta(N);
  1915. for (Long i = 0; i < N; i++) { // Set theta, exp_phi
  1916. theta[i] = theta_phi[i*2+0];
  1917. exp_phi[i].real = cos<Real>(theta_phi[i*2+1]);
  1918. exp_phi[i].imag = sin<Real>(theta_phi[i*2+1]);
  1919. }
  1920. Vector<Real> alp(LegP.Dim(0) * LegP.Dim(1), LegP.begin(), false);
  1921. LegPoly_(alp, theta, p0);
  1922. }
  1923. { // Set SHBasis
  1924. SHBasis.ReInit(N, M);
  1925. Real s = 4 * sqrt<Real>(const_pi<Real>());
  1926. for (Long k0 = 0; k0 < N; k0++) {
  1927. Complex<Real> exp_phi_ = 1;
  1928. Complex<Real> exp_phi1 = exp_phi[k0];
  1929. for (Long m = 0; m <= p0; m++) {
  1930. for (Long n = m; n <= p0; n++) {
  1931. Long poly_idx = (2 * p0 - m + 1) * m / 2 + n;
  1932. Long basis_idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1933. SHBasis[k0][basis_idx] = LegP[poly_idx][k0] * exp_phi_.real * s;
  1934. if (m) { // imaginary part
  1935. basis_idx += (p0+1-m);
  1936. SHBasis[k0][basis_idx] = -LegP[poly_idx][k0] * exp_phi_.imag * s;
  1937. } else {
  1938. SHBasis[k0][basis_idx] = SHBasis[k0][basis_idx] * 0.5;
  1939. }
  1940. }
  1941. exp_phi_ = exp_phi_ * exp_phi1;
  1942. }
  1943. }
  1944. }
  1945. assert(SHBasis.Dim(0) == N);
  1946. assert(SHBasis.Dim(1) == M);
  1947. }
  1948. template <class Real> void SphericalHarmonics<Real>::VecSHBasisEval(Long p0, const Vector<Real>& theta_phi, Matrix<Real>& SHBasis) {
  1949. Long M = (p0+1) * (p0+1);
  1950. Long N = theta_phi.Dim() / 2;
  1951. assert(theta_phi.Dim() == N * 2);
  1952. Long p_ = p0 + 1;
  1953. Long M_ = (p_+1) * (p_+1);
  1954. Matrix<Real> Ynm(N, M_);
  1955. SHBasisEval(p_, theta_phi, Ynm);
  1956. Vector<Real> cos_theta(N), csc_theta(N);
  1957. for (Long i = 0; i < N; i++) { // Set theta
  1958. cos_theta[i] = cos(theta_phi[i*2+0]);
  1959. csc_theta[i] = 1.0 / sin(theta_phi[i*2+0]);
  1960. }
  1961. { // Set SHBasis
  1962. SHBasis.ReInit(N * COORD_DIM, COORD_DIM * M);
  1963. SHBasis = 0;
  1964. const Complex<Real> imag(0,1);
  1965. for (Long i = 0; i < N; i++) {
  1966. auto Y = [p_, &Ynm, i](Long n, Long m) {
  1967. Complex<Real> c;
  1968. if (0 <= m && m <= n && n <= p_) {
  1969. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  1970. c.real = Ynm[i][idx];
  1971. if (m) {
  1972. idx += (p_+1-m);
  1973. c.imag = Ynm[i][idx];
  1974. }
  1975. }
  1976. return c;
  1977. };
  1978. auto write_coeff = [p0, &SHBasis, i, M](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  1979. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  1980. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1981. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  1982. if (m) {
  1983. idx += (p0+1-m);
  1984. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  1985. }
  1986. }
  1987. };
  1988. auto A = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  1989. auto B = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  1990. if (fabs(csc_theta[i]) > 0) {
  1991. for (Long m = 0; m <= p0; m++) {
  1992. for (Long n = m; n <= p0; n++) {
  1993. Complex<Real> AYBY = A(n,m) * Y(n+1,m) - B(n,m) * Y(n-1,m);
  1994. Complex<Real> Fv2r = Y(n,m) * (-n-1);
  1995. Complex<Real> Fw2r = Y(n,m) * n;
  1996. Complex<Real> Fx2r = 0;
  1997. Complex<Real> Fv2t = AYBY * csc_theta[i];
  1998. Complex<Real> Fw2t = AYBY * csc_theta[i];
  1999. Complex<Real> Fx2t = imag * m * Y(n,m) * csc_theta[i];
  2000. Complex<Real> Fv2p = -imag * m * Y(n,m) * csc_theta[i];
  2001. Complex<Real> Fw2p = -imag * m * Y(n,m) * csc_theta[i];
  2002. Complex<Real> Fx2p = AYBY * csc_theta[i];
  2003. write_coeff(Fv2r, n, m, 0, 0);
  2004. write_coeff(Fw2r, n, m, 1, 0);
  2005. write_coeff(Fx2r, n, m, 2, 0);
  2006. write_coeff(Fv2t, n, m, 0, 1);
  2007. write_coeff(Fw2t, n, m, 1, 1);
  2008. write_coeff(Fx2t, n, m, 2, 1);
  2009. write_coeff(Fv2p, n, m, 0, 2);
  2010. write_coeff(Fw2p, n, m, 1, 2);
  2011. write_coeff(Fx2p, n, m, 2, 2);
  2012. }
  2013. }
  2014. } else {
  2015. Complex<Real> exp_phi;
  2016. exp_phi.real = cos<Real>(theta_phi[i*2+1]);
  2017. exp_phi.imag = -sin<Real>(theta_phi[i*2+1]);
  2018. for (Long m = 0; m <= p0; m++) {
  2019. for (Long n = m; n <= p0; n++) {
  2020. Complex<Real> Fv2r = 0;
  2021. Complex<Real> Fw2r = 0;
  2022. Complex<Real> Fx2r = 0;
  2023. Complex<Real> Fv2t = 0;
  2024. Complex<Real> Fw2t = 0;
  2025. Complex<Real> Fx2t = 0;
  2026. Complex<Real> Fv2p = 0;
  2027. Complex<Real> Fw2p = 0;
  2028. Complex<Real> Fx2p = 0;
  2029. if (m == 0) {
  2030. Fv2r = Y(n,m) * (-n-1);
  2031. Fw2r = Y(n,m) * n;
  2032. Fx2r = 0;
  2033. }
  2034. if (m == 1) {
  2035. auto Ycsc = [&cos_theta, &exp_phi, i](Long n) { return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta[i]<0) ? -1 : 1) * exp_phi; };
  2036. Complex<Real> AYBY = A(n,m) * Ycsc(n+1) - B(n,m) * Ycsc(n-1);
  2037. Fv2t = AYBY;
  2038. Fw2t = AYBY;
  2039. Fx2t = imag * m * Ycsc(n);
  2040. Fv2p =-imag * m * Ycsc(n);
  2041. Fw2p =-imag * m * Ycsc(n);
  2042. Fx2p = AYBY;
  2043. }
  2044. write_coeff(Fv2r, n, m, 0, 0);
  2045. write_coeff(Fw2r, n, m, 1, 0);
  2046. write_coeff(Fx2r, n, m, 2, 0);
  2047. write_coeff(Fv2t, n, m, 0, 1);
  2048. write_coeff(Fw2t, n, m, 1, 1);
  2049. write_coeff(Fx2t, n, m, 2, 1);
  2050. write_coeff(Fv2p, n, m, 0, 2);
  2051. write_coeff(Fw2p, n, m, 1, 2);
  2052. write_coeff(Fx2p, n, m, 2, 2);
  2053. }
  2054. }
  2055. }
  2056. }
  2057. }
  2058. assert(SHBasis.Dim(0) == N * COORD_DIM);
  2059. assert(SHBasis.Dim(1) == COORD_DIM * M);
  2060. }
  2061. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  2062. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2063. { // Set coeff_perm
  2064. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2065. Long itr=0;
  2066. for(Long i=0;i<2*p0;i++){
  2067. Long m=(i+1)/2;
  2068. for(Long n=m;n<=p0;n++){
  2069. coeff_perm[n][i]=itr;
  2070. itr++;
  2071. }
  2072. }
  2073. }
  2074. assert(p0<SCTL_SHMAXDEG);
  2075. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  2076. #pragma omp critical (SCTL_MATROTATE)
  2077. if(!Mr.size()){
  2078. const Real SQRT2PI=sqrt(2*M_PI);
  2079. Long Ncoef=p0*(p0+2);
  2080. Long Ngrid=2*p0*(p0+1);
  2081. Long Naleg=(p0+1)*(p0+2)/2;
  2082. Matrix<Real> Mcoord0(3,Ngrid);
  2083. const Vector<Real>& x=LegendreNodes(p0);
  2084. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  2085. for(Long j=0;j<2*p0;j++){
  2086. Mcoord0[0][i*2*p0+j]=x[i];
  2087. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  2088. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  2089. }
  2090. }
  2091. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  2092. Matrix<Real> Mcoord1;
  2093. { // Rotate coordinates
  2094. Matrix<Real> M(COORD_DIM, COORD_DIM);
  2095. Real cos_=-x[l];
  2096. Real sin_=-sqrt(1.0-x[l]*x[l]);
  2097. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  2098. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  2099. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  2100. Mcoord1=M*Mcoord0;
  2101. }
  2102. Matrix<Real> Mleg(Naleg, Ngrid);
  2103. { // Set Mleg
  2104. const Vector<Real> Vcoord1(Mcoord1.Dim(0)*Mcoord1.Dim(1), Mcoord1.begin(), false);
  2105. Vector<Real> Vleg(Mleg.Dim(0)*Mleg.Dim(1), Mleg.begin(), false);
  2106. LegPoly(Vleg, Vcoord1, p0);
  2107. }
  2108. Vector<Real> theta(Ngrid);
  2109. for(Long i=0;i<theta.Dim();i++){ // Set theta
  2110. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]); // TODO: works only for float and double
  2111. }
  2112. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  2113. { // Build Mcoef2grid
  2114. Long offset0=0;
  2115. Long offset1=0;
  2116. for(Long i=0;i<p0+1;i++){
  2117. Long len=p0+1-i;
  2118. { // P * cos
  2119. for(Long j=0;j<len;j++){
  2120. for(Long k=0;k<Ngrid;k++){
  2121. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  2122. }
  2123. }
  2124. offset1+=len;
  2125. }
  2126. if(i!=0 && i!=p0){ // P * sin
  2127. for(Long j=0;j<len;j++){
  2128. for(Long k=0;k<Ngrid;k++){
  2129. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  2130. }
  2131. }
  2132. offset1+=len;
  2133. }
  2134. offset0+=len;
  2135. }
  2136. assert(offset0==Naleg);
  2137. assert(offset1==Ncoef);
  2138. }
  2139. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  2140. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  2141. Grid2SHC(Vcoef2grid, p0+1, 2*p0, p0, Vcoef2coef, SHCArrange::COL_MAJOR_NONZERO);
  2142. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  2143. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  2144. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  2145. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  2146. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  2147. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  2148. }
  2149. }
  2150. Mr.push_back(M);
  2151. }
  2152. }
  2153. }
  2154. return Mr;
  2155. }
  2156. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  2157. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  2158. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  2159. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  2160. assert(p1==(Long)Ml.size()-1);
  2161. assert(p0==Mf.Dim(0)/2);
  2162. assert(p1==Mf.Dim(1)/2);
  2163. Long N=X.Dim()/(2*p0*(p0+1));
  2164. assert(N*2*p0*(p0+1)==X.Dim());
  2165. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  2166. Vector<Real> B0, B1;
  2167. B0.ReInit(N* p1*(p1+2));
  2168. B1.ReInit(N*2*p1*(p0+1));
  2169. #pragma omp parallel
  2170. { // Evaluate Fourier and transpose
  2171. Integer tid=omp_get_thread_num();
  2172. Integer omp_p=omp_get_num_threads();
  2173. Long a=(tid+0)*N*(p0+1)/omp_p;
  2174. Long b=(tid+1)*N*(p0+1)/omp_p;
  2175. const Long block_size=16;
  2176. Matrix<Real> B2(block_size,2*p1);
  2177. for(Long i0=a;i0<b;i0+=block_size){
  2178. Long i1=std::min(b,i0+block_size);
  2179. const Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  2180. Matrix<Real> Mout(i1-i0,2*p1, B2.begin(), false);
  2181. Matrix<Real>::GEMM(Mout, Min, Mf);
  2182. for(Long i=i0;i<i1;i++){
  2183. for(Long j=0;j<2*p1;j++){
  2184. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  2185. }
  2186. }
  2187. }
  2188. }
  2189. #pragma omp parallel
  2190. { // Evaluate Legendre polynomial
  2191. Integer tid=omp_get_thread_num();
  2192. Integer omp_p=omp_get_num_threads();
  2193. Long offset0=0;
  2194. Long offset1=0;
  2195. for(Long i=0;i<p1+1;i++){
  2196. Long N0=2*N;
  2197. if(i==0 || i==p1) N0=N;
  2198. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  2199. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  2200. { // Mout = Min * Ml[i] // split between threads
  2201. Long a=(tid+0)*N0/omp_p;
  2202. Long b=(tid+1)*N0/omp_p;
  2203. if(a<b){
  2204. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  2205. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  2206. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  2207. }
  2208. }
  2209. offset0+=Min .Dim(0)*Min .Dim(1);
  2210. offset1+=Mout.Dim(0)*Mout.Dim(1);
  2211. }
  2212. }
  2213. #pragma omp parallel
  2214. { // S <-- Rearrange(B0)
  2215. Integer tid=omp_get_thread_num();
  2216. Integer omp_p=omp_get_num_threads();
  2217. Long a=(tid+0)*N/omp_p;
  2218. Long b=(tid+1)*N/omp_p;
  2219. for(Long i=a;i<b;i++){
  2220. Long offset=0;
  2221. for(Long j=0;j<2*p1;j++){
  2222. Long len=p1+1-(j+1)/2;
  2223. Real* B_=&B0[i*len+N*offset];
  2224. Real* S_=&S[i*p1*(p1+2)+offset];
  2225. for(Long k=0;k<len;k++) S_[k]=B_[k];
  2226. offset+=len;
  2227. }
  2228. }
  2229. }
  2230. }
  2231. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  2232. const std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  2233. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2234. { // Set coeff_perm
  2235. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2236. Long itr=0;
  2237. for(Long i=0;i<2*p0;i++){
  2238. Long m=(i+1)/2;
  2239. for(Long n=m;n<=p0;n++){
  2240. coeff_perm[n][i]=itr;
  2241. itr++;
  2242. }
  2243. }
  2244. }
  2245. Long Ncoef=p0*(p0+2);
  2246. Long N=S.Dim()/Ncoef/dof;
  2247. assert(N*Ncoef*dof==S.Dim());
  2248. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  2249. const Matrix<Real> S0(N*dof, Ncoef, (Iterator<Real>)S.begin(), false);
  2250. Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, S_.begin(), false);
  2251. #pragma omp parallel
  2252. { // Construct all p0*(p0+1) rotations
  2253. Integer tid=omp_get_thread_num();
  2254. Integer omp_p=omp_get_num_threads();
  2255. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  2256. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  2257. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  2258. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  2259. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  2260. }
  2261. Long a=(tid+0)*N/omp_p;
  2262. Long b=(tid+1)*N/omp_p;
  2263. for(Long i=a;i<b;i++){
  2264. for(Long d=0;d<dof;d++){
  2265. for(Long j=0;j<p0;j++){
  2266. Long offset=0;
  2267. for(Long k=0;k<p0+1;k++){
  2268. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  2269. Long len=p0+1-k;
  2270. if(k!=0 && k!=p0){
  2271. for(Long l=0;l<len;l++){
  2272. Real x[2];
  2273. x[0]=S0[i*dof+d][offset+len*0+l];
  2274. x[1]=S0[i*dof+d][offset+len*1+l];
  2275. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  2276. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  2277. }
  2278. offset+=2*len;
  2279. }else{
  2280. for(Long l=0;l<len;l++){
  2281. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  2282. }
  2283. offset+=len;
  2284. }
  2285. }
  2286. assert(offset==Ncoef);
  2287. }
  2288. }
  2289. { // Fast rotation
  2290. for(Long k=0;k<dof*p0;k++){ // forward permutation
  2291. for(Long l=0;l<=p0;l++){
  2292. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2293. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  2294. }
  2295. }
  2296. }
  2297. for(Long t=0;t<=p0;t++){
  2298. for(Long l=0;l<=p0;l++){ // mat-vec
  2299. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  2300. }
  2301. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  2302. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  2303. for(Long l=0;l<=p0;l++){
  2304. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2305. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  2306. }
  2307. }
  2308. }
  2309. }
  2310. }
  2311. }
  2312. }
  2313. }
  2314. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  2315. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  2316. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  2317. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2318. { // Set coeff_perm
  2319. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2320. Long itr=0;
  2321. for(Long i=0;i<2*p0;i++){
  2322. Long m=(i+1)/2;
  2323. for(Long n=m;n<=p0;n++){
  2324. coeff_perm[n][i]=itr;
  2325. itr++;
  2326. }
  2327. }
  2328. }
  2329. Long Ncoef=p0*(p0+2);
  2330. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  2331. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  2332. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  2333. Matrix<Real> S0(N*dof*p0*(p0+1), Ncoef, S.begin(), false);
  2334. const Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, (Iterator<Real>)S_.begin(), false);
  2335. #pragma omp parallel
  2336. { // Transpose all p0*(p0+1) rotations
  2337. Integer tid=omp_get_thread_num();
  2338. Integer omp_p=omp_get_num_threads();
  2339. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  2340. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  2341. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  2342. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  2343. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  2344. }
  2345. Long a=(tid+0)*N/omp_p;
  2346. Long b=(tid+1)*N/omp_p;
  2347. for(Long i=a;i<b;i++){
  2348. for(Long t=0;t<p0+1;t++){
  2349. Long idx0=(i*(p0+1)+t)*p0*dof;
  2350. { // Fast rotation
  2351. const Matrix<Real> Min(p0*dof, Ncoef, (Iterator<Real>)S1[idx0], false);
  2352. for(Long k=0;k<dof*p0;k++){ // forward permutation
  2353. for(Long l=0;l<=p0;l++){
  2354. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2355. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  2356. }
  2357. }
  2358. }
  2359. for(Long l=0;l<=p0;l++){ // mat-vec
  2360. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  2361. }
  2362. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  2363. for(Long l=0;l<=p0;l++){
  2364. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2365. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  2366. }
  2367. }
  2368. }
  2369. }
  2370. for(Long j=0;j<p0;j++){
  2371. for(Long d=0;d<dof;d++){
  2372. Long idx1=idx0+j*dof+d;
  2373. Long offset=0;
  2374. for(Long k=0;k<p0+1;k++){
  2375. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  2376. Long len=p0+1-k;
  2377. if(k!=0 && k!=p0){
  2378. for(Long l=0;l<len;l++){
  2379. Real x[2];
  2380. x[0]=B0[j*dof+d][offset+len*0+l];
  2381. x[1]=B0[j*dof+d][offset+len*1+l];
  2382. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  2383. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  2384. }
  2385. offset+=2*len;
  2386. }else{
  2387. for(Long l=0;l<len;l++){
  2388. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  2389. }
  2390. offset+=len;
  2391. }
  2392. }
  2393. assert(offset==Ncoef);
  2394. }
  2395. }
  2396. }
  2397. }
  2398. }
  2399. }
  2400. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  2401. Long Ngrid=2*p0*(p0+1);
  2402. Long Ncoef= p0*(p0+2);
  2403. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  2404. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  2405. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  2406. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  2407. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  2408. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  2409. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  2410. Long b=std::min(a+BLOCK_SIZE, Nves);
  2411. Vector<Real> _SLMatrix, _DLMatrix;
  2412. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  2413. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  2414. const Vector<Real> _S ((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  2415. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2416. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2417. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2418. }
  2419. }
  2420. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  2421. Profile::Tic("Rotate");
  2422. Vector<Real> S0, S;
  2423. SphericalHarmonics<Real>::Grid2SHC(X0, p0+1, 2*p0, p0, S0, SHCArrange::COL_MAJOR_NONZERO);
  2424. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  2425. Profile::Toc();
  2426. Profile::Tic("Upsample");
  2427. Vector<Real> X, X_theta, X_phi, trg;
  2428. SphericalHarmonics<Real>::SHC2Grid(S, SHCArrange::COL_MAJOR_NONZERO, p0, p1+1, 2*p1, &X, &X_theta, &X_phi);
  2429. SphericalHarmonics<Real>::SHC2Pole(S, SHCArrange::COL_MAJOR_NONZERO, p0, trg);
  2430. Profile::Toc();
  2431. Profile::Tic("Stokes");
  2432. Vector<Real> SL0, DL0;
  2433. { // Stokes kernel
  2434. //Long M0=2*p0*(p0+1);
  2435. Long M1=2*p1*(p1+1);
  2436. Long N=trg.Dim()/(2*COORD_DIM);
  2437. assert(X.Dim()==M1*COORD_DIM*N);
  2438. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  2439. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  2440. const Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  2441. const Real scal_const_dl = 3.0/(4.0*M_PI);
  2442. const Real scal_const_sl = 1.0/(8.0*M_PI);
  2443. static Real eps=-1;
  2444. if(eps<0){
  2445. eps=1;
  2446. while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5;
  2447. }
  2448. #pragma omp parallel
  2449. {
  2450. Integer tid=omp_get_thread_num();
  2451. Integer omp_p=omp_get_num_threads();
  2452. Long a=(tid+0)*N/omp_p;
  2453. Long b=(tid+1)*N/omp_p;
  2454. for(Long i=a;i<b;i++){
  2455. for(Long t=0;t<2;t++){
  2456. Real tx, ty, tz;
  2457. { // Read target coordinates
  2458. tx=trg[i*2*COORD_DIM+0*2+t];
  2459. ty=trg[i*2*COORD_DIM+1*2+t];
  2460. tz=trg[i*2*COORD_DIM+2*2+t];
  2461. }
  2462. for(Long j0=0;j0<p1+1;j0++){
  2463. for(Long j1=0;j1<2*p1;j1++){
  2464. Long s=2*p1*j0+j1;
  2465. Real dx, dy, dz;
  2466. { // Compute dx, dy, dz
  2467. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  2468. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  2469. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  2470. }
  2471. Real nx, ny, nz;
  2472. { // Compute source normal
  2473. Real x_theta=X_phi[(i*COORD_DIM+0)*M1+s];
  2474. Real y_theta=X_phi[(i*COORD_DIM+1)*M1+s];
  2475. Real z_theta=X_phi[(i*COORD_DIM+2)*M1+s];
  2476. Real x_phi=X_theta[(i*COORD_DIM+0)*M1+s];
  2477. Real y_phi=X_theta[(i*COORD_DIM+1)*M1+s];
  2478. Real z_phi=X_theta[(i*COORD_DIM+2)*M1+s];
  2479. nx=(y_theta*z_phi-z_theta*y_phi);
  2480. ny=(z_theta*x_phi-x_theta*z_phi);
  2481. nz=(x_theta*y_phi-y_theta*x_phi);
  2482. }
  2483. Real area_elem=1.0;
  2484. if(SLayer){ // Compute area_elem
  2485. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  2486. }
  2487. Real rinv, rinv2;
  2488. { // Compute rinv, rinv2
  2489. Real r2=dx*dx+dy*dy+dz*dz;
  2490. rinv=1.0/sqrt(r2);
  2491. if(r2<=eps) rinv=0;
  2492. rinv2=rinv*rinv;
  2493. }
  2494. if(DLayer){
  2495. Real rinv5=rinv2*rinv2*rinv;
  2496. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  2497. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  2498. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  2499. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  2500. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  2501. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  2502. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  2503. }
  2504. if(SLayer){
  2505. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  2506. Real area_rinv2=area_rinv*rinv2;
  2507. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  2508. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  2509. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  2510. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  2511. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  2512. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  2513. }
  2514. }
  2515. }
  2516. }
  2517. }
  2518. }
  2519. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  2520. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  2521. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  2522. }
  2523. Profile::Toc();
  2524. Profile::Tic("UpsampleTranspose");
  2525. Vector<Real> SL1, DL1;
  2526. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  2527. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  2528. Profile::Toc();
  2529. Profile::Tic("RotateTranspose");
  2530. Vector<Real> SL2, DL2;
  2531. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  2532. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  2533. Profile::Toc();
  2534. Profile::Tic("Rearrange");
  2535. Vector<Real> SL3, DL3;
  2536. { // Transpose
  2537. Long Ncoef=p0*(p0+2);
  2538. Long Ngrid=2*p0*(p0+1);
  2539. { // Transpose SL2
  2540. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  2541. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2542. #pragma omp parallel
  2543. {
  2544. Integer tid=omp_get_thread_num();
  2545. Integer omp_p=omp_get_num_threads();
  2546. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2547. Long a=(tid+0)*N/omp_p;
  2548. Long b=(tid+1)*N/omp_p;
  2549. for(Long i=a;i<b;i++){
  2550. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  2551. for(Long k=0;k<Ncoef;k++){ // Transpose
  2552. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2553. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2554. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2555. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2556. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2557. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2558. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2559. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2560. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2561. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2562. }
  2563. }
  2564. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2565. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2566. for(Long j0=0;j0<COORD_DIM;j0++){
  2567. for(Long j1=0;j1<p0+1;j1++){
  2568. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2569. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2570. }
  2571. }
  2572. }
  2573. }
  2574. }
  2575. }
  2576. { // Transpose DL2
  2577. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  2578. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2579. #pragma omp parallel
  2580. {
  2581. Integer tid=omp_get_thread_num();
  2582. Integer omp_p=omp_get_num_threads();
  2583. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2584. Long a=(tid+0)*N/omp_p;
  2585. Long b=(tid+1)*N/omp_p;
  2586. for(Long i=a;i<b;i++){
  2587. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  2588. for(Long k=0;k<Ncoef;k++){ // Transpose
  2589. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2590. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2591. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2592. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2593. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2594. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2595. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2596. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2597. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2598. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2599. }
  2600. }
  2601. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2602. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2603. for(Long j0=0;j0<COORD_DIM;j0++){
  2604. for(Long j1=0;j1<p0+1;j1++){
  2605. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2606. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2607. }
  2608. }
  2609. }
  2610. }
  2611. }
  2612. }
  2613. }
  2614. Profile::Toc();
  2615. Profile::Tic("Grid2SHC");
  2616. SphericalHarmonics<Real>::Grid2SHC(SL3, p0+1, 2*p0, p0, SL, SHCArrange::COL_MAJOR_NONZERO);
  2617. SphericalHarmonics<Real>::Grid2SHC(DL3, p0+1, 2*p0, p0, DL, SHCArrange::COL_MAJOR_NONZERO);
  2618. Profile::Toc();
  2619. }
  2620. } // end namespace