sph_harm.txx 114 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174
  1. #include SCTL_INCLUDE(fft_wrapper.hpp)
  2. #include SCTL_INCLUDE(legendre_rule.hpp)
  3. #include <fstream>
  4. // TODO: Replace work vectors with dynamic-arrays
  5. namespace SCTL_NAMESPACE {
  6. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& S, SHCArrange arrange){
  7. Long N = X.Dim() / (Np*Nt);
  8. assert(X.Dim() == N*Np*Nt);
  9. Vector<Real> B1(N*(p1+1)*(p1+1));
  10. Grid2SHC_(X, Nt, Np, p1, B1);
  11. SHCArrange0(B1, p1, S, arrange);
  12. }
  13. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_theta, Vector<Real>* X_phi){
  14. Vector<Real> B0;
  15. SHCArrange1(S, arrange, p0, B0);
  16. SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
  17. }
  18. template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& theta_phi, Vector<Real>& X) {
  19. Long M = (p0+1) * (p0+1);
  20. Long dof;
  21. Matrix<Real> B1;
  22. { // Set B1, dof
  23. Vector<Real> B0;
  24. SHCArrange1(S, arrange, p0, B0);
  25. dof = B0.Dim() / M;
  26. assert(B0.Dim() == dof * M);
  27. B1.ReInit(dof, M);
  28. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  29. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  30. }
  31. assert(B1.Dim(0) == dof);
  32. assert(B1.Dim(1) == M);
  33. Matrix<Real> SHBasis;
  34. SHBasisEval(p0, theta_phi, SHBasis);
  35. assert(SHBasis.Dim(1) == M);
  36. Long N = SHBasis.Dim(0);
  37. { // Set X
  38. if (X.Dim() != N*dof) X.ReInit(N * dof);
  39. for (Long k0 = 0; k0 < N; k0++) {
  40. for (Long k1 = 0; k1 < dof; k1++) {
  41. Real X_ = 0;
  42. for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
  43. X[k0 * dof + k1] = X_;
  44. }
  45. }
  46. }
  47. }
  48. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
  49. Vector<Real> QP[2];
  50. { // Set QP // TODO: store these weights
  51. Vector<Real> x(1), alp;
  52. const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
  53. for (Long i = 0; i < 2; i++) {
  54. x = (i ? const_pi<Real>() : 0);
  55. LegPoly_(alp, x, p0);
  56. QP[i].ReInit(p0 + 1, alp.begin());
  57. QP[i] *= SQRT2PI;
  58. }
  59. }
  60. Long M, N;
  61. { // Set M, N
  62. M = 0;
  63. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  64. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  65. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  66. if (M == 0) return;
  67. N = S.Dim() / M;
  68. assert(S.Dim() == N * M);
  69. }
  70. if(P.Dim() != N * 2) P.ReInit(N * 2);
  71. if (arrange == SHCArrange::ALL) {
  72. #pragma omp parallel
  73. { // Compute pole
  74. Integer tid = omp_get_thread_num();
  75. Integer omp_p = omp_get_num_threads();
  76. Long a = (tid + 0) * N / omp_p;
  77. Long b = (tid + 1) * N / omp_p;
  78. for (Long i = a; i < b; i++) {
  79. Real P_[2] = {0, 0};
  80. for (Long j = 0; j < p0 + 1; j++) {
  81. P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
  82. P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
  83. }
  84. P[2*i+0] = P_[0];
  85. P[2*i+1] = P_[1];
  86. }
  87. }
  88. }
  89. if (arrange == SHCArrange::ROW_MAJOR) {
  90. #pragma omp parallel
  91. { // Compute pole
  92. Integer tid = omp_get_thread_num();
  93. Integer omp_p = omp_get_num_threads();
  94. Long a = (tid + 0) * N / omp_p;
  95. Long b = (tid + 1) * N / omp_p;
  96. for (Long i = a; i < b; i++) {
  97. Long idx = 0;
  98. Real P_[2] = {0, 0};
  99. for (Long j = 0; j < p0 + 1; j++) {
  100. P_[0] += S[i*M+idx] * QP[0][j];
  101. P_[1] += S[i*M+idx] * QP[1][j];
  102. idx += 2*(j+1);
  103. }
  104. P[2*i+0] = P_[0];
  105. P[2*i+1] = P_[1];
  106. }
  107. }
  108. }
  109. if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
  110. #pragma omp parallel
  111. { // Compute pole
  112. Integer tid = omp_get_thread_num();
  113. Integer omp_p = omp_get_num_threads();
  114. Long a = (tid + 0) * N / omp_p;
  115. Long b = (tid + 1) * N / omp_p;
  116. for (Long i = a; i < b; i++) {
  117. Real P_[2] = {0, 0};
  118. for (Long j = 0; j < p0 + 1; j++) {
  119. P_[0] += S[i*M+j] * QP[0][j];
  120. P_[1] += S[i*M+j] * QP[1][j];
  121. }
  122. P[2*i+0] = P_[0];
  123. P[2*i+1] = P_[1];
  124. }
  125. }
  126. }
  127. }
  128. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
  129. typedef double VTKReal;
  130. Vector<Real> SS;
  131. if (S == nullptr) {
  132. Integer p = 2;
  133. Integer Ncoeff = (p + 1) * (p + 1);
  134. Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
  135. SSS.SetZero();
  136. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  137. SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
  138. SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
  139. SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
  140. SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
  141. S = &SS;
  142. }
  143. Vector<Real> X, Xp, V, Vp;
  144. { // Upsample X
  145. const Vector<Real>& X0=*S;
  146. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
  147. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
  148. }
  149. if(v_ptr){ // Upsample V
  150. const Vector<Real>& X0=*v_ptr;
  151. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
  152. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
  153. }
  154. std::vector<VTKReal> point_coord;
  155. std::vector<VTKReal> point_value;
  156. std::vector<int32_t> poly_connect;
  157. std::vector<int32_t> poly_offset;
  158. { // Set point_coord, point_value, poly_connect
  159. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  160. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  161. for(Long k=0;k<N_ves;k++){ // Set point_coord
  162. Real C[COORD_DIM]={0,0,0};
  163. if(period>0){
  164. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  165. for(Long i=0;i<p1+1;i++){
  166. for(Long j=0;j<2*p1;j++){
  167. for(Integer l=0;l<COORD_DIM;l++){
  168. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  169. }
  170. }
  171. }
  172. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  173. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  174. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  175. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  176. }
  177. for(Long i=0;i<p1+1;i++){
  178. for(Long j=0;j<2*p1;j++){
  179. for(Integer l=0;l<COORD_DIM;l++){
  180. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  181. }
  182. }
  183. }
  184. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  185. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  186. }
  187. if(v_ptr) {
  188. Long data__dof = V.Dim() / (N_ves * 2*p1*(p1+1));
  189. for(Long k=0;k<N_ves;k++){ // Set point_value
  190. for(Long i=0;i<p1+1;i++){
  191. for(Long j=0;j<2*p1;j++){
  192. for(Long l=0;l<data__dof;l++){
  193. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  194. }
  195. }
  196. }
  197. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  198. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  199. }
  200. }
  201. for(Long k=0;k<N_ves;k++){
  202. for(Long j=0;j<2*p1;j++){
  203. Long i0= 0;
  204. Long i1=p1;
  205. Long j0=((j+0) );
  206. Long j1=((j+1)%(2*p1));
  207. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  208. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  209. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  210. poly_offset.push_back(poly_connect.size());
  211. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  212. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  213. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  214. poly_offset.push_back(poly_connect.size());
  215. }
  216. for(Long i=0;i<p1;i++){
  217. for(Long j=0;j<2*p1;j++){
  218. Long i0=((i+0) );
  219. Long i1=((i+1) );
  220. Long j0=((j+0) );
  221. Long j1=((j+1)%(2*p1));
  222. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  223. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  224. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  225. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  226. poly_offset.push_back(poly_connect.size());
  227. }
  228. }
  229. }
  230. }
  231. Integer np = comm.Size();
  232. Integer myrank = comm.Rank();
  233. std::vector<VTKReal>& coord=point_coord;
  234. std::vector<VTKReal>& value=point_value;
  235. std::vector<int32_t>& connect=poly_connect;
  236. std::vector<int32_t>& offset=poly_offset;
  237. Long pt_cnt=coord.size()/COORD_DIM;
  238. Long poly_cnt=poly_offset.size();
  239. // Open file for writing.
  240. std::stringstream vtufname;
  241. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  242. std::ofstream vtufile;
  243. vtufile.open(vtufname.str().c_str());
  244. if(vtufile.fail()) return;
  245. bool isLittleEndian;
  246. { // Set isLittleEndian
  247. uint16_t number = 0x1;
  248. uint8_t *numPtr = (uint8_t*)&number;
  249. isLittleEndian=(numPtr[0] == 1);
  250. }
  251. // Proceed to write to file.
  252. Long data_size=0;
  253. vtufile<<"<?xml version=\"1.0\"?>\n";
  254. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  255. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  256. //===========================================================================
  257. vtufile<<" <PolyData>\n";
  258. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  259. //---------------------------------------------------------------------------
  260. vtufile<<" <Points>\n";
  261. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  262. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  263. vtufile<<" </Points>\n";
  264. //---------------------------------------------------------------------------
  265. if(value.size()){ // value
  266. vtufile<<" <PointData>\n";
  267. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  268. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  269. vtufile<<" </PointData>\n";
  270. }
  271. //---------------------------------------------------------------------------
  272. vtufile<<" <Polys>\n";
  273. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  274. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  275. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  276. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  277. vtufile<<" </Polys>\n";
  278. //---------------------------------------------------------------------------
  279. vtufile<<" </Piece>\n";
  280. vtufile<<" </PolyData>\n";
  281. //===========================================================================
  282. vtufile<<" <AppendedData encoding=\"raw\">\n";
  283. vtufile<<" _";
  284. int32_t block_size;
  285. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  286. if(value.size()){ // value
  287. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  288. }
  289. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  290. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  291. vtufile<<"\n";
  292. vtufile<<" </AppendedData>\n";
  293. //===========================================================================
  294. vtufile<<"</VTKFile>\n";
  295. vtufile.close();
  296. if(myrank) return;
  297. std::stringstream pvtufname;
  298. pvtufname<<fname<<".pvtp";
  299. std::ofstream pvtufile;
  300. pvtufile.open(pvtufname.str().c_str());
  301. if(pvtufile.fail()) return;
  302. pvtufile<<"<?xml version=\"1.0\"?>\n";
  303. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  304. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  305. pvtufile<<" <PPoints>\n";
  306. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  307. pvtufile<<" </PPoints>\n";
  308. if(value.size()){ // value
  309. pvtufile<<" <PPointData>\n";
  310. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  311. pvtufile<<" </PPointData>\n";
  312. }
  313. {
  314. // Extract filename from path.
  315. std::stringstream vtupath;
  316. vtupath<<'/'<<fname;
  317. std::string pathname = vtupath.str();
  318. auto found = pathname.find_last_of("/\\");
  319. std::string fname_ = pathname.substr(found+1);
  320. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  321. }
  322. pvtufile<<" </PPolyData>\n";
  323. pvtufile<<"</VTKFile>\n";
  324. pvtufile.close();
  325. }
  326. template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
  327. Long N = X.Dim() / (Np*Nt);
  328. assert(X.Dim() == N*Np*Nt);
  329. assert(N % COORD_DIM == 0);
  330. Vector<Real> B0(N*Nt*Np);
  331. { // Set B0
  332. Vector<Real> sin_phi(Np), cos_phi(Np);
  333. for (Long i = 0; i < Np; i++) {
  334. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  335. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  336. }
  337. const auto& Y = LegendreNodes(Nt - 1);
  338. assert(Y.Dim() == Nt);
  339. Long Ngrid = Nt * Np;
  340. for (Long k = 0; k < N; k+=COORD_DIM) {
  341. for (Long i = 0; i < Nt; i++) {
  342. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  343. Real cos_theta = Y[i];
  344. Real csc_theta = 1 / sin_theta;
  345. const auto X_ = X.begin() + (k*Nt+i)*Np;
  346. auto B0_ = B0.begin() + (k*Nt+i)*Np;
  347. for (Long j = 0; j < Np; j++) {
  348. StaticArray<Real,3> in;
  349. in[0] = X_[0*Ngrid+j];
  350. in[1] = X_[1*Ngrid+j];
  351. in[2] = X_[2*Ngrid+j];
  352. StaticArray<Real,9> Q;
  353. { // Set Q
  354. Q[0] = sin_theta*cos_phi[j]; Q[1] = sin_theta*sin_phi[j]; Q[2] = cos_theta;
  355. Q[3] = cos_theta*cos_phi[j]; Q[4] = cos_theta*sin_phi[j]; Q[5] =-sin_theta;
  356. Q[6] = -sin_phi[j]; Q[7] = cos_phi[j]; Q[8] = 0;
  357. }
  358. B0_[0*Ngrid+j] = ( Q[0] * in[0] + Q[1] * in[1] + Q[2] * in[2] );
  359. B0_[1*Ngrid+j] = ( Q[3] * in[0] + Q[4] * in[1] + Q[5] * in[2] ) * csc_theta;
  360. B0_[2*Ngrid+j] = ( Q[6] * in[0] + Q[7] * in[1] + Q[8] * in[2] ) * csc_theta;
  361. }
  362. }
  363. }
  364. }
  365. Long p_ = p0 + 1;
  366. Long M0 = (p0+1)*(p0+1);
  367. Long M_ = (p_+1)*(p_+1);
  368. Vector<Real> B1(N*M_);
  369. Grid2SHC_(B0, Nt, Np, p_, B1);
  370. Vector<Real> B2(N*M0);
  371. const Complex<Real> imag(0,1);
  372. for (Long i=0; i<N; i+=COORD_DIM) {
  373. for (Long m=0; m<=p0; m++) {
  374. for (Long n=m; n<=p0; n++) {
  375. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  376. Complex<Real> c;
  377. if (0<=m && m<=n && n<=p) {
  378. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  379. Long idx_imag = idx_real + (p+1-m)*N;
  380. c.real = coeff[idx_real];
  381. if (m) c.imag = coeff[idx_imag];
  382. }
  383. return c;
  384. };
  385. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  386. if (0<=m && m<=n && n<=p) {
  387. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  388. Long idx_imag = idx_real + (p+1-m)*N;
  389. coeff[idx_real] = c.real;
  390. if (m) coeff[idx_imag] = c.imag;
  391. }
  392. };
  393. auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
  394. auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
  395. auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
  396. Complex<Real> phiY, phiG, phiX;
  397. { // (phiG, phiX) <-- (gt, gp)
  398. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  399. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  400. phiY = gr(n,m);
  401. phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  402. phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  403. }
  404. auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
  405. auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
  406. if (n==0) {
  407. phiW = 0;
  408. phiX = 0;
  409. }
  410. write_coeff(phiV, B2, i+0, p0, n, m);
  411. write_coeff(phiW, B2, i+1, p0, n, m);
  412. write_coeff(phiX, B2, i+2, p0, n, m);
  413. }
  414. }
  415. }
  416. SHCArrange0(B2, p0, S, arrange);
  417. }
  418. template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>& X) {
  419. Vector<Real> B0;
  420. SHCArrange1(S, arrange, p0, B0);
  421. Long p_ = p0 + 1;
  422. Long M0 = (p0+1)*(p0+1);
  423. Long M_ = (p_+1)*(p_+1);
  424. Long N = B0.Dim() / M0;
  425. assert(B0.Dim() == N*M0);
  426. assert(N % COORD_DIM == 0);
  427. Vector<Real> B1(N*M_);
  428. const Complex<Real> imag(0,1);
  429. for (Long i=0; i<N; i+=COORD_DIM) {
  430. for (Long m=0; m<=p_; m++) {
  431. for (Long n=m; n<=p_; n++) {
  432. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  433. Complex<Real> c;
  434. if (0<=m && m<=n && n<=p) {
  435. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  436. Long idx_imag = idx_real + (p+1-m)*N;
  437. c.real = coeff[idx_real];
  438. if (m) c.imag = coeff[idx_imag];
  439. }
  440. return c;
  441. };
  442. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  443. if (0<=m && m<=n && n<=p) {
  444. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  445. Long idx_imag = idx_real + (p+1-m)*N;
  446. coeff[idx_real] = c.real;
  447. if (m) coeff[idx_imag] = c.imag;
  448. }
  449. };
  450. auto phiG = [&](Long n, Long m) {
  451. auto phiV = read_coeff(B0, i+0, p0, n, m);
  452. auto phiW = read_coeff(B0, i+1, p0, n, m);
  453. return phiV + phiW;
  454. };
  455. auto phiY = [&](Long n, Long m) {
  456. auto phiV = read_coeff(B0, i+0, p0, n, m);
  457. auto phiW = read_coeff(B0, i+1, p0, n, m);
  458. return phiW * n - phiV * (n + 1);
  459. };
  460. auto phiX = [&](Long n, Long m) {
  461. return read_coeff(B0, i+2, p0, n, m);
  462. };
  463. Complex<Real> gr, gt, gp;
  464. { // (gt, gp) <-- (phiG, phiX)
  465. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  466. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  467. gr = phiY(n,m);
  468. gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
  469. gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
  470. }
  471. write_coeff(gr, B1, i+0, p_, n, m);
  472. write_coeff(gt, B1, i+1, p_, n, m);
  473. write_coeff(gp, B1, i+2, p_, n, m);
  474. }
  475. }
  476. }
  477. { // Set X
  478. SHC2Grid_(B1, p_, Nt, Np, &X);
  479. Vector<Real> sin_phi(Np), cos_phi(Np);
  480. for (Long i = 0; i < Np; i++) {
  481. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  482. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  483. }
  484. const auto& Y = LegendreNodes(Nt - 1);
  485. assert(Y.Dim() == Nt);
  486. Long Ngrid = Nt * Np;
  487. for (Long k = 0; k < N; k+=COORD_DIM) {
  488. for (Long i = 0; i < Nt; i++) {
  489. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  490. Real cos_theta = Y[i];
  491. Real csc_theta = 1 / sin_theta;
  492. auto X_ = X.begin() + (k*Nt+i)*Np;
  493. for (Long j = 0; j < Np; j++) {
  494. StaticArray<Real,3> in;
  495. in[0] = X_[0*Ngrid+j];
  496. in[1] = X_[1*Ngrid+j] * csc_theta;
  497. in[2] = X_[2*Ngrid+j] * csc_theta;
  498. StaticArray<Real,9> Q;
  499. { // Set Q
  500. Q[0] = sin_theta*cos_phi[j]; Q[1] = sin_theta*sin_phi[j]; Q[2] = cos_theta;
  501. Q[3] = cos_theta*cos_phi[j]; Q[4] = cos_theta*sin_phi[j]; Q[5] =-sin_theta;
  502. Q[6] = -sin_phi[j]; Q[7] = cos_phi[j]; Q[8] = 0;
  503. }
  504. X_[0*Ngrid+j] = ( Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2] );
  505. X_[1*Ngrid+j] = ( Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2] );
  506. X_[2*Ngrid+j] = ( Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2] );
  507. }
  508. }
  509. }
  510. }
  511. }
  512. template <class Real> void SphericalHarmonics<Real>::VecSHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& theta_phi, Vector<Real>& X) {
  513. Long M = (p0+1) * (p0+1);
  514. Long dof;
  515. Matrix<Real> B1;
  516. { // Set B1, dof
  517. Vector<Real> B0;
  518. SHCArrange1(S, arrange, p0, B0);
  519. dof = B0.Dim() / M / COORD_DIM;
  520. assert(B0.Dim() == dof * COORD_DIM * M);
  521. B1.ReInit(dof, COORD_DIM * M);
  522. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  523. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  524. }
  525. assert(B1.Dim(1) == COORD_DIM * M);
  526. assert(B1.Dim(0) == dof);
  527. Matrix<Real> SHBasis;
  528. VecSHBasisEval(p0, theta_phi, SHBasis);
  529. assert(SHBasis.Dim(1) == COORD_DIM * M);
  530. Long N = SHBasis.Dim(0) / COORD_DIM;
  531. { // Set X <-- Q * SHBasis * B1
  532. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  533. for (Long k0 = 0; k0 < N; k0++) {
  534. StaticArray<Real,9> Q;
  535. { // Set Q
  536. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  537. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  538. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  539. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  540. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  541. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  542. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  543. }
  544. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * SHBasis * B1
  545. StaticArray<Real,COORD_DIM> in;
  546. for (Long j = 0; j < COORD_DIM; j++) {
  547. in[j] = 0;
  548. for (Long i = 0; i < COORD_DIM * M; i++) {
  549. in[j] += B1[k1][i] * SHBasis[k0 * COORD_DIM + j][i];
  550. }
  551. }
  552. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  553. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  554. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  555. }
  556. }
  557. }
  558. }
  559. template <class Real> void SphericalHarmonics<Real>::StokesEvalSL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  560. Long M = (p0+1) * (p0+1);
  561. Long dof;
  562. Matrix<Real> B1;
  563. { // Set B1, dof
  564. Vector<Real> B0;
  565. SHCArrange1(S, arrange, p0, B0);
  566. dof = B0.Dim() / M / COORD_DIM;
  567. assert(B0.Dim() == dof * COORD_DIM * M);
  568. B1.ReInit(dof, COORD_DIM * M);
  569. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  570. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  571. }
  572. assert(B1.Dim(1) == COORD_DIM * M);
  573. assert(B1.Dim(0) == dof);
  574. Long N, p_;
  575. Matrix<Real> SHBasis;
  576. Vector<Real> R, theta_phi;
  577. { // Set N, p_, R, SHBasis
  578. p_ = p0 + 1;
  579. Real M_ = (p_+1) * (p_+1);
  580. N = coord.Dim() / COORD_DIM;
  581. assert(coord.Dim() == N * COORD_DIM);
  582. R.ReInit(N);
  583. theta_phi.ReInit(2 * N);
  584. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  585. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  586. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  587. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]), x[2]);
  588. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  589. }
  590. SHBasisEval(p_, theta_phi, SHBasis);
  591. assert(SHBasis.Dim(1) == M_);
  592. assert(SHBasis.Dim(0) == N);
  593. SCTL_UNUSED(M_);
  594. }
  595. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  596. for (Long i = 0; i < N; i++) { // Set StokesOp
  597. Real cos_theta, sin_theta, csc_theta, cos_phi, sin_phi;
  598. { // Set cos_theta, csc_theta, cos_phi, sin_phi
  599. cos_theta = cos(theta_phi[i * 2 + 0]);
  600. sin_theta = sin(theta_phi[i * 2 + 0]);
  601. csc_theta = 1 / sin_theta;
  602. cos_phi = cos(theta_phi[i * 2 + 1]);
  603. sin_phi = sin(theta_phi[i * 2 + 1]);
  604. }
  605. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  606. const Real radius = R[i];
  607. Vector<Real> rpow;
  608. rpow.ReInit(p0 + 4);
  609. if (interior) {
  610. rpow[0] = 1 / radius;
  611. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * radius; // rpow[n] = r^(n-1)
  612. } else {
  613. rpow[0] = 1;
  614. const Real rinv = 1 / radius;
  615. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * rinv; // rpow[n] = r^(-n)
  616. }
  617. for (Long m = 0; m <= p0; m++) {
  618. for (Long n = m; n <= p0; n++) {
  619. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  620. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  621. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  622. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  623. if (m) {
  624. idx += (p0+1-m);
  625. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  626. }
  627. }
  628. };
  629. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  630. { // Set vector spherical harmonics
  631. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  632. Complex<Real> c;
  633. if (0 <= m && m <= n && n <= p_) {
  634. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  635. c.real = SHBasis[i][idx];
  636. if (m) {
  637. idx += (p_+1-m);
  638. c.imag = SHBasis[i][idx];
  639. }
  640. }
  641. return c;
  642. };
  643. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  644. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  645. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  646. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  647. };
  648. Complex<Real> Y_1 = Y(n + 0, m);
  649. Complex<Real> Y_1t = Yt(n + 0, m);
  650. Complex<Real> Ycsc_1 = Y_1 * csc_theta;
  651. if (fabs(sin_theta) == 0) {
  652. auto Y_csc0 = [exp_phi, cos_theta](Long n, Long m) {
  653. if (m == 1) return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta<0) ? -1 : 1) * exp_phi;
  654. return Complex<Real>(0, 0);
  655. };
  656. Ycsc_1 = Y_csc0(n + 0, m);
  657. }
  658. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  659. Vr = C0 * (-n-1);
  660. Vt = C2;
  661. Vp = -imag * m * C1;
  662. Wr = C0 * n;
  663. Wt = C2;
  664. Wp = -imag * m * C1;
  665. Xr = 0;
  666. Xt = imag * m * C1;
  667. Xp = C2;
  668. };
  669. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  670. auto C0 = Y_1;
  671. auto C1 = Ycsc_1;
  672. auto C2 = Y_1t * R[i];
  673. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  674. }
  675. }
  676. Complex<Real> SVr, SVt, SVp;
  677. Complex<Real> SWr, SWt, SWp;
  678. Complex<Real> SXr, SXt, SXp;
  679. if (interior) {
  680. Real a, b;
  681. a = n / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 2];
  682. b = -(n + 1) / (Real)(4 * n + 2) * (rpow[n] - rpow[n + 2]);
  683. SVr = a * Vr + b * Wr;
  684. SVt = a * Vt + b * Wt;
  685. SVp = a * Vp + b * Wp;
  686. a = (n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n];
  687. SWr = a * Wr;
  688. SWt = a * Wt;
  689. SWp = a * Wp;
  690. a = 1 / (Real)(2 * n + 1) * rpow[n + 1];
  691. SXr = a * Xr;
  692. SXt = a * Xt;
  693. SXp = a * Xp;
  694. } else {
  695. Real a, b;
  696. a = n / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 2];
  697. SVr = a * Vr;
  698. SVt = a * Vt;
  699. SVp = a * Vp;
  700. a = (n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n];
  701. b = n / (Real)(4 * n + 2) * (rpow[n + 2] - rpow[n]);
  702. SWr = a * Wr + b * Vr;
  703. SWt = a * Wt + b * Vt;
  704. SWp = a * Wp + b * Vp;
  705. a = 1 / (Real)(2 * n + 1) * rpow[n + 1];
  706. SXr = a * Xr;
  707. SXt = a * Xt;
  708. SXp = a * Xp;
  709. }
  710. write_coeff(SVr, n, m, 0, 0);
  711. write_coeff(SVt, n, m, 0, 1);
  712. write_coeff(SVp, n, m, 0, 2);
  713. write_coeff(SWr, n, m, 1, 0);
  714. write_coeff(SWt, n, m, 1, 1);
  715. write_coeff(SWp, n, m, 1, 2);
  716. write_coeff(SXr, n, m, 2, 0);
  717. write_coeff(SXt, n, m, 2, 1);
  718. write_coeff(SXp, n, m, 2, 2);
  719. }
  720. }
  721. }
  722. { // Set X <-- Q * StokesOp * B1
  723. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  724. for (Long k0 = 0; k0 < N; k0++) {
  725. StaticArray<Real,9> Q;
  726. { // Set Q
  727. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  728. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  729. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  730. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  731. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  732. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  733. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  734. }
  735. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  736. StaticArray<Real,COORD_DIM> in;
  737. for (Long j = 0; j < COORD_DIM; j++) {
  738. in[j] = 0;
  739. for (Long i = 0; i < COORD_DIM * M; i++) {
  740. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  741. }
  742. }
  743. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  744. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  745. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  746. }
  747. }
  748. }
  749. }
  750. template <class Real> void SphericalHarmonics<Real>::StokesEvalDL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  751. Long M = (p0+1) * (p0+1);
  752. Long dof;
  753. Matrix<Real> B1;
  754. { // Set B1, dof
  755. Vector<Real> B0;
  756. SHCArrange1(S, arrange, p0, B0);
  757. dof = B0.Dim() / M / COORD_DIM;
  758. assert(B0.Dim() == dof * COORD_DIM * M);
  759. B1.ReInit(dof, COORD_DIM * M);
  760. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  761. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  762. }
  763. assert(B1.Dim(1) == COORD_DIM * M);
  764. assert(B1.Dim(0) == dof);
  765. Long N, p_;
  766. Matrix<Real> SHBasis;
  767. Vector<Real> R, theta_phi;
  768. { // Set N, p_, R, SHBasis
  769. p_ = p0 + 1;
  770. Real M_ = (p_+1) * (p_+1);
  771. N = coord.Dim() / COORD_DIM;
  772. assert(coord.Dim() == N * COORD_DIM);
  773. R.ReInit(N);
  774. theta_phi.ReInit(2 * N);
  775. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  776. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  777. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  778. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]), x[2]);
  779. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  780. }
  781. SHBasisEval(p_, theta_phi, SHBasis);
  782. assert(SHBasis.Dim(1) == M_);
  783. assert(SHBasis.Dim(0) == N);
  784. SCTL_UNUSED(M_);
  785. }
  786. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  787. for (Long i = 0; i < N; i++) { // Set StokesOp
  788. Real cos_theta, sin_theta, csc_theta, cos_phi, sin_phi;
  789. { // Set cos_theta, csc_theta, cos_phi, sin_phi
  790. cos_theta = cos(theta_phi[i * 2 + 0]);
  791. sin_theta = sin(theta_phi[i * 2 + 0]);
  792. csc_theta = 1 / sin_theta;
  793. cos_phi = cos(theta_phi[i * 2 + 1]);
  794. sin_phi = sin(theta_phi[i * 2 + 1]);
  795. }
  796. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  797. const Real radius = R[i];
  798. Vector<Real> rpow;
  799. rpow.ReInit(p0 + 4);
  800. if (interior) {
  801. rpow[0] = 1 / radius;
  802. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * radius; // rpow[n] = r^(n-1)
  803. } else {
  804. rpow[0] = 1;
  805. const Real rinv = 1 / radius;
  806. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * rinv; // rpow[n] = r^(-n)
  807. }
  808. for (Long m = 0; m <= p0; m++) {
  809. for (Long n = m; n <= p0; n++) {
  810. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  811. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  812. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  813. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  814. if (m) {
  815. idx += (p0+1-m);
  816. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  817. }
  818. }
  819. };
  820. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  821. { // Set vector spherical harmonics
  822. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  823. Complex<Real> c;
  824. if (0 <= m && m <= n && n <= p_) {
  825. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  826. c.real = SHBasis[i][idx];
  827. if (m) {
  828. idx += (p_+1-m);
  829. c.imag = SHBasis[i][idx];
  830. }
  831. }
  832. return c;
  833. };
  834. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  835. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  836. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  837. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  838. };
  839. Complex<Real> Y_1 = Y(n + 0, m);
  840. Complex<Real> Y_1t = Yt(n + 0, m);
  841. Complex<Real> Ycsc_1 = Y_1 * csc_theta;
  842. if (fabs(sin_theta) == 0) {
  843. auto Y_csc0 = [exp_phi, cos_theta](Long n, Long m) {
  844. if (m == 1) return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta<0) ? -1 : 1) * exp_phi;
  845. return Complex<Real>(0, 0);
  846. };
  847. Ycsc_1 = Y_csc0(n + 0, m);
  848. }
  849. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  850. Vr = C0 * (-n-1);
  851. Vt = C2;
  852. Vp = -imag * m * C1;
  853. Wr = C0 * n;
  854. Wt = C2;
  855. Wp = -imag * m * C1;
  856. Xr = 0;
  857. Xt = imag * m * C1;
  858. Xp = C2;
  859. };
  860. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  861. auto C0 = Y_1;
  862. auto C1 = Ycsc_1;
  863. auto C2 = Y_1t * R[i];
  864. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  865. }
  866. }
  867. Complex<Real> SVr, SVt, SVp;
  868. Complex<Real> SWr, SWt, SWp;
  869. Complex<Real> SXr, SXt, SXp;
  870. if (interior) {
  871. Real a, b;
  872. a = -2 * n * (n + 2) / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 2]; // pow<Real>(R[i], n+1);
  873. b = -(n + 1) * (n + 2) / (Real)(2 * n + 1) * (rpow[n + 2] - rpow[n]); //(pow<Real>(R[i], n+1) - pow<Real>(R[i], n-1));
  874. SVr = a * Vr + b * Wr;
  875. SVt = a * Vt + b * Wt;
  876. SVp = a * Vp + b * Wp;
  877. a = -(2 * n * n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n]; // pow<Real>(R[i], n-1);
  878. SWr = a * Wr;
  879. SWt = a * Wt;
  880. SWp = a * Wp;
  881. a = -(n + 2) / (Real)(2 * n + 1) * rpow[n + 1]; // pow<Real>(R[i], n);
  882. SXr = a * Xr;
  883. SXt = a * Xt;
  884. SXp = a * Xp;
  885. } else {
  886. Real a, b;
  887. a = (2 * n * n + 4 * n + 3) / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 2]; // pow<Real>(R[i], -n-2);
  888. SVr = a * Vr;
  889. SVt = a * Vt;
  890. SVp = a * Vp;
  891. a = 2 * (n + 1) * (n - 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n]; // pow<Real>(R[i], -n);
  892. b = 2 * n * (n - 1) / (Real)(4 * n + 2) * (rpow[n + 2] - rpow[n]); // (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  893. SWr = a * Wr + b * Vr;
  894. SWt = a * Wt + b * Vt;
  895. SWp = a * Wp + b * Vp;
  896. a = (n - 1) / (Real)(2 * n + 1) * rpow[n + 1]; // pow<Real>(R[i], -n-1);
  897. SXr = a * Xr;
  898. SXt = a * Xt;
  899. SXp = a * Xp;
  900. }
  901. write_coeff(SVr, n, m, 0, 0);
  902. write_coeff(SVt, n, m, 0, 1);
  903. write_coeff(SVp, n, m, 0, 2);
  904. write_coeff(SWr, n, m, 1, 0);
  905. write_coeff(SWt, n, m, 1, 1);
  906. write_coeff(SWp, n, m, 1, 2);
  907. write_coeff(SXr, n, m, 2, 0);
  908. write_coeff(SXt, n, m, 2, 1);
  909. write_coeff(SXp, n, m, 2, 2);
  910. }
  911. }
  912. }
  913. { // Set X <-- Q * StokesOp * B1
  914. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  915. for (Long k0 = 0; k0 < N; k0++) {
  916. StaticArray<Real,9> Q;
  917. { // Set Q
  918. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  919. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  920. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  921. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  922. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  923. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  924. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  925. }
  926. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  927. StaticArray<Real,COORD_DIM> in;
  928. for (Long j = 0; j < COORD_DIM; j++) {
  929. in[j] = 0;
  930. for (Long i = 0; i < COORD_DIM * M; i++) {
  931. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  932. }
  933. }
  934. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  935. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  936. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  937. }
  938. }
  939. }
  940. }
  941. template <class Real> void SphericalHarmonics<Real>::StokesEvalKL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, const Vector<Real>& norm, bool interior, Vector<Real>& X) {
  942. Long M = (p0+1) * (p0+1);
  943. Long dof;
  944. Matrix<Real> B1;
  945. { // Set B1, dof
  946. Vector<Real> B0;
  947. SHCArrange1(S, arrange, p0, B0);
  948. dof = B0.Dim() / M / COORD_DIM;
  949. assert(B0.Dim() == dof * COORD_DIM * M);
  950. B1.ReInit(dof, COORD_DIM * M);
  951. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  952. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  953. }
  954. assert(B1.Dim(1) == COORD_DIM * M);
  955. assert(B1.Dim(0) == dof);
  956. Long N, p_;
  957. Matrix<Real> SHBasis;
  958. Vector<Real> R, theta_phi;
  959. { // Set N, p_, R, SHBasis
  960. p_ = p0 + 2;
  961. Real M_ = (p_+1) * (p_+1);
  962. N = coord.Dim() / COORD_DIM;
  963. assert(coord.Dim() == N * COORD_DIM);
  964. R.ReInit(N);
  965. theta_phi.ReInit(2 * N);
  966. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  967. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  968. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  969. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]) + 1e-50, x[2]);
  970. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  971. }
  972. SHBasisEval(p_, theta_phi, SHBasis);
  973. assert(SHBasis.Dim(1) == M_);
  974. assert(SHBasis.Dim(0) == N);
  975. SCTL_UNUSED(M_);
  976. }
  977. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  978. for (Long i = 0; i < N; i++) { // Set StokesOp
  979. Real cos_theta, sin_theta, csc_theta, cot_theta, cos_phi, sin_phi;
  980. { // Set cos_theta, sin_theta, cos_phi, sin_phi
  981. cos_theta = cos(theta_phi[i * 2 + 0]);
  982. sin_theta = sin(theta_phi[i * 2 + 0]);
  983. csc_theta = 1 / sin_theta;
  984. cot_theta = cos_theta * csc_theta;
  985. cos_phi = cos(theta_phi[i * 2 + 1]);
  986. sin_phi = sin(theta_phi[i * 2 + 1]);
  987. }
  988. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  989. const Real radius = R[i];
  990. Vector<Real> rpow;
  991. rpow.ReInit(p0 + 4);
  992. if (interior) {
  993. rpow[0] = 1 / (radius * radius);
  994. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * radius; // rpow[n] = r^(n-2)
  995. } else {
  996. rpow[0] = 1;
  997. const Real rinv = 1 / radius;
  998. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * rinv; // rpow[n] = r^(-n)
  999. }
  1000. StaticArray<Real, COORD_DIM> norm0;
  1001. { // Set norm0 <-- Q^t * norm
  1002. StaticArray<Real,9> Q;
  1003. { // Set Q
  1004. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  1005. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  1006. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  1007. }
  1008. StaticArray<Real,COORD_DIM> in;
  1009. in[0] = norm[i * COORD_DIM + 0];
  1010. in[1] = norm[i * COORD_DIM + 1];
  1011. in[2] = norm[i * COORD_DIM + 2];
  1012. norm0[0] = Q[0] * in[0] + Q[1] * in[1] + Q[2] * in[2];
  1013. norm0[1] = Q[3] * in[0] + Q[4] * in[1] + Q[5] * in[2];
  1014. norm0[2] = Q[6] * in[0] + Q[7] * in[1] + Q[8] * in[2];
  1015. }
  1016. for (Long m = 0; m <= p0; m++) {
  1017. for (Long n = m; n <= p0; n++) {
  1018. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  1019. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  1020. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1021. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  1022. if (m) {
  1023. idx += (p0+1-m);
  1024. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  1025. }
  1026. }
  1027. };
  1028. Complex<Real> Ynm;
  1029. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  1030. Complex<Real> Vr_t, Vt_t, Vp_t, Wr_t, Wt_t, Wp_t, Xr_t, Xt_t, Xp_t;
  1031. Complex<Real> Vr_p, Vt_p, Vp_p, Wr_p, Wt_p, Wp_p, Xr_p, Xt_p, Xp_p;
  1032. { // Set vector spherical harmonics
  1033. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  1034. Complex<Real> c;
  1035. if (0 <= m && m <= n && n <= p_) {
  1036. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  1037. c.real = SHBasis[i][idx];
  1038. if (m) {
  1039. idx += (p_+1-m);
  1040. c.imag = SHBasis[i][idx];
  1041. }
  1042. }
  1043. return c;
  1044. };
  1045. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  1046. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  1047. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  1048. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  1049. };
  1050. auto Yp = [&Y, &imag, &R, i, csc_theta](Long n, Long m) {
  1051. return imag * m * Y(n, m) * csc_theta / R[i];
  1052. };
  1053. auto Ypt = [&Yt, &imag](Long n, Long m) {
  1054. return imag * m * Yt(n, m);
  1055. };
  1056. auto Ytt = [exp_phi, &Yt](Long n, Long m) {
  1057. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  1058. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  1059. return (n==0 ? 0 : (B / exp_phi * Yt(n, m + 1) - A * exp_phi * Yt(n, m - 1)));
  1060. };
  1061. Complex<Real> Y_1 = Y(n + 0, m);
  1062. Complex<Real> Y_0t = Yt(n - 1, m);
  1063. Complex<Real> Y_1t = Yt(n + 0, m);
  1064. Complex<Real> Y_2t = Yt(n + 1, m);
  1065. //Complex<Real> Y_0p = Yp(n - 1, m);
  1066. Complex<Real> Y_1p = Yp(n + 0, m);
  1067. //Complex<Real> Y_2p = Yp(n + 1, m);
  1068. auto Anm = (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0);
  1069. auto Bnm = (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0);
  1070. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  1071. Vr = C0 * (-n-1);
  1072. Vt = C2;
  1073. Vp = -imag * m * C1;
  1074. Wr = C0 * n;
  1075. Wt = C2;
  1076. Wp = -imag * m * C1;
  1077. Xr = 0;
  1078. Xt = imag * m * C1;
  1079. Xp = C2;
  1080. };
  1081. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  1082. auto C0 = Y_1;
  1083. auto C1 = Y_1 * csc_theta;
  1084. auto C2 = Yt(n,m) * R[i];
  1085. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  1086. }
  1087. { // Set Vr_t, Vt_t, Vp_t, Wr_t, Wt_t, Wp_t, Xr_t, Xt_t, Xp_t
  1088. auto C0 = Y_1t;
  1089. auto C1 = (Y_1t - Y_1 * cot_theta / R[i]) * csc_theta;
  1090. if (fabs(cos_theta) == 1 && m == 1) C1 = 0; ///////////// TODO
  1091. auto C2 = Ytt(n,m);
  1092. if (!m) C2 = (Anm * Y_2t - Bnm * Y_0t) * csc_theta - Y_1t * cot_theta; ///////////// TODO
  1093. SetVecSH(Vr_t, Vt_t, Vp_t, Wr_t, Wt_t, Wp_t, Xr_t, Xt_t, Xp_t, C0, C1, C2);
  1094. Vr_t += (-Vt) / R[i];
  1095. Vt_t += ( Vr) / R[i];
  1096. Wr_t += (-Wt) / R[i];
  1097. Wt_t += ( Wr) / R[i];
  1098. Xr_t += (-Xt) / R[i];
  1099. Xt_t += ( Xr) / R[i];
  1100. }
  1101. { // Set Vr_p, Vt_p, Vp_p, Wr_p, Wt_p, Wp_p, Xr_p, Xt_p, Xp_p
  1102. auto C0 = -Y_1p;
  1103. auto C1 = -Y_1p * csc_theta;
  1104. auto C2 = -Ypt(n, m) * csc_theta;
  1105. //auto C2 = -(Anm * Y_2p - Bnm * Y_0p) * csc_theta;
  1106. SetVecSH(Vr_p, Vt_p, Vp_p, Wr_p, Wt_p, Wp_p, Xr_p, Xt_p, Xp_p, C0, C1, C2);
  1107. Vr_p += (-sin_theta * Vp ) * csc_theta / R[i];
  1108. Vt_p += (-cos_theta * Vp ) * csc_theta / R[i];
  1109. Vp_p += ( sin_theta * Vr + cos_theta * Vt) * csc_theta / R[i];
  1110. Wr_p += (-sin_theta * Wp ) * csc_theta / R[i];
  1111. Wt_p += (-cos_theta * Wp ) * csc_theta / R[i];
  1112. Wp_p += ( sin_theta * Wr + cos_theta * Wt) * csc_theta / R[i];
  1113. Xr_p += (-sin_theta * Xp ) * csc_theta / R[i];
  1114. Xt_p += (-cos_theta * Xp ) * csc_theta / R[i];
  1115. Xp_p += ( sin_theta * Xr + cos_theta * Xt) * csc_theta / R[i];
  1116. if (fabs(cos_theta) == 1 && m == 1) {
  1117. Vt_p = 0;
  1118. Vp_p = 0;
  1119. Wt_p = 0;
  1120. Wp_p = 0;
  1121. Xt_p = 0;
  1122. Xp_p = 0;
  1123. }
  1124. }
  1125. Ynm = Y_1;
  1126. }
  1127. if (fabs(cos_theta) == 1) {
  1128. if (m!=0) Vr = 0;
  1129. if (m!=1) Vt = 0;
  1130. if (m!=1) Vp = 0;
  1131. if (m!=0) Wr = 0;
  1132. if (m!=1) Wt = 0;
  1133. if (m!=1) Wp = 0;
  1134. Xr = 0;
  1135. if (m!=1) Xt = 0;
  1136. if (m!=1) Xp = 0;
  1137. if (m!=1 ) Vr_t = 0;
  1138. if (m!=0 && m!=2) Vt_t = 0;
  1139. if (m!=2 ) Vp_t = 0;
  1140. if (m!=1 ) Wr_t = 0;
  1141. if (m!=0 && m!=2) Wt_t = 0;
  1142. if (m!=2 ) Wp_t = 0;
  1143. if (m!=1 ) Xr_t = 0;
  1144. if (m!=2 ) Xt_t = 0;
  1145. if (m!=0 && m!=2) Xp_t = 0;
  1146. if (m!=1 ) Vr_p = 0;
  1147. if (m!=2 ) Vt_p = 0;
  1148. if (m!=0 && m!=2) Vp_p = 0;
  1149. if (m!=1 ) Wr_p = 0;
  1150. if (m!=2 ) Wt_p = 0;
  1151. if (m!=0 && m!=2) Wp_p = 0;
  1152. if (m!=1 ) Xr_p = 0;
  1153. if (m!=0 && m!=2) Xt_p = 0;
  1154. if (m!=2 ) Xp_p = 0;
  1155. }
  1156. Complex<Real> PV, PW, PX;
  1157. Complex<Real> SV[COORD_DIM][COORD_DIM];
  1158. Complex<Real> SW[COORD_DIM][COORD_DIM];
  1159. Complex<Real> SX[COORD_DIM][COORD_DIM];
  1160. if (interior) {
  1161. PV = (n + 1) * Ynm * rpow[n + 2];
  1162. PW = 0;
  1163. PX = 0;
  1164. Real a, b;
  1165. Real a_r, b_r;
  1166. a = n / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 3]; // pow<Real>(R[i], n+1);
  1167. b = -(n + 1) / (Real)(4 * n + 2) * (rpow[n + 1] - rpow[n + 3]); // (pow<Real>(R[i], n-1) - pow<Real>(R[i], n+1));
  1168. a_r = n / (Real)((2 * n + 1) * (2 * n + 3)) * (n + 1) * rpow[n + 2]; // pow<Real>(R[i], n);
  1169. b_r = -(n + 1) / (Real)(4 * n + 2) * ((n - 1) * rpow[n] - (n + 1) * rpow[n + 2]); // ((n-1) * pow<Real>(R[i], n-2) - (n+1) * pow<Real>(R[i], n));
  1170. SV[0][0] = a_r * Vr + b_r * Wr;
  1171. SV[1][0] = a_r * Vt + b_r * Wt;
  1172. SV[2][0] = a_r * Vp + b_r * Wp;
  1173. SV[0][1] = a * Vr_t + b * Wr_t;
  1174. SV[1][1] = a * Vt_t + b * Wt_t;
  1175. SV[2][1] = a * Vp_t + b * Wp_t;
  1176. SV[0][2] = a * Vr_p + b * Wr_p;
  1177. SV[1][2] = a * Vt_p + b * Wt_p;
  1178. SV[2][2] = a * Vp_p + b * Wp_p;
  1179. a = (n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n + 1]; // pow<Real>(R[i], n-1);
  1180. a_r = (n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * (n - 1) * rpow[n]; // pow<Real>(R[i], n-2);
  1181. SW[0][0] = a_r * Wr;
  1182. SW[1][0] = a_r * Wt;
  1183. SW[2][0] = a_r * Wp;
  1184. SW[0][1] = a * Wr_t;
  1185. SW[1][1] = a * Wt_t;
  1186. SW[2][1] = a * Wp_t;
  1187. SW[0][2] = a * Wr_p;
  1188. SW[1][2] = a * Wt_p;
  1189. SW[2][2] = a * Wp_p;
  1190. a = 1 / (Real)(2 * n + 1) * rpow[n + 2]; // pow<Real>(R[i], n);
  1191. a_r = 1 / (Real)(2 * n + 1) * (n)*rpow[n + 1]; // pow<Real>(R[i], n-1);
  1192. SX[0][0] = a_r * Xr;
  1193. SX[1][0] = a_r * Xt;
  1194. SX[2][0] = a_r * Xp;
  1195. SX[0][1] = a * Xr_t;
  1196. SX[1][1] = a * Xt_t;
  1197. SX[2][1] = a * Xp_t;
  1198. SX[0][2] = a * Xr_p;
  1199. SX[1][2] = a * Xt_p;
  1200. SX[2][2] = a * Xp_p;
  1201. } else {
  1202. PV = 0;
  1203. PW = n * Ynm * rpow[n + 1];
  1204. PX = 0;
  1205. Real a, b;
  1206. Real a_r, b_r;
  1207. a = n / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 2]; // pow<Real>(R[i], -n-2);
  1208. a_r = n / (Real)((2 * n + 1) * (2 * n + 3)) * (-n - 2) * rpow[n + 3]; // pow<Real>(R[i], -n-3);
  1209. SV[0][0] = a_r * Vr;
  1210. SV[1][0] = a_r * Vt;
  1211. SV[2][0] = a_r * Vp;
  1212. SV[0][1] = a * Vr_t;
  1213. SV[1][1] = a * Vt_t;
  1214. SV[2][1] = a * Vp_t;
  1215. SV[0][2] = a * Vr_p;
  1216. SV[1][2] = a * Vt_p;
  1217. SV[2][2] = a * Vp_p;
  1218. a = (n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n]; // pow<Real>(R[i], -n);
  1219. b = n / (Real)(4 * n + 2) * (rpow[n + 2] - rpow[n]); //(pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  1220. a_r = (n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * (-n) * rpow[n + 1]; // pow<Real>(R[i], -n-1);
  1221. b_r = n / (Real)(4 * n + 2) * ((-n - 2) * rpow[n + 3] - (-n) * rpow[n + 1]); // ((-n-2)*pow<Real>(R[i], -n-3) - (-n)*pow<Real>(R[i], -n-1));
  1222. SW[0][0] = a_r * Wr + b_r * Vr;
  1223. SW[1][0] = a_r * Wt + b_r * Vt;
  1224. SW[2][0] = a_r * Wp + b_r * Vp;
  1225. SW[0][1] = a * Wr_t + b * Vr_t;
  1226. SW[1][1] = a * Wt_t + b * Vt_t;
  1227. SW[2][1] = a * Wp_t + b * Vp_t;
  1228. SW[0][2] = a * Wr_p + b * Vr_p;
  1229. SW[1][2] = a * Wt_p + b * Vt_p;
  1230. SW[2][2] = a * Wp_p + b * Vp_p;
  1231. a = 1 / (Real)(2 * n + 1) * rpow[n + 1]; // pow<Real>(R[i], -n-1);
  1232. a_r = 1 / (Real)(2 * n + 1) * (-n - 1) * rpow[n + 2]; // pow<Real>(R[i], -n-2);
  1233. SX[0][0] = a_r * Xr;
  1234. SX[1][0] = a_r * Xt;
  1235. SX[2][0] = a_r * Xp;
  1236. SX[0][1] = a * Xr_t;
  1237. SX[1][1] = a * Xt_t;
  1238. SX[2][1] = a * Xp_t;
  1239. SX[0][2] = a * Xr_p;
  1240. SX[1][2] = a * Xt_p;
  1241. SX[2][2] = a * Xp_p;
  1242. }
  1243. Complex<Real> KV[COORD_DIM][COORD_DIM], KW[COORD_DIM][COORD_DIM], KX[COORD_DIM][COORD_DIM];
  1244. KV[0][0] = SV[0][0] + SV[0][0] - PV; KV[0][1] = SV[0][1] + SV[1][0] ; KV[0][2] = SV[0][2] + SV[2][0] ;
  1245. KV[1][0] = SV[1][0] + SV[0][1] ; KV[1][1] = SV[1][1] + SV[1][1] - PV; KV[1][2] = SV[1][2] + SV[2][1] ;
  1246. KV[2][0] = SV[2][0] + SV[0][2] ; KV[2][1] = SV[2][1] + SV[1][2] ; KV[2][2] = SV[2][2] + SV[2][2] - PV;
  1247. KW[0][0] = SW[0][0] + SW[0][0] - PW; KW[0][1] = SW[0][1] + SW[1][0] ; KW[0][2] = SW[0][2] + SW[2][0] ;
  1248. KW[1][0] = SW[1][0] + SW[0][1] ; KW[1][1] = SW[1][1] + SW[1][1] - PW; KW[1][2] = SW[1][2] + SW[2][1] ;
  1249. KW[2][0] = SW[2][0] + SW[0][2] ; KW[2][1] = SW[2][1] + SW[1][2] ; KW[2][2] = SW[2][2] + SW[2][2] - PW;
  1250. KX[0][0] = SX[0][0] + SX[0][0] - PX; KX[0][1] = SX[0][1] + SX[1][0] ; KX[0][2] = SX[0][2] + SX[2][0] ;
  1251. KX[1][0] = SX[1][0] + SX[0][1] ; KX[1][1] = SX[1][1] + SX[1][1] - PX; KX[1][2] = SX[1][2] + SX[2][1] ;
  1252. KX[2][0] = SX[2][0] + SX[0][2] ; KX[2][1] = SX[2][1] + SX[1][2] ; KX[2][2] = SX[2][2] + SX[2][2] - PX;
  1253. write_coeff(KV[0][0]*norm0[0] + KV[0][1]*norm0[1] + KV[0][2]*norm0[2], n, m, 0, 0);
  1254. write_coeff(KV[1][0]*norm0[0] + KV[1][1]*norm0[1] + KV[1][2]*norm0[2], n, m, 0, 1);
  1255. write_coeff(KV[2][0]*norm0[0] + KV[2][1]*norm0[1] + KV[2][2]*norm0[2], n, m, 0, 2);
  1256. write_coeff(KW[0][0]*norm0[0] + KW[0][1]*norm0[1] + KW[0][2]*norm0[2], n, m, 1, 0);
  1257. write_coeff(KW[1][0]*norm0[0] + KW[1][1]*norm0[1] + KW[1][2]*norm0[2], n, m, 1, 1);
  1258. write_coeff(KW[2][0]*norm0[0] + KW[2][1]*norm0[1] + KW[2][2]*norm0[2], n, m, 1, 2);
  1259. write_coeff(KX[0][0]*norm0[0] + KX[0][1]*norm0[1] + KX[0][2]*norm0[2], n, m, 2, 0);
  1260. write_coeff(KX[1][0]*norm0[0] + KX[1][1]*norm0[1] + KX[1][2]*norm0[2], n, m, 2, 1);
  1261. write_coeff(KX[2][0]*norm0[0] + KX[2][1]*norm0[1] + KX[2][2]*norm0[2], n, m, 2, 2);
  1262. }
  1263. }
  1264. }
  1265. { // Set X <-- Q * StokesOp * B1
  1266. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  1267. for (Long k0 = 0; k0 < N; k0++) {
  1268. StaticArray<Real,9> Q;
  1269. { // Set Q
  1270. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  1271. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  1272. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  1273. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  1274. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  1275. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  1276. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  1277. }
  1278. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  1279. StaticArray<Real,COORD_DIM> in;
  1280. for (Long j = 0; j < COORD_DIM; j++) {
  1281. in[j] = 0;
  1282. for (Long i = 0; i < COORD_DIM * M; i++) {
  1283. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  1284. }
  1285. }
  1286. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  1287. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  1288. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  1289. }
  1290. }
  1291. }
  1292. }
  1293. template <class Real> void SphericalHarmonics<Real>::StokesEvalKSelf(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  1294. Long M = (p0+1) * (p0+1);
  1295. Long dof;
  1296. Matrix<Real> B1;
  1297. { // Set B1, dof
  1298. Vector<Real> B0;
  1299. SHCArrange1(S, arrange, p0, B0);
  1300. dof = B0.Dim() / M / COORD_DIM;
  1301. assert(B0.Dim() == dof * COORD_DIM * M);
  1302. B1.ReInit(dof, COORD_DIM * M);
  1303. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  1304. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  1305. }
  1306. assert(B1.Dim(1) == COORD_DIM * M);
  1307. assert(B1.Dim(0) == dof);
  1308. Long N, p_;
  1309. Matrix<Real> SHBasis;
  1310. Vector<Real> R, theta_phi;
  1311. { // Set N, p_, R, SHBasis
  1312. p_ = p0 + 1;
  1313. Real M_ = (p_+1) * (p_+1);
  1314. N = coord.Dim() / COORD_DIM;
  1315. assert(coord.Dim() == N * COORD_DIM);
  1316. R.ReInit(N);
  1317. theta_phi.ReInit(2 * N);
  1318. for (Long i = 0; i < N; i++) { // Set R, theta_phi
  1319. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  1320. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  1321. theta_phi[i * 2 + 0] = atan2(sqrt<Real>(x[0]*x[0] + x[1]*x[1]), x[2]);
  1322. theta_phi[i * 2 + 1] = atan2(x[1], x[0]);
  1323. }
  1324. SHBasisEval(p_, theta_phi, SHBasis);
  1325. assert(SHBasis.Dim(1) == M_);
  1326. assert(SHBasis.Dim(0) == N);
  1327. SCTL_UNUSED(M_);
  1328. }
  1329. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  1330. for (Long i = 0; i < N; i++) { // Set StokesOp
  1331. Real cos_theta, sin_theta, csc_theta, cos_phi, sin_phi;
  1332. { // Set cos_theta, csc_theta, cos_phi, sin_phi
  1333. cos_theta = cos(theta_phi[i * 2 + 0]);
  1334. sin_theta = sin(theta_phi[i * 2 + 0]);
  1335. csc_theta = 1 / sin_theta;
  1336. cos_phi = cos(theta_phi[i * 2 + 1]);
  1337. sin_phi = sin(theta_phi[i * 2 + 1]);
  1338. }
  1339. Complex<Real> imag(0,1), exp_phi(cos_phi, -sin_phi);
  1340. const Real radius = R[i];
  1341. Vector<Real> rpow;
  1342. rpow.ReInit(p0 + 4);
  1343. if (interior) {
  1344. rpow[0] = 1 / (radius * radius);
  1345. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * radius; // rpow[n] = r^(n-2)
  1346. } else {
  1347. rpow[0] = 1;
  1348. const Real rinv = 1 / radius;
  1349. for (Long ri = 1; ri < p0 + 4; ri++) rpow[ri] = rpow[ri - 1] * rinv; // rpow[n] = r^(-n)
  1350. }
  1351. for (Long m = 0; m <= p0; m++) {
  1352. for (Long n = m; n <= p0; n++) {
  1353. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  1354. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  1355. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1356. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  1357. if (m) {
  1358. idx += (p0+1-m);
  1359. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  1360. }
  1361. }
  1362. };
  1363. Complex<Real> Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp;
  1364. { // Set vector spherical harmonics
  1365. auto Y = [&SHBasis,p_,i](Long n, Long m) {
  1366. Complex<Real> c;
  1367. if (0 <= m && m <= n && n <= p_) {
  1368. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  1369. c.real = SHBasis[i][idx];
  1370. if (m) {
  1371. idx += (p_+1-m);
  1372. c.imag = SHBasis[i][idx];
  1373. }
  1374. }
  1375. return c;
  1376. };
  1377. auto Yt = [exp_phi, &Y, &R, i](Long n, Long m) {
  1378. auto A = (0<=n && m<=n ? 0.5 * sqrt<Real>((n+m)*(n-m+1)) * (m-1==0?2.0:1.0) : 0);
  1379. auto B = (0<=n && m<=n ? 0.5 * sqrt<Real>((n-m)*(n+m+1)) * (m+1==0?2.0:1.0) : 0);
  1380. return (B / exp_phi * Y(n, m + 1) - A * exp_phi * Y(n, m - 1)) / R[i];
  1381. };
  1382. Complex<Real> Y_1 = Y(n + 0, m);
  1383. Complex<Real> Y_1t = Yt(n + 0, m);
  1384. Complex<Real> Ycsc_1 = Y_1 * csc_theta;
  1385. if (fabs(sin_theta) == 0) {
  1386. auto Y_csc0 = [exp_phi, cos_theta](Long n, Long m) {
  1387. if (m == 1) return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta<0) ? -1 : 1) * exp_phi;
  1388. return Complex<Real>(0, 0);
  1389. };
  1390. Ycsc_1 = Y_csc0(n + 0, m);
  1391. }
  1392. auto SetVecSH = [&imag,n,m](Complex<Real>& Vr, Complex<Real>& Vt, Complex<Real>& Vp, Complex<Real>& Wr, Complex<Real>& Wt, Complex<Real>& Wp, Complex<Real>& Xr, Complex<Real>& Xt, Complex<Real>& Xp, const Complex<Real> C0, const Complex<Real> C1, const Complex<Real> C2) {
  1393. Vr = C0 * (-n-1);
  1394. Vt = C2;
  1395. Vp = -imag * m * C1;
  1396. Wr = C0 * n;
  1397. Wt = C2;
  1398. Wp = -imag * m * C1;
  1399. Xr = 0;
  1400. Xt = imag * m * C1;
  1401. Xp = C2;
  1402. };
  1403. { // Set Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp
  1404. auto C0 = Y_1;
  1405. auto C1 = Ycsc_1;
  1406. auto C2 = Y_1t * R[i];
  1407. SetVecSH(Vr, Vt, Vp, Wr, Wt, Wp, Xr, Xt, Xp, C0, C1, C2);
  1408. }
  1409. }
  1410. Complex<Real> SVr, SVt, SVp;
  1411. Complex<Real> SWr, SWt, SWp;
  1412. Complex<Real> SXr, SXt, SXp;
  1413. if (interior) {
  1414. Real a, b;
  1415. a = ((2 * n * n + 4 * n + 3) / (Real)((2 * n + 1) * (2 * n + 3))) * rpow[n + 2]; // pow<Real>(R[i], n);
  1416. b = ((n + 1) * (n - 1) / (Real)(2 * n + 1)) * (rpow[n + 2] - rpow[n]); //(pow<Real>(R[i], n) - pow<Real>(R[i], n - 2));
  1417. SVr = a * Vr + b * Wr;
  1418. SVt = a * Vt + b * Wt;
  1419. SVp = a * Vp + b * Wp;
  1420. a = (2 * (n + 1) * (n - 1) / (Real)((2 * n + 1) * (2 * n - 1))) * rpow[n]; // * pow<Real>(R[i], n - 2);
  1421. SWr = a * Wr;
  1422. SWt = a * Wt;
  1423. SWp = a * Wp;
  1424. a = ((n - 1) / (Real)(2 * n + 1)) * rpow[n + 1]; // pow<Real>(R[i], n - 1);
  1425. SXr = a * Xr;
  1426. SXt = a * Xt;
  1427. SXp = a * Xp;
  1428. } else {
  1429. Real a, b;
  1430. a = -2 * n * (n + 2) / (Real)((2 * n + 1) * (2 * n + 3)) * rpow[n + 3]; // pow<Real>(R[i], -n - 3);
  1431. SVr = a * Vr;
  1432. SVt = a * Vt;
  1433. SVp = a * Vp;
  1434. a = -(2 * n * n + 1) / (Real)((2 * n + 1) * (2 * n - 1)) * rpow[n + 1]; // pow<Real>(R[i], -n - 1);
  1435. b = n * (n + 2) / (Real)(2 * n + 1) * (rpow[n + 1] - rpow[n + 3]); //(pow<Real>(R[i], -n - 1) - pow<Real>(R[i], -n - 3));
  1436. SWr = a * Wr + b * Vr;
  1437. SWt = a * Wt + b * Vt;
  1438. SWp = a * Wp + b * Vp;
  1439. a = -(n + 2) / (Real)(2 * n + 1) * rpow[n + 2]; // pow<Real>(R[i], -n - 2);
  1440. SXr = a * Xr;
  1441. SXt = a * Xt;
  1442. SXp = a * Xp;
  1443. }
  1444. write_coeff(SVr, n, m, 0, 0);
  1445. write_coeff(SVt, n, m, 0, 1);
  1446. write_coeff(SVp, n, m, 0, 2);
  1447. write_coeff(SWr, n, m, 1, 0);
  1448. write_coeff(SWt, n, m, 1, 1);
  1449. write_coeff(SWp, n, m, 1, 2);
  1450. write_coeff(SXr, n, m, 2, 0);
  1451. write_coeff(SXt, n, m, 2, 1);
  1452. write_coeff(SXp, n, m, 2, 2);
  1453. }
  1454. }
  1455. }
  1456. { // Set X <-- Q * StokesOp * B1
  1457. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  1458. for (Long k0 = 0; k0 < N; k0++) {
  1459. StaticArray<Real,9> Q;
  1460. { // Set Q
  1461. Real cos_theta = cos(theta_phi[k0 * 2 + 0]);
  1462. Real sin_theta = sin(theta_phi[k0 * 2 + 0]);
  1463. Real cos_phi = cos(theta_phi[k0 * 2 + 1]);
  1464. Real sin_phi = sin(theta_phi[k0 * 2 + 1]);
  1465. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  1466. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  1467. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  1468. }
  1469. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  1470. StaticArray<Real,COORD_DIM> in;
  1471. for (Long j = 0; j < COORD_DIM; j++) {
  1472. in[j] = 0;
  1473. for (Long i = 0; i < COORD_DIM * M; i++) {
  1474. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  1475. }
  1476. }
  1477. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  1478. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  1479. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  1480. }
  1481. }
  1482. }
  1483. }
  1484. template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
  1485. const auto& Mf = OpFourierInv(Np);
  1486. assert(Mf.Dim(0) == Np);
  1487. const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
  1488. assert((Long)Ml.size() == p1+1);
  1489. Long N = X.Dim() / (Np*Nt);
  1490. assert(X.Dim() == N*Np*Nt);
  1491. Vector<Real> B0((2*p1+1) * N*Nt);
  1492. #pragma omp parallel
  1493. { // B0 <-- Transpose(FFT(X))
  1494. Integer tid=omp_get_thread_num();
  1495. Integer omp_p=omp_get_num_threads();
  1496. Long a=(tid+0)*N*Nt/omp_p;
  1497. Long b=(tid+1)*N*Nt/omp_p;
  1498. Vector<Real> buff(Mf.Dim(1));
  1499. Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
  1500. Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
  1501. const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
  1502. for (Long i = a; i < b; i++) {
  1503. { // buff <-- FFT(Xi)
  1504. const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
  1505. Mf.Execute(Xi, buff);
  1506. }
  1507. { // B0 <-- Transpose(buff)
  1508. B0_[0][i] = buff[0]; // skipping buff[1] == 0
  1509. for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
  1510. for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
  1511. }
  1512. }
  1513. }
  1514. if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
  1515. #pragma omp parallel
  1516. { // Evaluate Legendre polynomial
  1517. Integer tid=omp_get_thread_num();
  1518. Integer omp_p=omp_get_num_threads();
  1519. Long offset0=0;
  1520. Long offset1=0;
  1521. for (Long i = 0; i < p1+1; i++) {
  1522. Long N_ = (i==0 ? N : 2*N);
  1523. Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
  1524. Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
  1525. { // Mout = Min * Ml[i] // split between threads
  1526. Long a=(tid+0)*N_/omp_p;
  1527. Long b=(tid+1)*N_/omp_p;
  1528. if (a < b) {
  1529. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1530. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1531. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1532. }
  1533. }
  1534. offset0+=Min .Dim(0)*Min .Dim(1);
  1535. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1536. }
  1537. assert(offset0 == B0.Dim());
  1538. assert(offset1 == B1.Dim());
  1539. }
  1540. B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1541. }
  1542. template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
  1543. Long M = (p1+1)*(p1+1);
  1544. Long N = B1.Dim() / M;
  1545. assert(B1.Dim() == N*M);
  1546. if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
  1547. Long M = 2*(p1+1)*(p1+1);
  1548. if(S.Dim() != N * M) S.ReInit(N * M);
  1549. #pragma omp parallel
  1550. { // S <-- Rearrange(B1)
  1551. Integer tid=omp_get_thread_num();
  1552. Integer omp_p=omp_get_num_threads();
  1553. Long a=(tid+0)*N/omp_p;
  1554. Long b=(tid+1)*N/omp_p;
  1555. for (Long i = a; i < b; i++) {
  1556. Long offset = 0;
  1557. for (Long j = 0; j < p1+1; j++) {
  1558. Long len = p1+1 - j;
  1559. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1560. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1561. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
  1562. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  1563. offset += len;
  1564. }
  1565. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1566. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1567. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  1568. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  1569. offset += len;
  1570. } else {
  1571. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  1572. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
  1573. }
  1574. }
  1575. }
  1576. }
  1577. }
  1578. if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
  1579. Long M = (p1+1)*(p1+2);
  1580. if(S.Dim() != N * M) S.ReInit(N * M);
  1581. #pragma omp parallel
  1582. { // S <-- Rearrange(B1)
  1583. Integer tid=omp_get_thread_num();
  1584. Integer omp_p=omp_get_num_threads();
  1585. Long a=(tid+0)*N/omp_p;
  1586. Long b=(tid+1)*N/omp_p;
  1587. for (Long i = a; i < b; i++) {
  1588. Long offset = 0;
  1589. for (Long j = 0; j < p1+1; j++) {
  1590. Long len = p1+1 - j;
  1591. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1592. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1593. Iterator<Real> S_ = S .begin() + i*M + 0;
  1594. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  1595. offset += len;
  1596. }
  1597. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1598. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1599. Iterator<Real> S_ = S .begin() + i*M + 1;
  1600. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  1601. offset += len;
  1602. } else {
  1603. Iterator<Real> S_ = S .begin() + i*M + 1;
  1604. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
  1605. }
  1606. }
  1607. }
  1608. }
  1609. }
  1610. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
  1611. Long M = (p1+1)*(p1+1);
  1612. if(S.Dim() != N * M) S.ReInit(N * M);
  1613. #pragma omp parallel
  1614. { // S <-- Rearrange(B1)
  1615. Integer tid=omp_get_thread_num();
  1616. Integer omp_p=omp_get_num_threads();
  1617. Long a=(tid+0)*N/omp_p;
  1618. Long b=(tid+1)*N/omp_p;
  1619. for (Long i = a; i < b; i++) {
  1620. Long offset = 0;
  1621. for (Long j = 0; j < p1+1; j++) {
  1622. Long len = p1+1 - j;
  1623. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1624. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1625. Iterator<Real> S_ = S .begin() + i*M + offset;
  1626. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  1627. offset += len;
  1628. }
  1629. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1630. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1631. Iterator<Real> S_ = S .begin() + i*M + offset;
  1632. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  1633. offset += len;
  1634. }
  1635. }
  1636. }
  1637. }
  1638. }
  1639. }
  1640. template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
  1641. Long M, N;
  1642. { // Set M, N
  1643. M = 0;
  1644. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  1645. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  1646. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  1647. if (M == 0) return;
  1648. N = S.Dim() / M;
  1649. assert(S.Dim() == N * M);
  1650. }
  1651. if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
  1652. if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
  1653. #pragma omp parallel
  1654. { // B0 <-- Rearrange(S)
  1655. Integer tid=omp_get_thread_num();
  1656. Integer omp_p=omp_get_num_threads();
  1657. Long a=(tid+0)*N/omp_p;
  1658. Long b=(tid+1)*N/omp_p;
  1659. for (Long i = a; i < b; i++) {
  1660. Long offset = 0;
  1661. for (Long j = 0; j < p0+1; j++) {
  1662. Long len = p0+1 - j;
  1663. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1664. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1665. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
  1666. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1667. offset += len;
  1668. }
  1669. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1670. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1671. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
  1672. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1673. offset += len;
  1674. }
  1675. }
  1676. }
  1677. }
  1678. }
  1679. if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
  1680. #pragma omp parallel
  1681. { // B0 <-- Rearrange(S)
  1682. Integer tid=omp_get_thread_num();
  1683. Integer omp_p=omp_get_num_threads();
  1684. Long a=(tid+0)*N/omp_p;
  1685. Long b=(tid+1)*N/omp_p;
  1686. for (Long i = a; i < b; i++) {
  1687. Long offset = 0;
  1688. for (Long j = 0; j < p0+1; j++) {
  1689. Long len = p0+1 - j;
  1690. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1691. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1692. ConstIterator<Real> S_ = S .begin() + i*M + 0;
  1693. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1694. offset += len;
  1695. }
  1696. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1697. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1698. ConstIterator<Real> S_ = S .begin() + i*M + 1;
  1699. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1700. offset += len;
  1701. }
  1702. }
  1703. }
  1704. }
  1705. }
  1706. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
  1707. #pragma omp parallel
  1708. { // B0 <-- Rearrange(S)
  1709. Integer tid=omp_get_thread_num();
  1710. Integer omp_p=omp_get_num_threads();
  1711. Long a=(tid+0)*N/omp_p;
  1712. Long b=(tid+1)*N/omp_p;
  1713. for (Long i = a; i < b; i++) {
  1714. Long offset = 0;
  1715. for (Long j = 0; j < p0+1; j++) {
  1716. Long len = p0+1 - j;
  1717. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1718. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1719. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1720. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1721. offset += len;
  1722. }
  1723. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1724. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1725. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1726. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1727. offset += len;
  1728. }
  1729. }
  1730. }
  1731. }
  1732. }
  1733. }
  1734. template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  1735. const auto& Mf = OpFourier(Np);
  1736. assert(Mf.Dim(1) == Np);
  1737. const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
  1738. const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
  1739. assert((Long)Ml .size() == p0+1);
  1740. assert((Long)Mdl.size() == p0+1);
  1741. Long N = B0.Dim() / ((p0+1)*(p0+1));
  1742. assert(B0.Dim() == N*(p0+1)*(p0+1));
  1743. if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
  1744. if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
  1745. if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
  1746. Vector<Real> B1(N*(2*p0+1)*Nt);
  1747. if(X || X_phi){
  1748. #pragma omp parallel
  1749. { // Evaluate Legendre polynomial
  1750. Integer tid=omp_get_thread_num();
  1751. Integer omp_p=omp_get_num_threads();
  1752. Long offset0=0;
  1753. Long offset1=0;
  1754. for(Long i=0;i<p0+1;i++){
  1755. Long N_ = (i==0 ? N : 2*N);
  1756. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1757. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1758. { // Mout = Min * Ml[i] // split between threads
  1759. Long a=(tid+0)*N_/omp_p;
  1760. Long b=(tid+1)*N_/omp_p;
  1761. if(a<b){
  1762. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1763. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1764. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1765. }
  1766. }
  1767. offset0+=Min .Dim(0)*Min .Dim(1);
  1768. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1769. }
  1770. }
  1771. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1772. #pragma omp parallel
  1773. { // Transpose and evaluate Fourier
  1774. Integer tid=omp_get_thread_num();
  1775. Integer omp_p=omp_get_num_threads();
  1776. Long a=(tid+0)*N*Nt/omp_p;
  1777. Long b=(tid+1)*N*Nt/omp_p;
  1778. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1779. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1780. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1781. for (Long i = a; i < b; i++) {
  1782. { // buff <-- Transpose(B1)
  1783. buff[0] = B1_[0][i];
  1784. buff[1] = 0;
  1785. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1786. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1787. }
  1788. { // X <-- FFT(buff)
  1789. Vector<Real> Xi(Np, X->begin() + Np * i, false);
  1790. Mf.Execute(buff, Xi);
  1791. }
  1792. if(X_phi){ // Evaluate Fourier gradient
  1793. { // buff <-- Transpose(B1)
  1794. buff[0] = 0;
  1795. buff[1] = 0;
  1796. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1797. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1798. for (Long j = 1; j < buff.Dim()/2; j++) {
  1799. Real x = buff[2*j+0];
  1800. Real y = buff[2*j+1];
  1801. buff[2*j+0] = -j*y;
  1802. buff[2*j+1] = j*x;
  1803. }
  1804. }
  1805. { // X_phi <-- FFT(buff)
  1806. Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
  1807. Mf.Execute(buff, Xi);
  1808. }
  1809. }
  1810. }
  1811. }
  1812. }
  1813. if(X_theta){
  1814. #pragma omp parallel
  1815. { // Evaluate Legendre gradient
  1816. Integer tid=omp_get_thread_num();
  1817. Integer omp_p=omp_get_num_threads();
  1818. Long offset0=0;
  1819. Long offset1=0;
  1820. for(Long i=0;i<p0+1;i++){
  1821. Long N_ = (i==0 ? N : 2*N);
  1822. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1823. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1824. { // Mout = Min * Mdl[i] // split between threads
  1825. Long a=(tid+0)*N_/omp_p;
  1826. Long b=(tid+1)*N_/omp_p;
  1827. if(a<b){
  1828. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1829. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1830. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  1831. }
  1832. }
  1833. offset0+=Min .Dim(0)*Min .Dim(1);
  1834. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1835. }
  1836. }
  1837. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1838. #pragma omp parallel
  1839. { // Transpose and evaluate Fourier
  1840. Integer tid=omp_get_thread_num();
  1841. Integer omp_p=omp_get_num_threads();
  1842. Long a=(tid+0)*N*Nt/omp_p;
  1843. Long b=(tid+1)*N*Nt/omp_p;
  1844. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1845. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1846. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1847. for (Long i = a; i < b; i++) {
  1848. { // buff <-- Transpose(B1)
  1849. buff[0] = B1_[0][i];
  1850. buff[1] = 0;
  1851. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1852. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1853. }
  1854. { // Xi <-- FFT(buff)
  1855. Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
  1856. Mf.Execute(buff, Xi);
  1857. }
  1858. }
  1859. }
  1860. }
  1861. }
  1862. template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1863. Vector<Real> theta(X.Dim());
  1864. for (Long i = 0; i < X.Dim(); i++) theta[i] = acos(X[i]);
  1865. LegPoly_(poly_val, theta, degree);
  1866. }
  1867. template <class Real> void SphericalHarmonics<Real>::LegPoly_(Vector<Real>& poly_val, const Vector<Real>& theta, Long degree){
  1868. Long N = theta.Dim();
  1869. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1870. if (poly_val.Dim() != Npoly * N) poly_val.ReInit(Npoly * N);
  1871. Real fact = 1 / sqrt<Real>(4 * const_pi<Real>());
  1872. Vector<Real> cos_theta(N), sin_theta(N);
  1873. for (Long n = 0; n < N; n++) {
  1874. cos_theta[n] = cos(theta[n]);
  1875. sin_theta[n] = sin(theta[n]);
  1876. poly_val[n] = fact;
  1877. }
  1878. Long idx = 0;
  1879. Long idx_nxt = 0;
  1880. for (Long i = 1; i <= degree; i++) {
  1881. idx_nxt += N*(degree-i+2);
  1882. Real c = sqrt<Real>((2*i+1)/(Real)(2*i));
  1883. for (Long n = 0; n < N; n++) {
  1884. poly_val[idx_nxt+n] = -poly_val[idx+n] * sin_theta[n] * c;
  1885. }
  1886. idx = idx_nxt;
  1887. }
  1888. idx = 0;
  1889. for (Long m = 0; m < degree; m++) {
  1890. for (Long n = 0; n < N; n++) {
  1891. Real pmm = 0;
  1892. Real pmmp1 = poly_val[idx+n];
  1893. for (Long ll = m + 1; ll <= degree; ll++) {
  1894. Real a = sqrt<Real>(((2*ll-1)*(2*ll+1) ) / (Real)((ll-m)*(ll+m) ));
  1895. Real b = sqrt<Real>(((2*ll+1)*(ll+m-1)*(ll-m-1)) / (Real)((ll-m)*(ll+m)*(2*ll-3)));
  1896. Real pll = cos_theta[n]*a*pmmp1 - b*pmm;
  1897. pmm = pmmp1;
  1898. pmmp1 = pll;
  1899. poly_val[idx + N*(ll-m) + n] = pll;
  1900. }
  1901. }
  1902. idx += N * (degree - m + 1);
  1903. }
  1904. }
  1905. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1906. Vector<Real> theta(X.Dim());
  1907. for (Long i = 0; i < X.Dim(); i++) theta[i] = acos(X[i]);
  1908. LegPolyDeriv_(poly_val, theta, degree);
  1909. }
  1910. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv_(Vector<Real>& poly_val, const Vector<Real>& theta, Long degree){
  1911. Long N = theta.Dim();
  1912. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1913. if (poly_val.Dim() != N * Npoly) poly_val.ReInit(N * Npoly);
  1914. Vector<Real> cos_theta(N), sin_theta(N);
  1915. for (Long i = 0; i < N; i++) {
  1916. cos_theta[i] = cos(theta[i]);
  1917. sin_theta[i] = sin(theta[i]);
  1918. }
  1919. Vector<Real> leg_poly(Npoly * N);
  1920. LegPoly_(leg_poly, theta, degree);
  1921. for (Long m = 0; m <= degree; m++) {
  1922. for (Long n = m; n <= degree; n++) {
  1923. ConstIterator<Real> Pn = leg_poly.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1924. ConstIterator<Real> Pn_ = leg_poly.begin() + N * ((degree * 2 - m + 0) * (m + 1) / 2 + n) * (m < n);
  1925. Iterator <Real> Hn = poly_val.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1926. Real c2 = sqrt<Real>(m<n ? (n+m+1)*(n-m) : 0);
  1927. for (Long i = 0; i < N; i++) {
  1928. Real c1 = (sin_theta[i]>0 ? m/sin_theta[i] : 0);
  1929. Hn[i] = c1*cos_theta[i]*Pn[i] + c2*Pn_[i];
  1930. }
  1931. }
  1932. }
  1933. }
  1934. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p){
  1935. assert(p<SCTL_SHMAXDEG);
  1936. Vector<Real>& Qx=MatrixStore().Qx_[p];
  1937. #pragma omp critical (SCTL_LEGNODES)
  1938. if(!Qx.Dim()){
  1939. Vector<double> qx1(p+1);
  1940. Vector<double> qw1(p+1);
  1941. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1942. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1943. if (Qx.Dim() != p+1) Qx.ReInit(p+1);
  1944. for (Long i = 0; i < p + 1; i++) Qx[i] = -qx1[i];
  1945. }
  1946. return Qx;
  1947. }
  1948. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p){
  1949. assert(p<SCTL_SHMAXDEG);
  1950. Vector<Real>& Qw=MatrixStore().Qw_[p];
  1951. #pragma omp critical (SCTL_LEGWEIGHTS)
  1952. if(!Qw.Dim()){
  1953. Vector<double> qx1(p+1);
  1954. Vector<double> qw1(p+1);
  1955. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1956. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1957. if (Qw.Dim() != p+1) Qw.ReInit(p+1);
  1958. for (Long i = 0; i < p + 1; i++) Qw[i] = qw1[i];
  1959. }
  1960. return Qw;
  1961. }
  1962. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  1963. assert(p1<SCTL_SHMAXDEG);
  1964. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  1965. #pragma omp critical (SCTL_SINWEIGHTS)
  1966. if(!Sw.Dim()){
  1967. const Vector<Real>& qx1 = LegendreNodes(p1);
  1968. const Vector<Real>& qw1 = LegendreWeights(p1);
  1969. std::vector<Real> Yf(p1+1,0);
  1970. { // Set Yf
  1971. Vector<Real> x0(1); x0=1.0;
  1972. Vector<Real> alp0((p1+1)*(p1+2)/2);
  1973. LegPoly(alp0, x0, p1);
  1974. Vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  1975. LegPoly(alp, qx1, p1);
  1976. for(Long j=0;j<p1+1;j++){
  1977. for(Long i=0;i<p1+1;i++){
  1978. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  1979. }
  1980. }
  1981. }
  1982. Sw.ReInit(p1+1);
  1983. for(Long i=0;i<p1+1;i++){
  1984. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  1985. }
  1986. }
  1987. return Sw;
  1988. }
  1989. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  1990. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1991. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  1992. #pragma omp critical (SCTL_MATFOURIER)
  1993. if(!Mf.Dim(0)){
  1994. const Real SQRT2PI=sqrt(2*M_PI);
  1995. { // Set Mf
  1996. Matrix<Real> M(2*p0,2*p1);
  1997. for(Long j=0;j<2*p1;j++){
  1998. M[0][j]=SQRT2PI*1.0;
  1999. for(Long k=1;k<p0;k++){
  2000. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  2001. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  2002. }
  2003. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  2004. }
  2005. Mf=M;
  2006. }
  2007. }
  2008. return Mf;
  2009. }
  2010. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  2011. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  2012. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  2013. #pragma omp critical (SCTL_MATFOURIERINV)
  2014. if(!Mf.Dim(0)){
  2015. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  2016. { // Set Mf
  2017. Matrix<Real> M(2*p0,2*p1);
  2018. M.SetZero();
  2019. if(p1>p0) p1=p0;
  2020. for(Long j=0;j<2*p0;j++){
  2021. M[j][0]=INVSQRT2PI*0.5;
  2022. for(Long k=1;k<p1;k++){
  2023. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  2024. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  2025. }
  2026. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  2027. }
  2028. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  2029. Mf=M;
  2030. }
  2031. }
  2032. return Mf;
  2033. }
  2034. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  2035. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  2036. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  2037. #pragma omp critical (SCTL_MATFOURIERGRAD)
  2038. if(!Mdf.Dim(0)){
  2039. const Real SQRT2PI=sqrt(2*M_PI);
  2040. { // Set Mdf_
  2041. Matrix<Real> M(2*p0,2*p1);
  2042. for(Long j=0;j<2*p1;j++){
  2043. M[0][j]=SQRT2PI*0.0;
  2044. for(Long k=1;k<p0;k++){
  2045. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  2046. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  2047. }
  2048. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  2049. }
  2050. Mdf=M;
  2051. }
  2052. }
  2053. return Mdf;
  2054. }
  2055. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourier(Long Np){
  2056. assert(Np<SCTL_SHMAXDEG);
  2057. auto& Mf =MatrixStore().Mfftinv_ [Np];
  2058. #pragma omp critical (SCTL_FFT_PLAN0)
  2059. if(!Mf.Dim(0)){
  2060. StaticArray<Long,1> fft_dim{Np};
  2061. Mf.Setup(FFT_Type::C2R, 1, Vector<Long>(1,fft_dim,false));
  2062. }
  2063. return Mf;
  2064. }
  2065. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourierInv(Long Np){
  2066. assert(Np<SCTL_SHMAXDEG);
  2067. auto& Mf =MatrixStore().Mfft_ [Np];
  2068. #pragma omp critical (SCTL_FFT_PLAN1)
  2069. if(!Mf.Dim(0)){
  2070. StaticArray<Long,1> fft_dim {Np};
  2071. Mf.Setup(FFT_Type::R2C, 1, Vector<Long>(1,fft_dim,false));
  2072. }
  2073. return Mf;
  2074. }
  2075. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  2076. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  2077. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  2078. #pragma omp critical (SCTL_MATLEG)
  2079. if(!Ml.size()){
  2080. const Vector<Real>& qx1 = LegendreNodes(p1);
  2081. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  2082. LegPoly(alp, qx1, p0);
  2083. Ml.resize(p0+1);
  2084. auto ptr = alp.begin();
  2085. for(Long i=0;i<=p0;i++){
  2086. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  2087. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  2088. }
  2089. }
  2090. return Ml;
  2091. }
  2092. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  2093. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  2094. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  2095. #pragma omp critical (SCTL_MATLEGINV)
  2096. if(!Ml.size()){
  2097. const Vector<Real>& qx1 = LegendreNodes(p0);
  2098. const Vector<Real>& qw1 = LegendreWeights(p0);
  2099. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  2100. LegPoly(alp, qx1, p1);
  2101. Ml.resize(p1+1);
  2102. auto ptr = alp.begin();
  2103. for(Long i=0;i<=p1;i++){
  2104. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  2105. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  2106. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  2107. for(Long k=0;k<qx1.Dim();k++){
  2108. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  2109. }
  2110. }
  2111. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  2112. }
  2113. }
  2114. return Ml;
  2115. }
  2116. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  2117. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  2118. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  2119. #pragma omp critical (SCTL_MATLEGGRAD)
  2120. if(!Mdl.size()){
  2121. const Vector<Real>& qx1 = LegendreNodes(p1);
  2122. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  2123. LegPolyDeriv(alp, qx1, p0);
  2124. Mdl.resize(p0+1);
  2125. auto ptr = alp.begin();
  2126. for(Long i=0;i<=p0;i++){
  2127. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  2128. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  2129. }
  2130. }
  2131. return Mdl;
  2132. }
  2133. template <class Real> void SphericalHarmonics<Real>::SHBasisEval(Long p0, const Vector<Real>& theta_phi, Matrix<Real>& SHBasis) {
  2134. Long M = (p0+1) * (p0+1);
  2135. Long N = theta_phi.Dim() / 2;
  2136. assert(theta_phi.Dim() == N * 2);
  2137. Vector<Complex<Real>> exp_phi(N);
  2138. Matrix<Real> LegP((p0+1)*(p0+2)/2, N);
  2139. { // Set exp_phi, LegP
  2140. Vector<Real> theta(N);
  2141. for (Long i = 0; i < N; i++) { // Set theta, exp_phi
  2142. theta[i] = theta_phi[i*2+0];
  2143. exp_phi[i].real = cos<Real>(theta_phi[i*2+1]);
  2144. exp_phi[i].imag = sin<Real>(theta_phi[i*2+1]);
  2145. }
  2146. Vector<Real> alp(LegP.Dim(0) * LegP.Dim(1), LegP.begin(), false);
  2147. LegPoly_(alp, theta, p0);
  2148. }
  2149. { // Set SHBasis
  2150. SHBasis.ReInit(N, M);
  2151. Real s = 4 * sqrt<Real>(const_pi<Real>());
  2152. for (Long k0 = 0; k0 < N; k0++) {
  2153. Complex<Real> exp_phi_ = 1;
  2154. Complex<Real> exp_phi1 = exp_phi[k0];
  2155. for (Long m = 0; m <= p0; m++) {
  2156. for (Long n = m; n <= p0; n++) {
  2157. Long poly_idx = (2 * p0 - m + 1) * m / 2 + n;
  2158. Long basis_idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  2159. SHBasis[k0][basis_idx] = LegP[poly_idx][k0] * exp_phi_.real * s;
  2160. if (m) { // imaginary part
  2161. basis_idx += (p0+1-m);
  2162. SHBasis[k0][basis_idx] = -LegP[poly_idx][k0] * exp_phi_.imag * s;
  2163. } else {
  2164. SHBasis[k0][basis_idx] = SHBasis[k0][basis_idx] * 0.5;
  2165. }
  2166. }
  2167. exp_phi_ = exp_phi_ * exp_phi1;
  2168. }
  2169. }
  2170. }
  2171. assert(SHBasis.Dim(0) == N);
  2172. assert(SHBasis.Dim(1) == M);
  2173. }
  2174. template <class Real> void SphericalHarmonics<Real>::VecSHBasisEval(Long p0, const Vector<Real>& theta_phi, Matrix<Real>& SHBasis) {
  2175. Long M = (p0+1) * (p0+1);
  2176. Long N = theta_phi.Dim() / 2;
  2177. assert(theta_phi.Dim() == N * 2);
  2178. Long p_ = p0 + 1;
  2179. Long M_ = (p_+1) * (p_+1);
  2180. Matrix<Real> Ynm(N, M_);
  2181. SHBasisEval(p_, theta_phi, Ynm);
  2182. Vector<Real> cos_theta(N), csc_theta(N);
  2183. for (Long i = 0; i < N; i++) { // Set theta
  2184. cos_theta[i] = cos(theta_phi[i*2+0]);
  2185. csc_theta[i] = 1.0 / sin(theta_phi[i*2+0]);
  2186. }
  2187. { // Set SHBasis
  2188. SHBasis.ReInit(N * COORD_DIM, COORD_DIM * M);
  2189. SHBasis = 0;
  2190. const Complex<Real> imag(0,1);
  2191. for (Long i = 0; i < N; i++) {
  2192. auto Y = [p_, &Ynm, i](Long n, Long m) {
  2193. Complex<Real> c;
  2194. if (0 <= m && m <= n && n <= p_) {
  2195. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  2196. c.real = Ynm[i][idx];
  2197. if (m) {
  2198. idx += (p_+1-m);
  2199. c.imag = Ynm[i][idx];
  2200. }
  2201. }
  2202. return c;
  2203. };
  2204. auto write_coeff = [p0, &SHBasis, i, M](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  2205. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  2206. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  2207. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  2208. if (m) {
  2209. idx += (p0+1-m);
  2210. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  2211. }
  2212. }
  2213. };
  2214. auto A = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  2215. auto B = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  2216. if (fabs(csc_theta[i]) > 0) {
  2217. for (Long m = 0; m <= p0; m++) {
  2218. for (Long n = m; n <= p0; n++) {
  2219. Complex<Real> AYBY = A(n,m) * Y(n+1,m) - B(n,m) * Y(n-1,m);
  2220. Complex<Real> Fv2r = Y(n,m) * (-n-1);
  2221. Complex<Real> Fw2r = Y(n,m) * n;
  2222. Complex<Real> Fx2r = 0;
  2223. Complex<Real> Fv2t = AYBY * csc_theta[i];
  2224. Complex<Real> Fw2t = AYBY * csc_theta[i];
  2225. Complex<Real> Fx2t = imag * m * Y(n,m) * csc_theta[i];
  2226. Complex<Real> Fv2p = -imag * m * Y(n,m) * csc_theta[i];
  2227. Complex<Real> Fw2p = -imag * m * Y(n,m) * csc_theta[i];
  2228. Complex<Real> Fx2p = AYBY * csc_theta[i];
  2229. write_coeff(Fv2r, n, m, 0, 0);
  2230. write_coeff(Fw2r, n, m, 1, 0);
  2231. write_coeff(Fx2r, n, m, 2, 0);
  2232. write_coeff(Fv2t, n, m, 0, 1);
  2233. write_coeff(Fw2t, n, m, 1, 1);
  2234. write_coeff(Fx2t, n, m, 2, 1);
  2235. write_coeff(Fv2p, n, m, 0, 2);
  2236. write_coeff(Fw2p, n, m, 1, 2);
  2237. write_coeff(Fx2p, n, m, 2, 2);
  2238. }
  2239. }
  2240. } else {
  2241. Complex<Real> exp_phi;
  2242. exp_phi.real = cos<Real>(theta_phi[i*2+1]);
  2243. exp_phi.imag = -sin<Real>(theta_phi[i*2+1]);
  2244. for (Long m = 0; m <= p0; m++) {
  2245. for (Long n = m; n <= p0; n++) {
  2246. Complex<Real> Fv2r = 0;
  2247. Complex<Real> Fw2r = 0;
  2248. Complex<Real> Fx2r = 0;
  2249. Complex<Real> Fv2t = 0;
  2250. Complex<Real> Fw2t = 0;
  2251. Complex<Real> Fx2t = 0;
  2252. Complex<Real> Fv2p = 0;
  2253. Complex<Real> Fw2p = 0;
  2254. Complex<Real> Fx2p = 0;
  2255. if (m == 0) {
  2256. Fv2r = Y(n,m) * (-n-1);
  2257. Fw2r = Y(n,m) * n;
  2258. Fx2r = 0;
  2259. }
  2260. if (m == 1) {
  2261. auto Ycsc = [&cos_theta, &exp_phi, i](Long n) { return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta[i]<0) ? -1 : 1) * exp_phi; };
  2262. Complex<Real> AYBY = A(n,m) * Ycsc(n+1) - B(n,m) * Ycsc(n-1);
  2263. Fv2t = AYBY;
  2264. Fw2t = AYBY;
  2265. Fx2t = imag * m * Ycsc(n);
  2266. Fv2p =-imag * m * Ycsc(n);
  2267. Fw2p =-imag * m * Ycsc(n);
  2268. Fx2p = AYBY;
  2269. }
  2270. write_coeff(Fv2r, n, m, 0, 0);
  2271. write_coeff(Fw2r, n, m, 1, 0);
  2272. write_coeff(Fx2r, n, m, 2, 0);
  2273. write_coeff(Fv2t, n, m, 0, 1);
  2274. write_coeff(Fw2t, n, m, 1, 1);
  2275. write_coeff(Fx2t, n, m, 2, 1);
  2276. write_coeff(Fv2p, n, m, 0, 2);
  2277. write_coeff(Fw2p, n, m, 1, 2);
  2278. write_coeff(Fx2p, n, m, 2, 2);
  2279. }
  2280. }
  2281. }
  2282. }
  2283. }
  2284. assert(SHBasis.Dim(0) == N * COORD_DIM);
  2285. assert(SHBasis.Dim(1) == COORD_DIM * M);
  2286. }
  2287. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  2288. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2289. { // Set coeff_perm
  2290. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2291. Long itr=0;
  2292. for(Long i=0;i<2*p0;i++){
  2293. Long m=(i+1)/2;
  2294. for(Long n=m;n<=p0;n++){
  2295. coeff_perm[n][i]=itr;
  2296. itr++;
  2297. }
  2298. }
  2299. }
  2300. assert(p0<SCTL_SHMAXDEG);
  2301. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  2302. #pragma omp critical (SCTL_MATROTATE)
  2303. if(!Mr.size()){
  2304. const Real SQRT2PI=sqrt(2*M_PI);
  2305. Long Ncoef=p0*(p0+2);
  2306. Long Ngrid=2*p0*(p0+1);
  2307. Long Naleg=(p0+1)*(p0+2)/2;
  2308. Matrix<Real> Mcoord0(3,Ngrid);
  2309. const Vector<Real>& x=LegendreNodes(p0);
  2310. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  2311. for(Long j=0;j<2*p0;j++){
  2312. Mcoord0[0][i*2*p0+j]=x[i];
  2313. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  2314. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  2315. }
  2316. }
  2317. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  2318. Matrix<Real> Mcoord1;
  2319. { // Rotate coordinates
  2320. Matrix<Real> M(COORD_DIM, COORD_DIM);
  2321. Real cos_=-x[l];
  2322. Real sin_=-sqrt(1.0-x[l]*x[l]);
  2323. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  2324. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  2325. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  2326. Mcoord1=M*Mcoord0;
  2327. }
  2328. Matrix<Real> Mleg(Naleg, Ngrid);
  2329. { // Set Mleg
  2330. const Vector<Real> Vcoord1(Mcoord1.Dim(0)*Mcoord1.Dim(1), Mcoord1.begin(), false);
  2331. Vector<Real> Vleg(Mleg.Dim(0)*Mleg.Dim(1), Mleg.begin(), false);
  2332. LegPoly(Vleg, Vcoord1, p0);
  2333. }
  2334. Vector<Real> theta(Ngrid);
  2335. for(Long i=0;i<theta.Dim();i++){ // Set theta
  2336. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]); // TODO: works only for float and double
  2337. }
  2338. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  2339. { // Build Mcoef2grid
  2340. Long offset0=0;
  2341. Long offset1=0;
  2342. for(Long i=0;i<p0+1;i++){
  2343. Long len=p0+1-i;
  2344. { // P * cos
  2345. for(Long j=0;j<len;j++){
  2346. for(Long k=0;k<Ngrid;k++){
  2347. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  2348. }
  2349. }
  2350. offset1+=len;
  2351. }
  2352. if(i!=0 && i!=p0){ // P * sin
  2353. for(Long j=0;j<len;j++){
  2354. for(Long k=0;k<Ngrid;k++){
  2355. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  2356. }
  2357. }
  2358. offset1+=len;
  2359. }
  2360. offset0+=len;
  2361. }
  2362. assert(offset0==Naleg);
  2363. assert(offset1==Ncoef);
  2364. }
  2365. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  2366. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  2367. Grid2SHC(Vcoef2grid, p0+1, 2*p0, p0, Vcoef2coef, SHCArrange::COL_MAJOR_NONZERO);
  2368. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  2369. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  2370. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  2371. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  2372. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  2373. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  2374. }
  2375. }
  2376. Mr.push_back(M);
  2377. }
  2378. }
  2379. }
  2380. return Mr;
  2381. }
  2382. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  2383. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  2384. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  2385. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  2386. assert(p1==(Long)Ml.size()-1);
  2387. assert(p0==Mf.Dim(0)/2);
  2388. assert(p1==Mf.Dim(1)/2);
  2389. Long N=X.Dim()/(2*p0*(p0+1));
  2390. assert(N*2*p0*(p0+1)==X.Dim());
  2391. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  2392. Vector<Real> B0, B1;
  2393. B0.ReInit(N* p1*(p1+2));
  2394. B1.ReInit(N*2*p1*(p0+1));
  2395. #pragma omp parallel
  2396. { // Evaluate Fourier and transpose
  2397. Integer tid=omp_get_thread_num();
  2398. Integer omp_p=omp_get_num_threads();
  2399. Long a=(tid+0)*N*(p0+1)/omp_p;
  2400. Long b=(tid+1)*N*(p0+1)/omp_p;
  2401. const Long block_size=16;
  2402. Matrix<Real> B2(block_size,2*p1);
  2403. for(Long i0=a;i0<b;i0+=block_size){
  2404. Long i1=std::min(b,i0+block_size);
  2405. const Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  2406. Matrix<Real> Mout(i1-i0,2*p1, B2.begin(), false);
  2407. Matrix<Real>::GEMM(Mout, Min, Mf);
  2408. for(Long i=i0;i<i1;i++){
  2409. for(Long j=0;j<2*p1;j++){
  2410. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  2411. }
  2412. }
  2413. }
  2414. }
  2415. #pragma omp parallel
  2416. { // Evaluate Legendre polynomial
  2417. Integer tid=omp_get_thread_num();
  2418. Integer omp_p=omp_get_num_threads();
  2419. Long offset0=0;
  2420. Long offset1=0;
  2421. for(Long i=0;i<p1+1;i++){
  2422. Long N0=2*N;
  2423. if(i==0 || i==p1) N0=N;
  2424. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  2425. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  2426. { // Mout = Min * Ml[i] // split between threads
  2427. Long a=(tid+0)*N0/omp_p;
  2428. Long b=(tid+1)*N0/omp_p;
  2429. if(a<b){
  2430. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  2431. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  2432. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  2433. }
  2434. }
  2435. offset0+=Min .Dim(0)*Min .Dim(1);
  2436. offset1+=Mout.Dim(0)*Mout.Dim(1);
  2437. }
  2438. }
  2439. #pragma omp parallel
  2440. { // S <-- Rearrange(B0)
  2441. Integer tid=omp_get_thread_num();
  2442. Integer omp_p=omp_get_num_threads();
  2443. Long a=(tid+0)*N/omp_p;
  2444. Long b=(tid+1)*N/omp_p;
  2445. for(Long i=a;i<b;i++){
  2446. Long offset=0;
  2447. for(Long j=0;j<2*p1;j++){
  2448. Long len=p1+1-(j+1)/2;
  2449. Real* B_=&B0[i*len+N*offset];
  2450. Real* S_=&S[i*p1*(p1+2)+offset];
  2451. for(Long k=0;k<len;k++) S_[k]=B_[k];
  2452. offset+=len;
  2453. }
  2454. }
  2455. }
  2456. }
  2457. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  2458. const std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  2459. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2460. { // Set coeff_perm
  2461. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2462. Long itr=0;
  2463. for(Long i=0;i<2*p0;i++){
  2464. Long m=(i+1)/2;
  2465. for(Long n=m;n<=p0;n++){
  2466. coeff_perm[n][i]=itr;
  2467. itr++;
  2468. }
  2469. }
  2470. }
  2471. Long Ncoef=p0*(p0+2);
  2472. Long N=S.Dim()/Ncoef/dof;
  2473. assert(N*Ncoef*dof==S.Dim());
  2474. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  2475. const Matrix<Real> S0(N*dof, Ncoef, (Iterator<Real>)S.begin(), false);
  2476. Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, S_.begin(), false);
  2477. #pragma omp parallel
  2478. { // Construct all p0*(p0+1) rotations
  2479. Integer tid=omp_get_thread_num();
  2480. Integer omp_p=omp_get_num_threads();
  2481. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  2482. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  2483. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  2484. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  2485. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  2486. }
  2487. Long a=(tid+0)*N/omp_p;
  2488. Long b=(tid+1)*N/omp_p;
  2489. for(Long i=a;i<b;i++){
  2490. for(Long d=0;d<dof;d++){
  2491. for(Long j=0;j<p0;j++){
  2492. Long offset=0;
  2493. for(Long k=0;k<p0+1;k++){
  2494. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  2495. Long len=p0+1-k;
  2496. if(k!=0 && k!=p0){
  2497. for(Long l=0;l<len;l++){
  2498. Real x[2];
  2499. x[0]=S0[i*dof+d][offset+len*0+l];
  2500. x[1]=S0[i*dof+d][offset+len*1+l];
  2501. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  2502. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  2503. }
  2504. offset+=2*len;
  2505. }else{
  2506. for(Long l=0;l<len;l++){
  2507. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  2508. }
  2509. offset+=len;
  2510. }
  2511. }
  2512. assert(offset==Ncoef);
  2513. }
  2514. }
  2515. { // Fast rotation
  2516. for(Long k=0;k<dof*p0;k++){ // forward permutation
  2517. for(Long l=0;l<=p0;l++){
  2518. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2519. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  2520. }
  2521. }
  2522. }
  2523. for(Long t=0;t<=p0;t++){
  2524. for(Long l=0;l<=p0;l++){ // mat-vec
  2525. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  2526. }
  2527. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  2528. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  2529. for(Long l=0;l<=p0;l++){
  2530. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2531. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  2532. }
  2533. }
  2534. }
  2535. }
  2536. }
  2537. }
  2538. }
  2539. }
  2540. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  2541. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  2542. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  2543. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2544. { // Set coeff_perm
  2545. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2546. Long itr=0;
  2547. for(Long i=0;i<2*p0;i++){
  2548. Long m=(i+1)/2;
  2549. for(Long n=m;n<=p0;n++){
  2550. coeff_perm[n][i]=itr;
  2551. itr++;
  2552. }
  2553. }
  2554. }
  2555. Long Ncoef=p0*(p0+2);
  2556. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  2557. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  2558. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  2559. Matrix<Real> S0(N*dof*p0*(p0+1), Ncoef, S.begin(), false);
  2560. const Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, (Iterator<Real>)S_.begin(), false);
  2561. #pragma omp parallel
  2562. { // Transpose all p0*(p0+1) rotations
  2563. Integer tid=omp_get_thread_num();
  2564. Integer omp_p=omp_get_num_threads();
  2565. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  2566. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  2567. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  2568. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  2569. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  2570. }
  2571. Long a=(tid+0)*N/omp_p;
  2572. Long b=(tid+1)*N/omp_p;
  2573. for(Long i=a;i<b;i++){
  2574. for(Long t=0;t<p0+1;t++){
  2575. Long idx0=(i*(p0+1)+t)*p0*dof;
  2576. { // Fast rotation
  2577. const Matrix<Real> Min(p0*dof, Ncoef, (Iterator<Real>)S1[idx0], false);
  2578. for(Long k=0;k<dof*p0;k++){ // forward permutation
  2579. for(Long l=0;l<=p0;l++){
  2580. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2581. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  2582. }
  2583. }
  2584. }
  2585. for(Long l=0;l<=p0;l++){ // mat-vec
  2586. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  2587. }
  2588. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  2589. for(Long l=0;l<=p0;l++){
  2590. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2591. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  2592. }
  2593. }
  2594. }
  2595. }
  2596. for(Long j=0;j<p0;j++){
  2597. for(Long d=0;d<dof;d++){
  2598. Long idx1=idx0+j*dof+d;
  2599. Long offset=0;
  2600. for(Long k=0;k<p0+1;k++){
  2601. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  2602. Long len=p0+1-k;
  2603. if(k!=0 && k!=p0){
  2604. for(Long l=0;l<len;l++){
  2605. Real x[2];
  2606. x[0]=B0[j*dof+d][offset+len*0+l];
  2607. x[1]=B0[j*dof+d][offset+len*1+l];
  2608. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  2609. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  2610. }
  2611. offset+=2*len;
  2612. }else{
  2613. for(Long l=0;l<len;l++){
  2614. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  2615. }
  2616. offset+=len;
  2617. }
  2618. }
  2619. assert(offset==Ncoef);
  2620. }
  2621. }
  2622. }
  2623. }
  2624. }
  2625. }
  2626. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  2627. Long Ngrid=2*p0*(p0+1);
  2628. Long Ncoef= p0*(p0+2);
  2629. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  2630. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  2631. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  2632. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  2633. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  2634. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  2635. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  2636. Long b=std::min(a+BLOCK_SIZE, Nves);
  2637. Vector<Real> _SLMatrix, _DLMatrix;
  2638. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  2639. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  2640. const Vector<Real> _S ((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  2641. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2642. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2643. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2644. }
  2645. }
  2646. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  2647. Profile::Tic("Rotate");
  2648. Vector<Real> S0, S;
  2649. SphericalHarmonics<Real>::Grid2SHC(X0, p0+1, 2*p0, p0, S0, SHCArrange::COL_MAJOR_NONZERO);
  2650. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  2651. Profile::Toc();
  2652. Profile::Tic("Upsample");
  2653. Vector<Real> X, X_theta, X_phi, trg;
  2654. SphericalHarmonics<Real>::SHC2Grid(S, SHCArrange::COL_MAJOR_NONZERO, p0, p1+1, 2*p1, &X, &X_theta, &X_phi);
  2655. SphericalHarmonics<Real>::SHC2Pole(S, SHCArrange::COL_MAJOR_NONZERO, p0, trg);
  2656. Profile::Toc();
  2657. Profile::Tic("Stokes");
  2658. Vector<Real> SL0, DL0;
  2659. { // Stokes kernel
  2660. //Long M0=2*p0*(p0+1);
  2661. Long M1=2*p1*(p1+1);
  2662. Long N=trg.Dim()/(2*COORD_DIM);
  2663. assert(X.Dim()==M1*COORD_DIM*N);
  2664. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  2665. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  2666. const Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  2667. const Real scal_const_dl = 3.0/(4.0*M_PI);
  2668. const Real scal_const_sl = 1.0/(8.0*M_PI);
  2669. static Real eps=-1;
  2670. if(eps<0) eps=machine_eps<Real>();
  2671. #pragma omp parallel
  2672. {
  2673. Integer tid=omp_get_thread_num();
  2674. Integer omp_p=omp_get_num_threads();
  2675. Long a=(tid+0)*N/omp_p;
  2676. Long b=(tid+1)*N/omp_p;
  2677. for(Long i=a;i<b;i++){
  2678. for(Long t=0;t<2;t++){
  2679. Real tx, ty, tz;
  2680. { // Read target coordinates
  2681. tx=trg[i*2*COORD_DIM+0*2+t];
  2682. ty=trg[i*2*COORD_DIM+1*2+t];
  2683. tz=trg[i*2*COORD_DIM+2*2+t];
  2684. }
  2685. for(Long j0=0;j0<p1+1;j0++){
  2686. for(Long j1=0;j1<2*p1;j1++){
  2687. Long s=2*p1*j0+j1;
  2688. Real dx, dy, dz;
  2689. { // Compute dx, dy, dz
  2690. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  2691. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  2692. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  2693. }
  2694. Real nx, ny, nz;
  2695. { // Compute source normal
  2696. Real x_theta=X_phi[(i*COORD_DIM+0)*M1+s];
  2697. Real y_theta=X_phi[(i*COORD_DIM+1)*M1+s];
  2698. Real z_theta=X_phi[(i*COORD_DIM+2)*M1+s];
  2699. Real x_phi=X_theta[(i*COORD_DIM+0)*M1+s];
  2700. Real y_phi=X_theta[(i*COORD_DIM+1)*M1+s];
  2701. Real z_phi=X_theta[(i*COORD_DIM+2)*M1+s];
  2702. nx=(y_theta*z_phi-z_theta*y_phi);
  2703. ny=(z_theta*x_phi-x_theta*z_phi);
  2704. nz=(x_theta*y_phi-y_theta*x_phi);
  2705. }
  2706. Real area_elem=1.0;
  2707. if(SLayer){ // Compute area_elem
  2708. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  2709. }
  2710. Real rinv, rinv2;
  2711. { // Compute rinv, rinv2
  2712. Real r2=dx*dx+dy*dy+dz*dz;
  2713. rinv=1.0/sqrt(r2);
  2714. if(r2<=eps) rinv=0;
  2715. rinv2=rinv*rinv;
  2716. }
  2717. if(DLayer){
  2718. Real rinv5=rinv2*rinv2*rinv;
  2719. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  2720. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  2721. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  2722. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  2723. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  2724. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  2725. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  2726. }
  2727. if(SLayer){
  2728. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  2729. Real area_rinv2=area_rinv*rinv2;
  2730. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  2731. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  2732. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  2733. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  2734. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  2735. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  2736. }
  2737. }
  2738. }
  2739. }
  2740. }
  2741. }
  2742. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  2743. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  2744. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  2745. }
  2746. Profile::Toc();
  2747. Profile::Tic("UpsampleTranspose");
  2748. Vector<Real> SL1, DL1;
  2749. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  2750. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  2751. Profile::Toc();
  2752. Profile::Tic("RotateTranspose");
  2753. Vector<Real> SL2, DL2;
  2754. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  2755. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  2756. Profile::Toc();
  2757. Profile::Tic("Rearrange");
  2758. Vector<Real> SL3, DL3;
  2759. { // Transpose
  2760. Long Ncoef=p0*(p0+2);
  2761. Long Ngrid=2*p0*(p0+1);
  2762. { // Transpose SL2
  2763. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  2764. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2765. #pragma omp parallel
  2766. {
  2767. Integer tid=omp_get_thread_num();
  2768. Integer omp_p=omp_get_num_threads();
  2769. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2770. Long a=(tid+0)*N/omp_p;
  2771. Long b=(tid+1)*N/omp_p;
  2772. for(Long i=a;i<b;i++){
  2773. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  2774. for(Long k=0;k<Ncoef;k++){ // Transpose
  2775. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2776. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2777. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2778. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2779. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2780. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2781. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2782. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2783. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2784. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2785. }
  2786. }
  2787. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2788. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2789. for(Long j0=0;j0<COORD_DIM;j0++){
  2790. for(Long j1=0;j1<p0+1;j1++){
  2791. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2792. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2793. }
  2794. }
  2795. }
  2796. }
  2797. }
  2798. }
  2799. { // Transpose DL2
  2800. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  2801. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2802. #pragma omp parallel
  2803. {
  2804. Integer tid=omp_get_thread_num();
  2805. Integer omp_p=omp_get_num_threads();
  2806. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2807. Long a=(tid+0)*N/omp_p;
  2808. Long b=(tid+1)*N/omp_p;
  2809. for(Long i=a;i<b;i++){
  2810. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  2811. for(Long k=0;k<Ncoef;k++){ // Transpose
  2812. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2813. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2814. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2815. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2816. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2817. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2818. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2819. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2820. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2821. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2822. }
  2823. }
  2824. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2825. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2826. for(Long j0=0;j0<COORD_DIM;j0++){
  2827. for(Long j1=0;j1<p0+1;j1++){
  2828. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2829. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2830. }
  2831. }
  2832. }
  2833. }
  2834. }
  2835. }
  2836. }
  2837. Profile::Toc();
  2838. Profile::Tic("Grid2SHC");
  2839. SphericalHarmonics<Real>::Grid2SHC(SL3, p0+1, 2*p0, p0, SL, SHCArrange::COL_MAJOR_NONZERO);
  2840. SphericalHarmonics<Real>::Grid2SHC(DL3, p0+1, 2*p0, p0, DL, SHCArrange::COL_MAJOR_NONZERO);
  2841. Profile::Toc();
  2842. }
  2843. } // end namespace