sph_harm.txx 96 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661
  1. #include SCTL_INCLUDE(legendre_rule.hpp)
  2. // TODO: Replace work vectors with dynamic-arrays
  3. namespace SCTL_NAMESPACE {
  4. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& S, SHCArrange arrange){
  5. Long N = X.Dim() / (Np*Nt);
  6. assert(X.Dim() == N*Np*Nt);
  7. Vector<Real> B1(N*(p1+1)*(p1+1));
  8. Grid2SHC_(X, Nt, Np, p1, B1);
  9. SHCArrange0(B1, p1, S, arrange);
  10. }
  11. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_theta, Vector<Real>* X_phi){
  12. Vector<Real> B0;
  13. SHCArrange1(S, arrange, p0, B0);
  14. SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
  15. }
  16. template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
  17. Long M = (p0+1) * (p0+1);
  18. Long dof;
  19. Matrix<Real> B1;
  20. { // Set B1, dof
  21. Vector<Real> B0;
  22. SHCArrange1(S, arrange, p0, B0);
  23. dof = B0.Dim() / M;
  24. assert(B0.Dim() == dof * M);
  25. B1.ReInit(dof, M);
  26. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  27. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  28. }
  29. assert(B1.Dim(0) == dof);
  30. assert(B1.Dim(1) == M);
  31. Matrix<Real> SHBasis;
  32. SHBasisEval(p0, cos_theta_phi, SHBasis);
  33. assert(SHBasis.Dim(1) == M);
  34. Long N = SHBasis.Dim(0);
  35. { // Set X
  36. if (X.Dim() != N*dof) X.ReInit(N * dof);
  37. for (Long k0 = 0; k0 < N; k0++) {
  38. for (Long k1 = 0; k1 < dof; k1++) {
  39. Real X_ = 0;
  40. for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
  41. X[k0 * dof + k1] = X_;
  42. }
  43. }
  44. }
  45. }
  46. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
  47. Vector<Real> QP[2];
  48. { // Set QP // TODO: store these weights
  49. Vector<Real> x(1), alp;
  50. const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
  51. for (Long i = 0; i < 2; i++) {
  52. x = (i ? -1 : 1);
  53. LegPoly(alp, x, p0);
  54. QP[i].ReInit(p0 + 1, alp.begin());
  55. QP[i] *= SQRT2PI;
  56. }
  57. }
  58. Long M, N;
  59. { // Set M, N
  60. M = 0;
  61. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  62. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  63. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  64. if (M == 0) return;
  65. N = S.Dim() / M;
  66. assert(S.Dim() == N * M);
  67. }
  68. if(P.Dim() != N * 2) P.ReInit(N * 2);
  69. if (arrange == SHCArrange::ALL) {
  70. #pragma omp parallel
  71. { // Compute pole
  72. Integer tid = omp_get_thread_num();
  73. Integer omp_p = omp_get_num_threads();
  74. Long a = (tid + 0) * N / omp_p;
  75. Long b = (tid + 1) * N / omp_p;
  76. for (Long i = a; i < b; i++) {
  77. Real P_[2] = {0, 0};
  78. for (Long j = 0; j < p0 + 1; j++) {
  79. P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
  80. P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
  81. }
  82. P[2*i+0] = P_[0];
  83. P[2*i+1] = P_[1];
  84. }
  85. }
  86. }
  87. if (arrange == SHCArrange::ROW_MAJOR) {
  88. #pragma omp parallel
  89. { // Compute pole
  90. Integer tid = omp_get_thread_num();
  91. Integer omp_p = omp_get_num_threads();
  92. Long a = (tid + 0) * N / omp_p;
  93. Long b = (tid + 1) * N / omp_p;
  94. for (Long i = a; i < b; i++) {
  95. Long idx = 0;
  96. Real P_[2] = {0, 0};
  97. for (Long j = 0; j < p0 + 1; j++) {
  98. P_[0] += S[i*M+idx] * QP[0][j];
  99. P_[1] += S[i*M+idx] * QP[1][j];
  100. idx += 2*(j+1);
  101. }
  102. P[2*i+0] = P_[0];
  103. P[2*i+1] = P_[1];
  104. }
  105. }
  106. }
  107. if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
  108. #pragma omp parallel
  109. { // Compute pole
  110. Integer tid = omp_get_thread_num();
  111. Integer omp_p = omp_get_num_threads();
  112. Long a = (tid + 0) * N / omp_p;
  113. Long b = (tid + 1) * N / omp_p;
  114. for (Long i = a; i < b; i++) {
  115. Real P_[2] = {0, 0};
  116. for (Long j = 0; j < p0 + 1; j++) {
  117. P_[0] += S[i*M+j] * QP[0][j];
  118. P_[1] += S[i*M+j] * QP[1][j];
  119. }
  120. P[2*i+0] = P_[0];
  121. P[2*i+1] = P_[1];
  122. }
  123. }
  124. }
  125. }
  126. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
  127. typedef double VTKReal;
  128. Vector<Real> SS;
  129. if (S == nullptr) {
  130. Integer p = 2;
  131. Integer Ncoeff = (p + 1) * (p + 1);
  132. Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
  133. SSS.SetZero();
  134. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  135. SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
  136. SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
  137. SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
  138. SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
  139. S = &SS;
  140. }
  141. Vector<Real> X, Xp, V, Vp;
  142. { // Upsample X
  143. const Vector<Real>& X0=*S;
  144. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
  145. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
  146. }
  147. if(v_ptr){ // Upsample V
  148. const Vector<Real>& X0=*v_ptr;
  149. SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
  150. SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
  151. }
  152. std::vector<VTKReal> point_coord;
  153. std::vector<VTKReal> point_value;
  154. std::vector<int32_t> poly_connect;
  155. std::vector<int32_t> poly_offset;
  156. { // Set point_coord, point_value, poly_connect
  157. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  158. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  159. for(Long k=0;k<N_ves;k++){ // Set point_coord
  160. Real C[COORD_DIM]={0,0,0};
  161. if(period>0){
  162. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  163. for(Long i=0;i<p1+1;i++){
  164. for(Long j=0;j<2*p1;j++){
  165. for(Integer l=0;l<COORD_DIM;l++){
  166. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  167. }
  168. }
  169. }
  170. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  171. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  172. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  173. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  174. }
  175. for(Long i=0;i<p1+1;i++){
  176. for(Long j=0;j<2*p1;j++){
  177. for(Integer l=0;l<COORD_DIM;l++){
  178. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  179. }
  180. }
  181. }
  182. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  183. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  184. }
  185. if(v_ptr) {
  186. Long data__dof = V.Dim() / (2*p1*(p1+1));
  187. for(Long k=0;k<N_ves;k++){ // Set point_value
  188. for(Long i=0;i<p1+1;i++){
  189. for(Long j=0;j<2*p1;j++){
  190. for(Long l=0;l<data__dof;l++){
  191. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  192. }
  193. }
  194. }
  195. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  196. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  197. }
  198. }
  199. for(Long k=0;k<N_ves;k++){
  200. for(Long j=0;j<2*p1;j++){
  201. Long i0= 0;
  202. Long i1=p1;
  203. Long j0=((j+0) );
  204. Long j1=((j+1)%(2*p1));
  205. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  206. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  207. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  208. poly_offset.push_back(poly_connect.size());
  209. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  210. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  211. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  212. poly_offset.push_back(poly_connect.size());
  213. }
  214. for(Long i=0;i<p1;i++){
  215. for(Long j=0;j<2*p1;j++){
  216. Long i0=((i+0) );
  217. Long i1=((i+1) );
  218. Long j0=((j+0) );
  219. Long j1=((j+1)%(2*p1));
  220. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  221. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  222. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  223. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  224. poly_offset.push_back(poly_connect.size());
  225. }
  226. }
  227. }
  228. }
  229. Integer np = comm.Size();
  230. Integer myrank = comm.Rank();
  231. std::vector<VTKReal>& coord=point_coord;
  232. std::vector<VTKReal>& value=point_value;
  233. std::vector<int32_t>& connect=poly_connect;
  234. std::vector<int32_t>& offset=poly_offset;
  235. Long pt_cnt=coord.size()/COORD_DIM;
  236. Long poly_cnt=poly_offset.size();
  237. // Open file for writing.
  238. std::stringstream vtufname;
  239. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  240. std::ofstream vtufile;
  241. vtufile.open(vtufname.str().c_str());
  242. if(vtufile.fail()) return;
  243. bool isLittleEndian;
  244. { // Set isLittleEndian
  245. uint16_t number = 0x1;
  246. uint8_t *numPtr = (uint8_t*)&number;
  247. isLittleEndian=(numPtr[0] == 1);
  248. }
  249. // Proceed to write to file.
  250. Long data_size=0;
  251. vtufile<<"<?xml version=\"1.0\"?>\n";
  252. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  253. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  254. //===========================================================================
  255. vtufile<<" <PolyData>\n";
  256. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  257. //---------------------------------------------------------------------------
  258. vtufile<<" <Points>\n";
  259. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  260. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  261. vtufile<<" </Points>\n";
  262. //---------------------------------------------------------------------------
  263. if(value.size()){ // value
  264. vtufile<<" <PointData>\n";
  265. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  266. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  267. vtufile<<" </PointData>\n";
  268. }
  269. //---------------------------------------------------------------------------
  270. vtufile<<" <Polys>\n";
  271. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  272. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  273. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  274. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  275. vtufile<<" </Polys>\n";
  276. //---------------------------------------------------------------------------
  277. vtufile<<" </Piece>\n";
  278. vtufile<<" </PolyData>\n";
  279. //===========================================================================
  280. vtufile<<" <AppendedData encoding=\"raw\">\n";
  281. vtufile<<" _";
  282. int32_t block_size;
  283. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  284. if(value.size()){ // value
  285. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  286. }
  287. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  288. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  289. vtufile<<"\n";
  290. vtufile<<" </AppendedData>\n";
  291. //===========================================================================
  292. vtufile<<"</VTKFile>\n";
  293. vtufile.close();
  294. if(myrank) return;
  295. std::stringstream pvtufname;
  296. pvtufname<<fname<<".pvtp";
  297. std::ofstream pvtufile;
  298. pvtufile.open(pvtufname.str().c_str());
  299. if(pvtufile.fail()) return;
  300. pvtufile<<"<?xml version=\"1.0\"?>\n";
  301. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  302. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  303. pvtufile<<" <PPoints>\n";
  304. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  305. pvtufile<<" </PPoints>\n";
  306. if(value.size()){ // value
  307. pvtufile<<" <PPointData>\n";
  308. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  309. pvtufile<<" </PPointData>\n";
  310. }
  311. {
  312. // Extract filename from path.
  313. std::stringstream vtupath;
  314. vtupath<<'/'<<fname;
  315. std::string pathname = vtupath.str();
  316. auto found = pathname.find_last_of("/\\");
  317. std::string fname_ = pathname.substr(found+1);
  318. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  319. }
  320. pvtufile<<" </PPolyData>\n";
  321. pvtufile<<"</VTKFile>\n";
  322. pvtufile.close();
  323. }
  324. template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
  325. Long N = X.Dim() / (Np*Nt);
  326. assert(X.Dim() == N*Np*Nt);
  327. assert(N % COORD_DIM == 0);
  328. Vector<Real> B0(N*Nt*Np);
  329. { // Set B0
  330. Vector<Real> sin_phi(Np), cos_phi(Np);
  331. for (Long i = 0; i < Np; i++) {
  332. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  333. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  334. }
  335. const auto& Y = LegendreNodes(Nt - 1);
  336. assert(Y.Dim() == Nt);
  337. Long Ngrid = Nt * Np;
  338. for (Long k = 0; k < N; k+=COORD_DIM) {
  339. for (Long i = 0; i < Nt; i++) {
  340. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  341. Real cos_theta = Y[i];
  342. Real csc_theta = 1 / sin_theta;
  343. const auto X_ = X.begin() + (k*Nt+i)*Np;
  344. auto B0_ = B0.begin() + (k*Nt+i)*Np;
  345. for (Long j = 0; j < Np; j++) {
  346. StaticArray<Real,3> in;
  347. in[0] = X_[0*Ngrid+j];
  348. in[1] = X_[1*Ngrid+j];
  349. in[2] = X_[2*Ngrid+j];
  350. StaticArray<Real,9> Q;
  351. { // Set Q
  352. Q[0] = sin_theta*cos_phi[j]; Q[1] = sin_theta*sin_phi[j]; Q[2] = cos_theta;
  353. Q[3] = cos_theta*cos_phi[j]; Q[4] = cos_theta*sin_phi[j]; Q[5] =-sin_theta;
  354. Q[6] = -sin_phi[j]; Q[7] = cos_phi[j]; Q[8] = 0;
  355. }
  356. B0_[0*Ngrid+j] = ( Q[0] * in[0] + Q[1] * in[1] + Q[2] * in[2] );
  357. B0_[1*Ngrid+j] = ( Q[3] * in[0] + Q[4] * in[1] + Q[5] * in[2] ) * csc_theta;
  358. B0_[2*Ngrid+j] = ( Q[6] * in[0] + Q[7] * in[1] + Q[8] * in[2] ) * csc_theta;
  359. }
  360. }
  361. }
  362. }
  363. Long p_ = p0 + 1;
  364. Long M0 = (p0+1)*(p0+1);
  365. Long M_ = (p_+1)*(p_+1);
  366. Vector<Real> B1(N*M_);
  367. Grid2SHC_(B0, Nt, Np, p_, B1);
  368. Vector<Real> B2(N*M0);
  369. const Complex<Real> imag(0,1);
  370. for (Long i=0; i<N; i+=COORD_DIM) {
  371. for (Long m=0; m<=p0; m++) {
  372. for (Long n=m; n<=p0; n++) {
  373. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  374. Complex<Real> c;
  375. if (0<=m && m<=n && n<=p) {
  376. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  377. Long idx_imag = idx_real + (p+1-m)*N;
  378. c.real = coeff[idx_real];
  379. if (m) c.imag = coeff[idx_imag];
  380. }
  381. return c;
  382. };
  383. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  384. if (0<=m && m<=n && n<=p) {
  385. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  386. Long idx_imag = idx_real + (p+1-m)*N;
  387. coeff[idx_real] = c.real;
  388. if (m) coeff[idx_imag] = c.imag;
  389. }
  390. };
  391. auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
  392. auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
  393. auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
  394. Complex<Real> phiY, phiG, phiX;
  395. { // (phiG, phiX) <-- (gt, gp)
  396. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  397. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  398. phiY = gr(n,m);
  399. phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  400. phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
  401. }
  402. auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
  403. auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
  404. if (n==0) {
  405. phiW = 0;
  406. phiX = 0;
  407. }
  408. write_coeff(phiV, B2, i+0, p0, n, m);
  409. write_coeff(phiW, B2, i+1, p0, n, m);
  410. write_coeff(phiX, B2, i+2, p0, n, m);
  411. }
  412. }
  413. }
  414. SHCArrange0(B2, p0, S, arrange);
  415. }
  416. template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>& X) {
  417. Vector<Real> B0;
  418. SHCArrange1(S, arrange, p0, B0);
  419. Long p_ = p0 + 1;
  420. Long M0 = (p0+1)*(p0+1);
  421. Long M_ = (p_+1)*(p_+1);
  422. Long N = B0.Dim() / M0;
  423. assert(B0.Dim() == N*M0);
  424. assert(N % COORD_DIM == 0);
  425. Vector<Real> B1(N*M_);
  426. const Complex<Real> imag(0,1);
  427. for (Long i=0; i<N; i+=COORD_DIM) {
  428. for (Long m=0; m<=p_; m++) {
  429. for (Long n=m; n<=p_; n++) {
  430. auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  431. Complex<Real> c;
  432. if (0<=m && m<=n && n<=p) {
  433. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  434. Long idx_imag = idx_real + (p+1-m)*N;
  435. c.real = coeff[idx_real];
  436. if (m) c.imag = coeff[idx_imag];
  437. }
  438. return c;
  439. };
  440. auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
  441. if (0<=m && m<=n && n<=p) {
  442. Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
  443. Long idx_imag = idx_real + (p+1-m)*N;
  444. coeff[idx_real] = c.real;
  445. if (m) coeff[idx_imag] = c.imag;
  446. }
  447. };
  448. auto phiG = [&](Long n, Long m) {
  449. auto phiV = read_coeff(B0, i+0, p0, n, m);
  450. auto phiW = read_coeff(B0, i+1, p0, n, m);
  451. return phiV + phiW;
  452. };
  453. auto phiY = [&](Long n, Long m) {
  454. auto phiV = read_coeff(B0, i+0, p0, n, m);
  455. auto phiW = read_coeff(B0, i+1, p0, n, m);
  456. return phiW * n - phiV * (n + 1);
  457. };
  458. auto phiX = [&](Long n, Long m) {
  459. return read_coeff(B0, i+2, p0, n, m);
  460. };
  461. Complex<Real> gr, gt, gp;
  462. { // (gt, gp) <-- (phiG, phiX)
  463. auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  464. auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  465. gr = phiY(n,m);
  466. gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
  467. gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
  468. }
  469. write_coeff(gr, B1, i+0, p_, n, m);
  470. write_coeff(gt, B1, i+1, p_, n, m);
  471. write_coeff(gp, B1, i+2, p_, n, m);
  472. }
  473. }
  474. }
  475. { // Set X
  476. SHC2Grid_(B1, p_, Nt, Np, &X);
  477. Vector<Real> sin_phi(Np), cos_phi(Np);
  478. for (Long i = 0; i < Np; i++) {
  479. sin_phi[i] = sin(2 * const_pi<Real>() * i / Np);
  480. cos_phi[i] = cos(2 * const_pi<Real>() * i / Np);
  481. }
  482. const auto& Y = LegendreNodes(Nt - 1);
  483. assert(Y.Dim() == Nt);
  484. Long Ngrid = Nt * Np;
  485. for (Long k = 0; k < N; k+=COORD_DIM) {
  486. for (Long i = 0; i < Nt; i++) {
  487. Real sin_theta = sqrt<Real>(1 - Y[i]*Y[i]);
  488. Real cos_theta = Y[i];
  489. Real csc_theta = 1 / sin_theta;
  490. auto X_ = X.begin() + (k*Nt+i)*Np;
  491. for (Long j = 0; j < Np; j++) {
  492. StaticArray<Real,3> in;
  493. in[0] = X_[0*Ngrid+j];
  494. in[1] = X_[1*Ngrid+j] * csc_theta;
  495. in[2] = X_[2*Ngrid+j] * csc_theta;
  496. StaticArray<Real,9> Q;
  497. { // Set Q
  498. Q[0] = sin_theta*cos_phi[j]; Q[1] = sin_theta*sin_phi[j]; Q[2] = cos_theta;
  499. Q[3] = cos_theta*cos_phi[j]; Q[4] = cos_theta*sin_phi[j]; Q[5] =-sin_theta;
  500. Q[6] = -sin_phi[j]; Q[7] = cos_phi[j]; Q[8] = 0;
  501. }
  502. X_[0*Ngrid+j] = ( Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2] );
  503. X_[1*Ngrid+j] = ( Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2] );
  504. X_[2*Ngrid+j] = ( Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2] );
  505. }
  506. }
  507. }
  508. }
  509. }
  510. template <class Real> void SphericalHarmonics<Real>::VecSHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
  511. Long M = (p0+1) * (p0+1);
  512. Long dof;
  513. Matrix<Real> B1;
  514. { // Set B1, dof
  515. Vector<Real> B0;
  516. SHCArrange1(S, arrange, p0, B0);
  517. dof = B0.Dim() / M / COORD_DIM;
  518. assert(B0.Dim() == dof * COORD_DIM * M);
  519. B1.ReInit(dof, COORD_DIM * M);
  520. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  521. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  522. }
  523. assert(B1.Dim(1) == COORD_DIM * M);
  524. assert(B1.Dim(0) == dof);
  525. Matrix<Real> SHBasis;
  526. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  527. assert(SHBasis.Dim(1) == COORD_DIM * M);
  528. Long N = SHBasis.Dim(0) / COORD_DIM;
  529. { // Set X <-- Q * SHBasis * B1
  530. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  531. for (Long k0 = 0; k0 < N; k0++) {
  532. StaticArray<Real,9> Q;
  533. { // Set Q
  534. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  535. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  536. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  537. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  538. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  539. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  540. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  541. }
  542. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * SHBasis * B1
  543. StaticArray<Real,COORD_DIM> in;
  544. for (Long j = 0; j < COORD_DIM; j++) {
  545. in[j] = 0;
  546. for (Long i = 0; i < COORD_DIM * M; i++) {
  547. in[j] += B1[k1][i] * SHBasis[k0 * COORD_DIM + j][i];
  548. }
  549. }
  550. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  551. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  552. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  553. }
  554. }
  555. }
  556. }
  557. template <class Real> void SphericalHarmonics<Real>::StokesEvalSL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  558. Long M = (p0+1) * (p0+1);
  559. Long dof;
  560. Matrix<Real> B1;
  561. { // Set B1, dof
  562. Vector<Real> B0;
  563. SHCArrange1(S, arrange, p0, B0);
  564. dof = B0.Dim() / M / COORD_DIM;
  565. assert(B0.Dim() == dof * COORD_DIM * M);
  566. B1.ReInit(dof, COORD_DIM * M);
  567. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  568. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  569. }
  570. assert(B1.Dim(1) == COORD_DIM * M);
  571. assert(B1.Dim(0) == dof);
  572. Long N;
  573. Matrix<Real> SHBasis;
  574. Vector<Real> R, cos_theta_phi;
  575. { // Set N, R, SHBasis
  576. N = coord.Dim() / COORD_DIM;
  577. assert(coord.Dim() == N * COORD_DIM);
  578. R.ReInit(N);
  579. cos_theta_phi.ReInit(2 * N);
  580. for (Long i = 0; i < N; i++) { // Set R, cos_theta_phi
  581. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  582. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  583. cos_theta_phi[i * 2 + 0] = x[2] / R[i];
  584. cos_theta_phi[i * 2 + 1] = atan2(x[1], x[0]); // TODO: works only for float and double
  585. }
  586. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  587. assert(SHBasis.Dim(1) == COORD_DIM * M);
  588. assert(SHBasis.Dim(0) == N * COORD_DIM);
  589. }
  590. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  591. for (Long i = 0; i < N; i++) { // Set StokesOp
  592. for (Long m = 0; m <= p0; m++) {
  593. for (Long n = m; n <= p0; n++) {
  594. auto read_coeff = [&](Long n, Long m, Long k0, Long k1) {
  595. Complex<Real> c;
  596. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  597. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  598. c.real = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  599. if (m) {
  600. idx += (p0+1-m);
  601. c.imag = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  602. }
  603. }
  604. return c;
  605. };
  606. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  607. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  608. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  609. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  610. if (m) {
  611. idx += (p0+1-m);
  612. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  613. }
  614. }
  615. };
  616. auto Vr = read_coeff(n, m, 0, 0);
  617. auto Vt = read_coeff(n, m, 0, 1);
  618. auto Vp = read_coeff(n, m, 0, 2);
  619. auto Wr = read_coeff(n, m, 1, 0);
  620. auto Wt = read_coeff(n, m, 1, 1);
  621. auto Wp = read_coeff(n, m, 1, 2);
  622. auto Xr = read_coeff(n, m, 2, 0);
  623. auto Xt = read_coeff(n, m, 2, 1);
  624. auto Xp = read_coeff(n, m, 2, 2);
  625. Complex<Real> SVr, SVt, SVp;
  626. Complex<Real> SWr, SWt, SWp;
  627. Complex<Real> SXr, SXt, SXp;
  628. if (interior) {
  629. Real a,b;
  630. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  631. b = -(n+1) / (Real)(4*n+2) * (pow<Real>(R[i], n-1) - pow<Real>(R[i], n+1));
  632. SVr = a * Vr + b * Wr;
  633. SVt = a * Vt + b * Wt;
  634. SVp = a * Vp + b * Wp;
  635. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  636. SWr = a * Wr;
  637. SWt = a * Wt;
  638. SWp = a * Wp;
  639. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], n);
  640. SXr = a * Xr;
  641. SXt = a * Xt;
  642. SXp = a * Xp;
  643. } else {
  644. Real a,b;
  645. a = n / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  646. SVr = a * Vr;
  647. SVt = a * Vt;
  648. SVp = a * Vp;
  649. a = (n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  650. b = n / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  651. SWr = a * Wr + b * Vr;
  652. SWt = a * Wt + b * Vt;
  653. SWp = a * Wp + b * Vp;
  654. a = 1 / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  655. SXr = a * Xr;
  656. SXt = a * Xt;
  657. SXp = a * Xp;
  658. }
  659. write_coeff(SVr, n, m, 0, 0);
  660. write_coeff(SVt, n, m, 0, 1);
  661. write_coeff(SVp, n, m, 0, 2);
  662. write_coeff(SWr, n, m, 1, 0);
  663. write_coeff(SWt, n, m, 1, 1);
  664. write_coeff(SWp, n, m, 1, 2);
  665. write_coeff(SXr, n, m, 2, 0);
  666. write_coeff(SXt, n, m, 2, 1);
  667. write_coeff(SXp, n, m, 2, 2);
  668. }
  669. }
  670. }
  671. { // Set X <-- Q * StokesOp * B1
  672. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  673. for (Long k0 = 0; k0 < N; k0++) {
  674. StaticArray<Real,9> Q;
  675. { // Set Q
  676. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  677. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  678. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  679. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  680. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  681. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  682. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  683. }
  684. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  685. StaticArray<Real,COORD_DIM> in;
  686. for (Long j = 0; j < COORD_DIM; j++) {
  687. in[j] = 0;
  688. for (Long i = 0; i < COORD_DIM * M; i++) {
  689. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  690. }
  691. }
  692. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  693. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  694. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  695. }
  696. }
  697. }
  698. }
  699. template <class Real> void SphericalHarmonics<Real>::StokesEvalDL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, bool interior, Vector<Real>& X) {
  700. Long M = (p0+1) * (p0+1);
  701. Long dof;
  702. Matrix<Real> B1;
  703. { // Set B1, dof
  704. Vector<Real> B0;
  705. SHCArrange1(S, arrange, p0, B0);
  706. dof = B0.Dim() / M / COORD_DIM;
  707. assert(B0.Dim() == dof * COORD_DIM * M);
  708. B1.ReInit(dof, COORD_DIM * M);
  709. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  710. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  711. }
  712. assert(B1.Dim(1) == COORD_DIM * M);
  713. assert(B1.Dim(0) == dof);
  714. Long N;
  715. Matrix<Real> SHBasis;
  716. Vector<Real> R, cos_theta_phi;
  717. { // Set N, R, SHBasis
  718. N = coord.Dim() / COORD_DIM;
  719. assert(coord.Dim() == N * COORD_DIM);
  720. R.ReInit(N);
  721. cos_theta_phi.ReInit(2 * N);
  722. for (Long i = 0; i < N; i++) { // Set R, cos_theta_phi
  723. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  724. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  725. cos_theta_phi[i * 2 + 0] = x[2] / R[i];
  726. cos_theta_phi[i * 2 + 1] = atan2(x[1], x[0]); // TODO: works only for float and double
  727. }
  728. VecSHBasisEval(p0, cos_theta_phi, SHBasis);
  729. assert(SHBasis.Dim(1) == COORD_DIM * M);
  730. assert(SHBasis.Dim(0) == N * COORD_DIM);
  731. }
  732. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  733. for (Long i = 0; i < N; i++) { // Set StokesOp
  734. for (Long m = 0; m <= p0; m++) {
  735. for (Long n = m; n <= p0; n++) {
  736. auto read_coeff = [&](Long n, Long m, Long k0, Long k1) {
  737. Complex<Real> c;
  738. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  739. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  740. c.real = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  741. if (m) {
  742. idx += (p0+1-m);
  743. c.imag = SHBasis[i * COORD_DIM + k1][k0 * M + idx];
  744. }
  745. }
  746. return c;
  747. };
  748. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  749. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  750. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  751. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  752. if (m) {
  753. idx += (p0+1-m);
  754. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  755. }
  756. }
  757. };
  758. auto Vr = read_coeff(n, m, 0, 0);
  759. auto Vt = read_coeff(n, m, 0, 1);
  760. auto Vp = read_coeff(n, m, 0, 2);
  761. auto Wr = read_coeff(n, m, 1, 0);
  762. auto Wt = read_coeff(n, m, 1, 1);
  763. auto Wp = read_coeff(n, m, 1, 2);
  764. auto Xr = read_coeff(n, m, 2, 0);
  765. auto Xt = read_coeff(n, m, 2, 1);
  766. auto Xp = read_coeff(n, m, 2, 2);
  767. Complex<Real> SVr, SVt, SVp;
  768. Complex<Real> SWr, SWt, SWp;
  769. Complex<Real> SXr, SXt, SXp;
  770. if (interior) {
  771. Real a,b;
  772. a = -2*n*(n+2) / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], n+1);
  773. b = -(n+1)*(n+2) / (Real)(2*n+1) * (pow<Real>(R[i], n+1) - pow<Real>(R[i], n-1));
  774. SVr = a * Vr + b * Wr;
  775. SVt = a * Vt + b * Wt;
  776. SVp = a * Vp + b * Wp;
  777. a = -(2*n*n+1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], n-1);
  778. SWr = a * Wr;
  779. SWt = a * Wt;
  780. SWp = a * Wp;
  781. a = -(n+2) / (Real)(2*n+1) * pow<Real>(R[i], n);
  782. SXr = a * Xr;
  783. SXt = a * Xt;
  784. SXp = a * Xp;
  785. } else {
  786. Real a,b;
  787. a = (2*n*n+4*n+3) / (Real)((2*n+1) * (2*n+3)) * pow<Real>(R[i], -n-2);
  788. SVr = a * Vr;
  789. SVt = a * Vt;
  790. SVp = a * Vp;
  791. a = 2*(n+1)*(n-1) / (Real)((2*n+1) * (2*n-1)) * pow<Real>(R[i], -n);
  792. b = 2*n*(n-1) / (Real)(4*n+2) * (pow<Real>(R[i], -n-2) - pow<Real>(R[i], -n));
  793. SWr = a * Wr + b * Vr;
  794. SWt = a * Wt + b * Vt;
  795. SWp = a * Wp + b * Vp;
  796. a = (n-1) / (Real)(2*n+1) * pow<Real>(R[i], -n-1);
  797. SXr = a * Xr;
  798. SXt = a * Xt;
  799. SXp = a * Xp;
  800. }
  801. write_coeff(SVr, n, m, 0, 0);
  802. write_coeff(SVt, n, m, 0, 1);
  803. write_coeff(SVp, n, m, 0, 2);
  804. write_coeff(SWr, n, m, 1, 0);
  805. write_coeff(SWt, n, m, 1, 1);
  806. write_coeff(SWp, n, m, 1, 2);
  807. write_coeff(SXr, n, m, 2, 0);
  808. write_coeff(SXt, n, m, 2, 1);
  809. write_coeff(SXp, n, m, 2, 2);
  810. }
  811. }
  812. }
  813. { // Set X <-- Q * StokesOp * B1
  814. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  815. for (Long k0 = 0; k0 < N; k0++) {
  816. StaticArray<Real,9> Q;
  817. { // Set Q
  818. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  819. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  820. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  821. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  822. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  823. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  824. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  825. }
  826. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  827. StaticArray<Real,COORD_DIM> in;
  828. for (Long j = 0; j < COORD_DIM; j++) {
  829. in[j] = 0;
  830. for (Long i = 0; i < COORD_DIM * M; i++) {
  831. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  832. }
  833. }
  834. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  835. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  836. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  837. }
  838. }
  839. }
  840. }
  841. template <class Real> void SphericalHarmonics<Real>::StokesEvalKL(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& coord, const Vector<Real>& norm, bool interior, Vector<Real>& X) {
  842. Long M = (p0+1) * (p0+1);
  843. Long dof;
  844. Matrix<Real> B1;
  845. { // Set B1, dof
  846. Vector<Real> B0;
  847. SHCArrange1(S, arrange, p0, B0);
  848. dof = B0.Dim() / M / COORD_DIM;
  849. assert(B0.Dim() == dof * COORD_DIM * M);
  850. B1.ReInit(dof, COORD_DIM * M);
  851. Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
  852. SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
  853. }
  854. assert(B1.Dim(1) == COORD_DIM * M);
  855. assert(B1.Dim(0) == dof);
  856. Long N = coord.Dim() / COORD_DIM;
  857. assert(coord.Dim() == N * COORD_DIM);
  858. Matrix<Real> SHBasis;
  859. Vector<Real> R, cos_theta_phi;
  860. { // Set R, SHBasis
  861. R.ReInit(N);
  862. cos_theta_phi.ReInit(2 * N);
  863. for (Long i = 0; i < N; i++) { // Set R, cos_theta_phi
  864. ConstIterator<Real> x = coord.begin() + i * COORD_DIM;
  865. R[i] = sqrt<Real>(x[0]*x[0] + x[1]*x[1] + x[2]*x[2]);
  866. cos_theta_phi[i * 2 + 0] = x[2] / R[i];
  867. cos_theta_phi[i * 2 + 1] = atan2(x[1], x[0]); // TODO: works only for float and double
  868. }
  869. SHBasisEval(p0, cos_theta_phi, SHBasis);
  870. assert(SHBasis.Dim(1) == M);
  871. assert(SHBasis.Dim(0) == N);
  872. }
  873. Matrix<Real> StokesOp(N * COORD_DIM, COORD_DIM * M);
  874. for (Long i = 0; i < N; i++) { // Set StokesOp
  875. StaticArray<Real, COORD_DIM> norm0;
  876. Real cos_theta, sin_theta, cos_phi, sin_phi;
  877. { // Set cos_theta, sin_theta, cos_phi, sin_phi
  878. cos_theta = cos_theta_phi[i * 2 + 0];
  879. sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  880. cos_phi = cos(cos_theta_phi[i * 2 + 1]);
  881. sin_phi = sin(cos_theta_phi[i * 2 + 1]);
  882. }
  883. { // Set norm0 <-- Q^t * norm
  884. StaticArray<Real,9> Q;
  885. { // Set Q
  886. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  887. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  888. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  889. }
  890. StaticArray<Real,COORD_DIM> in;
  891. in[0] = norm[i * COORD_DIM + 0];
  892. in[1] = norm[i * COORD_DIM + 1];
  893. in[2] = norm[i * COORD_DIM + 2];
  894. norm0[0] = Q[0] * in[0] + Q[1] * in[1] + Q[2] * in[2];
  895. norm0[1] = Q[3] * in[0] + Q[4] * in[1] + Q[5] * in[2];
  896. norm0[2] = Q[6] * in[0] + Q[7] * in[1] + Q[8] * in[2];
  897. }
  898. Complex<Real> imag(0,1);
  899. Complex<Real> exp_iphi(cos_phi, sin_phi);
  900. Complex<Real> exp_iphi_conj(cos_phi, -sin_phi);
  901. Real cot_theta = cos_theta / sin_theta;
  902. Real csc_theta = 1 / sin_theta;
  903. Real cos_2theta = 2 * cos_theta * cos_theta - 1;
  904. for (Long m = 0; m <= p0; m++) {
  905. for (Long n = m; n <= p0; n++) {
  906. auto read_coeff = [&](Long n, Long m) {
  907. Complex<Real> c;
  908. if (0 <= m && m <= n && n <= p0) {
  909. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  910. c.real = SHBasis[i][idx];
  911. if (m) {
  912. idx += (p0+1-m);
  913. c.imag = SHBasis[i][idx];
  914. }
  915. }
  916. return c;
  917. };
  918. auto write_coeff = [&](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  919. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  920. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  921. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  922. if (m) {
  923. idx += (p0+1-m);
  924. StokesOp[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  925. }
  926. }
  927. };
  928. auto Ynm0 = read_coeff(n, m + 0);
  929. auto Ynm1 = read_coeff(n, m + 1);
  930. auto Ynm2 = read_coeff(n, m + 2);
  931. Complex<Real> KV[COORD_DIM][COORD_DIM];
  932. Complex<Real> KW[COORD_DIM][COORD_DIM];
  933. Complex<Real> KX[COORD_DIM][COORD_DIM];
  934. if (interior) {
  935. KV[0][0] = 0;
  936. KV[0][1] = 0;
  937. KV[0][2] = 0;
  938. KV[1][0] = 0;
  939. KV[1][1] = 0;
  940. KV[1][2] = 0;
  941. KV[2][0] = 0;
  942. KV[2][1] = 0;
  943. KV[2][2] = 0;
  944. KW[0][0] = 0;
  945. KW[0][1] = 0;
  946. KW[0][2] = 0;
  947. KW[1][0] = 0;
  948. KW[1][1] = 0;
  949. KW[1][2] = 0;
  950. KW[2][0] = 0;
  951. KW[2][1] = 0;
  952. KW[2][2] = 0;
  953. KX[0][0] = 0;
  954. KX[0][1] = 0;
  955. KX[0][2] = 0;
  956. KX[1][0] = 0;
  957. KX[1][1] = 0;
  958. KX[1][2] = 0;
  959. KX[2][0] = 0;
  960. KX[2][1] = 0;
  961. KX[2][2] = 0;
  962. } else {
  963. Real r = R[i];
  964. KV[0][0] = (2*n*(n*n+3*n+2)*pow<Real>(r,-n-3)*Ynm0) / (4*n*n+8*n+3);
  965. KW[0][0] = -(n*pow<Real>(r,-n-3)*(2*n*n*n*(r*r-1) + n*n*(7*r*r-5) + n*(r*r-1) - r*r + 2)*Ynm0) / (4*n*n-1);
  966. KX[0][0] = 0;
  967. KV[0][1] = -(2*n*(n+2)*exp_iphi_conj*pow<Real>(r,-n-3)*(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + m*exp_iphi*cot_theta*Ynm0)) / (4*n*n+8*n+3);
  968. KW[0][1] = (exp_iphi_conj*pow<Real>(r,-n-3)*(2*n*n*n*(r*r-1) + n*n*(r*r-3) - 2*n*(r*r-1) - r*r))*(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + m*exp_iphi*cot_theta*Ynm0) / (4*n*n-1);
  969. KX[0][1] = (imag*m*(n+2)*pow<Real>(r,-n-2)*csc_theta*Ynm0) / (2*n+1);
  970. KV[0][2] = -(2*imag*m*n*(n+2)*pow<Real>(r,-n-3)*csc_theta*Ynm0) / (4*n*n+8*n+3);
  971. KW[0][2] = (imag*m*pow<Real>(r,-n-3)*(2*n*n*n*(r*r-1) + n*n*(r*r-3) - 2*n*(r*r-1) - r*r)*csc_theta*Ynm0) / (4*n*n-1);
  972. KX[0][2] = (pow<Real>(r,-n-3)*(-m*(n+2)*r*cot_theta*Ynm0 - (n+2)*exp_iphi_conj*r*sqrt<Real>(-m*(m+1) + n*n + n)*Ynm1)) / (2*n+1);
  973. KV[1][0] = -(2*n*(n+2)*exp_iphi_conj*pow<Real>(r,-n-3)*(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + m*exp_iphi*cot_theta*Ynm0)) / (4*n*n+8*n+3);
  974. KW[1][0] = (exp_iphi_conj*pow<Real>(r,-n-3)*(2*n*n*n*(r*r-1) + n*n*(r*r-3) - 2*n*(r*r-1) - r*r))*(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + m*exp_iphi*cot_theta*Ynm0) / (4*n*n-1);
  975. KX[1][0] = (imag*m*(n+2)*pow<Real>(r,-n-2)*csc_theta*Ynm0) / (2*n+1);
  976. KV[1][1] = (2*(2*m+1)*n*exp_iphi*sqrt<Real>(-m*m-m+n*n+n)*cot_theta*Ynm1 - 2*n*exp_iphi*exp_iphi*(-m*m*cot_theta*cot_theta+m*csc_theta*csc_theta+n+1)*Ynm0 + 2*n*sqrt<Real>(m*m*m*m + 4*m*m*m + m*m*(-2*n*n-2*n+5) + m*(-4*n*n-4*n+2) + n*(n*n*n+2*n*n-n-2))*Ynm2) * (exp_iphi_conj*exp_iphi_conj*pow<Real>(r,-n-3)) / (4*n*n+8*n+3);
  977. KW[1][1] = (Ynm0*((m-1)*m*exp_iphi*exp_iphi*(2*n*n-(n-2)*(2*n+1)*r*r-n)*csc_theta*csc_theta - exp_iphi*exp_iphi*((n-2)*(2*n+1)*r*r*(n-m*m)+n*(2*n-1)*(m*m+n+1))) + sqrt<Real>((m-n)*(m-n+1)*(m+n+1)*(m+n+2))*(2*n*n-(n-2)*(2*n+1)*r*r-n)*Ynm2 + (2*m+1)*exp_iphi*sqrt<Real>(-m*(m+1)+n*n+n)*(2*n*n-(n-2)*(2*n+1)*r*r-n)*cot_theta*Ynm1) * (exp_iphi_conj*exp_iphi_conj*pow<Real>(r,-n-3)) / (4*n*n-1);
  978. KX[1][1] = -(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + (m-1)*exp_iphi*cot_theta*Ynm0)*(2*imag*m*exp_iphi_conj*pow<Real>(r,-n-2)*csc_theta) / (2*n+1);
  979. KV[1][2] = (2*imag*m*n*exp_iphi_conj*pow<Real>(r,-n-3)*csc_theta*(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + (m-1)*exp_iphi*cot_theta*Ynm0)) / (4*n*n+8*n+3);
  980. KW[1][2] = -(imag*m*exp_iphi_conj*pow<Real>(r,-n-3)*(-2*n*n + (n-2)*(2*n+1)*r*r + n)*csc_theta)*(sqrt<Real>(-m*(m+1) + n*n + n)*Ynm1 + (m-1)*exp_iphi*cot_theta*Ynm0) / (4*n*n-1);
  981. KX[1][2] = (4*m*exp_iphi*sqrt<Real>(-m*(m+1)+n*n+n)*cot_theta*Ynm1 + 2*sqrt<Real>((m-n)*(m-n+1)*(m+n+1)*(m+n+2))*Ynm2 + (m-1)*m*exp_iphi*exp_iphi*(cos_2theta+3)*csc_theta*csc_theta*Ynm0)*(exp_iphi_conj*exp_iphi_conj*pow<Real>(r,-n-2)) / (2*(2*n+1));
  982. KV[2][0] = -(2*imag*m*n*(n+2)*pow<Real>(r,-n-3)*csc_theta*Ynm0) / (4*n*n+8*n+3);
  983. KW[2][0] = (imag*m*pow<Real>(r,-n-3)*(2*n*n*n*(r*r-1) + n*n*(r*r-3) - 2*n*(r*r-1) - r*r)*csc_theta*Ynm0) / (4*n*n-1);
  984. KX[2][0] = (pow<Real>(r,-n-3)*(-m*(n+2)*r*cot_theta*Ynm0 - (n+2)*exp_iphi_conj*r*sqrt<Real>(-m*(m+1)+n*n+n)*Ynm1)) / (2*n+1);
  985. KV[2][1] = (2*imag*m*n*exp_iphi_conj*pow<Real>(r,-n-3)*csc_theta*(sqrt<Real>(-m*m-m+n*n+n)*Ynm1 + (m-1)*exp_iphi*cot_theta*Ynm0)) / (4*n*n+8*n+3);
  986. KW[2][1] = -(imag*m*exp_iphi_conj*pow<Real>(r,-n-3)*(-2*n*n + (n-2)*(2*n+1)*r*r + n)*csc_theta)*(sqrt<Real>(-m*(m+1)+n*n+n)*Ynm1 + (m-1)*exp_iphi*cot_theta*Ynm0) / (4*n*n-1);
  987. KX[2][1] = (4*m*exp_iphi*sqrt<Real>(-m*(m+1)+n*n+n)*cot_theta*Ynm1 + 2*sqrt<Real>((m-n)*(m-n+1)*(m+n+1)*(m+n+2))*Ynm2 + (m-1)*m*exp_iphi*exp_iphi*(cos_2theta+3)*csc_theta*csc_theta*Ynm0)*(exp_iphi_conj*exp_iphi_conj*pow<Real>(r,-n-2)) / (2*(2*n+1));
  988. KV[2][2] = (2*n*sqrt<Real>(-m*m-m+n*n+n)*cot_theta*Ynm1 - 2*n*exp_iphi*(m*m*csc_theta*csc_theta-m*cot_theta*cot_theta+n+1)*Ynm0)*(exp_iphi_conj*pow<Real>(r,-n-3)) / (4*n*n+8*n+3);
  989. KW[2][2] = (-sqrt<Real>(-m*(m+1)+n*n+n)*(-2*n*n + (n-2)*(2*n+1)*r*r + n)*cot_theta*Ynm1 + Ynm0*(m*exp_iphi*(-2*n*n + (n-2)*(2*n+1)*r*r + n)*((m-1)*csc_theta*csc_theta+1) - n*exp_iphi*(2*n*n + (n-2)*(2*n+1)*r*r + n - 1)))*(exp_iphi_conj*pow<Real>(r,-n-3)) / (4*n*n-1);
  990. KX[2][2] = (sqrt<Real>(-m*(m+1)+n*n+n)*Ynm1 + (m-1)*exp_iphi*cot_theta*Ynm0)*(2*imag*m*exp_iphi_conj*pow<Real>(r,-n-2)*csc_theta) / (2*n+1);
  991. }
  992. Complex<Real> SVr, SVt, SVp;
  993. SVr = KV[0][0] * norm0[0] + KV[0][1] * norm0[1] + KV[0][2] * norm0[2];
  994. SVt = KV[1][0] * norm0[0] + KV[1][1] * norm0[1] + KV[1][2] * norm0[2];
  995. SVp = KV[2][0] * norm0[0] + KV[2][1] * norm0[1] + KV[2][2] * norm0[2];
  996. Complex<Real> SWr, SWt, SWp;
  997. SWr = KW[0][0] * norm0[0] + KW[0][1] * norm0[1] + KW[0][2] * norm0[2];
  998. SWt = KW[1][0] * norm0[0] + KW[1][1] * norm0[1] + KW[1][2] * norm0[2];
  999. SWp = KW[2][0] * norm0[0] + KW[2][1] * norm0[1] + KW[2][2] * norm0[2];
  1000. Complex<Real> SXr, SXt, SXp;
  1001. SXr = KX[0][0] * norm0[0] + KX[0][1] * norm0[1] + KX[0][2] * norm0[2];
  1002. SXt = KX[1][0] * norm0[0] + KX[1][1] * norm0[1] + KX[1][2] * norm0[2];
  1003. SXp = KX[2][0] * norm0[0] + KX[2][1] * norm0[1] + KX[2][2] * norm0[2];
  1004. write_coeff(SVr, n, m, 0, 0);
  1005. write_coeff(SVt, n, m, 0, 1);
  1006. write_coeff(SVp, n, m, 0, 2);
  1007. write_coeff(SWr, n, m, 1, 0);
  1008. write_coeff(SWt, n, m, 1, 1);
  1009. write_coeff(SWp, n, m, 1, 2);
  1010. write_coeff(SXr, n, m, 2, 0);
  1011. write_coeff(SXt, n, m, 2, 1);
  1012. write_coeff(SXp, n, m, 2, 2);
  1013. }
  1014. }
  1015. }
  1016. { // Set X <-- Q * StokesOp * B1
  1017. if (X.Dim() != N * dof * COORD_DIM) X.ReInit(N * dof * COORD_DIM);
  1018. for (Long k0 = 0; k0 < N; k0++) {
  1019. StaticArray<Real,9> Q;
  1020. { // Set Q
  1021. Real cos_theta = cos_theta_phi[k0 * 2 + 0];
  1022. Real sin_theta = sqrt<Real>(1 - cos_theta * cos_theta);
  1023. Real cos_phi = cos(cos_theta_phi[k0 * 2 + 1]);
  1024. Real sin_phi = sin(cos_theta_phi[k0 * 2 + 1]);
  1025. Q[0] = sin_theta*cos_phi; Q[1] = sin_theta*sin_phi; Q[2] = cos_theta;
  1026. Q[3] = cos_theta*cos_phi; Q[4] = cos_theta*sin_phi; Q[5] =-sin_theta;
  1027. Q[6] = -sin_phi; Q[7] = cos_phi; Q[8] = 0;
  1028. }
  1029. for (Long k1 = 0; k1 < dof; k1++) { // Set X <-- Q * StokesOp * B1
  1030. StaticArray<Real,COORD_DIM> in;
  1031. for (Long j = 0; j < COORD_DIM; j++) {
  1032. in[j] = 0;
  1033. for (Long i = 0; i < COORD_DIM * M; i++) {
  1034. in[j] += B1[k1][i] * StokesOp[k0 * COORD_DIM + j][i];
  1035. }
  1036. }
  1037. X[(k0 * dof + k1) * COORD_DIM + 0] = Q[0] * in[0] + Q[3] * in[1] + Q[6] * in[2];
  1038. X[(k0 * dof + k1) * COORD_DIM + 1] = Q[1] * in[0] + Q[4] * in[1] + Q[7] * in[2];
  1039. X[(k0 * dof + k1) * COORD_DIM + 2] = Q[2] * in[0] + Q[5] * in[1] + Q[8] * in[2];
  1040. }
  1041. }
  1042. }
  1043. }
  1044. template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
  1045. const auto& Mf = OpFourierInv(Np);
  1046. assert(Mf.Dim(0) == Np);
  1047. const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
  1048. assert((Long)Ml.size() == p1+1);
  1049. Long N = X.Dim() / (Np*Nt);
  1050. assert(X.Dim() == N*Np*Nt);
  1051. Vector<Real> B0((2*p1+1) * N*Nt);
  1052. #pragma omp parallel
  1053. { // B0 <-- Transpose(FFT(X))
  1054. Integer tid=omp_get_thread_num();
  1055. Integer omp_p=omp_get_num_threads();
  1056. Long a=(tid+0)*N*Nt/omp_p;
  1057. Long b=(tid+1)*N*Nt/omp_p;
  1058. Vector<Real> buff(Mf.Dim(1));
  1059. Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
  1060. Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
  1061. const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
  1062. for (Long i = a; i < b; i++) {
  1063. { // buff <-- FFT(Xi)
  1064. const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
  1065. Mf.Execute(Xi, buff);
  1066. }
  1067. { // B0 <-- Transpose(buff)
  1068. B0_[0][i] = buff[0]; // skipping buff[1] == 0
  1069. for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
  1070. for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
  1071. }
  1072. }
  1073. }
  1074. if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
  1075. #pragma omp parallel
  1076. { // Evaluate Legendre polynomial
  1077. Integer tid=omp_get_thread_num();
  1078. Integer omp_p=omp_get_num_threads();
  1079. Long offset0=0;
  1080. Long offset1=0;
  1081. for (Long i = 0; i < p1+1; i++) {
  1082. Long N_ = (i==0 ? N : 2*N);
  1083. Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
  1084. Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
  1085. { // Mout = Min * Ml[i] // split between threads
  1086. Long a=(tid+0)*N_/omp_p;
  1087. Long b=(tid+1)*N_/omp_p;
  1088. if (a < b) {
  1089. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1090. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1091. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1092. }
  1093. }
  1094. offset0+=Min .Dim(0)*Min .Dim(1);
  1095. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1096. }
  1097. assert(offset0 == B0.Dim());
  1098. assert(offset1 == B1.Dim());
  1099. }
  1100. B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1101. }
  1102. template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
  1103. Long M = (p1+1)*(p1+1);
  1104. Long N = B1.Dim() / M;
  1105. assert(B1.Dim() == N*M);
  1106. if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
  1107. Long M = 2*(p1+1)*(p1+1);
  1108. if(S.Dim() != N * M) S.ReInit(N * M);
  1109. #pragma omp parallel
  1110. { // S <-- Rearrange(B1)
  1111. Integer tid=omp_get_thread_num();
  1112. Integer omp_p=omp_get_num_threads();
  1113. Long a=(tid+0)*N/omp_p;
  1114. Long b=(tid+1)*N/omp_p;
  1115. for (Long i = a; i < b; i++) {
  1116. Long offset = 0;
  1117. for (Long j = 0; j < p1+1; j++) {
  1118. Long len = p1+1 - j;
  1119. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1120. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1121. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
  1122. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  1123. offset += len;
  1124. }
  1125. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1126. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1127. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  1128. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
  1129. offset += len;
  1130. } else {
  1131. Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
  1132. for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
  1133. }
  1134. }
  1135. }
  1136. }
  1137. }
  1138. if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
  1139. Long M = (p1+1)*(p1+2);
  1140. if(S.Dim() != N * M) S.ReInit(N * M);
  1141. #pragma omp parallel
  1142. { // S <-- Rearrange(B1)
  1143. Integer tid=omp_get_thread_num();
  1144. Integer omp_p=omp_get_num_threads();
  1145. Long a=(tid+0)*N/omp_p;
  1146. Long b=(tid+1)*N/omp_p;
  1147. for (Long i = a; i < b; i++) {
  1148. Long offset = 0;
  1149. for (Long j = 0; j < p1+1; j++) {
  1150. Long len = p1+1 - j;
  1151. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1152. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1153. Iterator<Real> S_ = S .begin() + i*M + 0;
  1154. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  1155. offset += len;
  1156. }
  1157. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1158. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1159. Iterator<Real> S_ = S .begin() + i*M + 1;
  1160. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
  1161. offset += len;
  1162. } else {
  1163. Iterator<Real> S_ = S .begin() + i*M + 1;
  1164. for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
  1165. }
  1166. }
  1167. }
  1168. }
  1169. }
  1170. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
  1171. Long M = (p1+1)*(p1+1);
  1172. if(S.Dim() != N * M) S.ReInit(N * M);
  1173. #pragma omp parallel
  1174. { // S <-- Rearrange(B1)
  1175. Integer tid=omp_get_thread_num();
  1176. Integer omp_p=omp_get_num_threads();
  1177. Long a=(tid+0)*N/omp_p;
  1178. Long b=(tid+1)*N/omp_p;
  1179. for (Long i = a; i < b; i++) {
  1180. Long offset = 0;
  1181. for (Long j = 0; j < p1+1; j++) {
  1182. Long len = p1+1 - j;
  1183. if (1) { // Set Real(S_n^m) for m=j and n=j..p
  1184. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1185. Iterator<Real> S_ = S .begin() + i*M + offset;
  1186. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  1187. offset += len;
  1188. }
  1189. if (j) { // Set Imag(S_n^m) for m=j and n=j..p
  1190. ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
  1191. Iterator<Real> S_ = S .begin() + i*M + offset;
  1192. for (Long k = 0; k < len; k++) S_[k] = B_[k];
  1193. offset += len;
  1194. }
  1195. }
  1196. }
  1197. }
  1198. }
  1199. }
  1200. template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
  1201. Long M, N;
  1202. { // Set M, N
  1203. M = 0;
  1204. if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
  1205. if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
  1206. if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
  1207. if (M == 0) return;
  1208. N = S.Dim() / M;
  1209. assert(S.Dim() == N * M);
  1210. }
  1211. if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
  1212. if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
  1213. #pragma omp parallel
  1214. { // B0 <-- Rearrange(S)
  1215. Integer tid=omp_get_thread_num();
  1216. Integer omp_p=omp_get_num_threads();
  1217. Long a=(tid+0)*N/omp_p;
  1218. Long b=(tid+1)*N/omp_p;
  1219. for (Long i = a; i < b; i++) {
  1220. Long offset = 0;
  1221. for (Long j = 0; j < p0+1; j++) {
  1222. Long len = p0+1 - j;
  1223. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1224. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1225. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
  1226. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1227. offset += len;
  1228. }
  1229. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1230. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1231. ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
  1232. for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
  1233. offset += len;
  1234. }
  1235. }
  1236. }
  1237. }
  1238. }
  1239. if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
  1240. #pragma omp parallel
  1241. { // B0 <-- Rearrange(S)
  1242. Integer tid=omp_get_thread_num();
  1243. Integer omp_p=omp_get_num_threads();
  1244. Long a=(tid+0)*N/omp_p;
  1245. Long b=(tid+1)*N/omp_p;
  1246. for (Long i = a; i < b; i++) {
  1247. Long offset = 0;
  1248. for (Long j = 0; j < p0+1; j++) {
  1249. Long len = p0+1 - j;
  1250. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1251. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1252. ConstIterator<Real> S_ = S .begin() + i*M + 0;
  1253. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1254. offset += len;
  1255. }
  1256. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1257. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1258. ConstIterator<Real> S_ = S .begin() + i*M + 1;
  1259. for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
  1260. offset += len;
  1261. }
  1262. }
  1263. }
  1264. }
  1265. }
  1266. if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
  1267. #pragma omp parallel
  1268. { // B0 <-- Rearrange(S)
  1269. Integer tid=omp_get_thread_num();
  1270. Integer omp_p=omp_get_num_threads();
  1271. Long a=(tid+0)*N/omp_p;
  1272. Long b=(tid+1)*N/omp_p;
  1273. for (Long i = a; i < b; i++) {
  1274. Long offset = 0;
  1275. for (Long j = 0; j < p0+1; j++) {
  1276. Long len = p0+1 - j;
  1277. if (1) { // Get Real(S_n^m) for m=j and n=j..p
  1278. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1279. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1280. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1281. offset += len;
  1282. }
  1283. if (j) { // Get Imag(S_n^m) for m=j and n=j..p
  1284. Iterator<Real> B_ = B0.begin() + i*len + N*offset;
  1285. ConstIterator<Real> S_ = S .begin() + i*M + offset;
  1286. for (Long k = 0; k < len; k++) B_[k] = S_[k];
  1287. offset += len;
  1288. }
  1289. }
  1290. }
  1291. }
  1292. }
  1293. }
  1294. template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
  1295. const auto& Mf = OpFourier(Np);
  1296. assert(Mf.Dim(1) == Np);
  1297. const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
  1298. const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
  1299. assert((Long)Ml .size() == p0+1);
  1300. assert((Long)Mdl.size() == p0+1);
  1301. Long N = B0.Dim() / ((p0+1)*(p0+1));
  1302. assert(B0.Dim() == N*(p0+1)*(p0+1));
  1303. if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
  1304. if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
  1305. if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
  1306. Vector<Real> B1(N*(2*p0+1)*Nt);
  1307. if(X || X_phi){
  1308. #pragma omp parallel
  1309. { // Evaluate Legendre polynomial
  1310. Integer tid=omp_get_thread_num();
  1311. Integer omp_p=omp_get_num_threads();
  1312. Long offset0=0;
  1313. Long offset1=0;
  1314. for(Long i=0;i<p0+1;i++){
  1315. Long N_ = (i==0 ? N : 2*N);
  1316. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1317. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1318. { // Mout = Min * Ml[i] // split between threads
  1319. Long a=(tid+0)*N_/omp_p;
  1320. Long b=(tid+1)*N_/omp_p;
  1321. if(a<b){
  1322. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1323. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1324. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1325. }
  1326. }
  1327. offset0+=Min .Dim(0)*Min .Dim(1);
  1328. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1329. }
  1330. }
  1331. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1332. #pragma omp parallel
  1333. { // Transpose and evaluate Fourier
  1334. Integer tid=omp_get_thread_num();
  1335. Integer omp_p=omp_get_num_threads();
  1336. Long a=(tid+0)*N*Nt/omp_p;
  1337. Long b=(tid+1)*N*Nt/omp_p;
  1338. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1339. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1340. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1341. for (Long i = a; i < b; i++) {
  1342. { // buff <-- Transpose(B1)
  1343. buff[0] = B1_[0][i];
  1344. buff[1] = 0;
  1345. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1346. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1347. }
  1348. { // X <-- FFT(buff)
  1349. Vector<Real> Xi(Np, X->begin() + Np * i, false);
  1350. Mf.Execute(buff, Xi);
  1351. }
  1352. if(X_phi){ // Evaluate Fourier gradient
  1353. { // buff <-- Transpose(B1)
  1354. buff[0] = 0;
  1355. buff[1] = 0;
  1356. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1357. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1358. for (Long j = 1; j < buff.Dim()/2; j++) {
  1359. Real x = buff[2*j+0];
  1360. Real y = buff[2*j+1];
  1361. buff[2*j+0] = -j*y;
  1362. buff[2*j+1] = j*x;
  1363. }
  1364. }
  1365. { // X_phi <-- FFT(buff)
  1366. Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
  1367. Mf.Execute(buff, Xi);
  1368. }
  1369. }
  1370. }
  1371. }
  1372. }
  1373. if(X_theta){
  1374. #pragma omp parallel
  1375. { // Evaluate Legendre gradient
  1376. Integer tid=omp_get_thread_num();
  1377. Integer omp_p=omp_get_num_threads();
  1378. Long offset0=0;
  1379. Long offset1=0;
  1380. for(Long i=0;i<p0+1;i++){
  1381. Long N_ = (i==0 ? N : 2*N);
  1382. const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
  1383. Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
  1384. { // Mout = Min * Mdl[i] // split between threads
  1385. Long a=(tid+0)*N_/omp_p;
  1386. Long b=(tid+1)*N_/omp_p;
  1387. if(a<b){
  1388. const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
  1389. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1390. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  1391. }
  1392. }
  1393. offset0+=Min .Dim(0)*Min .Dim(1);
  1394. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1395. }
  1396. }
  1397. B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
  1398. #pragma omp parallel
  1399. { // Transpose and evaluate Fourier
  1400. Integer tid=omp_get_thread_num();
  1401. Integer omp_p=omp_get_num_threads();
  1402. Long a=(tid+0)*N*Nt/omp_p;
  1403. Long b=(tid+1)*N*Nt/omp_p;
  1404. Vector<Real> buff(Mf.Dim(0)); buff = 0;
  1405. Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
  1406. Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
  1407. for (Long i = a; i < b; i++) {
  1408. { // buff <-- Transpose(B1)
  1409. buff[0] = B1_[0][i];
  1410. buff[1] = 0;
  1411. for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
  1412. for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
  1413. }
  1414. { // Xi <-- FFT(buff)
  1415. Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
  1416. Mf.Execute(buff, Xi);
  1417. }
  1418. }
  1419. }
  1420. }
  1421. }
  1422. template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1423. Long N = X.Dim();
  1424. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1425. if (poly_val.Dim() != Npoly * N) poly_val.ReInit(Npoly * N);
  1426. Real fact = 1 / sqrt<Real>(4 * const_pi<Real>());
  1427. Vector<Real> u(N);
  1428. for (Long n = 0; n < N; n++) {
  1429. u[n] = (X[n]*X[n]<1 ? sqrt<Real>(1-X[n]*X[n]) : 0);
  1430. poly_val[n] = fact;
  1431. }
  1432. Long idx = 0;
  1433. Long idx_nxt = 0;
  1434. for (Long i = 1; i <= degree; i++) {
  1435. idx_nxt += N*(degree-i+2);
  1436. Real c = sqrt<Real>((2*i+1)/(Real)(2*i));
  1437. for (Long n = 0; n < N; n++) {
  1438. poly_val[idx_nxt+n] = -poly_val[idx+n] * u[n] * c;
  1439. }
  1440. idx = idx_nxt;
  1441. }
  1442. idx = 0;
  1443. for (Long m = 0; m < degree; m++) {
  1444. for (Long n = 0; n < N; n++) {
  1445. Real pmm = 0;
  1446. Real pmmp1 = poly_val[idx+n];
  1447. for (Long ll = m + 1; ll <= degree; ll++) {
  1448. Real a = sqrt<Real>(((2*ll-1)*(2*ll+1) ) / (Real)((ll-m)*(ll+m) ));
  1449. Real b = sqrt<Real>(((2*ll+1)*(ll+m-1)*(ll-m-1)) / (Real)((ll-m)*(ll+m)*(2*ll-3)));
  1450. Real pll = X[n]*a*pmmp1 - b*pmm;
  1451. pmm = pmmp1;
  1452. pmmp1 = pll;
  1453. poly_val[idx + N*(ll-m) + n] = pll;
  1454. }
  1455. }
  1456. idx += N * (degree - m + 1);
  1457. }
  1458. }
  1459. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
  1460. Long N = X.Dim();
  1461. Long Npoly = (degree + 1) * (degree + 2) / 2;
  1462. if (poly_val.Dim() != N * Npoly) poly_val.ReInit(N * Npoly);
  1463. Vector<Real> leg_poly(Npoly * N);
  1464. LegPoly(leg_poly, X, degree);
  1465. for (Long m = 0; m <= degree; m++) {
  1466. for (Long n = m; n <= degree; n++) {
  1467. ConstIterator<Real> Pn = leg_poly.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1468. ConstIterator<Real> Pn_ = leg_poly.begin() + N * ((degree * 2 - m + 0) * (m + 1) / 2 + n) * (m < n);
  1469. Iterator <Real> Hn = poly_val.begin() + N * ((degree * 2 - m + 1) * (m + 0) / 2 + n);
  1470. Real c2 = sqrt<Real>(m<n ? (n+m+1)*(n-m) : 0);
  1471. for (Long i = 0; i < N; i++) {
  1472. Real c1 = (X[i]*X[i]<1 ? m/sqrt<Real>(1-X[i]*X[i]) : 0);
  1473. Hn[i] = c1*X[i]*Pn[i] + c2*Pn_[i];
  1474. }
  1475. }
  1476. }
  1477. }
  1478. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p){
  1479. assert(p<SCTL_SHMAXDEG);
  1480. Vector<Real>& Qx=MatrixStore().Qx_[p];
  1481. if(!Qx.Dim()){
  1482. Vector<double> qx1(p+1);
  1483. Vector<double> qw1(p+1);
  1484. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1485. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1486. if (Qx.Dim() != p+1) Qx.ReInit(p+1);
  1487. for (Long i = 0; i < p + 1; i++) Qx[i] = -qx1[i];
  1488. }
  1489. return Qx;
  1490. }
  1491. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p){
  1492. assert(p<SCTL_SHMAXDEG);
  1493. Vector<Real>& Qw=MatrixStore().Qw_[p];
  1494. if(!Qw.Dim()){
  1495. Vector<double> qx1(p+1);
  1496. Vector<double> qw1(p+1);
  1497. cgqf(p+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  1498. assert(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: works only for float and double
  1499. if (Qw.Dim() != p+1) Qw.ReInit(p+1);
  1500. for (Long i = 0; i < p + 1; i++) Qw[i] = qw1[i];
  1501. }
  1502. return Qw;
  1503. }
  1504. template <class Real> const Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  1505. assert(p1<SCTL_SHMAXDEG);
  1506. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  1507. if(!Sw.Dim()){
  1508. const Vector<Real>& qx1 = LegendreNodes(p1);
  1509. const Vector<Real>& qw1 = LegendreWeights(p1);
  1510. std::vector<Real> Yf(p1+1,0);
  1511. { // Set Yf
  1512. Vector<Real> x0(1); x0=1.0;
  1513. Vector<Real> alp0((p1+1)*(p1+2)/2);
  1514. LegPoly(alp0, x0, p1);
  1515. Vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  1516. LegPoly(alp, qx1, p1);
  1517. for(Long j=0;j<p1+1;j++){
  1518. for(Long i=0;i<p1+1;i++){
  1519. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  1520. }
  1521. }
  1522. }
  1523. Sw.ReInit(p1+1);
  1524. for(Long i=0;i<p1+1;i++){
  1525. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  1526. }
  1527. }
  1528. return Sw;
  1529. }
  1530. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  1531. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1532. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  1533. if(!Mf.Dim(0)){
  1534. const Real SQRT2PI=sqrt(2*M_PI);
  1535. { // Set Mf
  1536. Matrix<Real> M(2*p0,2*p1);
  1537. for(Long j=0;j<2*p1;j++){
  1538. M[0][j]=SQRT2PI*1.0;
  1539. for(Long k=1;k<p0;k++){
  1540. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  1541. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  1542. }
  1543. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  1544. }
  1545. Mf=M;
  1546. }
  1547. }
  1548. return Mf;
  1549. }
  1550. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  1551. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1552. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  1553. if(!Mf.Dim(0)){
  1554. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  1555. { // Set Mf
  1556. Matrix<Real> M(2*p0,2*p1);
  1557. M.SetZero();
  1558. if(p1>p0) p1=p0;
  1559. for(Long j=0;j<2*p0;j++){
  1560. M[j][0]=INVSQRT2PI*0.5;
  1561. for(Long k=1;k<p1;k++){
  1562. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  1563. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  1564. }
  1565. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  1566. }
  1567. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  1568. Mf=M;
  1569. }
  1570. }
  1571. return Mf;
  1572. }
  1573. template <class Real> const Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  1574. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1575. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  1576. if(!Mdf.Dim(0)){
  1577. const Real SQRT2PI=sqrt(2*M_PI);
  1578. { // Set Mdf_
  1579. Matrix<Real> M(2*p0,2*p1);
  1580. for(Long j=0;j<2*p1;j++){
  1581. M[0][j]=SQRT2PI*0.0;
  1582. for(Long k=1;k<p0;k++){
  1583. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  1584. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  1585. }
  1586. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  1587. }
  1588. Mdf=M;
  1589. }
  1590. }
  1591. return Mdf;
  1592. }
  1593. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourier(Long Np){
  1594. assert(Np<SCTL_SHMAXDEG);
  1595. auto& Mf =MatrixStore().Mfftinv_ [Np];
  1596. #pragma omp critical (SCTL_FFT_PLAN0)
  1597. if(!Mf.Dim(0)){
  1598. StaticArray<Long,1> fft_dim = {Np};
  1599. Mf.Setup(FFT_Type::C2R, 1, Vector<Long>(1,fft_dim,false));
  1600. }
  1601. return Mf;
  1602. }
  1603. template <class Real> const FFT<Real>& SphericalHarmonics<Real>::OpFourierInv(Long Np){
  1604. assert(Np<SCTL_SHMAXDEG);
  1605. auto& Mf =MatrixStore().Mfft_ [Np];
  1606. #pragma omp critical (SCTL_FFT_PLAN1)
  1607. if(!Mf.Dim(0)){
  1608. StaticArray<Long,1> fft_dim = {Np};
  1609. Mf.Setup(FFT_Type::R2C, 1, Vector<Long>(1,fft_dim,false));
  1610. }
  1611. return Mf;
  1612. }
  1613. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  1614. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1615. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  1616. if(!Ml.size()){
  1617. const Vector<Real>& qx1 = LegendreNodes(p1);
  1618. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1619. LegPoly(alp, qx1, p0);
  1620. Ml.resize(p0+1);
  1621. auto ptr = alp.begin();
  1622. for(Long i=0;i<=p0;i++){
  1623. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1624. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1625. }
  1626. }
  1627. return Ml;
  1628. }
  1629. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  1630. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1631. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  1632. if(!Ml.size()){
  1633. const Vector<Real>& qx1 = LegendreNodes(p0);
  1634. const Vector<Real>& qw1 = LegendreWeights(p0);
  1635. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  1636. LegPoly(alp, qx1, p1);
  1637. Ml.resize(p1+1);
  1638. auto ptr = alp.begin();
  1639. for(Long i=0;i<=p1;i++){
  1640. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  1641. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  1642. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  1643. for(Long k=0;k<qx1.Dim();k++){
  1644. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  1645. }
  1646. }
  1647. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  1648. }
  1649. }
  1650. return Ml;
  1651. }
  1652. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  1653. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  1654. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  1655. if(!Mdl.size()){
  1656. const Vector<Real>& qx1 = LegendreNodes(p1);
  1657. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  1658. LegPolyDeriv(alp, qx1, p0);
  1659. Mdl.resize(p0+1);
  1660. auto ptr = alp.begin();
  1661. for(Long i=0;i<=p0;i++){
  1662. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  1663. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  1664. }
  1665. }
  1666. return Mdl;
  1667. }
  1668. template <class Real> void SphericalHarmonics<Real>::SHBasisEval(Long p0, const Vector<Real>& cos_theta_phi, Matrix<Real>& SHBasis) {
  1669. Long M = (p0+1) * (p0+1);
  1670. Long N = cos_theta_phi.Dim() / 2;
  1671. assert(cos_theta_phi.Dim() == N * 2);
  1672. Vector<Complex<Real>> exp_phi(N);
  1673. Matrix<Real> LegP((p0+1)*(p0+2)/2, N);
  1674. { // Set exp_phi, LegP
  1675. Vector<Real> cos_theta(N);
  1676. for (Long i = 0; i < N; i++) { // Set cos_theta, exp_phi
  1677. cos_theta[i] = cos_theta_phi[i*2+0];
  1678. exp_phi[i].real = cos<Real>(cos_theta_phi[i*2+1]);
  1679. exp_phi[i].imag = sin<Real>(cos_theta_phi[i*2+1]);
  1680. }
  1681. Vector<Real> alp(LegP.Dim(0) * LegP.Dim(1), LegP.begin(), false);
  1682. LegPoly(alp, cos_theta, p0);
  1683. }
  1684. { // Set SHBasis
  1685. SHBasis.ReInit(N, M);
  1686. Real s = 4 * sqrt<Real>(const_pi<Real>());
  1687. for (Long k0 = 0; k0 < N; k0++) {
  1688. Complex<Real> exp_phi_ = 1;
  1689. Complex<Real> exp_phi1 = exp_phi[k0];
  1690. for (Long m = 0; m <= p0; m++) {
  1691. for (Long n = m; n <= p0; n++) {
  1692. Long poly_idx = (2 * p0 - m + 1) * m / 2 + n;
  1693. Long basis_idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1694. SHBasis[k0][basis_idx] = LegP[poly_idx][k0] * exp_phi_.real * s;
  1695. if (m) { // imaginary part
  1696. basis_idx += (p0+1-m);
  1697. SHBasis[k0][basis_idx] = -LegP[poly_idx][k0] * exp_phi_.imag * s;
  1698. } else {
  1699. SHBasis[k0][basis_idx] = SHBasis[k0][basis_idx] * 0.5;
  1700. }
  1701. }
  1702. exp_phi_ = exp_phi_ * exp_phi1;
  1703. }
  1704. }
  1705. }
  1706. assert(SHBasis.Dim(0) == N);
  1707. assert(SHBasis.Dim(1) == M);
  1708. }
  1709. template <class Real> void SphericalHarmonics<Real>::VecSHBasisEval(Long p0, const Vector<Real>& cos_theta_phi, Matrix<Real>& SHBasis) {
  1710. Long M = (p0+1) * (p0+1);
  1711. Long N = cos_theta_phi.Dim() / 2;
  1712. assert(cos_theta_phi.Dim() == N * 2);
  1713. Long p_ = p0 + 1;
  1714. Long M_ = (p_+1) * (p_+1);
  1715. Matrix<Real> Ynm(N, M_);
  1716. SHBasisEval(p_, cos_theta_phi, Ynm);
  1717. Vector<Real> cos_theta(N);
  1718. for (Long i = 0; i < N; i++) { // Set cos_theta
  1719. cos_theta[i] = cos_theta_phi[i*2+0];
  1720. }
  1721. { // Set SHBasis
  1722. SHBasis.ReInit(N * COORD_DIM, COORD_DIM * M);
  1723. SHBasis = 0;
  1724. const Complex<Real> imag(0,1);
  1725. for (Long i = 0; i < N; i++) {
  1726. auto Y = [p_, &Ynm, i](Long n, Long m) {
  1727. Complex<Real> c;
  1728. if (0 <= m && m <= n && n <= p_) {
  1729. Long idx = (2 * p_ - m + 2) * m - (m ? p_+1 : 0) + n;
  1730. c.real = Ynm[i][idx];
  1731. if (m) {
  1732. idx += (p_+1-m);
  1733. c.imag = Ynm[i][idx];
  1734. }
  1735. }
  1736. return c;
  1737. };
  1738. auto write_coeff = [p0, &SHBasis, i, M](Complex<Real> c, Long n, Long m, Long k0, Long k1) {
  1739. if (0 <= m && m <= n && n <= p0 && 0 <= k0 && k0 < COORD_DIM && 0 <= k1 && k1 < COORD_DIM) {
  1740. Long idx = (2 * p0 - m + 2) * m - (m ? p0+1 : 0) + n;
  1741. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.real;
  1742. if (m) {
  1743. idx += (p0+1-m);
  1744. SHBasis[i * COORD_DIM + k1][k0 * M + idx] = c.imag;
  1745. }
  1746. }
  1747. };
  1748. auto A = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
  1749. auto B = [p_](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
  1750. Real csc_theta = 1 / sqrt<Real>(1 - cos_theta[i] * cos_theta[i]);
  1751. if (fabs(cos_theta[i]) < 1) {
  1752. for (Long m = 0; m <= p0; m++) {
  1753. for (Long n = m; n <= p0; n++) {
  1754. Complex<Real> AYBY = A(n,m) * Y(n+1,m) - B(n,m) * Y(n-1,m);
  1755. Complex<Real> Fv2r = Y(n,m) * (-n-1);
  1756. Complex<Real> Fw2r = Y(n,m) * n;
  1757. Complex<Real> Fx2r = 0;
  1758. Complex<Real> Fv2t = AYBY * csc_theta;
  1759. Complex<Real> Fw2t = AYBY * csc_theta;
  1760. Complex<Real> Fx2t = imag * m * Y(n,m) * csc_theta;
  1761. Complex<Real> Fv2p = -imag * m * Y(n,m) * csc_theta;
  1762. Complex<Real> Fw2p = -imag * m * Y(n,m) * csc_theta;
  1763. Complex<Real> Fx2p = AYBY * csc_theta;
  1764. write_coeff(Fv2r, n, m, 0, 0);
  1765. write_coeff(Fw2r, n, m, 1, 0);
  1766. write_coeff(Fx2r, n, m, 2, 0);
  1767. write_coeff(Fv2t, n, m, 0, 1);
  1768. write_coeff(Fw2t, n, m, 1, 1);
  1769. write_coeff(Fx2t, n, m, 2, 1);
  1770. write_coeff(Fv2p, n, m, 0, 2);
  1771. write_coeff(Fw2p, n, m, 1, 2);
  1772. write_coeff(Fx2p, n, m, 2, 2);
  1773. }
  1774. }
  1775. } else {
  1776. Complex<Real> exp_phi;
  1777. exp_phi.real = cos<Real>(cos_theta_phi[i*2+1]);
  1778. exp_phi.imag = -sin<Real>(cos_theta_phi[i*2+1]);
  1779. for (Long m = 0; m <= p0; m++) {
  1780. for (Long n = m; n <= p0; n++) {
  1781. Complex<Real> Fv2r = 0;
  1782. Complex<Real> Fw2r = 0;
  1783. Complex<Real> Fx2r = 0;
  1784. Complex<Real> Fv2t = 0;
  1785. Complex<Real> Fw2t = 0;
  1786. Complex<Real> Fx2t = 0;
  1787. Complex<Real> Fv2p = 0;
  1788. Complex<Real> Fw2p = 0;
  1789. Complex<Real> Fx2p = 0;
  1790. if (m == 0) {
  1791. Fv2r = Y(n,m) * (-n-1);
  1792. Fw2r = Y(n,m) * n;
  1793. Fx2r = 0;
  1794. }
  1795. if (m == 1) {
  1796. auto dY = [&cos_theta, i](Long n) { return -sqrt<Real>((2*n+1)*n*(n+1)) * ((n%2==0) && (cos_theta[i]<0) ? -1 : 1); };
  1797. Complex<Real> AYBY = A(n,m) * dY(n+1) - B(n,m) * dY(n-1);
  1798. Fv2t = AYBY * exp_phi;
  1799. Fw2t = AYBY * exp_phi;
  1800. Fx2t = imag * m * dY(n) * exp_phi;
  1801. Fv2p =-imag * m * dY(n) * exp_phi;
  1802. Fw2p =-imag * m * dY(n) * exp_phi;
  1803. Fx2p = AYBY * exp_phi;
  1804. }
  1805. write_coeff(Fv2r, n, m, 0, 0);
  1806. write_coeff(Fw2r, n, m, 1, 0);
  1807. write_coeff(Fx2r, n, m, 2, 0);
  1808. write_coeff(Fv2t, n, m, 0, 1);
  1809. write_coeff(Fw2t, n, m, 1, 1);
  1810. write_coeff(Fx2t, n, m, 2, 1);
  1811. write_coeff(Fv2p, n, m, 0, 2);
  1812. write_coeff(Fw2p, n, m, 1, 2);
  1813. write_coeff(Fx2p, n, m, 2, 2);
  1814. }
  1815. }
  1816. }
  1817. }
  1818. }
  1819. assert(SHBasis.Dim(0) == N * COORD_DIM);
  1820. assert(SHBasis.Dim(1) == COORD_DIM * M);
  1821. }
  1822. template <class Real> const std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  1823. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1824. { // Set coeff_perm
  1825. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1826. Long itr=0;
  1827. for(Long i=0;i<2*p0;i++){
  1828. Long m=(i+1)/2;
  1829. for(Long n=m;n<=p0;n++){
  1830. coeff_perm[n][i]=itr;
  1831. itr++;
  1832. }
  1833. }
  1834. }
  1835. assert(p0<SCTL_SHMAXDEG);
  1836. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  1837. if(!Mr.size()){
  1838. const Real SQRT2PI=sqrt(2*M_PI);
  1839. Long Ncoef=p0*(p0+2);
  1840. Long Ngrid=2*p0*(p0+1);
  1841. Long Naleg=(p0+1)*(p0+2)/2;
  1842. Matrix<Real> Mcoord0(3,Ngrid);
  1843. const Vector<Real>& x=LegendreNodes(p0);
  1844. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  1845. for(Long j=0;j<2*p0;j++){
  1846. Mcoord0[0][i*2*p0+j]=x[i];
  1847. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  1848. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  1849. }
  1850. }
  1851. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  1852. Matrix<Real> Mcoord1;
  1853. { // Rotate coordinates
  1854. Matrix<Real> M(COORD_DIM, COORD_DIM);
  1855. Real cos_=-x[l];
  1856. Real sin_=-sqrt(1.0-x[l]*x[l]);
  1857. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  1858. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  1859. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  1860. Mcoord1=M*Mcoord0;
  1861. }
  1862. Matrix<Real> Mleg(Naleg, Ngrid);
  1863. { // Set Mleg
  1864. const Vector<Real> Vcoord1(Mcoord1.Dim(0)*Mcoord1.Dim(1), Mcoord1.begin(), false);
  1865. Vector<Real> Vleg(Mleg.Dim(0)*Mleg.Dim(1), Mleg.begin(), false);
  1866. LegPoly(Vleg, Vcoord1, p0);
  1867. }
  1868. Vector<Real> theta(Ngrid);
  1869. for(Long i=0;i<theta.Dim();i++){ // Set theta
  1870. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]); // TODO: works only for float and double
  1871. }
  1872. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  1873. { // Build Mcoef2grid
  1874. Long offset0=0;
  1875. Long offset1=0;
  1876. for(Long i=0;i<p0+1;i++){
  1877. Long len=p0+1-i;
  1878. { // P * cos
  1879. for(Long j=0;j<len;j++){
  1880. for(Long k=0;k<Ngrid;k++){
  1881. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  1882. }
  1883. }
  1884. offset1+=len;
  1885. }
  1886. if(i!=0 && i!=p0){ // P * sin
  1887. for(Long j=0;j<len;j++){
  1888. for(Long k=0;k<Ngrid;k++){
  1889. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  1890. }
  1891. }
  1892. offset1+=len;
  1893. }
  1894. offset0+=len;
  1895. }
  1896. assert(offset0==Naleg);
  1897. assert(offset1==Ncoef);
  1898. }
  1899. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  1900. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  1901. Grid2SHC(Vcoef2grid, p0+1, 2*p0, p0, Vcoef2coef, SHCArrange::COL_MAJOR_NONZERO);
  1902. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  1903. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  1904. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  1905. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  1906. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  1907. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  1908. }
  1909. }
  1910. Mr.push_back(M);
  1911. }
  1912. }
  1913. }
  1914. return Mr;
  1915. }
  1916. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  1917. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  1918. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  1919. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  1920. assert(p1==(Long)Ml.size()-1);
  1921. assert(p0==Mf.Dim(0)/2);
  1922. assert(p1==Mf.Dim(1)/2);
  1923. Long N=X.Dim()/(2*p0*(p0+1));
  1924. assert(N*2*p0*(p0+1)==X.Dim());
  1925. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  1926. Vector<Real> B0, B1;
  1927. B0.ReInit(N* p1*(p1+2));
  1928. B1.ReInit(N*2*p1*(p0+1));
  1929. #pragma omp parallel
  1930. { // Evaluate Fourier and transpose
  1931. Integer tid=omp_get_thread_num();
  1932. Integer omp_p=omp_get_num_threads();
  1933. Long a=(tid+0)*N*(p0+1)/omp_p;
  1934. Long b=(tid+1)*N*(p0+1)/omp_p;
  1935. const Long block_size=16;
  1936. Matrix<Real> B2(block_size,2*p1);
  1937. for(Long i0=a;i0<b;i0+=block_size){
  1938. Long i1=std::min(b,i0+block_size);
  1939. const Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  1940. Matrix<Real> Mout(i1-i0,2*p1, B2.begin(), false);
  1941. Matrix<Real>::GEMM(Mout, Min, Mf);
  1942. for(Long i=i0;i<i1;i++){
  1943. for(Long j=0;j<2*p1;j++){
  1944. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  1945. }
  1946. }
  1947. }
  1948. }
  1949. #pragma omp parallel
  1950. { // Evaluate Legendre polynomial
  1951. Integer tid=omp_get_thread_num();
  1952. Integer omp_p=omp_get_num_threads();
  1953. Long offset0=0;
  1954. Long offset1=0;
  1955. for(Long i=0;i<p1+1;i++){
  1956. Long N0=2*N;
  1957. if(i==0 || i==p1) N0=N;
  1958. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  1959. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  1960. { // Mout = Min * Ml[i] // split between threads
  1961. Long a=(tid+0)*N0/omp_p;
  1962. Long b=(tid+1)*N0/omp_p;
  1963. if(a<b){
  1964. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  1965. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  1966. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  1967. }
  1968. }
  1969. offset0+=Min .Dim(0)*Min .Dim(1);
  1970. offset1+=Mout.Dim(0)*Mout.Dim(1);
  1971. }
  1972. }
  1973. #pragma omp parallel
  1974. { // S <-- Rearrange(B0)
  1975. Integer tid=omp_get_thread_num();
  1976. Integer omp_p=omp_get_num_threads();
  1977. Long a=(tid+0)*N/omp_p;
  1978. Long b=(tid+1)*N/omp_p;
  1979. for(Long i=a;i<b;i++){
  1980. Long offset=0;
  1981. for(Long j=0;j<2*p1;j++){
  1982. Long len=p1+1-(j+1)/2;
  1983. Real* B_=&B0[i*len+N*offset];
  1984. Real* S_=&S[i*p1*(p1+2)+offset];
  1985. for(Long k=0;k<len;k++) S_[k]=B_[k];
  1986. offset+=len;
  1987. }
  1988. }
  1989. }
  1990. }
  1991. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  1992. const std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  1993. std::vector<std::vector<Long>> coeff_perm(p0+1);
  1994. { // Set coeff_perm
  1995. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  1996. Long itr=0;
  1997. for(Long i=0;i<2*p0;i++){
  1998. Long m=(i+1)/2;
  1999. for(Long n=m;n<=p0;n++){
  2000. coeff_perm[n][i]=itr;
  2001. itr++;
  2002. }
  2003. }
  2004. }
  2005. Long Ncoef=p0*(p0+2);
  2006. Long N=S.Dim()/Ncoef/dof;
  2007. assert(N*Ncoef*dof==S.Dim());
  2008. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  2009. const Matrix<Real> S0(N*dof, Ncoef, (Iterator<Real>)S.begin(), false);
  2010. Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, S_.begin(), false);
  2011. #pragma omp parallel
  2012. { // Construct all p0*(p0+1) rotations
  2013. Integer tid=omp_get_thread_num();
  2014. Integer omp_p=omp_get_num_threads();
  2015. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  2016. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  2017. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  2018. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  2019. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  2020. }
  2021. Long a=(tid+0)*N/omp_p;
  2022. Long b=(tid+1)*N/omp_p;
  2023. for(Long i=a;i<b;i++){
  2024. for(Long d=0;d<dof;d++){
  2025. for(Long j=0;j<p0;j++){
  2026. Long offset=0;
  2027. for(Long k=0;k<p0+1;k++){
  2028. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  2029. Long len=p0+1-k;
  2030. if(k!=0 && k!=p0){
  2031. for(Long l=0;l<len;l++){
  2032. Real x[2];
  2033. x[0]=S0[i*dof+d][offset+len*0+l];
  2034. x[1]=S0[i*dof+d][offset+len*1+l];
  2035. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  2036. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  2037. }
  2038. offset+=2*len;
  2039. }else{
  2040. for(Long l=0;l<len;l++){
  2041. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  2042. }
  2043. offset+=len;
  2044. }
  2045. }
  2046. assert(offset==Ncoef);
  2047. }
  2048. }
  2049. { // Fast rotation
  2050. for(Long k=0;k<dof*p0;k++){ // forward permutation
  2051. for(Long l=0;l<=p0;l++){
  2052. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2053. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  2054. }
  2055. }
  2056. }
  2057. for(Long t=0;t<=p0;t++){
  2058. for(Long l=0;l<=p0;l++){ // mat-vec
  2059. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  2060. }
  2061. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  2062. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  2063. for(Long l=0;l<=p0;l++){
  2064. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2065. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  2066. }
  2067. }
  2068. }
  2069. }
  2070. }
  2071. }
  2072. }
  2073. }
  2074. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  2075. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  2076. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  2077. std::vector<std::vector<Long>> coeff_perm(p0+1);
  2078. { // Set coeff_perm
  2079. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  2080. Long itr=0;
  2081. for(Long i=0;i<2*p0;i++){
  2082. Long m=(i+1)/2;
  2083. for(Long n=m;n<=p0;n++){
  2084. coeff_perm[n][i]=itr;
  2085. itr++;
  2086. }
  2087. }
  2088. }
  2089. Long Ncoef=p0*(p0+2);
  2090. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  2091. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  2092. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  2093. Matrix<Real> S0(N*dof*p0*(p0+1), Ncoef, S.begin(), false);
  2094. const Matrix<Real> S1(N*dof*p0*(p0+1), Ncoef, (Iterator<Real>)S_.begin(), false);
  2095. #pragma omp parallel
  2096. { // Transpose all p0*(p0+1) rotations
  2097. Integer tid=omp_get_thread_num();
  2098. Integer omp_p=omp_get_num_threads();
  2099. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  2100. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  2101. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  2102. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  2103. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  2104. }
  2105. Long a=(tid+0)*N/omp_p;
  2106. Long b=(tid+1)*N/omp_p;
  2107. for(Long i=a;i<b;i++){
  2108. for(Long t=0;t<p0+1;t++){
  2109. Long idx0=(i*(p0+1)+t)*p0*dof;
  2110. { // Fast rotation
  2111. const Matrix<Real> Min(p0*dof, Ncoef, (Iterator<Real>)S1[idx0], false);
  2112. for(Long k=0;k<dof*p0;k++){ // forward permutation
  2113. for(Long l=0;l<=p0;l++){
  2114. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2115. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  2116. }
  2117. }
  2118. }
  2119. for(Long l=0;l<=p0;l++){ // mat-vec
  2120. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  2121. }
  2122. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  2123. for(Long l=0;l<=p0;l++){
  2124. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  2125. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  2126. }
  2127. }
  2128. }
  2129. }
  2130. for(Long j=0;j<p0;j++){
  2131. for(Long d=0;d<dof;d++){
  2132. Long idx1=idx0+j*dof+d;
  2133. Long offset=0;
  2134. for(Long k=0;k<p0+1;k++){
  2135. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  2136. Long len=p0+1-k;
  2137. if(k!=0 && k!=p0){
  2138. for(Long l=0;l<len;l++){
  2139. Real x[2];
  2140. x[0]=B0[j*dof+d][offset+len*0+l];
  2141. x[1]=B0[j*dof+d][offset+len*1+l];
  2142. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  2143. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  2144. }
  2145. offset+=2*len;
  2146. }else{
  2147. for(Long l=0;l<len;l++){
  2148. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  2149. }
  2150. offset+=len;
  2151. }
  2152. }
  2153. assert(offset==Ncoef);
  2154. }
  2155. }
  2156. }
  2157. }
  2158. }
  2159. }
  2160. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  2161. Long Ngrid=2*p0*(p0+1);
  2162. Long Ncoef= p0*(p0+2);
  2163. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  2164. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  2165. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  2166. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  2167. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  2168. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  2169. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  2170. Long b=std::min(a+BLOCK_SIZE, Nves);
  2171. Vector<Real> _SLMatrix, _DLMatrix;
  2172. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  2173. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  2174. const Vector<Real> _S ((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  2175. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2176. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2177. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  2178. }
  2179. }
  2180. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  2181. Profile::Tic("Rotate");
  2182. Vector<Real> S0, S;
  2183. SphericalHarmonics<Real>::Grid2SHC(X0, p0+1, 2*p0, p0, S0, SHCArrange::COL_MAJOR_NONZERO);
  2184. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  2185. Profile::Toc();
  2186. Profile::Tic("Upsample");
  2187. Vector<Real> X, X_theta, X_phi, trg;
  2188. SphericalHarmonics<Real>::SHC2Grid(S, SHCArrange::COL_MAJOR_NONZERO, p0, p1+1, 2*p1, &X, &X_theta, &X_phi);
  2189. SphericalHarmonics<Real>::SHC2Pole(S, SHCArrange::COL_MAJOR_NONZERO, p0, trg);
  2190. Profile::Toc();
  2191. Profile::Tic("Stokes");
  2192. Vector<Real> SL0, DL0;
  2193. { // Stokes kernel
  2194. //Long M0=2*p0*(p0+1);
  2195. Long M1=2*p1*(p1+1);
  2196. Long N=trg.Dim()/(2*COORD_DIM);
  2197. assert(X.Dim()==M1*COORD_DIM*N);
  2198. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  2199. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  2200. const Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  2201. const Real scal_const_dl = 3.0/(4.0*M_PI);
  2202. const Real scal_const_sl = 1.0/(8.0*M_PI);
  2203. static Real eps=-1;
  2204. if(eps<0){
  2205. eps=1;
  2206. while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5;
  2207. }
  2208. #pragma omp parallel
  2209. {
  2210. Integer tid=omp_get_thread_num();
  2211. Integer omp_p=omp_get_num_threads();
  2212. Long a=(tid+0)*N/omp_p;
  2213. Long b=(tid+1)*N/omp_p;
  2214. for(Long i=a;i<b;i++){
  2215. for(Long t=0;t<2;t++){
  2216. Real tx, ty, tz;
  2217. { // Read target coordinates
  2218. tx=trg[i*2*COORD_DIM+0*2+t];
  2219. ty=trg[i*2*COORD_DIM+1*2+t];
  2220. tz=trg[i*2*COORD_DIM+2*2+t];
  2221. }
  2222. for(Long j0=0;j0<p1+1;j0++){
  2223. for(Long j1=0;j1<2*p1;j1++){
  2224. Long s=2*p1*j0+j1;
  2225. Real dx, dy, dz;
  2226. { // Compute dx, dy, dz
  2227. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  2228. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  2229. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  2230. }
  2231. Real nx, ny, nz;
  2232. { // Compute source normal
  2233. Real x_theta=X_phi[(i*COORD_DIM+0)*M1+s];
  2234. Real y_theta=X_phi[(i*COORD_DIM+1)*M1+s];
  2235. Real z_theta=X_phi[(i*COORD_DIM+2)*M1+s];
  2236. Real x_phi=X_theta[(i*COORD_DIM+0)*M1+s];
  2237. Real y_phi=X_theta[(i*COORD_DIM+1)*M1+s];
  2238. Real z_phi=X_theta[(i*COORD_DIM+2)*M1+s];
  2239. nx=(y_theta*z_phi-z_theta*y_phi);
  2240. ny=(z_theta*x_phi-x_theta*z_phi);
  2241. nz=(x_theta*y_phi-y_theta*x_phi);
  2242. }
  2243. Real area_elem=1.0;
  2244. if(SLayer){ // Compute area_elem
  2245. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  2246. }
  2247. Real rinv, rinv2;
  2248. { // Compute rinv, rinv2
  2249. Real r2=dx*dx+dy*dy+dz*dz;
  2250. rinv=1.0/sqrt(r2);
  2251. if(r2<=eps) rinv=0;
  2252. rinv2=rinv*rinv;
  2253. }
  2254. if(DLayer){
  2255. Real rinv5=rinv2*rinv2*rinv;
  2256. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  2257. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  2258. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  2259. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  2260. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  2261. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  2262. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  2263. }
  2264. if(SLayer){
  2265. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  2266. Real area_rinv2=area_rinv*rinv2;
  2267. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  2268. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  2269. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  2270. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  2271. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  2272. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  2273. }
  2274. }
  2275. }
  2276. }
  2277. }
  2278. }
  2279. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  2280. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  2281. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  2282. }
  2283. Profile::Toc();
  2284. Profile::Tic("UpsampleTranspose");
  2285. Vector<Real> SL1, DL1;
  2286. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  2287. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  2288. Profile::Toc();
  2289. Profile::Tic("RotateTranspose");
  2290. Vector<Real> SL2, DL2;
  2291. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  2292. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  2293. Profile::Toc();
  2294. Profile::Tic("Rearrange");
  2295. Vector<Real> SL3, DL3;
  2296. { // Transpose
  2297. Long Ncoef=p0*(p0+2);
  2298. Long Ngrid=2*p0*(p0+1);
  2299. { // Transpose SL2
  2300. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  2301. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2302. #pragma omp parallel
  2303. {
  2304. Integer tid=omp_get_thread_num();
  2305. Integer omp_p=omp_get_num_threads();
  2306. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2307. Long a=(tid+0)*N/omp_p;
  2308. Long b=(tid+1)*N/omp_p;
  2309. for(Long i=a;i<b;i++){
  2310. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  2311. for(Long k=0;k<Ncoef;k++){ // Transpose
  2312. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2313. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2314. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2315. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2316. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2317. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2318. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2319. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2320. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2321. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2322. }
  2323. }
  2324. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2325. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2326. for(Long j0=0;j0<COORD_DIM;j0++){
  2327. for(Long j1=0;j1<p0+1;j1++){
  2328. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2329. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2330. }
  2331. }
  2332. }
  2333. }
  2334. }
  2335. }
  2336. { // Transpose DL2
  2337. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  2338. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  2339. #pragma omp parallel
  2340. {
  2341. Integer tid=omp_get_thread_num();
  2342. Integer omp_p=omp_get_num_threads();
  2343. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  2344. Long a=(tid+0)*N/omp_p;
  2345. Long b=(tid+1)*N/omp_p;
  2346. for(Long i=a;i<b;i++){
  2347. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  2348. for(Long k=0;k<Ncoef;k++){ // Transpose
  2349. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  2350. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  2351. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  2352. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  2353. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  2354. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  2355. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  2356. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  2357. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  2358. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  2359. }
  2360. }
  2361. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  2362. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  2363. for(Long j0=0;j0<COORD_DIM;j0++){
  2364. for(Long j1=0;j1<p0+1;j1++){
  2365. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  2366. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  2367. }
  2368. }
  2369. }
  2370. }
  2371. }
  2372. }
  2373. }
  2374. Profile::Toc();
  2375. Profile::Tic("Grid2SHC");
  2376. SphericalHarmonics<Real>::Grid2SHC(SL3, p0+1, 2*p0, p0, SL, SHCArrange::COL_MAJOR_NONZERO);
  2377. SphericalHarmonics<Real>::Grid2SHC(DL3, p0+1, 2*p0, p0, DL, SHCArrange::COL_MAJOR_NONZERO);
  2378. Profile::Toc();
  2379. }
  2380. } // end namespace