sph_harm.txx 47 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395
  1. #include SCTL_INCLUDE(legendre_rule.hpp)
  2. namespace SCTL_NAMESPACE {
  3. // Vector<Real> qx1, qw1;
  4. // //cgqf(p0+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  5. // ChebBasis<Real>::quad_rule(p0+1, qx1, qw1);
  6. // sctl::ASSERT(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: these are Legendre nodes only for float and double
  7. // for (auto x : qx1) x = 2 * x - 1;
  8. // for (auto w : qw1) w = 2 * w;
  9. template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, Long p0, Long p1, Vector<Real>& X, Vector<Real>* X_theta, Vector<Real>* X_phi){
  10. Matrix<Real>& Mf =SphericalHarmonics<Real>::MatFourier(p0,p1);
  11. Matrix<Real>& Mdf=SphericalHarmonics<Real>::MatFourierGrad(p0,p1);
  12. std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre(p0,p1);
  13. std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,p1);
  14. assert(p0==(Long)Ml.size()-1);
  15. assert(p0==Mf.Dim(0)/2);
  16. assert(p1==Mf.Dim(1)/2);
  17. Long N=S.Dim()/(p0*(p0+2));
  18. assert(N*p0*(p0+2)==S.Dim());
  19. if(X.Dim()!=N*2*p1*(p1+1)) X.ReInit(N*2*p1*(p1+1));
  20. if(X_phi && X_phi ->Dim()!=N*2*p1*(p1+1)) X_phi ->ReInit(N*2*p1*(p1+1));
  21. if(X_theta && X_theta->Dim()!=N*2*p1*(p1+1)) X_theta->ReInit(N*2*p1*(p1+1));
  22. static Vector<Real> B0, B1;
  23. B0.ReInit(N* p0*(p0+2));
  24. B1.ReInit(N*2*p0*(p1+1));
  25. #pragma omp parallel
  26. { // B0 <-- Rearrange(S)
  27. Integer tid=omp_get_thread_num();
  28. Integer omp_p=omp_get_num_threads();
  29. Long a=(tid+0)*N/omp_p;
  30. Long b=(tid+1)*N/omp_p;
  31. for(Long i=a;i<b;i++){
  32. Long offset=0;
  33. for(Long j=0;j<2*p0;j++){
  34. Long len=p0+1-(j+1)/2;
  35. Real* B_=&B0[i*len+N*offset];
  36. const Real* S_=&S[i*p0*(p0+2)+offset];
  37. for(Long k=0;k<len;k++) B_[k]=S_[k];
  38. offset+=len;
  39. }
  40. }
  41. }
  42. #pragma omp parallel
  43. { // Evaluate Legendre polynomial
  44. Integer tid=omp_get_thread_num();
  45. Integer omp_p=omp_get_num_threads();
  46. Long offset0=0;
  47. Long offset1=0;
  48. for(Long i=0;i<p0+1;i++){
  49. Long N0=2*N;
  50. if(i==0 || i==p0) N0=N;
  51. Matrix<Real> Min (N0, p0+1-i, B0.begin()+offset0, false);
  52. Matrix<Real> Mout(N0, p1+1 , B1.begin()+offset1, false);
  53. { // Mout = Min * Ml[i] // split between threads
  54. Long a=(tid+0)*N0/omp_p;
  55. Long b=(tid+1)*N0/omp_p;
  56. if(a<b){
  57. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  58. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  59. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  60. }
  61. }
  62. offset0+=Min .Dim(0)*Min .Dim(1);
  63. offset1+=Mout.Dim(0)*Mout.Dim(1);
  64. }
  65. }
  66. #pragma omp parallel
  67. { // Transpose and evaluate Fourier
  68. Integer tid=omp_get_thread_num();
  69. Integer omp_p=omp_get_num_threads();
  70. Long a=(tid+0)*N*(p1+1)/omp_p;
  71. Long b=(tid+1)*N*(p1+1)/omp_p;
  72. const Long block_size=16;
  73. Matrix<Real> B2(block_size,2*p0);
  74. for(Long i0=a;i0<b;i0+=block_size){
  75. Long i1=std::min(b,i0+block_size);
  76. for(Long i=i0;i<i1;i++){
  77. for(Long j=0;j<2*p0;j++){
  78. B2[i-i0][j]=B1[j*N*(p1+1)+i];
  79. }
  80. }
  81. Matrix<Real> Min (i1-i0,2*p0, B2.begin() , false);
  82. Matrix<Real> Mout(i1-i0,2*p1, X .begin()+i0*2*p1, false);
  83. Matrix<Real>::GEMM(Mout, Min, Mf);
  84. if(X_theta){ // Evaluate Fourier gradient
  85. Matrix<Real> Mout(i1-i0,2*p1, X_theta->begin()+i0*2*p1, false);
  86. Matrix<Real>::GEMM(Mout, Min, Mdf);
  87. }
  88. }
  89. }
  90. if(X_phi){
  91. #pragma omp parallel
  92. { // Evaluate Legendre gradient
  93. Integer tid=omp_get_thread_num();
  94. Integer omp_p=omp_get_num_threads();
  95. Long offset0=0;
  96. Long offset1=0;
  97. for(Long i=0;i<p0+1;i++){
  98. Long N0=2*N;
  99. if(i==0 || i==p0) N0=N;
  100. Matrix<Real> Min (N0, p0+1-i, B0.begin()+offset0, false);
  101. Matrix<Real> Mout(N0, p1+1 , B1.begin()+offset1, false);
  102. { // Mout = Min * Mdl[i] // split between threads
  103. Long a=(tid+0)*N0/omp_p;
  104. Long b=(tid+1)*N0/omp_p;
  105. if(a<b){
  106. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  107. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  108. Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
  109. }
  110. }
  111. offset0+=Min .Dim(0)*Min .Dim(1);
  112. offset1+=Mout.Dim(0)*Mout.Dim(1);
  113. }
  114. }
  115. #pragma omp parallel
  116. { // Transpose and evaluate Fourier
  117. Integer tid=omp_get_thread_num();
  118. Integer omp_p=omp_get_num_threads();
  119. Long a=(tid+0)*N*(p1+1)/omp_p;
  120. Long b=(tid+1)*N*(p1+1)/omp_p;
  121. const Long block_size=16;
  122. Matrix<Real> B2(block_size,2*p0);
  123. for(Long i0=a;i0<b;i0+=block_size){
  124. Long i1=std::min(b,i0+block_size);
  125. for(Long i=i0;i<i1;i++){
  126. for(Long j=0;j<2*p0;j++){
  127. B2[i-i0][j]=B1[j*N*(p1+1)+i];
  128. }
  129. }
  130. Matrix<Real> Min (i1-i0,2*p0, B2.begin() , false);
  131. Matrix<Real> Mout(i1-i0,2*p1, X_phi->begin()+i0*2*p1, false);
  132. Matrix<Real>::GEMM(Mout, Min, Mf);
  133. }
  134. }
  135. }
  136. }
  137. template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  138. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourierInv(p0,p1);
  139. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendreInv(p0,p1);
  140. assert(p1==(Long)Ml.size()-1);
  141. assert(p0==Mf.Dim(0)/2);
  142. assert(p1==Mf.Dim(1)/2);
  143. Long N=X.Dim()/(2*p0*(p0+1));
  144. assert(N*2*p0*(p0+1)==X.Dim());
  145. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  146. static Vector<Real> B0, B1;
  147. B0.ReInit(N* p1*(p1+2));
  148. B1.ReInit(N*2*p1*(p0+1));
  149. #pragma omp parallel
  150. { // Evaluate Fourier and transpose
  151. Integer tid=omp_get_thread_num();
  152. Integer omp_p=omp_get_num_threads();
  153. Long a=(tid+0)*N*(p0+1)/omp_p;
  154. Long b=(tid+1)*N*(p0+1)/omp_p;
  155. const Long block_size=16;
  156. Matrix<Real> B2(block_size,2*p1);
  157. for(Long i0=a;i0<b;i0+=block_size){
  158. Long i1=std::min(b,i0+block_size);
  159. Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  160. Matrix<Real> Mout(i1-i0,2*p1, B2.begin() , false);
  161. Matrix<Real>::GEMM(Mout, Min, Mf);
  162. for(Long i=i0;i<i1;i++){
  163. for(Long j=0;j<2*p1;j++){
  164. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  165. }
  166. }
  167. }
  168. }
  169. #pragma omp parallel
  170. { // Evaluate Legendre polynomial
  171. Integer tid=omp_get_thread_num();
  172. Integer omp_p=omp_get_num_threads();
  173. Long offset0=0;
  174. Long offset1=0;
  175. for(Long i=0;i<p1+1;i++){
  176. Long N0=2*N;
  177. if(i==0 || i==p1) N0=N;
  178. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  179. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  180. { // Mout = Min * Ml[i] // split between threads
  181. Long a=(tid+0)*N0/omp_p;
  182. Long b=(tid+1)*N0/omp_p;
  183. if(a<b){
  184. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  185. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  186. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  187. }
  188. }
  189. offset0+=Min .Dim(0)*Min .Dim(1);
  190. offset1+=Mout.Dim(0)*Mout.Dim(1);
  191. }
  192. }
  193. #pragma omp parallel
  194. { // S <-- Rearrange(B0)
  195. Integer tid=omp_get_thread_num();
  196. Integer omp_p=omp_get_num_threads();
  197. Long a=(tid+0)*N/omp_p;
  198. Long b=(tid+1)*N/omp_p;
  199. for(Long i=a;i<b;i++){
  200. Long offset=0;
  201. for(Long j=0;j<2*p1;j++){
  202. Long len=p1+1-(j+1)/2;
  203. Real* B_=&B0[i*len+N*offset];
  204. Real* S_=&S[i*p1*(p1+2)+offset];
  205. for(Long k=0;k<len;k++) S_[k]=B_[k];
  206. offset+=len;
  207. }
  208. }
  209. }
  210. }
  211. template <class Real> void SphericalHarmonics<Real>::SHC2GridTranspose(const Vector<Real>& X, Long p0, Long p1, Vector<Real>& S){
  212. Matrix<Real> Mf =SphericalHarmonics<Real>::MatFourier(p1,p0).Transpose();
  213. std::vector<Matrix<Real>> Ml =SphericalHarmonics<Real>::MatLegendre(p1,p0);
  214. for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose();
  215. assert(p1==(Long)Ml.size()-1);
  216. assert(p0==Mf.Dim(0)/2);
  217. assert(p1==Mf.Dim(1)/2);
  218. Long N=X.Dim()/(2*p0*(p0+1));
  219. assert(N*2*p0*(p0+1)==X.Dim());
  220. if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2)));
  221. static Vector<Real> B0, B1;
  222. B0.ReInit(N* p1*(p1+2));
  223. B1.ReInit(N*2*p1*(p0+1));
  224. #pragma omp parallel
  225. { // Evaluate Fourier and transpose
  226. Integer tid=omp_get_thread_num();
  227. Integer omp_p=omp_get_num_threads();
  228. Long a=(tid+0)*N*(p0+1)/omp_p;
  229. Long b=(tid+1)*N*(p0+1)/omp_p;
  230. const Long block_size=16;
  231. Matrix<Real> B2(block_size,2*p1);
  232. for(Long i0=a;i0<b;i0+=block_size){
  233. Long i1=std::min(b,i0+block_size);
  234. Matrix<Real> Min (i1-i0,2*p0, (Iterator<Real>)X.begin()+i0*2*p0, false);
  235. Matrix<Real> Mout(i1-i0,2*p1, B2.begin() , false);
  236. Matrix<Real>::GEMM(Mout, Min, Mf);
  237. for(Long i=i0;i<i1;i++){
  238. for(Long j=0;j<2*p1;j++){
  239. B1[j*N*(p0+1)+i]=B2[i-i0][j];
  240. }
  241. }
  242. }
  243. }
  244. #pragma omp parallel
  245. { // Evaluate Legendre polynomial
  246. Integer tid=omp_get_thread_num();
  247. Integer omp_p=omp_get_num_threads();
  248. Long offset0=0;
  249. Long offset1=0;
  250. for(Long i=0;i<p1+1;i++){
  251. Long N0=2*N;
  252. if(i==0 || i==p1) N0=N;
  253. Matrix<Real> Min (N0, p0+1 , B1.begin()+offset0, false);
  254. Matrix<Real> Mout(N0, p1+1-i, B0.begin()+offset1, false);
  255. { // Mout = Min * Ml[i] // split between threads
  256. Long a=(tid+0)*N0/omp_p;
  257. Long b=(tid+1)*N0/omp_p;
  258. if(a<b){
  259. Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
  260. Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
  261. Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
  262. }
  263. }
  264. offset0+=Min .Dim(0)*Min .Dim(1);
  265. offset1+=Mout.Dim(0)*Mout.Dim(1);
  266. }
  267. }
  268. #pragma omp parallel
  269. { // S <-- Rearrange(B0)
  270. Integer tid=omp_get_thread_num();
  271. Integer omp_p=omp_get_num_threads();
  272. Long a=(tid+0)*N/omp_p;
  273. Long b=(tid+1)*N/omp_p;
  274. for(Long i=a;i<b;i++){
  275. Long offset=0;
  276. for(Long j=0;j<2*p1;j++){
  277. Long len=p1+1-(j+1)/2;
  278. Real* B_=&B0[i*len+N*offset];
  279. Real* S_=&S[i*p1*(p1+2)+offset];
  280. for(Long k=0;k<len;k++) S_[k]=B_[k];
  281. offset+=len;
  282. }
  283. }
  284. }
  285. }
  286. template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, Long p0, Vector<Real>& P){
  287. Vector<Real> QP[2];
  288. { // Set QP
  289. Real x[2]={-1,1};
  290. Vector<Real> alp((p0+1)*(p0+2)/2);
  291. const Real SQRT2PI=sqrt(2*M_PI);
  292. for(Long i=0;i<2;i++){
  293. LegPoly(&alp[0], &x[i], 1, p0);
  294. QP[i].ReInit(p0+1, alp.begin());
  295. for(Long j=0;j<p0+1;j++) QP[i][j]*=SQRT2PI;
  296. }
  297. }
  298. Long N=S.Dim()/(p0*(p0+2));
  299. assert(N*p0*(p0+2)==S.Dim());
  300. if(P.Dim()!=N*2) P.ReInit(N*2);
  301. #pragma omp parallel
  302. { // Compute pole
  303. Integer tid=omp_get_thread_num();
  304. Integer omp_p=omp_get_num_threads();
  305. Long a=(tid+0)*N/omp_p;
  306. Long b=(tid+1)*N/omp_p;
  307. for(Long i=a;i<b;i++){
  308. Real P_[2]={0,0};
  309. for(Long j=0;j<p0+1;j++){
  310. P_[0]+=S[i*p0*(p0+2)+j]*QP[0][j];
  311. P_[1]+=S[i*p0*(p0+2)+j]*QP[1][j];
  312. }
  313. P[2*i+0]=P_[0];
  314. P[2*i+1]=P_[1];
  315. }
  316. }
  317. }
  318. template <class Real> void SphericalHarmonics<Real>::RotateAll(const Vector<Real>& S, Long p0, Long dof, Vector<Real>& S_){
  319. std::vector<Matrix<Real>>& Mr=MatRotate(p0);
  320. std::vector<std::vector<Long>> coeff_perm(p0+1);
  321. { // Set coeff_perm
  322. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  323. Long itr=0;
  324. for(Long i=0;i<2*p0;i++){
  325. Long m=(i+1)/2;
  326. for(Long n=m;n<=p0;n++){
  327. coeff_perm[n][i]=itr;
  328. itr++;
  329. }
  330. }
  331. }
  332. Long Ncoef=p0*(p0+2);
  333. Long N=S.Dim()/Ncoef/dof;
  334. assert(N*Ncoef*dof==S.Dim());
  335. if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1));
  336. Matrix<Real> S0(N*dof ,Ncoef, (Iterator<Real>)S.begin(), false);
  337. Matrix<Real> S1(N*dof*p0*(p0+1),Ncoef, S_.begin() , false);
  338. #pragma omp parallel
  339. { // Construct all p0*(p0+1) rotations
  340. Integer tid=omp_get_thread_num();
  341. Integer omp_p=omp_get_num_threads();
  342. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  343. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  344. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  345. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  346. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  347. }
  348. Long a=(tid+0)*N/omp_p;
  349. Long b=(tid+1)*N/omp_p;
  350. for(Long i=a;i<b;i++){
  351. for(Long d=0;d<dof;d++){
  352. for(Long j=0;j<p0;j++){
  353. Long offset=0;
  354. for(Long k=0;k<p0+1;k++){
  355. Real r[2]={cos(k*j*M_PI/p0),-sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  356. Long len=p0+1-k;
  357. if(k!=0 && k!=p0){
  358. for(Long l=0;l<len;l++){
  359. Real x[2];
  360. x[0]=S0[i*dof+d][offset+len*0+l];
  361. x[1]=S0[i*dof+d][offset+len*1+l];
  362. B0[j*dof+d][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  363. B0[j*dof+d][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  364. }
  365. offset+=2*len;
  366. }else{
  367. for(Long l=0;l<len;l++){
  368. B0[j*dof+d][offset+l]=S0[i*dof+d][offset+l];
  369. }
  370. offset+=len;
  371. }
  372. }
  373. assert(offset==Ncoef);
  374. }
  375. }
  376. { // Fast rotation
  377. for(Long k=0;k<dof*p0;k++){ // forward permutation
  378. for(Long l=0;l<=p0;l++){
  379. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  380. Bi[l][k][j]=B0[k][coeff_perm[l][j]];
  381. }
  382. }
  383. }
  384. for(Long t=0;t<=p0;t++){
  385. for(Long l=0;l<=p0;l++){ // mat-vec
  386. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  387. }
  388. Matrix<Real> Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false);
  389. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  390. for(Long l=0;l<=p0;l++){
  391. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  392. Mout[k][coeff_perm[l][j]]=Bo[l][k][j];
  393. }
  394. }
  395. }
  396. }
  397. }
  398. }
  399. }
  400. }
  401. template <class Real> void SphericalHarmonics<Real>::RotateTranspose(const Vector<Real>& S_, Long p0, Long dof, Vector<Real>& S){
  402. std::vector<Matrix<Real>> Mr=MatRotate(p0);
  403. for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose();
  404. std::vector<std::vector<Long>> coeff_perm(p0+1);
  405. { // Set coeff_perm
  406. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  407. Long itr=0;
  408. for(Long i=0;i<2*p0;i++){
  409. Long m=(i+1)/2;
  410. for(Long n=m;n<=p0;n++){
  411. coeff_perm[n][i]=itr;
  412. itr++;
  413. }
  414. }
  415. }
  416. Long Ncoef=p0*(p0+2);
  417. Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1));
  418. assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim());
  419. if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1));
  420. Matrix<Real> S0(N*dof*p0*(p0+1),Ncoef, S.begin() , false);
  421. Matrix<Real> S1(N*dof*p0*(p0+1),Ncoef, (Iterator<Real>)S_.begin(), false);
  422. #pragma omp parallel
  423. { // Transpose all p0*(p0+1) rotations
  424. Integer tid=omp_get_thread_num();
  425. Integer omp_p=omp_get_num_threads();
  426. Matrix<Real> B0(dof*p0,Ncoef); // memory buffer
  427. std::vector<Matrix<Real>> Bi(p0+1), Bo(p0+1); // memory buffers
  428. for(Long i=0;i<=p0;i++){ // initialize Bi, Bo
  429. Bi[i].ReInit(dof*p0,coeff_perm[i].size());
  430. Bo[i].ReInit(dof*p0,coeff_perm[i].size());
  431. }
  432. Long a=(tid+0)*N/omp_p;
  433. Long b=(tid+1)*N/omp_p;
  434. for(Long i=a;i<b;i++){
  435. for(Long t=0;t<p0+1;t++){
  436. Long idx0=(i*(p0+1)+t)*p0*dof;
  437. { // Fast rotation
  438. Matrix<Real> Min(p0*dof,Ncoef, S1[idx0], false);
  439. for(Long k=0;k<dof*p0;k++){ // forward permutation
  440. for(Long l=0;l<=p0;l++){
  441. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  442. Bi[l][k][j]=Min[k][coeff_perm[l][j]];
  443. }
  444. }
  445. }
  446. for(Long l=0;l<=p0;l++){ // mat-vec
  447. Matrix<Real>::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]);
  448. }
  449. for(Long k=0;k<dof*p0;k++){ // reverse permutation
  450. for(Long l=0;l<=p0;l++){
  451. for(Long j=0;j<(Long)coeff_perm[l].size();j++){
  452. B0[k][coeff_perm[l][j]]=Bo[l][k][j];
  453. }
  454. }
  455. }
  456. }
  457. for(Long j=0;j<p0;j++){
  458. for(Long d=0;d<dof;d++){
  459. Long idx1=idx0+j*dof+d;
  460. Long offset=0;
  461. for(Long k=0;k<p0+1;k++){
  462. Real r[2]={cos(k*j*M_PI/p0),sin(k*j*M_PI/p0)}; // exp(i*k*theta)
  463. Long len=p0+1-k;
  464. if(k!=0 && k!=p0){
  465. for(Long l=0;l<len;l++){
  466. Real x[2];
  467. x[0]=B0[j*dof+d][offset+len*0+l];
  468. x[1]=B0[j*dof+d][offset+len*1+l];
  469. S0[idx1][offset+len*0+l]=x[0]*r[0]-x[1]*r[1];
  470. S0[idx1][offset+len*1+l]=x[0]*r[1]+x[1]*r[0];
  471. }
  472. offset+=2*len;
  473. }else{
  474. for(Long l=0;l<len;l++){
  475. S0[idx1][offset+l]=B0[j*dof+d][offset+l];
  476. }
  477. offset+=len;
  478. }
  479. }
  480. assert(offset==Ncoef);
  481. }
  482. }
  483. }
  484. }
  485. }
  486. }
  487. template <class Real> Vector<Real>& SphericalHarmonics<Real>::LegendreNodes(Long p1){
  488. assert(p1<SCTL_SHMAXDEG);
  489. assert(MatrixStore().Qx_.size() == SCTL_SHMAXDEG);
  490. Vector<Real>& Qx=MatrixStore().Qx_[p1];
  491. if(!Qx.Dim()){
  492. Vector<Real> qx1(p1+1);
  493. Vector<Real> qw1(p1+1);
  494. cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  495. Qx=qx1;
  496. }
  497. return Qx;
  498. }
  499. template <class Real> Vector<Real>& SphericalHarmonics<Real>::LegendreWeights(Long p1){
  500. assert(p1<SCTL_SHMAXDEG);
  501. assert(MatrixStore().Qw_.size() == SCTL_SHMAXDEG);
  502. Vector<Real>& Qw=MatrixStore().Qw_[p1];
  503. if(!Qw.Dim()){
  504. // TODO: this works only for Real = double
  505. Vector<Real> qx1(p1+1);
  506. Vector<Real> qw1(p1+1);
  507. cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  508. for(Long i=0;i<qw1.Dim();i++) qw1[i]*=M_PI/p1/sqrt(1-qx1[i]*qx1[i]);
  509. Qw=qw1;
  510. }
  511. return Qw;
  512. }
  513. template <class Real> Vector<Real>& SphericalHarmonics<Real>::SingularWeights(Long p1){
  514. assert(p1<SCTL_SHMAXDEG);
  515. assert(MatrixStore().Sw_.size() == SCTL_SHMAXDEG);
  516. Vector<Real>& Sw=MatrixStore().Sw_[p1];
  517. if(!Sw.Dim()){
  518. Vector<Real> qx1(p1+1);
  519. Vector<Real> qw1(p1+1);
  520. cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  521. std::vector<Real> Yf(p1+1,0);
  522. { // Set Yf
  523. Real x0=1.0;
  524. std::vector<Real> alp0((p1+1)*(p1+2)/2);
  525. LegPoly(&alp0[0], &x0, 1, p1);
  526. std::vector<Real> alp((p1+1) * (p1+1)*(p1+2)/2);
  527. LegPoly(&alp[0], &qx1[0], p1+1, p1);
  528. for(Long j=0;j<p1+1;j++){
  529. for(Long i=0;i<p1+1;i++){
  530. Yf[i]+=4*M_PI/(2*j+1) * alp0[j] * alp[j*(p1+1)+i];
  531. }
  532. }
  533. }
  534. Sw.ReInit(p1+1);
  535. for(Long i=0;i<p1+1;i++){
  536. Sw[i]=(qw1[i]*M_PI/p1)*Yf[i]/cos(acos(qx1[i])/2);
  537. }
  538. }
  539. return Sw;
  540. }
  541. template <class Real> Matrix<Real>& SphericalHarmonics<Real>::MatFourier(Long p0, Long p1){
  542. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  543. assert(MatrixStore().Mf_ .size() == SCTL_SHMAXDEG*SCTL_SHMAXDEG);
  544. Matrix<Real>& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1];
  545. if(!Mf.Dim(0)){
  546. const Real SQRT2PI=sqrt(2*M_PI);
  547. { // Set Mf
  548. Matrix<Real> M(2*p0,2*p1);
  549. for(Long j=0;j<2*p1;j++){
  550. M[0][j]=SQRT2PI*1.0;
  551. for(Long k=1;k<p0;k++){
  552. M[2*k-1][j]=SQRT2PI*cos(j*k*M_PI/p1);
  553. M[2*k-0][j]=SQRT2PI*sin(j*k*M_PI/p1);
  554. }
  555. M[2*p0-1][j]=SQRT2PI*cos(j*p0*M_PI/p1);
  556. }
  557. Mf=M;
  558. }
  559. }
  560. return Mf;
  561. }
  562. template <class Real> Matrix<Real>& SphericalHarmonics<Real>::MatFourierInv(Long p0, Long p1){
  563. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  564. assert(MatrixStore().Mfinv_ .size() == SCTL_SHMAXDEG*SCTL_SHMAXDEG);
  565. Matrix<Real>& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1];
  566. if(!Mf.Dim(0)){
  567. const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0;
  568. { // Set Mf
  569. Matrix<Real> M(2*p0,2*p1);
  570. M.SetZero();
  571. if(p1>p0) p1=p0;
  572. for(Long j=0;j<2*p0;j++){
  573. M[j][0]=INVSQRT2PI*0.5;
  574. for(Long k=1;k<p1;k++){
  575. M[j][2*k-1]=INVSQRT2PI*cos(j*k*M_PI/p0);
  576. M[j][2*k-0]=INVSQRT2PI*sin(j*k*M_PI/p0);
  577. }
  578. M[j][2*p1-1]=INVSQRT2PI*cos(j*p1*M_PI/p0);
  579. }
  580. if(p1==p0) for(Long j=0;j<2*p0;j++) M[j][2*p1-1]*=0.5;
  581. Mf=M;
  582. }
  583. }
  584. return Mf;
  585. }
  586. template <class Real> Matrix<Real>& SphericalHarmonics<Real>::MatFourierGrad(Long p0, Long p1){
  587. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  588. assert(MatrixStore().Mdf_.size() == SCTL_SHMAXDEG*SCTL_SHMAXDEG);
  589. Matrix<Real>& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1];
  590. if(!Mdf.Dim(0)){
  591. const Real SQRT2PI=sqrt(2*M_PI);
  592. { // Set Mdf_
  593. Matrix<Real> M(2*p0,2*p1);
  594. for(Long j=0;j<2*p1;j++){
  595. M[0][j]=SQRT2PI*0.0;
  596. for(Long k=1;k<p0;k++){
  597. M[2*k-1][j]=-SQRT2PI*k*sin(j*k*M_PI/p1);
  598. M[2*k-0][j]= SQRT2PI*k*cos(j*k*M_PI/p1);
  599. }
  600. M[2*p0-1][j]=-SQRT2PI*p0*sin(j*p0*M_PI/p1);
  601. }
  602. Mdf=M;
  603. }
  604. }
  605. return Mdf;
  606. }
  607. template <class Real> std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendre(Long p0, Long p1){
  608. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  609. assert(MatrixStore().Ml_ .size() == SCTL_SHMAXDEG*SCTL_SHMAXDEG);
  610. std::vector<Matrix<Real>>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1];
  611. if(!Ml.size()){
  612. Vector<Real> qx1(p1+1);
  613. Vector<Real> qw1(p1+1);
  614. cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  615. { // Set Ml
  616. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  617. LegPoly(&alp[0], &qx1[0], qx1.Dim(), p0);
  618. Ml.resize(p0+1);
  619. auto ptr = alp.begin();
  620. for(Long i=0;i<=p0;i++){
  621. Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  622. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  623. }
  624. }
  625. }
  626. return Ml;
  627. }
  628. template <class Real> std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreInv(Long p0, Long p1){
  629. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  630. assert(MatrixStore().Mlinv_ .size() == SCTL_SHMAXDEG*SCTL_SHMAXDEG);
  631. std::vector<Matrix<Real>>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1];
  632. if(!Ml.size()){
  633. Vector<Real> qx1(p0+1);
  634. Vector<Real> qw1(p0+1);
  635. cgqf(p0+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  636. { // Set Ml
  637. Vector<Real> alp(qx1.Dim()*(p1+1)*(p1+2)/2);
  638. LegPoly(&alp[0], &qx1[0], qx1.Dim(), p1);
  639. Ml.resize(p1+1);
  640. auto ptr = alp.begin();
  641. for(Long i=0;i<=p1;i++){
  642. Ml[i].ReInit(qx1.Dim(), p1+1-i);
  643. Matrix<Real> M(p1+1-i, qx1.Dim(), ptr, false);
  644. for(Long j=0;j<p1+1-i;j++){ // Transpose and weights
  645. for(Long k=0;k<qx1.Dim();k++){
  646. Ml[i][k][j]=M[j][k]*qw1[k]*2*M_PI;
  647. }
  648. }
  649. ptr+=Ml[i].Dim(0)*Ml[i].Dim(1);
  650. }
  651. }
  652. }
  653. return Ml;
  654. }
  655. template <class Real> std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatLegendreGrad(Long p0, Long p1){
  656. assert(p0<SCTL_SHMAXDEG && p1<SCTL_SHMAXDEG);
  657. assert(MatrixStore().Mdl_.size() == SCTL_SHMAXDEG*SCTL_SHMAXDEG);
  658. std::vector<Matrix<Real>>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1];
  659. if(!Mdl.size()){
  660. Vector<Real> qx1(p1+1);
  661. Vector<Real> qw1(p1+1);
  662. cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]);
  663. { // Set Mdl
  664. Vector<Real> alp(qx1.Dim()*(p0+1)*(p0+2)/2);
  665. LegPolyDeriv(&alp[0], &qx1[0], qx1.Dim(), p0);
  666. Mdl.resize(p0+1);
  667. auto ptr = alp.begin();
  668. for(Long i=0;i<=p0;i++){
  669. Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr);
  670. ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1);
  671. }
  672. }
  673. }
  674. return Mdl;
  675. }
  676. template <class Real> std::vector<Matrix<Real>>& SphericalHarmonics<Real>::MatRotate(Long p0){
  677. std::vector<std::vector<Long>> coeff_perm(p0+1);
  678. { // Set coeff_perm
  679. for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0));
  680. Long itr=0;
  681. for(Long i=0;i<2*p0;i++){
  682. Long m=(i+1)/2;
  683. for(Long n=m;n<=p0;n++){
  684. coeff_perm[n][i]=itr;
  685. itr++;
  686. }
  687. }
  688. }
  689. assert(p0<SCTL_SHMAXDEG);
  690. assert(MatrixStore().Mr_.size() == SCTL_SHMAXDEG);
  691. std::vector<Matrix<Real>>& Mr=MatrixStore().Mr_[p0];
  692. if(!Mr.size()){
  693. const Real SQRT2PI=sqrt(2*M_PI);
  694. Long Ncoef=p0*(p0+2);
  695. Long Ngrid=2*p0*(p0+1);
  696. Long Naleg=(p0+1)*(p0+2)/2;
  697. Matrix<Real> Mcoord0(3,Ngrid);
  698. Vector<Real>& x=LegendreNodes(p0);
  699. for(Long i=0;i<p0+1;i++){ // Set Mcoord0
  700. for(Long j=0;j<2*p0;j++){
  701. Mcoord0[0][i*2*p0+j]=x[i];
  702. Mcoord0[1][i*2*p0+j]=sqrt(1-x[i]*x[i])*sin(M_PI*j/p0);
  703. Mcoord0[2][i*2*p0+j]=sqrt(1-x[i]*x[i])*cos(M_PI*j/p0);
  704. }
  705. }
  706. for(Long l=0;l<p0+1;l++){ // For each rotation angle
  707. Matrix<Real> Mcoord1;
  708. { // Rotate coordinates
  709. Matrix<Real> M(COORD_DIM, COORD_DIM);
  710. Real cos_=-x[l];
  711. Real sin_=-sqrt(1.0-x[l]*x[l]);
  712. M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_;
  713. M[1][0]= 0; M[1][1]=1; M[1][2]= 0;
  714. M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_;
  715. Mcoord1=M*Mcoord0;
  716. }
  717. Matrix<Real> Mleg(Naleg, Ngrid);
  718. { // Set Mleg
  719. LegPoly(&Mleg[0][0], &Mcoord1[0][0], Ngrid, p0);
  720. }
  721. Vector<Real> theta(Ngrid);
  722. for(Long i=0;i<theta.Dim();i++){ // Set theta
  723. theta[i]=atan2(Mcoord1[1][i],Mcoord1[2][i]);
  724. }
  725. Matrix<Real> Mcoef2grid(Ncoef, Ngrid);
  726. { // Build Mcoef2grid
  727. Long offset0=0;
  728. Long offset1=0;
  729. for(Long i=0;i<p0+1;i++){
  730. Long len=p0+1-i;
  731. { // P * cos
  732. for(Long j=0;j<len;j++){
  733. for(Long k=0;k<Ngrid;k++){
  734. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*cos(i*theta[k]);
  735. }
  736. }
  737. offset1+=len;
  738. }
  739. if(i!=0 && i!=p0){ // P * sin
  740. for(Long j=0;j<len;j++){
  741. for(Long k=0;k<Ngrid;k++){
  742. Mcoef2grid[offset1+j][k]=SQRT2PI*Mleg[offset0+j][k]*sin(i*theta[k]);
  743. }
  744. }
  745. offset1+=len;
  746. }
  747. offset0+=len;
  748. }
  749. assert(offset0==Naleg);
  750. assert(offset1==Ncoef);
  751. }
  752. Vector<Real> Vcoef2coef(Ncoef*Ncoef);
  753. Vector<Real> Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false);
  754. Grid2SHC(Vcoef2grid, p0, p0, Vcoef2coef);
  755. Matrix<Real> Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false);
  756. for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation
  757. Matrix<Real> M(coeff_perm[n].size(),coeff_perm[n].size());
  758. for(Long i=0;i<(Long)coeff_perm[n].size();i++){
  759. for(Long j=0;j<(Long)coeff_perm[n].size();j++){
  760. M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]];
  761. }
  762. }
  763. Mr.push_back(M);
  764. }
  765. }
  766. }
  767. return Mr;
  768. }
  769. template <class Real> void SphericalHarmonics<Real>::StokesSingularInteg(const Vector<Real>& S, Long p0, Long p1, Vector<Real>* SLMatrix, Vector<Real>* DLMatrix){
  770. Long Ngrid=2*p0*(p0+1);
  771. Long Ncoef= p0*(p0+2);
  772. Long Nves=S.Dim()/(Ngrid*COORD_DIM);
  773. if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  774. if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM));
  775. Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB
  776. BLOCK_SIZE=std::min<Long>(BLOCK_SIZE,omp_get_max_threads());
  777. BLOCK_SIZE=std::max<Long>(BLOCK_SIZE,1);
  778. for(Long a=0;a<Nves;a+=BLOCK_SIZE){
  779. Long b=std::min(a+BLOCK_SIZE, Nves);
  780. Vector<Real> _SLMatrix, _DLMatrix, _S;
  781. if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  782. if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false);
  783. _S .ReInit((b-a)*(Ngrid*COORD_DIM) , (Iterator<Real>)S.begin()+a*(Ngrid*COORD_DIM), false);
  784. if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  785. else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix);
  786. else if(DLMatrix) StokesSingularInteg_<false, true>(_S, p0, p1, _SLMatrix, _DLMatrix);
  787. }
  788. }
  789. template <class Real> void SphericalHarmonics<Real>::LegPoly(Real* poly_val, const Real* X, Long N, Long degree){
  790. Real* p_val=poly_val;
  791. Real fact=1.0/(Real)sqrt(4*M_PI);
  792. std::vector<Real> u(N);
  793. for(Long n=0;n<N;n++){
  794. u[n]=sqrt(1-X[n]*X[n]);
  795. if(X[n]*X[n]>1.0) u[n]=0;
  796. p_val[n]=fact;
  797. }
  798. Real* p_val_nxt=poly_val;
  799. for(Long i=1;i<=degree;i++){
  800. p_val_nxt=&p_val_nxt[N*(degree-i+2)];
  801. Real c=(i==1?sqrt(3.0/2.0):1);
  802. if(i>1)c*=sqrt((Real)(2*i+1)/(2*i));
  803. for(Long n=0;n<N;n++){
  804. p_val_nxt[n]=-p_val[n]*u[n]*c;
  805. }
  806. p_val=p_val_nxt;
  807. }
  808. p_val=poly_val;
  809. for(Long m=0;m<degree;m++){
  810. for(Long n=0;n<N;n++){
  811. Real pmm=0;
  812. Real pmmp1=p_val[n];
  813. Real pll;
  814. for(Long ll=m+1;ll<=degree;ll++){
  815. Real a=sqrt(((Real)(2*ll-1)*(2*ll+1))/((ll-m)*(ll+m)));
  816. Real b=sqrt(((Real)(2*ll+1)*(ll+m-1)*(ll-m-1))/((ll-m)*(ll+m)*(2*ll-3)));
  817. pll=X[n]*a*pmmp1-b*pmm;
  818. pmm=pmmp1;
  819. pmmp1=pll;
  820. p_val[N*(ll-m)+n]=pll;
  821. }
  822. }
  823. p_val=&p_val[N*(degree-m+1)];
  824. }
  825. }
  826. template <class Real> void SphericalHarmonics<Real>::LegPolyDeriv(Real* poly_val, const Real* X, Long N, Long degree){
  827. std::vector<Real> leg_poly((degree+1)*(degree+2)*N/2);
  828. LegPoly(&leg_poly[0], X, N, degree);
  829. for(Long m=0;m<=degree;m++){
  830. for(Long n=0;n<=degree;n++) if(m<=n){
  831. const Real* Pn =&leg_poly[0];
  832. const Real* Pn_=&leg_poly[0];
  833. if((m+0)<=(n+0)) Pn =&leg_poly[N*(((degree*2-abs(m+0)+1)*abs(m+0))/2+(n+0))];
  834. if((m+1)<=(n+0)) Pn_=&leg_poly[N*(((degree*2-abs(m+1)+1)*abs(m+1))/2+(n+0))];
  835. Real* Hn =&poly_val[N*(((degree*2-abs(m+0)+1)*abs(m+0))/2+(n+0))];
  836. Real c1=(abs(m+0)<=(n+0)?1.0:0)*m;
  837. Real c2=(abs(m+1)<=(n+0)?1.0:0)*sqrt(n+m+1)*sqrt(n>m?n-m:1);
  838. for(Long i=0;i<N;i++){
  839. Hn[i]=-(c1*X[i]*Pn[i]+c2*sqrt(1-X[i]*X[i])*Pn_[i])/sqrt(1-X[i]*X[i]);
  840. }
  841. }
  842. }
  843. }
  844. template <class Real> template <bool SLayer, bool DLayer> void SphericalHarmonics<Real>::StokesSingularInteg_(const Vector<Real>& X0, Long p0, Long p1, Vector<Real>& SL, Vector<Real>& DL){
  845. Profile::Tic("Rotate");
  846. static Vector<Real> S0, S;
  847. SphericalHarmonics<Real>::Grid2SHC(X0, p0, p0, S0);
  848. SphericalHarmonics<Real>::RotateAll(S0, p0, COORD_DIM, S);
  849. Profile::Toc();
  850. Profile::Tic("Upsample");
  851. Vector<Real> X, X_phi, X_theta, trg;
  852. SphericalHarmonics<Real>::SHC2Grid(S, p0, p1, X, &X_theta, &X_phi);
  853. SphericalHarmonics<Real>::SHC2Pole(S, p0, trg);
  854. Profile::Toc();
  855. Profile::Tic("Stokes");
  856. Vector<Real> SL0, DL0;
  857. { // Stokes kernel
  858. //Long M0=2*p0*(p0+1);
  859. Long M1=2*p1*(p1+1);
  860. Long N=trg.Dim()/(2*COORD_DIM);
  861. assert(X.Dim()==M1*COORD_DIM*N);
  862. if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1);
  863. if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1);
  864. Vector<Real>& qw=SphericalHarmonics<Real>::SingularWeights(p1);
  865. const Real scal_const_dl = 3.0/(4.0*M_PI);
  866. const Real scal_const_sl = 1.0/(8.0*M_PI);
  867. static Real eps=-1;
  868. if(eps<0){
  869. eps=1;
  870. while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5;
  871. }
  872. #pragma omp parallel
  873. {
  874. Integer tid=omp_get_thread_num();
  875. Integer omp_p=omp_get_num_threads();
  876. Long a=(tid+0)*N/omp_p;
  877. Long b=(tid+1)*N/omp_p;
  878. for(Long i=a;i<b;i++){
  879. for(Long t=0;t<2;t++){
  880. Real tx, ty, tz;
  881. { // Read target coordinates
  882. tx=trg[i*2*COORD_DIM+0*2+t];
  883. ty=trg[i*2*COORD_DIM+1*2+t];
  884. tz=trg[i*2*COORD_DIM+2*2+t];
  885. }
  886. for(Long j0=0;j0<p1+1;j0++){
  887. for(Long j1=0;j1<2*p1;j1++){
  888. Long s=2*p1*j0+j1;
  889. Real dx, dy, dz;
  890. { // Compute dx, dy, dz
  891. dx=tx-X[(i*COORD_DIM+0)*M1+s];
  892. dy=ty-X[(i*COORD_DIM+1)*M1+s];
  893. dz=tz-X[(i*COORD_DIM+2)*M1+s];
  894. }
  895. Real nx, ny, nz;
  896. { // Compute source normal
  897. Real x_theta=X_theta[(i*COORD_DIM+0)*M1+s];
  898. Real y_theta=X_theta[(i*COORD_DIM+1)*M1+s];
  899. Real z_theta=X_theta[(i*COORD_DIM+2)*M1+s];
  900. Real x_phi=X_phi[(i*COORD_DIM+0)*M1+s];
  901. Real y_phi=X_phi[(i*COORD_DIM+1)*M1+s];
  902. Real z_phi=X_phi[(i*COORD_DIM+2)*M1+s];
  903. nx=(y_theta*z_phi-z_theta*y_phi);
  904. ny=(z_theta*x_phi-x_theta*z_phi);
  905. nz=(x_theta*y_phi-y_theta*x_phi);
  906. }
  907. Real area_elem=1.0;
  908. if(SLayer){ // Compute area_elem
  909. area_elem=sqrt(nx*nx+ny*ny+nz*nz);
  910. }
  911. Real rinv, rinv2;
  912. { // Compute rinv, rinv2
  913. Real r2=dx*dx+dy*dy+dz*dz;
  914. rinv=1.0/sqrt(r2);
  915. if(r2<=eps) rinv=0;
  916. rinv2=rinv*rinv;
  917. }
  918. if(DLayer){
  919. Real rinv5=rinv2*rinv2*rinv;
  920. Real r_dot_n_rinv5=scal_const_dl*qw[j0*t+(p1-j0)*(1-t)] * (nx*dx+ny*dy+nz*dz)*rinv5;
  921. DL0[((i*2+t)*6+0)*M1+s]=dx*dx*r_dot_n_rinv5;
  922. DL0[((i*2+t)*6+1)*M1+s]=dx*dy*r_dot_n_rinv5;
  923. DL0[((i*2+t)*6+2)*M1+s]=dx*dz*r_dot_n_rinv5;
  924. DL0[((i*2+t)*6+3)*M1+s]=dy*dy*r_dot_n_rinv5;
  925. DL0[((i*2+t)*6+4)*M1+s]=dy*dz*r_dot_n_rinv5;
  926. DL0[((i*2+t)*6+5)*M1+s]=dz*dz*r_dot_n_rinv5;
  927. }
  928. if(SLayer){
  929. Real area_rinv =scal_const_sl*qw[j0*t+(p1-j0)*(1-t)] * area_elem*rinv;
  930. Real area_rinv2=area_rinv*rinv2;
  931. SL0[((i*2+t)*6+0)*M1+s]=area_rinv+dx*dx*area_rinv2;
  932. SL0[((i*2+t)*6+1)*M1+s]= dx*dy*area_rinv2;
  933. SL0[((i*2+t)*6+2)*M1+s]= dx*dz*area_rinv2;
  934. SL0[((i*2+t)*6+3)*M1+s]=area_rinv+dy*dy*area_rinv2;
  935. SL0[((i*2+t)*6+4)*M1+s]= dy*dz*area_rinv2;
  936. SL0[((i*2+t)*6+5)*M1+s]=area_rinv+dz*dz*area_rinv2;
  937. }
  938. }
  939. }
  940. }
  941. }
  942. }
  943. Profile::Add_FLOP(20*(2*p1)*(p1+1)*2*N);
  944. if(SLayer) Profile::Add_FLOP((19+6)*(2*p1)*(p1+1)*2*N);
  945. if(DLayer) Profile::Add_FLOP( 22 *(2*p1)*(p1+1)*2*N);
  946. }
  947. Profile::Toc();
  948. Profile::Tic("UpsampleTranspose");
  949. static Vector<Real> SL1, DL1;
  950. SphericalHarmonics<Real>::SHC2GridTranspose(SL0, p1, p0, SL1);
  951. SphericalHarmonics<Real>::SHC2GridTranspose(DL0, p1, p0, DL1);
  952. Profile::Toc();
  953. Profile::Tic("RotateTranspose");
  954. static Vector<Real> SL2, DL2;
  955. SphericalHarmonics<Real>::RotateTranspose(SL1, p0, 2*6, SL2);
  956. SphericalHarmonics<Real>::RotateTranspose(DL1, p0, 2*6, DL2);
  957. Profile::Toc();
  958. Profile::Tic("Rearrange");
  959. static Vector<Real> SL3, DL3;
  960. { // Transpose
  961. Long Ncoef=p0*(p0+2);
  962. Long Ngrid=2*p0*(p0+1);
  963. { // Transpose SL2
  964. Long N=SL2.Dim()/(6*Ncoef*Ngrid);
  965. SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  966. #pragma omp parallel
  967. {
  968. Integer tid=omp_get_thread_num();
  969. Integer omp_p=omp_get_num_threads();
  970. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  971. Long a=(tid+0)*N/omp_p;
  972. Long b=(tid+1)*N/omp_p;
  973. for(Long i=a;i<b;i++){
  974. Matrix<Real> M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false);
  975. for(Long k=0;k<Ncoef;k++){ // Transpose
  976. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  977. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  978. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  979. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  980. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  981. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  982. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  983. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  984. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  985. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  986. }
  987. }
  988. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  989. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  990. for(Long j0=0;j0<COORD_DIM;j0++){
  991. for(Long j1=0;j1<p0+1;j1++){
  992. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  993. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  994. }
  995. }
  996. }
  997. }
  998. }
  999. }
  1000. { // Transpose DL2
  1001. Long N=DL2.Dim()/(6*Ncoef*Ngrid);
  1002. DL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid);
  1003. #pragma omp parallel
  1004. {
  1005. Integer tid=omp_get_thread_num();
  1006. Integer omp_p=omp_get_num_threads();
  1007. Matrix<Real> B(COORD_DIM*Ncoef,Ngrid*COORD_DIM);
  1008. Long a=(tid+0)*N/omp_p;
  1009. Long b=(tid+1)*N/omp_p;
  1010. for(Long i=a;i<b;i++){
  1011. Matrix<Real> M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false);
  1012. for(Long k=0;k<Ncoef;k++){ // Transpose
  1013. for(Long j=0;j<Ngrid;j++){ // TODO: needs blocking
  1014. B[k+Ncoef*0][j*COORD_DIM+0]=M0[j*6+0][k];
  1015. B[k+Ncoef*1][j*COORD_DIM+0]=M0[j*6+1][k];
  1016. B[k+Ncoef*2][j*COORD_DIM+0]=M0[j*6+2][k];
  1017. B[k+Ncoef*0][j*COORD_DIM+1]=M0[j*6+1][k];
  1018. B[k+Ncoef*1][j*COORD_DIM+1]=M0[j*6+3][k];
  1019. B[k+Ncoef*2][j*COORD_DIM+1]=M0[j*6+4][k];
  1020. B[k+Ncoef*0][j*COORD_DIM+2]=M0[j*6+2][k];
  1021. B[k+Ncoef*1][j*COORD_DIM+2]=M0[j*6+4][k];
  1022. B[k+Ncoef*2][j*COORD_DIM+2]=M0[j*6+5][k];
  1023. }
  1024. }
  1025. Matrix<Real> M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false);
  1026. for(Long k=0;k<B.Dim(0);k++){ // Rearrange
  1027. for(Long j0=0;j0<COORD_DIM;j0++){
  1028. for(Long j1=0;j1<p0+1;j1++){
  1029. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+ j1)*2+0)*p0+j2]=B[k][((j1*p0+j2)*2+0)*COORD_DIM+j0];
  1030. for(Long j2=0;j2<p0;j2++) M1[k][((j0*(p0+1)+p0-j1)*2+1)*p0+j2]=B[k][((j1*p0+j2)*2+1)*COORD_DIM+j0];
  1031. }
  1032. }
  1033. }
  1034. }
  1035. }
  1036. }
  1037. }
  1038. Profile::Toc();
  1039. Profile::Tic("Grid2SHC");
  1040. SphericalHarmonics<Real>::Grid2SHC(SL3, p0, p0, SL);
  1041. SphericalHarmonics<Real>::Grid2SHC(DL3, p0, p0, DL);
  1042. Profile::Toc();
  1043. }
  1044. template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, long p0, long p1, Real period, const Vector<Real>* S, const Vector<Real>* v_ptr, MPI_Comm comm){
  1045. typedef double VTKReal;
  1046. Vector<Real> SS;
  1047. if (S == nullptr) {
  1048. Integer p = 2;
  1049. Integer Ncoeff = p * (p + 2);
  1050. Vector<Real> SSS(COORD_DIM * Ncoeff);
  1051. SSS.SetZero();
  1052. SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
  1053. SSS[1+1*p+1*Ncoeff] = 2/sqrt<Real>(3.0);
  1054. SSS[1+2*p+2*Ncoeff] = 2/sqrt<Real>(3.0);
  1055. SphericalHarmonics<Real>::SHC2Grid(SSS, p, p0, SS);
  1056. S = &SS;
  1057. }
  1058. Vector<Real> X, Xp, V, Vp;
  1059. { // Upsample X
  1060. const Vector<Real>& X0=*S;
  1061. Vector<Real> X1;
  1062. SphericalHarmonics<Real>::Grid2SHC(X0, p0, p0, X1);
  1063. SphericalHarmonics<Real>::SHC2Grid(X1, p0, p1, X);
  1064. SphericalHarmonics<Real>::SHC2Pole(X1, p0, Xp);
  1065. }
  1066. if(v_ptr){ // Upsample V
  1067. const Vector<Real>& X0=*v_ptr;
  1068. Vector<Real> X1;
  1069. SphericalHarmonics<Real>::Grid2SHC(X0, p0, p0, X1);
  1070. SphericalHarmonics<Real>::SHC2Grid(X1, p0, p1, V );
  1071. SphericalHarmonics<Real>::SHC2Pole(X1, p0, Vp);
  1072. }
  1073. std::vector<VTKReal> point_coord;
  1074. std::vector<VTKReal> point_value;
  1075. std::vector<int32_t> poly_connect;
  1076. std::vector<int32_t> poly_offset;
  1077. { // Set point_coord, point_value, poly_connect
  1078. Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
  1079. assert(Xp.Dim() == N_ves*2*COORD_DIM);
  1080. for(Long k=0;k<N_ves;k++){ // Set point_coord
  1081. Real C[COORD_DIM]={0,0,0};
  1082. if(period>0){
  1083. for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
  1084. for(Long i=0;i<p1+1;i++){
  1085. for(Long j=0;j<2*p1;j++){
  1086. for(Integer l=0;l<COORD_DIM;l++){
  1087. C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
  1088. }
  1089. }
  1090. }
  1091. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
  1092. for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
  1093. for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
  1094. for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
  1095. }
  1096. for(Long i=0;i<p1+1;i++){
  1097. for(Long j=0;j<2*p1;j++){
  1098. for(Integer l=0;l<COORD_DIM;l++){
  1099. point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
  1100. }
  1101. }
  1102. }
  1103. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
  1104. for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
  1105. }
  1106. if(v_ptr) {
  1107. Long data__dof = V.Dim() / (2*p1*(p1+1));
  1108. for(Long k=0;k<N_ves;k++){ // Set point_value
  1109. for(Long i=0;i<p1+1;i++){
  1110. for(Long j=0;j<2*p1;j++){
  1111. for(Long l=0;l<data__dof;l++){
  1112. point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
  1113. }
  1114. }
  1115. }
  1116. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
  1117. for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
  1118. }
  1119. }
  1120. for(Long k=0;k<N_ves;k++){
  1121. for(Long j=0;j<2*p1;j++){
  1122. Long i0= 0;
  1123. Long i1=p1;
  1124. Long j0=((j+0) );
  1125. Long j1=((j+1)%(2*p1));
  1126. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
  1127. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  1128. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  1129. poly_offset.push_back(poly_connect.size());
  1130. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
  1131. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  1132. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  1133. poly_offset.push_back(poly_connect.size());
  1134. }
  1135. for(Long i=0;i<p1;i++){
  1136. for(Long j=0;j<2*p1;j++){
  1137. Long i0=((i+0) );
  1138. Long i1=((i+1) );
  1139. Long j0=((j+0) );
  1140. Long j1=((j+1)%(2*p1));
  1141. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
  1142. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
  1143. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
  1144. poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
  1145. poly_offset.push_back(poly_connect.size());
  1146. }
  1147. }
  1148. }
  1149. }
  1150. int myrank, np;
  1151. MPI_Comm_size(comm,&np);
  1152. MPI_Comm_rank(comm,&myrank);
  1153. std::vector<VTKReal>& coord=point_coord;
  1154. std::vector<VTKReal>& value=point_value;
  1155. std::vector<int32_t>& connect=poly_connect;
  1156. std::vector<int32_t>& offset=poly_offset;
  1157. Long pt_cnt=coord.size()/COORD_DIM;
  1158. Long poly_cnt=poly_offset.size();
  1159. // Open file for writing.
  1160. std::stringstream vtufname;
  1161. vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
  1162. std::ofstream vtufile;
  1163. vtufile.open(vtufname.str().c_str());
  1164. if(vtufile.fail()) return;
  1165. bool isLittleEndian;
  1166. { // Set isLittleEndian
  1167. uint16_t number = 0x1;
  1168. uint8_t *numPtr = (uint8_t*)&number;
  1169. isLittleEndian=(numPtr[0] == 1);
  1170. }
  1171. // Proceed to write to file.
  1172. Long data_size=0;
  1173. vtufile<<"<?xml version=\"1.0\"?>\n";
  1174. if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
  1175. else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
  1176. //===========================================================================
  1177. vtufile<<" <PolyData>\n";
  1178. vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
  1179. //---------------------------------------------------------------------------
  1180. vtufile<<" <Points>\n";
  1181. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1182. data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
  1183. vtufile<<" </Points>\n";
  1184. //---------------------------------------------------------------------------
  1185. if(value.size()){ // value
  1186. vtufile<<" <PointData>\n";
  1187. vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1188. data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
  1189. vtufile<<" </PointData>\n";
  1190. }
  1191. //---------------------------------------------------------------------------
  1192. vtufile<<" <Polys>\n";
  1193. vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1194. data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
  1195. vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
  1196. data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
  1197. vtufile<<" </Polys>\n";
  1198. //---------------------------------------------------------------------------
  1199. vtufile<<" </Piece>\n";
  1200. vtufile<<" </PolyData>\n";
  1201. //===========================================================================
  1202. vtufile<<" <AppendedData encoding=\"raw\">\n";
  1203. vtufile<<" _";
  1204. int32_t block_size;
  1205. block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
  1206. if(value.size()){ // value
  1207. block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
  1208. }
  1209. block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
  1210. block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
  1211. vtufile<<"\n";
  1212. vtufile<<" </AppendedData>\n";
  1213. //===========================================================================
  1214. vtufile<<"</VTKFile>\n";
  1215. vtufile.close();
  1216. if(myrank) return;
  1217. std::stringstream pvtufname;
  1218. pvtufname<<fname<<".pvtp";
  1219. std::ofstream pvtufile;
  1220. pvtufile.open(pvtufname.str().c_str());
  1221. if(pvtufile.fail()) return;
  1222. pvtufile<<"<?xml version=\"1.0\"?>\n";
  1223. pvtufile<<"<VTKFile type=\"PPolyData\">\n";
  1224. pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
  1225. pvtufile<<" <PPoints>\n";
  1226. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
  1227. pvtufile<<" </PPoints>\n";
  1228. if(value.size()){ // value
  1229. pvtufile<<" <PPointData>\n";
  1230. pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
  1231. pvtufile<<" </PPointData>\n";
  1232. }
  1233. {
  1234. // Extract filename from path.
  1235. std::stringstream vtupath;
  1236. vtupath<<'/'<<fname;
  1237. std::string pathname = vtupath.str();
  1238. auto found = pathname.find_last_of("/\\");
  1239. std::string fname_ = pathname.substr(found+1);
  1240. for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
  1241. }
  1242. pvtufile<<" </PPolyData>\n";
  1243. pvtufile<<"</VTKFile>\n";
  1244. pvtufile.close();
  1245. }
  1246. } // end namespace