#include SCTL_INCLUDE(legendre_rule.hpp) namespace SCTL_NAMESPACE { // Vector qx1, qw1; // //cgqf(p0+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); // ChebBasis::quad_rule(p0+1, qx1, qw1); // sctl::ASSERT(typeid(Real) == typeid(double) || typeid(Real) == typeid(float)); // TODO: these are Legendre nodes only for float and double // for (auto x : qx1) x = 2 * x - 1; // for (auto w : qw1) w = 2 * w; template void SphericalHarmonics::SHC2Grid(const Vector& S, Long p0, Long p1, Vector& X, Vector* X_theta, Vector* X_phi){ Matrix& Mf =SphericalHarmonics::MatFourier(p0,p1); Matrix& Mdf=SphericalHarmonics::MatFourierGrad(p0,p1); std::vector>& Ml =SphericalHarmonics::MatLegendre(p0,p1); std::vector>& Mdl=SphericalHarmonics::MatLegendreGrad(p0,p1); assert(p0==(Long)Ml.size()-1); assert(p0==Mf.Dim(0)/2); assert(p1==Mf.Dim(1)/2); Long N=S.Dim()/(p0*(p0+2)); assert(N*p0*(p0+2)==S.Dim()); if(X.Dim()!=N*2*p1*(p1+1)) X.ReInit(N*2*p1*(p1+1)); if(X_phi && X_phi ->Dim()!=N*2*p1*(p1+1)) X_phi ->ReInit(N*2*p1*(p1+1)); if(X_theta && X_theta->Dim()!=N*2*p1*(p1+1)) X_theta->ReInit(N*2*p1*(p1+1)); static Vector B0, B1; B0.ReInit(N* p0*(p0+2)); B1.ReInit(N*2*p0*(p1+1)); #pragma omp parallel { // B0 <-- Rearrange(S) Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i Min (N0, p0+1-i, B0.begin()+offset0, false); Matrix Mout(N0, p1+1 , B1.begin()+offset1, false); { // Mout = Min * Ml[i] // split between threads Long a=(tid+0)*N0/omp_p; Long b=(tid+1)*N0/omp_p; if(a Min_ (b-a, Min .Dim(1), Min [a], false); Matrix Mout_(b-a, Mout.Dim(1), Mout[a], false); Matrix::GEMM(Mout_,Min_,Ml[i]); } } offset0+=Min .Dim(0)*Min .Dim(1); offset1+=Mout.Dim(0)*Mout.Dim(1); } } #pragma omp parallel { // Transpose and evaluate Fourier Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N*(p1+1)/omp_p; Long b=(tid+1)*N*(p1+1)/omp_p; const Long block_size=16; Matrix B2(block_size,2*p0); for(Long i0=a;i0 Min (i1-i0,2*p0, B2.begin() , false); Matrix Mout(i1-i0,2*p1, X .begin()+i0*2*p1, false); Matrix::GEMM(Mout, Min, Mf); if(X_theta){ // Evaluate Fourier gradient Matrix Mout(i1-i0,2*p1, X_theta->begin()+i0*2*p1, false); Matrix::GEMM(Mout, Min, Mdf); } } } if(X_phi){ #pragma omp parallel { // Evaluate Legendre gradient Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long offset0=0; Long offset1=0; for(Long i=0;i Min (N0, p0+1-i, B0.begin()+offset0, false); Matrix Mout(N0, p1+1 , B1.begin()+offset1, false); { // Mout = Min * Mdl[i] // split between threads Long a=(tid+0)*N0/omp_p; Long b=(tid+1)*N0/omp_p; if(a Min_ (b-a, Min .Dim(1), Min [a], false); Matrix Mout_(b-a, Mout.Dim(1), Mout[a], false); Matrix::GEMM(Mout_,Min_,Mdl[i]); } } offset0+=Min .Dim(0)*Min .Dim(1); offset1+=Mout.Dim(0)*Mout.Dim(1); } } #pragma omp parallel { // Transpose and evaluate Fourier Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N*(p1+1)/omp_p; Long b=(tid+1)*N*(p1+1)/omp_p; const Long block_size=16; Matrix B2(block_size,2*p0); for(Long i0=a;i0 Min (i1-i0,2*p0, B2.begin() , false); Matrix Mout(i1-i0,2*p1, X_phi->begin()+i0*2*p1, false); Matrix::GEMM(Mout, Min, Mf); } } } } template void SphericalHarmonics::Grid2SHC(const Vector& X, Long p0, Long p1, Vector& S){ Matrix Mf =SphericalHarmonics::MatFourierInv(p0,p1); std::vector> Ml =SphericalHarmonics::MatLegendreInv(p0,p1); assert(p1==(Long)Ml.size()-1); assert(p0==Mf.Dim(0)/2); assert(p1==Mf.Dim(1)/2); Long N=X.Dim()/(2*p0*(p0+1)); assert(N*2*p0*(p0+1)==X.Dim()); if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2))); static Vector B0, B1; B0.ReInit(N* p1*(p1+2)); B1.ReInit(N*2*p1*(p0+1)); #pragma omp parallel { // Evaluate Fourier and transpose Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N*(p0+1)/omp_p; Long b=(tid+1)*N*(p0+1)/omp_p; const Long block_size=16; Matrix B2(block_size,2*p1); for(Long i0=a;i0 Min (i1-i0,2*p0, (Iterator)X.begin()+i0*2*p0, false); Matrix Mout(i1-i0,2*p1, B2.begin() , false); Matrix::GEMM(Mout, Min, Mf); for(Long i=i0;i Min (N0, p0+1 , B1.begin()+offset0, false); Matrix Mout(N0, p1+1-i, B0.begin()+offset1, false); { // Mout = Min * Ml[i] // split between threads Long a=(tid+0)*N0/omp_p; Long b=(tid+1)*N0/omp_p; if(a Min_ (b-a, Min .Dim(1), Min [a], false); Matrix Mout_(b-a, Mout.Dim(1), Mout[a], false); Matrix::GEMM(Mout_,Min_,Ml[i]); } } offset0+=Min .Dim(0)*Min .Dim(1); offset1+=Mout.Dim(0)*Mout.Dim(1); } } #pragma omp parallel { // S <-- Rearrange(B0) Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i void SphericalHarmonics::SHC2GridTranspose(const Vector& X, Long p0, Long p1, Vector& S){ Matrix Mf =SphericalHarmonics::MatFourier(p1,p0).Transpose(); std::vector> Ml =SphericalHarmonics::MatLegendre(p1,p0); for(Long i=0;i<(Long)Ml.size();i++) Ml[i]=Ml[i].Transpose(); assert(p1==(Long)Ml.size()-1); assert(p0==Mf.Dim(0)/2); assert(p1==Mf.Dim(1)/2); Long N=X.Dim()/(2*p0*(p0+1)); assert(N*2*p0*(p0+1)==X.Dim()); if(S.Dim()!=N*(p1*(p1+2))) S.ReInit(N*(p1*(p1+2))); static Vector B0, B1; B0.ReInit(N* p1*(p1+2)); B1.ReInit(N*2*p1*(p0+1)); #pragma omp parallel { // Evaluate Fourier and transpose Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N*(p0+1)/omp_p; Long b=(tid+1)*N*(p0+1)/omp_p; const Long block_size=16; Matrix B2(block_size,2*p1); for(Long i0=a;i0 Min (i1-i0,2*p0, (Iterator)X.begin()+i0*2*p0, false); Matrix Mout(i1-i0,2*p1, B2.begin() , false); Matrix::GEMM(Mout, Min, Mf); for(Long i=i0;i Min (N0, p0+1 , B1.begin()+offset0, false); Matrix Mout(N0, p1+1-i, B0.begin()+offset1, false); { // Mout = Min * Ml[i] // split between threads Long a=(tid+0)*N0/omp_p; Long b=(tid+1)*N0/omp_p; if(a Min_ (b-a, Min .Dim(1), Min [a], false); Matrix Mout_(b-a, Mout.Dim(1), Mout[a], false); Matrix::GEMM(Mout_,Min_,Ml[i]); } } offset0+=Min .Dim(0)*Min .Dim(1); offset1+=Mout.Dim(0)*Mout.Dim(1); } } #pragma omp parallel { // S <-- Rearrange(B0) Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i void SphericalHarmonics::SHC2Pole(const Vector& S, Long p0, Vector& P){ Vector QP[2]; { // Set QP Real x[2]={-1,1}; Vector alp((p0+1)*(p0+2)/2); const Real SQRT2PI=sqrt(2*M_PI); for(Long i=0;i<2;i++){ LegPoly(&alp[0], &x[i], 1, p0); QP[i].ReInit(p0+1, alp.begin()); for(Long j=0;j void SphericalHarmonics::RotateAll(const Vector& S, Long p0, Long dof, Vector& S_){ std::vector>& Mr=MatRotate(p0); std::vector> coeff_perm(p0+1); { // Set coeff_perm for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0)); Long itr=0; for(Long i=0;i<2*p0;i++){ Long m=(i+1)/2; for(Long n=m;n<=p0;n++){ coeff_perm[n][i]=itr; itr++; } } } Long Ncoef=p0*(p0+2); Long N=S.Dim()/Ncoef/dof; assert(N*Ncoef*dof==S.Dim()); if(S_.Dim()!=N*dof*Ncoef*p0*(p0+1)) S_.ReInit(N*dof*Ncoef*p0*(p0+1)); Matrix S0(N*dof ,Ncoef, (Iterator)S.begin(), false); Matrix S1(N*dof*p0*(p0+1),Ncoef, S_.begin() , false); #pragma omp parallel { // Construct all p0*(p0+1) rotations Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Matrix B0(dof*p0,Ncoef); // memory buffer std::vector> Bi(p0+1), Bo(p0+1); // memory buffers for(Long i=0;i<=p0;i++){ // initialize Bi, Bo Bi[i].ReInit(dof*p0,coeff_perm[i].size()); Bo[i].ReInit(dof*p0,coeff_perm[i].size()); } Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]); } Matrix Mout(dof*p0,Ncoef, S1[(i*(p0+1)+t)*dof*p0], false); for(Long k=0;k void SphericalHarmonics::RotateTranspose(const Vector& S_, Long p0, Long dof, Vector& S){ std::vector> Mr=MatRotate(p0); for(Long i=0;i<(Long)Mr.size();i++) Mr[i]=Mr[i].Transpose(); std::vector> coeff_perm(p0+1); { // Set coeff_perm for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0)); Long itr=0; for(Long i=0;i<2*p0;i++){ Long m=(i+1)/2; for(Long n=m;n<=p0;n++){ coeff_perm[n][i]=itr; itr++; } } } Long Ncoef=p0*(p0+2); Long N=S_.Dim()/Ncoef/dof/(p0*(p0+1)); assert(N*Ncoef*dof*(p0*(p0+1))==S_.Dim()); if(S.Dim()!=N*dof*Ncoef*p0*(p0+1)) S.ReInit(N*dof*Ncoef*p0*(p0+1)); Matrix S0(N*dof*p0*(p0+1),Ncoef, S.begin() , false); Matrix S1(N*dof*p0*(p0+1),Ncoef, (Iterator)S_.begin(), false); #pragma omp parallel { // Transpose all p0*(p0+1) rotations Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Matrix B0(dof*p0,Ncoef); // memory buffer std::vector> Bi(p0+1), Bo(p0+1); // memory buffers for(Long i=0;i<=p0;i++){ // initialize Bi, Bo Bi[i].ReInit(dof*p0,coeff_perm[i].size()); Bo[i].ReInit(dof*p0,coeff_perm[i].size()); } Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i Min(p0*dof,Ncoef, S1[idx0], false); for(Long k=0;k::GEMM(Bo[l],Bi[l],Mr[t*(p0+1)+l]); } for(Long k=0;k Vector& SphericalHarmonics::LegendreNodes(Long p1){ assert(p1& Qx=MatrixStore().Qx_[p1]; if(!Qx.Dim()){ Vector qx1(p1+1); Vector qw1(p1+1); cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); Qx=qx1; } return Qx; } template Vector& SphericalHarmonics::LegendreWeights(Long p1){ assert(p1& Qw=MatrixStore().Qw_[p1]; if(!Qw.Dim()){ // TODO: this works only for Real = double Vector qx1(p1+1); Vector qw1(p1+1); cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); for(Long i=0;i Vector& SphericalHarmonics::SingularWeights(Long p1){ assert(p1& Sw=MatrixStore().Sw_[p1]; if(!Sw.Dim()){ Vector qx1(p1+1); Vector qw1(p1+1); cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); std::vector Yf(p1+1,0); { // Set Yf Real x0=1.0; std::vector alp0((p1+1)*(p1+2)/2); LegPoly(&alp0[0], &x0, 1, p1); std::vector alp((p1+1) * (p1+1)*(p1+2)/2); LegPoly(&alp[0], &qx1[0], p1+1, p1); for(Long j=0;j Matrix& SphericalHarmonics::MatFourier(Long p0, Long p1){ assert(p0& Mf =MatrixStore().Mf_ [p0*SCTL_SHMAXDEG+p1]; if(!Mf.Dim(0)){ const Real SQRT2PI=sqrt(2*M_PI); { // Set Mf Matrix M(2*p0,2*p1); for(Long j=0;j<2*p1;j++){ M[0][j]=SQRT2PI*1.0; for(Long k=1;k Matrix& SphericalHarmonics::MatFourierInv(Long p0, Long p1){ assert(p0& Mf =MatrixStore().Mfinv_ [p0*SCTL_SHMAXDEG+p1]; if(!Mf.Dim(0)){ const Real INVSQRT2PI=1.0/sqrt(2*M_PI)/p0; { // Set Mf Matrix M(2*p0,2*p1); M.SetZero(); if(p1>p0) p1=p0; for(Long j=0;j<2*p0;j++){ M[j][0]=INVSQRT2PI*0.5; for(Long k=1;k Matrix& SphericalHarmonics::MatFourierGrad(Long p0, Long p1){ assert(p0& Mdf=MatrixStore().Mdf_[p0*SCTL_SHMAXDEG+p1]; if(!Mdf.Dim(0)){ const Real SQRT2PI=sqrt(2*M_PI); { // Set Mdf_ Matrix M(2*p0,2*p1); for(Long j=0;j<2*p1;j++){ M[0][j]=SQRT2PI*0.0; for(Long k=1;k std::vector>& SphericalHarmonics::MatLegendre(Long p0, Long p1){ assert(p0>& Ml =MatrixStore().Ml_ [p0*SCTL_SHMAXDEG+p1]; if(!Ml.size()){ Vector qx1(p1+1); Vector qw1(p1+1); cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); { // Set Ml Vector alp(qx1.Dim()*(p0+1)*(p0+2)/2); LegPoly(&alp[0], &qx1[0], qx1.Dim(), p0); Ml.resize(p0+1); auto ptr = alp.begin(); for(Long i=0;i<=p0;i++){ Ml[i].ReInit(p0+1-i, qx1.Dim(), ptr); ptr+=Ml[i].Dim(0)*Ml[i].Dim(1); } } } return Ml; } template std::vector>& SphericalHarmonics::MatLegendreInv(Long p0, Long p1){ assert(p0>& Ml =MatrixStore().Mlinv_ [p0*SCTL_SHMAXDEG+p1]; if(!Ml.size()){ Vector qx1(p0+1); Vector qw1(p0+1); cgqf(p0+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); { // Set Ml Vector alp(qx1.Dim()*(p1+1)*(p1+2)/2); LegPoly(&alp[0], &qx1[0], qx1.Dim(), p1); Ml.resize(p1+1); auto ptr = alp.begin(); for(Long i=0;i<=p1;i++){ Ml[i].ReInit(qx1.Dim(), p1+1-i); Matrix M(p1+1-i, qx1.Dim(), ptr, false); for(Long j=0;j std::vector>& SphericalHarmonics::MatLegendreGrad(Long p0, Long p1){ assert(p0>& Mdl=MatrixStore().Mdl_[p0*SCTL_SHMAXDEG+p1]; if(!Mdl.size()){ Vector qx1(p1+1); Vector qw1(p1+1); cgqf(p1+1, 1, 0.0, 0.0, -1.0, 1.0, &qx1[0], &qw1[0]); { // Set Mdl Vector alp(qx1.Dim()*(p0+1)*(p0+2)/2); LegPolyDeriv(&alp[0], &qx1[0], qx1.Dim(), p0); Mdl.resize(p0+1); auto ptr = alp.begin(); for(Long i=0;i<=p0;i++){ Mdl[i].ReInit(p0+1-i, qx1.Dim(), ptr); ptr+=Mdl[i].Dim(0)*Mdl[i].Dim(1); } } } return Mdl; } template std::vector>& SphericalHarmonics::MatRotate(Long p0){ std::vector> coeff_perm(p0+1); { // Set coeff_perm for(Long n=0;n<=p0;n++) coeff_perm[n].resize(std::min(2*n+1,2*p0)); Long itr=0; for(Long i=0;i<2*p0;i++){ Long m=(i+1)/2; for(Long n=m;n<=p0;n++){ coeff_perm[n][i]=itr; itr++; } } } assert(p0>& Mr=MatrixStore().Mr_[p0]; if(!Mr.size()){ const Real SQRT2PI=sqrt(2*M_PI); Long Ncoef=p0*(p0+2); Long Ngrid=2*p0*(p0+1); Long Naleg=(p0+1)*(p0+2)/2; Matrix Mcoord0(3,Ngrid); Vector& x=LegendreNodes(p0); for(Long i=0;i Mcoord1; { // Rotate coordinates Matrix M(COORD_DIM, COORD_DIM); Real cos_=-x[l]; Real sin_=-sqrt(1.0-x[l]*x[l]); M[0][0]= cos_; M[0][1]=0; M[0][2]=-sin_; M[1][0]= 0; M[1][1]=1; M[1][2]= 0; M[2][0]= sin_; M[2][1]=0; M[2][2]= cos_; Mcoord1=M*Mcoord0; } Matrix Mleg(Naleg, Ngrid); { // Set Mleg LegPoly(&Mleg[0][0], &Mcoord1[0][0], Ngrid, p0); } Vector theta(Ngrid); for(Long i=0;i Mcoef2grid(Ncoef, Ngrid); { // Build Mcoef2grid Long offset0=0; Long offset1=0; for(Long i=0;i Vcoef2coef(Ncoef*Ncoef); Vector Vcoef2grid(Ncoef*Ngrid, Mcoef2grid[0], false); Grid2SHC(Vcoef2grid, p0, p0, Vcoef2coef); Matrix Mcoef2coef(Ncoef, Ncoef, Vcoef2coef.begin(), false); for(Long n=0;n<=p0;n++){ // Create matrices for fast rotation Matrix M(coeff_perm[n].size(),coeff_perm[n].size()); for(Long i=0;i<(Long)coeff_perm[n].size();i++){ for(Long j=0;j<(Long)coeff_perm[n].size();j++){ M[i][j]=Mcoef2coef[coeff_perm[n][i]][coeff_perm[n][j]]; } } Mr.push_back(M); } } } return Mr; } template void SphericalHarmonics::StokesSingularInteg(const Vector& S, Long p0, Long p1, Vector* SLMatrix, Vector* DLMatrix){ Long Ngrid=2*p0*(p0+1); Long Ncoef= p0*(p0+2); Long Nves=S.Dim()/(Ngrid*COORD_DIM); if(SLMatrix) SLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM)); if(DLMatrix) DLMatrix->ReInit(Nves*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM)); Long BLOCK_SIZE=(Long)6e9/((3*2*p1*(p1+1))*(3*2*p0*(p0+1))*2*8); // Limit memory usage to 6GB BLOCK_SIZE=std::min(BLOCK_SIZE,omp_get_max_threads()); BLOCK_SIZE=std::max(BLOCK_SIZE,1); for(Long a=0;a _SLMatrix, _DLMatrix, _S; if(SLMatrix) _SLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), SLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false); if(DLMatrix) _DLMatrix.ReInit((b-a)*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), DLMatrix->begin()+a*(Ncoef*COORD_DIM)*(Ncoef*COORD_DIM), false); _S .ReInit((b-a)*(Ngrid*COORD_DIM) , (Iterator)S.begin()+a*(Ngrid*COORD_DIM), false); if(SLMatrix && DLMatrix) StokesSingularInteg_< true, true>(_S, p0, p1, _SLMatrix, _DLMatrix); else if(SLMatrix) StokesSingularInteg_< true, false>(_S, p0, p1, _SLMatrix, _DLMatrix); else if(DLMatrix) StokesSingularInteg_(_S, p0, p1, _SLMatrix, _DLMatrix); } } template void SphericalHarmonics::LegPoly(Real* poly_val, const Real* X, Long N, Long degree){ Real* p_val=poly_val; Real fact=1.0/(Real)sqrt(4*M_PI); std::vector u(N); for(Long n=0;n1.0) u[n]=0; p_val[n]=fact; } Real* p_val_nxt=poly_val; for(Long i=1;i<=degree;i++){ p_val_nxt=&p_val_nxt[N*(degree-i+2)]; Real c=(i==1?sqrt(3.0/2.0):1); if(i>1)c*=sqrt((Real)(2*i+1)/(2*i)); for(Long n=0;n void SphericalHarmonics::LegPolyDeriv(Real* poly_val, const Real* X, Long N, Long degree){ std::vector leg_poly((degree+1)*(degree+2)*N/2); LegPoly(&leg_poly[0], X, N, degree); for(Long m=0;m<=degree;m++){ for(Long n=0;n<=degree;n++) if(m<=n){ const Real* Pn =&leg_poly[0]; const Real* Pn_=&leg_poly[0]; if((m+0)<=(n+0)) Pn =&leg_poly[N*(((degree*2-abs(m+0)+1)*abs(m+0))/2+(n+0))]; if((m+1)<=(n+0)) Pn_=&leg_poly[N*(((degree*2-abs(m+1)+1)*abs(m+1))/2+(n+0))]; Real* Hn =&poly_val[N*(((degree*2-abs(m+0)+1)*abs(m+0))/2+(n+0))]; Real c1=(abs(m+0)<=(n+0)?1.0:0)*m; Real c2=(abs(m+1)<=(n+0)?1.0:0)*sqrt(n+m+1)*sqrt(n>m?n-m:1); for(Long i=0;i template void SphericalHarmonics::StokesSingularInteg_(const Vector& X0, Long p0, Long p1, Vector& SL, Vector& DL){ Profile::Tic("Rotate"); static Vector S0, S; SphericalHarmonics::Grid2SHC(X0, p0, p0, S0); SphericalHarmonics::RotateAll(S0, p0, COORD_DIM, S); Profile::Toc(); Profile::Tic("Upsample"); Vector X, X_phi, X_theta, trg; SphericalHarmonics::SHC2Grid(S, p0, p1, X, &X_theta, &X_phi); SphericalHarmonics::SHC2Pole(S, p0, trg); Profile::Toc(); Profile::Tic("Stokes"); Vector SL0, DL0; { // Stokes kernel //Long M0=2*p0*(p0+1); Long M1=2*p1*(p1+1); Long N=trg.Dim()/(2*COORD_DIM); assert(X.Dim()==M1*COORD_DIM*N); if(SLayer && SL0.Dim()!=N*2*6*M1) SL0.ReInit(2*N*6*M1); if(DLayer && DL0.Dim()!=N*2*6*M1) DL0.ReInit(2*N*6*M1); Vector& qw=SphericalHarmonics::SingularWeights(p1); const Real scal_const_dl = 3.0/(4.0*M_PI); const Real scal_const_sl = 1.0/(8.0*M_PI); static Real eps=-1; if(eps<0){ eps=1; while(eps*(Real)0.5+(Real)1.0>1.0) eps*=0.5; } #pragma omp parallel { Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i SL1, DL1; SphericalHarmonics::SHC2GridTranspose(SL0, p1, p0, SL1); SphericalHarmonics::SHC2GridTranspose(DL0, p1, p0, DL1); Profile::Toc(); Profile::Tic("RotateTranspose"); static Vector SL2, DL2; SphericalHarmonics::RotateTranspose(SL1, p0, 2*6, SL2); SphericalHarmonics::RotateTranspose(DL1, p0, 2*6, DL2); Profile::Toc(); Profile::Tic("Rearrange"); static Vector SL3, DL3; { // Transpose Long Ncoef=p0*(p0+2); Long Ngrid=2*p0*(p0+1); { // Transpose SL2 Long N=SL2.Dim()/(6*Ncoef*Ngrid); SL3.ReInit(N*COORD_DIM*Ncoef*COORD_DIM*Ngrid); #pragma omp parallel { Integer tid=omp_get_thread_num(); Integer omp_p=omp_get_num_threads(); Matrix B(COORD_DIM*Ncoef,Ngrid*COORD_DIM); Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i M0(Ngrid*6, Ncoef, SL2.begin()+i*Ngrid*6*Ncoef, false); for(Long k=0;k M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, SL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false); for(Long k=0;k B(COORD_DIM*Ncoef,Ngrid*COORD_DIM); Long a=(tid+0)*N/omp_p; Long b=(tid+1)*N/omp_p; for(Long i=a;i M0(Ngrid*6, Ncoef, DL2.begin()+i*Ngrid*6*Ncoef, false); for(Long k=0;k M1(Ncoef*COORD_DIM, COORD_DIM*Ngrid, DL3.begin()+i*COORD_DIM*Ncoef*COORD_DIM*Ngrid, false); for(Long k=0;k::Grid2SHC(SL3, p0, p0, SL); SphericalHarmonics::Grid2SHC(DL3, p0, p0, DL); Profile::Toc(); } template void SphericalHarmonics::WriteVTK(const char* fname, long p0, long p1, Real period, const Vector* S, const Vector* v_ptr, MPI_Comm comm){ typedef double VTKReal; Vector SS; if (S == nullptr) { Integer p = 2; Integer Ncoeff = p * (p + 2); Vector SSS(COORD_DIM * Ncoeff); SSS.SetZero(); SSS[1+0*p+0*Ncoeff] = sqrt(2.0)/sqrt(3.0); SSS[1+1*p+1*Ncoeff] = 2/sqrt(3.0); SSS[1+2*p+2*Ncoeff] = 2/sqrt(3.0); SphericalHarmonics::SHC2Grid(SSS, p, p0, SS); S = &SS; } Vector X, Xp, V, Vp; { // Upsample X const Vector& X0=*S; Vector X1; SphericalHarmonics::Grid2SHC(X0, p0, p0, X1); SphericalHarmonics::SHC2Grid(X1, p0, p1, X); SphericalHarmonics::SHC2Pole(X1, p0, Xp); } if(v_ptr){ // Upsample V const Vector& X0=*v_ptr; Vector X1; SphericalHarmonics::Grid2SHC(X0, p0, p0, X1); SphericalHarmonics::SHC2Grid(X1, p0, p1, V ); SphericalHarmonics::SHC2Pole(X1, p0, Vp); } std::vector point_coord; std::vector point_value; std::vector poly_connect; std::vector poly_offset; { // Set point_coord, point_value, poly_connect Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles assert(Xp.Dim() == N_ves*2*COORD_DIM); for(Long k=0;k0){ for(Integer l=0;l& coord=point_coord; std::vector& value=point_value; std::vector& connect=poly_connect; std::vector& offset=poly_offset; Long pt_cnt=coord.size()/COORD_DIM; Long poly_cnt=poly_offset.size(); // Open file for writing. std::stringstream vtufname; vtufname<\n"; if(isLittleEndian) vtufile<<"\n"; else vtufile<<"\n"; //=========================================================================== vtufile<<" \n"; vtufile<<" \n"; //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal); vtufile<<" \n"; //--------------------------------------------------------------------------- if(value.size()){ // value vtufile<<" \n"; vtufile<<" \n"; data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal); vtufile<<" \n"; } //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t); vtufile<<" \n"; data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t); vtufile<<" \n"; //--------------------------------------------------------------------------- vtufile<<" \n"; vtufile<<" \n"; //=========================================================================== vtufile<<" \n"; vtufile<<" _"; int32_t block_size; block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal)); if(value.size()){ // value block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal)); } block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t)); block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t)); vtufile<<"\n"; vtufile<<" \n"; //=========================================================================== vtufile<<"\n"; vtufile.close(); if(myrank) return; std::stringstream pvtufname; pvtufname<\n"; pvtufile<<"\n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; if(value.size()){ // value pvtufile<<" \n"; pvtufile<<" \n"; pvtufile<<" \n"; } { // Extract filename from path. std::stringstream vtupath; vtupath<<'/'<\n"; } pvtufile<<" \n"; pvtufile<<"\n"; pvtufile.close(); } } // end namespace