|
@@ -14,927 +14,929 @@ template <class Real> void SphericalHarmonics<Real>::Grid2SHC(const Vector<Real>
|
|
|
SHCArrange0(B1, p1, S, arrange);
|
|
|
}
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
|
|
|
- const auto& Mf = OpFourierInv(Np);
|
|
|
- assert(Mf.Dim(0) == Np);
|
|
|
-
|
|
|
- const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
|
|
|
- assert((Long)Ml.size() == p1+1);
|
|
|
+template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
|
|
|
+ Vector<Real> B0;
|
|
|
+ SHCArrange1(S, arrange, p0, B0);
|
|
|
+ B0 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
|
|
|
+ SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
|
|
|
+}
|
|
|
|
|
|
- Long N = X.Dim() / (Np*Nt);
|
|
|
- assert(X.Dim() == N*Np*Nt);
|
|
|
+template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
|
|
|
+ Long M = (p0+1) * (p0+1);
|
|
|
|
|
|
- Vector<Real> B0((2*p1+1) * N*Nt);
|
|
|
- #pragma omp parallel
|
|
|
- { // B0 <-- Transpose(FFT(X))
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
- Long a=(tid+0)*N*Nt/omp_p;
|
|
|
- Long b=(tid+1)*N*Nt/omp_p;
|
|
|
+ Long dof;
|
|
|
+ Matrix<Real> B1;
|
|
|
+ { // Set B1, dof
|
|
|
+ Vector<Real> B0;
|
|
|
+ SHCArrange1(S, arrange, p0, B0);
|
|
|
+ dof = B0.Dim() / M;
|
|
|
+ assert(B0.Dim() == dof * M);
|
|
|
|
|
|
- Vector<Real> buff(Mf.Dim(1));
|
|
|
- Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
|
|
|
- Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
|
|
|
- const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- { // buff <-- FFT(Xi)
|
|
|
- const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
|
|
|
- Mf.Execute(Xi, buff);
|
|
|
- }
|
|
|
- { // B0 <-- Transpose(buff)
|
|
|
- B0_[0][i] = buff[0]; // skipping buff[1] == 0
|
|
|
- for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
|
|
|
- for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
|
|
|
- }
|
|
|
- }
|
|
|
+ B1.ReInit(dof, M);
|
|
|
+ Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
|
|
|
+ SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
|
|
|
}
|
|
|
+ assert(B1.Dim(0) == dof);
|
|
|
+ assert(B1.Dim(1) == M);
|
|
|
|
|
|
- if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
|
|
|
- #pragma omp parallel
|
|
|
- { // Evaluate Legendre polynomial
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ Matrix<Real> SHBasis;
|
|
|
+ SHBasisEval(p0, cos_theta_phi, SHBasis);
|
|
|
+ assert(SHBasis.Dim(1) == M);
|
|
|
+ Long N = SHBasis.Dim(0);
|
|
|
|
|
|
- Long offset0=0;
|
|
|
- Long offset1=0;
|
|
|
- for (Long i = 0; i < p1+1; i++) {
|
|
|
- Long N_ = (i==0 ? N : 2*N);
|
|
|
- Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
|
|
|
- Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
|
|
|
- { // Mout = Min * Ml[i] // split between threads
|
|
|
- Long a=(tid+0)*N_/omp_p;
|
|
|
- Long b=(tid+1)*N_/omp_p;
|
|
|
- if (a < b) {
|
|
|
- Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
|
|
|
- Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
|
|
|
- Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
|
|
|
- }
|
|
|
+ if (X.Dim() != N*dof) X.ReInit(N * dof);
|
|
|
+ { // Set X
|
|
|
+ for (Long k0 = 0; k0 < N; k0++) {
|
|
|
+ for (Long k1 = 0; k1 < dof; k1++) {
|
|
|
+ Real X_ = 0;
|
|
|
+ for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
|
|
|
+ X[k0 * dof + k1] = X_;
|
|
|
}
|
|
|
- offset0+=Min .Dim(0)*Min .Dim(1);
|
|
|
- offset1+=Mout.Dim(0)*Mout.Dim(1);
|
|
|
}
|
|
|
- assert(offset0 == B0.Dim());
|
|
|
- assert(offset1 == B1.Dim());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
|
|
|
- Long M = (p1+1)*(p1+1);
|
|
|
- Long N = B1.Dim() / M;
|
|
|
- assert(B1.Dim() == N*M);
|
|
|
- if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
|
|
|
- Long M = 2*(p1+1)*(p1+1);
|
|
|
- if(S.Dim() != N * M) S.ReInit(N * M);
|
|
|
+template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
|
|
|
+ Vector<Real> QP[2];
|
|
|
+ { // Set QP // TODO: store these weights
|
|
|
+ Vector<Real> x(1), alp;
|
|
|
+ const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
|
|
|
+ for (Long i = 0; i < 2; i++) {
|
|
|
+ x = (i ? -1 : 1);
|
|
|
+ LegPoly(alp, x, p0);
|
|
|
+ QP[i].ReInit(p0 + 1, alp.begin());
|
|
|
+ QP[i] *= SQRT2PI;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Long M, N;
|
|
|
+ { // Set M, N
|
|
|
+ M = 0;
|
|
|
+ if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
|
|
|
+ if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
|
|
|
+ if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
|
|
|
+ if (M == 0) return;
|
|
|
+ N = S.Dim() / M;
|
|
|
+ assert(S.Dim() == N * M);
|
|
|
+ }
|
|
|
+ if(P.Dim() != N * 2) P.ReInit(N * 2);
|
|
|
+
|
|
|
+ if (arrange == SHCArrange::ALL) {
|
|
|
#pragma omp parallel
|
|
|
- { // S <-- Rearrange(B1)
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ { // Compute pole
|
|
|
+ Integer tid = omp_get_thread_num();
|
|
|
+ Integer omp_p = omp_get_num_threads();
|
|
|
|
|
|
- Long a=(tid+0)*N/omp_p;
|
|
|
- Long b=(tid+1)*N/omp_p;
|
|
|
+ Long a = (tid + 0) * N / omp_p;
|
|
|
+ Long b = (tid + 1) * N / omp_p;
|
|
|
for (Long i = a; i < b; i++) {
|
|
|
- Long offset = 0;
|
|
|
- for (Long j = 0; j < p1+1; j++) {
|
|
|
- Long len = p1+1 - j;
|
|
|
- if (1) { // Set Real(S_n^m) for m=j and n=j..p
|
|
|
- ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
|
|
|
- for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
- if (j) { // Set Imag(S_n^m) for m=j and n=j..p
|
|
|
- ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
|
|
|
- for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
|
|
|
- offset += len;
|
|
|
- } else {
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
|
|
|
- for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
|
|
|
- }
|
|
|
+ Real P_[2] = {0, 0};
|
|
|
+ for (Long j = 0; j < p0 + 1; j++) {
|
|
|
+ P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
|
|
|
+ P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
|
|
|
}
|
|
|
+ P[2*i+0] = P_[0];
|
|
|
+ P[2*i+1] = P_[1];
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
|
|
|
- Long M = (p1+1)*(p1+2);
|
|
|
- if(S.Dim() != N * M) S.ReInit(N * M);
|
|
|
+ if (arrange == SHCArrange::ROW_MAJOR) {
|
|
|
#pragma omp parallel
|
|
|
- { // S <-- Rearrange(B1)
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ { // Compute pole
|
|
|
+ Integer tid = omp_get_thread_num();
|
|
|
+ Integer omp_p = omp_get_num_threads();
|
|
|
|
|
|
- Long a=(tid+0)*N/omp_p;
|
|
|
- Long b=(tid+1)*N/omp_p;
|
|
|
+ Long a = (tid + 0) * N / omp_p;
|
|
|
+ Long b = (tid + 1) * N / omp_p;
|
|
|
for (Long i = a; i < b; i++) {
|
|
|
- Long offset = 0;
|
|
|
- for (Long j = 0; j < p1+1; j++) {
|
|
|
- Long len = p1+1 - j;
|
|
|
- if (1) { // Set Real(S_n^m) for m=j and n=j..p
|
|
|
- ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + 0;
|
|
|
- for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
- if (j) { // Set Imag(S_n^m) for m=j and n=j..p
|
|
|
- ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + 1;
|
|
|
- for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
|
|
|
- offset += len;
|
|
|
- } else {
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + 1;
|
|
|
- for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
|
|
|
- }
|
|
|
+ Long idx = 0;
|
|
|
+ Real P_[2] = {0, 0};
|
|
|
+ for (Long j = 0; j < p0 + 1; j++) {
|
|
|
+ P_[0] += S[i*M+idx] * QP[0][j];
|
|
|
+ P_[1] += S[i*M+idx] * QP[1][j];
|
|
|
+ idx += 2*(j+1);
|
|
|
}
|
|
|
+ P[2*i+0] = P_[0];
|
|
|
+ P[2*i+1] = P_[1];
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
|
|
|
- Long M = (p1+1)*(p1+1);
|
|
|
- if(S.Dim() != N * M) S.ReInit(N * M);
|
|
|
+ if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
|
|
|
#pragma omp parallel
|
|
|
- { // S <-- Rearrange(B1)
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ { // Compute pole
|
|
|
+ Integer tid = omp_get_thread_num();
|
|
|
+ Integer omp_p = omp_get_num_threads();
|
|
|
|
|
|
- Long a=(tid+0)*N/omp_p;
|
|
|
- Long b=(tid+1)*N/omp_p;
|
|
|
+ Long a = (tid + 0) * N / omp_p;
|
|
|
+ Long b = (tid + 1) * N / omp_p;
|
|
|
for (Long i = a; i < b; i++) {
|
|
|
- Long offset = 0;
|
|
|
- for (Long j = 0; j < p1+1; j++) {
|
|
|
- Long len = p1+1 - j;
|
|
|
- if (1) { // Set Real(S_n^m) for m=j and n=j..p
|
|
|
- ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
- for (Long k = 0; k < len; k++) S_[k] = B_[k];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
- if (j) { // Set Imag(S_n^m) for m=j and n=j..p
|
|
|
- ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
- Iterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
- for (Long k = 0; k < len; k++) S_[k] = B_[k];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
+ Real P_[2] = {0, 0};
|
|
|
+ for (Long j = 0; j < p0 + 1; j++) {
|
|
|
+ P_[0] += S[i*M+j] * QP[0][j];
|
|
|
+ P_[1] += S[i*M+j] * QP[1][j];
|
|
|
}
|
|
|
+ P[2*i+0] = P_[0];
|
|
|
+ P[2*i+1] = P_[1];
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::SHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
|
|
|
- Vector<Real> B0;
|
|
|
- SHCArrange1(S, arrange, p0, B0);
|
|
|
- B0 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
|
|
|
- SHC2Grid_(B0, p0, Nt, Np, X, X_phi, X_theta);
|
|
|
-}
|
|
|
+template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
|
|
|
+ typedef double VTKReal;
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
|
|
|
- Long M, N;
|
|
|
- { // Set M, N
|
|
|
- M = 0;
|
|
|
- if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
|
|
|
- if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
|
|
|
- if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
|
|
|
- if (M == 0) return;
|
|
|
- N = S.Dim() / M;
|
|
|
- assert(S.Dim() == N * M);
|
|
|
+ Vector<Real> SS;
|
|
|
+ if (S == nullptr) {
|
|
|
+ Integer p = 2;
|
|
|
+ Integer Ncoeff = (p + 1) * (p + 1);
|
|
|
+ Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
|
|
|
+ SSS.SetZero();
|
|
|
+ SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
|
|
|
+ SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
|
|
|
+ SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
|
|
|
+ SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
|
|
|
+ SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
|
|
|
+ S = &SS;
|
|
|
}
|
|
|
|
|
|
- if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
|
|
|
- if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
|
|
|
- #pragma omp parallel
|
|
|
- { // B0 <-- Rearrange(S)
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ Vector<Real> X, Xp, V, Vp;
|
|
|
+ { // Upsample X
|
|
|
+ const Vector<Real>& X0=*S;
|
|
|
+ SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
|
|
|
+ SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
|
|
|
+ }
|
|
|
+ if(v_ptr){ // Upsample V
|
|
|
+ const Vector<Real>& X0=*v_ptr;
|
|
|
+ SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
|
|
|
+ SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
|
|
|
+ }
|
|
|
|
|
|
- Long a=(tid+0)*N/omp_p;
|
|
|
- Long b=(tid+1)*N/omp_p;
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- Long offset = 0;
|
|
|
- for (Long j = 0; j < p0+1; j++) {
|
|
|
- Long len = p0+1 - j;
|
|
|
- if (1) { // Get Real(S_n^m) for m=j and n=j..p
|
|
|
- Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
- ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
|
|
|
- for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
- if (j) { // Get Imag(S_n^m) for m=j and n=j..p
|
|
|
- Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
- ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
|
|
|
- for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
|
|
|
- offset += len;
|
|
|
+ std::vector<VTKReal> point_coord;
|
|
|
+ std::vector<VTKReal> point_value;
|
|
|
+ std::vector<int32_t> poly_connect;
|
|
|
+ std::vector<int32_t> poly_offset;
|
|
|
+ { // Set point_coord, point_value, poly_connect
|
|
|
+ Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
|
|
|
+ assert(Xp.Dim() == N_ves*2*COORD_DIM);
|
|
|
+ for(Long k=0;k<N_ves;k++){ // Set point_coord
|
|
|
+ Real C[COORD_DIM]={0,0,0};
|
|
|
+ if(period>0){
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
|
|
|
+ for(Long i=0;i<p1+1;i++){
|
|
|
+ for(Long j=0;j<2*p1;j++){
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++){
|
|
|
+ C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
|
|
|
}
|
|
|
- }
|
|
|
- }
|
|
|
- if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
|
|
|
- #pragma omp parallel
|
|
|
- { // B0 <-- Rearrange(S)
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- Long a=(tid+0)*N/omp_p;
|
|
|
- Long b=(tid+1)*N/omp_p;
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- Long offset = 0;
|
|
|
- for (Long j = 0; j < p0+1; j++) {
|
|
|
- Long len = p0+1 - j;
|
|
|
- if (1) { // Get Real(S_n^m) for m=j and n=j..p
|
|
|
- Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
- ConstIterator<Real> S_ = S .begin() + i*M + 0;
|
|
|
- for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
- if (j) { // Get Imag(S_n^m) for m=j and n=j..p
|
|
|
- Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
- ConstIterator<Real> S_ = S .begin() + i*M + 1;
|
|
|
- for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
|
|
|
- offset += len;
|
|
|
+ for(Long i=0;i<p1+1;i++){
|
|
|
+ for(Long j=0;j<2*p1;j++){
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++){
|
|
|
+ point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
|
|
|
+ for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
|
|
|
}
|
|
|
- }
|
|
|
- if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
|
|
|
- #pragma omp parallel
|
|
|
- { // B0 <-- Rearrange(S)
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- Long a=(tid+0)*N/omp_p;
|
|
|
- Long b=(tid+1)*N/omp_p;
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- Long offset = 0;
|
|
|
- for (Long j = 0; j < p0+1; j++) {
|
|
|
- Long len = p0+1 - j;
|
|
|
- if (1) { // Get Real(S_n^m) for m=j and n=j..p
|
|
|
- Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
- ConstIterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
- for (Long k = 0; k < len; k++) B_[k] = S_[k];
|
|
|
- offset += len;
|
|
|
- }
|
|
|
- if (j) { // Get Imag(S_n^m) for m=j and n=j..p
|
|
|
- Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
- ConstIterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
- for (Long k = 0; k < len; k++) B_[k] = S_[k];
|
|
|
- offset += len;
|
|
|
+ if(v_ptr) {
|
|
|
+ Long data__dof = V.Dim() / (2*p1*(p1+1));
|
|
|
+ for(Long k=0;k<N_ves;k++){ // Set point_value
|
|
|
+ for(Long i=0;i<p1+1;i++){
|
|
|
+ for(Long j=0;j<2*p1;j++){
|
|
|
+ for(Long l=0;l<data__dof;l++){
|
|
|
+ point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
+ for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
|
|
|
+ for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
|
|
|
- const auto& Mf = OpFourier(Np);
|
|
|
- assert(Mf.Dim(1) == Np);
|
|
|
-
|
|
|
- const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
|
|
|
- const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
|
|
|
- assert((Long)Ml .size() == p0+1);
|
|
|
- assert((Long)Mdl.size() == p0+1);
|
|
|
|
|
|
- Long N = B0.Dim() / ((p0+1)*(p0+1));
|
|
|
- assert(B0.Dim() == N*(p0+1)*(p0+1));
|
|
|
-
|
|
|
- if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
|
|
|
- if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
|
|
|
- if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
|
|
|
+ for(Long k=0;k<N_ves;k++){
|
|
|
+ for(Long j=0;j<2*p1;j++){
|
|
|
+ Long i0= 0;
|
|
|
+ Long i1=p1;
|
|
|
+ Long j0=((j+0) );
|
|
|
+ Long j1=((j+1)%(2*p1));
|
|
|
|
|
|
- Vector<Real> B1(N*(2*p0+1)*Nt);
|
|
|
- if(X || X_phi){
|
|
|
- #pragma omp parallel
|
|
|
- { // Evaluate Legendre polynomial
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
|
|
|
+ poly_offset.push_back(poly_connect.size());
|
|
|
|
|
|
- Long offset0=0;
|
|
|
- Long offset1=0;
|
|
|
- for(Long i=0;i<p0+1;i++){
|
|
|
- Long N_ = (i==0 ? N : 2*N);
|
|
|
- const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
|
|
|
- Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
|
|
|
- { // Mout = Min * Ml[i] // split between threads
|
|
|
- Long a=(tid+0)*N_/omp_p;
|
|
|
- Long b=(tid+1)*N_/omp_p;
|
|
|
- if(a<b){
|
|
|
- const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
|
|
|
- Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
|
|
|
- Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
|
|
|
- }
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
|
|
|
+ poly_offset.push_back(poly_connect.size());
|
|
|
+ }
|
|
|
+ for(Long i=0;i<p1;i++){
|
|
|
+ for(Long j=0;j<2*p1;j++){
|
|
|
+ Long i0=((i+0) );
|
|
|
+ Long i1=((i+1) );
|
|
|
+ Long j0=((j+0) );
|
|
|
+ Long j1=((j+1)%(2*p1));
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
|
|
|
+ poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
|
|
|
+ poly_offset.push_back(poly_connect.size());
|
|
|
}
|
|
|
- offset0+=Min .Dim(0)*Min .Dim(1);
|
|
|
- offset1+=Mout.Dim(0)*Mout.Dim(1);
|
|
|
}
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- #pragma omp parallel
|
|
|
- { // Transpose and evaluate Fourier
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ Integer np = comm.Size();
|
|
|
+ Integer myrank = comm.Rank();
|
|
|
|
|
|
- Long a=(tid+0)*N*Nt/omp_p;
|
|
|
- Long b=(tid+1)*N*Nt/omp_p;
|
|
|
+ std::vector<VTKReal>& coord=point_coord;
|
|
|
+ std::vector<VTKReal>& value=point_value;
|
|
|
+ std::vector<int32_t>& connect=poly_connect;
|
|
|
+ std::vector<int32_t>& offset=poly_offset;
|
|
|
|
|
|
- Vector<Real> buff(Mf.Dim(0)); buff = 0;
|
|
|
- Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
|
|
|
- Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- { // buff <-- Transpose(B1)
|
|
|
- buff[0] = B1_[0][i];
|
|
|
- buff[1] = 0;
|
|
|
- for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
|
|
|
- for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
|
|
|
- }
|
|
|
- { // X <-- FFT(buff)
|
|
|
- Vector<Real> Xi(Np, X->begin() + Np * i, false);
|
|
|
- Mf.Execute(buff, Xi);
|
|
|
- }
|
|
|
+ Long pt_cnt=coord.size()/COORD_DIM;
|
|
|
+ Long poly_cnt=poly_offset.size();
|
|
|
|
|
|
- if(X_phi){ // Evaluate Fourier gradient
|
|
|
- { // buff <-- Transpose(B1)
|
|
|
- buff[0] = 0;
|
|
|
- buff[1] = 0;
|
|
|
- for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
|
|
|
- for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
|
|
|
- for (Long j = 1; j < buff.Dim()/2; j++) {
|
|
|
- Real x = buff[2*j+0];
|
|
|
- Real y = buff[2*j+1];
|
|
|
- buff[2*j+0] = -j*y;
|
|
|
- buff[2*j+1] = j*x;
|
|
|
- }
|
|
|
- }
|
|
|
- { // X_phi <-- FFT(buff)
|
|
|
- Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
|
|
|
- Mf.Execute(buff, Xi);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ // Open file for writing.
|
|
|
+ std::stringstream vtufname;
|
|
|
+ vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
|
|
|
+ std::ofstream vtufile;
|
|
|
+ vtufile.open(vtufname.str().c_str());
|
|
|
+ if(vtufile.fail()) return;
|
|
|
+
|
|
|
+ bool isLittleEndian;
|
|
|
+ { // Set isLittleEndian
|
|
|
+ uint16_t number = 0x1;
|
|
|
+ uint8_t *numPtr = (uint8_t*)&number;
|
|
|
+ isLittleEndian=(numPtr[0] == 1);
|
|
|
}
|
|
|
- if(X_theta){
|
|
|
- #pragma omp parallel
|
|
|
- { // Evaluate Legendre gradient
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- Long offset0=0;
|
|
|
- Long offset1=0;
|
|
|
- for(Long i=0;i<p0+1;i++){
|
|
|
- Long N_ = (i==0 ? N : 2*N);
|
|
|
- const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
|
|
|
- Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
|
|
|
- { // Mout = Min * Mdl[i] // split between threads
|
|
|
- Long a=(tid+0)*N_/omp_p;
|
|
|
- Long b=(tid+1)*N_/omp_p;
|
|
|
- if(a<b){
|
|
|
- const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
|
|
|
- Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
|
|
|
- Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
|
|
|
- }
|
|
|
- }
|
|
|
- offset0+=Min .Dim(0)*Min .Dim(1);
|
|
|
- offset1+=Mout.Dim(0)*Mout.Dim(1);
|
|
|
- }
|
|
|
- }
|
|
|
+ // Proceed to write to file.
|
|
|
+ Long data_size=0;
|
|
|
+ vtufile<<"<?xml version=\"1.0\"?>\n";
|
|
|
+ if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
|
|
|
+ else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
|
|
|
+ //===========================================================================
|
|
|
+ vtufile<<" <PolyData>\n";
|
|
|
+ vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
|
|
|
|
|
|
- #pragma omp parallel
|
|
|
- { // Transpose and evaluate Fourier
|
|
|
- Integer tid=omp_get_thread_num();
|
|
|
- Integer omp_p=omp_get_num_threads();
|
|
|
+ //---------------------------------------------------------------------------
|
|
|
+ vtufile<<" <Points>\n";
|
|
|
+ vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
+ data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
|
|
|
+ vtufile<<" </Points>\n";
|
|
|
+ //---------------------------------------------------------------------------
|
|
|
+ if(value.size()){ // value
|
|
|
+ vtufile<<" <PointData>\n";
|
|
|
+ vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
+ data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
|
|
|
+ vtufile<<" </PointData>\n";
|
|
|
+ }
|
|
|
+ //---------------------------------------------------------------------------
|
|
|
+ vtufile<<" <Polys>\n";
|
|
|
+ vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
+ data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
|
|
|
+ vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
+ data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
|
|
|
+ vtufile<<" </Polys>\n";
|
|
|
+ //---------------------------------------------------------------------------
|
|
|
|
|
|
- Long a=(tid+0)*N*Nt/omp_p;
|
|
|
- Long b=(tid+1)*N*Nt/omp_p;
|
|
|
+ vtufile<<" </Piece>\n";
|
|
|
+ vtufile<<" </PolyData>\n";
|
|
|
+ //===========================================================================
|
|
|
+ vtufile<<" <AppendedData encoding=\"raw\">\n";
|
|
|
+ vtufile<<" _";
|
|
|
|
|
|
- Vector<Real> buff(Mf.Dim(0)); buff = 0;
|
|
|
- Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
|
|
|
- Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- { // buff <-- Transpose(B1)
|
|
|
- buff[0] = B1_[0][i];
|
|
|
- buff[1] = 0;
|
|
|
- for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
|
|
|
- for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
|
|
|
- }
|
|
|
- { // Xi <-- FFT(buff)
|
|
|
- Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
|
|
|
- Mf.Execute(buff, Xi);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ int32_t block_size;
|
|
|
+ block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
|
|
|
+ if(value.size()){ // value
|
|
|
+ block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
|
|
|
}
|
|
|
-}
|
|
|
+ block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
|
|
|
+ block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::SHCEval(const Vector<Real>& S, SHCArrange arrange, Long p0, const Vector<Real>& cos_theta_phi, Vector<Real>& X) {
|
|
|
- Long M = (p0+1) * (p0+1);
|
|
|
+ vtufile<<"\n";
|
|
|
+ vtufile<<" </AppendedData>\n";
|
|
|
+ //===========================================================================
|
|
|
+ vtufile<<"</VTKFile>\n";
|
|
|
+ vtufile.close();
|
|
|
|
|
|
- Long dof;
|
|
|
- Matrix<Real> B1;
|
|
|
- { // Set B1, dof
|
|
|
- Vector<Real> B0;
|
|
|
- SHCArrange1(S, arrange, p0, B0);
|
|
|
- dof = B0.Dim() / M;
|
|
|
- assert(B0.Dim() == dof * M);
|
|
|
|
|
|
- B1.ReInit(dof, M);
|
|
|
- Vector<Real> B1_(B1.Dim(0) * B1.Dim(1), B1.begin(), false);
|
|
|
- SHCArrange0(B0, p0, B1_, SHCArrange::COL_MAJOR_NONZERO);
|
|
|
+ if(myrank) return;
|
|
|
+ std::stringstream pvtufname;
|
|
|
+ pvtufname<<fname<<".pvtp";
|
|
|
+ std::ofstream pvtufile;
|
|
|
+ pvtufile.open(pvtufname.str().c_str());
|
|
|
+ if(pvtufile.fail()) return;
|
|
|
+ pvtufile<<"<?xml version=\"1.0\"?>\n";
|
|
|
+ pvtufile<<"<VTKFile type=\"PPolyData\">\n";
|
|
|
+ pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
|
|
|
+ pvtufile<<" <PPoints>\n";
|
|
|
+ pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
|
|
|
+ pvtufile<<" </PPoints>\n";
|
|
|
+ if(value.size()){ // value
|
|
|
+ pvtufile<<" <PPointData>\n";
|
|
|
+ pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
|
|
|
+ pvtufile<<" </PPointData>\n";
|
|
|
}
|
|
|
- assert(B1.Dim(0) == dof);
|
|
|
- assert(B1.Dim(1) == M);
|
|
|
+ {
|
|
|
+ // Extract filename from path.
|
|
|
+ std::stringstream vtupath;
|
|
|
+ vtupath<<'/'<<fname;
|
|
|
+ std::string pathname = vtupath.str();
|
|
|
+ auto found = pathname.find_last_of("/\\");
|
|
|
+ std::string fname_ = pathname.substr(found+1);
|
|
|
+ for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
|
|
|
+ }
|
|
|
+ pvtufile<<" </PPolyData>\n";
|
|
|
+ pvtufile<<"</VTKFile>\n";
|
|
|
+ pvtufile.close();
|
|
|
+}
|
|
|
|
|
|
- Matrix<Real> SHBasis;
|
|
|
- SHBasisEval(p0, cos_theta_phi, SHBasis);
|
|
|
- assert(SHBasis.Dim(1) == M);
|
|
|
- Long N = SHBasis.Dim(0);
|
|
|
+template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
|
|
|
+ Long N = X.Dim() / (Np*Nt);
|
|
|
+ assert(X.Dim() == N*Np*Nt);
|
|
|
+ assert(N % COORD_DIM == 0);
|
|
|
|
|
|
- if (X.Dim() != N*dof) X.ReInit(N * dof);
|
|
|
- { // Set X
|
|
|
- for (Long k0 = 0; k0 < N; k0++) {
|
|
|
- for (Long k1 = 0; k1 < dof; k1++) {
|
|
|
- Real X_ = 0;
|
|
|
- for (Long i = 0; i < M; i++) X_ += B1[k1][i] * SHBasis[k0][i];
|
|
|
- X[k0 * dof + k1] = X_;
|
|
|
+ Vector<Real> B0(N*Nt*Np);
|
|
|
+ { // Set B0
|
|
|
+ B0 = X;
|
|
|
+ const auto& Y = LegendreNodes(Nt - 1);
|
|
|
+ assert(Y.Dim() == Nt);
|
|
|
+ for (Long k = 0; k < N; k++) {
|
|
|
+ if (k % COORD_DIM) {
|
|
|
+ for (Long i = 0; i < Nt; i++) {
|
|
|
+ Real s = 1/sqrt<Real>(1 - Y[i]*Y[i]);
|
|
|
+ for (Long j = 0; j < Np; j++) {
|
|
|
+ B0[(k*Nt+i)*Np+j] *= s;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-}
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::SHC2Pole(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& P){
|
|
|
- Vector<Real> QP[2];
|
|
|
- { // Set QP // TODO: store these weights
|
|
|
- Vector<Real> x(1), alp;
|
|
|
- const Real SQRT2PI = sqrt<Real>(4 * const_pi<Real>());
|
|
|
- for (Long i = 0; i < 2; i++) {
|
|
|
- x = (i ? -1 : 1);
|
|
|
- LegPoly(alp, x, p0);
|
|
|
- QP[i].ReInit(p0 + 1, alp.begin());
|
|
|
- QP[i] *= SQRT2PI;
|
|
|
- }
|
|
|
- }
|
|
|
+ Long p_ = p0 + 1;
|
|
|
+ Long M0 = (p0+1)*(p0+1);
|
|
|
+ Long M_ = (p_+1)*(p_+1);
|
|
|
+ Vector<Real> B1(N*M_);
|
|
|
+ Grid2SHC_(B0, Nt, Np, p_, B1);
|
|
|
+ B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
|
|
|
|
|
|
- Long M, N;
|
|
|
- { // Set M, N
|
|
|
- M = 0;
|
|
|
- if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
|
|
|
- if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
|
|
|
- if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
|
|
|
- if (M == 0) return;
|
|
|
- N = S.Dim() / M;
|
|
|
- assert(S.Dim() == N * M);
|
|
|
- }
|
|
|
- if(P.Dim() != N * 2) P.ReInit(N * 2);
|
|
|
+ Vector<Real> B2(N*M0);
|
|
|
+ const Complex<Real> imag(0,1);
|
|
|
+ for (Long i=0; i<N; i+=COORD_DIM) {
|
|
|
+ for (Long m=0; m<=p0; m++) {
|
|
|
+ for (Long n=m; n<=p0; n++) {
|
|
|
+ auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
+ Complex<Real> c;
|
|
|
+ if (0<=m && m<=n && n<=p) {
|
|
|
+ Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
+ Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
+ c.real = coeff[idx_real];
|
|
|
+ if (m) c.imag = coeff[idx_imag];
|
|
|
+ }
|
|
|
+ return c;
|
|
|
+ };
|
|
|
+ auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
+ if (0<=m && m<=n && n<=p) {
|
|
|
+ Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
+ Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
+ coeff[idx_real] = c.real;
|
|
|
+ if (m) coeff[idx_imag] = c.imag;
|
|
|
+ }
|
|
|
+ };
|
|
|
|
|
|
- if (arrange == SHCArrange::ALL) {
|
|
|
- #pragma omp parallel
|
|
|
- { // Compute pole
|
|
|
- Integer tid = omp_get_thread_num();
|
|
|
- Integer omp_p = omp_get_num_threads();
|
|
|
+ auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
|
|
|
+ auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
|
|
|
+ auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
|
|
|
|
|
|
- Long a = (tid + 0) * N / omp_p;
|
|
|
- Long b = (tid + 1) * N / omp_p;
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- Real P_[2] = {0, 0};
|
|
|
- for (Long j = 0; j < p0 + 1; j++) {
|
|
|
- P_[0] += S[i*M + j*(p0+1)*2] * QP[0][j];
|
|
|
- P_[1] += S[i*M + j*(p0+1)*2] * QP[1][j];
|
|
|
+ Complex<Real> phiY, phiG, phiX;
|
|
|
+ { // (phiG, phiX) <-- (gt, gp)
|
|
|
+ auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
|
|
|
+ auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
|
|
|
+ phiY = gr(n,m);
|
|
|
+ phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
|
|
|
+ phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
|
|
|
}
|
|
|
- P[2*i+0] = P_[0];
|
|
|
- P[2*i+1] = P_[1];
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if (arrange == SHCArrange::ROW_MAJOR) {
|
|
|
- #pragma omp parallel
|
|
|
- { // Compute pole
|
|
|
- Integer tid = omp_get_thread_num();
|
|
|
- Integer omp_p = omp_get_num_threads();
|
|
|
|
|
|
- Long a = (tid + 0) * N / omp_p;
|
|
|
- Long b = (tid + 1) * N / omp_p;
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- Long idx = 0;
|
|
|
- Real P_[2] = {0, 0};
|
|
|
- for (Long j = 0; j < p0 + 1; j++) {
|
|
|
- P_[0] += S[i*M+idx] * QP[0][j];
|
|
|
- P_[1] += S[i*M+idx] * QP[1][j];
|
|
|
- idx += 2*(j+1);
|
|
|
+ auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
|
|
|
+ auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
|
|
|
+
|
|
|
+ if (n==0) {
|
|
|
+ phiW = 0;
|
|
|
+ phiX = 0;
|
|
|
}
|
|
|
- P[2*i+0] = P_[0];
|
|
|
- P[2*i+1] = P_[1];
|
|
|
+ write_coeff(phiV, B2, i+0, p0, n, m);
|
|
|
+ write_coeff(phiW, B2, i+1, p0, n, m);
|
|
|
+ write_coeff(phiX, B2, i+2, p0, n, m);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if (arrange == SHCArrange::COL_MAJOR_NONZERO) {
|
|
|
- #pragma omp parallel
|
|
|
- { // Compute pole
|
|
|
- Integer tid = omp_get_thread_num();
|
|
|
- Integer omp_p = omp_get_num_threads();
|
|
|
|
|
|
- Long a = (tid + 0) * N / omp_p;
|
|
|
- Long b = (tid + 1) * N / omp_p;
|
|
|
- for (Long i = a; i < b; i++) {
|
|
|
- Real P_[2] = {0, 0};
|
|
|
- for (Long j = 0; j < p0 + 1; j++) {
|
|
|
- P_[0] += S[i*M+j] * QP[0][j];
|
|
|
- P_[1] += S[i*M+j] * QP[1][j];
|
|
|
- }
|
|
|
- P[2*i+0] = P_[0];
|
|
|
- P[2*i+1] = P_[1];
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ SHCArrange0(B2, p0, S, arrange);
|
|
|
}
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::WriteVTK(const char* fname, const Vector<Real>* S, const Vector<Real>* v_ptr, SHCArrange arrange, Long p0, Long p1, Real period, const Comm& comm){
|
|
|
- typedef double VTKReal;
|
|
|
-
|
|
|
- Vector<Real> SS;
|
|
|
- if (S == nullptr) {
|
|
|
- Integer p = 2;
|
|
|
- Integer Ncoeff = (p + 1) * (p + 1);
|
|
|
- Vector<Real> SSS(COORD_DIM * Ncoeff), SSS_grid;
|
|
|
- SSS.SetZero();
|
|
|
- SSS[1+0*p+0*Ncoeff] = sqrt<Real>(2.0)/sqrt<Real>(3.0);
|
|
|
- SSS[1+1*p+1*Ncoeff] = 1/sqrt<Real>(3.0);
|
|
|
- SSS[1+2*p+2*Ncoeff] = 1/sqrt<Real>(3.0);
|
|
|
- SphericalHarmonics<Real>::SHC2Grid(SSS, SHCArrange::COL_MAJOR_NONZERO, p, p+1, 2*p+2, &SSS_grid);
|
|
|
- SphericalHarmonics<Real>::Grid2SHC(SSS_grid, p+1, 2*p+2, p0, SS, arrange);
|
|
|
- S = &SS;
|
|
|
- }
|
|
|
+template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>& X) {
|
|
|
+ Vector<Real> B0;
|
|
|
+ SHCArrange1(S, arrange, p0, B0);
|
|
|
|
|
|
- Vector<Real> X, Xp, V, Vp;
|
|
|
- { // Upsample X
|
|
|
- const Vector<Real>& X0=*S;
|
|
|
- SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &X);
|
|
|
- SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Xp);
|
|
|
- }
|
|
|
- if(v_ptr){ // Upsample V
|
|
|
- const Vector<Real>& X0=*v_ptr;
|
|
|
- SphericalHarmonics<Real>::SHC2Grid(X0, arrange, p0, p1+1, 2*p1, &V);
|
|
|
- SphericalHarmonics<Real>::SHC2Pole(X0, arrange, p0, Vp);
|
|
|
- }
|
|
|
+ Long p_ = p0 + 1;
|
|
|
+ Long M0 = (p0+1)*(p0+1);
|
|
|
+ Long M_ = (p_+1)*(p_+1);
|
|
|
+ Long N = B0.Dim() / M0;
|
|
|
+ assert(B0.Dim() == N*M0);
|
|
|
+ assert(N % COORD_DIM == 0);
|
|
|
|
|
|
- std::vector<VTKReal> point_coord;
|
|
|
- std::vector<VTKReal> point_value;
|
|
|
- std::vector<int32_t> poly_connect;
|
|
|
- std::vector<int32_t> poly_offset;
|
|
|
- { // Set point_coord, point_value, poly_connect
|
|
|
- Long N_ves = X.Dim()/(2*p1*(p1+1)*COORD_DIM); // Number of vesicles
|
|
|
- assert(Xp.Dim() == N_ves*2*COORD_DIM);
|
|
|
- for(Long k=0;k<N_ves;k++){ // Set point_coord
|
|
|
- Real C[COORD_DIM]={0,0,0};
|
|
|
- if(period>0){
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) C[l]=0;
|
|
|
- for(Long i=0;i<p1+1;i++){
|
|
|
- for(Long j=0;j<2*p1;j++){
|
|
|
- for(Integer l=0;l<COORD_DIM;l++){
|
|
|
- C[l]+=X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))];
|
|
|
- }
|
|
|
+ Vector<Real> B1(N*M_);
|
|
|
+ const Complex<Real> imag(0,1);
|
|
|
+ for (Long i=0; i<N; i+=COORD_DIM) {
|
|
|
+ for (Long m=0; m<=p_; m++) {
|
|
|
+ for (Long n=m; n<=p_; n++) {
|
|
|
+ auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
+ Complex<Real> c;
|
|
|
+ if (0<=m && m<=n && n<=p) {
|
|
|
+ Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
+ Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
+ c.real = coeff[idx_real];
|
|
|
+ if (m) c.imag = coeff[idx_imag];
|
|
|
}
|
|
|
- }
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[0+2*(l+k*COORD_DIM)];
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) C[l]+=Xp[1+2*(l+k*COORD_DIM)];
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) C[l]/=2*p1*(p1+1)+2;
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) C[l]=(round(C[l]/period))*period;
|
|
|
- }
|
|
|
-
|
|
|
- for(Long i=0;i<p1+1;i++){
|
|
|
- for(Long j=0;j<2*p1;j++){
|
|
|
- for(Integer l=0;l<COORD_DIM;l++){
|
|
|
- point_coord.push_back(X[j+2*p1*(i+(p1+1)*(l+k*COORD_DIM))]-C[l]);
|
|
|
+ return c;
|
|
|
+ };
|
|
|
+ auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
+ if (0<=m && m<=n && n<=p) {
|
|
|
+ Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
+ Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
+ coeff[idx_real] = c.real;
|
|
|
+ if (m) coeff[idx_imag] = c.imag;
|
|
|
}
|
|
|
+ };
|
|
|
+
|
|
|
+ auto phiG = [&](Long n, Long m) {
|
|
|
+ auto phiV = read_coeff(B0, i+0, p0, n, m);
|
|
|
+ auto phiW = read_coeff(B0, i+1, p0, n, m);
|
|
|
+ return phiV + phiW;
|
|
|
+ };
|
|
|
+ auto phiY = [&](Long n, Long m) {
|
|
|
+ auto phiV = read_coeff(B0, i+0, p0, n, m);
|
|
|
+ auto phiW = read_coeff(B0, i+1, p0, n, m);
|
|
|
+ return -phiV * (n + 1) + phiW * n;
|
|
|
+ };
|
|
|
+ auto phiX = [&](Long n, Long m) {
|
|
|
+ return read_coeff(B0, i+2, p0, n, m);
|
|
|
+ };
|
|
|
+
|
|
|
+ Complex<Real> gr, gt, gp;
|
|
|
+ { // (gt, gp) <-- (phiG, phiX)
|
|
|
+ auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
|
|
|
+ auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
|
|
|
+ gr = phiY(n,m);
|
|
|
+ gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
|
|
|
+ gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
|
|
|
}
|
|
|
+
|
|
|
+ write_coeff(gr, B1, i+0, p_, n, m);
|
|
|
+ write_coeff(gt, B1, i+1, p_, n, m);
|
|
|
+ write_coeff(gp, B1, i+2, p_, n, m);
|
|
|
}
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[0+2*(l+k*COORD_DIM)]-C[l]);
|
|
|
- for(Integer l=0;l<COORD_DIM;l++) point_coord.push_back(Xp[1+2*(l+k*COORD_DIM)]-C[l]);
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- if(v_ptr) {
|
|
|
- Long data__dof = V.Dim() / (2*p1*(p1+1));
|
|
|
- for(Long k=0;k<N_ves;k++){ // Set point_value
|
|
|
- for(Long i=0;i<p1+1;i++){
|
|
|
- for(Long j=0;j<2*p1;j++){
|
|
|
- for(Long l=0;l<data__dof;l++){
|
|
|
- point_value.push_back(V[j+2*p1*(i+(p1+1)*(l+k*data__dof))]);
|
|
|
- }
|
|
|
+ B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
|
|
|
+ SHC2Grid_(B1, p_, Nt, Np, &X);
|
|
|
+
|
|
|
+ { // Set X
|
|
|
+ const auto& Y = LegendreNodes(Nt - 1);
|
|
|
+ assert(Y.Dim() == Nt);
|
|
|
+ for (Long k = 0; k < N; k++) {
|
|
|
+ if (k % COORD_DIM) {
|
|
|
+ for (Long i = 0; i < Nt; i++) {
|
|
|
+ Real s = 1/sqrt<Real>(1 - Y[i]*Y[i]);
|
|
|
+ for (Long j = 0; j < Np; j++) {
|
|
|
+ X[(k*Nt+i)*Np+j] *= s;
|
|
|
}
|
|
|
}
|
|
|
- for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[0+2*(l+k*data__dof)]);
|
|
|
- for(Long l=0;l<data__dof;l++) point_value.push_back(Vp[1+2*(l+k*data__dof)]);
|
|
|
}
|
|
|
}
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
- for(Long k=0;k<N_ves;k++){
|
|
|
- for(Long j=0;j<2*p1;j++){
|
|
|
- Long i0= 0;
|
|
|
- Long i1=p1;
|
|
|
- Long j0=((j+0) );
|
|
|
- Long j1=((j+1)%(2*p1));
|
|
|
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+0);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
|
|
|
- poly_offset.push_back(poly_connect.size());
|
|
|
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*(p1+1)+1);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
|
|
|
- poly_offset.push_back(poly_connect.size());
|
|
|
- }
|
|
|
- for(Long i=0;i<p1;i++){
|
|
|
- for(Long j=0;j<2*p1;j++){
|
|
|
- Long i0=((i+0) );
|
|
|
- Long i1=((i+1) );
|
|
|
- Long j0=((j+0) );
|
|
|
- Long j1=((j+1)%(2*p1));
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j0);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j0);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i1+j1);
|
|
|
- poly_connect.push_back((2*p1*(p1+1)+2)*k + 2*p1*i0+j1);
|
|
|
- poly_offset.push_back(poly_connect.size());
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- Integer np = comm.Size();
|
|
|
- Integer myrank = comm.Rank();
|
|
|
|
|
|
- std::vector<VTKReal>& coord=point_coord;
|
|
|
- std::vector<VTKReal>& value=point_value;
|
|
|
- std::vector<int32_t>& connect=poly_connect;
|
|
|
- std::vector<int32_t>& offset=poly_offset;
|
|
|
|
|
|
- Long pt_cnt=coord.size()/COORD_DIM;
|
|
|
- Long poly_cnt=poly_offset.size();
|
|
|
+template <class Real> void SphericalHarmonics<Real>::Grid2SHC_(const Vector<Real>& X, Long Nt, Long Np, Long p1, Vector<Real>& B1){
|
|
|
+ const auto& Mf = OpFourierInv(Np);
|
|
|
+ assert(Mf.Dim(0) == Np);
|
|
|
|
|
|
- // Open file for writing.
|
|
|
- std::stringstream vtufname;
|
|
|
- vtufname<<fname<<"_"<<std::setfill('0')<<std::setw(6)<<myrank<<".vtp";
|
|
|
- std::ofstream vtufile;
|
|
|
- vtufile.open(vtufname.str().c_str());
|
|
|
- if(vtufile.fail()) return;
|
|
|
+ const std::vector<Matrix<Real>>& Ml = SphericalHarmonics<Real>::MatLegendreInv(Nt-1,p1);
|
|
|
+ assert((Long)Ml.size() == p1+1);
|
|
|
|
|
|
- bool isLittleEndian;
|
|
|
- { // Set isLittleEndian
|
|
|
- uint16_t number = 0x1;
|
|
|
- uint8_t *numPtr = (uint8_t*)&number;
|
|
|
- isLittleEndian=(numPtr[0] == 1);
|
|
|
- }
|
|
|
+ Long N = X.Dim() / (Np*Nt);
|
|
|
+ assert(X.Dim() == N*Np*Nt);
|
|
|
|
|
|
- // Proceed to write to file.
|
|
|
- Long data_size=0;
|
|
|
- vtufile<<"<?xml version=\"1.0\"?>\n";
|
|
|
- if(isLittleEndian) vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"LittleEndian\">\n";
|
|
|
- else vtufile<<"<VTKFile type=\"PolyData\" version=\"0.1\" byte_order=\"BigEndian\">\n";
|
|
|
- //===========================================================================
|
|
|
- vtufile<<" <PolyData>\n";
|
|
|
- vtufile<<" <Piece NumberOfPoints=\""<<pt_cnt<<"\" NumberOfVerts=\"0\" NumberOfLines=\"0\" NumberOfStrips=\"0\" NumberOfPolys=\""<<poly_cnt<<"\">\n";
|
|
|
+ Vector<Real> B0((2*p1+1) * N*Nt);
|
|
|
+ #pragma omp parallel
|
|
|
+ { // B0 <-- Transpose(FFT(X))
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
+ Long a=(tid+0)*N*Nt/omp_p;
|
|
|
+ Long b=(tid+1)*N*Nt/omp_p;
|
|
|
|
|
|
- //---------------------------------------------------------------------------
|
|
|
- vtufile<<" <Points>\n";
|
|
|
- vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
- data_size+=sizeof(uint32_t)+coord.size()*sizeof(VTKReal);
|
|
|
- vtufile<<" </Points>\n";
|
|
|
- //---------------------------------------------------------------------------
|
|
|
- if(value.size()){ // value
|
|
|
- vtufile<<" <PointData>\n";
|
|
|
- vtufile<<" <DataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
- data_size+=sizeof(uint32_t)+value.size()*sizeof(VTKReal);
|
|
|
- vtufile<<" </PointData>\n";
|
|
|
+ Vector<Real> buff(Mf.Dim(1));
|
|
|
+ Long fft_coeff_len = std::min(buff.Dim(), 2*p1+2);
|
|
|
+ Matrix<Real> B0_(2*p1+1, N*Nt, B0.begin(), false);
|
|
|
+ const Matrix<Real> MX(N * Nt, Np, (Iterator<Real>)X.begin(), false);
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ { // buff <-- FFT(Xi)
|
|
|
+ const Vector<Real> Xi(Np, (Iterator<Real>)X.begin() + Np * i, false);
|
|
|
+ Mf.Execute(Xi, buff);
|
|
|
+ }
|
|
|
+ { // B0 <-- Transpose(buff)
|
|
|
+ B0_[0][i] = buff[0]; // skipping buff[1] == 0
|
|
|
+ for (Long j = 2; j < fft_coeff_len; j++) B0_[j-1][i] = buff[j];
|
|
|
+ for (Long j = fft_coeff_len; j < 2*p1+2; j++) B0_[j-1][i] = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- //---------------------------------------------------------------------------
|
|
|
- vtufile<<" <Polys>\n";
|
|
|
- vtufile<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
- data_size+=sizeof(uint32_t)+connect.size()*sizeof(int32_t);
|
|
|
- vtufile<<" <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\""<<data_size<<"\" />\n";
|
|
|
- data_size+=sizeof(uint32_t)+offset.size() *sizeof(int32_t);
|
|
|
- vtufile<<" </Polys>\n";
|
|
|
- //---------------------------------------------------------------------------
|
|
|
|
|
|
- vtufile<<" </Piece>\n";
|
|
|
- vtufile<<" </PolyData>\n";
|
|
|
- //===========================================================================
|
|
|
- vtufile<<" <AppendedData encoding=\"raw\">\n";
|
|
|
- vtufile<<" _";
|
|
|
+ if (B1.Dim() != N*(p1+1)*(p1+1)) B1.ReInit(N*(p1+1)*(p1+1));
|
|
|
+ #pragma omp parallel
|
|
|
+ { // Evaluate Legendre polynomial
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- int32_t block_size;
|
|
|
- block_size=coord.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&coord [0], coord.size()*sizeof(VTKReal));
|
|
|
- if(value.size()){ // value
|
|
|
- block_size=value.size()*sizeof(VTKReal); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&value [0], value.size()*sizeof(VTKReal));
|
|
|
+ Long offset0=0;
|
|
|
+ Long offset1=0;
|
|
|
+ for (Long i = 0; i < p1+1; i++) {
|
|
|
+ Long N_ = (i==0 ? N : 2*N);
|
|
|
+ Matrix<Real> Min (N_, Nt , B0.begin()+offset0, false);
|
|
|
+ Matrix<Real> Mout(N_, p1+1-i, B1.begin()+offset1, false);
|
|
|
+ { // Mout = Min * Ml[i] // split between threads
|
|
|
+ Long a=(tid+0)*N_/omp_p;
|
|
|
+ Long b=(tid+1)*N_/omp_p;
|
|
|
+ if (a < b) {
|
|
|
+ Matrix<Real> Min_ (b-a, Min .Dim(1), Min [a], false);
|
|
|
+ Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
|
|
|
+ Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ offset0+=Min .Dim(0)*Min .Dim(1);
|
|
|
+ offset1+=Mout.Dim(0)*Mout.Dim(1);
|
|
|
+ }
|
|
|
+ assert(offset0 == B0.Dim());
|
|
|
+ assert(offset1 == B1.Dim());
|
|
|
}
|
|
|
- block_size=connect.size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&connect[0], connect.size()*sizeof(int32_t));
|
|
|
- block_size=offset .size()*sizeof(int32_t); vtufile.write((char*)&block_size, sizeof(int32_t)); vtufile.write((char*)&offset [0], offset .size()*sizeof(int32_t));
|
|
|
-
|
|
|
- vtufile<<"\n";
|
|
|
- vtufile<<" </AppendedData>\n";
|
|
|
- //===========================================================================
|
|
|
- vtufile<<"</VTKFile>\n";
|
|
|
- vtufile.close();
|
|
|
+}
|
|
|
+template <class Real> void SphericalHarmonics<Real>::SHCArrange0(const Vector<Real>& B1, Long p1, Vector<Real>& S, SHCArrange arrange){
|
|
|
+ Long M = (p1+1)*(p1+1);
|
|
|
+ Long N = B1.Dim() / M;
|
|
|
+ assert(B1.Dim() == N*M);
|
|
|
+ if (arrange == SHCArrange::ALL) { // S <-- Rearrange(B1)
|
|
|
+ Long M = 2*(p1+1)*(p1+1);
|
|
|
+ if(S.Dim() != N * M) S.ReInit(N * M);
|
|
|
+ #pragma omp parallel
|
|
|
+ { // S <-- Rearrange(B1)
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
+ Long a=(tid+0)*N/omp_p;
|
|
|
+ Long b=(tid+1)*N/omp_p;
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ Long offset = 0;
|
|
|
+ for (Long j = 0; j < p1+1; j++) {
|
|
|
+ Long len = p1+1 - j;
|
|
|
+ if (1) { // Set Real(S_n^m) for m=j and n=j..p
|
|
|
+ ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 0;
|
|
|
+ for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ if (j) { // Set Imag(S_n^m) for m=j and n=j..p
|
|
|
+ ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
|
|
|
+ for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = B_[k];
|
|
|
+ offset += len;
|
|
|
+ } else {
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + j*(p1+1)*2 + j*2 + 1;
|
|
|
+ for (Long k = 0; k < len; k++) S_[k * (p1+1)*2] = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (arrange == SHCArrange::ROW_MAJOR) { // S <-- Rearrange(B1)
|
|
|
+ Long M = (p1+1)*(p1+2);
|
|
|
+ if(S.Dim() != N * M) S.ReInit(N * M);
|
|
|
+ #pragma omp parallel
|
|
|
+ { // S <-- Rearrange(B1)
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- if(myrank) return;
|
|
|
- std::stringstream pvtufname;
|
|
|
- pvtufname<<fname<<".pvtp";
|
|
|
- std::ofstream pvtufile;
|
|
|
- pvtufile.open(pvtufname.str().c_str());
|
|
|
- if(pvtufile.fail()) return;
|
|
|
- pvtufile<<"<?xml version=\"1.0\"?>\n";
|
|
|
- pvtufile<<"<VTKFile type=\"PPolyData\">\n";
|
|
|
- pvtufile<<" <PPolyData GhostLevel=\"0\">\n";
|
|
|
- pvtufile<<" <PPoints>\n";
|
|
|
- pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<COORD_DIM<<"\" Name=\"Position\"/>\n";
|
|
|
- pvtufile<<" </PPoints>\n";
|
|
|
- if(value.size()){ // value
|
|
|
- pvtufile<<" <PPointData>\n";
|
|
|
- pvtufile<<" <PDataArray type=\"Float"<<sizeof(VTKReal)*8<<"\" NumberOfComponents=\""<<value.size()/pt_cnt<<"\" Name=\""<<"value"<<"\"/>\n";
|
|
|
- pvtufile<<" </PPointData>\n";
|
|
|
+ Long a=(tid+0)*N/omp_p;
|
|
|
+ Long b=(tid+1)*N/omp_p;
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ Long offset = 0;
|
|
|
+ for (Long j = 0; j < p1+1; j++) {
|
|
|
+ Long len = p1+1 - j;
|
|
|
+ if (1) { // Set Real(S_n^m) for m=j and n=j..p
|
|
|
+ ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + 0;
|
|
|
+ for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ if (j) { // Set Imag(S_n^m) for m=j and n=j..p
|
|
|
+ ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + 1;
|
|
|
+ for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = B_[k];
|
|
|
+ offset += len;
|
|
|
+ } else {
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + 1;
|
|
|
+ for (Long k=0;k<len;k++) S_[(j+k)*(j+k+1) + 2*j] = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- {
|
|
|
- // Extract filename from path.
|
|
|
- std::stringstream vtupath;
|
|
|
- vtupath<<'/'<<fname;
|
|
|
- std::string pathname = vtupath.str();
|
|
|
- auto found = pathname.find_last_of("/\\");
|
|
|
- std::string fname_ = pathname.substr(found+1);
|
|
|
- for(Integer i=0;i<np;i++) pvtufile<<" <Piece Source=\""<<fname_<<"_"<<std::setfill('0')<<std::setw(6)<<i<<".vtp\"/>\n";
|
|
|
+ if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // S <-- Rearrange(B1)
|
|
|
+ Long M = (p1+1)*(p1+1);
|
|
|
+ if(S.Dim() != N * M) S.ReInit(N * M);
|
|
|
+ #pragma omp parallel
|
|
|
+ { // S <-- Rearrange(B1)
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
+
|
|
|
+ Long a=(tid+0)*N/omp_p;
|
|
|
+ Long b=(tid+1)*N/omp_p;
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ Long offset = 0;
|
|
|
+ for (Long j = 0; j < p1+1; j++) {
|
|
|
+ Long len = p1+1 - j;
|
|
|
+ if (1) { // Set Real(S_n^m) for m=j and n=j..p
|
|
|
+ ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
+ for (Long k = 0; k < len; k++) S_[k] = B_[k];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ if (j) { // Set Imag(S_n^m) for m=j and n=j..p
|
|
|
+ ConstIterator<Real> B_ = B1.begin() + i*len + N*offset;
|
|
|
+ Iterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
+ for (Long k = 0; k < len; k++) S_[k] = B_[k];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- pvtufile<<" </PPolyData>\n";
|
|
|
- pvtufile<<"</VTKFile>\n";
|
|
|
- pvtufile.close();
|
|
|
}
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::Grid2VecSHC(const Vector<Real>& X, Long Nt, Long Np, Long p0, Vector<Real>& S, SHCArrange arrange) {
|
|
|
- Long N = X.Dim() / (Np*Nt);
|
|
|
- assert(X.Dim() == N*Np*Nt);
|
|
|
- assert(N % COORD_DIM == 0);
|
|
|
+template <class Real> void SphericalHarmonics<Real>::SHCArrange1(const Vector<Real>& S, SHCArrange arrange, Long p0, Vector<Real>& B0){
|
|
|
+ Long M, N;
|
|
|
+ { // Set M, N
|
|
|
+ M = 0;
|
|
|
+ if (arrange == SHCArrange::ALL) M = 2*(p0+1)*(p0+1);
|
|
|
+ if (arrange == SHCArrange::ROW_MAJOR) M = (p0+1)*(p0+2);
|
|
|
+ if (arrange == SHCArrange::COL_MAJOR_NONZERO) M = (p0+1)*(p0+1);
|
|
|
+ if (M == 0) return;
|
|
|
+ N = S.Dim() / M;
|
|
|
+ assert(S.Dim() == N * M);
|
|
|
+ }
|
|
|
|
|
|
- Vector<Real> B0(N*Nt*Np);
|
|
|
- { // Set B0
|
|
|
- B0 = X;
|
|
|
- const auto& Y = LegendreNodes(Nt - 1);
|
|
|
- assert(Y.Dim() == Nt);
|
|
|
- for (Long k = 0; k < N; k++) {
|
|
|
- if (k % COORD_DIM) {
|
|
|
- for (Long i = 0; i < Nt; i++) {
|
|
|
- Real s = 1/sqrt<Real>(1 - Y[i]*Y[i]);
|
|
|
- for (Long j = 0; j < Np; j++) {
|
|
|
- B0[(k*Nt+i)*Np+j] *= s;
|
|
|
+ if (B0.Dim() != N*(p0+1)*(p0+1)) B0.ReInit(N*(p0+1)*(p0+1));
|
|
|
+ if (arrange == SHCArrange::ALL) { // B0 <-- Rearrange(S)
|
|
|
+ #pragma omp parallel
|
|
|
+ { // B0 <-- Rearrange(S)
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
+
|
|
|
+ Long a=(tid+0)*N/omp_p;
|
|
|
+ Long b=(tid+1)*N/omp_p;
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ Long offset = 0;
|
|
|
+ for (Long j = 0; j < p0+1; j++) {
|
|
|
+ Long len = p0+1 - j;
|
|
|
+ if (1) { // Get Real(S_n^m) for m=j and n=j..p
|
|
|
+ Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
+ ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 0;
|
|
|
+ for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ if (j) { // Get Imag(S_n^m) for m=j and n=j..p
|
|
|
+ Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
+ ConstIterator<Real> S_ = S .begin() + i*M + j*(p0+1)*2 + j*2 + 1;
|
|
|
+ for (Long k = 0; k < len; k++) B_[k] = S_[k * (p0+1)*2];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (arrange == SHCArrange::ROW_MAJOR) { // B0 <-- Rearrange(S)
|
|
|
+ #pragma omp parallel
|
|
|
+ { // B0 <-- Rearrange(S)
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
+
|
|
|
+ Long a=(tid+0)*N/omp_p;
|
|
|
+ Long b=(tid+1)*N/omp_p;
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ Long offset = 0;
|
|
|
+ for (Long j = 0; j < p0+1; j++) {
|
|
|
+ Long len = p0+1 - j;
|
|
|
+ if (1) { // Get Real(S_n^m) for m=j and n=j..p
|
|
|
+ Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
+ ConstIterator<Real> S_ = S .begin() + i*M + 0;
|
|
|
+ for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ if (j) { // Get Imag(S_n^m) for m=j and n=j..p
|
|
|
+ Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
+ ConstIterator<Real> S_ = S .begin() + i*M + 1;
|
|
|
+ for (Long k=0;k<len;k++) B_[k] = S_[(j+k)*(j+k+1) + 2*j];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (arrange == SHCArrange::COL_MAJOR_NONZERO) { // B0 <-- Rearrange(S)
|
|
|
+ #pragma omp parallel
|
|
|
+ { // B0 <-- Rearrange(S)
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
+
|
|
|
+ Long a=(tid+0)*N/omp_p;
|
|
|
+ Long b=(tid+1)*N/omp_p;
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ Long offset = 0;
|
|
|
+ for (Long j = 0; j < p0+1; j++) {
|
|
|
+ Long len = p0+1 - j;
|
|
|
+ if (1) { // Get Real(S_n^m) for m=j and n=j..p
|
|
|
+ Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
+ ConstIterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
+ for (Long k = 0; k < len; k++) B_[k] = S_[k];
|
|
|
+ offset += len;
|
|
|
+ }
|
|
|
+ if (j) { // Get Imag(S_n^m) for m=j and n=j..p
|
|
|
+ Iterator<Real> B_ = B0.begin() + i*len + N*offset;
|
|
|
+ ConstIterator<Real> S_ = S .begin() + i*M + offset;
|
|
|
+ for (Long k = 0; k < len; k++) B_[k] = S_[k];
|
|
|
+ offset += len;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+}
|
|
|
+template <class Real> void SphericalHarmonics<Real>::SHC2Grid_(const Vector<Real>& B0, Long p0, Long Nt, Long Np, Vector<Real>* X, Vector<Real>* X_phi, Vector<Real>* X_theta){
|
|
|
+ const auto& Mf = OpFourier(Np);
|
|
|
+ assert(Mf.Dim(1) == Np);
|
|
|
|
|
|
- Long p_ = p0 + 1;
|
|
|
- Long M0 = (p0+1)*(p0+1);
|
|
|
- Long M_ = (p_+1)*(p_+1);
|
|
|
- Vector<Real> B1(N*M_);
|
|
|
- Grid2SHC_(B0, Nt, Np, p_, B1);
|
|
|
- B1 *= 1 / sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
|
|
|
-
|
|
|
- Vector<Real> B2(N*M0);
|
|
|
- const Complex<Real> imag(0,1);
|
|
|
- for (Long i=0; i<N; i+=COORD_DIM) {
|
|
|
- for (Long m=0; m<=p0; m++) {
|
|
|
- for (Long n=m; n<=p0; n++) {
|
|
|
- auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
- Complex<Real> c;
|
|
|
- if (0<=m && m<=n && n<=p) {
|
|
|
- Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
- Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
- c.real = coeff[idx_real];
|
|
|
- if (m) c.imag = coeff[idx_imag];
|
|
|
- }
|
|
|
- return c;
|
|
|
- };
|
|
|
- auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
- if (0<=m && m<=n && n<=p) {
|
|
|
- Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
- Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
- coeff[idx_real] = c.real;
|
|
|
- if (m) coeff[idx_imag] = c.imag;
|
|
|
- }
|
|
|
- };
|
|
|
+ const std::vector<Matrix<Real>>& Ml =SphericalHarmonics<Real>::MatLegendre (p0,Nt-1);
|
|
|
+ const std::vector<Matrix<Real>>& Mdl=SphericalHarmonics<Real>::MatLegendreGrad(p0,Nt-1);
|
|
|
+ assert((Long)Ml .size() == p0+1);
|
|
|
+ assert((Long)Mdl.size() == p0+1);
|
|
|
|
|
|
- auto gr = [&](Long n, Long m) { return read_coeff(B1, i+0, p_, n, m); };
|
|
|
- auto gt = [&](Long n, Long m) { return read_coeff(B1, i+1, p_, n, m); };
|
|
|
- auto gp = [&](Long n, Long m) { return read_coeff(B1, i+2, p_, n, m); };
|
|
|
+ Long N = B0.Dim() / ((p0+1)*(p0+1));
|
|
|
+ assert(B0.Dim() == N*(p0+1)*(p0+1));
|
|
|
|
|
|
- Complex<Real> phiY, phiG, phiX;
|
|
|
- { // (phiG, phiX) <-- (gt, gp)
|
|
|
- auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
|
|
|
- auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
|
|
|
- phiY = gr(n,m);
|
|
|
- phiG = (gt(n+1,m)*A(n,m) - gt(n-1,m)*B(n,m) - imag*m*gp(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
|
|
|
- phiX = (gp(n+1,m)*A(n,m) - gp(n-1,m)*B(n,m) + imag*m*gt(n,m)) * (1/(Real)(std::max<Long>(n,1)*(n+1)));
|
|
|
- }
|
|
|
+ if(X && X ->Dim()!=N*Np*Nt) X ->ReInit(N*Np*Nt);
|
|
|
+ if(X_theta && X_theta->Dim()!=N*Np*Nt) X_theta->ReInit(N*Np*Nt);
|
|
|
+ if(X_phi && X_phi ->Dim()!=N*Np*Nt) X_phi ->ReInit(N*Np*Nt);
|
|
|
|
|
|
- auto phiV = (phiG * (n + 0) - phiY) * (1/(Real)(2*n + 1));
|
|
|
- auto phiW = (phiG * (n + 1) + phiY) * (1/(Real)(2*n + 1));
|
|
|
+ Vector<Real> B1(N*(2*p0+1)*Nt);
|
|
|
+ if(X || X_phi){
|
|
|
+ #pragma omp parallel
|
|
|
+ { // Evaluate Legendre polynomial
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- if (n==0) {
|
|
|
- phiW = 0;
|
|
|
- phiX = 0;
|
|
|
+ Long offset0=0;
|
|
|
+ Long offset1=0;
|
|
|
+ for(Long i=0;i<p0+1;i++){
|
|
|
+ Long N_ = (i==0 ? N : 2*N);
|
|
|
+ const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
|
|
|
+ Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
|
|
|
+ { // Mout = Min * Ml[i] // split between threads
|
|
|
+ Long a=(tid+0)*N_/omp_p;
|
|
|
+ Long b=(tid+1)*N_/omp_p;
|
|
|
+ if(a<b){
|
|
|
+ const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
|
|
|
+ Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
|
|
|
+ Matrix<Real>::GEMM(Mout_,Min_,Ml[i]);
|
|
|
+ }
|
|
|
}
|
|
|
- write_coeff(phiV, B2, i+0, p0, n, m);
|
|
|
- write_coeff(phiW, B2, i+1, p0, n, m);
|
|
|
- write_coeff(phiX, B2, i+2, p0, n, m);
|
|
|
+ offset0+=Min .Dim(0)*Min .Dim(1);
|
|
|
+ offset1+=Mout.Dim(0)*Mout.Dim(1);
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- SHCArrange0(B2, p0, S, arrange);
|
|
|
-}
|
|
|
+ #pragma omp parallel
|
|
|
+ { // Transpose and evaluate Fourier
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
-template <class Real> void SphericalHarmonics<Real>::VecSHC2Grid(const Vector<Real>& S, SHCArrange arrange, Long p0, Long Nt, Long Np, Vector<Real>& X) {
|
|
|
- Vector<Real> B0;
|
|
|
- SHCArrange1(S, arrange, p0, B0);
|
|
|
+ Long a=(tid+0)*N*Nt/omp_p;
|
|
|
+ Long b=(tid+1)*N*Nt/omp_p;
|
|
|
|
|
|
- Long p_ = p0 + 1;
|
|
|
- Long M0 = (p0+1)*(p0+1);
|
|
|
- Long M_ = (p_+1)*(p_+1);
|
|
|
- Long N = B0.Dim() / M0;
|
|
|
- assert(B0.Dim() == N*M0);
|
|
|
- assert(N % COORD_DIM == 0);
|
|
|
+ Vector<Real> buff(Mf.Dim(0)); buff = 0;
|
|
|
+ Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
|
|
|
+ Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ { // buff <-- Transpose(B1)
|
|
|
+ buff[0] = B1_[0][i];
|
|
|
+ buff[1] = 0;
|
|
|
+ for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
|
|
|
+ for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
|
|
|
+ }
|
|
|
+ { // X <-- FFT(buff)
|
|
|
+ Vector<Real> Xi(Np, X->begin() + Np * i, false);
|
|
|
+ Mf.Execute(buff, Xi);
|
|
|
+ }
|
|
|
|
|
|
- Vector<Real> B1(N*M_);
|
|
|
- const Complex<Real> imag(0,1);
|
|
|
- for (Long i=0; i<N; i+=COORD_DIM) {
|
|
|
- for (Long m=0; m<=p_; m++) {
|
|
|
- for (Long n=m; n<=p_; n++) {
|
|
|
- auto read_coeff = [&](const Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
- Complex<Real> c;
|
|
|
- if (0<=m && m<=n && n<=p) {
|
|
|
- Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
- Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
- c.real = coeff[idx_real];
|
|
|
- if (m) c.imag = coeff[idx_imag];
|
|
|
+ if(X_phi){ // Evaluate Fourier gradient
|
|
|
+ { // buff <-- Transpose(B1)
|
|
|
+ buff[0] = 0;
|
|
|
+ buff[1] = 0;
|
|
|
+ for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
|
|
|
+ for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
|
|
|
+ for (Long j = 1; j < buff.Dim()/2; j++) {
|
|
|
+ Real x = buff[2*j+0];
|
|
|
+ Real y = buff[2*j+1];
|
|
|
+ buff[2*j+0] = -j*y;
|
|
|
+ buff[2*j+1] = j*x;
|
|
|
+ }
|
|
|
}
|
|
|
- return c;
|
|
|
- };
|
|
|
- auto write_coeff = [&](Complex<Real> c, Vector<Real>& coeff, Long i, Long p, Long n, Long m) {
|
|
|
- if (0<=m && m<=n && n<=p) {
|
|
|
- Long idx_real = ((2*p-m+3)*m - (m?p+1:0))*N + (p+1-m)*i - m + n;
|
|
|
- Long idx_imag = idx_real + (p+1-m)*N;
|
|
|
- coeff[idx_real] = c.real;
|
|
|
- if (m) coeff[idx_imag] = c.imag;
|
|
|
+ { // X_phi <-- FFT(buff)
|
|
|
+ Vector<Real> Xi(Np, X_phi->begin() + Np * i, false);
|
|
|
+ Mf.Execute(buff, Xi);
|
|
|
}
|
|
|
- };
|
|
|
-
|
|
|
- auto phiG = [&](Long n, Long m) {
|
|
|
- auto phiV = read_coeff(B0, i+0, p0, n, m);
|
|
|
- auto phiW = read_coeff(B0, i+1, p0, n, m);
|
|
|
- return phiV + phiW;
|
|
|
- };
|
|
|
- auto phiY = [&](Long n, Long m) {
|
|
|
- auto phiV = read_coeff(B0, i+0, p0, n, m);
|
|
|
- auto phiW = read_coeff(B0, i+1, p0, n, m);
|
|
|
- return -phiV * (n + 1) + phiW * n;
|
|
|
- };
|
|
|
- auto phiX = [&](Long n, Long m) {
|
|
|
- return read_coeff(B0, i+2, p0, n, m);
|
|
|
- };
|
|
|
-
|
|
|
- Complex<Real> gr, gt, gp;
|
|
|
- { // (gt, gp) <-- (phiG, phiX)
|
|
|
- auto A = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>(n*n * ((n+1)*(n+1) - m*m) / (Real)((2*n+1)*(2*n+3))) : 0); };
|
|
|
- auto B = [&](Long n, Long m) { return (0<=n && m<=n && n<=p_ ? sqrt<Real>((n+1)*(n+1) * (n*n - m*m) / (Real)((2*n+1)*(2*n-1))) : 0); };
|
|
|
- gr = phiY(n,m);
|
|
|
- gt = phiG(n-1,m)*A(n-1,m) - phiG(n+1,m)*B(n+1,m) - imag*m*phiX(n,m);
|
|
|
- gp = phiX(n-1,m)*A(n-1,m) - phiX(n+1,m)*B(n+1,m) + imag*m*phiG(n,m);
|
|
|
}
|
|
|
-
|
|
|
- write_coeff(gr, B1, i+0, p_, n, m);
|
|
|
- write_coeff(gt, B1, i+1, p_, n, m);
|
|
|
- write_coeff(gp, B1, i+2, p_, n, m);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ if(X_theta){
|
|
|
+ #pragma omp parallel
|
|
|
+ { // Evaluate Legendre gradient
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
|
|
|
- B1 *= sqrt<Real>(4 * const_pi<Real>() * Np); // Scaling to match Zydrunas Fortran code.
|
|
|
- SHC2Grid_(B1, p_, Nt, Np, &X);
|
|
|
-
|
|
|
- { // Set X
|
|
|
- const auto& Y = LegendreNodes(Nt - 1);
|
|
|
- assert(Y.Dim() == Nt);
|
|
|
- for (Long k = 0; k < N; k++) {
|
|
|
- if (k % COORD_DIM) {
|
|
|
- for (Long i = 0; i < Nt; i++) {
|
|
|
- Real s = 1/sqrt<Real>(1 - Y[i]*Y[i]);
|
|
|
- for (Long j = 0; j < Np; j++) {
|
|
|
- X[(k*Nt+i)*Np+j] *= s;
|
|
|
+ Long offset0=0;
|
|
|
+ Long offset1=0;
|
|
|
+ for(Long i=0;i<p0+1;i++){
|
|
|
+ Long N_ = (i==0 ? N : 2*N);
|
|
|
+ const Matrix<Real> Min (N_, p0+1-i, (Iterator<Real>)B0.begin()+offset0, false);
|
|
|
+ Matrix<Real> Mout(N_, Nt , B1.begin()+offset1, false);
|
|
|
+ { // Mout = Min * Mdl[i] // split between threads
|
|
|
+ Long a=(tid+0)*N_/omp_p;
|
|
|
+ Long b=(tid+1)*N_/omp_p;
|
|
|
+ if(a<b){
|
|
|
+ const Matrix<Real> Min_ (b-a, Min .Dim(1), (Iterator<Real>)Min [a], false);
|
|
|
+ Matrix<Real> Mout_(b-a, Mout.Dim(1), Mout[a], false);
|
|
|
+ Matrix<Real>::GEMM(Mout_,Min_,Mdl[i]);
|
|
|
}
|
|
|
}
|
|
|
+ offset0+=Min .Dim(0)*Min .Dim(1);
|
|
|
+ offset1+=Mout.Dim(0)*Mout.Dim(1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ #pragma omp parallel
|
|
|
+ { // Transpose and evaluate Fourier
|
|
|
+ Integer tid=omp_get_thread_num();
|
|
|
+ Integer omp_p=omp_get_num_threads();
|
|
|
+
|
|
|
+ Long a=(tid+0)*N*Nt/omp_p;
|
|
|
+ Long b=(tid+1)*N*Nt/omp_p;
|
|
|
+
|
|
|
+ Vector<Real> buff(Mf.Dim(0)); buff = 0;
|
|
|
+ Long fft_coeff_len = std::min(buff.Dim(), 2*p0+2);
|
|
|
+ Matrix<Real> B1_(2*p0+1, N*Nt, B1.begin(), false);
|
|
|
+ for (Long i = a; i < b; i++) {
|
|
|
+ { // buff <-- Transpose(B1)
|
|
|
+ buff[0] = B1_[0][i];
|
|
|
+ buff[1] = 0;
|
|
|
+ for (Long j = 2; j < fft_coeff_len; j++) buff[j] = B1_[j-1][i];
|
|
|
+ for (Long j = fft_coeff_len; j < buff.Dim(); j++) buff[j] = 0;
|
|
|
+ }
|
|
|
+ { // Xi <-- FFT(buff)
|
|
|
+ Vector<Real> Xi(Np, X_theta->begin() + Np * i, false);
|
|
|
+ Mf.Execute(buff, Xi);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
-
|
|
|
template <class Real> void SphericalHarmonics<Real>::LegPoly(Vector<Real>& poly_val, const Vector<Real>& X, Long degree){
|
|
|
Long N = X.Dim();
|
|
|
Long Npoly = (degree + 1) * (degree + 2) / 2;
|