// NOTE(review): this text appears to have been damaged before it reached this file and is not compilable as-is:
//   * line breaks were collapsed, so every `//` comment below swallows the remainder of its physical line;
//   * template parameter/argument lists appear to be missing (e.g. `template template void ToroidalGreensFn::Setup`,
//     a bare `#include`, `StaticArray buff0;`), and a few expressions containing `<` are truncated (e.g. the
//     `if (err < tol || (b-a)()) ...` tail of ComputePotential and the `l *= (idx nds0(...` part of DyadicPanelQuadRule).
//   Restore the file from its upstream source before building. The notes below describe only what is still visible;
//   the code itself is left untouched.
//
// Definitions, in order of appearance:
//
// ToroidalGreensFn::Setup(ker, R0)
//   Picks the precomputation precision (QuadReal when SCTL_QUAD_T is defined, otherwise long double) and calls
//   PrecompToroidalGreensFn(ker, R0).
//
// ToroidalGreensFn::BuildOperatorModal(M, x0, x1, x2, ker)
//   Resizes M to (KDIM0*Nmm) x KDIM1 if needed, with Nmm = (Nm/2+1)*2. With r = sqrt(x0^2+x1^2) and
//   rho = sqrt((r-R0_)^2 + x2^2):
//     - rho < max_dist*R0_: M is accumulated from the precomputed table Ut with weights interp_r[j]*interp_Ntt[k]
//       (interp_r from BasisFn::EvalBasis and Mnds2coeff0/Mnds2coeff1; interp_Ntt from powers of (cos_phi, sin_phi)),
//       then each mode pair k is multiplied by exp_iktheta, the k-th power of (cos_theta, -sin_theta).
//     - otherwise: forwards to BuildOperatorModalDirect, using the literal orders 110/88/76/50/25/14 for rho below
//       1.25/1.67/2.5/5/10/20 times max_dist*R0_; the template argument of the final fallback call is not visible.
//   NOTE(review): r_inv = 1/r and rho_inv = 1/rho are taken without a zero check.
//
// ToroidalGreensFn::BasisFn::Eval(coeff, x)
//   With N = coeff.Dim() and Nsplit = max(0,(N-1)/2), returns
//   sum_{i<Nsplit} coeff[i]*x^i + log(x)*(coeff[N-2] + coeff[N-3]*x + ... down to index Nsplit) + coeff[N-1]/x.
//   The trailing `if (0)` branch is never executed.
//
// ToroidalGreensFn::BasisFn::EvalBasis(f, x)
//   Fills f with the same basis functions used by Eval: f[i] = x^i for i < Nsplit, f[N-2] = log(x),
//   f[N-3] = x*log(x), ..., and f[N-1] = 1/x.
//
// ToroidalGreensFn::BasisFn::nds(ORDER)
//   Builds interpolation nodes with InterpQuadRule::Build, using EvalBasis sampled on [fn_start, fn_end] = [1e-7, 1]
//   as integrands and sqrt(machine_eps()) as tolerance, and maps the nodes to that interval.
//   NOTE(review): the result lives in a function-local static, so it is computed on the first call only; presumably
//   ORDER is fixed per instantiation -- confirm, otherwise later calls with a different ORDER get the first result.
//
// ToroidalGreensFn::PrecompToroidalGreensFn(ker, R0)
//   Asserts ker.CoordDim() == COORD_DIM and stores R0_. Builds Mnds2coeff0 = S^-1 * Vt and Mnds2coeff1 = U^T from the
//   SVD of M[i][j] = (i-th basis function)(nds[j]); singular values are inverted without any thresholding.
//   Sets up fft_Nm_R2C / fft_Nm_C2R, places Nr*Nt targets at R0*(1 + d*sin(phi), 0, d*cos(phi)) with
//   d = min_dist + (max_dist-min_dist)*nds[i] and phi = 2*pi*j/Nt, and calls ComputePotential once per unit Fourier
//   coefficient vector to fill U0. U0 is then transformed along Nt (R2C), scaled by 1/sqrt(Nt) (with entries
//   Ntt-2, 2*Ntt-2, ... halved when Nt is even), rearranged into U, and stored transposed and doubled in Ut.
//
// ToroidalGreensFn::ComputePotential(U, Xtrg, R0, F_, ker, tol)
//   Computes Fourier coefficients of F_ (Nt_ = F_.Dim()/KDIM0 samples per component) and defines:
//     - EvalFourierExp: evaluates that Fourier series at arbitrary angles theta;
//     - compute_potential(a, b, tol): evaluates ker on the arc [a,b] of the circle of radius R0 in the z = 0 plane
//       with Chebyshev rules of QuadOrder+1 and QuadOrder nodes and measures the max abs difference `err`.
//   The remainder of the function (what happens after `err` is computed) is truncated in this text.
//   NOTE(review): F_ is declared const but is transposed in place through an `(Iterator)` cast.
//
// ToroidalGreensFn::BuildOperatorModalDirect(M, x0, x1, x2, ker)
//   Uses Nnds equispaced nodes on the circle of radius R0_ in the z = 0 plane (sin/cos/coordinate tables are
//   function-local statics), evaluates ker.KernelMatrix for the single target (x0,x1,x2) with Xn passed as the
//   normals argument, and for each k < Nmm/2 sums the kernel blocks weighted by the real/imaginary parts of
//   exp_iktheta_da (initialised to 2*pi/Nnds*scal with scal = 2/sqrt(Nm), multiplied by (cos, -sin) after every k).
//   Finally halves the rows of mode 0 and, when Nm is even, of the last mode.
//
// ReadFile(data, fname) / WriteFile(data, fname)
//   File layout: a uint64_t vector count, then for each vector a uint64_t length followed by that many raw ValueType
//   values. ReadFile only prints a message when the file cannot be opened and leaves `data` untouched.
//   NOTE(review): ReadFile opens with "r" while WriteFile uses "wb+"; WriteFile calls exit(0) (success status) when
//   it cannot open the file and does not check the results of fwrite.
//
// dot_prod(u, v) / cross_prod(u, v)
//   Dot and cross product of 3-vectors read from entries (0,0), (1,0), (2,0).
//
// sin_theta(ORDER) / cos_theta(ORDER)
//   Return entry ORDER of a table built once on first use; entry k holds sin (resp. cos) of 2*pi*i/k for
//   i = 0..k-1, for k < MaxOrder = 256. Asserts ORDER < MaxOrder.
//
// fourier_matrix(Nmodes, Nnodes)
//   Returns a cached (2*Nmodes) x Nnodes matrix with rows 2k / 2k+1 holding cos(k*theta_i) / sin(k*theta_i),
//   theta_i = 2*pi*i/Nnodes; empty when either argument is 0. All MaxOrder*MaxOrder (128*128) combinations are
//   built on first use. Asserts both arguments are below MaxOrder.
//
// fourier_matrix_inv(Nnodes, Nmodes)
//   Returns a cached Nnodes x (2*Nmodes) matrix with entries cos/sin(k*theta_i) * 2/Nnodes, column 0 halved, and
//   column Nnodes halved when Nnodes == (Nmodes-1)*2; empty when Nmodes > Nnodes/2+1 or either argument is 0.
//
// fourier_matrix_inv_transpose(Nnodes, Nmodes)
//   Returns the cached transpose of fourier_matrix_inv(Nnodes, Nmodes).
//
// LegendreQuadRule(ORDER)
//   Returns cached (nodes, weights) from LegQuadRule::ComputeNds / ComputeWts for orders 1..49 (entry 0 is left
//   default-constructed). Asserts ORDER < 50.
//
// LogSingularityQuadRule(ORDER)
//   Returns cached (nodes, weights) for orders 1..49. The high-precision data is read from "data/log_quad"; when
//   that yields fewer than MaxOrder*2 vectors the rules are generated with InterpQuadRule::Build for the integrands
//   x^i and x^i*log(x) and written back to the same file. Values are then cast to ValueType.
//
// BuildToroidalSpecialQuadRules(Nmodes, VecLen)
//   Produces max_adap_depth*max_digits (30*20) rules, indexed idx*max_digits+digits. Depending on `adap` (not
//   defined in the visible text) the rules are dyadically refined panel rules or are read from
//   "data/toroidal_quad_rule_m<Nmodes>_<Kernel::Name()>"; if the file does not provide the expected number of
//   rules they are generated with InterpQuadRule::Build and written to that file. The first crossover_adap_depth (2)
//   levels are then replaced by a trapezoidal rule with N = max(digits*2^idx, Nmodes) nodes. Each rule is returned
//   as four vectors: cos(theta)-1, sin(theta), weights, and a (Nmodes*2) x Nnds table of cos/sin(k*theta), with
//   Nnds rounded up to a multiple of VecLen and the padding left at zero.
//
// ToroidalSpecialQuadRule(Mfourier, nds_cos_theta, nds_sin_theta, wts, Nmodes, r_R0, digits)
//   Clamps digits to max_digits-1, derives adap_depth from r_R0 (the loop header is partly lost), and clamps it to
//   max_adap_depth-1 with a warning. The per-Nmodes tables are built once from BuildToroidalSpecialQuadRules under
//   `omp critical(SCTL_ToroidalSpecialQuadRule)`; Nmodes must be < 100. The outputs are re-pointed at the cached
//   rule adap_depth*max_digits+digits (the trailing `false` presumably makes them non-owning views -- confirm).
//   Returns true when adap_depth >= crossover_adap_depth, i.e. when an adaptive rule was selected.
//   NOTE(review): the warning text contains the typo "sverely"; it is a runtime string and is left unchanged here.
#include SCTL_INCLUDE(kernel_functions.hpp) #include SCTL_INCLUDE(tensor.hpp) #include SCTL_INCLUDE(quadrule.hpp) #include SCTL_INCLUDE(ompUtils.hpp) #include SCTL_INCLUDE(profile.hpp) #include SCTL_INCLUDE(legendre_rule.hpp) #include SCTL_INCLUDE(fft_wrapper.hpp) #include SCTL_INCLUDE(vtudata.hpp) #include SCTL_INCLUDE(lagrange-interp.hpp) #include namespace SCTL_NAMESPACE { template template void ToroidalGreensFn::Setup(const Kernel& ker, Real R0) { #ifdef SCTL_QUAD_T using ValueType = QuadReal; #else using ValueType = long double; #endif PrecompToroidalGreensFn(ker, R0); } template template void ToroidalGreensFn::BuildOperatorModal(Matrix& M, const Real x0, const Real x1, const Real x2, const Kernel& ker) const { constexpr Integer KDIM0 = Kernel::SrcDim(); constexpr Integer KDIM1 = Kernel::TrgDim(); constexpr Integer Nmm = (Nm/2+1)*2; constexpr Integer Ntt = (Nt/2+1)*2; StaticArray buff0; StaticArray buff1; Vector r_basis(Nr,buff0,false); Vector interp_r(Nr,buff0+Nr,false); Vector interp_Ntt(Ntt,buff1,false); if (M.Dim(0) != KDIM0*Nmm || M.Dim(1) != KDIM1) M.ReInit(KDIM0*Nmm,KDIM1); { // Set M const Real r = sqrt(x0*x0 + x1*x1); const Real rho = sqrt((r-R0_)*(r-R0_) + x2*x2); if (rho < max_dist*R0_) { const Real r_inv = 1/r; const Real rho_inv = 1/rho; const Real cos_theta = x0*r_inv; const Real sin_theta = x1*r_inv; const Real cos_phi = x2*rho_inv; const Real sin_phi = (r-R0_)*rho_inv; { // Set interp_r interp_r = 0; const Real rho0 = (rho/R0_-min_dist)/(max_dist-min_dist); BasisFn::EvalBasis(r_basis, rho0); for (Long i = 0; i < Nr; i++) { Real fn_val = 0; for (Long j = 0; j < Nr; j++) { fn_val += Mnds2coeff1[0][i*Nr+j] * r_basis[j]; } for (Long j = 0; j < Nr; j++) { interp_r[j] += Mnds2coeff0[0][i*Nr+j] * fn_val; } } } { // Set interp_Ntt interp_Ntt[0] = 0.5; interp_Ntt[1] = 0.0; Complex exp_t(cos_phi, sin_phi); Complex exp_jt(cos_phi, sin_phi); for (Long j = 1; j < Ntt/2; j++) { interp_Ntt[j*2+0] = exp_jt.real; interp_Ntt[j*2+1] =-exp_jt.imag; exp_jt *= 
exp_t; } } M = 0; for (Long j = 0; j < Nr; j++) { for (Long k = 0; k < Ntt; k++) { Real interp_wt = interp_r[j] * interp_Ntt[k]; ConstIterator Ut_ = Ut.begin() + (j*Ntt+k)*KDIM0*Nmm*KDIM1; for (Long i = 0; i < KDIM0*Nmm*KDIM1; i++) { // Set M M[0][i] += Ut_[i] * interp_wt; } } } { // Rotate by theta Complex exp_iktheta(1,0), exp_itheta(cos_theta, -sin_theta); for (Long k = 0; k < Nmm/2; k++) { for (Long i = 0; i < KDIM0; i++) { for (Long j = 0; j < KDIM1; j++) { Complex c(M[i*Nmm+2*k+0][j],M[i*Nmm+2*k+1][j]); c *= exp_iktheta; M[i*Nmm+2*k+0][j] = c.real; M[i*Nmm+2*k+1][j] = c.imag; } } exp_iktheta *= exp_itheta; } } } else if (rho < max_dist*R0_*1.25) { BuildOperatorModalDirect<110>(M, x0, x1, x2, ker); } else if (rho < max_dist*R0_*1.67) { BuildOperatorModalDirect<88>(M, x0, x1, x2, ker); } else if (rho < max_dist*R0_*2.5) { BuildOperatorModalDirect<76>(M, x0, x1, x2, ker); } else if (rho < max_dist*R0_*5) { BuildOperatorModalDirect<50>(M, x0, x1, x2, ker); } else if (rho < max_dist*R0_*10) { BuildOperatorModalDirect<25>(M, x0, x1, x2, ker); } else if (rho < max_dist*R0_*20) { BuildOperatorModalDirect<14>(M, x0, x1, x2, ker); } else { BuildOperatorModalDirect(M, x0, x1, x2, ker); } } } template template ValueType ToroidalGreensFn::BasisFn::Eval(const Vector& coeff, ValueType x) { if (1) { ValueType sum = 0; ValueType log_x = log(x); Long Nsplit = std::max(0,(coeff.Dim()-1)/2); ValueType x_i = 1; for (Long i = 0; i < Nsplit; i++) { sum += coeff[i] * x_i; x_i *= x; } x_i = 1; for (Long i = coeff.Dim()-2; i >= Nsplit; i--) { sum += coeff[i] * log_x * x_i; x_i *= x; } if (coeff.Dim()-1 >= 0) sum += coeff[coeff.Dim()-1] / x; return sum; } if (0) { ValueType sum = 0; Long Nsplit = coeff.Dim()/2; for (Long i = 0; i < Nsplit; i++) { sum += coeff[i] * sctl::pow(x,i); } for (Long i = Nsplit; i < coeff.Dim(); i++) { sum += coeff[i] * log(x) * sctl::pow(x,coeff.Dim()-1-i); } return sum; } } template template void ToroidalGreensFn::BasisFn::EvalBasis(Vector& f, ValueType x) { 
const Long N = f.Dim(); const Long Nsplit = std::max(0,(N-1)/2); ValueType xi = 1; for (Long i = 0; i < Nsplit; i++) { f[i] = xi; xi *= x; } ValueType xi_logx = log(x); for (Long i = N-2; i >= Nsplit; i--) { f[i] = xi_logx; xi_logx *= x; } if (N-1 >= 0) f[N-1] = 1/x; } template template const Vector& ToroidalGreensFn::BasisFn::nds(Integer ORDER) { ValueType fn_start = 1e-7, fn_end = 1.0; auto compute_nds = [&ORDER,&fn_start,&fn_end]() { Vector nds, wts; auto integrands = [&ORDER,&fn_start,&fn_end](const Vector& nds) { const Integer K = ORDER; const Long N = nds.Dim(); Matrix M(N,K); for (Long j = 0; j < N; j++) { Vector f(K,M[j],false); EvalBasis(f, nds[j]*(fn_end-fn_start)+fn_start); } return M; }; InterpQuadRule::Build(nds, wts, integrands, sqrt(machine_eps()), ORDER); return nds*(fn_end-fn_start)+fn_start; }; static Vector nds = compute_nds(); return nds; } template template void ToroidalGreensFn::PrecompToroidalGreensFn(const Kernel& ker, ValueType R0) { SCTL_ASSERT(ker.CoordDim() == COORD_DIM); constexpr Integer KDIM0 = Kernel::SrcDim(); constexpr Integer KDIM1 = Kernel::TrgDim(); constexpr Long Nmm = (Nm/2+1)*2; constexpr Long Ntt = (Nt/2+1)*2; R0_ = (Real)R0; const auto& nds = BasisFn::nds(Nr); { // Set Mnds2coeff0, Mnds2coeff1 Matrix M(Nr,Nr); Vector coeff(Nr); coeff = 0; for (Long i = 0; i < Nr; i++) { coeff[i] = 1; for (Long j = 0; j < Nr; j++) { M[i][j] = BasisFn::Eval(coeff, nds[j]); } coeff[i] = 0; } Matrix U, S, Vt; M.SVD(U, S, Vt); for (Long i = 0; i < S.Dim(0); i++) { S[i][i] = 1/S[i][i]; } auto Mnds2coeff0_ = S * Vt; auto Mnds2coeff1_ = U.Transpose(); Mnds2coeff0.ReInit(Mnds2coeff0_.Dim(0), Mnds2coeff0_.Dim(1)); Mnds2coeff1.ReInit(Mnds2coeff1_.Dim(0), Mnds2coeff1_.Dim(1)); for (Long i = 0; i < Mnds2coeff0.Dim(0)*Mnds2coeff0.Dim(1); i++) Mnds2coeff0[0][i] = (Real)Mnds2coeff0_[0][i]; for (Long i = 0; i < Mnds2coeff1.Dim(0)*Mnds2coeff1.Dim(1); i++) Mnds2coeff1[0][i] = (Real)Mnds2coeff1_[0][i]; } { // Setup fft_Nm_R2C Vector dim_vec(1); dim_vec[0] = 
Nm; fft_Nm_R2C.Setup(FFT_Type::R2C, KDIM0, dim_vec); fft_Nm_C2R.Setup(FFT_Type::C2R, KDIM0*KDIM1, dim_vec); } Vector Xtrg(Nr*Nt*COORD_DIM); for (Long i = 0; i < Nr; i++) { for (Long j = 0; j < Nt; j++) { Xtrg[(i*Nt+j)*COORD_DIM+0] = R0 * (1.0 + (min_dist+(max_dist-min_dist)*nds[i]) * sin(j*2*const_pi()/Nt)); Xtrg[(i*Nt+j)*COORD_DIM+1] = R0 * (0.0); Xtrg[(i*Nt+j)*COORD_DIM+2] = R0 * (0.0 + (min_dist+(max_dist-min_dist)*nds[i]) * cos(j*2*const_pi()/Nt)); } } Vector U0(KDIM0*Nmm*Nr*KDIM1*Nt); { // Set U0 FFT fft_Nm_C2R; { // Setup fft_Nm_C2R Vector dim_vec(1); dim_vec[0] = Nm; fft_Nm_C2R.Setup(FFT_Type::C2R, KDIM0, dim_vec); } Vector Fcoeff(KDIM0*Nmm), F, U_; for (Long i = 0; i < KDIM0*Nmm; i++) { Fcoeff = 0; Fcoeff[i] = 1; { // Set F fft_Nm_C2R.Execute(Fcoeff, F); Matrix FF(KDIM0,Nm,F.begin(), false); FF = FF.Transpose(); } ComputePotential(U_, Xtrg, R0, F, ker); SCTL_ASSERT(U_.Dim() == Nr*Nt*KDIM1); for (Long j = 0; j < Nr; j++) { for (Long l = 0; l < Nt; l++) { for (Long k = 0; k < KDIM1; k++) { U0[((i*Nr+j)*KDIM1+k)*Nt+l] = U_[(j*Nt+l)*KDIM1+k]; } } } } } Vector U1(KDIM0*Nmm*Nr*KDIM1*Ntt); { // U1 <-- fft_Nt(U0) FFT fft_Nt; Vector dim_vec(1); dim_vec = Nt; fft_Nt.Setup(FFT_Type::R2C, KDIM0*Nmm*Nr*KDIM1, dim_vec); fft_Nt.Execute(U0, U1); if (Nt%2==0 && Nt) { for (Long i = Ntt-2; i < U1.Dim(); i += Ntt) { U1[i] *= 0.5; } } U1 *= 1.0/sqrt(Nt); } U.ReInit(KDIM0*Nmm*KDIM1*Nr*Ntt); { // U <-- rearrange(U1) for (Long i0 = 0; i0 < KDIM0*Nmm; i0++) { for (Long i1 = 0; i1 < Nr; i1++) { for (Long i2 = 0; i2 < KDIM1; i2++) { for (Long i3 = 0; i3 < Ntt; i3++) { U[((i0*Nr+i1)*KDIM1+i2)*Ntt+i3] = (Real)U1[((i0*KDIM1+i2)*Nr+i1)*Ntt+i3]; } } } } } Ut.ReInit(Nr*Ntt*KDIM0*Nmm*KDIM1); { // Set Ut Matrix Ut_(Nr*Ntt,KDIM0*Nmm*KDIM1, Ut.begin(), false); Matrix U_(KDIM0*Nmm*KDIM1,Nr*Ntt, U.begin(), false); Ut_ = U_.Transpose()*2.0; } } template template void ToroidalGreensFn::ComputePotential(Vector& U, const Vector& Xtrg, ValueType R0, const Vector& F_, const Kernel& ker, ValueType tol) 
{ constexpr Integer KDIM0 = Kernel::SrcDim(); Vector F_fourier_coeff; const Long Nt_ = F_.Dim() / KDIM0; // number of Fourier modes SCTL_ASSERT(F_.Dim() == Nt_ * KDIM0); { // Transpose F_ Matrix FF(Nt_,KDIM0,(Iterator)F_.begin(), false); FF = FF.Transpose(); } { // Set F_fourier_coeff FFT fft_plan; Vector dim_vec(1); dim_vec[0] = Nt_; fft_plan.Setup(FFT_Type::R2C, KDIM0, dim_vec); fft_plan.Execute(F_, F_fourier_coeff); if (Nt_%2==0 && F_fourier_coeff.Dim()) { F_fourier_coeff[F_fourier_coeff.Dim()-2] *= 0.5; } } auto EvalFourierExp = [&Nt_](Vector& F, const Vector& F_fourier_coeff, Integer dof, const Vector& theta) { const Long N = F_fourier_coeff.Dim() / dof / 2; SCTL_ASSERT(F_fourier_coeff.Dim() == dof * N * 2); const Long Ntheta = theta.Dim(); if (F.Dim() != Ntheta*dof) F.ReInit(Ntheta*dof); for (Integer k = 0; k < dof; k++) { for (Long j = 0; j < Ntheta; j++) { Complex F_(0,0); for (Long i = 0; i < N; i++) { Complex c(F_fourier_coeff[(k*N+i)*2+0],F_fourier_coeff[(k*N+i)*2+1]); Complex exp_t(cos(theta[j]*i), sin(theta[j]*i)); F_ += exp_t * c * (i==0?1:2); } F[j*dof+k] = F_.real/sqrt(Nt_); } } }; constexpr Integer QuadOrder = 18; std::function(ValueType,ValueType,ValueType)> compute_potential = [&](ValueType a, ValueType b, ValueType tol) -> Vector { auto GetGeomCircle = [&R0] (Vector& Xsrc, Vector& Nsrc, const Vector& nds) { Long N = nds.Dim(); if (Xsrc.Dim() != N * COORD_DIM) Xsrc.ReInit(N*COORD_DIM); if (Nsrc.Dim() != N * COORD_DIM) Nsrc.ReInit(N*COORD_DIM); for (Long i = 0; i < N; i++) { Xsrc[i*COORD_DIM+0] = R0 * cos(nds[i]); Xsrc[i*COORD_DIM+1] = R0 * sin(nds[i]); Xsrc[i*COORD_DIM+2] = R0 * 0; Nsrc[i*COORD_DIM+0] = cos(nds[i]); Nsrc[i*COORD_DIM+1] = sin(nds[i]); Nsrc[i*COORD_DIM+2] = 0; } }; const auto& nds0 = ChebQuadRule::nds(QuadOrder+1); const auto& wts0 = ChebQuadRule::wts(QuadOrder+1); const auto& nds1 = ChebQuadRule::nds(QuadOrder+0); const auto& wts1 = ChebQuadRule::wts(QuadOrder+0); Vector U0; Vector Xsrc, Nsrc, Fsrc; GetGeomCircle(Xsrc, Nsrc, 
a+(b-a)*nds0); EvalFourierExp(Fsrc, F_fourier_coeff, KDIM0, a+(b-a)*nds0); for (Long i = 0; i < nds0.Dim(); i++) { for (Long j = 0; j < KDIM0; j++) { Fsrc[i*KDIM0+j] *= ((b-a) * wts0[i]); } } ker.Eval(U0, Xtrg, Xsrc, Nsrc, Fsrc); Vector U1; GetGeomCircle(Xsrc, Nsrc, a+(b-a)*nds1); EvalFourierExp(Fsrc, F_fourier_coeff, KDIM0, a+(b-a)*nds1); for (Long i = 0; i < nds1.Dim(); i++) { for (Long j = 0; j < KDIM0; j++) { Fsrc[i*KDIM0+j] *= ((b-a) * wts1[i]); } } ker.Eval(U1, Xtrg, Xsrc, Nsrc, Fsrc); ValueType err = 0, max_val = 0; for (Long i = 0; i < U1.Dim(); i++) { err = std::max(err, fabs(U0[i]-U1[i])); max_val = std::max(max_val, fabs(U0[i])); } if (err < tol || (b-a)()) || (b-a)(), tol); }; template template void ToroidalGreensFn::BuildOperatorModalDirect(Matrix& M, const Real x0, const Real x1, const Real x2, const Kernel& ker) const { constexpr Integer KDIM0 = Kernel::SrcDim(); constexpr Integer KDIM1 = Kernel::TrgDim(); constexpr Integer Nmm = (Nm/2+1)*2; auto get_sin_theta = [](Long N){ Vector sin_theta(N); for (Long i = 0; i < N; i++) { sin_theta[i] = sin(2*const_pi()*i/N); } return sin_theta; }; auto get_cos_theta = [](Long N){ Vector cos_theta(N); for (Long i = 0; i < N; i++) { cos_theta[i] = cos(2*const_pi()*i/N); } return cos_theta; }; auto get_circle_coord = [](Long N, Real R0){ Vector X(N*COORD_DIM); for (Long i = 0; i < N; i++) { X[i*COORD_DIM+0] = R0*cos(2*const_pi()*i/N); X[i*COORD_DIM+1] = R0*sin(2*const_pi()*i/N); X[i*COORD_DIM+2] = 0; } return X; }; constexpr Real scal = 2/sqrt(Nm); static const Vector sin_nds = get_sin_theta(Nnds); static const Vector cos_nds = get_cos_theta(Nnds); static const Vector Xn = get_circle_coord(Nnds,1); StaticArray buff0; Vector Xs(Nnds*COORD_DIM,buff0,false); Xs = Xn * R0_; StaticArray Xt = {x0,x1,x2}; StaticArray mem_buff2; Matrix Mker(KDIM0*Nnds, KDIM1, mem_buff2, false); ker.KernelMatrix(Mker, Vector(COORD_DIM,(Iterator)Xt,false), Xs, Xn); StaticArray mem_buff3; Vector> exp_itheta(Nnds, (Iterator>)(mem_buff3+0*Nnds), 
false); Vector> exp_iktheta_da(Nnds, (Iterator>)(mem_buff3+2*Nnds), false); for (Integer j = 0; j < Nnds; j++) { exp_itheta[j].real = cos_nds[j]; exp_itheta[j].imag =-sin_nds[j]; exp_iktheta_da[j].real = 2*const_pi()/Nnds*scal; exp_iktheta_da[j].imag = 0; } for (Integer k = 0; k < Nmm/2; k++) { // apply Mker to complex exponentials // TODO: FFT might be faster since points are uniform Tensor Mk0, Mk1; for (Integer i0 = 0; i0 < KDIM0; i0++) { for (Integer i1 = 0; i1 < KDIM1; i1++) { Mk0(i0,i1) = 0; Mk1(i0,i1) = 0; } } for (Integer j = 0; j < Nnds; j++) { Tensor Mker_(Mker[j*KDIM0]); Mk0 = Mk0 + Mker_ * exp_iktheta_da[j].real; Mk1 = Mk1 + Mker_ * exp_iktheta_da[j].imag; } for (Integer i0 = 0; i0 < KDIM0; i0++) { for (Integer i1 = 0; i1 < KDIM1; i1++) { M[i0*Nmm+(k*2+0)][i1] = Mk0(i0,i1); M[i0*Nmm+(k*2+1)][i1] = Mk1(i0,i1); } } exp_iktheta_da *= exp_itheta; } for (Integer i0 = 0; i0 < KDIM0; i0++) { for (Integer i1 = 0; i1 < KDIM1; i1++) { M[i0*Nmm+0][i1] *= 0.5; M[i0*Nmm+1][i1] *= 0.5; if (Nm%2 == 0) { M[(i0+1)*Nmm-2][i1] *= 0.5; M[(i0+1)*Nmm-1][i1] *= 0.5; } } } } template static void ReadFile(Vector>& data, const std::string fname) { FILE* f = fopen(fname.c_str(), "r"); if (f == nullptr) { std::cout << "Unable to open file for reading:" << fname << '\n'; } else { uint64_t data_len; Long readlen = fread(&data_len, sizeof(uint64_t), 1, f); SCTL_ASSERT(readlen == 1); if (data_len) { data.ReInit(data_len); for (Long i = 0; i < data.Dim(); i++) { readlen = fread(&data_len, sizeof(uint64_t), 1, f); SCTL_ASSERT(readlen == 1); data[i].ReInit(data_len); if (data_len) { readlen = fread(&data[i][0], sizeof(ValueType), data_len, f); SCTL_ASSERT(readlen == (Long)data_len); } } } fclose(f); } } template static void WriteFile(const Vector>& data, const std::string fname) { FILE* f = fopen(fname.c_str(), "wb+"); if (f == nullptr) { std::cout << "Unable to open file for writing:" << fname << '\n'; exit(0); } uint64_t data_len = data.Dim(); fwrite(&data_len, sizeof(uint64_t), 1, f); 
for (Integer i = 0; i < data.Dim(); i++) { data_len = data[i].Dim(); fwrite(&data_len, sizeof(uint64_t), 1, f); if (data_len) fwrite(&data[i][0], sizeof(ValueType), data_len, f); } fclose(f); } template static ValueType dot_prod(const Tensor& u, const Tensor& v) { ValueType u_dot_v = 0; u_dot_v += u(0,0) * v(0,0); u_dot_v += u(1,0) * v(1,0); u_dot_v += u(2,0) * v(2,0); return u_dot_v; } template static Tensor cross_prod(const Tensor& u, const Tensor& v) { Tensor uxv; uxv(0,0) = u(1,0) * v(2,0) - u(2,0) * v(1,0); uxv(1,0) = u(2,0) * v(0,0) - u(0,0) * v(2,0); uxv(2,0) = u(0,0) * v(1,0) - u(1,0) * v(0,0); return uxv; } template static const Vector& sin_theta(const Integer ORDER) { constexpr Integer MaxOrder = 256; auto compute_sin_theta = [MaxOrder](){ Vector> sin_theta_lst(MaxOrder); for (Long k = 0; k < MaxOrder; k++) { sin_theta_lst[k].ReInit(k); for (Long i = 0; i < k; i++) { sin_theta_lst[k][i] = sin(2*const_pi()*i/k); } } return sin_theta_lst; }; static const auto sin_theta_lst = compute_sin_theta(); SCTL_ASSERT(ORDER < MaxOrder); return sin_theta_lst[ORDER]; } template static const Vector& cos_theta(const Integer ORDER) { constexpr Integer MaxOrder = 256; auto compute_cos_theta = [MaxOrder](){ Vector> cos_theta_lst(MaxOrder); for (Long k = 0; k < MaxOrder; k++) { cos_theta_lst[k].ReInit(k); for (Long i = 0; i < k; i++) { cos_theta_lst[k][i] = cos(2*const_pi()*i/k); } } return cos_theta_lst; }; static const auto cos_theta_lst = compute_cos_theta(); SCTL_ASSERT(ORDER < MaxOrder); return cos_theta_lst[ORDER]; } template static const Matrix& fourier_matrix(Integer Nmodes, Integer Nnodes) { constexpr Integer MaxOrder = 128; auto compute_fourier_matrix = [](Integer Nmodes, Integer Nnodes) { if (Nnodes == 0 || Nmodes == 0) return Matrix(); Matrix M_fourier(2*Nmodes,Nnodes); for (Long i = 0; i < Nnodes; i++) { Real theta = 2*const_pi()*i/Nnodes; for (Long k = 0; k < Nmodes; k++) { M_fourier[k*2+0][i] = cos(k*theta); M_fourier[k*2+1][i] = sin(k*theta); } } return 
M_fourier; }; auto compute_all = [&compute_fourier_matrix, MaxOrder]() { Matrix> Mall(MaxOrder, MaxOrder); for (Long i = 0; i < MaxOrder; i++) { for (Long j = 0; j < MaxOrder; j++) { Mall[i][j] = compute_fourier_matrix(i,j); } } return Mall; }; static const Matrix> Mall = compute_all(); SCTL_ASSERT(Nmodes < MaxOrder && Nnodes < MaxOrder); return Mall[Nmodes][Nnodes]; } template static const Matrix& fourier_matrix_inv(Integer Nnodes, Integer Nmodes) { constexpr Integer MaxOrder = 128; auto compute_fourier_matrix_inv = [](Integer Nnodes, Integer Nmodes) { if (Nmodes > Nnodes/2+1 || Nnodes == 0 || Nmodes == 0) return Matrix(); const Real scal = 2/(Real)Nnodes; Matrix M_fourier_inv(Nnodes,2*Nmodes); for (Long i = 0; i < Nnodes; i++) { Real theta = 2*const_pi()*i/Nnodes; for (Long k = 0; k < Nmodes; k++) { M_fourier_inv[i][k*2+0] = cos(k*theta)*scal; M_fourier_inv[i][k*2+1] = sin(k*theta)*scal; } } for (Long i = 0; i < Nnodes; i++) { M_fourier_inv[i][0] *= 0.5; } if (Nnodes == (Nmodes-1)*2) { for (Long i = 0; i < Nnodes; i++) { M_fourier_inv[i][Nnodes] *= 0.5; } } return M_fourier_inv; }; auto compute_all = [&compute_fourier_matrix_inv, MaxOrder]() { Matrix> Mall(MaxOrder, MaxOrder); for (Long i = 0; i < MaxOrder; i++) { for (Long j = 0; j < MaxOrder; j++) { Mall[i][j] = compute_fourier_matrix_inv(i,j); } } return Mall; }; static const Matrix> Mall = compute_all(); SCTL_ASSERT(Nnodes < MaxOrder && Nmodes < MaxOrder); return Mall[Nnodes][Nmodes]; } template static const Matrix& fourier_matrix_inv_transpose(Integer Nnodes, Integer Nmodes) { constexpr Integer MaxOrder = 128; auto compute_all = [MaxOrder]() { Matrix> Mall(MaxOrder, MaxOrder); for (Long i = 0; i < MaxOrder; i++) { for (Long j = 0; j < MaxOrder; j++) { Mall[i][j] = fourier_matrix_inv(i,j).Transpose(); } } return Mall; }; static const Matrix> Mall = compute_all(); SCTL_ASSERT(Nnodes < MaxOrder && Nmodes < MaxOrder); return Mall[Nnodes][Nmodes]; } template static const std::pair,Vector>& 
LegendreQuadRule(Integer ORDER) { constexpr Integer max_order = 50; auto compute_nds_wts = [max_order]() { Vector,Vector>> nds_wts(max_order); for (Integer order = 1; order < max_order; order++) { auto& x_ = nds_wts[order].first; auto& w_ = nds_wts[order].second; x_ = LegQuadRule::ComputeNds(order); w_ = LegQuadRule::ComputeWts(x_); } return nds_wts; }; static const auto nds_wts = compute_nds_wts(); SCTL_ASSERT(ORDER < max_order); return nds_wts[ORDER]; } template static const std::pair,Vector>& LogSingularityQuadRule(Integer ORDER) { constexpr Integer MaxOrder = 50; auto compute_nds_wts_lst = [MaxOrder]() { #ifdef SCTL_QUAD_T using RealType = QuadReal; #else using RealType = long double; #endif Vector> data; ReadFile(data, "data/log_quad"); if (data.Dim() < MaxOrder*2) { data.ReInit(MaxOrder*2); #pragma omp parallel for for (Integer order = 1; order < MaxOrder; order++) { auto integrands = [order](const Vector& nds) { const Integer K = order; const Long N = nds.Dim(); Matrix M(N,K); for (Long j = 0; j < N; j++) { for (Long i = 0; i < (K+1)/2; i++) { M[j][i] = pow(nds[j],i); } for (Long i = (K+1)/2; i < K; i++) { M[j][i] = pow(nds[j],i-(K+1)/2) * log(nds[j]); } } return M; }; InterpQuadRule::Build(data[order*2+0], data[order*2+1], integrands, false, machine_eps(), order, 2e-4, 1.0); } WriteFile(data, "data/log_quad"); } Vector,Vector>> nds_wts_lst(MaxOrder); #pragma omp parallel for for (Integer order = 1; order < MaxOrder; order++) { const auto& nds = data[order*2+0]; const auto& wts = data[order*2+1]; auto& nds_ = nds_wts_lst[order].first; auto& wts_ = nds_wts_lst[order].second; nds_.ReInit(nds.Dim()); wts_.ReInit(wts.Dim()); for (Long i = 0; i < nds.Dim(); i++) { nds_[i] = (ValueType)nds[i]; wts_[i] = (ValueType)wts[i]; } } return nds_wts_lst; }; static const auto nds_wts_lst = compute_nds_wts_lst(); SCTL_ASSERT(ORDER < MaxOrder); return nds_wts_lst[ORDER]; } template static Vector> BuildToroidalSpecialQuadRules(Integer Nmodes, Integer VecLen) { const 
std::string fname = std::string("data/toroidal_quad_rule_m") + std::to_string(Nmodes) + "_" + Kernel::Name(); constexpr Integer COORD_DIM = 3; constexpr Integer max_adap_depth = 30; // build quadrature rules for points up to 2*pi*0.5^max_adap_depth from source loop constexpr Integer crossover_adap_depth = 2; constexpr Integer max_digits = 20; #ifdef SCTL_QUAD_T using ValueType = QuadReal; #else using ValueType = long double; #endif auto DyadicPanelQuadRule = [](Vector& nds, Vector& wts, const Integer depth, const Integer LegOrder, const Integer PanelRepeat) { Vector panel_nds, panel_wts; { // Set panel_nds, panel_wts auto leg_quad = LegendreQuadRule(LegOrder); const auto& leg_nds = leg_quad.first; const auto& leg_wts = leg_quad.second; const Long rep = PanelRepeat; const ValueType scal = 1/(ValueType)rep; for (Long i = 0; i < rep; i++) { for (Long j = 0; j < leg_nds.Dim(); j++) { panel_nds.PushBack(leg_nds[j]*scal + i*scal); panel_wts.PushBack(leg_wts[j]*scal); } } } SCTL_ASSERT(depth); Long N = 2*depth; ValueType l = 0.5; nds.ReInit(N*panel_nds.Dim()); wts.ReInit(N*panel_nds.Dim()); for (Integer idx = 0; idx < depth; idx++) { l *= (idx nds0(panel_nds.Dim(), nds.begin()+( idx )*panel_nds.Dim(), false); Vector nds1(panel_nds.Dim(), nds.begin()+(N-idx-1)*panel_nds.Dim(), false); Vector wts0(panel_wts.Dim(), wts.begin()+( idx )*panel_wts.Dim(), false); Vector wts1(panel_wts.Dim(), wts.begin()+(N-idx-1)*panel_wts.Dim(), false); for (Long i = 0; i < panel_nds.Dim(); i++) { ValueType s = panel_nds[i]*l + (idx> data; if (!adap) { // read from file ReadFile(data, fname); } else { // use dyadically refined panel quadrature rules data.ReInit(max_adap_depth * max_digits); for (Integer idx = 0; idx < max_adap_depth; idx++) { const ValueType dist = 4*const_pi()*pow(0.5,idx); const Integer DyadicRefDepth = std::max(1,(Integer)(log(dist/2/const_pi())/log(0.5)+0.5)); Vector quad_nds, quad_wts; for (Integer digits = 0; digits < max_digits; digits++) { const Integer LegOrder = 
(Integer)(digits*1.5); DyadicPanelQuadRule(quad_nds, quad_wts, DyadicRefDepth, LegOrder, adap); const Long N = quad_nds.Dim(); data[idx*max_digits+digits].ReInit(3*N); for (Long i = 0; i < N; i++) { data[idx*max_digits+digits][i*3+0] = cos(2*const_pi()*quad_nds[i]); data[idx*max_digits+digits][i*3+1] = sin(2*const_pi()*quad_nds[i]); data[idx*max_digits+digits][i*3+2] = (2*const_pi()*quad_wts[i]); } } } } if (!adap && data.Dim() != max_adap_depth*max_digits) { // If file is not-found then compute quadrature rule and write to file data.ReInit(max_adap_depth * max_digits); for (Integer idx = 0; idx < max_adap_depth; idx++) { Vector> quad_nds, quad_wts; { // generate special quadrature rule Vector nds, wts; Matrix Mintegrands; auto discretize_basis_functions = [Nmodes,&DyadicPanelQuadRule](Matrix& Mintegrands, Vector& nds, Vector& wts, const ValueType dist, const Integer LegOrder) { auto trg_coord = [](ValueType dist, Long M) { Vector Xtrg; //(M*M*COORD_DIM); for (Long i = 0; i < M; i++) { for (Long j = 0; j < M; j++) { ValueType theta = i*2*const_pi()/(M); ValueType r = (0.5 + i*0.5/(M)) * dist; ValueType x0 = r*cos(theta); ValueType x1 = 0; ValueType x2 = r*sin(theta); if (x0 > 0) { Xtrg.PushBack(x0); Xtrg.PushBack(x1); Xtrg.PushBack(x2); } } } return Xtrg; }; const Vector Xtrg = trg_coord(dist, 25); // TODO: determine optimal sample count const Long Ntrg = Xtrg.Dim()/COORD_DIM; const Integer DyadicRefDepth = std::max(1,(Integer)(log(dist/2/const_pi())/log(0.5)+0.5)); DyadicPanelQuadRule(nds, wts, DyadicRefDepth, LegOrder, 1); const Long Nnds = nds.Dim(); Vector> exp_itheta(Nnds), exp_iktheta(Nnds); Vector Xsrc(Nnds*COORD_DIM), Xn(Nnds*COORD_DIM); for (Long i = 0; i < Nnds; i++) { const ValueType cos_t = cos(2*const_pi()*nds[i]); const ValueType sin_t = sin(2*const_pi()*nds[i]); exp_iktheta[i].real = 1; exp_iktheta[i].imag = 0; exp_itheta[i].real = cos_t; exp_itheta[i].imag = sin_t; Xsrc[i*COORD_DIM+0] = -2*sin(const_pi()*nds[i])*sin(const_pi()*nds[i]); // == cos_t - 
1 Xsrc[i*COORD_DIM+1] = sin_t; Xsrc[i*COORD_DIM+2] = 0; Xn[i*COORD_DIM+0] = cos_t; Xn[i*COORD_DIM+1] = sin_t; Xn[i*COORD_DIM+2] = 0; } Kernel ker; Matrix Mker; ker.KernelMatrix(Mker, Xtrg, Xsrc, Xn); SCTL_ASSERT(Mker.Dim(0) == Nnds * Kernel::SrcDim()); SCTL_ASSERT(Mker.Dim(1) == Ntrg * Kernel::TrgDim()); Mintegrands.ReInit(Nnds, (Nmodes*2)*Kernel::SrcDim() * Ntrg*Kernel::TrgDim()); for (Long k = 0; k < Nmodes; k++) { for (Long i = 0; i < Nnds; i++) { for (Long j = 0; j < Ntrg; j++) { for (Long k0 = 0; k0 < Kernel::SrcDim(); k0++) { for (Long k1 = 0; k1 < Kernel::TrgDim(); k1++) { Mintegrands[i][(((k*2+0)*Kernel::SrcDim()+k0) *Ntrg+j)*Kernel::TrgDim()+k1] = Mker[i*Kernel::SrcDim()+k0][j*Kernel::TrgDim()+k1] * exp_iktheta[i].real; Mintegrands[i][(((k*2+1)*Kernel::SrcDim()+k0) *Ntrg+j)*Kernel::TrgDim()+k1] = Mker[i*Kernel::SrcDim()+k0][j*Kernel::TrgDim()+k1] * exp_iktheta[i].imag; } } } } for (Long i = 0; i < Nnds; i++) { exp_iktheta[i] *= exp_itheta[i]; } } }; const ValueType dist = 4*const_pi()*pow(0.5,idx); // distance of target points from the source loop (which is a unit circle) discretize_basis_functions(Mintegrands, nds, wts, dist, 35); // TODO: adaptively select Legendre order Vector eps_vec; for (Long k = 0; k < max_digits; k++) eps_vec.PushBack(pow(0.1,k)); std::cout<<"Level = "<::Build(quad_nds, quad_wts, Mintegrands, nds, wts, true, eps_vec); } for (Integer digits = 0; digits < max_digits; digits++) { Long N = quad_nds[digits].Dim(); data[idx*max_digits+digits].ReInit(3*N); for (Long i = 0; i < N; i++) { data[idx*max_digits+digits][i*3+0] = cos(2*const_pi()*quad_nds[digits][i]); data[idx*max_digits+digits][i*3+1] = sin(2*const_pi()*quad_nds[digits][i]); data[idx*max_digits+digits][i*3+2] = (2*const_pi()*quad_wts[digits][i]); } } } WriteFile(data, fname); } for (Integer idx = 0; idx < crossover_adap_depth; idx++) { // Use trapezoidal rule up to crossover_adap_depth for (Integer digits = 0; digits < max_digits; digits++) { Long N = 
std::max(digits*pow(2,idx), Nmodes); // TODO: determine optimal order by testing error or adaptively data[idx*max_digits+digits].ReInit(3*N); for (Long i = 0; i < N; i++) { ValueType quad_nds = i/(ValueType)N; ValueType quad_wts = 1/(ValueType)N; data[idx*max_digits+digits][i*3+0] = cos(2*const_pi()*quad_nds); data[idx*max_digits+digits][i*3+1] = sin(2*const_pi()*quad_nds); data[idx*max_digits+digits][i*3+2] = (2*const_pi()*quad_wts); } } } Vector> quad_rule_lst; quad_rule_lst.ReInit(data.Dim()*4); for (Integer i = 0; i < data.Dim(); i++) { const Long Nnds_ = data[i].Dim()/3; const Integer Nnds = ((Nnds_+VecLen-1)/VecLen)*VecLen; quad_rule_lst[i*4+0].ReInit(Nnds); quad_rule_lst[i*4+0].SetZero(); quad_rule_lst[i*4+1].ReInit(Nnds); quad_rule_lst[i*4+1].SetZero(); quad_rule_lst[i*4+2].ReInit(Nnds); quad_rule_lst[i*4+2].SetZero(); quad_rule_lst[i*4+3].ReInit(Nmodes*2*Nnds); quad_rule_lst[i*4+3].SetZero(); for (Long j = 0; j < Nnds_; j++) { Complex exp_itheta(data[i][j*3+0], data[i][j*3+1]); quad_rule_lst[i*4+0][j] = (RealType)(exp_itheta.real-1); quad_rule_lst[i*4+1][j] = (RealType)(exp_itheta.imag); quad_rule_lst[i*4+2][j] = (RealType)data[i][j*3+2]; Complex exp_iktheta(1,0); for (Long k = 0; k < Nmodes; k++) { quad_rule_lst[i*4+3][(k*2+0)*Nnds+j] = (RealType)exp_iktheta.real; quad_rule_lst[i*4+3][(k*2+1)*Nnds+j] = (RealType)exp_iktheta.imag; exp_iktheta *= exp_itheta; } } } return quad_rule_lst; } template static bool ToroidalSpecialQuadRule(Matrix& Mfourier, Vector& nds_cos_theta, Vector& nds_sin_theta, Vector& wts, const Integer Nmodes, RealType r_R0, Integer digits) { static constexpr Integer max_adap_depth = 30; // build quadrature rules for points up to 2*pi*0.5^max_adap_depth from source loop static constexpr Integer crossover_adap_depth = 2; static constexpr Integer max_digits = 20; if (digits >= max_digits) digits = max_digits-1; //SCTL_ASSERT(digits(); s*=2) adap_depth++; if (adap_depth >= max_adap_depth) { SCTL_WARN("Toroidal quadrature evaluation is 
outside of the range of precomputed quadratures; accuracy may be sverely degraded."); adap_depth = max_adap_depth-1; } SCTL_ASSERT(Nmodes < 100); static Vector>> all_fourier_basis(100); static Vector>> all_quad_nds_cos_theta(100); static Vector>> all_quad_nds_sin_theta(100); static Vector>> all_quad_wts(100); #pragma omp critical(SCTL_ToroidalSpecialQuadRule) if (all_quad_wts[Nmodes].Dim() == 0) { auto quad_rules = BuildToroidalSpecialQuadRules(Nmodes, VecLen); const Long Nrules = quad_rules.Dim()/4; Vector> fourier_basis(Nrules); Vector> quad_nds_cos_theta(Nrules); Vector> quad_nds_sin_theta(Nrules); Vector> quad_wts(Nrules); for (Long i = 0; i < Nrules; i++) { // Set quad_nds_cos_theta, quad_nds_sin_theta, quad_wts, fourier_basis const Integer Nnds = quad_rules[i*4+0].Dim(); SCTL_ASSERT(Nnds%VecLen == 0); quad_wts[i] = quad_rules[i*4+2]; quad_nds_cos_theta[i] = quad_rules[i*4+0]; quad_nds_sin_theta[i] = quad_rules[i*4+1]; fourier_basis[i] = Matrix((Nmodes-ModalUpsample)*2, Nnds, quad_rules[i*4+3].begin()).Transpose(); } all_fourier_basis[Nmodes].Swap(fourier_basis); all_quad_nds_cos_theta[Nmodes].Swap(quad_nds_cos_theta); all_quad_nds_sin_theta[Nmodes].Swap(quad_nds_sin_theta); all_quad_wts[Nmodes].Swap(quad_wts); } { // Set Mfourier, nds_cos_theta, nds_sin_theta, wts const Long quad_idx = adap_depth*max_digits+digits; const auto& Mfourier0 = all_fourier_basis[Nmodes][quad_idx]; const auto& nds0_cos_theta = all_quad_nds_cos_theta[Nmodes][quad_idx]; const auto& nds0_sin_theta = all_quad_nds_sin_theta[Nmodes][quad_idx]; const auto& wts0 = all_quad_wts[Nmodes][quad_idx]; const Long N = wts0.Dim(); Mfourier.ReInit(Mfourier0.Dim(0), Mfourier0.Dim(1), (Iterator)Mfourier0.begin(), false); nds_cos_theta.ReInit(N, (Iterator)nds0_cos_theta.begin(), false); nds_sin_theta.ReInit(N, (Iterator)nds0_sin_theta.begin(), false); wts.ReInit(N, (Iterator)wts0.begin(), false); } // return whether an adaptive quadrature rule has been used return (adap_depth >= crossover_adap_depth); } 
// toroidal_greens_fn_batched
// --------------------------
// For every source cross-section ii (BatchSize = M.Dim(0) of them) this integrates the kernel
// `ker` over the angle theta around the circle of radius r centred at x, lying in the plane
// spanned by the local vectors e1,e2, and writes FourierModes complex coefficients
// (interleaved real/imag) per kernel component into row ii of M.
//
// Compile-time parameters used below: digits, ModalUpsample, trg_dot_prod, RealType, Kernel, adap
// (this order is the one used by the explicit recursive call in the diagnostic block further down).
//
// Arguments (sizes are enforced by the SCTL_ASSERTs at the top of the body):
//   M                 : BatchSize x (KDIM0*KDIM1*FourierModes*2) output; row ii is viewed, without
//                       copying, as a (KDIM0*KDIM1) x (FourierModes*2) matrix.
//   x_trg,e_trg,r_trg : the evaluation point is y_trg = x_trg + e_trg*r_trg.
//   n_trg             : when trg_dot_prod is set, each group of COORD_DIM kernel target components
//                       is contracted with n_trg (so KDIM1 = Kernel::TrgDim()/COORD_DIM).
//   x_src,dx_src,d2x_src,e1_src : COORD_DIM x BatchSize;  r_src,dr_src : 1 x BatchSize.
//   ker               : kernel object; evaluated through ker.uKerMatrix(Mker, dy, n, ker.GetCtxPtr()).
//   FourierModes      : number of complex modes stored per kernel component.
//
// Steps performed by the inner lambda toroidal_greens_fn for one source:
//   1. Local frame: e3 = -dx/|dx|; e1 = normalized part of (Xt-x) orthogonal to e3, replaced by the
//      supplied e1_ when that part is exactly zero; e2 = normalized e3 x e1.
//   2. dist = sqrt((dot(Xt-x,e1)-r)^2 + dot(Xt-x,e3)^2); the ratio dist/r together with `digits`
//      selects the rule returned by ToroidalSpecialQuadRule for FourierModes+ModalUpsample modes.
//   3. Nodes are processed VecLen at a time with aligned loads. Note that 1 is added to the loaded
//      nds_cos_theta values to obtain `cost` (presumably the table stores cos(theta)-1 -- TODO confirm).
//      dy = (Xt-x) - r*(cost*e1 + sint*e2); n is the normalized vector
//      (cost*e1 + sint*e2)*norm_dy - dx*(dr/|dx|); da = |that vector| * r, computed with approx_rsqrt.
//   4. Kernel values times (quadrature weight * da) are stored row-wise in Mker_da, which lives in the
//      stack buffer mem_buff; Nnds < Nbuff (hard-coded 10000, marked TODO) is asserted first.
//   5. M = Mker_da * Mexp_iktheta, after which mode j is multiplied by exp_theta^j, where
//      exp_theta = (dot(e1,e1_), -dot(e2,e1_)).
//
// Everything after the bare `return;` that follows the batch loop is unreachable as written. It is a
// diagnostic that re-runs the computation with digits+1 (capped at 32) and the last compile-time
// argument set to 2, then compares the results.
template static void toroidal_greens_fn_batched(Matrix& M, const Tensor& x_trg, const Tensor& e_trg, const RealType r_trg, const Tensor& n_trg, const Matrix& x_src, const Matrix& dx_src, const Matrix& d2x_src, const Matrix& r_src, const Matrix& dr_src, const Matrix& e1_src, const Kernel& ker, const Integer FourierModes) { static constexpr Integer VecLen = DefaultVecLen(); using VecType = Vec; constexpr Integer COORD_DIM = 3; using Vec3 = Tensor; static constexpr Integer KDIM0 = Kernel::SrcDim(); static constexpr Integer KDIM1 = Kernel::TrgDim()/(trg_dot_prod?COORD_DIM:1); static constexpr Integer Nbuff = 10000; // TODO const Long BatchSize = M.Dim(0); SCTL_ASSERT(M.Dim(1) == KDIM0*KDIM1*FourierModes*2); SCTL_ASSERT( x_src.Dim(1) == BatchSize && x_src.Dim(0) == COORD_DIM); SCTL_ASSERT( dx_src.Dim(1) == BatchSize && dx_src.Dim(0) == COORD_DIM); SCTL_ASSERT(d2x_src.Dim(1) == BatchSize && d2x_src.Dim(0) == COORD_DIM); SCTL_ASSERT( r_src.Dim(1) == BatchSize && r_src.Dim(0) == 1); SCTL_ASSERT( dr_src.Dim(1) == BatchSize && dr_src.Dim(0) == 1); SCTL_ASSERT( e1_src.Dim(1) == BatchSize && e1_src.Dim(0) == COORD_DIM); const VecType n_trg_[COORD_DIM] = {n_trg(0,0),n_trg(1,0),n_trg(2,0)}; const Vec3 y_trg = x_trg + e_trg*r_trg; for (Long ii = 0; ii < BatchSize; ii++) { RealType r = r_src[0][ii], dr = dr_src[0][ii]; Vec3 x, dx, d2x, e1; for (Integer k = 0; k < COORD_DIM; k++) { // Set x, dx, d2x, e1 x (k,0) = x_src[k][ii]; dx (k,0) = dx_src[k][ii]; d2x(k,0) = d2x_src[k][ii]; e1 (k,0) = e1_src[k][ii]; } auto toroidal_greens_fn = [&ker,&n_trg_](Matrix& M, const Vec3& Xt, const Vec3& x, const Vec3& dx, const Vec3& d2x, const Vec3& e1_, const RealType r, const RealType dr, const Integer FourierModes) { SCTL_ASSERT(M.Dim(0) == KDIM0*KDIM1); SCTL_ASSERT(M.Dim(1) == FourierModes*2); const auto Xt_X0 = Xt-x; RealType dist; Vec3 e1, e2, e3; { // Set dist, e1, e2, e3 e3 = dx*(-1/sqrt(dot_prod(dx,dx))); e1 = Xt_X0 - e3 * dot_prod(Xt_X0,e3); if (dot_prod(e1,e1) == 0) e1 = e1_; e1 = e1 * 
(1/sqrt(dot_prod(e1,e1))); e2 = cross_prod(e3, e1); e2 = e2 * (1/sqrt(dot_prod(e2,e2))); RealType dist0 = dot_prod(Xt_X0, e1) - r; RealType dist1 = dot_prod(Xt_X0, e3); dist = sqrt(dist0*dist0 + dist1*dist1); } const auto exp_theta = Complex(dot_prod(e1,e1_), -dot_prod(e2,e1_)); Matrix Mexp_iktheta; Vector nds_cos_theta, nds_sin_theta, wts; ToroidalSpecialQuadRule(Mexp_iktheta, nds_cos_theta, nds_sin_theta, wts, FourierModes+ModalUpsample, dist/r, digits); const Long Nnds = wts.Dim(); SCTL_ASSERT(Nnds < Nbuff); { // Set M const RealType d2x_dot_e1 = e1(0,0)*d2x(0,0) + e1(1,0)*d2x(1,0) + e1(2,0)*d2x(2,0); const RealType d2x_dot_e2 = e2(0,0)*d2x(0,0) + e2(1,0)*d2x(1,0) + e2(2,0)*d2x(2,0); const RealType norm_dx_ = sqrt(dot_prod(dx,dx)); const RealType inv_norm_dx = 1/norm_dx_; const VecType norm_dx(norm_dx_); const VecType vec_dx[3] = {dx(0,0), dx(1,0), dx(2,0)}; const VecType vec_dy0[3] = {Xt(0,0)-x(0,0), Xt(1,0)-x(1,0), Xt(2,0)-x(2,0)}; alignas(sizeof(VecType)) StaticArray mem_buff; Matrix Mker_da(KDIM0*KDIM1, Nnds, mem_buff, false); for (Integer j = 0; j < Nnds; j+=VecLen) { // Set Mker_da VecType dy[3], n[3], da; { // Set dy, n, da VecType cost = VecType::LoadAligned(&nds_cos_theta[j])+(RealType)1; VecType sint = VecType::LoadAligned(&nds_sin_theta[j]); dy[0] = vec_dy0[0] - cost*(r*e1(0,0)) - sint*(r*e2(0,0)); dy[1] = vec_dy0[1] - cost*(r*e1(1,0)) - sint*(r*e2(1,0)); dy[2] = vec_dy0[2] - cost*(r*e1(2,0)) - sint*(r*e2(2,0)); VecType norm_dy = norm_dx - (cost*d2x_dot_e1 + sint*d2x_dot_e2) * (r*inv_norm_dx); n[0] = cost*e1(0,0)*norm_dy + sint*e2(0,0)*norm_dy - vec_dx[0]*(dr*inv_norm_dx); n[1] = cost*e1(1,0)*norm_dy + sint*e2(1,0)*norm_dy - vec_dx[1]*(dr*inv_norm_dx); n[2] = cost*e1(2,0)*norm_dy + sint*e2(2,0)*norm_dy - vec_dx[2]*(dr*inv_norm_dx); VecType da2 = n[0]*n[0] + n[1]*n[1] + n[2]*n[2]; VecType inv_da = approx_rsqrt(da2); da = da2 * inv_da * r; n[0] = n[0] * inv_da; n[1] = n[1] * inv_da; n[2] = n[2] * inv_da; //da = norm_dx*r - 
(n[0]*vec_d2x[0]+n[1]*vec_d2x[1]+n[2]*vec_d2x[2])*(r*r*inv_norm_dx); // dr == 0 } VecType Mker[KDIM0][Kernel::TrgDim()]; ker.template uKerMatrix(Mker, dy, n, ker.GetCtxPtr()); VecType da_wts = VecType::LoadAligned(&wts[j]) * da; for (Integer k0 = 0; k0 < KDIM0; k0++) { for (Integer k1 = 0; k1 < KDIM1; k1++) { if (trg_dot_prod) { VecType Mker_dot_n = FMA(Mker[k0][k1*COORD_DIM+0],n_trg_[0], FMA(Mker[k0][k1*COORD_DIM+1],n_trg_[1], Mker[k0][k1*COORD_DIM+2]*n_trg_[2])); (Mker_dot_n*da_wts).StoreAligned(&Mker_da[k0*KDIM1+k1][j]); } else { (Mker[k0][k1]*da_wts).StoreAligned(&Mker_da[k0*KDIM1+k1][j]); } } } } Matrix::GEMM(M, Mker_da, Mexp_iktheta); Complex exp_iktheta(1,0); for (Integer j = 0; j < FourierModes; j++) { for (Integer k = 0; k < KDIM0*KDIM1; k++) { Complex Mjk(M[k][j*2+0],M[k][j*2+1]); Mjk *= exp_iktheta; M[k][j*2+0] = Mjk.real; M[k][j*2+1] = Mjk.imag; } exp_iktheta *= exp_theta; } } }; Matrix M_toroidal_greens_fn(KDIM0*KDIM1, FourierModes*2, M[ii], false); toroidal_greens_fn(M_toroidal_greens_fn, y_trg, x, dx, d2x, e1, r, dr, FourierModes); } return; if (adap==0) { // Print toroidal quadrature error using ValueType = RealType; auto copy_matrix = [](Matrix& M_, const Matrix& M) { M_.ReInit(M.Dim(0), M.Dim(1)); for (Long i = 0; i < M.Dim(0)*M.Dim(1); i++) { M_[0][i] = (ValueType)M[0][i]; } }; Matrix M_; Tensor x_trg_; Tensor e_trg_; Tensor n_trg_; for (Long i = 0; i < 3; i++) { x_trg_(i,0) = (ValueType)x_trg(i,0); e_trg_(i,0) = (ValueType)e_trg(i,0); n_trg_(i,0) = (ValueType)n_trg(i,0); } Matrix x_src_ ; Matrix dx_src_ ; Matrix d2x_src_; Matrix r_src_ ; Matrix dr_src_ ; Matrix e1_src_ ; copy_matrix(M_ , M); copy_matrix(x_src_ , x_src); copy_matrix(dx_src_ , dx_src); copy_matrix(d2x_src_, d2x_src); copy_matrix(r_src_ , r_src); copy_matrix(dr_src_ , dr_src); copy_matrix(e1_src_ , e1_src); toroidal_greens_fn_batched<(digits<32?digits+1:digits), ModalUpsample, trg_dot_prod, ValueType, Kernel, 2>(M_, x_trg_, e_trg_/sqrt(dot_prod(e_trg_,e_trg_)), (ValueType)r_trg, 
// (Unreachable diagnostic of toroidal_greens_fn_batched, continued on the next line: it compares
// only the entries at index i*FourierModes*2, i.e. the first stored value of each
// (source, kernel-component) group, and keeps the largest relative error seen so far in the
// function-local static max_rel_err before writing to std::cout.)
// NOTE(review): the text right after `std::cout<` on the next line looks truncated; the end of
// toroidal_greens_fn_batched and the template header of DyadicQuad_s are not legible here.
//
// DyadicQuad_s (its definition begins partway through the next line)
// -------------------------------------------------------------------
// Appends to nds/wts a composite quadrature rule expressed as signed offsets from the point s.
// The clipping tests below (s+offset <= 1, s-offset >= 0) imply the integration interval is [0,1].
//   * A LogSingularityQuadRule(LogQuadOrder) panel of length len0 = min(0.5^levels, s, 1-s),
//     mirrored on both sides of s.
//   * LegendreQuadRule(LegQuadOrder) panels that start at len0 and whose start doubles each step
//     until len1 = min(s,1-s); each panel has length min(start, len1-start) and is mirrored.
//   * Further doubling panels from len1 up to len2 = max(s,1-s); here a node is kept only when
//     s+offset <= 1 (positive side) or s-offset >= 0 (negative side).
// nds and wts are extended with PushBack and are not cleared first. When `sort` is true the nodes
// are reordered ascending by sorting (node, original index) pairs, and the weights are permuted
// alongside.
// NOTE(review): both panel loops advance with start*=2, so they would not terminate if the starting
// value were 0 (s equal to 0 or 1, or 0.5^levels underflowing to 0) -- confirm callers exclude this.
n_trg_, x_src_, dx_src_, d2x_src_, r_src_, dr_src_, e1_src_, ker, FourierModes); static RealType max_rel_err = 0; RealType max_err = 0, max_val = 0; for (Long i = 0; i < BatchSize*KDIM0*KDIM1; i++) { RealType err = fabs(M[0][i*FourierModes*2] - (RealType)M_[0][i*FourierModes*2]); RealType val = fabs(M[0][i*FourierModes*2]); if (err > max_err) max_err = err; if (val > max_val) max_val = val; } if (max_val>0 && max_err/max_val > max_rel_err) { max_rel_err = max_err/max_val; std::cout< static void DyadicQuad_s(Vector& nds, Vector& wts, const Integer LegQuadOrder, const Integer LogQuadOrder, const ValueType s, const Integer levels, bool sort) { const auto& log_quad_nds = LogSingularityQuadRule(LogQuadOrder).first; const auto& log_quad_wts = LogSingularityQuadRule(LogQuadOrder).second; const auto& leg_nds = LegendreQuadRule(LegQuadOrder).first; const auto& leg_wts = LegendreQuadRule(LegQuadOrder).second; ValueType len0 = std::min(pow(0.5,levels), std::min(s, (1-s))); ValueType len1 = std::min(s, 1-s); ValueType len2 = std::max(s, 1-s); for (Long i = 0; i < log_quad_nds.Dim(); i++) { nds.PushBack( len0*log_quad_nds[i]); nds.PushBack(-len0*log_quad_nds[i]); wts.PushBack(len0*log_quad_wts[i]); wts.PushBack(len0*log_quad_wts[i]); } for (ValueType start = len0; start < len1; start*=2) { ValueType step_ = std::min(start, len1-start); for (Long i = 0; i < leg_nds.Dim(); i++) { nds.PushBack( start + step_*leg_nds[i]); nds.PushBack(-start - step_*leg_nds[i]); wts.PushBack(step_*leg_wts[i]); wts.PushBack(step_*leg_wts[i]); } } for (ValueType start = len1; start < len2; start*=2) { ValueType step_ = std::min(start, len2-start); for (Long i = 0; i < leg_nds.Dim(); i++) { if (s + start + step_*leg_nds[i] <= 1.0) { nds.PushBack( start + step_*leg_nds[i]); wts.PushBack(step_*leg_wts[i]); } if (s - start - step_*leg_nds[i] >= 0.0) { nds.PushBack(-start - step_*leg_nds[i]); wts.PushBack(step_*leg_wts[i]); } } } if (!sort) return; Vector nds_(nds.Dim()); Vector wts_(wts.Dim()); Vector> 
// (DyadicQuad_s finishes at the start of the next line with the optional sort described above.)
//
// SpecialQuadBuildBasisMatrix (its definition begins partway through the next line)
// ---------------------------------------------------------------------------------
// Fills quad_nds/quad_wts and the matrix M for a target at parameter s_trg on a straight model
// element:
//   * quad_nds/quad_wts come from DyadicQuad_s with LegQuadOrder = 2*max_digits, LogQuadOrder = 16,
//     RefLevels refinement levels and sort = true; quad_nds is then shifted by +s_trg.
//   * Minterp_quad_nds (Ncheb x quad_nds.Dim()) is filled by LagrangeInterp::Interpolate from
//     SlenderElemList::CenterlineNodes(Ncheb) to quad_nds.
//   * Model geometry at the Ncheb nodes: centerline along the z axis with
//     z = (node - s_trg)*elem_length, derivative (0,0,elem_length), zero second derivative,
//     radius 1, dr = 0 and e1 = (1,0,0). The target is x_trg = 0 with e_trg = n_trg = (1,0,0), and
//     r_trg = 1 is passed to toroidal_greens_fn_batched.
//   * The nodal data are mapped to the quadrature nodes with GEMMs against Minterp_quad_nds. After
//     that each column of e1_src is made orthogonal to dx_src and normalized (the inline label says
//     "e2_src", but the loop reads and writes e1_src).
//   * M_tor (quad_nds.Dim() x KDIM0*KDIM1*FourierModes*2) is produced by toroidal_greens_fn_batched.
//     TorGreensFnDigits converts TypeTraits SigBits to decimal digits (0.3010299957 ~ log10(2)); it
//     is presumably the `digits` argument of that call -- the argument list is not legible here.
//   * Output: M[i][j*K + k] = Minterp_quad_nds[j][i] * M_tor[i][k] with K = KDIM0*KDIM1*FourierModes*2,
//     so M is quad_nds.Dim() x (Ncheb*K). quad_wts is not multiplied into M in this function.
sort_pair; for (Long i = 0; i < nds.Dim(); i++) { sort_pair.PushBack(std::pair{nds[i], i}); } std::sort(sort_pair.begin(), sort_pair.end()); for (Long i = 0; i < nds.Dim(); i++) { const Long idx = sort_pair[i].second; nds_[i] = nds[idx]; wts_[i] = wts[idx]; } nds = nds_; wts = wts_; }; template static void SpecialQuadBuildBasisMatrix(Matrix& M, Vector& quad_nds, Vector& quad_wts, const Integer Ncheb, const Integer FourierModes, const ValueType s_trg, const Integer max_digits, const ValueType elem_length, const Integer RefLevels, const Kernel& ker) { // TODO: cleanup constexpr Integer COORD_DIM = 3; using Vec3 = Tensor; const Long LegQuadOrder = 2*max_digits; constexpr Long LogQuadOrder = 16; // this has non-negative weights constexpr Integer KDIM0 = Kernel::SrcDim(); constexpr Integer KDIM1 = Kernel::TrgDim() / (trg_dot_prod ? COORD_DIM : 1); // Adaptive quadrature rule DyadicQuad_s(quad_nds, quad_wts, LegQuadOrder, LogQuadOrder, s_trg, RefLevels, true); quad_nds += s_trg; // TODO: remove this Matrix Minterp_quad_nds; { // Set Minterp_quad_nds Minterp_quad_nds.ReInit(Ncheb, quad_nds.Dim()); Vector Vinterp_quad_nds(Ncheb*quad_nds.Dim(), Minterp_quad_nds.begin(), false); LagrangeInterp::Interpolate(Vinterp_quad_nds, SlenderElemList::CenterlineNodes(Ncheb), quad_nds); } Vec3 x_trg, e_trg, n_trg; x_trg(0,0) = 0; x_trg(1,0) = 0; x_trg(2,0) = 0; e_trg(0,0) = 1; e_trg(1,0) = 0; e_trg(2,0) = 0; n_trg(0,0) = 1; n_trg(1,0) = 0; n_trg(2,0) = 0; Vector radius( Ncheb); Vector coord (COORD_DIM*Ncheb); Vector dr ( Ncheb); Vector dx (COORD_DIM*Ncheb); Vector d2x (COORD_DIM*Ncheb); Vector e1 (COORD_DIM*Ncheb); for (Long i = 0; i < Ncheb; i++) { radius[i] = 1; dr[i] = 0; coord[0*Ncheb+i] = 0; coord[1*Ncheb+i] = 0; coord[2*Ncheb+i] = SlenderElemList::CenterlineNodes(Ncheb)[i] * elem_length - s_trg * elem_length; dx[0*Ncheb+i] = 0; dx[1*Ncheb+i] = 0; dx[2*Ncheb+i] = elem_length; d2x[0*Ncheb+i] = 0; d2x[1*Ncheb+i] = 0; d2x[2*Ncheb+i] = 0; e1[0*Ncheb+i] = 1; e1[1*Ncheb+i] = 0; 
e1[2*Ncheb+i] = 0; } Matrix r_src, dr_src, x_src, dx_src, d2x_src, e1_src; r_src .ReInit( 1,quad_nds.Dim()); dr_src .ReInit( 1,quad_nds.Dim()); x_src .ReInit(COORD_DIM,quad_nds.Dim()); dx_src .ReInit(COORD_DIM,quad_nds.Dim()); d2x_src.ReInit(COORD_DIM,quad_nds.Dim()); e1_src .ReInit(COORD_DIM,quad_nds.Dim()); Matrix::GEMM( x_src, Matrix(COORD_DIM,Ncheb, coord.begin(),false), Minterp_quad_nds); Matrix::GEMM( dx_src, Matrix(COORD_DIM,Ncheb, dx.begin(),false), Minterp_quad_nds); Matrix::GEMM(d2x_src, Matrix(COORD_DIM,Ncheb, d2x.begin(),false), Minterp_quad_nds); Matrix::GEMM( r_src, Matrix( 1,Ncheb,radius.begin(),false), Minterp_quad_nds); Matrix::GEMM( dr_src, Matrix( 1,Ncheb, dr.begin(),false), Minterp_quad_nds); Matrix::GEMM( e1_src, Matrix(COORD_DIM,Ncheb, e1.begin(),false), Minterp_quad_nds); for (Long j = 0; j < quad_nds.Dim(); j++) { // Set e2_src Vec3 e1, dx; for (Integer k = 0; k < COORD_DIM; k++) { e1(k,0) = e1_src[k][j]; dx(k,0) = dx_src[k][j]; } e1 = e1 - dx * dot_prod(e1, dx) * (1/dot_prod(dx,dx)); e1 = e1 * (1/sqrt(dot_prod(e1,e1))); for (Integer k = 0; k < COORD_DIM; k++) { e1_src[k][j] = e1(k,0); } } Matrix M_tor(quad_nds.Dim(), KDIM0*KDIM1*FourierModes*2); constexpr Integer TorGreensFnDigits = (Integer)(TypeTraits::SigBits*0.3010299957); toroidal_greens_fn_batched(M_tor, x_trg, e_trg, (ValueType)1, n_trg, x_src, dx_src, d2x_src, r_src, dr_src, e1_src, ker, FourierModes); M.ReInit(quad_nds.Dim(), Ncheb*FourierModes*2*KDIM0*KDIM1); for (Long i = 0; i < quad_nds.Dim(); i++) { for (Long j = 0; j < Ncheb; j++) { for (Long k = 0; k < KDIM0*KDIM1*FourierModes*2; k++) { M[i][j*KDIM0*KDIM1*FourierModes*2+k] = Minterp_quad_nds[j][i] * M_tor[i][k]; } } } } template static Vector> BuildSpecialQuadRules(const Integer Ncheb, const Integer FourierModes, const Integer trg_node_idx, const ValueType elem_length) { constexpr Integer Nlen = 20; // number of length samples in [elem_length/sqrt(2), elem_length*sqrt(2)] constexpr Integer max_digits = 19; const ValueType 
s_trg = SlenderElemList::CenterlineNodes(Ncheb)[trg_node_idx]; const Integer adap_depth = (Integer)(log(elem_length)/log(2)+4); const ValueType eps_buffer = std::min(3e-2/elem_length, 3e-4); // distance of closest node points to s_trg const ValueType eps = 8*machine_eps(); Kernel ker; Vector