Dhairya Malhotra há 5 anos atrás
pai
commit
4cd199f785
1 ficheiros alterados com 214 adições e 862 exclusões
  1. 214 862
      include/sctl/boundary_quadrature.hpp

+ 214 - 862
include/sctl/boundary_quadrature.hpp

@@ -1,6 +1,7 @@
 #ifndef _SCTL_BOUNDARY_QUADRATURE_HPP_
 #define _SCTL_BOUNDARY_QUADRATURE_HPP_
 
+#include <biest.hpp>
 #include <mutex>
 #include <atomic>
 #include <tuple>
@@ -2079,6 +2080,31 @@ template <class Real, Integer ORDER=10> class Stellarator {
       }
     };
 
+    struct BiotSavartGrad3D {
+      template <class ValueType> static constexpr ValueType ScaleFactor() {
+        return 1 / (4 * const_pi<ValueType>());
+      }
+      template <class ValueType> static void Eval(ValueType (&u)[3][9], const ValueType (&r)[3], const ValueType (&n)[3], void* ctx_ptr) {
+        ValueType r2 = r[0]*r[0]+r[1]*r[1]+r[2]*r[2];
+        ValueType rinv = (r2>1e-16 ? 1/sqrt<ValueType>(r2) : 0);
+        ValueType rinv2 = rinv * rinv;
+        ValueType rinv3 = rinv2 * rinv;
+        ValueType rinv5 = rinv2 * rinv3;
+
+        u[0][0] =                                      0; u[1][0] =              - 3 * r[2] * r[0] * rinv5; u[2][0] =                3 * r[1] * r[0] * rinv5;
+        u[0][1] =                                      0; u[1][0] =              - 3 * r[2] * r[1] * rinv5; u[2][0] = -(1) * rinv3 + 3 * r[1] * r[1] * rinv5;
+        u[0][2] =                                      0; u[1][0] =  (1) * rinv3 - 3 * r[2] * r[2] * rinv5; u[2][0] =                3 * r[1] * r[2] * rinv5;
+
+        u[0][3] =                3 * r[2] * r[0] * rinv5; u[1][1] =                                      0; u[2][1] =  (1) * rinv3 - 3 * r[0] * r[0] * rinv5;
+        u[0][4] =                3 * r[2] * r[1] * rinv5; u[1][1] =                                      0; u[2][1] =              - 3 * r[0] * r[1] * rinv5;
+        u[0][5] = -(1) * rinv3 + 3 * r[2] * r[2] * rinv5; u[1][1] =                                      0; u[2][1] =              - 3 * r[0] * r[2] * rinv5;
+
+        u[0][6] =              - 3 * r[1] * r[0] * rinv5; u[1][2] = -(1) * rinv3 + 3 * r[0] * r[0] * rinv5; u[2][2] =                                      0;
+        u[0][7] =  (1) * rinv3 - 3 * r[1] * r[1] * rinv5; u[1][2] =                3 * r[0] * r[1] * rinv5; u[2][2] =                                      0;
+        u[0][8] =              - 3 * r[1] * r[2] * rinv5; u[1][2] =                3 * r[0] * r[2] * rinv5; u[2][2] =                                      0;
+      }
+    };
+
     struct Laplace3D_dUxD {
       template <class ValueType> static constexpr ValueType ScaleFactor() {
         return 1 / (4 * const_pi<ValueType>());
@@ -2147,6 +2173,12 @@ template <class Real, Integer ORDER=10> class Stellarator {
       }
     };
 
+    static Real max_norm(const sctl::Vector<Real>& x) {
+      Real err = 0;
+      for (const auto& a : x) err = std::max(err, sctl::fabs<Real>(a));
+      return err;
+    }
+
   public:
     Stellarator(const Vector<Long>& NtNp = Vector<Long>()) {
       NtNp_ = NtNp;
@@ -2182,12 +2214,28 @@ template <class Real, Integer ORDER=10> class Stellarator {
       return elements;
     }
 
+    Long Nsurf() const {
+      return elem_dsp.Dim();
+    }
+    Long ElemDsp(Long s) const {
+      return elem_dsp[s];
+    }
+    Long NTor(Long s) const {
+      return NtNp_[s*2+0];
+    }
+    Long NPol(Long s) const {
+      return NtNp_[s*2+1];
+    }
+
     static void test() {
       constexpr Integer order_singular = 15;
       constexpr Integer order_direct = 35;
       Comm comm = Comm::World();
       Profile::Enable(true);
 
+      Real gmres_tol = 1e-8;
+      Long max_iter = 100;
+
       Stellarator<Real,ORDER> S;
       { // Init S
         Vector<Long> NtNp;
@@ -2196,6 +2244,151 @@ template <class Real, Integer ORDER=10> class Stellarator {
         S = Stellarator<Real,ORDER>(NtNp);
       }
 
+      auto cheb2grid = [] (const Vector<ElemBasis>& X, Long Mt, Long Mp, Long Nt, Long Np) {
+        const Long dof = X.Dim() / (Mt * Mp);
+        SCTL_ASSERT(X.Dim() == Mt * Mp *dof);
+
+        Vector<Real> Xf(dof*Nt*Np); Xf = 0;
+        const Long Nnodes = ElemBasis::Size();
+        const Matrix<Real>& Mnodes = Basis<Real,1,ORDER>::Nodes();
+        for (Long t = 0; t < Nt; t++) {
+          for (Long p = 0; p < Np; p++) {
+            Real theta = t / (Real)Nt;
+            Real phi   = p / (Real)Np;
+            Long i = (Long)(theta * Mt);
+            Long j = (Long)(phi   * Mp);
+            Real x = theta * Mt - i;
+            Real y = phi   * Mp - j;
+            Long elem_idx = i * Mp + j;
+
+            Vector<Real> Interp0(ORDER);
+            Vector<Real> Interp1(ORDER);
+            { // Set Interp0, Interp1
+              auto node = [&Mnodes] (Long i) {
+                return Mnodes[0][i];
+              };
+              for (Long i = 0; i < ORDER; i++) {
+                Real wt_x = 1, wt_y = 1;
+                for (Long j = 0; j < ORDER; j++) {
+                  if (j != i) {
+                    wt_x *= (x - node(j)) / (node(i) - node(j));
+                    wt_y *= (y - node(j)) / (node(i) - node(j));
+                  }
+                  Interp0[i] = wt_x;
+                  Interp1[i] = wt_y;
+                }
+              }
+            }
+
+            for (Long ii = 0; ii < ORDER; ii++) {
+              for (Long jj = 0; jj < ORDER; jj++) {
+                Long node_idx = jj * ORDER + ii;
+                for (Long k = 0; k < dof; k++) {
+                  Xf[(k*Nt+t)*Np+p] += X[elem_idx*dof+k][node_idx] * Interp0[ii] * Interp1[jj];
+                }
+              }
+            }
+          }
+        }
+        return Xf;
+      };
+      auto grid2cheb = [] (const Vector<Real>& Xf, Long Nt, Long Np, Long Mt, Long Mp) {
+        Long dof = Xf.Dim() / (Nt*Np);
+        SCTL_ASSERT(Xf.Dim() == dof*Nt*Np);
+        Vector<ElemBasis> X(Mt*Mp*dof);
+        constexpr Integer INTERP_ORDER = 12;
+
+        for (Long tt = 0; tt < Mt; tt++) {
+          for (Long pp = 0; pp < Mp; pp++) {
+            for (Long t = 0; t < ORDER; t++) {
+              for (Long p = 0; p < ORDER; p++) {
+                Matrix<Real> Mnodes = Basis<Real,1,ORDER>::Nodes();
+                Real theta = (tt + Mnodes[0][t]) / Mt;
+                Real phi   = (pp + Mnodes[0][p]) / Mp;
+                Long i = (Long)(theta * Nt);
+                Long j = (Long)(phi   * Np);
+                Real x = theta * Nt - i;
+                Real y = phi   * Np - j;
+
+                Vector<Real> Interp0(INTERP_ORDER);
+                Vector<Real> Interp1(INTERP_ORDER);
+                { // Set Interp0, Interp1
+                  auto node = [] (Long i) {
+                    return (Real)i - (INTERP_ORDER-1)/2;
+                  };
+                  for (Long i = 0; i < INTERP_ORDER; i++) {
+                    Real wt_x = 1, wt_y = 1;
+                    for (Long j = 0; j < INTERP_ORDER; j++) {
+                      if (j != i) {
+                        wt_x *= (x - node(j)) / (node(i) - node(j));
+                        wt_y *= (y - node(j)) / (node(i) - node(j));
+                      }
+                      Interp0[i] = wt_x;
+                      Interp1[i] = wt_y;
+                    }
+                  }
+                }
+
+                for (Long k = 0; k < dof; k++) {
+                  Real X0 = 0;
+                  for (Long ii = 0; ii < INTERP_ORDER; ii++) {
+                    for (Long jj = 0; jj < INTERP_ORDER; jj++) {
+                      Long idx_i = (i + ii-(INTERP_ORDER-1)/2 + Nt) % Nt;
+                      Long idx_j = (j + jj-(INTERP_ORDER-1)/2 + Np) % Np;
+                      X0 += Interp0[ii] * Interp1[jj] * Xf[(k*Nt+idx_i)*Np+idx_j];
+                    }
+                  }
+                  Long elem_idx = tt * Mp + pp;
+                  Long node_idx = p * ORDER + t;
+                  X[elem_idx*dof+k][node_idx] = X0;
+                }
+              }
+            }
+          }
+        }
+        return X;
+      };
+
+      Long Nelem = S.GetElemList().NElem();
+      Vector<ElemBasis> Jt(Nelem*COORD_DIM), Jp(Nelem*COORD_DIM);
+      for (Long k = 0; k < S.Nsurf(); k++) {
+        Long Nt = S.NTor(k)*ORDER, Np = S.NPol(k)*ORDER;
+        const auto& X_ = S.GetElemList().ElemVector();
+        Vector<ElemBasis> X(S.NTor(k)*S.NPol(k)*COORD_DIM, (Iterator<ElemBasis>)X_.begin()+S.ElemDsp(k)*COORD_DIM, false);
+
+        biest::Surface<Real> SS(Nt, Np);
+        biest::SurfaceOp<Real> surf_op(comm, Nt, Np);
+        SS.Coord() = cheb2grid(X, S.NTor(k), S.NPol(k), Nt, Np);
+
+        Vector<Real> dX, d2X;
+        surf_op.Grad2D(dX, SS.Coord());
+        surf_op.Grad2D(d2X, dX);
+
+        sctl::Vector<Real> Jt_(COORD_DIM * Nt * Np);
+        sctl::Vector<Real> Jp_(COORD_DIM * Nt * Np);
+        { // Set Jt_, Jp_
+          Vector<Real> DivV, InvLapDivV, GradInvLapDivV;
+          for (sctl::Long i = 0; i < Nt*Np; i++) { // Set V
+            for (sctl::Long k =0; k < COORD_DIM; k++) {
+              Jt_[k * Nt*Np + i] = dX[(k*2+0) * Nt*Np + i];
+              Jp_[k * Nt*Np + i] = dX[(k*2+1) * Nt*Np + i];
+            }
+          }
+
+          surf_op.SurfDiv(DivV, dX, Jt_);
+          surf_op.GradInvSurfLap(GradInvLapDivV, dX, d2X, DivV, gmres_tol * max_norm(Jt_) / max_norm(DivV), max_iter, 1.5);
+          Jt_ = Jt_ - GradInvLapDivV;
+
+          surf_op.SurfDiv(DivV, dX, Jp_);
+          surf_op.GradInvSurfLap(GradInvLapDivV, dX, d2X, DivV, gmres_tol * max_norm(Jp_) / max_norm(DivV), max_iter, 1.5);
+          Jp_ = Jp_ - GradInvLapDivV;
+        }
+        Vector<ElemBasis> Jt__(S.NTor(k)*S.NPol(k)*COORD_DIM, (Iterator<ElemBasis>)Jt.begin()+S.ElemDsp(k)*COORD_DIM, false);
+        Vector<ElemBasis> Jp__(S.NTor(k)*S.NPol(k)*COORD_DIM, (Iterator<ElemBasis>)Jp.begin()+S.ElemDsp(k)*COORD_DIM, false);
+        Jt__ = grid2cheb(Jt_, Nt, Np, S.NTor(k), S.NPol(k));
+        Jp__ = grid2cheb(Jp_, Nt, Np, S.NTor(k), S.NPol(k));
+      }
+
       Vector<ElemBasis> normal, area_elem;
       auto compute_dot_prod = [](const Vector<ElemBasis>& A, const Vector<ElemBasis>& B) {
         const Long Nelem = A.Dim() / COORD_DIM;
@@ -2279,7 +2472,13 @@ template <class Real, Integer ORDER=10> class Stellarator {
       S.quadrature_FxdU.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_FxdU, order_singular, order_direct, -1.0, comm);
       S.quadrature_dUxF.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_dUxF, order_singular, order_direct, -1.0, comm);
 
-      auto compute_B0 = [&S](const Real alpha) { // alpha/|r| \hat{\theta}
+      GenericKernel<BiotSavart3D> BiotSavart;
+      GenericKernel<BiotSavartGrad3D> BiotSavartGrad;
+      Quadrature<Real> quadrature_BS, quadrature_dBS;
+      quadrature_BS .template Setup<ElemBasis, ElemBasis>(S.GetElemList(), BiotSavart    , order_singular, order_direct, -1.0, comm, -0.01 * pow<-2,Real>(ORDER));
+      quadrature_dBS.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), BiotSavartGrad, order_singular, order_direct, -1.0, comm, -0.01 * pow<-2,Real>(ORDER));
+
+      auto compute_B0_deprecated = [&S](const Real alpha) { // alpha/|r| \hat{\theta}
         const Vector<ElemBasis> X = S.GetElemList().ElemVector();
         const Long Nelem = S.GetElemList().NElem();
         const Long Nnodes = ElemBasis::Size();
@@ -2310,7 +2509,7 @@ template <class Real, Integer ORDER=10> class Stellarator {
         }
         return B0;
       };
-      auto compute_dB0 = [&S](const Real alpha) { // alpha/|r| \hat{\theta}
+      auto compute_dB0_deprecated = [&S](const Real alpha) { // alpha/|r| \hat{\theta}
         const Vector<ElemBasis> X = S.GetElemList().ElemVector();
         const Long Nelem = S.GetElemList().NElem();
         const Long Nnodes = ElemBasis::Size();
@@ -2339,6 +2538,16 @@ template <class Real, Integer ORDER=10> class Stellarator {
         }
         return dB0;
       };
+      auto compute_B0 = [&S, &Jp,&BiotSavart,&quadrature_BS](const Real alpha) {
+        Vector<ElemBasis> B0;
+        quadrature_BS.Eval(B0, S.GetElemList(), Jp, BiotSavart);
+        return B0*alpha;
+      };
+      auto compute_dB0 = [&S, &Jp,&BiotSavartGrad,&quadrature_dBS](const Real alpha) {
+        Vector<ElemBasis> dB0;
+        quadrature_dBS.Eval(dB0, S.GetElemList(), Jp, BiotSavartGrad);
+        return dB0*alpha;
+      };
       auto compute_half_n_plus_dG = [&S, &normal](const Vector<ElemBasis>& sigma) { // B = n sigma/2 + dG[sigma]
         const Long Nelem = S.GetElemList().NElem();
         const Long Nnodes = ElemBasis::Size();
@@ -2531,18 +2740,10 @@ template <class Real, Integer ORDER=10> class Stellarator {
           ElemBasis::Grad(dX, S.GetElemList().ElemVector());
           for (Long i = 0; i < Nelem; i++) {
             for (Long j = 0; j < Nnodes; j++) {
-              Tensor<Real,true,COORD_DIM,2> dx;
-              dx(0,0) = dX[i*COORD_DIM*2+0][j];
-              dx(0,1) = dX[i*COORD_DIM*2+1][j];
-              dx(1,0) = dX[i*COORD_DIM*2+2][j];
-              dx(1,1) = dX[i*COORD_DIM*2+3][j];
-              dx(2,0) = dX[i*COORD_DIM*2+4][j];
-              dx(2,1) = dX[i*COORD_DIM*2+5][j];
-
               Real s = 1 / (area_elem[i][j] * S.NtNp_[0]);
-              for (Long k = 0; k < COORD_DIM; k++) {
-                density[i*COORD_DIM+k][j] = dx(k,1) * s;
-              }
+              density[i*COORD_DIM+0][j] = dX[i*COORD_DIM*2+1][j] * s;
+              density[i*COORD_DIM+1][j] = dX[i*COORD_DIM*2+3][j] * s;
+              density[i*COORD_DIM+2][j] = dX[i*COORD_DIM*2+5][j] * s;
             }
           }
         }
@@ -4168,855 +4369,6 @@ template <class Real, Integer ORDER=10> class Stellarator {
       }
     }
 
-    static void test_Askham() {
-      constexpr Integer order_singular = 15;
-      constexpr Integer order_direct = 35;
-      Comm comm = Comm::World();
-      Profile::Enable(true);
-
-      Stellarator<Real,ORDER> S;
-      { // Init S
-        Vector<Long> NtNp;
-        NtNp.PushBack(20);
-        NtNp.PushBack(4);
-        S = Stellarator<Real,ORDER>(NtNp);
-      }
-
-      Vector<ElemBasis> normal, area_elem;
-      auto compute_dot_prod = [](const Vector<ElemBasis>& A, const Vector<ElemBasis>& B) {
-        const Long Nelem = A.Dim() / COORD_DIM;
-        const Long Nnodes = ElemBasis::Size();
-        SCTL_ASSERT(A.Dim() == Nelem * COORD_DIM);
-        SCTL_ASSERT(B.Dim() == Nelem * COORD_DIM);
-        Vector<ElemBasis> AdotB(Nelem);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Real a_dot_b = 0;
-            a_dot_b += A[i*COORD_DIM+0][j]*B[i*COORD_DIM+0][j];
-            a_dot_b += A[i*COORD_DIM+1][j]*B[i*COORD_DIM+1][j];
-            a_dot_b += A[i*COORD_DIM+2][j]*B[i*COORD_DIM+2][j];
-            AdotB[i][j] = a_dot_b;
-          }
-        }
-        return AdotB;
-      };
-      auto compute_inner_prod = [&S, &area_elem](const Vector<ElemBasis>& A, const Vector<ElemBasis>& B) {
-        const auto& quad_wts = ElemBasis::QuadWts();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        const Long dof = B.Dim() / Nelem;
-        Real sum = 0;
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Real AdotB = 0;
-            for (Long k = 0; k < dof; k++) {
-              AdotB += A[i*dof+k][j] * B[i*dof+k][j];
-            }
-            sum += AdotB * area_elem[i][j] * quad_wts[j];
-          }
-        }
-        return sum;
-      };
-      auto compute_norm_area_elem = [&S](Vector<ElemBasis>& normal, Vector<ElemBasis>& area_elem){ // Set normal, area_elem
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> dX;
-        ElemBasis::Grad(dX, X);
-
-        area_elem.ReInit(Nelem);
-        normal.ReInit(Nelem * COORD_DIM);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM> x, n;
-            Tensor<Real,true,COORD_DIM,2> dx;
-            x(0) = X[i*COORD_DIM+0][j];
-            x(1) = X[i*COORD_DIM+1][j];
-            x(2) = X[i*COORD_DIM+2][j];
-
-            dx(0,0) = dX[i*COORD_DIM*2+0][j];
-            dx(0,1) = dX[i*COORD_DIM*2+1][j];
-            dx(1,0) = dX[i*COORD_DIM*2+2][j];
-            dx(1,1) = dX[i*COORD_DIM*2+3][j];
-            dx(2,0) = dX[i*COORD_DIM*2+4][j];
-            dx(2,1) = dX[i*COORD_DIM*2+5][j];
-
-            n(0) = dx(1,0) * dx(2,1) - dx(2,0) * dx(1,1);
-            n(1) = dx(2,0) * dx(0,1) - dx(0,0) * dx(2,1);
-            n(2) = dx(0,0) * dx(1,1) - dx(1,0) * dx(0,1);
-            Real area_elem_ = sqrt<Real>(n(0)*n(0) + n(1)*n(1) + n(2)*n(2));
-            Real ooae = 1 / area_elem_;
-            n(0) *= ooae;
-            n(1) *= ooae;
-            n(2) *= ooae;
-
-            normal[i*COORD_DIM+0][j] = n(0);
-            normal[i*COORD_DIM+1][j] = n(1);
-            normal[i*COORD_DIM+2][j] = n(2);
-            area_elem[i][j] = area_elem_;
-          }
-        }
-      };
-      compute_norm_area_elem(normal, area_elem);
-
-      S.quadrature_FxU .template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_FxU , order_singular, order_direct, -1.0, comm);
-      S.quadrature_DxU .template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_DxU , order_singular, order_direct, -1.0, comm);
-      S.quadrature_FxdU.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_FxdU, order_singular, order_direct, -1.0, comm);
-      S.quadrature_dUxF.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_dUxF, order_singular, order_direct, -1.0, comm);
-
-      auto compute_poloidal_circulation = [&S] (const Vector<ElemBasis>& B) {
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        const auto& quad_wts = Basis<Real,1,ORDER>::QuadWts();
-
-        Vector<ElemBasis> dX;
-        ElemBasis::Grad(dX, X);
-        const Long Nt = 40;
-        const Long Np = 8;
-        for (Long t = 0; t < Nt; t++) {
-          for (Long j = 0; j < ORDER; j++) {
-            Real sum = 0;
-            for (Long p = 0; p < Np; p++) {
-              for (Long i = 0; i < ORDER; i++) {
-                Long elem_idx = t*Np+p;
-                Long node_idx = i*ORDER+j;
-
-                Tensor<Real,true,COORD_DIM,2> dx;
-                dx(0,0) = dX[elem_idx*COORD_DIM*2+0][node_idx];
-                dx(0,1) = dX[elem_idx*COORD_DIM*2+1][node_idx];
-                dx(1,0) = dX[elem_idx*COORD_DIM*2+2][node_idx];
-                dx(1,1) = dX[elem_idx*COORD_DIM*2+3][node_idx];
-                dx(2,0) = dX[elem_idx*COORD_DIM*2+4][node_idx];
-                dx(2,1) = dX[elem_idx*COORD_DIM*2+5][node_idx];
-
-                Tensor<Real,true,COORD_DIM> b;
-                b(0) = B[elem_idx*COORD_DIM+0][node_idx];
-                b(1) = B[elem_idx*COORD_DIM+1][node_idx];
-                b(2) = B[elem_idx*COORD_DIM+2][node_idx];
-
-                sum += (b(0)*dx(0,1) + b(1)*dx(1,1) + b(2)*dx(2,1)) * quad_wts[i];
-              }
-            }
-            std::cout<<sum<<' ';
-          }
-        }
-        std::cout<<'\n';
-      };
-      auto compute_toroidal_circulation = [&S] (const Vector<ElemBasis>& B) {
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        const auto& quad_wts = Basis<Real,1,ORDER>::QuadWts();
-
-        Vector<ElemBasis> dX;
-        ElemBasis::Grad(dX, X);
-        const Long Nt = 40;
-        const Long Np = 8;
-        for (Long p = 0; p < Np; p++) {
-          for (Long i = 0; i < ORDER; i++) {
-            Real sum = 0;
-            for (Long t = 0; t < Nt; t++) {
-              for (Long j = 0; j < ORDER; j++) {
-                Long elem_idx = t*Np+p;
-                Long node_idx = i*ORDER+j;
-
-                Tensor<Real,true,COORD_DIM,2> dx;
-                dx(0,0) = dX[elem_idx*COORD_DIM*2+0][node_idx];
-                dx(0,1) = dX[elem_idx*COORD_DIM*2+1][node_idx];
-                dx(1,0) = dX[elem_idx*COORD_DIM*2+2][node_idx];
-                dx(1,1) = dX[elem_idx*COORD_DIM*2+3][node_idx];
-                dx(2,0) = dX[elem_idx*COORD_DIM*2+4][node_idx];
-                dx(2,1) = dX[elem_idx*COORD_DIM*2+5][node_idx];
-
-                Tensor<Real,true,COORD_DIM> b;
-                b(0) = B[elem_idx*COORD_DIM+0][node_idx];
-                b(1) = B[elem_idx*COORD_DIM+1][node_idx];
-                b(2) = B[elem_idx*COORD_DIM+2][node_idx];
-
-                sum += (b(0)*dx(0,0) + b(1)*dx(1,0) + b(2)*dx(2,0)) * quad_wts[j];
-              }
-            }
-            std::cout<<sum<<' ';
-          }
-        }
-        std::cout<<'\n';
-      };
-
-      auto compute_poloidal_circulation_ = [&S,&area_elem] (const Vector<ElemBasis>& B) {
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        const auto& quad_wts = ElemBasis::QuadWts();
-
-        Vector<ElemBasis> dX;
-        ElemBasis::Grad(dX, X);
-        Real sum = 0;
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM,2> dx;
-            dx(0,0) = dX[i*COORD_DIM*2+0][j];
-            dx(0,1) = dX[i*COORD_DIM*2+1][j];
-            dx(1,0) = dX[i*COORD_DIM*2+2][j];
-            dx(1,1) = dX[i*COORD_DIM*2+3][j];
-            dx(2,0) = dX[i*COORD_DIM*2+4][j];
-            dx(2,1) = dX[i*COORD_DIM*2+5][j];
-
-            Tensor<Real,true,COORD_DIM> b;
-            b(0) = B[i*COORD_DIM+0][j];
-            b(1) = B[i*COORD_DIM+1][j];
-            b(2) = B[i*COORD_DIM+2][j];
-
-            Real s = 1/area_elem[i][j];
-            sum += (b(0)*dx(0,1) + b(1)*dx(1,1) + b(2)*dx(2,1)) * s * area_elem[i][j] * quad_wts[j];
-          }
-        }
-        return sum;
-      };
-      auto compute_toroidal_circulation_ = [&S,&area_elem] (const Vector<ElemBasis>& B) {
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        const auto& quad_wts = ElemBasis::QuadWts();
-
-        Vector<ElemBasis> dX;
-        ElemBasis::Grad(dX, X);
-        Real sum = 0;
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM,2> dx;
-            dx(0,0) = dX[i*COORD_DIM*2+0][j];
-            dx(0,1) = dX[i*COORD_DIM*2+1][j];
-            dx(1,0) = dX[i*COORD_DIM*2+2][j];
-            dx(1,1) = dX[i*COORD_DIM*2+3][j];
-            dx(2,0) = dX[i*COORD_DIM*2+4][j];
-            dx(2,1) = dX[i*COORD_DIM*2+5][j];
-
-            Tensor<Real,true,COORD_DIM> b;
-            b(0) = B[i*COORD_DIM+0][j];
-            b(1) = B[i*COORD_DIM+1][j];
-            b(2) = B[i*COORD_DIM+2][j];
-
-            Real s = 1/area_elem[i][j];
-            sum += (b(0)*dx(0,0) + b(1)*dx(1,0) + b(2)*dx(2,0)) * s * area_elem[i][j] * quad_wts[j];
-          }
-        }
-        return sum;
-      };
-
-      auto compute_grad = [&S] (const Vector<ElemBasis>& F) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> du_dX(Nelem*COORD_DIM*2);
-        { // Set du_dX
-          Vector<ElemBasis> dX;
-          ElemBasis::Grad(dX, S.GetElemList().ElemVector());
-
-          auto inv2x2 = [](Tensor<Real, true, 2, 2> M) {
-            Tensor<Real, true, 2, 2> Mout;
-            Real oodet = 1 / (M(0,0) * M(1,1) - M(0,1) * M(1,0));
-            Mout(0,0) =  M(1,1) * oodet;
-            Mout(0,1) = -M(0,1) * oodet;
-            Mout(1,0) = -M(1,0) * oodet;
-            Mout(1,1) =  M(0,0) * oodet;
-            return Mout;
-          };
-          for (Long i = 0; i < Nelem; i++) {
-            for (Long j = 0; j < Nnodes; j++) {
-              Tensor<Real, true, 3, 2> dX_du;
-              dX_du(0,0) = dX[(i*COORD_DIM+0)*2+0][j];
-              dX_du(1,0) = dX[(i*COORD_DIM+1)*2+0][j];
-              dX_du(2,0) = dX[(i*COORD_DIM+2)*2+0][j];
-              dX_du(0,1) = dX[(i*COORD_DIM+0)*2+1][j];
-              dX_du(1,1) = dX[(i*COORD_DIM+1)*2+1][j];
-              dX_du(2,1) = dX[(i*COORD_DIM+2)*2+1][j];
-
-              Tensor<Real, true, 2, 2> G; // = dX_du.Transpose() * dX_du;
-              G(0,0) = dX_du(0,0) * dX_du(0,0) + dX_du(1,0) * dX_du(1,0) + dX_du(2,0) * dX_du(2,0);
-              G(0,1) = dX_du(0,0) * dX_du(0,1) + dX_du(1,0) * dX_du(1,1) + dX_du(2,0) * dX_du(2,1);
-              G(1,0) = dX_du(0,1) * dX_du(0,0) + dX_du(1,1) * dX_du(1,0) + dX_du(2,1) * dX_du(2,0);
-              G(1,1) = dX_du(0,1) * dX_du(0,1) + dX_du(1,1) * dX_du(1,1) + dX_du(2,1) * dX_du(2,1);
-
-              Tensor<Real, true, 2, 2> Ginv = inv2x2(G);
-              du_dX[(i*COORD_DIM+0)*2+0][j] = Ginv(0,0) * dX_du(0,0) + Ginv(0,1) * dX_du(0,1);
-              du_dX[(i*COORD_DIM+1)*2+0][j] = Ginv(0,0) * dX_du(1,0) + Ginv(0,1) * dX_du(1,1);
-              du_dX[(i*COORD_DIM+2)*2+0][j] = Ginv(0,0) * dX_du(2,0) + Ginv(0,1) * dX_du(2,1);
-              du_dX[(i*COORD_DIM+0)*2+1][j] = Ginv(1,0) * dX_du(0,0) + Ginv(1,1) * dX_du(0,1);
-              du_dX[(i*COORD_DIM+1)*2+1][j] = Ginv(1,0) * dX_du(1,0) + Ginv(1,1) * dX_du(1,1);
-              du_dX[(i*COORD_DIM+2)*2+1][j] = Ginv(1,0) * dX_du(2,0) + Ginv(1,1) * dX_du(2,1);
-            }
-          }
-        }
-
-        Vector<ElemBasis> dF;
-        ElemBasis::Grad(dF, F);
-
-        Long dof = F.Dim() / Nelem;
-        SCTL_ASSERT(F.Dim() == Nelem * dof);
-        Vector<ElemBasis> gradF(Nelem*dof*3);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            for (Long k = 0; k < dof; k++) {
-              gradF[(i*dof+k)*COORD_DIM+0][j] = dF[(i*dof+k)*2+0][j]*du_dX[(i*COORD_DIM+0)*2+0][j] + dF[(i*dof+k)*2+1][j]*du_dX[(i*COORD_DIM+0)*2+1][j];
-              gradF[(i*dof+k)*COORD_DIM+1][j] = dF[(i*dof+k)*2+0][j]*du_dX[(i*COORD_DIM+1)*2+0][j] + dF[(i*dof+k)*2+1][j]*du_dX[(i*COORD_DIM+1)*2+1][j];
-              gradF[(i*dof+k)*COORD_DIM+2][j] = dF[(i*dof+k)*2+0][j]*du_dX[(i*COORD_DIM+2)*2+0][j] + dF[(i*dof+k)*2+1][j]*du_dX[(i*COORD_DIM+2)*2+1][j];
-            }
-          }
-        }
-        return gradF;
-      };
-      auto compute_grad_adj = [&S,&area_elem] (const Vector<ElemBasis>& V) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> du_dX(Nelem*COORD_DIM*2);
-        { // Set du_dX
-          Vector<ElemBasis> dX;
-          ElemBasis::Grad(dX, S.GetElemList().ElemVector());
-
-          auto inv2x2 = [](Tensor<Real, true, 2, 2> M) {
-            Tensor<Real, true, 2, 2> Mout;
-            Real oodet = 1 / (M(0,0) * M(1,1) - M(0,1) * M(1,0));
-            Mout(0,0) =  M(1,1) * oodet;
-            Mout(0,1) = -M(0,1) * oodet;
-            Mout(1,0) = -M(1,0) * oodet;
-            Mout(1,1) =  M(0,0) * oodet;
-            return Mout;
-          };
-          for (Long i = 0; i < Nelem; i++) {
-            for (Long j = 0; j < Nnodes; j++) {
-              Tensor<Real, true, 3, 2> dX_du;
-              dX_du(0,0) = dX[(i*COORD_DIM+0)*2+0][j];
-              dX_du(1,0) = dX[(i*COORD_DIM+1)*2+0][j];
-              dX_du(2,0) = dX[(i*COORD_DIM+2)*2+0][j];
-              dX_du(0,1) = dX[(i*COORD_DIM+0)*2+1][j];
-              dX_du(1,1) = dX[(i*COORD_DIM+1)*2+1][j];
-              dX_du(2,1) = dX[(i*COORD_DIM+2)*2+1][j];
-
-              Tensor<Real, true, 2, 2> G; // = dX_du.Transpose() * dX_du;
-              G(0,0) = dX_du(0,0) * dX_du(0,0) + dX_du(1,0) * dX_du(1,0) + dX_du(2,0) * dX_du(2,0);
-              G(0,1) = dX_du(0,0) * dX_du(0,1) + dX_du(1,0) * dX_du(1,1) + dX_du(2,0) * dX_du(2,1);
-              G(1,0) = dX_du(0,1) * dX_du(0,0) + dX_du(1,1) * dX_du(1,0) + dX_du(2,1) * dX_du(2,0);
-              G(1,1) = dX_du(0,1) * dX_du(0,1) + dX_du(1,1) * dX_du(1,1) + dX_du(2,1) * dX_du(2,1);
-
-              Tensor<Real, true, 2, 2> Ginv = inv2x2(G);
-              du_dX[(i*COORD_DIM+0)*2+0][j] = Ginv(0,0) * dX_du(0,0) + Ginv(0,1) * dX_du(0,1);
-              du_dX[(i*COORD_DIM+1)*2+0][j] = Ginv(0,0) * dX_du(1,0) + Ginv(0,1) * dX_du(1,1);
-              du_dX[(i*COORD_DIM+2)*2+0][j] = Ginv(0,0) * dX_du(2,0) + Ginv(0,1) * dX_du(2,1);
-              du_dX[(i*COORD_DIM+0)*2+1][j] = Ginv(1,0) * dX_du(0,0) + Ginv(1,1) * dX_du(0,1);
-              du_dX[(i*COORD_DIM+1)*2+1][j] = Ginv(1,0) * dX_du(1,0) + Ginv(1,1) * dX_du(1,1);
-              du_dX[(i*COORD_DIM+2)*2+1][j] = Ginv(1,0) * dX_du(2,0) + Ginv(1,1) * dX_du(2,1);
-            }
-          }
-        }
-
-        Vector<ElemBasis> dudX_V(Nelem*2);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            dudX_V[i*2+0][j] = 0;
-            dudX_V[i*2+1][j] = 0;
-
-            dudX_V[i*2+0][j] += du_dX[(i*COORD_DIM+0)*2+0][j] * V[i*COORD_DIM+0][j];
-            dudX_V[i*2+0][j] += du_dX[(i*COORD_DIM+1)*2+0][j] * V[i*COORD_DIM+1][j];
-            dudX_V[i*2+0][j] += du_dX[(i*COORD_DIM+2)*2+0][j] * V[i*COORD_DIM+2][j];
-
-            dudX_V[i*2+1][j] += du_dX[(i*COORD_DIM+0)*2+1][j] * V[i*COORD_DIM+0][j];
-            dudX_V[i*2+1][j] += du_dX[(i*COORD_DIM+1)*2+1][j] * V[i*COORD_DIM+1][j];
-            dudX_V[i*2+1][j] += du_dX[(i*COORD_DIM+2)*2+1][j] * V[i*COORD_DIM+2][j];
-          }
-        }
-
-        Vector<ElemBasis> eye(Nnodes), Mgrad;
-        eye = 0;
-        for (Long i = 0; i < Nnodes; i++) eye[i][i] = 1;
-        ElemBasis::Grad(Mgrad, eye);
-
-        Vector<ElemBasis> grad_adj_V(Nelem);
-        const auto& quad_wts = ElemBasis::QuadWts();
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Real sum = 0;
-            for (Long k = 0; k < Nnodes; k++) {
-              sum += Mgrad[j*2+0][k] * dudX_V[i*2+0][k] * (area_elem[i][k] * quad_wts[k]) / (quad_wts[j] * area_elem[i][j]);
-              sum += Mgrad[j*2+1][k] * dudX_V[i*2+1][k] * (area_elem[i][k] * quad_wts[k]) / (quad_wts[j] * area_elem[i][j]);
-            }
-            grad_adj_V[i][j] = sum;
-          }
-        }
-        return grad_adj_V;
-      };
-      auto compute_div = [&S] (const Vector<ElemBasis>& F) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        SCTL_ASSERT(F.Dim() == Nelem*COORD_DIM);
-
-        Vector<ElemBasis> du_dX(Nelem*COORD_DIM*2);
-        { // Set du_dX
-          Vector<ElemBasis> dX;
-          ElemBasis::Grad(dX, S.GetElemList().ElemVector());
-
-          auto inv2x2 = [](Tensor<Real, true, 2, 2> M) {
-            Tensor<Real, true, 2, 2> Mout;
-            Real oodet = 1 / (M(0,0) * M(1,1) - M(0,1) * M(1,0));
-            Mout(0,0) =  M(1,1) * oodet;
-            Mout(0,1) = -M(0,1) * oodet;
-            Mout(1,0) = -M(1,0) * oodet;
-            Mout(1,1) =  M(0,0) * oodet;
-            return Mout;
-          };
-          for (Long i = 0; i < Nelem; i++) {
-            for (Long j = 0; j < Nnodes; j++) {
-              Tensor<Real, true, 3, 2> dX_du;
-              dX_du(0,0) = dX[(i*COORD_DIM+0)*2+0][j];
-              dX_du(1,0) = dX[(i*COORD_DIM+1)*2+0][j];
-              dX_du(2,0) = dX[(i*COORD_DIM+2)*2+0][j];
-              dX_du(0,1) = dX[(i*COORD_DIM+0)*2+1][j];
-              dX_du(1,1) = dX[(i*COORD_DIM+1)*2+1][j];
-              dX_du(2,1) = dX[(i*COORD_DIM+2)*2+1][j];
-
-              Tensor<Real, true, 2, 2> G; // = dX_du.Transpose() * dX_du;
-              G(0,0) = dX_du(0,0) * dX_du(0,0) + dX_du(1,0) * dX_du(1,0) + dX_du(2,0) * dX_du(2,0);
-              G(0,1) = dX_du(0,0) * dX_du(0,1) + dX_du(1,0) * dX_du(1,1) + dX_du(2,0) * dX_du(2,1);
-              G(1,0) = dX_du(0,1) * dX_du(0,0) + dX_du(1,1) * dX_du(1,0) + dX_du(2,1) * dX_du(2,0);
-              G(1,1) = dX_du(0,1) * dX_du(0,1) + dX_du(1,1) * dX_du(1,1) + dX_du(2,1) * dX_du(2,1);
-
-              Tensor<Real, true, 2, 2> Ginv = inv2x2(G);
-              du_dX[(i*COORD_DIM+0)*2+0][j] = Ginv(0,0) * dX_du(0,0) + Ginv(0,1) * dX_du(0,1);
-              du_dX[(i*COORD_DIM+1)*2+0][j] = Ginv(0,0) * dX_du(1,0) + Ginv(0,1) * dX_du(1,1);
-              du_dX[(i*COORD_DIM+2)*2+0][j] = Ginv(0,0) * dX_du(2,0) + Ginv(0,1) * dX_du(2,1);
-              du_dX[(i*COORD_DIM+0)*2+1][j] = Ginv(1,0) * dX_du(0,0) + Ginv(1,1) * dX_du(0,1);
-              du_dX[(i*COORD_DIM+1)*2+1][j] = Ginv(1,0) * dX_du(1,0) + Ginv(1,1) * dX_du(1,1);
-              du_dX[(i*COORD_DIM+2)*2+1][j] = Ginv(1,0) * dX_du(2,0) + Ginv(1,1) * dX_du(2,1);
-            }
-          }
-        }
-
-        Vector<ElemBasis> dF;
-        ElemBasis::Grad(dF, F);
-
-        Vector<ElemBasis> divF(Nelem);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Real divF_ = 0;
-            divF_ += dF[(i*COORD_DIM+0)*2+0][j]*du_dX[(i*COORD_DIM+0)*2+0][j] + dF[(i*COORD_DIM+0)*2+1][j]*du_dX[(i*COORD_DIM+0)*2+1][j];
-            divF_ += dF[(i*COORD_DIM+1)*2+0][j]*du_dX[(i*COORD_DIM+1)*2+0][j] + dF[(i*COORD_DIM+1)*2+1][j]*du_dX[(i*COORD_DIM+1)*2+1][j];
-            divF_ += dF[(i*COORD_DIM+2)*2+0][j]*du_dX[(i*COORD_DIM+2)*2+0][j] + dF[(i*COORD_DIM+2)*2+1][j]*du_dX[(i*COORD_DIM+2)*2+1][j];
-            divF[i][j] = divF_;
-          }
-        }
-        return divF;
-      };
-
-      auto compute_B0 = [&S](const Real alpha) { // alpha/|r| \hat{\theta}
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> B0(Nelem * COORD_DIM);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM> x, b0, axis;
-            x(0) = X[i*COORD_DIM+0][j];
-            x(1) = X[i*COORD_DIM+1][j];
-            x(2) = X[i*COORD_DIM+2][j];
-
-            axis(0) = 0;
-            axis(1) = 0;
-            axis(2) = 1;
-            b0(0) = axis(1) * x(2) - axis(2) * x(1);
-            b0(1) = axis(2) * x(0) - axis(0) * x(2);
-            b0(2) = axis(0) * x(1) - axis(1) * x(0);
-            Real scale = 1 / (b0(0)*b0(0) + b0(1)*b0(1) + b0(2)*b0(2));
-            b0(0) *= scale;
-            b0(1) *= scale;
-            b0(2) *= scale;
-
-            B0[i*COORD_DIM+0][j] = alpha * b0(0);
-            B0[i*COORD_DIM+1][j] = alpha * b0(1);
-            B0[i*COORD_DIM+2][j] = alpha * b0(2);
-          }
-        }
-        return B0;
-      };
-      auto compute_dB0 = [&S](const Real alpha) { // alpha/|r| \hat{\theta}
-        const Vector<ElemBasis> X = S.GetElemList().ElemVector();
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> dB0(Nelem * COORD_DIM * COORD_DIM);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM> x;
-            x(0) = X[i*COORD_DIM+0][j];
-            x(1) = X[i*COORD_DIM+1][j];
-            x(2) = X[i*COORD_DIM+2][j];
-            Real R2inv = 1 / (x(0)*x(0) + x(1)*x(1));
-
-            dB0[(i*COORD_DIM+0)*COORD_DIM+0][j] = alpha * (2*x(0)*x(1) * R2inv*R2inv);
-            dB0[(i*COORD_DIM+0)*COORD_DIM+1][j] = alpha * (-R2inv + 2*x(1)*x(1) * R2inv*R2inv);
-            dB0[(i*COORD_DIM+0)*COORD_DIM+2][j] = 0;
-
-            dB0[(i*COORD_DIM+1)*COORD_DIM+0][j] = alpha * (R2inv - 2*x(0)*x(0) * R2inv*R2inv);
-            dB0[(i*COORD_DIM+1)*COORD_DIM+1][j] = alpha * (-2*x(0)*x(1) * R2inv*R2inv);
-            dB0[(i*COORD_DIM+1)*COORD_DIM+2][j] = 0;
-
-            dB0[(i*COORD_DIM+2)*COORD_DIM+0][j] = 0;
-            dB0[(i*COORD_DIM+2)*COORD_DIM+1][j] = 0;
-            dB0[(i*COORD_DIM+2)*COORD_DIM+2][j] = 0;
-          }
-        }
-        return dB0;
-      };
-      auto compute_half_n_plus_dG = [&S, &normal](const Vector<ElemBasis>& sigma) { // B = n sigma/2 + dG[sigma]
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> B;
-        S.quadrature_FxdU.Eval(B, S.GetElemList(), sigma, S.Laplace_FxdU);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            for (Long k = 0; k < COORD_DIM; k++) {
-              B[i*COORD_DIM+k][j] -= 0.5*sigma[i][j]*normal[i*COORD_DIM+k][j];
-            }
-          }
-        }
-        return B;
-      };
-
-      auto compute_A11 = [&S,&normal,&compute_half_n_plus_dG,&compute_dot_prod](Vector<Real>& B_dot_n, const Vector<Real>& sigma) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        B_dot_n.ReInit(Nelem * Nnodes);
-        Vector<ElemBasis> sigma_(Nelem);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            sigma_[i][j] = sigma[i*Nnodes+j];
-          }
-        }
-        Vector<ElemBasis> B_dot_n_ = compute_dot_prod(normal, compute_half_n_plus_dG(sigma_));
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            B_dot_n[i*Nnodes+j] = B_dot_n_[i][j];
-          }
-        }
-      };
-      auto compute_A12 = [&S,&normal,&compute_dot_prod,&compute_B0](Vector<Real>& B_dot_n, const Real alpha) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        B_dot_n.ReInit(Nelem * Nnodes);
-        Vector<ElemBasis> B_dot_n_ = compute_dot_prod(normal, compute_B0(alpha));
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            B_dot_n[i*Nnodes+j] = B_dot_n_[i][j];
-          }
-        }
-      };
-      auto compute_A21 = [&S,&normal,&compute_half_n_plus_dG,&compute_poloidal_circulation_](const Vector<Real>& sigma) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> sigma_(Nelem);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            sigma_[i][j] = sigma[i*Nnodes+j];
-          }
-        }
-
-        if (0) { // alternate implementation
-          //Vector<ElemBasis> A21_(Nelem);
-          //Vector<Real> A21(Nelem*Nnodes);
-          //compute_A21adj(A21, 1);
-          //for (Long i = 0; i < Nelem; i++) {
-          //  for (Long j = 0; j < Nnodes; j++) {
-          //    A21_[i][j] = A21[i*Nnodes+j];
-          //  }
-          //}
-          //return compute_inner_prod(A21_, sigma_);
-        }
-        Vector<ElemBasis> B = compute_half_n_plus_dG(sigma_);
-
-        Vector<ElemBasis> J(Nelem * COORD_DIM);
-        for (Long i = 0; i < Nelem; i++) { // Set J
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM> b, n;
-            b(0) = B[i*COORD_DIM+0][j];
-            b(1) = B[i*COORD_DIM+1][j];
-            b(2) = B[i*COORD_DIM+2][j];
-
-            n(0) = normal[i*COORD_DIM+0][j];
-            n(1) = normal[i*COORD_DIM+1][j];
-            n(2) = normal[i*COORD_DIM+2][j];
-
-            J[i*COORD_DIM+0][j] = n(1) * b(2) - n(2) * b(1);
-            J[i*COORD_DIM+1][j] = n(2) * b(0) - n(0) * b(2);
-            J[i*COORD_DIM+2][j] = n(0) * b(1) - n(1) * b(0);
-          }
-        }
-
-        Vector<ElemBasis> A;
-        S.quadrature_FxU.Eval(A, S.GetElemList(), J, S.Laplace_FxU);
-        return compute_poloidal_circulation_(A)/S.NtNp_[0];
-      };
-      auto compute_A22 = [&S,&compute_B0,&normal,&compute_poloidal_circulation_](const Real alpha) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> B = compute_B0(alpha);
-
-        Vector<ElemBasis> J(Nelem * COORD_DIM);
-        for (Long i = 0; i < Nelem; i++) { // Set J
-          for (Long j = 0; j < Nnodes; j++) {
-            Tensor<Real,true,COORD_DIM> b, n;
-            b(0) = B[i*COORD_DIM+0][j];
-            b(1) = B[i*COORD_DIM+1][j];
-            b(2) = B[i*COORD_DIM+2][j];
-
-            n(0) = normal[i*COORD_DIM+0][j];
-            n(1) = normal[i*COORD_DIM+1][j];
-            n(2) = normal[i*COORD_DIM+2][j];
-
-            J[i*COORD_DIM+0][j] = n(1) * b(2) - n(2) * b(1);
-            J[i*COORD_DIM+1][j] = n(2) * b(0) - n(0) * b(2);
-            J[i*COORD_DIM+2][j] = n(0) * b(1) - n(1) * b(0);
-          }
-        }
-
-        Vector<ElemBasis> A;
-        S.quadrature_FxU.Eval(A, S.GetElemList(), J, S.Laplace_FxU);
-        return compute_poloidal_circulation_(A)/S.NtNp_[0];
-      };
-      auto compute_A = [&compute_A11,&compute_A12,&compute_A21,&compute_A22] (const Vector<Real>& x) {
-        const Vector<Real> sigma(x.Dim()-1,(Iterator<Real>)x.begin(),false);
-        const Real& alpha = x[x.Dim()-1];
-
-        Vector<Real> Ax;
-        Ax.ReInit(x.Dim());
-        Vector<Real> Bdotn(x.Dim()-1,Ax.begin(),false);
-        Real& flux = Ax[x.Dim()-1];
-
-        Vector<Real> Adotn_0, Adotn_1;
-        compute_A11(Adotn_0, sigma);
-        compute_A12(Adotn_1, alpha);
-        Bdotn = Adotn_0 + Adotn_1;
-
-        flux = compute_A21(sigma) + compute_A22(alpha);
-        return Ax;
-      };
-      auto compute_invA = [&S,&comm,&compute_A] (Vector<ElemBasis>& sigma, Real& alpha, Real flux) {
-        typename sctl::ParallelSolver<Real>::ParallelOp BIOp = [&compute_A](sctl::Vector<Real>* Ax, const sctl::Vector<Real>& x) {
-          (*Ax) = compute_A(x);
-        };
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<Real> rhs_(Nelem * Nnodes + 1);
-        rhs_ = 0;
-        rhs_[Nelem * Nnodes] = flux;
-
-        Vector<Real> x_(Nelem * Nnodes + 1);
-        x_ = 0;
-        ParallelSolver<Real> linear_solver(comm, true);
-        linear_solver(&x_, BIOp, rhs_, 1e-8, 50);
-
-        sigma.ReInit(Nelem);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            sigma[i][j] = x_[i*Nnodes+j];
-          }
-        }
-        alpha = x_[Nelem * Nnodes];
-      };
-      auto compute_invA_ = [&S,&comm,&compute_A] (Vector<Real>& b) {
-        typename sctl::ParallelSolver<Real>::ParallelOp BIOp = [&compute_A](sctl::Vector<Real>* Ax, const sctl::Vector<Real>& x) {
-          (*Ax) = compute_A(x);
-        };
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<Real> x(b.Dim());
-        x = 0;
-        ParallelSolver<Real> linear_solver(comm, true);
-        linear_solver(&x, BIOp, b, 1e-8, 50);
-        return x;
-      };
-
-      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      Real flux = 1.0, alpha;
-      Vector<ElemBasis> sigma(S.GetElemList().NElem());
-      compute_invA(sigma, alpha, flux);
-      Vector<ElemBasis> B = compute_half_n_plus_dG(sigma) + compute_B0(alpha);
-      Real g = compute_inner_prod(B, B);
-      std::cout<<"g = "<<g<<'\n';
-
-      { // Write VTU
-        VTUData vtu;
-        vtu.AddElems(S.GetElemList(), sigma, ORDER);
-        vtu.WriteVTK("sigma", comm);
-      }
-      { // Write VTU
-        VTUData vtu;
-        vtu.AddElems(S.GetElemList(), B, ORDER);
-        vtu.WriteVTK("B", comm);
-      }
-      { // Write VTU
-        VTUData vtu;
-        vtu.AddElems(S.GetElemList(), area_elem, ORDER);
-        vtu.WriteVTK("area_elem", comm);
-      }
-
-      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-      auto compute_B = [&sigma,&alpha,&S,&area_elem,&normal,&compute_norm_area_elem,&compute_invA,&compute_half_n_plus_dG,&compute_B0,&compute_inner_prod,&comm] (const Vector<ElemBasis>& nu, Real eps) {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-
-        Vector<ElemBasis> X_orig(Nelem*COORD_DIM);
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            X_orig[i*COORD_DIM+0][j] = S.Elem(i,0)[j];
-            X_orig[i*COORD_DIM+1][j] = S.Elem(i,1)[j];
-            X_orig[i*COORD_DIM+2][j] = S.Elem(i,2)[j];
-            S.Elem(i,0)[j] += eps*nu[i][j] * normal[i*COORD_DIM+0][j];
-            S.Elem(i,1)[j] += eps*nu[i][j] * normal[i*COORD_DIM+1][j];
-            S.Elem(i,2)[j] += eps*nu[i][j] * normal[i*COORD_DIM+2][j];
-          }
-        }
-        compute_norm_area_elem(normal, area_elem);
-        S.quadrature_FxdU.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_FxdU, order_singular, order_direct, -1.0, comm);
-
-        Real flux = 1.0, alpha;
-        Vector<ElemBasis> sigma;
-        compute_invA(sigma, alpha, flux);
-        Vector<ElemBasis> B = compute_half_n_plus_dG(sigma) + compute_B0(alpha);
-
-        for (Long i = 0; i < Nelem; i++) {
-          for (Long j = 0; j < Nnodes; j++) {
-            S.Elem(i,0)[j] = X_orig[i*COORD_DIM+0][j];
-            S.Elem(i,1)[j] = X_orig[i*COORD_DIM+1][j];
-            S.Elem(i,2)[j] = X_orig[i*COORD_DIM+2][j];
-          }
-        }
-        compute_norm_area_elem(normal, area_elem);
-        S.quadrature_FxdU.template Setup<ElemBasis, ElemBasis>(S.GetElemList(), S.Laplace_FxdU, order_singular, order_direct, -1.0, comm);
-
-        return B;
-      };
-
-      {
-        const Long Nelem = S.GetElemList().NElem();
-        const Long Nnodes = ElemBasis::Size();
-        Real eps = 1e-5;
-
-        Vector<ElemBasis> nu(Nelem);
-        nu = 1; //area_elem;
-
-        Vector<ElemBasis> mytest;
-        { // compute nu_n_dot_gradB_dot_n
-          auto compute_B_perturb_trg = [&S,&comm,&area_elem,&normal,&compute_norm_area_elem,&compute_dot_prod,&sigma,&alpha,&compute_B0](const Vector<ElemBasis>& nu, Real eps) {
-            const Long Nelem = S.GetElemList().NElem();
-            const Long Nnodes = ElemBasis::Size();
-
-            Vector<ElemBasis> X_orig(Nelem*COORD_DIM);
-            for (Long i = 0; i < Nelem; i++) {
-              for (Long j = 0; j < Nnodes; j++) {
-                X_orig[i*COORD_DIM+0][j] = S.Elem(i,0)[j];
-                X_orig[i*COORD_DIM+1][j] = S.Elem(i,1)[j];
-                X_orig[i*COORD_DIM+2][j] = S.Elem(i,2)[j];
-                S.Elem(i,0)[j] = S.Elem(i,0)[j] - 1e-4*normal[i*COORD_DIM+0][j] + eps*nu[i][j] * normal[i*COORD_DIM+0][j];
-                S.Elem(i,1)[j] = S.Elem(i,1)[j] - 1e-4*normal[i*COORD_DIM+1][j] + eps*nu[i][j] * normal[i*COORD_DIM+1][j];
-                S.Elem(i,2)[j] = S.Elem(i,2)[j] - 1e-4*normal[i*COORD_DIM+2][j] + eps*nu[i][j] * normal[i*COORD_DIM+2][j];
-              }
-            }
-            compute_norm_area_elem(normal, area_elem);
-            Vector<ElemBasis> my_normal = normal;
-            Vector<ElemBasis> B0 = compute_B0(alpha);
-            for (Long i = 0; i < Nelem; i++) {
-              for (Long j = 0; j < Nnodes; j++) {
-                S.Elem(i,0)[j] = X_orig[i*COORD_DIM+0][j];
-                S.Elem(i,1)[j] = X_orig[i*COORD_DIM+1][j];
-                S.Elem(i,2)[j] = X_orig[i*COORD_DIM+2][j];
-              }
-            }
-            compute_norm_area_elem(normal, area_elem);
-
-            Vector<Real> Xt(Nelem*Nnodes*COORD_DIM);
-            for (Long i = 0; i < Nelem; i++) {
-              for (Long j = 0; j < Nnodes; j++) {
-                for (Long k = 0; k < COORD_DIM; k++) {
-                  Xt[(i*Nnodes+j)*COORD_DIM+k] = S.Elem(i,k)[j] - 1e-4*normal[i*COORD_DIM+k][j] + eps*nu[i][j] * normal[i*COORD_DIM+k][j];
-                }
-              }
-            }
-            Vector<ElemBasis> B1;
-            Quadrature<Real> quadrature_FxdU;
-            quadrature_FxdU.template Setup<ElemBasis>(S.GetElemList(), Xt, S.Laplace_FxdU, order_singular, order_direct, -1, comm);
-            quadrature_FxdU.Eval(B1, S.GetElemList(), sigma, S.Laplace_FxdU);
-
-            return compute_dot_prod(B0+B1,my_normal);
-          };
-          mytest = (compute_B_perturb_trg(nu,eps) - compute_B_perturb_trg(nu,-eps)) * (1/(2*eps));
-        }
-        { // Write VTU
-          VTUData vtu;
-          vtu.AddElems(S.GetElemList(), mytest, ORDER);
-          vtu.WriteVTK("mytest", comm);
-        }
-
-        Vector<ElemBasis> nu_divB(Nelem);
-        { // compute nu_divB
-          nu_divB = compute_div(B);
-          for (Long i = 0; i < Nelem; i++) {
-            for (Long j = 0; j < Nnodes; j++) {
-              nu_divB[i][j] *= nu[i][j];
-            }
-          }
-        }
-        { // Write VTU
-          VTUData vtu;
-          vtu.AddElems(S.GetElemList(), nu_divB, ORDER);
-          vtu.WriteVTK("nu_divB", comm);
-        }
-
-        Vector<ElemBasis> B_dot_grad_nu;
-        B_dot_grad_nu = compute_dot_prod(B, compute_grad(nu));
-        { // Write VTU
-          VTUData vtu;
-          vtu.AddElems(S.GetElemList(), B_dot_grad_nu, ORDER);
-          vtu.WriteVTK("B_dot_grad_nu", comm);
-        }
-
-        Vector<ElemBasis> dBdnu_dot_n;
-        { // compute dBdnu_dot_n
-          auto B0 = compute_B(nu,-eps);
-          auto B1 = compute_B(nu,eps);
-          dBdnu_dot_n = compute_dot_prod((B1-B0)*(1.0/(2.0*eps)), normal);
-        }
-        { // Write VTU
-          VTUData vtu;
-          vtu.AddElems(S.GetElemList(), dBdnu_dot_n, ORDER);
-          vtu.WriteVTK("dBdnu_dot_n", comm);
-        }
-
-        { // Write VTU
-          VTUData vtu;
-          vtu.AddElems(S.GetElemList(), dBdnu_dot_n-B_dot_grad_nu, ORDER);
-          vtu.WriteVTK("err0", comm);
-        }
-        { // Write VTU
-          VTUData vtu;
-          vtu.AddElems(S.GetElemList(), mytest+B_dot_grad_nu+nu_divB, ORDER);
-          vtu.WriteVTK("err1", comm);
-        }
-      }
-
-      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    }
-
   private:
     void InitSurf(Long l) {
       const auto& nodes = ElemBasis::Nodes();