vor 7 Jahren · c6435186ea
--- a/include/sctl/morton.hpp
+++ b/include/sctl/morton.hpp
@@ -0,0 +1,230 @@
 
				+#ifndef _SCTL_MORTON_
			
 
				+#define _SCTL_MORTON_
			
 
				+
			
 
				+#include SCTL_INCLUDE(common.hpp)
			
 
				+#include <cstdint>
			
 
				+
			
 
				+#ifndef SCTL_MAX_DEPTH
			
 
				+#define SCTL_MAX_DEPTH 15
			
 
				+#endif
			
 
				+
			
 
				+namespace SCTL_NAMESPACE {
			
 
				+
			
 
				+template <class ValueType> class Vector;
			
 
				+
			
 
				+template <Integer DIM = 3> class Morton {
			
 
				+
			
 
				+ public:
			
 
				+  #if SCTL_MAX_DEPTH < 7
			
 
				+  typedef uint8_t UINT_T;
			
 
				+  typedef int8_t INT_T;
			
 
				+  #elif SCTL_MAX_DEPTH < 15
			
 
				+  typedef uint16_t UINT_T;
			
 
				+  typedef int16_t INT_T;
			
 
				+  #elif SCTL_MAX_DEPTH < 31
			
 
				+  typedef uint32_t UINT_T;
			
 
				+  typedef int32_t INT_T;
			
 
				+  #elif SCTL_MAX_DEPTH < 63
			
 
				+  typedef uint64_t UINT_T;
			
 
				+  typedef int64_t INT_T;
			
 
				+  #endif
			
 
				+
			
 
				+  static const Integer MAX_DEPTH = SCTL_MAX_DEPTH;
			
 
				+
			
 
				+  Morton() {
			
 
				+    depth = 0;
			
 
				+    for (Integer i = 0; i < DIM; i++) x[i] = 0;
			
 
				+  }
			
 
				+
			
 
				+  template <class T> Morton(ConstIterator<T> coord, uint8_t depth_ = MAX_DEPTH) {
			
 
				+    depth = depth_;
			
 
				+    assert(depth <= MAX_DEPTH);
			
 
				+    UINT_T mask = ~((((UINT_T)1) << (MAX_DEPTH - depth)) - 1);
			
 
				+    for (Integer i = 0; i < DIM; i++) x[i] = mask & (UINT_T)floor(coord[i] * maxCoord);
			
 
				+  }
			
 
				+
			
 
				+  uint8_t Depth() const { return depth; }
			
 
				+
			
 
				+  template <class T> void Coord(Iterator<T> coord) const {
			
 
				+    static const T s = 1.0 / ((T)maxCoord);
			
 
				+    for (Integer i = 0; i < DIM; i++) coord[i] = x[i] * s;
			
 
				+  }
			
 
				+
			
 
				+  Morton Next() const {
			
 
				+    UINT_T mask = ((UINT_T)1) << (MAX_DEPTH - depth);
			
 
				+    Integer d, i;
			
 
				+
			
 
				+    Morton m = *this;
			
 
				+    for (d = depth; d >= 0; d--) {
			
 
				+      for (i = 0; i < DIM; i++) {
			
 
				+        m.x[i] = (m.x[i] ^ mask);
			
 
				+        if ((m.x[i] & mask)) break;
			
 
				+      }
			
 
				+      if (i < DIM) break;
			
 
				+      mask = (mask << 1);
			
 
				+    }
			
 
				+
			
 
				+    if (d < 0) d = 0;
			
 
				+    m.depth = (uint8_t)d;
			
 
				+
			
 
				+    return m;
			
 
				+  }
			
 
				+
			
 
				+  Morton Ancestor(uint8_t ancestor_level) const {
			
 
				+    UINT_T mask = ~((((UINT_T)1) << (MAX_DEPTH - ancestor_level)) - 1);
			
 
				+
			
 
				+    Morton m;
			
 
				+    for (Integer i = 0; i < DIM; i++) m.x[i] = x[i] & mask;
			
 
				+    m.depth = ancestor_level;
			
 
				+    return m;
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * \brief Returns the deepest first descendant.
			
 
				+   */
			
 
				+  Morton DFD(uint8_t level = MAX_DEPTH) const {
			
 
				+    Morton m = *this;
			
 
				+    m.depth = level;
			
 
				+    return m;
			
 
				+  }
			
 
				+
			
 
				+  void NbrList(Vector<Morton> &nlst, uint8_t level, bool periodic) const {
			
 
				+    nlst.ReInit(1);
			
 
				+    UINT_T mask = ~((((UINT_T)1) << (MAX_DEPTH - level)) - 1);
			
 
				+    for (Integer i = 0; i < DIM; i++) nlst[0].x[i] = x[i] & mask;
			
 
				+    nlst[0].depth = level;
			
 
				+
			
 
				+    Morton m;
			
 
				+    Integer k = 1;
			
 
				+    mask = (((UINT_T)1) << (MAX_DEPTH - level));
			
 
				+    if (periodic) {
			
 
				+      for (Integer i = 0; i < DIM; i++) {
			
 
				+        for (Integer j = 0; j < k; j++) {
			
 
				+          m = nlst[j];
			
 
				+          INT_T xi = (INT_T)m.x[i] + (INT_T)mask;
			
 
				+          if (xi >= maxCoord) xi -= maxCoord;
			
 
				+          m.x[i] = xi;
			
 
				+          nlst.PushBack(m);
			
 
				+        }
			
 
				+        for (Integer j = 0; j < k; j++) {
			
 
				+          m = nlst[j];
			
 
				+          INT_T xi = (INT_T)m.x[i] - (INT_T)mask;
			
 
				+          if (xi < 0) xi += maxCoord;
			
 
				+          m.x[i] = xi;
			
 
				+          nlst.PushBack(m);
			
 
				+        }
			
 
				+        k = nlst.Dim();
			
 
				+      }
			
 
				+    } else {
			
 
				+      for (Integer i = 0; i < DIM; i++) {
			
 
				+        for (Integer j = 0; j < k; j++) {
			
 
				+          m = nlst[j];
			
 
				+          INT_T xi = (INT_T)m.x[i] + (INT_T)mask;
			
 
				+          m.x[i] = xi;
			
 
				+          if (xi < maxCoord) nlst.PushBack(m);
			
 
				+        }
			
 
				+        for (Integer j = 0; j < k; j++) {
			
 
				+          m = nlst[j];
			
 
				+          INT_T xi = (INT_T)m.x[i] - (INT_T)mask;
			
 
				+          m.x[i] = xi;
			
 
				+          if (xi >= 0) nlst.PushBack(m);
			
 
				+        }
			
 
				+        k = nlst.Dim();
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  void Children(Vector<Morton> &nlst) const {
			
 
				+    static const Integer cnt = (1UL << DIM);
			
 
				+    if (nlst.Dim() != cnt) nlst.ReInit(cnt);
			
 
				+
			
 
				+    for (Integer i = 0; i < DIM; i++) nlst[0].x[i] = x[i];
			
 
				+    nlst[0].depth = (uint8_t)(depth + 1);
			
 
				+
			
 
				+    Integer k = 1;
			
 
				+    UINT_T mask = (((UINT_T)1) << (MAX_DEPTH - (depth + 1)));
			
 
				+    for (Integer i = 0; i < DIM; i++) {
			
 
				+      for (Integer j = 0; j < k; j++) {
			
 
				+        nlst[j + k] = nlst[j];
			
 
				+        nlst[j + k].x[i] += mask;
			
 
				+      }
			
 
				+      k = (k << 1);
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  bool operator<(const Morton &m) const {
			
 
				+    UINT_T diff = 0;
			
 
				+    for (Integer i = 0; i < DIM; i++) diff = diff | (x[i] ^ m.x[i]);
			
 
				+    if (!diff) return depth < m.depth;
			
 
				+
			
 
				+    UINT_T mask = 1;
			
 
				+    for (Integer i = 4 * sizeof(UINT_T); i > 0; i = (i >> 1)) {
			
 
				+      UINT_T mask_ = (mask << i);
			
 
				+      if (mask_ <= diff) mask = mask_;
			
 
				+    }
			
 
				+
			
 
				+    for (Integer i = DIM - 1; i >= 0; i--) {
			
 
				+      if (mask & (x[i] ^ m.x[i])) return x[i] < m.x[i];
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  bool operator>(const Morton &m) const { return m < (*this); }
			
 
				+
			
 
				+  bool operator!=(const Morton &m) const {
			
 
				+    for (Integer i = 0; i < DIM; i++)
			
 
				+      if (x[i] != m.x[i]) return true;
			
 
				+    return (depth != m.depth);
			
 
				+  }
			
 
				+
			
 
				+  bool operator==(const Morton &m) const { return !(*this != m); }
			
 
				+
			
 
				+  bool operator<=(const Morton &m) const { return !(*this > m); }
			
 
				+
			
 
				+  bool operator>=(const Morton &m) const { return !(*this < m); }
			
 
				+
			
 
				+  bool isAncestor(Morton const &descendant) const { return descendant.depth > depth && descendant.Ancestor(depth) == *this; }
			
 
				+
			
 
				+  Long operator-(const Morton<DIM> &I) const {
			
 
				+    // Intersecting -1
			
 
				+    // Touching 0
			
 
				+
			
 
				+    Long offset0 = 1 << (MAX_DEPTH - depth - 1);
			
 
				+    Long offset1 = 1 << (MAX_DEPTH - I.depth - 1);
			
 
				+
			
 
				+    Long diff = 0;
			
 
				+    for (Integer i = 0; i < DIM; i++) {
			
 
				+      diff = std::max<Long>(diff, abs(((Long)x[i] + offset0) - ((Long)I.x[i] + offset1)));
			
 
				+    }
			
 
				+    if (diff < offset0 + offset1) return -1;
			
 
				+    Integer max_depth = std::max(depth, I.depth);
			
 
				+    diff = (diff - offset0 - offset1) >> (MAX_DEPTH - max_depth);
			
 
				+    return diff;
			
 
				+  }
			
 
				+
			
 
				+  friend std::ostream &operator<<(std::ostream &out, const Morton &mid) {
			
 
				+    double a = 0;
			
 
				+    double s = 1u << DIM;
			
 
				+    for (Integer j = MAX_DEPTH; j >= 0; j--) {
			
 
				+      for (Integer i = DIM - 1; i >= 0; i--) {
			
 
				+        s = s * 0.5;
			
 
				+        if (mid.x[i] & (((UINT_T)1) << j)) a += s;
			
 
				+      }
			
 
				+    }
			
 
				+    out << "(";
			
 
				+    for (Integer i = 0; i < DIM; i++) {
			
 
				+      out << mid.x[i] * 1.0 / maxCoord << ",";
			
 
				+    }
			
 
				+    out << (int)mid.depth << "," << a << ")";
			
 
				+    return out;
			
 
				+  }
			
 
				+
			
 
				+ private:
			
 
				+  static const UINT_T maxCoord = ((UINT_T)1) << (MAX_DEPTH);
			
 
				+
			
 
				+  // StaticArray<UINT_T,DIM> x;
			
 
				+  UINT_T x[DIM];
			
 
				+  uint8_t depth;
			
 
				+};
			
 
				+}
			
 
				+
			
 
				+#endif  //_SCTL_MORTON_
			
--- a/include/sctl/parallel_solver.hpp
+++ b/include/sctl/parallel_solver.hpp
@@ -0,0 +1,252 @@
 
				+#ifndef _SCTL_PARALLEL_SOLVER_HPP_
			
 
				+#define _SCTL_PARALLEL_SOLVER_HPP_
			
 
				+
			
 
				+#include SCTL_INCLUDE(vector.hpp)
			
 
				+#include SCTL_INCLUDE(comm.hpp)
			
 
				+
			
 
				+#include <functional>
			
 
				+
			
 
				+namespace SCTL_NAMESPACE {
			
 
				+
			
 
				+template <class Real> class ParallelSolver {
			
 
				+
			
 
				+ public:
			
 
				+  using ParallelOp = std::function<void(Vector<Real>*, const Vector<Real>&)>;
			
 
				+
			
 
				+  ParallelSolver(const Comm& comm, bool verbose = true) : comm_(comm), verbose_(verbose) {}
			
 
				+
			
 
				+  void operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter = -1);
			
 
				+
			
 
				+ private:
			
 
				+  bool verbose_;
			
 
				+  Comm comm_;
			
 
				+};
			
 
				+
			
 
				+}  // end namespace
			
 
				+
			
 
				+#ifdef SCTL_HAVE_PETSC
			
 
				+
			
 
				+#include <petscksp.h>
			
 
				+
			
 
				+namespace SCTL_NAMESPACE {
			
 
				+
			
 
				+template <class Real> int ParallelSolverMatVec(Mat M_, Vec x_, Vec Mx_) {
			
 
				+  PetscErrorCode ierr;
			
 
				+
			
 
				+  PetscInt N, N_;
			
 
				+  VecGetLocalSize(x_, &N);
			
 
				+  VecGetLocalSize(Mx_, &N_);
			
 
				+  SCTL_ASSERT(N == N_);
			
 
				+
			
 
				+  void* data = NULL;
			
 
				+  MatShellGetContext(M_, &data);
			
 
				+  auto& M = dynamic_cast<const typename ParallelSolver<Real>::ParallelOp&>(*(typename ParallelSolver<Real>::ParallelOp*)data);
			
 
				+
			
 
				+  const PetscScalar* x_ptr;
			
 
				+  ierr = VecGetArrayRead(x_, &x_ptr);
			
 
				+  CHKERRQ(ierr);
			
 
				+
			
 
				+  Vector<Real> x(N);
			
 
				+  for (Long i = 0; i < N; i++) x[i] = x_ptr[i];
			
 
				+  Vector<Real> Mx(N);
			
 
				+  M(&Mx, x);
			
 
				+
			
 
				+  PetscScalar* Mx_ptr;
			
 
				+  ierr = VecGetArray(Mx_, &Mx_ptr);
			
 
				+  CHKERRQ(ierr);
			
 
				+
			
 
				+  for (long i = 0; i < N; i++) Mx_ptr[i] = Mx[i];
			
 
				+  ierr = VecRestoreArray(Mx_, &Mx_ptr);
			
 
				+  CHKERRQ(ierr);
			
 
				+
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+template <class Real> inline void ParallelSolver<Real>::operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter) {
			
 
				+  PetscInt N = b.Dim();
			
 
				+  if (max_iter < 0) max_iter = N;
			
 
				+  MPI_Comm comm = comm_.GetMPI_Comm();
			
 
				+  PetscErrorCode ierr;
			
 
				+
			
 
				+  Mat PetscA;
			
 
				+  {  // Create Matrix. PetscA
			
 
				+    MatCreateShell(comm, N, N, PETSC_DETERMINE, PETSC_DETERMINE, (void*)&A, &PetscA);
			
 
				+    MatShellSetOperation(PetscA, MATOP_MULT, (void (*)(void))ParallelSolverMatVec<Real>);
			
 
				+  }
			
 
				+
			
 
				+  Vec Petsc_x, Petsc_b;
			
 
				+  {  // Create vectors
			
 
				+    VecCreateMPI(comm, N, PETSC_DETERMINE, &Petsc_b);
			
 
				+    VecCreateMPI(comm, N, PETSC_DETERMINE, &Petsc_x);
			
 
				+
			
 
				+    PetscScalar* b_ptr;
			
 
				+    ierr = VecGetArray(Petsc_b, &b_ptr);
			
 
				+    CHKERRABORT(comm, ierr);
			
 
				+    for (long i = 0; i < N; i++) b_ptr[i] = b[i];
			
 
				+    ierr = VecRestoreArray(Petsc_b, &b_ptr);
			
 
				+    CHKERRABORT(comm, ierr);
			
 
				+  }
			
 
				+
			
 
				+  // Create linear solver context
			
 
				+  KSP ksp;
			
 
				+  ierr = KSPCreate(comm, &ksp);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+  // Set operators. Here the matrix that defines the linear system
			
 
				+  // also serves as the preconditioning matrix.
			
 
				+  ierr = KSPSetOperators(ksp, PetscA, PetscA);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+  // Set runtime options
			
 
				+  KSPSetType(ksp, KSPGMRES);
			
 
				+  KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED);
			
 
				+  KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_iter);
			
 
				+  if (verbose_) KSPMonitorSet(ksp, KSPMonitorDefault, NULL, NULL);
			
 
				+  KSPGMRESSetRestart(ksp, max_iter);
			
 
				+  ierr = KSPSetFromOptions(ksp);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+  // -------------------------------------------------------------------
			
 
				+  // Solve the linear system: Ax=b
			
 
				+  // -------------------------------------------------------------------
			
 
				+  ierr = KSPSolve(ksp, Petsc_b, Petsc_x);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+  // View info about the solver
			
 
				+  // KSPView(ksp,PETSC_VIEWER_STDOUT_WORLD); CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+  // Iterations
			
 
				+  // PetscInt its;
			
 
				+  // ierr = KSPGetIterationNumber(ksp,&its); CHKERRABORT(comm, ierr);
			
 
				+  // ierr = PetscPrintf(PETSC_COMM_WORLD,"Iterations %D\n",its); CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+  {  // Set x
			
 
				+    const PetscScalar* x_ptr;
			
 
				+    ierr = VecGetArrayRead(Petsc_x, &x_ptr);
			
 
				+    CHKERRABORT(comm, ierr);
			
 
				+
			
 
				+    if (x->Dim() != N) x->ReInit(N);
			
 
				+    for (long i = 0; i < N; i++) (*x)[i] = x_ptr[i];
			
 
				+  }
			
 
				+
			
 
				+  ierr = KSPDestroy(&ksp);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+  ierr = MatDestroy(&PetscA);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+  ierr = VecDestroy(&Petsc_x);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+  ierr = VecDestroy(&Petsc_b);
			
 
				+  CHKERRABORT(comm, ierr);
			
 
				+}
			
 
				+
			
 
				+}  // end namespace
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+namespace SCTL_NAMESPACE {
			
 
				+
			
 
				+template <class Real> static Real inner_prod(const Vector<Real>& x, const Vector<Real>& y, const Comm& comm) {
			
 
				+  Real x_dot_y = 0;
			
 
				+  Long N = x.Dim();
			
 
				+  SCTL_ASSERT(y.Dim() == N);
			
 
				+  for (Long i = 0; i < N; i++) x_dot_y += x[i] * y[i];
			
 
				+
			
 
				+  Real x_dot_y_glb = 0;
			
 
				+  comm.Allreduce(SCTL_PTR2CONSTITR(Real, &x_dot_y, 1), SCTL_PTR2ITR(Real, &x_dot_y_glb, 1), 1, Comm::CommOp::SUM);
			
 
				+
			
 
				+  return x_dot_y_glb;
			
 
				+}
			
 
				+
			
 
				+template <class Real> inline void ParallelSolver<Real>::operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter) {
			
 
				+  Long N = b.Dim();
			
 
				+  if (max_iter < 0) max_iter = N;
			
 
				+  Real b_norm = sqrt(inner_prod(b, b, comm_));
			
 
				+
			
 
				+  Vector<Real> q(N);
			
 
				+  Vector<Vector<Real>> Q;
			
 
				+  Vector<Vector<Real>> H;
			
 
				+  {  // Initialize q, Q
			
 
				+    q = b;
			
 
				+    Real one_over_q_norm = 1.0 / sqrt<Real>(inner_prod(q, q, comm_));
			
 
				+    for (Long j = 0; j < N; j++) q[j] *= one_over_q_norm;
			
 
				+    Q.PushBack(q);
			
 
				+  }
			
 
				+
			
 
				+  Matrix<Real> H_;
			
 
				+  Vector<Real> Aq(N), y, h, r = b;
			
 
				+  while (1) {
			
 
				+    Real r_norm = sqrt(inner_prod(r, r, comm_));
			
 
				+    if (verbose_ && !comm_.Rank()) printf("%3d KSP Residual norm %.12e\n", H.Dim(), r_norm);
			
 
				+    if (r_norm < tol * b_norm || H.Dim() == max_iter) break;
			
 
				+
			
 
				+    A(&Aq, q);
			
 
				+    q = Aq;
			
 
				+
			
 
				+    h.ReInit(Q.Dim() + 1);
			
 
				+    for (Integer i = 0; i < Q.Dim(); i++) {  // Orthogonalized q
			
 
				+      h[i] = inner_prod(q, Q[i], comm_);
			
 
				+      for (Long j = 0; j < N; j++) q[j] -= h[i] * Q[i][j];
			
 
				+    }
			
 
				+    {  // Normalize q
			
 
				+      h[Q.Dim()] = sqrt<Real>(inner_prod(q, q, comm_));
			
 
				+      Real one_over_q_norm = 1.0 / h[Q.Dim()];
			
 
				+      for (Long j = 0; j < N; j++) q[j] *= one_over_q_norm;
			
 
				+    }
			
 
				+    Q.PushBack(q);
			
 
				+    H.PushBack(h);
			
 
				+
			
 
				+    {  // Set y
			
 
				+      H_.ReInit(H.Dim(), Q.Dim());
			
 
				+      H_.SetZero();
			
 
				+      for (Integer i = 0; i < H.Dim(); i++) {
			
 
				+        for (Integer j = 0; j < H[i].Dim(); j++) {
			
 
				+          H_[i][j] = H[i][j];
			
 
				+        }
			
 
				+      }
			
 
				+      H_ = H_.pinv();
			
 
				+
			
 
				+      y.ReInit(H_.Dim(1));
			
 
				+      for (Integer i = 0; i < y.Dim(); i++) {
			
 
				+        y[i] = H_[0][i] * b_norm;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    {  // Compute residual
			
 
				+      Vector<Real> Hy(Q.Dim());
			
 
				+      Hy.SetZero();
			
 
				+      for (Integer i = 0; i < H.Dim(); i++) {
			
 
				+        for (Integer j = 0; j < H[i].Dim(); j++) {
			
 
				+          Hy[j] += H[i][j] * y[i];
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      Vector<Real> QHy(N);
			
 
				+      QHy.SetZero();
			
 
				+      for (Integer i = 0; i < Q.Dim(); i++) {
			
 
				+        for (Long j = 0; j < N; j++) {
			
 
				+          QHy[j] += Q[i][j] * Hy[i];
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      for (Integer j = 0; j < N; j++) {  // Set r
			
 
				+        r[j] = b[j] - QHy[j];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  {  // Set x
			
 
				+    if (x->Dim() != N) x->ReInit(N);
			
 
				+    x->SetZero();
			
 
				+    for (Integer i = 0; i < y.Dim(); i++) {
			
 
				+      for (Integer j = 0; j < N; j++) {
			
 
				+        (*x)[j] += y[i] * Q[i][j];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+}  // end namespace
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#endif  //_SCTL_PARALLEL_SOLVER_HPP_