123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349 |
- #ifndef _SCTL_PARALLEL_SOLVER_HPP_
- #define _SCTL_PARALLEL_SOLVER_HPP_
- #include <sctl/common.hpp>
- #include SCTL_INCLUDE(comm.hpp)
- #include SCTL_INCLUDE(mem_mgr.hpp)
- #include SCTL_INCLUDE(math_utils.hpp)
- #include <functional>
- namespace SCTL_NAMESPACE {
- template <class ValueType> class Vector;
- template <class ValueType> class Matrix;
- template <class Real> class ParallelSolver {
- public:
- using ParallelOp = std::function<void(Vector<Real>*, const Vector<Real>&)>;
- ParallelSolver(const Comm& comm = Comm::Self(), bool verbose = true) : comm_(comm), verbose_(verbose) {}
- void operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, const Real tol, const Integer max_iter = -1, const bool use_abs_tol = false);
- static void test(Long N = 15) {
- srand48(0);
- Matrix<Real> A(N, N);
- Vector<Real> b(N), x;
- for (Long i = 0; i < N; i++) {
- b[i] = drand48();
- for (Long j = 0; j < N; j++) {
- A[i][j] = drand48();
- }
- }
- auto LinOp = [&A](Vector<Real>* Ax, const Vector<Real>& x) {
- const Long N = x.Dim();
- Ax->ReInit(N);
- Matrix<Real> Ax_(N, 1, Ax->begin(), false);
- Ax_ = A * Matrix<Real>(N, 1, (Iterator<Real>)x.begin(), false);
- };
- ParallelSolver<Real> solver;
- solver(&x, LinOp, b, 1e-10, -1, false);
- auto print_error = [N,&A,&b](const Vector<Real>& x) {
- Real max_err = 0;
- auto Merr = A*Matrix<Real>(N, 1, (Iterator<Real>)x.begin(), false) - Matrix<Real>(N, 1, b.begin(), false);
- for (const auto& a : Merr) max_err = std::max(max_err, fabs(a));
- std::cout<<"Maximum error = "<<max_err<<'\n';
- };
- print_error(x);
- }
- private:
- void GenericGMRES(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, const Real tol, Integer max_iter, const bool use_abs_tol);
- Comm comm_;
- bool verbose_;
- };
- } // end namespace
- namespace SCTL_NAMESPACE {
- template <class Real> static Real inner_prod(const Vector<Real>& x, const Vector<Real>& y, const Comm& comm) {
- Real x_dot_y = 0;
- Long N = x.Dim();
- SCTL_ASSERT(y.Dim() == N);
- for (Long i = 0; i < N; i++) x_dot_y += x[i] * y[i];
- Real x_dot_y_glb = 0;
- comm.Allreduce(Ptr2ConstItr<Real>(&x_dot_y, 1), Ptr2Itr<Real>(&x_dot_y_glb, 1), 1, Comm::CommOp::SUM);
- return x_dot_y_glb;
- }
- template <class Real> inline void ParallelSolver<Real>::GenericGMRES(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter, bool use_abs_tol) {
- const Long N = b.Dim();
- if (max_iter < 0) { // set max_iter
- StaticArray<Long,2> NN{N,0};
- comm_.Allreduce(NN+0, NN+1, 1, Comm::CommOp::SUM);
- max_iter = NN[1];
- }
- static constexpr Real ARRAY_RESIZE_FACTOR = 1.618;
- Vector<Real> Q_mat, H_mat;
- auto ResizeVector = [](Vector<Real>& v, const Long N0) {
- if (v.Dim() < N0) {
- Vector<Real> v_(N0);
- for (Long i = 0; i < v.Dim(); i++) v_[i] = v[i];
- for (Long i = v.Dim(); i < N0; i++) v_[i] = 0;
- v.Swap(v_);
- }
- };
- auto Q_row = [N,&Q_mat,&ResizeVector](Long i) -> Iterator<Real> {
- const Long idx = i*N;
- if (Q_mat.Dim() <= idx+N) {
- ResizeVector(Q_mat, (Long)((idx+N)*ARRAY_RESIZE_FACTOR));
- }
- return Q_mat.begin() + idx;
- };
- auto Q = [&Q_row](Long i, Long j) -> Real& {
- return Q_row(i)[j];
- };
- auto H_row = [&H_mat,&ResizeVector](Long i) -> Iterator<Real> {
- const Long idx = i*(i+1)/2;
- if (H_mat.Dim() <= idx+i+1) ResizeVector(H_mat, (Long)((idx+i+1)*ARRAY_RESIZE_FACTOR));
- return H_mat.begin() + idx;
- };
- auto H = [&H_row](Long i, Long j) -> Real& {
- return H_row(i)[j];
- };
- auto apply_givens_rotation = [](Vector<Real>& h, Real& cs_k, Real& sn_k, const Vector<Real>& cs, const Vector<Real>& sn, const Long k) {
- // apply for ith row
- for (Long i = 0; i < k; i++) {
- Real temp = cs[i] * h[i] + sn[i] * h[i+1];
- h[i+1] = -sn[i] * h[i] + cs[i] * h[i+1];
- h[i] = temp;
- }
- // update the next sin cos values for rotation
- const Real t = sqrt<Real>(h[k]*h[k] + h[k+1]*h[k+1]);
- cs_k = h[k] / t;
- sn_k = h[k+1] / t;
- // eliminate H(i + 1, i)
- h[k] = cs_k * h[k] + sn_k * h[k+1];
- h[k+1] = 0.0;
- };
- auto arnoldi = [this,N,&Q_row,&Q](Vector<Real>& h, Vector<Real>& q, const ParallelOp& A, const Long k) {
- q.ReInit(N); // Krylov Vector
- A(&q, Vector<Real>(N, Q_row(k), false));
- for (Long i = 0; i < k+1; i++) { // Modified Gram-Schmidt, keeping the Hessenberg matrix
- h[i] = inner_prod(q, Vector<Real>(N, Q_row(i), false), comm_);
- for (Long j = 0; j < N; j++) {
- q[j] -= h[i] * Q(i,j);
- }
- }
- h[k+1] = sqrt<Real>(inner_prod(q, q, comm_));
- q *= 1/h[k+1];
- };
- Vector<Real> r;
- if (x->Dim()) { // r = b - A * x;
- Vector<Real> Ax;
- A(&Ax, *x);
- r = b - Ax;
- } else {
- r = b;
- x->ReInit(N);
- x->SetZero();
- }
- const Real b_norm = sqrt<Real>(inner_prod(b, b, comm_));
- const Real abs_tol = tol * (use_abs_tol ? 1 : b_norm);
- const Real r_norm = sqrt<Real>(inner_prod(r, r, comm_));
- for (Long i = 0; i < N; i++) Q(0,i) = r[i] / r_norm;
- Vector<Real> beta(1); beta = r_norm;
- Vector<Real> sn, cs, h_k, q_k(N);
- Long k = 0;
- Real error = r_norm;
- for (; k < max_iter && error > abs_tol; k++) {
- if (verbose_ && !comm_.Rank()) printf("%3lld KSP Residual norm %.12e\n", (long long)k, (double)error);
- if (sn.Dim() <= k) ResizeVector(sn, (Long)((k+1)*ARRAY_RESIZE_FACTOR));
- if (cs.Dim() <= k) ResizeVector(cs, (Long)((k+1)*ARRAY_RESIZE_FACTOR));
- if (beta.Dim() <= k+1) ResizeVector(beta, (Long)((k+2)*ARRAY_RESIZE_FACTOR));
- if ( h_k.Dim() <= k+1) ResizeVector( h_k, (Long)((k+2)*ARRAY_RESIZE_FACTOR));
- arnoldi(h_k, q_k, A, k);
- apply_givens_rotation(h_k, cs[k], sn[k], cs, sn, k); // eliminate the last element in H ith row and update the rotation matrix
- for (Long i = 0; i < k+1; i++) H(k,i) = h_k[i];
- for (Long i = 0; i < N; i++) Q(k+1,i) = q_k[i];
- // update the residual vector
- beta[k+1] = -sn[k] * beta[k];
- beta[k] = cs[k] * beta[k];
- error = fabs(beta[k+1]);
- }
- if (verbose_ && !comm_.Rank()) printf("%3lld KSP Residual norm %.12e\n", (long long)k, (double)error);
- for (Long i = k-1; i >= 0; i--) { // beta <-- beta * inv(H); (through back substitution)
- beta[i] /= H(i,i);
- for (Long j = 0; j < i; j++) {
- beta[j] -= beta[i] * H(i,j);
- }
- }
- for (Long i = 0; i < N; i++) { // x <-- beta * Q
- for (Long j = 0; j < k; j++) {
- (*x)[i] += beta[j] * Q(j,i);
- }
- }
- }
- template <class Real> inline void ParallelSolver<Real>::operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, const Real tol, const Integer max_iter, const bool use_abs_tol) {
- GenericGMRES(x, A, b, tol, max_iter, use_abs_tol);
- }
- } // end namespace
- #ifdef SCTL_HAVE_PETSC
- #include <petscksp.h>
- namespace SCTL_NAMESPACE {
- template <class Real> int ParallelSolverMatVec(Mat M_, ::Vec x_, ::Vec Mx_) {
- PetscErrorCode ierr;
- PetscInt N, N_;
- VecGetLocalSize(x_, &N);
- VecGetLocalSize(Mx_, &N_);
- SCTL_ASSERT(N == N_);
- void* data = nullptr;
- MatShellGetContext(M_, &data);
- auto& M = dynamic_cast<const typename ParallelSolver<Real>::ParallelOp&>(*(typename ParallelSolver<Real>::ParallelOp*)data);
- const PetscScalar* x_ptr;
- ierr = VecGetArrayRead(x_, &x_ptr);
- CHKERRQ(ierr);
- Vector<Real> x(N);
- for (Long i = 0; i < N; i++) x[i] = (Real)x_ptr[i];
- Vector<Real> Mx(N);
- M(&Mx, x);
- PetscScalar* Mx_ptr;
- ierr = VecGetArray(Mx_, &Mx_ptr);
- CHKERRQ(ierr);
- for (long i = 0; i < N; i++) Mx_ptr[i] = Mx[i];
- ierr = VecRestoreArray(Mx_, &Mx_ptr);
- CHKERRQ(ierr);
- return 0;
- }
- PetscErrorCode MyKSPMonitor(KSP ksp, PetscInt n, PetscReal rnorm, void *dummy) {
- Comm* comm = (Comm*)dummy;
- if (!comm->Rank()) printf("%3lld KSP Residual norm %.12e\n", (long long)n, (double)rnorm);
- //PetscPrintf(PETSC_COMM_WORLD,"iteration %D KSP Residual norm %14.12e \n",n,rnorm);
- //PetscViewerAndFormat *vf;
- //PetscViewerAndFormatCreate(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_DEFAULT, &vf);
- //KSPMonitorResidual(ksp, n, rnorm, vf);
- //PetscViewerAndFormatDestroy(&vf);
- return 0;
- }
- template <class Real> inline void PETScGMRES(Vector<Real>* x, const typename ParallelSolver<Real>::ParallelOp& A, const Vector<Real>& b, const Real tol, Integer max_iter, const bool use_abs_tol, const bool verbose_, const Comm& comm_) {
- PetscInt N = b.Dim();
- if (max_iter < 0) { // set max_iter
- StaticArray<Long,2> NN{N,0};
- comm_.Allreduce(NN+0, NN+1, 1, Comm::CommOp::SUM);
- max_iter = NN[1];
- }
- const MPI_Comm comm = comm_.GetMPI_Comm();
- PetscErrorCode ierr;
- Mat PetscA;
- { // Create Matrix. PetscA
- MatCreateShell(comm, N, N, PETSC_DETERMINE, PETSC_DETERMINE, (void*)&A, &PetscA);
- MatShellSetOperation(PetscA, MATOP_MULT, (void (*)(void))ParallelSolverMatVec<Real>);
- }
- ::Vec Petsc_x, Petsc_b;
- { // Create vectors
- VecCreateMPI(comm, N, PETSC_DETERMINE, &Petsc_b);
- VecCreateMPI(comm, N, PETSC_DETERMINE, &Petsc_x);
- PetscScalar* b_ptr;
- ierr = VecGetArray(Petsc_b, &b_ptr);
- CHKERRABORT(comm, ierr);
- for (long i = 0; i < N; i++) b_ptr[i] = b[i];
- ierr = VecRestoreArray(Petsc_b, &b_ptr);
- CHKERRABORT(comm, ierr);
- }
- // Create linear solver context
- KSP ksp;
- ierr = KSPCreate(comm, &ksp);
- CHKERRABORT(comm, ierr);
- // Set operators. Here the matrix that defines the linear system
- // also serves as the preconditioning matrix.
- ierr = KSPSetOperators(ksp, PetscA, PetscA);
- CHKERRABORT(comm, ierr);
- // Set runtime options
- KSPSetType(ksp, KSPGMRES);
- KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED);
- if (use_abs_tol) KSPSetTolerances(ksp, PETSC_DEFAULT, tol, PETSC_DEFAULT, max_iter);
- else KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_iter);
- KSPGMRESSetOrthogonalization(ksp, KSPGMRESModifiedGramSchmidtOrthogonalization);
- if (verbose_) KSPMonitorSet(ksp, MyKSPMonitor, (MPI_Comm)&comm_, nullptr);
- KSPGMRESSetRestart(ksp, max_iter);
- ierr = KSPSetFromOptions(ksp);
- CHKERRABORT(comm, ierr);
- // -------------------------------------------------------------------
- // Solve the linear system: Ax=b
- // -------------------------------------------------------------------
- ierr = KSPSolve(ksp, Petsc_b, Petsc_x);
- CHKERRABORT(comm, ierr);
- // View info about the solver
- // KSPView(ksp,PETSC_VIEWER_STDOUT_WORLD); CHKERRABORT(comm, ierr);
- // Iterations
- // PetscInt its;
- // ierr = KSPGetIterationNumber(ksp,&its); CHKERRABORT(comm, ierr);
- // ierr = PetscPrintf(PETSC_COMM_WORLD,"Iterations %D\n",its); CHKERRABORT(comm, ierr);
- { // Set x
- const PetscScalar* x_ptr;
- ierr = VecGetArrayRead(Petsc_x, &x_ptr);
- CHKERRABORT(comm, ierr);
- if (x->Dim() != N) x->ReInit(N);
- for (long i = 0; i < N; i++) (*x)[i] = (Real)x_ptr[i];
- }
- ierr = KSPDestroy(&ksp);
- CHKERRABORT(comm, ierr);
- ierr = MatDestroy(&PetscA);
- CHKERRABORT(comm, ierr);
- ierr = VecDestroy(&Petsc_x);
- CHKERRABORT(comm, ierr);
- ierr = VecDestroy(&Petsc_b);
- CHKERRABORT(comm, ierr);
- }
- template <> inline void ParallelSolver<double>::operator()(Vector<double>* x, const ParallelOp& A, const Vector<double>& b, const double tol, const Integer max_iter, const bool use_abs_tol) {
- PETScGMRES(x, A, b, tol, max_iter, use_abs_tol, verbose_, comm_);
- }
- template <> inline void ParallelSolver<float>::operator()(Vector<float>* x, const ParallelOp& A, const Vector<float>& b, const float tol, const Integer max_iter, const bool use_abs_tol) {
- PETScGMRES(x, A, b, tol, max_iter, use_abs_tol, verbose_, comm_);
- }
- } // end namespace
- #endif
- #endif //_SCTL_PARALLEL_SOLVER_HPP_
|