parallel_solver.hpp 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. #ifndef _SCTL_PARALLEL_SOLVER_HPP_
  2. #define _SCTL_PARALLEL_SOLVER_HPP_
  3. #include SCTL_INCLUDE(vector.hpp)
  4. #include SCTL_INCLUDE(comm.hpp)
  5. #include <functional>
  6. namespace SCTL_NAMESPACE {
  7. template <class Real> class ParallelSolver {
  8. public:
  9. using ParallelOp = std::function<void(Vector<Real>*, const Vector<Real>&)>;
  10. ParallelSolver(const Comm& comm, bool verbose = true) : comm_(comm), verbose_(verbose) {}
  11. void operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter = -1);
  12. private:
  13. bool verbose_;
  14. Comm comm_;
  15. };
  16. } // end namespace
  17. #ifdef SCTL_HAVE_PETSC
  18. #include <petscksp.h>
  19. namespace SCTL_NAMESPACE {
  20. template <class Real> int ParallelSolverMatVec(Mat M_, Vec x_, Vec Mx_) {
  21. PetscErrorCode ierr;
  22. PetscInt N, N_;
  23. VecGetLocalSize(x_, &N);
  24. VecGetLocalSize(Mx_, &N_);
  25. SCTL_ASSERT(N == N_);
  26. void* data = nullptr;
  27. MatShellGetContext(M_, &data);
  28. auto& M = dynamic_cast<const typename ParallelSolver<Real>::ParallelOp&>(*(typename ParallelSolver<Real>::ParallelOp*)data);
  29. const PetscScalar* x_ptr;
  30. ierr = VecGetArrayRead(x_, &x_ptr);
  31. CHKERRQ(ierr);
  32. Vector<Real> x(N);
  33. for (Long i = 0; i < N; i++) x[i] = x_ptr[i];
  34. Vector<Real> Mx(N);
  35. M(&Mx, x);
  36. PetscScalar* Mx_ptr;
  37. ierr = VecGetArray(Mx_, &Mx_ptr);
  38. CHKERRQ(ierr);
  39. for (long i = 0; i < N; i++) Mx_ptr[i] = Mx[i];
  40. ierr = VecRestoreArray(Mx_, &Mx_ptr);
  41. CHKERRQ(ierr);
  42. return 0;
  43. }
  44. template <class Real> inline void ParallelSolver<Real>::operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter) {
  45. PetscInt N = b.Dim();
  46. if (max_iter < 0) max_iter = N;
  47. MPI_Comm comm = comm_.GetMPI_Comm();
  48. PetscErrorCode ierr;
  49. Mat PetscA;
  50. { // Create Matrix. PetscA
  51. MatCreateShell(comm, N, N, PETSC_DETERMINE, PETSC_DETERMINE, (void*)&A, &PetscA);
  52. MatShellSetOperation(PetscA, MATOP_MULT, (void (*)(void))ParallelSolverMatVec<Real>);
  53. }
  54. Vec Petsc_x, Petsc_b;
  55. { // Create vectors
  56. VecCreateMPI(comm, N, PETSC_DETERMINE, &Petsc_b);
  57. VecCreateMPI(comm, N, PETSC_DETERMINE, &Petsc_x);
  58. PetscScalar* b_ptr;
  59. ierr = VecGetArray(Petsc_b, &b_ptr);
  60. CHKERRABORT(comm, ierr);
  61. for (long i = 0; i < N; i++) b_ptr[i] = b[i];
  62. ierr = VecRestoreArray(Petsc_b, &b_ptr);
  63. CHKERRABORT(comm, ierr);
  64. }
  65. // Create linear solver context
  66. KSP ksp;
  67. ierr = KSPCreate(comm, &ksp);
  68. CHKERRABORT(comm, ierr);
  69. // Set operators. Here the matrix that defines the linear system
  70. // also serves as the preconditioning matrix.
  71. ierr = KSPSetOperators(ksp, PetscA, PetscA);
  72. CHKERRABORT(comm, ierr);
  73. // Set runtime options
  74. KSPSetType(ksp, KSPGMRES);
  75. KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED);
  76. KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_iter);
  77. if (verbose_) KSPMonitorSet(ksp, KSPMonitorDefault, nullptr, nullptr);
  78. KSPGMRESSetRestart(ksp, max_iter);
  79. ierr = KSPSetFromOptions(ksp);
  80. CHKERRABORT(comm, ierr);
  81. // -------------------------------------------------------------------
  82. // Solve the linear system: Ax=b
  83. // -------------------------------------------------------------------
  84. ierr = KSPSolve(ksp, Petsc_b, Petsc_x);
  85. CHKERRABORT(comm, ierr);
  86. // View info about the solver
  87. // KSPView(ksp,PETSC_VIEWER_STDOUT_WORLD); CHKERRABORT(comm, ierr);
  88. // Iterations
  89. // PetscInt its;
  90. // ierr = KSPGetIterationNumber(ksp,&its); CHKERRABORT(comm, ierr);
  91. // ierr = PetscPrintf(PETSC_COMM_WORLD,"Iterations %D\n",its); CHKERRABORT(comm, ierr);
  92. { // Set x
  93. const PetscScalar* x_ptr;
  94. ierr = VecGetArrayRead(Petsc_x, &x_ptr);
  95. CHKERRABORT(comm, ierr);
  96. if (x->Dim() != N) x->ReInit(N);
  97. for (long i = 0; i < N; i++) (*x)[i] = x_ptr[i];
  98. }
  99. ierr = KSPDestroy(&ksp);
  100. CHKERRABORT(comm, ierr);
  101. ierr = MatDestroy(&PetscA);
  102. CHKERRABORT(comm, ierr);
  103. ierr = VecDestroy(&Petsc_x);
  104. CHKERRABORT(comm, ierr);
  105. ierr = VecDestroy(&Petsc_b);
  106. CHKERRABORT(comm, ierr);
  107. }
  108. } // end namespace
  109. #else
  110. namespace SCTL_NAMESPACE {
  111. template <class Real> static Real inner_prod(const Vector<Real>& x, const Vector<Real>& y, const Comm& comm) {
  112. Real x_dot_y = 0;
  113. Long N = x.Dim();
  114. SCTL_ASSERT(y.Dim() == N);
  115. for (Long i = 0; i < N; i++) x_dot_y += x[i] * y[i];
  116. Real x_dot_y_glb = 0;
  117. comm.Allreduce(Ptr2ConstItr<Real>(&x_dot_y, 1), Ptr2Itr<Real>(&x_dot_y_glb, 1), 1, Comm::CommOp::SUM);
  118. return x_dot_y_glb;
  119. }
  120. template <class Real> inline void ParallelSolver<Real>::operator()(Vector<Real>* x, const ParallelOp& A, const Vector<Real>& b, Real tol, Integer max_iter) {
  121. Long N = b.Dim();
  122. if (max_iter < 0) max_iter = N;
  123. Real b_norm = sqrt(inner_prod(b, b, comm_));
  124. Vector<Real> q(N);
  125. Vector<Vector<Real>> Q;
  126. Vector<Vector<Real>> H;
  127. { // Initialize q, Q
  128. q = b;
  129. Real one_over_q_norm = 1.0 / sqrt<Real>(inner_prod(q, q, comm_));
  130. for (Long j = 0; j < N; j++) q[j] *= one_over_q_norm;
  131. Q.PushBack(q);
  132. }
  133. Matrix<Real> H_;
  134. Vector<Real> Aq(N), y, h, r = b;
  135. while (1) {
  136. Real r_norm = sqrt(inner_prod(r, r, comm_));
  137. if (verbose_ && !comm_.Rank()) printf("%3d KSP Residual norm %.12e\n", H.Dim(), r_norm);
  138. if (r_norm < tol * b_norm || H.Dim() == max_iter) break;
  139. A(&Aq, q);
  140. q = Aq;
  141. h.ReInit(Q.Dim() + 1);
  142. for (Integer i = 0; i < Q.Dim(); i++) { // Orthogonalized q
  143. h[i] = inner_prod(q, Q[i], comm_);
  144. for (Long j = 0; j < N; j++) q[j] -= h[i] * Q[i][j];
  145. }
  146. { // Normalize q
  147. h[Q.Dim()] = sqrt<Real>(inner_prod(q, q, comm_));
  148. Real one_over_q_norm = 1.0 / h[Q.Dim()];
  149. for (Long j = 0; j < N; j++) q[j] *= one_over_q_norm;
  150. }
  151. Q.PushBack(q);
  152. H.PushBack(h);
  153. { // Set y
  154. H_.ReInit(H.Dim(), Q.Dim());
  155. H_.SetZero();
  156. for (Integer i = 0; i < H.Dim(); i++) {
  157. for (Integer j = 0; j < H[i].Dim(); j++) {
  158. H_[i][j] = H[i][j];
  159. }
  160. }
  161. H_ = H_.pinv();
  162. y.ReInit(H_.Dim(1));
  163. for (Integer i = 0; i < y.Dim(); i++) {
  164. y[i] = H_[0][i] * b_norm;
  165. }
  166. }
  167. { // Compute residual
  168. Vector<Real> Hy(Q.Dim());
  169. Hy.SetZero();
  170. for (Integer i = 0; i < H.Dim(); i++) {
  171. for (Integer j = 0; j < H[i].Dim(); j++) {
  172. Hy[j] += H[i][j] * y[i];
  173. }
  174. }
  175. Vector<Real> QHy(N);
  176. QHy.SetZero();
  177. for (Integer i = 0; i < Q.Dim(); i++) {
  178. for (Long j = 0; j < N; j++) {
  179. QHy[j] += Q[i][j] * Hy[i];
  180. }
  181. }
  182. for (Integer j = 0; j < N; j++) { // Set r
  183. r[j] = b[j] - QHy[j];
  184. }
  185. }
  186. }
  187. { // Set x
  188. if (x->Dim() != N) x->ReInit(N);
  189. x->SetZero();
  190. for (Integer i = 0; i < y.Dim(); i++) {
  191. for (Integer j = 0; j < N; j++) {
  192. (*x)[j] += y[i] * Q[i][j];
  193. }
  194. }
  195. }
  196. }
  197. } // end namespace
  198. #endif
  199. #endif //_SCTL_PARALLEL_SOLVER_HPP_