matrix.txx 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. #include <omp.h>
  2. #include <cmath>
  3. #include <cstdlib>
  4. #include <cassert>
  5. #include <iostream>
  6. #include <iomanip>
  7. #include SCTL_INCLUDE(math_utils.hpp)
  8. #include SCTL_INCLUDE(mat_utils.hpp)
  9. #include SCTL_INCLUDE(mem_mgr.hpp)
  10. #include SCTL_INCLUDE(profile.hpp)
  11. namespace SCTL_NAMESPACE {
  12. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Matrix<ValueType>& M) {
  13. std::ios::fmtflags f(std::cout.flags());
  14. output << std::fixed << std::setprecision(4) << std::setiosflags(std::ios::left);
  15. for (Long i = 0; i < M.Dim(0); i++) {
  16. for (Long j = 0; j < M.Dim(1); j++) {
  17. double f = ((double)M(i, j));
  18. if (fabs(f) < 1e-25) f = 0;
  19. output << std::setw(10) << f << ' ';
  20. }
  21. output << ";\n";
  22. }
  23. std::cout.flags(f);
  24. return output;
  25. }
  26. template <class ValueType> void Matrix<ValueType>::Init(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  27. dim[0] = dim1;
  28. dim[1] = dim2;
  29. own_data = own_data_;
  30. if (own_data) {
  31. if (dim[0] * dim[1] > 0) {
  32. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  33. if (data_ != NullIterator<ValueType>()) {
  34. memcopy(data_ptr, data_, dim[0] * dim[1]);
  35. }
  36. } else
  37. data_ptr = NullIterator<ValueType>();
  38. } else
  39. data_ptr = data_;
  40. }
  41. template <class ValueType> Matrix<ValueType>::Matrix() {
  42. Init(0, 0);
  43. }
  44. template <class ValueType> Matrix<ValueType>::Matrix(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  45. Init(dim1, dim2, data_, own_data_);
  46. }
  47. template <class ValueType> Matrix<ValueType>::Matrix(const Matrix<ValueType>& M) {
  48. Init(M.Dim(0), M.Dim(1), (Iterator<ValueType>)M.begin());
  49. }
  50. template <class ValueType> Matrix<ValueType>::~Matrix() {
  51. if (own_data) {
  52. if (data_ptr != NullIterator<ValueType>()) {
  53. aligned_delete(data_ptr);
  54. }
  55. }
  56. data_ptr = NullIterator<ValueType>();
  57. dim[0] = 0;
  58. dim[1] = 0;
  59. }
  60. template <class ValueType> void Matrix<ValueType>::Swap(Matrix<ValueType>& M) {
  61. StaticArray<Long, 2> dim_;
  62. dim_[0] = dim[0];
  63. dim_[1] = dim[1];
  64. Iterator<ValueType> data_ptr_ = data_ptr;
  65. bool own_data_ = own_data;
  66. dim[0] = M.dim[0];
  67. dim[1] = M.dim[1];
  68. data_ptr = M.data_ptr;
  69. own_data = M.own_data;
  70. M.dim[0] = dim_[0];
  71. M.dim[1] = dim_[1];
  72. M.data_ptr = data_ptr_;
  73. M.own_data = own_data_;
  74. }
  75. template <class ValueType> void Matrix<ValueType>::ReInit(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  76. if (own_data_ && own_data && dim[0] * dim[1] >= dim1 * dim2) {
  77. dim[0] = dim1;
  78. dim[1] = dim2;
  79. if (data_ != NullIterator<ValueType>()) {
  80. memcopy(data_ptr, data_, dim[0] * dim[1]);
  81. }
  82. } else {
  83. Matrix<ValueType> tmp(dim1, dim2, data_, own_data_);
  84. this->Swap(tmp);
  85. }
  86. }
  87. template <class ValueType> void Matrix<ValueType>::Write(const char* fname) const {
  88. FILE* f1 = fopen(fname, "wb+");
  89. if (f1 == nullptr) {
  90. std::cout << "Unable to open file for writing:" << fname << '\n';
  91. return;
  92. }
  93. StaticArray<uint64_t, 2> dim_;
  94. dim_[0] = (uint64_t)Dim(0);
  95. dim_[1] = (uint64_t)Dim(1);
  96. fwrite(&dim_[0], sizeof(uint64_t), 2, f1);
  97. if (Dim(0) && Dim(1)) fwrite(&data_ptr[0], sizeof(ValueType), Dim(0) * Dim(1), f1);
  98. fclose(f1);
  99. }
  100. template <class ValueType> void Matrix<ValueType>::Read(const char* fname) {
  101. FILE* f1 = fopen(fname, "r");
  102. if (f1 == nullptr) {
  103. std::cout << "Unable to open file for reading:" << fname << '\n';
  104. return;
  105. }
  106. StaticArray<uint64_t, 2> dim_;
  107. Long readlen = fread(&dim_[0], sizeof(uint64_t), 2, f1);
  108. SCTL_ASSERT(readlen == 2);
  109. if (Dim(0) != (Long)dim_[0] || Dim(1) != (Long)dim_[1]) ReInit(dim_[0], dim_[1]);
  110. if (dim_[0] && dim_[1]) {
  111. readlen = fread(&data_ptr[0], sizeof(ValueType), dim_[0] * dim_[1], f1);
  112. SCTL_ASSERT(readlen == (Long)(dim_[0] * dim_[1]));
  113. }
  114. fclose(f1);
  115. }
  116. template <class ValueType> Long Matrix<ValueType>::Dim(Long i) const { return dim[i]; }
  117. template <class ValueType> void Matrix<ValueType>::SetZero() {
  118. if (dim[0] && dim[1]) memset(data_ptr, 0, dim[0] * dim[1]);
  119. }
  120. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::begin() { return data_ptr; }
  121. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::begin() const { return data_ptr; }
  122. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::end() { return data_ptr + dim[0] * dim[1]; }
  123. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::end() const { return data_ptr + dim[0] * dim[1]; }
  124. // Matrix-Matrix operations
  125. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(const Matrix<ValueType>& M) {
  126. if (this != &M) {
  127. if (dim[0] * dim[1] < M.dim[0] * M.dim[1]) {
  128. ReInit(M.dim[0], M.dim[1]);
  129. }
  130. dim[0] = M.dim[0];
  131. dim[1] = M.dim[1];
  132. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  133. }
  134. return *this;
  135. }
  136. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(const Matrix<ValueType>& M) {
  137. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  138. Profile::Add_FLOP(dim[0] * dim[1]);
  139. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] += M.data_ptr[i];
  140. return *this;
  141. }
  142. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(const Matrix<ValueType>& M) {
  143. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  144. Profile::Add_FLOP(dim[0] * dim[1]);
  145. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] -= M.data_ptr[i];
  146. return *this;
  147. }
  148. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(const Matrix<ValueType>& M2) const {
  149. const Matrix<ValueType>& M1 = *this;
  150. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  151. Profile::Add_FLOP(dim[0] * dim[1]);
  152. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  153. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] + M2[0][i];
  154. return M_r;
  155. }
  156. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(const Matrix<ValueType>& M2) const {
  157. const Matrix<ValueType>& M1 = *this;
  158. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  159. Profile::Add_FLOP(dim[0] * dim[1]);
  160. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  161. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] - M2[0][i];
  162. return M_r;
  163. }
  164. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(const Matrix<ValueType>& M) const {
  165. SCTL_ASSERT(dim[1] == M.dim[0]);
  166. Profile::Add_FLOP(2 * (((Long)dim[0]) * dim[1]) * M.dim[1]);
  167. Matrix<ValueType> M_r(dim[0], M.dim[1]);
  168. if (M.Dim(0) * M.Dim(1) == 0 || this->Dim(0) * this->Dim(1) == 0) return M_r;
  169. mat::gemm<ValueType>('N', 'N', M.dim[1], dim[0], dim[1], 1.0, M.data_ptr, M.dim[1], data_ptr, dim[1], 0.0, M_r.data_ptr, M_r.dim[1]);
  170. return M_r;
  171. }
  172. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  173. SCTL_ASSERT(A.dim[1] == B.dim[0]);
  174. SCTL_ASSERT(M_r.dim[0] == A.dim[0]);
  175. SCTL_ASSERT(M_r.dim[1] == B.dim[1]);
  176. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  177. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  178. mat::gemm<ValueType>('N', 'N', B.dim[1], A.dim[0], A.dim[1], 1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  179. }
  180. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Permutation<ValueType>& P, const Matrix<ValueType>& M, ValueType beta) {
  181. Long d0 = M.Dim(0);
  182. Long d1 = M.Dim(1);
  183. assert(P.Dim() == d0);
  184. assert(M_r.Dim(0) == d0);
  185. assert(M_r.Dim(1) == d1);
  186. if (P.Dim() == 0 || d0 * d1 == 0) return;
  187. if (beta == 0) {
  188. for (Long i = 0; i < d0; i++) {
  189. const ValueType s = P.scal[i];
  190. ConstIterator<ValueType> M_ = M[i];
  191. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  192. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  193. }
  194. } else {
  195. for (Long i = 0; i < d0; i++) {
  196. const ValueType s = P.scal[i];
  197. ConstIterator<ValueType> M_ = M[i];
  198. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  199. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s + M_r_[j] * beta;
  200. }
  201. }
  202. }
  203. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& M, const Permutation<ValueType>& P, ValueType beta) {
  204. Long d0 = M.Dim(0);
  205. Long d1 = M.Dim(1);
  206. assert(P.Dim() == d1);
  207. assert(M_r.Dim(0) == d0);
  208. assert(M_r.Dim(1) == d1);
  209. if (P.Dim() == 0 || d0 * d1 == 0) return;
  210. if (beta == 0) {
  211. for (Long i = 0; i < d0; i++) {
  212. ConstIterator<Long> perm_ = P.perm.begin();
  213. ConstIterator<ValueType> scal_ = P.scal.begin();
  214. ConstIterator<ValueType> M_ = M[i];
  215. Iterator<ValueType> M_r_ = M_r[i];
  216. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  217. }
  218. } else {
  219. for (Long i = 0; i < d0; i++) {
  220. ConstIterator<Long> perm_ = P.perm.begin();
  221. ConstIterator<ValueType> scal_ = P.scal.begin();
  222. ConstIterator<ValueType> M_ = M[i];
  223. Iterator<ValueType> M_r_ = M_r[i];
  224. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j] + M_r_[j] * beta;
  225. }
  226. }
  227. }
  228. // cublasgemm wrapper
  229. #if defined(SCTL_HAVE_CUDA)
  230. template <class ValueType> void Matrix<ValueType>::CUBLASGEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  231. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  232. assert(A.dim[1] == B.dim[0]);
  233. assert(M_r.dim[0] == A.dim[0]);
  234. assert(M_r.dim[1] == B.dim[1]);
  235. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  236. mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1], (ValueType)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  237. }
  238. #endif
  239. // Matrix-Scalar operations
  240. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(ValueType s) {
  241. Long N = dim[0] * dim[1];
  242. for (Long i = 0; i < N; i++) data_ptr[i] = s;
  243. return *this;
  244. }
  245. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(ValueType s) {
  246. Long N = dim[0] * dim[1];
  247. for (Long i = 0; i < N; i++) data_ptr[i] += s;
  248. Profile::Add_FLOP(N);
  249. return *this;
  250. }
  251. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(ValueType s) {
  252. Long N = dim[0] * dim[1];
  253. for (Long i = 0; i < N; i++) data_ptr[i] -= s;
  254. Profile::Add_FLOP(N);
  255. return *this;
  256. }
  257. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator*=(ValueType s) {
  258. Long N = dim[0] * dim[1];
  259. for (Long i = 0; i < N; i++) data_ptr[i] *= s;
  260. Profile::Add_FLOP(N);
  261. return *this;
  262. }
  263. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator/=(ValueType s) {
  264. Long N = dim[0] * dim[1];
  265. for (Long i = 0; i < N; i++) data_ptr[i] /= s;
  266. Profile::Add_FLOP(N);
  267. return *this;
  268. }
  269. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(ValueType s) const {
  270. Long N = dim[0] * dim[1];
  271. Matrix<ValueType> M_r(dim[0], dim[1]);
  272. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] + s;
  273. return M_r;
  274. }
  275. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(ValueType s) const {
  276. Long N = dim[0] * dim[1];
  277. Matrix<ValueType> M_r(dim[0], dim[1]);
  278. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] - s;
  279. return M_r;
  280. }
  281. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(ValueType s) const {
  282. Long N = dim[0] * dim[1];
  283. Matrix<ValueType> M_r(dim[0], dim[1]);
  284. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] * s;
  285. return M_r;
  286. }
  287. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator/(ValueType s) const {
  288. Long N = dim[0] * dim[1];
  289. Matrix<ValueType> M_r(dim[0], dim[1]);
  290. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] / s;
  291. return M_r;
  292. }
  293. // Element access
  294. template <class ValueType> inline ValueType& Matrix<ValueType>::operator()(Long i, Long j) {
  295. assert(i < dim[0] && j < dim[1]);
  296. return data_ptr[i * dim[1] + j];
  297. }
  298. template <class ValueType> inline const ValueType& Matrix<ValueType>::operator()(Long i, Long j) const {
  299. assert(i < dim[0] && j < dim[1]);
  300. return data_ptr[i * dim[1] + j];
  301. }
  302. template <class ValueType> inline Iterator<ValueType> Matrix<ValueType>::operator[](Long i) {
  303. assert(i < dim[0]);
  304. return data_ptr + i * dim[1];
  305. }
  306. template <class ValueType> inline ConstIterator<ValueType> Matrix<ValueType>::operator[](Long i) const {
  307. assert(i < dim[0]);
  308. return (data_ptr + i * dim[1]);
  309. }
  310. template <class ValueType> void Matrix<ValueType>::RowPerm(const Permutation<ValueType>& P) {
  311. Matrix<ValueType>& M = *this;
  312. if (P.Dim() == 0) return;
  313. assert(M.Dim(0) == P.Dim());
  314. Long d0 = M.Dim(0);
  315. Long d1 = M.Dim(1);
  316. #pragma omp parallel for schedule(static)
  317. for (Long i = 0; i < d0; i++) {
  318. Iterator<ValueType> M_ = M[i];
  319. const ValueType s = P.scal[i];
  320. for (Long j = 0; j < d1; j++) M_[j] *= s;
  321. }
  322. Integer omp_p = omp_get_max_threads();
  323. #pragma omp parallel for schedule(static)
  324. for (Integer tid = 0; tid < omp_p; tid++) {
  325. Long a = d1 * (tid + 0) / omp_p;
  326. Long b = d1 * (tid + 1) / omp_p;
  327. Permutation<ValueType> P_ = P;
  328. for (Long i = 0; i < d0; i++) {
  329. Long j = i;
  330. if (P_.perm[i] == i) continue;
  331. while (j < d0 && P_.perm[j] != i) { j++; }
  332. SCTL_ASSERT_MSG(j < d0, "Matrix::RowPerm(Permutation P) ==> Invalid permutation vector P.");
  333. Iterator<ValueType> Mi = M[i];
  334. Iterator<ValueType> Mj = M[j];
  335. std::swap<Long>(P_.perm[i], P_.perm[j]);
  336. for (Long k = a; k < b; k++) Mi[k]=tid;
  337. }
  338. }
  339. }
  340. template <class ValueType> void Matrix<ValueType>::ColPerm(const Permutation<ValueType>& P) {
  341. Matrix<ValueType>& M = *this;
  342. if (P.Dim() == 0) return;
  343. assert(M.Dim(1) == P.Dim());
  344. Long d0 = M.Dim(0);
  345. Long d1 = M.Dim(1);
  346. Integer omp_p = omp_get_max_threads();
  347. Matrix<ValueType> M_buff(omp_p, d1);
  348. ConstIterator<Long> perm_ = P.perm.begin();
  349. ConstIterator<ValueType> scal_ = P.scal.begin();
  350. #pragma omp parallel for schedule(static)
  351. for (Long i = 0; i < d0; i++) {
  352. Integer pid = omp_get_thread_num();
  353. Iterator<ValueType> buff = M_buff[pid];
  354. Iterator<ValueType> M_ = M[i];
  355. for (Long j = 0; j < d1; j++) buff[j] = M_[j];
  356. for (Long j = 0; j < d1; j++) {
  357. M_[j] = buff[perm_[j]] * scal_[j];
  358. }
  359. }
  360. }
  361. #define SCTL_B1 128
  362. #define SCTL_B2 32
  363. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::Transpose() const {
  364. const Matrix<ValueType>& M = *this;
  365. Long d0 = M.dim[0];
  366. Long d1 = M.dim[1];
  367. Matrix<ValueType> M_r(d1, d0);
  368. const Long blk0 = ((d0 + SCTL_B1 - 1) / SCTL_B1);
  369. const Long blk1 = ((d1 + SCTL_B1 - 1) / SCTL_B1);
  370. const Long blks = blk0 * blk1;
  371. #pragma omp parallel for
  372. for (Long k = 0; k < blks; k++) {
  373. Long i = (k % blk0) * SCTL_B1;
  374. Long j = (k / blk0) * SCTL_B1;
  375. Long d0_ = i + SCTL_B1;
  376. if (d0_ >= d0) d0_ = d0;
  377. Long d1_ = j + SCTL_B1;
  378. if (d1_ >= d1) d1_ = d1;
  379. for (Long ii = i; ii < d0_; ii += SCTL_B2)
  380. for (Long jj = j; jj < d1_; jj += SCTL_B2) {
  381. Long d0__ = ii + SCTL_B2;
  382. if (d0__ >= d0) d0__ = d0;
  383. Long d1__ = jj + SCTL_B2;
  384. if (d1__ >= d1) d1__ = d1;
  385. for (Long iii = ii; iii < d0__; iii++)
  386. for (Long jjj = jj; jjj < d1__; jjj++) {
  387. M_r[jjj][iii] = M[iii][jjj];
  388. }
  389. }
  390. }
  391. return M_r;
  392. }
  393. template <class ValueType> void Matrix<ValueType>::Transpose(Matrix<ValueType>& M_r, const Matrix<ValueType>& M) {
  394. Long d0 = M.dim[0];
  395. Long d1 = M.dim[1];
  396. if (M_r.dim[0] != d1 || M_r.dim[1] != d0) M_r.ReInit(d1, d0);
  397. const Long blk0 = ((d0 + SCTL_B1 - 1) / SCTL_B1);
  398. const Long blk1 = ((d1 + SCTL_B1 - 1) / SCTL_B1);
  399. const Long blks = blk0 * blk1;
  400. #pragma omp parallel for
  401. for (Long k = 0; k < blks; k++) {
  402. Long i = (k % blk0) * SCTL_B1;
  403. Long j = (k / blk0) * SCTL_B1;
  404. Long d0_ = i + SCTL_B1;
  405. if (d0_ >= d0) d0_ = d0;
  406. Long d1_ = j + SCTL_B1;
  407. if (d1_ >= d1) d1_ = d1;
  408. for (Long ii = i; ii < d0_; ii += SCTL_B2)
  409. for (Long jj = j; jj < d1_; jj += SCTL_B2) {
  410. Long d0__ = ii + SCTL_B2;
  411. if (d0__ >= d0) d0__ = d0;
  412. Long d1__ = jj + SCTL_B2;
  413. if (d1__ >= d1) d1__ = d1;
  414. for (Long iii = ii; iii < d0__; iii++)
  415. for (Long jjj = jj; jjj < d1__; jjj++) {
  416. M_r[jjj][iii] = M[iii][jjj];
  417. }
  418. }
  419. }
  420. }
  421. #undef SCTL_B2
  422. #undef SCTL_B1
  423. template <class ValueType> void Matrix<ValueType>::SVD(Matrix<ValueType>& tU, Matrix<ValueType>& tS, Matrix<ValueType>& tVT) {
  424. Matrix<ValueType>& M = *this;
  425. Matrix<ValueType> M_ = M;
  426. int n = M.Dim(0);
  427. int m = M.Dim(1);
  428. int k = (m < n ? m : n);
  429. if (tU.Dim(0) != n || tU.Dim(1) != k) tU.ReInit(n, k);
  430. tU.SetZero();
  431. if (tS.Dim(0) != k || tS.Dim(1) != k) tS.ReInit(k, k);
  432. tS.SetZero();
  433. if (tVT.Dim(0) != k || tVT.Dim(1) != m) tVT.ReInit(k, m);
  434. tVT.SetZero();
  435. // SVD
  436. int INFO = 0;
  437. char JOBU = 'S';
  438. char JOBVT = 'S';
  439. int wssize = 3 * (m < n ? m : n) + (m > n ? m : n);
  440. int wssize1 = 5 * (m < n ? m : n);
  441. wssize = (wssize > wssize1 ? wssize : wssize1);
  442. Iterator<ValueType> wsbuf = aligned_new<ValueType>(wssize);
  443. mat::svd(&JOBU, &JOBVT, &m, &n, M.begin(), &m, tS.begin(), tVT.begin(), &m, tU.begin(), &k, wsbuf, &wssize, &INFO);
  444. aligned_delete<ValueType>(wsbuf);
  445. if (INFO != 0) std::cout << INFO << '\n';
  446. assert(INFO == 0);
  447. for (Long i = 1; i < k; i++) {
  448. tS[i][i] = tS[0][i];
  449. tS[0][i] = 0;
  450. }
  451. // std::cout<<tU*tS*tVT-M_<<'\n';
  452. }
  453. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::pinv(ValueType eps) {
  454. if (eps < 0) eps = sqrt<ValueType>(machine_eps<ValueType>());
  455. Matrix<ValueType> M_r(dim[1], dim[0]);
  456. mat::pinv(data_ptr, dim[0], dim[1], eps, M_r.data_ptr);
  457. this->ReInit(0, 0);
  458. return M_r;
  459. }
  460. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Permutation<ValueType>& P) {
  461. output << std::setprecision(4) << std::setiosflags(std::ios::left);
  462. Long size = P.perm.Dim();
  463. for (Long i = 0; i < size; i++) output << std::setw(10) << P.perm[i] << ' ';
  464. output << ";\n";
  465. for (Long i = 0; i < size; i++) output << std::setw(10) << P.scal[i] << ' ';
  466. output << ";\n";
  467. return output;
  468. }
  469. template <class ValueType> Permutation<ValueType>::Permutation(Long size) {
  470. perm.ReInit(size);
  471. scal.ReInit(size);
  472. for (Long i = 0; i < size; i++) {
  473. perm[i] = i;
  474. scal[i] = 1.0;
  475. }
  476. }
  477. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::RandPerm(Long size) {
  478. Permutation<ValueType> P(size);
  479. for (Long i = 0; i < size; i++) {
  480. P.perm[i] = rand() % size;
  481. for (Long j = 0; j < i; j++)
  482. if (P.perm[i] == P.perm[j]) {
  483. i--;
  484. break;
  485. }
  486. P.scal[i] = ((ValueType)rand()) / RAND_MAX;
  487. }
  488. return P;
  489. }
  490. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::GetMatrix() const {
  491. Long size = perm.Dim();
  492. Matrix<ValueType> M_r(size, size);
  493. for (Long i = 0; i < size; i++)
  494. for (Long j = 0; j < size; j++) M_r[i][j] = (perm[j] == i ? scal[j] : 0.0);
  495. return M_r;
  496. }
  497. template <class ValueType> Long Permutation<ValueType>::Dim() const { return perm.Dim(); }
  498. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::Transpose() {
  499. Long size = perm.Dim();
  500. Permutation<ValueType> P_r(size);
  501. Vector<Long>& perm_r = P_r.perm;
  502. Vector<ValueType>& scal_r = P_r.scal;
  503. for (Long i = 0; i < size; i++) {
  504. perm_r[perm[i]] = i;
  505. scal_r[perm[i]] = scal[i];
  506. }
  507. return P_r;
  508. }
  509. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator*=(ValueType s) {
  510. for (Long i = 0; i < scal.Dim(); i++) scal[i] *= s;
  511. return *this;
  512. }
  513. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator/=(ValueType s) {
  514. for (Long i = 0; i < scal.Dim(); i++) scal[i] /= s;
  515. return *this;
  516. }
  517. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(ValueType s) const {
  518. Permutation<ValueType> P = *this;
  519. P *= s;
  520. return P;
  521. }
  522. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator/(ValueType s) const {
  523. Permutation<ValueType> P = *this;
  524. P /= s;
  525. return P;
  526. }
  527. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(const Permutation<ValueType>& P) const {
  528. Long size = perm.Dim();
  529. SCTL_ASSERT(P.Dim() == size);
  530. Permutation<ValueType> P_r(size);
  531. Vector<Long>& perm_r = P_r.perm;
  532. Vector<ValueType>& scal_r = P_r.scal;
  533. for (Long i = 0; i < size; i++) {
  534. perm_r[i] = perm[P.perm[i]];
  535. scal_r[i] = scal[P.perm[i]] * P.scal[i];
  536. }
  537. return P_r;
  538. }
  539. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::operator*(const Matrix<ValueType>& M) const {
  540. if (Dim() == 0) return M;
  541. SCTL_ASSERT(M.Dim(0) == Dim());
  542. Long d0 = M.Dim(0);
  543. Long d1 = M.Dim(1);
  544. Matrix<ValueType> M_r(d0, d1);
  545. for (Long i = 0; i < d0; i++) {
  546. const ValueType s = scal[i];
  547. ConstIterator<ValueType> M_ = M[i];
  548. Iterator<ValueType> M_r_ = M_r[perm[i]];
  549. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  550. }
  551. return M_r;
  552. }
  553. template <class ValueType> Matrix<ValueType> operator*(const Matrix<ValueType>& M, const Permutation<ValueType>& P) {
  554. if (P.Dim() == 0) return M;
  555. SCTL_ASSERT(M.Dim(1) == P.Dim());
  556. Long d0 = M.Dim(0);
  557. Long d1 = M.Dim(1);
  558. Matrix<ValueType> M_r(d0, d1);
  559. for (Long i = 0; i < d0; i++) {
  560. ConstIterator<Long> perm_ = P.perm.begin();
  561. ConstIterator<ValueType> scal_ = P.scal.begin();
  562. ConstIterator<ValueType> M_ = M[i];
  563. Iterator<ValueType> M_r_ = M_r[i];
  564. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  565. }
  566. return M_r;
  567. }
  568. } // end namespace