matrix.txx 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. #include <omp.h>
  2. #include <cmath>
  3. #include <cstdlib>
  4. #include <cassert>
  5. #include <iostream>
  6. #include <iomanip>
  7. #include SCTL_INCLUDE(mat_utils.hpp)
  8. #include SCTL_INCLUDE(mem_mgr.hpp)
  9. #include SCTL_INCLUDE(profile.hpp)
  10. namespace SCTL_NAMESPACE {
  11. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Matrix<ValueType>& M) {
  12. std::ios::fmtflags f(std::cout.flags());
  13. output << std::fixed << std::setprecision(4) << std::setiosflags(std::ios::left);
  14. for (Long i = 0; i < M.Dim(0); i++) {
  15. for (Long j = 0; j < M.Dim(1); j++) {
  16. float f = ((float)M(i, j));
  17. if (fabs<ValueType>(f) < 1e-25) f = 0;
  18. output << std::setw(10) << ((double)f) << ' ';
  19. }
  20. output << ";\n";
  21. }
  22. std::cout.flags(f);
  23. return output;
  24. }
  25. template <class ValueType> Matrix<ValueType>::Matrix() {
  26. dim[0] = 0;
  27. dim[1] = 0;
  28. own_data = true;
  29. data_ptr = nullptr;
  30. }
  31. template <class ValueType> Matrix<ValueType>::Matrix(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  32. dim[0] = dim1;
  33. dim[1] = dim2;
  34. own_data = own_data_;
  35. if (own_data) {
  36. if (dim[0] * dim[1] > 0) {
  37. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  38. if (data_ != nullptr) {
  39. memcopy(data_ptr, data_, dim[0] * dim[1]);
  40. }
  41. } else
  42. data_ptr = nullptr;
  43. } else
  44. data_ptr = data_;
  45. }
  46. template <class ValueType> Matrix<ValueType>::Matrix(const Matrix<ValueType>& M) {
  47. dim[0] = M.dim[0];
  48. dim[1] = M.dim[1];
  49. own_data = true;
  50. if (dim[0] * dim[1] > 0) {
  51. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  52. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  53. } else
  54. data_ptr = nullptr;
  55. }
  56. template <class ValueType> Matrix<ValueType>::~Matrix() {
  57. if (own_data) {
  58. if (data_ptr != nullptr) {
  59. aligned_delete(data_ptr);
  60. }
  61. }
  62. data_ptr = nullptr;
  63. dim[0] = 0;
  64. dim[1] = 0;
  65. }
  66. template <class ValueType> void Matrix<ValueType>::Swap(Matrix<ValueType>& M) {
  67. StaticArray<Long, 2> dim_;
  68. dim_[0] = dim[0];
  69. dim_[1] = dim[1];
  70. Iterator<ValueType> data_ptr_ = data_ptr;
  71. bool own_data_ = own_data;
  72. dim[0] = M.dim[0];
  73. dim[1] = M.dim[1];
  74. data_ptr = M.data_ptr;
  75. own_data = M.own_data;
  76. M.dim[0] = dim_[0];
  77. M.dim[1] = dim_[1];
  78. M.data_ptr = data_ptr_;
  79. M.own_data = own_data_;
  80. }
  81. template <class ValueType> void Matrix<ValueType>::ReInit(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  82. if (own_data_ && own_data && dim[0] * dim[1] >= dim1 * dim2) {
  83. dim[0] = dim1;
  84. dim[1] = dim2;
  85. if (data_ != nullptr) {
  86. memcopy(data_ptr, data_, dim[0] * dim[1]);
  87. }
  88. } else {
  89. Matrix<ValueType> tmp(dim1, dim2, data_, own_data_);
  90. this->Swap(tmp);
  91. }
  92. }
  93. template <class ValueType> void Matrix<ValueType>::Write(const char* fname) const {
  94. FILE* f1 = fopen(fname, "wb+");
  95. if (f1 == nullptr) {
  96. std::cout << "Unable to open file for writing:" << fname << '\n';
  97. return;
  98. }
  99. StaticArray<uint64_t, 2> dim_;
  100. dim_[0] = (uint64_t)Dim(0);
  101. dim_[1] = (uint64_t)Dim(1);
  102. fwrite(&dim_[0], sizeof(uint64_t), 2, f1);
  103. if (Dim(0) * Dim(1)) fwrite(&data_ptr[0], sizeof(ValueType), Dim(0) * Dim(1), f1);
  104. fclose(f1);
  105. }
  106. template <class ValueType> void Matrix<ValueType>::Read(const char* fname) {
  107. FILE* f1 = fopen(fname, "r");
  108. if (f1 == nullptr) {
  109. std::cout << "Unable to open file for reading:" << fname << '\n';
  110. return;
  111. }
  112. StaticArray<uint64_t, 2> dim_;
  113. Long readlen = fread(&dim_[0], sizeof(uint64_t), 2, f1);
  114. assert(readlen == 2);
  115. if (Dim(0)!=dim_[0] || Dim(1)!=dim_[1]) ReInit(dim_[0], dim_[1]);
  116. if (dim_[0] * dim_[1]) readlen = fread(&data_ptr[0], sizeof(ValueType), dim_[0] * dim_[1], f1);
  117. assert(readlen == dim_[0] * dim_[1]);
  118. fclose(f1);
  119. }
  120. template <class ValueType> Long Matrix<ValueType>::Dim(Long i) const { return dim[i]; }
  121. template <class ValueType> void Matrix<ValueType>::SetZero() {
  122. if (dim[0] * dim[1]) memset(data_ptr, 0, dim[0] * dim[1]);
  123. }
  124. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::begin() { return data_ptr; }
  125. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::begin() const { return data_ptr; }
  126. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::end() { return data_ptr + dim[0] * dim[1]; }
  127. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::end() const { return data_ptr + dim[0] * dim[1]; }
  128. // Matrix-Matrix operations
  129. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(const Matrix<ValueType>& M) {
  130. if (this != &M) {
  131. if (dim[0] * dim[1] < M.dim[0] * M.dim[1]) {
  132. ReInit(M.dim[0], M.dim[1]);
  133. }
  134. dim[0] = M.dim[0];
  135. dim[1] = M.dim[1];
  136. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  137. }
  138. return *this;
  139. }
  140. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(const Matrix<ValueType>& M) {
  141. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  142. Profile::Add_FLOP(dim[0] * dim[1]);
  143. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] += M.data_ptr[i];
  144. return *this;
  145. }
  146. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(const Matrix<ValueType>& M) {
  147. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  148. Profile::Add_FLOP(dim[0] * dim[1]);
  149. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] -= M.data_ptr[i];
  150. return *this;
  151. }
  152. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(const Matrix<ValueType>& M2) const {
  153. const Matrix<ValueType>& M1 = *this;
  154. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  155. Profile::Add_FLOP(dim[0] * dim[1]);
  156. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  157. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] + M2[0][i];
  158. return M_r;
  159. }
  160. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(const Matrix<ValueType>& M2) const {
  161. const Matrix<ValueType>& M1 = *this;
  162. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  163. Profile::Add_FLOP(dim[0] * dim[1]);
  164. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  165. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] - M2[0][i];
  166. return M_r;
  167. }
  168. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(const Matrix<ValueType>& M) const {
  169. SCTL_ASSERT(dim[1] == M.dim[0]);
  170. Profile::Add_FLOP(2 * (((Long)dim[0]) * dim[1]) * M.dim[1]);
  171. Matrix<ValueType> M_r(dim[0], M.dim[1]);
  172. if (M.Dim(0) * M.Dim(1) == 0 || this->Dim(0) * this->Dim(1) == 0) return M_r;
  173. mat::gemm<ValueType>('N', 'N', M.dim[1], dim[0], dim[1], 1.0, M.data_ptr, M.dim[1], data_ptr, dim[1], 0.0, M_r.data_ptr, M_r.dim[1]);
  174. return M_r;
  175. }
  176. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  177. assert(A.dim[1] == B.dim[0]);
  178. assert(M_r.dim[0] == A.dim[0]);
  179. assert(M_r.dim[1] == B.dim[1]);
  180. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  181. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  182. mat::gemm<ValueType>('N', 'N', B.dim[1], A.dim[0], A.dim[1], 1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  183. }
  184. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Permutation<ValueType>& P, const Matrix<ValueType>& M, ValueType beta) {
  185. Long d0 = M.Dim(0);
  186. Long d1 = M.Dim(1);
  187. assert(P.Dim() == d0);
  188. assert(M_r.Dim(0) == d0);
  189. assert(M_r.Dim(1) == d1);
  190. if (P.Dim() == 0 || d0 * d1 == 0) return;
  191. if (beta == 0) {
  192. for (Long i = 0; i < d0; i++) {
  193. const ValueType s = P.scal[i];
  194. ConstIterator<ValueType> M_ = M[i];
  195. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  196. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  197. }
  198. } else {
  199. for (Long i = 0; i < d0; i++) {
  200. const ValueType s = P.scal[i];
  201. ConstIterator<ValueType> M_ = M[i];
  202. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  203. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s + M_r_[j] * beta;
  204. }
  205. }
  206. }
  207. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& M, const Permutation<ValueType>& P, ValueType beta) {
  208. Long d0 = M.Dim(0);
  209. Long d1 = M.Dim(1);
  210. assert(P.Dim() == d1);
  211. assert(M_r.Dim(0) == d0);
  212. assert(M_r.Dim(1) == d1);
  213. if (P.Dim() == 0 || d0 * d1 == 0) return;
  214. if (beta == 0) {
  215. for (Long i = 0; i < d0; i++) {
  216. ConstIterator<Long> perm_ = P.perm.begin();
  217. ConstIterator<ValueType> scal_ = P.scal.begin();
  218. ConstIterator<ValueType> M_ = M[i];
  219. Iterator<ValueType> M_r_ = M_r[i];
  220. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  221. }
  222. } else {
  223. for (Long i = 0; i < d0; i++) {
  224. ConstIterator<Long> perm_ = P.perm.begin();
  225. ConstIterator<ValueType> scal_ = P.scal.begin();
  226. ConstIterator<ValueType> M_ = M[i];
  227. Iterator<ValueType> M_r_ = M_r[i];
  228. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j] + M_r_[j] * beta;
  229. }
  230. }
  231. }
  232. // cublasgemm wrapper
  233. #if defined(SCTL_HAVE_CUDA)
  234. template <class ValueType> void Matrix<ValueType>::CUBLASGEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  235. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  236. assert(A.dim[1] == B.dim[0]);
  237. assert(M_r.dim[0] == A.dim[0]);
  238. assert(M_r.dim[1] == B.dim[1]);
  239. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  240. mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1], (ValueType)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  241. }
  242. #endif
  243. // Matrix-Scalar operations
  244. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(ValueType s) {
  245. Long N = dim[0] * dim[1];
  246. for (Long i = 0; i < N; i++) data_ptr[i] = s;
  247. return *this;
  248. }
  249. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(ValueType s) {
  250. Long N = dim[0] * dim[1];
  251. for (Long i = 0; i < N; i++) data_ptr[i] += s;
  252. Profile::Add_FLOP(N);
  253. return *this;
  254. }
  255. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(ValueType s) {
  256. Long N = dim[0] * dim[1];
  257. for (Long i = 0; i < N; i++) data_ptr[i] -= s;
  258. Profile::Add_FLOP(N);
  259. return *this;
  260. }
  261. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator*=(ValueType s) {
  262. Long N = dim[0] * dim[1];
  263. for (Long i = 0; i < N; i++) data_ptr[i] *= s;
  264. Profile::Add_FLOP(N);
  265. return *this;
  266. }
  267. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator/=(ValueType s) {
  268. Long N = dim[0] * dim[1];
  269. for (Long i = 0; i < N; i++) data_ptr[i] /= s;
  270. Profile::Add_FLOP(N);
  271. return *this;
  272. }
  273. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(ValueType s) const {
  274. Long N = dim[0] * dim[1];
  275. Matrix<ValueType> M_r(dim[0], dim[1]);
  276. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] + s;
  277. return M_r;
  278. }
  279. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(ValueType s) const {
  280. Long N = dim[0] * dim[1];
  281. Matrix<ValueType> M_r(dim[0], dim[1]);
  282. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] - s;
  283. return M_r;
  284. }
  285. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(ValueType s) const {
  286. Long N = dim[0] * dim[1];
  287. Matrix<ValueType> M_r(dim[0], dim[1]);
  288. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] * s;
  289. return M_r;
  290. }
  291. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator/(ValueType s) const {
  292. Long N = dim[0] * dim[1];
  293. Matrix<ValueType> M_r(dim[0], dim[1]);
  294. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] / s;
  295. return M_r;
  296. }
  297. // Element access
  298. template <class ValueType> inline ValueType& Matrix<ValueType>::operator()(Long i, Long j) {
  299. assert(i < dim[0] && j < dim[1]);
  300. return data_ptr[i * dim[1] + j];
  301. }
  302. template <class ValueType> inline const ValueType& Matrix<ValueType>::operator()(Long i, Long j) const {
  303. assert(i < dim[0] && j < dim[1]);
  304. return data_ptr[i * dim[1] + j];
  305. }
  306. template <class ValueType> inline Iterator<ValueType> Matrix<ValueType>::operator[](Long i) {
  307. assert(i < dim[0]);
  308. return data_ptr + i * dim[1];
  309. }
  310. template <class ValueType> inline ConstIterator<ValueType> Matrix<ValueType>::operator[](Long i) const {
  311. assert(i < dim[0]);
  312. return (data_ptr + i * dim[1]);
  313. }
  314. #define myswap(t, a, b) \
  315. { \
  316. t c = a; \
  317. a = b; \
  318. b = c; \
  319. }
  320. template <class ValueType> void Matrix<ValueType>::RowPerm(const Permutation<ValueType>& P) {
  321. Matrix<ValueType>& M = *this;
  322. if (P.Dim() == 0) return;
  323. assert(M.Dim(0) == P.Dim());
  324. Long d0 = M.Dim(0);
  325. Long d1 = M.Dim(1);
  326. #pragma omp parallel for schedule(static)
  327. for (Long i = 0; i < d0; i++) {
  328. Iterator<ValueType> M_ = M[i];
  329. const ValueType s = P.scal[i];
  330. for (Long j = 0; j < d1; j++) M_[j] *= s;
  331. }
  332. Integer omp_p = omp_get_max_threads();
  333. #pragma omp parallel for schedule(static)
  334. for (Integer tid = 0; tid < omp_p; tid++) {
  335. Long a = d1 * (tid + 0) / omp_p;
  336. Long b = d1 * (tid + 1) / omp_p;
  337. Permutation<ValueType> P_ = P;
  338. for (Long i = 0; i < d0; i++) {
  339. Long j = i;
  340. if (P_.perm[i] == i) continue;
  341. while (j < d0 && P_.perm[j] != i) { j++; }
  342. SCTL_ASSERT_MSG(j < d0, "Matrix::RowPerm(Permutation P) ==> Invalid permutation vector P.");
  343. Iterator<ValueType> Mi = M[i];
  344. Iterator<ValueType> Mj = M[j];
  345. myswap(Long, P_.perm[i], P_.perm[j]);
  346. for (Long k = a; k < b; k++) Mi[k]=tid;
  347. }
  348. }
  349. }
  350. template <class ValueType> void Matrix<ValueType>::ColPerm(const Permutation<ValueType>& P) {
  351. Matrix<ValueType>& M = *this;
  352. if (P.Dim() == 0) return;
  353. assert(M.Dim(1) == P.Dim());
  354. Long d0 = M.Dim(0);
  355. Long d1 = M.Dim(1);
  356. Integer omp_p = omp_get_max_threads();
  357. Matrix<ValueType> M_buff(omp_p, d1);
  358. ConstIterator<Long> perm_ = P.perm.begin();
  359. ConstIterator<ValueType> scal_ = P.scal.begin();
  360. #pragma omp parallel for schedule(static)
  361. for (Long i = 0; i < d0; i++) {
  362. Integer pid = omp_get_thread_num();
  363. Iterator<ValueType> buff = M_buff[pid];
  364. Iterator<ValueType> M_ = M[i];
  365. for (Long j = 0; j < d1; j++) buff[j] = M_[j];
  366. for (Long j = 0; j < d1; j++) {
  367. M_[j] = buff[perm_[j]] * scal_[j];
  368. }
  369. }
  370. }
  371. #undef myswap
  372. #define B1 128
  373. #define B2 32
  374. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::Transpose() const {
  375. const Matrix<ValueType>& M = *this;
  376. Long d0 = M.dim[0];
  377. Long d1 = M.dim[1];
  378. Matrix<ValueType> M_r(d1, d0);
  379. const Long blk0 = ((d0 + B1 - 1) / B1);
  380. const Long blk1 = ((d1 + B1 - 1) / B1);
  381. const Long blks = blk0 * blk1;
  382. #pragma omp parallel for
  383. for (Long k = 0; k < blks; k++) {
  384. Long i = (k % blk0) * B1;
  385. Long j = (k / blk0) * B1;
  386. Long d0_ = i + B1;
  387. if (d0_ >= d0) d0_ = d0;
  388. Long d1_ = j + B1;
  389. if (d1_ >= d1) d1_ = d1;
  390. for (Long ii = i; ii < d0_; ii += B2)
  391. for (Long jj = j; jj < d1_; jj += B2) {
  392. Long d0__ = ii + B2;
  393. if (d0__ >= d0) d0__ = d0;
  394. Long d1__ = jj + B2;
  395. if (d1__ >= d1) d1__ = d1;
  396. for (Long iii = ii; iii < d0__; iii++)
  397. for (Long jjj = jj; jjj < d1__; jjj++) {
  398. M_r[jjj][iii] = M[iii][jjj];
  399. }
  400. }
  401. }
  402. return M_r;
  403. }
  404. template <class ValueType> void Matrix<ValueType>::Transpose(Matrix<ValueType>& M_r, const Matrix<ValueType>& M) {
  405. Long d0 = M.dim[0];
  406. Long d1 = M.dim[1];
  407. if (M_r.dim[0] != d1 || M_r.dim[1] != d0) M_r.ReInit(d1, d0);
  408. const Long blk0 = ((d0 + B1 - 1) / B1);
  409. const Long blk1 = ((d1 + B1 - 1) / B1);
  410. const Long blks = blk0 * blk1;
  411. #pragma omp parallel for
  412. for (Long k = 0; k < blks; k++) {
  413. Long i = (k % blk0) * B1;
  414. Long j = (k / blk0) * B1;
  415. Long d0_ = i + B1;
  416. if (d0_ >= d0) d0_ = d0;
  417. Long d1_ = j + B1;
  418. if (d1_ >= d1) d1_ = d1;
  419. for (Long ii = i; ii < d0_; ii += B2)
  420. for (Long jj = j; jj < d1_; jj += B2) {
  421. Long d0__ = ii + B2;
  422. if (d0__ >= d0) d0__ = d0;
  423. Long d1__ = jj + B2;
  424. if (d1__ >= d1) d1__ = d1;
  425. for (Long iii = ii; iii < d0__; iii++)
  426. for (Long jjj = jj; jjj < d1__; jjj++) {
  427. M_r[jjj][iii] = M[iii][jjj];
  428. }
  429. }
  430. }
  431. }
  432. #undef B2
  433. #undef B1
  434. template <class ValueType> void Matrix<ValueType>::SVD(Matrix<ValueType>& tU, Matrix<ValueType>& tS, Matrix<ValueType>& tVT) {
  435. Matrix<ValueType>& M = *this;
  436. Matrix<ValueType> M_ = M;
  437. int n = M.Dim(0);
  438. int m = M.Dim(1);
  439. int k = (m < n ? m : n);
  440. if (tU.Dim(0) != n || tU.Dim(1) != k) tU.ReInit(n, k);
  441. tU.SetZero();
  442. if (tS.Dim(0) != k || tS.Dim(1) != k) tS.ReInit(k, k);
  443. tS.SetZero();
  444. if (tVT.Dim(0) != k || tVT.Dim(1) != m) tVT.ReInit(k, m);
  445. tVT.SetZero();
  446. // SVD
  447. int INFO = 0;
  448. char JOBU = 'S';
  449. char JOBVT = 'S';
  450. int wssize = 3 * (m < n ? m : n) + (m > n ? m : n);
  451. int wssize1 = 5 * (m < n ? m : n);
  452. wssize = (wssize > wssize1 ? wssize : wssize1);
  453. Iterator<ValueType> wsbuf = aligned_new<ValueType>(wssize);
  454. mat::svd(&JOBU, &JOBVT, &m, &n, M.begin(), &m, tS.begin(), tVT.begin(), &m, tU.begin(), &k, wsbuf, &wssize, &INFO);
  455. aligned_delete<ValueType>(wsbuf);
  456. if (INFO != 0) std::cout << INFO << '\n';
  457. assert(INFO == 0);
  458. for (Long i = 1; i < k; i++) {
  459. tS[i][i] = tS[0][i];
  460. tS[0][i] = 0;
  461. }
  462. // std::cout<<tU*tS*tVT-M_<<'\n';
  463. }
  464. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::pinv(ValueType eps) {
  465. if (eps < 0) {
  466. eps = 1.0;
  467. while (eps + (ValueType)1.0 > 1.0) eps *= 0.5;
  468. eps = sqrt<ValueType>(eps);
  469. }
  470. Matrix<ValueType> M_r(dim[1], dim[0]);
  471. mat::pinv(data_ptr, dim[0], dim[1], eps, M_r.data_ptr);
  472. this->ReInit(0, 0);
  473. return M_r;
  474. }
  475. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Permutation<ValueType>& P) {
  476. output << std::setprecision(4) << std::setiosflags(std::ios::left);
  477. Long size = P.perm.Dim();
  478. for (Long i = 0; i < size; i++) output << std::setw(10) << P.perm[i] << ' ';
  479. output << ";\n";
  480. for (Long i = 0; i < size; i++) output << std::setw(10) << P.scal[i] << ' ';
  481. output << ";\n";
  482. return output;
  483. }
  484. template <class ValueType> Permutation<ValueType>::Permutation(Long size) {
  485. perm.ReInit(size);
  486. scal.ReInit(size);
  487. for (Long i = 0; i < size; i++) {
  488. perm[i] = i;
  489. scal[i] = 1.0;
  490. }
  491. }
  492. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::RandPerm(Long size) {
  493. Permutation<ValueType> P(size);
  494. for (Long i = 0; i < size; i++) {
  495. P.perm[i] = rand() % size;
  496. for (Long j = 0; j < i; j++)
  497. if (P.perm[i] == P.perm[j]) {
  498. i--;
  499. break;
  500. }
  501. P.scal[i] = ((ValueType)rand()) / RAND_MAX;
  502. }
  503. return P;
  504. }
  505. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::GetMatrix() const {
  506. Long size = perm.Dim();
  507. Matrix<ValueType> M_r(size, size);
  508. for (Long i = 0; i < size; i++)
  509. for (Long j = 0; j < size; j++) M_r[i][j] = (perm[j] == i ? scal[j] : 0.0);
  510. return M_r;
  511. }
  512. template <class ValueType> Long Permutation<ValueType>::Dim() const { return perm.Dim(); }
  513. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::Transpose() {
  514. Long size = perm.Dim();
  515. Permutation<ValueType> P_r(size);
  516. Vector<Long>& perm_r = P_r.perm;
  517. Vector<ValueType>& scal_r = P_r.scal;
  518. for (Long i = 0; i < size; i++) {
  519. perm_r[perm[i]] = i;
  520. scal_r[perm[i]] = scal[i];
  521. }
  522. return P_r;
  523. }
  524. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator*=(ValueType s) {
  525. for (Long i = 0; i < scal.Dim(); i++) scal[i] *= s;
  526. return *this;
  527. }
  528. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator/=(ValueType s) {
  529. for (Long i = 0; i < scal.Dim(); i++) scal[i] /= s;
  530. return *this;
  531. }
  532. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(ValueType s) const {
  533. Permutation<ValueType> P = *this;
  534. P *= s;
  535. return P;
  536. }
  537. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator/(ValueType s) const {
  538. Permutation<ValueType> P = *this;
  539. P /= s;
  540. return P;
  541. }
  542. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(const Permutation<ValueType>& P) const {
  543. Long size = perm.Dim();
  544. SCTL_ASSERT(P.Dim() == size);
  545. Permutation<ValueType> P_r(size);
  546. Vector<Long>& perm_r = P_r.perm;
  547. Vector<ValueType>& scal_r = P_r.scal;
  548. for (Long i = 0; i < size; i++) {
  549. perm_r[i] = perm[P.perm[i]];
  550. scal_r[i] = scal[P.perm[i]] * P.scal[i];
  551. }
  552. return P_r;
  553. }
  554. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::operator*(const Matrix<ValueType>& M) const {
  555. if (Dim() == 0) return M;
  556. SCTL_ASSERT(M.Dim(0) == Dim());
  557. Long d0 = M.Dim(0);
  558. Long d1 = M.Dim(1);
  559. Matrix<ValueType> M_r(d0, d1);
  560. for (Long i = 0; i < d0; i++) {
  561. const ValueType s = scal[i];
  562. ConstIterator<ValueType> M_ = M[i];
  563. Iterator<ValueType> M_r_ = M_r[perm[i]];
  564. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  565. }
  566. return M_r;
  567. }
  568. template <class ValueType> Matrix<ValueType> operator*(const Matrix<ValueType>& M, const Permutation<ValueType>& P) {
  569. if (P.Dim() == 0) return M;
  570. SCTL_ASSERT(M.Dim(1) == P.Dim());
  571. Long d0 = M.Dim(0);
  572. Long d1 = M.Dim(1);
  573. Matrix<ValueType> M_r(d0, d1);
  574. for (Long i = 0; i < d0; i++) {
  575. ConstIterator<Long> perm_ = P.perm.begin();
  576. ConstIterator<ValueType> scal_ = P.scal.begin();
  577. ConstIterator<ValueType> M_ = M[i];
  578. Iterator<ValueType> M_r_ = M_r[i];
  579. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  580. }
  581. return M_r;
  582. }
  583. } // end namespace