matrix.txx 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. #include <omp.h>
  2. #include <cmath>
  3. #include <cstdlib>
  4. #include <cassert>
  5. #include <iostream>
  6. #include <iomanip>
  7. #include SCTL_INCLUDE(mat_utils.hpp)
  8. #include SCTL_INCLUDE(mem_mgr.hpp)
  9. #include SCTL_INCLUDE(profile.hpp)
  10. namespace SCTL_NAMESPACE {
  11. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Matrix<ValueType>& M) {
  12. std::ios::fmtflags f(std::cout.flags());
  13. output << std::fixed << std::setprecision(4) << std::setiosflags(std::ios::left);
  14. for (Long i = 0; i < M.Dim(0); i++) {
  15. for (Long j = 0; j < M.Dim(1); j++) {
  16. float f = ((float)M(i, j));
  17. if (fabs<float>(f) < 1e-25) f = 0;
  18. output << std::setw(10) << ((double)f) << ' ';
  19. }
  20. output << ";\n";
  21. }
  22. std::cout.flags(f);
  23. return output;
  24. }
  25. template <class ValueType> void Matrix<ValueType>::Init(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  26. dim[0] = dim1;
  27. dim[1] = dim2;
  28. own_data = own_data_;
  29. if (own_data) {
  30. if (dim[0] * dim[1] > 0) {
  31. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  32. if (data_ != NullIterator<ValueType>()) {
  33. memcopy(data_ptr, data_, dim[0] * dim[1]);
  34. }
  35. } else
  36. data_ptr = NullIterator<ValueType>();
  37. } else
  38. data_ptr = data_;
  39. }
  40. template <class ValueType> Matrix<ValueType>::Matrix() {
  41. Init(0, 0);
  42. }
  43. template <class ValueType> Matrix<ValueType>::Matrix(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  44. Init(dim1, dim2, data_, own_data_);
  45. }
  46. template <class ValueType> Matrix<ValueType>::Matrix(const Matrix<ValueType>& M) {
  47. Init(M.Dim(0), M.Dim(1), (Iterator<ValueType>)M.begin());
  48. }
  49. template <class ValueType> Matrix<ValueType>::~Matrix() {
  50. if (own_data) {
  51. if (data_ptr != NullIterator<ValueType>()) {
  52. aligned_delete(data_ptr);
  53. }
  54. }
  55. data_ptr = NullIterator<ValueType>();
  56. dim[0] = 0;
  57. dim[1] = 0;
  58. }
  59. template <class ValueType> void Matrix<ValueType>::Swap(Matrix<ValueType>& M) {
  60. StaticArray<Long, 2> dim_;
  61. dim_[0] = dim[0];
  62. dim_[1] = dim[1];
  63. Iterator<ValueType> data_ptr_ = data_ptr;
  64. bool own_data_ = own_data;
  65. dim[0] = M.dim[0];
  66. dim[1] = M.dim[1];
  67. data_ptr = M.data_ptr;
  68. own_data = M.own_data;
  69. M.dim[0] = dim_[0];
  70. M.dim[1] = dim_[1];
  71. M.data_ptr = data_ptr_;
  72. M.own_data = own_data_;
  73. }
  74. template <class ValueType> void Matrix<ValueType>::ReInit(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  75. if (own_data_ && own_data && dim[0] * dim[1] >= dim1 * dim2) {
  76. dim[0] = dim1;
  77. dim[1] = dim2;
  78. if (data_ != NullIterator<ValueType>()) {
  79. memcopy(data_ptr, data_, dim[0] * dim[1]);
  80. }
  81. } else {
  82. Matrix<ValueType> tmp(dim1, dim2, data_, own_data_);
  83. this->Swap(tmp);
  84. }
  85. }
  86. template <class ValueType> void Matrix<ValueType>::Write(const char* fname) const {
  87. FILE* f1 = fopen(fname, "wb+");
  88. if (f1 == nullptr) {
  89. std::cout << "Unable to open file for writing:" << fname << '\n';
  90. return;
  91. }
  92. StaticArray<uint64_t, 2> dim_;
  93. dim_[0] = (uint64_t)Dim(0);
  94. dim_[1] = (uint64_t)Dim(1);
  95. fwrite(&dim_[0], sizeof(uint64_t), 2, f1);
  96. if (Dim(0) * Dim(1)) fwrite(&data_ptr[0], sizeof(ValueType), Dim(0) * Dim(1), f1);
  97. fclose(f1);
  98. }
  99. template <class ValueType> void Matrix<ValueType>::Read(const char* fname) {
  100. FILE* f1 = fopen(fname, "r");
  101. if (f1 == nullptr) {
  102. std::cout << "Unable to open file for reading:" << fname << '\n';
  103. return;
  104. }
  105. StaticArray<uint64_t, 2> dim_;
  106. Long readlen = fread(&dim_[0], sizeof(uint64_t), 2, f1);
  107. SCTL_ASSERT(readlen == 2);
  108. if (Dim(0) != (Long)dim_[0] || Dim(1) != (Long)dim_[1]) ReInit(dim_[0], dim_[1]);
  109. if (dim_[0] && dim_[1]) {
  110. readlen = fread(&data_ptr[0], sizeof(ValueType), dim_[0] * dim_[1], f1);
  111. SCTL_ASSERT(readlen == (Long)(dim_[0] * dim_[1]));
  112. }
  113. fclose(f1);
  114. }
  115. template <class ValueType> Long Matrix<ValueType>::Dim(Long i) const { return dim[i]; }
  116. template <class ValueType> void Matrix<ValueType>::SetZero() {
  117. if (dim[0] && dim[1]) memset(data_ptr, 0, dim[0] * dim[1]);
  118. }
  119. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::begin() { return data_ptr; }
  120. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::begin() const { return data_ptr; }
  121. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::end() { return data_ptr + dim[0] * dim[1]; }
  122. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::end() const { return data_ptr + dim[0] * dim[1]; }
  123. // Matrix-Matrix operations
  124. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(const Matrix<ValueType>& M) {
  125. if (this != &M) {
  126. if (dim[0] * dim[1] < M.dim[0] * M.dim[1]) {
  127. ReInit(M.dim[0], M.dim[1]);
  128. }
  129. dim[0] = M.dim[0];
  130. dim[1] = M.dim[1];
  131. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  132. }
  133. return *this;
  134. }
  135. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(const Matrix<ValueType>& M) {
  136. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  137. Profile::Add_FLOP(dim[0] * dim[1]);
  138. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] += M.data_ptr[i];
  139. return *this;
  140. }
  141. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(const Matrix<ValueType>& M) {
  142. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  143. Profile::Add_FLOP(dim[0] * dim[1]);
  144. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] -= M.data_ptr[i];
  145. return *this;
  146. }
  147. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(const Matrix<ValueType>& M2) const {
  148. const Matrix<ValueType>& M1 = *this;
  149. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  150. Profile::Add_FLOP(dim[0] * dim[1]);
  151. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  152. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] + M2[0][i];
  153. return M_r;
  154. }
  155. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(const Matrix<ValueType>& M2) const {
  156. const Matrix<ValueType>& M1 = *this;
  157. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  158. Profile::Add_FLOP(dim[0] * dim[1]);
  159. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  160. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] - M2[0][i];
  161. return M_r;
  162. }
  163. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(const Matrix<ValueType>& M) const {
  164. SCTL_ASSERT(dim[1] == M.dim[0]);
  165. Profile::Add_FLOP(2 * (((Long)dim[0]) * dim[1]) * M.dim[1]);
  166. Matrix<ValueType> M_r(dim[0], M.dim[1]);
  167. if (M.Dim(0) * M.Dim(1) == 0 || this->Dim(0) * this->Dim(1) == 0) return M_r;
  168. mat::gemm<ValueType>('N', 'N', M.dim[1], dim[0], dim[1], 1.0, M.data_ptr, M.dim[1], data_ptr, dim[1], 0.0, M_r.data_ptr, M_r.dim[1]);
  169. return M_r;
  170. }
  171. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  172. assert(A.dim[1] == B.dim[0]);
  173. assert(M_r.dim[0] == A.dim[0]);
  174. assert(M_r.dim[1] == B.dim[1]);
  175. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  176. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  177. mat::gemm<ValueType>('N', 'N', B.dim[1], A.dim[0], A.dim[1], 1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  178. }
  179. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Permutation<ValueType>& P, const Matrix<ValueType>& M, ValueType beta) {
  180. Long d0 = M.Dim(0);
  181. Long d1 = M.Dim(1);
  182. assert(P.Dim() == d0);
  183. assert(M_r.Dim(0) == d0);
  184. assert(M_r.Dim(1) == d1);
  185. if (P.Dim() == 0 || d0 * d1 == 0) return;
  186. if (beta == 0) {
  187. for (Long i = 0; i < d0; i++) {
  188. const ValueType s = P.scal[i];
  189. ConstIterator<ValueType> M_ = M[i];
  190. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  191. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  192. }
  193. } else {
  194. for (Long i = 0; i < d0; i++) {
  195. const ValueType s = P.scal[i];
  196. ConstIterator<ValueType> M_ = M[i];
  197. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  198. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s + M_r_[j] * beta;
  199. }
  200. }
  201. }
  202. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& M, const Permutation<ValueType>& P, ValueType beta) {
  203. Long d0 = M.Dim(0);
  204. Long d1 = M.Dim(1);
  205. assert(P.Dim() == d1);
  206. assert(M_r.Dim(0) == d0);
  207. assert(M_r.Dim(1) == d1);
  208. if (P.Dim() == 0 || d0 * d1 == 0) return;
  209. if (beta == 0) {
  210. for (Long i = 0; i < d0; i++) {
  211. ConstIterator<Long> perm_ = P.perm.begin();
  212. ConstIterator<ValueType> scal_ = P.scal.begin();
  213. ConstIterator<ValueType> M_ = M[i];
  214. Iterator<ValueType> M_r_ = M_r[i];
  215. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  216. }
  217. } else {
  218. for (Long i = 0; i < d0; i++) {
  219. ConstIterator<Long> perm_ = P.perm.begin();
  220. ConstIterator<ValueType> scal_ = P.scal.begin();
  221. ConstIterator<ValueType> M_ = M[i];
  222. Iterator<ValueType> M_r_ = M_r[i];
  223. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j] + M_r_[j] * beta;
  224. }
  225. }
  226. }
  227. // cublasgemm wrapper
  228. #if defined(SCTL_HAVE_CUDA)
  229. template <class ValueType> void Matrix<ValueType>::CUBLASGEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  230. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  231. assert(A.dim[1] == B.dim[0]);
  232. assert(M_r.dim[0] == A.dim[0]);
  233. assert(M_r.dim[1] == B.dim[1]);
  234. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  235. mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1], (ValueType)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  236. }
  237. #endif
  238. // Matrix-Scalar operations
  239. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(ValueType s) {
  240. Long N = dim[0] * dim[1];
  241. for (Long i = 0; i < N; i++) data_ptr[i] = s;
  242. return *this;
  243. }
  244. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(ValueType s) {
  245. Long N = dim[0] * dim[1];
  246. for (Long i = 0; i < N; i++) data_ptr[i] += s;
  247. Profile::Add_FLOP(N);
  248. return *this;
  249. }
  250. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(ValueType s) {
  251. Long N = dim[0] * dim[1];
  252. for (Long i = 0; i < N; i++) data_ptr[i] -= s;
  253. Profile::Add_FLOP(N);
  254. return *this;
  255. }
  256. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator*=(ValueType s) {
  257. Long N = dim[0] * dim[1];
  258. for (Long i = 0; i < N; i++) data_ptr[i] *= s;
  259. Profile::Add_FLOP(N);
  260. return *this;
  261. }
  262. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator/=(ValueType s) {
  263. Long N = dim[0] * dim[1];
  264. for (Long i = 0; i < N; i++) data_ptr[i] /= s;
  265. Profile::Add_FLOP(N);
  266. return *this;
  267. }
  268. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(ValueType s) const {
  269. Long N = dim[0] * dim[1];
  270. Matrix<ValueType> M_r(dim[0], dim[1]);
  271. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] + s;
  272. return M_r;
  273. }
  274. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(ValueType s) const {
  275. Long N = dim[0] * dim[1];
  276. Matrix<ValueType> M_r(dim[0], dim[1]);
  277. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] - s;
  278. return M_r;
  279. }
  280. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(ValueType s) const {
  281. Long N = dim[0] * dim[1];
  282. Matrix<ValueType> M_r(dim[0], dim[1]);
  283. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] * s;
  284. return M_r;
  285. }
  286. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator/(ValueType s) const {
  287. Long N = dim[0] * dim[1];
  288. Matrix<ValueType> M_r(dim[0], dim[1]);
  289. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] / s;
  290. return M_r;
  291. }
  292. // Element access
  293. template <class ValueType> inline ValueType& Matrix<ValueType>::operator()(Long i, Long j) {
  294. assert(i < dim[0] && j < dim[1]);
  295. return data_ptr[i * dim[1] + j];
  296. }
  297. template <class ValueType> inline const ValueType& Matrix<ValueType>::operator()(Long i, Long j) const {
  298. assert(i < dim[0] && j < dim[1]);
  299. return data_ptr[i * dim[1] + j];
  300. }
  301. template <class ValueType> inline Iterator<ValueType> Matrix<ValueType>::operator[](Long i) {
  302. assert(i < dim[0]);
  303. return data_ptr + i * dim[1];
  304. }
  305. template <class ValueType> inline ConstIterator<ValueType> Matrix<ValueType>::operator[](Long i) const {
  306. assert(i < dim[0]);
  307. return (data_ptr + i * dim[1]);
  308. }
  309. template <class ValueType> void Matrix<ValueType>::RowPerm(const Permutation<ValueType>& P) {
  310. Matrix<ValueType>& M = *this;
  311. if (P.Dim() == 0) return;
  312. assert(M.Dim(0) == P.Dim());
  313. Long d0 = M.Dim(0);
  314. Long d1 = M.Dim(1);
  315. #pragma omp parallel for schedule(static)
  316. for (Long i = 0; i < d0; i++) {
  317. Iterator<ValueType> M_ = M[i];
  318. const ValueType s = P.scal[i];
  319. for (Long j = 0; j < d1; j++) M_[j] *= s;
  320. }
  321. Integer omp_p = omp_get_max_threads();
  322. #pragma omp parallel for schedule(static)
  323. for (Integer tid = 0; tid < omp_p; tid++) {
  324. Long a = d1 * (tid + 0) / omp_p;
  325. Long b = d1 * (tid + 1) / omp_p;
  326. Permutation<ValueType> P_ = P;
  327. for (Long i = 0; i < d0; i++) {
  328. Long j = i;
  329. if (P_.perm[i] == i) continue;
  330. while (j < d0 && P_.perm[j] != i) { j++; }
  331. SCTL_ASSERT_MSG(j < d0, "Matrix::RowPerm(Permutation P) ==> Invalid permutation vector P.");
  332. Iterator<ValueType> Mi = M[i];
  333. Iterator<ValueType> Mj = M[j];
  334. std::swap<Long>(P_.perm[i], P_.perm[j]);
  335. for (Long k = a; k < b; k++) Mi[k]=tid;
  336. }
  337. }
  338. }
  339. template <class ValueType> void Matrix<ValueType>::ColPerm(const Permutation<ValueType>& P) {
  340. Matrix<ValueType>& M = *this;
  341. if (P.Dim() == 0) return;
  342. assert(M.Dim(1) == P.Dim());
  343. Long d0 = M.Dim(0);
  344. Long d1 = M.Dim(1);
  345. Integer omp_p = omp_get_max_threads();
  346. Matrix<ValueType> M_buff(omp_p, d1);
  347. ConstIterator<Long> perm_ = P.perm.begin();
  348. ConstIterator<ValueType> scal_ = P.scal.begin();
  349. #pragma omp parallel for schedule(static)
  350. for (Long i = 0; i < d0; i++) {
  351. Integer pid = omp_get_thread_num();
  352. Iterator<ValueType> buff = M_buff[pid];
  353. Iterator<ValueType> M_ = M[i];
  354. for (Long j = 0; j < d1; j++) buff[j] = M_[j];
  355. for (Long j = 0; j < d1; j++) {
  356. M_[j] = buff[perm_[j]] * scal_[j];
  357. }
  358. }
  359. }
  360. #define SCTL_B1 128
  361. #define SCTL_B2 32
  362. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::Transpose() const {
  363. const Matrix<ValueType>& M = *this;
  364. Long d0 = M.dim[0];
  365. Long d1 = M.dim[1];
  366. Matrix<ValueType> M_r(d1, d0);
  367. const Long blk0 = ((d0 + SCTL_B1 - 1) / SCTL_B1);
  368. const Long blk1 = ((d1 + SCTL_B1 - 1) / SCTL_B1);
  369. const Long blks = blk0 * blk1;
  370. #pragma omp parallel for
  371. for (Long k = 0; k < blks; k++) {
  372. Long i = (k % blk0) * SCTL_B1;
  373. Long j = (k / blk0) * SCTL_B1;
  374. Long d0_ = i + SCTL_B1;
  375. if (d0_ >= d0) d0_ = d0;
  376. Long d1_ = j + SCTL_B1;
  377. if (d1_ >= d1) d1_ = d1;
  378. for (Long ii = i; ii < d0_; ii += SCTL_B2)
  379. for (Long jj = j; jj < d1_; jj += SCTL_B2) {
  380. Long d0__ = ii + SCTL_B2;
  381. if (d0__ >= d0) d0__ = d0;
  382. Long d1__ = jj + SCTL_B2;
  383. if (d1__ >= d1) d1__ = d1;
  384. for (Long iii = ii; iii < d0__; iii++)
  385. for (Long jjj = jj; jjj < d1__; jjj++) {
  386. M_r[jjj][iii] = M[iii][jjj];
  387. }
  388. }
  389. }
  390. return M_r;
  391. }
  392. template <class ValueType> void Matrix<ValueType>::Transpose(Matrix<ValueType>& M_r, const Matrix<ValueType>& M) {
  393. Long d0 = M.dim[0];
  394. Long d1 = M.dim[1];
  395. if (M_r.dim[0] != d1 || M_r.dim[1] != d0) M_r.ReInit(d1, d0);
  396. const Long blk0 = ((d0 + SCTL_B1 - 1) / SCTL_B1);
  397. const Long blk1 = ((d1 + SCTL_B1 - 1) / SCTL_B1);
  398. const Long blks = blk0 * blk1;
  399. #pragma omp parallel for
  400. for (Long k = 0; k < blks; k++) {
  401. Long i = (k % blk0) * SCTL_B1;
  402. Long j = (k / blk0) * SCTL_B1;
  403. Long d0_ = i + SCTL_B1;
  404. if (d0_ >= d0) d0_ = d0;
  405. Long d1_ = j + SCTL_B1;
  406. if (d1_ >= d1) d1_ = d1;
  407. for (Long ii = i; ii < d0_; ii += SCTL_B2)
  408. for (Long jj = j; jj < d1_; jj += SCTL_B2) {
  409. Long d0__ = ii + SCTL_B2;
  410. if (d0__ >= d0) d0__ = d0;
  411. Long d1__ = jj + SCTL_B2;
  412. if (d1__ >= d1) d1__ = d1;
  413. for (Long iii = ii; iii < d0__; iii++)
  414. for (Long jjj = jj; jjj < d1__; jjj++) {
  415. M_r[jjj][iii] = M[iii][jjj];
  416. }
  417. }
  418. }
  419. }
  420. #undef SCTL_B2
  421. #undef SCTL_B1
  422. template <class ValueType> void Matrix<ValueType>::SVD(Matrix<ValueType>& tU, Matrix<ValueType>& tS, Matrix<ValueType>& tVT) {
  423. Matrix<ValueType>& M = *this;
  424. Matrix<ValueType> M_ = M;
  425. int n = M.Dim(0);
  426. int m = M.Dim(1);
  427. int k = (m < n ? m : n);
  428. if (tU.Dim(0) != n || tU.Dim(1) != k) tU.ReInit(n, k);
  429. tU.SetZero();
  430. if (tS.Dim(0) != k || tS.Dim(1) != k) tS.ReInit(k, k);
  431. tS.SetZero();
  432. if (tVT.Dim(0) != k || tVT.Dim(1) != m) tVT.ReInit(k, m);
  433. tVT.SetZero();
  434. // SVD
  435. int INFO = 0;
  436. char JOBU = 'S';
  437. char JOBVT = 'S';
  438. int wssize = 3 * (m < n ? m : n) + (m > n ? m : n);
  439. int wssize1 = 5 * (m < n ? m : n);
  440. wssize = (wssize > wssize1 ? wssize : wssize1);
  441. Iterator<ValueType> wsbuf = aligned_new<ValueType>(wssize);
  442. mat::svd(&JOBU, &JOBVT, &m, &n, M.begin(), &m, tS.begin(), tVT.begin(), &m, tU.begin(), &k, wsbuf, &wssize, &INFO);
  443. aligned_delete<ValueType>(wsbuf);
  444. if (INFO != 0) std::cout << INFO << '\n';
  445. assert(INFO == 0);
  446. for (Long i = 1; i < k; i++) {
  447. tS[i][i] = tS[0][i];
  448. tS[0][i] = 0;
  449. }
  450. // std::cout<<tU*tS*tVT-M_<<'\n';
  451. }
  452. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::pinv(ValueType eps) {
  453. if (eps < 0) {
  454. eps = 1.0;
  455. while (eps + (ValueType)1.0 > 1.0) eps *= 0.5;
  456. eps = sqrt<ValueType>(eps);
  457. }
  458. Matrix<ValueType> M_r(dim[1], dim[0]);
  459. mat::pinv(data_ptr, dim[0], dim[1], eps, M_r.data_ptr);
  460. this->ReInit(0, 0);
  461. return M_r;
  462. }
  463. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Permutation<ValueType>& P) {
  464. output << std::setprecision(4) << std::setiosflags(std::ios::left);
  465. Long size = P.perm.Dim();
  466. for (Long i = 0; i < size; i++) output << std::setw(10) << P.perm[i] << ' ';
  467. output << ";\n";
  468. for (Long i = 0; i < size; i++) output << std::setw(10) << P.scal[i] << ' ';
  469. output << ";\n";
  470. return output;
  471. }
  472. template <class ValueType> Permutation<ValueType>::Permutation(Long size) {
  473. perm.ReInit(size);
  474. scal.ReInit(size);
  475. for (Long i = 0; i < size; i++) {
  476. perm[i] = i;
  477. scal[i] = 1.0;
  478. }
  479. }
  480. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::RandPerm(Long size) {
  481. Permutation<ValueType> P(size);
  482. for (Long i = 0; i < size; i++) {
  483. P.perm[i] = rand() % size;
  484. for (Long j = 0; j < i; j++)
  485. if (P.perm[i] == P.perm[j]) {
  486. i--;
  487. break;
  488. }
  489. P.scal[i] = ((ValueType)rand()) / RAND_MAX;
  490. }
  491. return P;
  492. }
  493. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::GetMatrix() const {
  494. Long size = perm.Dim();
  495. Matrix<ValueType> M_r(size, size);
  496. for (Long i = 0; i < size; i++)
  497. for (Long j = 0; j < size; j++) M_r[i][j] = (perm[j] == i ? scal[j] : 0.0);
  498. return M_r;
  499. }
  500. template <class ValueType> Long Permutation<ValueType>::Dim() const { return perm.Dim(); }
  501. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::Transpose() {
  502. Long size = perm.Dim();
  503. Permutation<ValueType> P_r(size);
  504. Vector<Long>& perm_r = P_r.perm;
  505. Vector<ValueType>& scal_r = P_r.scal;
  506. for (Long i = 0; i < size; i++) {
  507. perm_r[perm[i]] = i;
  508. scal_r[perm[i]] = scal[i];
  509. }
  510. return P_r;
  511. }
  512. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator*=(ValueType s) {
  513. for (Long i = 0; i < scal.Dim(); i++) scal[i] *= s;
  514. return *this;
  515. }
  516. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator/=(ValueType s) {
  517. for (Long i = 0; i < scal.Dim(); i++) scal[i] /= s;
  518. return *this;
  519. }
  520. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(ValueType s) const {
  521. Permutation<ValueType> P = *this;
  522. P *= s;
  523. return P;
  524. }
  525. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator/(ValueType s) const {
  526. Permutation<ValueType> P = *this;
  527. P /= s;
  528. return P;
  529. }
  530. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(const Permutation<ValueType>& P) const {
  531. Long size = perm.Dim();
  532. SCTL_ASSERT(P.Dim() == size);
  533. Permutation<ValueType> P_r(size);
  534. Vector<Long>& perm_r = P_r.perm;
  535. Vector<ValueType>& scal_r = P_r.scal;
  536. for (Long i = 0; i < size; i++) {
  537. perm_r[i] = perm[P.perm[i]];
  538. scal_r[i] = scal[P.perm[i]] * P.scal[i];
  539. }
  540. return P_r;
  541. }
  542. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::operator*(const Matrix<ValueType>& M) const {
  543. if (Dim() == 0) return M;
  544. SCTL_ASSERT(M.Dim(0) == Dim());
  545. Long d0 = M.Dim(0);
  546. Long d1 = M.Dim(1);
  547. Matrix<ValueType> M_r(d0, d1);
  548. for (Long i = 0; i < d0; i++) {
  549. const ValueType s = scal[i];
  550. ConstIterator<ValueType> M_ = M[i];
  551. Iterator<ValueType> M_r_ = M_r[perm[i]];
  552. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  553. }
  554. return M_r;
  555. }
  556. template <class ValueType> Matrix<ValueType> operator*(const Matrix<ValueType>& M, const Permutation<ValueType>& P) {
  557. if (P.Dim() == 0) return M;
  558. SCTL_ASSERT(M.Dim(1) == P.Dim());
  559. Long d0 = M.Dim(0);
  560. Long d1 = M.Dim(1);
  561. Matrix<ValueType> M_r(d0, d1);
  562. for (Long i = 0; i < d0; i++) {
  563. ConstIterator<Long> perm_ = P.perm.begin();
  564. ConstIterator<ValueType> scal_ = P.scal.begin();
  565. ConstIterator<ValueType> M_ = M[i];
  566. Iterator<ValueType> M_r_ = M_r[i];
  567. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  568. }
  569. return M_r;
  570. }
  571. } // end namespace