matrix.txx 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669
  1. #include <omp.h>
  2. #include <cmath>
  3. #include <cstdlib>
  4. #include <cassert>
  5. #include <iostream>
  6. #include <iomanip>
  7. #include SCTL_INCLUDE(mat_utils.hpp)
  8. #include SCTL_INCLUDE(mem_mgr.hpp)
  9. #include SCTL_INCLUDE(profile.hpp)
  10. namespace SCTL_NAMESPACE {
  11. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Matrix<ValueType>& M) {
  12. std::ios::fmtflags f(std::cout.flags());
  13. output << std::fixed << std::setprecision(4) << std::setiosflags(std::ios::left);
  14. for (Long i = 0; i < M.Dim(0); i++) {
  15. for (Long j = 0; j < M.Dim(1); j++) {
  16. float f = ((float)M(i, j));
  17. if (fabs<ValueType>(f) < 1e-25) f = 0;
  18. output << std::setw(10) << ((double)f) << ' ';
  19. }
  20. output << ";\n";
  21. }
  22. std::cout.flags(f);
  23. return output;
  24. }
  25. template <class ValueType> void Matrix<ValueType>::Init(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  26. dim[0] = dim1;
  27. dim[1] = dim2;
  28. own_data = own_data_;
  29. if (own_data) {
  30. if (dim[0] * dim[1] > 0) {
  31. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  32. if (data_ != nullptr) {
  33. memcopy(data_ptr, data_, dim[0] * dim[1]);
  34. }
  35. } else
  36. data_ptr = nullptr;
  37. } else
  38. data_ptr = data_;
  39. }
  40. template <class ValueType> Matrix<ValueType>::Matrix() {
  41. Init(0, 0);
  42. }
  43. template <class ValueType> Matrix<ValueType>::Matrix(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  44. Init(dim1, dim2, data_, own_data_);
  45. }
  46. template <class ValueType> Matrix<ValueType>::Matrix(const Matrix<ValueType>& M) {
  47. Init(M.Dim(0), M.Dim(1), (Iterator<ValueType>)M.begin());
  48. }
  49. template <class ValueType> Matrix<ValueType>::~Matrix() {
  50. if (own_data) {
  51. if (data_ptr != nullptr) {
  52. aligned_delete(data_ptr);
  53. }
  54. }
  55. data_ptr = nullptr;
  56. dim[0] = 0;
  57. dim[1] = 0;
  58. }
  59. template <class ValueType> void Matrix<ValueType>::Swap(Matrix<ValueType>& M) {
  60. StaticArray<Long, 2> dim_;
  61. dim_[0] = dim[0];
  62. dim_[1] = dim[1];
  63. Iterator<ValueType> data_ptr_ = data_ptr;
  64. bool own_data_ = own_data;
  65. dim[0] = M.dim[0];
  66. dim[1] = M.dim[1];
  67. data_ptr = M.data_ptr;
  68. own_data = M.own_data;
  69. M.dim[0] = dim_[0];
  70. M.dim[1] = dim_[1];
  71. M.data_ptr = data_ptr_;
  72. M.own_data = own_data_;
  73. }
  74. template <class ValueType> void Matrix<ValueType>::ReInit(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  75. if (own_data_ && own_data && dim[0] * dim[1] >= dim1 * dim2) {
  76. dim[0] = dim1;
  77. dim[1] = dim2;
  78. if (data_ != nullptr) {
  79. memcopy(data_ptr, data_, dim[0] * dim[1]);
  80. }
  81. } else {
  82. Matrix<ValueType> tmp(dim1, dim2, data_, own_data_);
  83. this->Swap(tmp);
  84. }
  85. }
  86. template <class ValueType> void Matrix<ValueType>::Write(const char* fname) const {
  87. FILE* f1 = fopen(fname, "wb+");
  88. if (f1 == nullptr) {
  89. std::cout << "Unable to open file for writing:" << fname << '\n';
  90. return;
  91. }
  92. StaticArray<uint64_t, 2> dim_;
  93. dim_[0] = (uint64_t)Dim(0);
  94. dim_[1] = (uint64_t)Dim(1);
  95. fwrite(&dim_[0], sizeof(uint64_t), 2, f1);
  96. if (Dim(0) * Dim(1)) fwrite(&data_ptr[0], sizeof(ValueType), Dim(0) * Dim(1), f1);
  97. fclose(f1);
  98. }
  99. template <class ValueType> void Matrix<ValueType>::Read(const char* fname) {
  100. FILE* f1 = fopen(fname, "r");
  101. if (f1 == nullptr) {
  102. std::cout << "Unable to open file for reading:" << fname << '\n';
  103. return;
  104. }
  105. StaticArray<uint64_t, 2> dim_;
  106. Long readlen = fread(&dim_[0], sizeof(uint64_t), 2, f1);
  107. assert(readlen == 2);
  108. if (Dim(0)!=dim_[0] || Dim(1)!=dim_[1]) ReInit(dim_[0], dim_[1]);
  109. if (dim_[0] * dim_[1]) readlen = fread(&data_ptr[0], sizeof(ValueType), dim_[0] * dim_[1], f1);
  110. assert(readlen == dim_[0] * dim_[1]);
  111. fclose(f1);
  112. }
  113. template <class ValueType> Long Matrix<ValueType>::Dim(Long i) const { return dim[i]; }
  114. template <class ValueType> void Matrix<ValueType>::SetZero() {
  115. if (dim[0] * dim[1]) memset(data_ptr, 0, dim[0] * dim[1]);
  116. }
  117. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::begin() { return data_ptr; }
  118. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::begin() const { return data_ptr; }
  119. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::end() { return data_ptr + dim[0] * dim[1]; }
  120. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::end() const { return data_ptr + dim[0] * dim[1]; }
  121. // Matrix-Matrix operations
  122. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(const Matrix<ValueType>& M) {
  123. if (this != &M) {
  124. if (dim[0] * dim[1] < M.dim[0] * M.dim[1]) {
  125. ReInit(M.dim[0], M.dim[1]);
  126. }
  127. dim[0] = M.dim[0];
  128. dim[1] = M.dim[1];
  129. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  130. }
  131. return *this;
  132. }
  133. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(const Matrix<ValueType>& M) {
  134. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  135. Profile::Add_FLOP(dim[0] * dim[1]);
  136. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] += M.data_ptr[i];
  137. return *this;
  138. }
  139. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(const Matrix<ValueType>& M) {
  140. SCTL_ASSERT(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  141. Profile::Add_FLOP(dim[0] * dim[1]);
  142. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] -= M.data_ptr[i];
  143. return *this;
  144. }
  145. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(const Matrix<ValueType>& M2) const {
  146. const Matrix<ValueType>& M1 = *this;
  147. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  148. Profile::Add_FLOP(dim[0] * dim[1]);
  149. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  150. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] + M2[0][i];
  151. return M_r;
  152. }
  153. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(const Matrix<ValueType>& M2) const {
  154. const Matrix<ValueType>& M1 = *this;
  155. SCTL_ASSERT(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  156. Profile::Add_FLOP(dim[0] * dim[1]);
  157. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1));
  158. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] - M2[0][i];
  159. return M_r;
  160. }
  161. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(const Matrix<ValueType>& M) const {
  162. SCTL_ASSERT(dim[1] == M.dim[0]);
  163. Profile::Add_FLOP(2 * (((Long)dim[0]) * dim[1]) * M.dim[1]);
  164. Matrix<ValueType> M_r(dim[0], M.dim[1]);
  165. if (M.Dim(0) * M.Dim(1) == 0 || this->Dim(0) * this->Dim(1) == 0) return M_r;
  166. mat::gemm<ValueType>('N', 'N', M.dim[1], dim[0], dim[1], 1.0, M.data_ptr, M.dim[1], data_ptr, dim[1], 0.0, M_r.data_ptr, M_r.dim[1]);
  167. return M_r;
  168. }
  169. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  170. assert(A.dim[1] == B.dim[0]);
  171. assert(M_r.dim[0] == A.dim[0]);
  172. assert(M_r.dim[1] == B.dim[1]);
  173. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  174. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  175. mat::gemm<ValueType>('N', 'N', B.dim[1], A.dim[0], A.dim[1], 1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  176. }
  177. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Permutation<ValueType>& P, const Matrix<ValueType>& M, ValueType beta) {
  178. Long d0 = M.Dim(0);
  179. Long d1 = M.Dim(1);
  180. assert(P.Dim() == d0);
  181. assert(M_r.Dim(0) == d0);
  182. assert(M_r.Dim(1) == d1);
  183. if (P.Dim() == 0 || d0 * d1 == 0) return;
  184. if (beta == 0) {
  185. for (Long i = 0; i < d0; i++) {
  186. const ValueType s = P.scal[i];
  187. ConstIterator<ValueType> M_ = M[i];
  188. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  189. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  190. }
  191. } else {
  192. for (Long i = 0; i < d0; i++) {
  193. const ValueType s = P.scal[i];
  194. ConstIterator<ValueType> M_ = M[i];
  195. Iterator<ValueType> M_r_ = M_r[P.perm[i]];
  196. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s + M_r_[j] * beta;
  197. }
  198. }
  199. }
  200. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& M, const Permutation<ValueType>& P, ValueType beta) {
  201. Long d0 = M.Dim(0);
  202. Long d1 = M.Dim(1);
  203. assert(P.Dim() == d1);
  204. assert(M_r.Dim(0) == d0);
  205. assert(M_r.Dim(1) == d1);
  206. if (P.Dim() == 0 || d0 * d1 == 0) return;
  207. if (beta == 0) {
  208. for (Long i = 0; i < d0; i++) {
  209. ConstIterator<Long> perm_ = P.perm.begin();
  210. ConstIterator<ValueType> scal_ = P.scal.begin();
  211. ConstIterator<ValueType> M_ = M[i];
  212. Iterator<ValueType> M_r_ = M_r[i];
  213. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  214. }
  215. } else {
  216. for (Long i = 0; i < d0; i++) {
  217. ConstIterator<Long> perm_ = P.perm.begin();
  218. ConstIterator<ValueType> scal_ = P.scal.begin();
  219. ConstIterator<ValueType> M_ = M[i];
  220. Iterator<ValueType> M_r_ = M_r[i];
  221. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j] + M_r_[j] * beta;
  222. }
  223. }
  224. }
  225. // cublasgemm wrapper
  226. #if defined(SCTL_HAVE_CUDA)
  227. template <class ValueType> void Matrix<ValueType>::CUBLASGEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  228. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  229. assert(A.dim[1] == B.dim[0]);
  230. assert(M_r.dim[0] == A.dim[0]);
  231. assert(M_r.dim[1] == B.dim[1]);
  232. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  233. mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1], (ValueType)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  234. }
  235. #endif
  236. // Matrix-Scalar operations
  237. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(ValueType s) {
  238. Long N = dim[0] * dim[1];
  239. for (Long i = 0; i < N; i++) data_ptr[i] = s;
  240. return *this;
  241. }
  242. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(ValueType s) {
  243. Long N = dim[0] * dim[1];
  244. for (Long i = 0; i < N; i++) data_ptr[i] += s;
  245. Profile::Add_FLOP(N);
  246. return *this;
  247. }
  248. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(ValueType s) {
  249. Long N = dim[0] * dim[1];
  250. for (Long i = 0; i < N; i++) data_ptr[i] -= s;
  251. Profile::Add_FLOP(N);
  252. return *this;
  253. }
  254. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator*=(ValueType s) {
  255. Long N = dim[0] * dim[1];
  256. for (Long i = 0; i < N; i++) data_ptr[i] *= s;
  257. Profile::Add_FLOP(N);
  258. return *this;
  259. }
  260. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator/=(ValueType s) {
  261. Long N = dim[0] * dim[1];
  262. for (Long i = 0; i < N; i++) data_ptr[i] /= s;
  263. Profile::Add_FLOP(N);
  264. return *this;
  265. }
  266. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(ValueType s) const {
  267. Long N = dim[0] * dim[1];
  268. Matrix<ValueType> M_r(dim[0], dim[1]);
  269. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] + s;
  270. return M_r;
  271. }
  272. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(ValueType s) const {
  273. Long N = dim[0] * dim[1];
  274. Matrix<ValueType> M_r(dim[0], dim[1]);
  275. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] - s;
  276. return M_r;
  277. }
  278. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(ValueType s) const {
  279. Long N = dim[0] * dim[1];
  280. Matrix<ValueType> M_r(dim[0], dim[1]);
  281. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] * s;
  282. return M_r;
  283. }
  284. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator/(ValueType s) const {
  285. Long N = dim[0] * dim[1];
  286. Matrix<ValueType> M_r(dim[0], dim[1]);
  287. for (Long i = 0; i < N; i++) M_r.data_ptr[i] = data_ptr[i] / s;
  288. return M_r;
  289. }
  290. // Element access
  291. template <class ValueType> inline ValueType& Matrix<ValueType>::operator()(Long i, Long j) {
  292. assert(i < dim[0] && j < dim[1]);
  293. return data_ptr[i * dim[1] + j];
  294. }
  295. template <class ValueType> inline const ValueType& Matrix<ValueType>::operator()(Long i, Long j) const {
  296. assert(i < dim[0] && j < dim[1]);
  297. return data_ptr[i * dim[1] + j];
  298. }
  299. template <class ValueType> inline Iterator<ValueType> Matrix<ValueType>::operator[](Long i) {
  300. assert(i < dim[0]);
  301. return data_ptr + i * dim[1];
  302. }
  303. template <class ValueType> inline ConstIterator<ValueType> Matrix<ValueType>::operator[](Long i) const {
  304. assert(i < dim[0]);
  305. return (data_ptr + i * dim[1]);
  306. }
  307. #define myswap(t, a, b) \
  308. { \
  309. t c = a; \
  310. a = b; \
  311. b = c; \
  312. }
  313. template <class ValueType> void Matrix<ValueType>::RowPerm(const Permutation<ValueType>& P) {
  314. Matrix<ValueType>& M = *this;
  315. if (P.Dim() == 0) return;
  316. assert(M.Dim(0) == P.Dim());
  317. Long d0 = M.Dim(0);
  318. Long d1 = M.Dim(1);
  319. #pragma omp parallel for schedule(static)
  320. for (Long i = 0; i < d0; i++) {
  321. Iterator<ValueType> M_ = M[i];
  322. const ValueType s = P.scal[i];
  323. for (Long j = 0; j < d1; j++) M_[j] *= s;
  324. }
  325. Integer omp_p = omp_get_max_threads();
  326. #pragma omp parallel for schedule(static)
  327. for (Integer tid = 0; tid < omp_p; tid++) {
  328. Long a = d1 * (tid + 0) / omp_p;
  329. Long b = d1 * (tid + 1) / omp_p;
  330. Permutation<ValueType> P_ = P;
  331. for (Long i = 0; i < d0; i++) {
  332. Long j = i;
  333. if (P_.perm[i] == i) continue;
  334. while (j < d0 && P_.perm[j] != i) { j++; }
  335. SCTL_ASSERT_MSG(j < d0, "Matrix::RowPerm(Permutation P) ==> Invalid permutation vector P.");
  336. Iterator<ValueType> Mi = M[i];
  337. Iterator<ValueType> Mj = M[j];
  338. myswap(Long, P_.perm[i], P_.perm[j]);
  339. for (Long k = a; k < b; k++) Mi[k]=tid;
  340. }
  341. }
  342. }
  343. template <class ValueType> void Matrix<ValueType>::ColPerm(const Permutation<ValueType>& P) {
  344. Matrix<ValueType>& M = *this;
  345. if (P.Dim() == 0) return;
  346. assert(M.Dim(1) == P.Dim());
  347. Long d0 = M.Dim(0);
  348. Long d1 = M.Dim(1);
  349. Integer omp_p = omp_get_max_threads();
  350. Matrix<ValueType> M_buff(omp_p, d1);
  351. ConstIterator<Long> perm_ = P.perm.begin();
  352. ConstIterator<ValueType> scal_ = P.scal.begin();
  353. #pragma omp parallel for schedule(static)
  354. for (Long i = 0; i < d0; i++) {
  355. Integer pid = omp_get_thread_num();
  356. Iterator<ValueType> buff = M_buff[pid];
  357. Iterator<ValueType> M_ = M[i];
  358. for (Long j = 0; j < d1; j++) buff[j] = M_[j];
  359. for (Long j = 0; j < d1; j++) {
  360. M_[j] = buff[perm_[j]] * scal_[j];
  361. }
  362. }
  363. }
  364. #undef myswap
  365. #define B1 128
  366. #define B2 32
  367. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::Transpose() const {
  368. const Matrix<ValueType>& M = *this;
  369. Long d0 = M.dim[0];
  370. Long d1 = M.dim[1];
  371. Matrix<ValueType> M_r(d1, d0);
  372. const Long blk0 = ((d0 + B1 - 1) / B1);
  373. const Long blk1 = ((d1 + B1 - 1) / B1);
  374. const Long blks = blk0 * blk1;
  375. #pragma omp parallel for
  376. for (Long k = 0; k < blks; k++) {
  377. Long i = (k % blk0) * B1;
  378. Long j = (k / blk0) * B1;
  379. Long d0_ = i + B1;
  380. if (d0_ >= d0) d0_ = d0;
  381. Long d1_ = j + B1;
  382. if (d1_ >= d1) d1_ = d1;
  383. for (Long ii = i; ii < d0_; ii += B2)
  384. for (Long jj = j; jj < d1_; jj += B2) {
  385. Long d0__ = ii + B2;
  386. if (d0__ >= d0) d0__ = d0;
  387. Long d1__ = jj + B2;
  388. if (d1__ >= d1) d1__ = d1;
  389. for (Long iii = ii; iii < d0__; iii++)
  390. for (Long jjj = jj; jjj < d1__; jjj++) {
  391. M_r[jjj][iii] = M[iii][jjj];
  392. }
  393. }
  394. }
  395. return M_r;
  396. }
  397. template <class ValueType> void Matrix<ValueType>::Transpose(Matrix<ValueType>& M_r, const Matrix<ValueType>& M) {
  398. Long d0 = M.dim[0];
  399. Long d1 = M.dim[1];
  400. if (M_r.dim[0] != d1 || M_r.dim[1] != d0) M_r.ReInit(d1, d0);
  401. const Long blk0 = ((d0 + B1 - 1) / B1);
  402. const Long blk1 = ((d1 + B1 - 1) / B1);
  403. const Long blks = blk0 * blk1;
  404. #pragma omp parallel for
  405. for (Long k = 0; k < blks; k++) {
  406. Long i = (k % blk0) * B1;
  407. Long j = (k / blk0) * B1;
  408. Long d0_ = i + B1;
  409. if (d0_ >= d0) d0_ = d0;
  410. Long d1_ = j + B1;
  411. if (d1_ >= d1) d1_ = d1;
  412. for (Long ii = i; ii < d0_; ii += B2)
  413. for (Long jj = j; jj < d1_; jj += B2) {
  414. Long d0__ = ii + B2;
  415. if (d0__ >= d0) d0__ = d0;
  416. Long d1__ = jj + B2;
  417. if (d1__ >= d1) d1__ = d1;
  418. for (Long iii = ii; iii < d0__; iii++)
  419. for (Long jjj = jj; jjj < d1__; jjj++) {
  420. M_r[jjj][iii] = M[iii][jjj];
  421. }
  422. }
  423. }
  424. }
  425. #undef B2
  426. #undef B1
  427. template <class ValueType> void Matrix<ValueType>::SVD(Matrix<ValueType>& tU, Matrix<ValueType>& tS, Matrix<ValueType>& tVT) {
  428. Matrix<ValueType>& M = *this;
  429. Matrix<ValueType> M_ = M;
  430. int n = M.Dim(0);
  431. int m = M.Dim(1);
  432. int k = (m < n ? m : n);
  433. if (tU.Dim(0) != n || tU.Dim(1) != k) tU.ReInit(n, k);
  434. tU.SetZero();
  435. if (tS.Dim(0) != k || tS.Dim(1) != k) tS.ReInit(k, k);
  436. tS.SetZero();
  437. if (tVT.Dim(0) != k || tVT.Dim(1) != m) tVT.ReInit(k, m);
  438. tVT.SetZero();
  439. // SVD
  440. int INFO = 0;
  441. char JOBU = 'S';
  442. char JOBVT = 'S';
  443. int wssize = 3 * (m < n ? m : n) + (m > n ? m : n);
  444. int wssize1 = 5 * (m < n ? m : n);
  445. wssize = (wssize > wssize1 ? wssize : wssize1);
  446. Iterator<ValueType> wsbuf = aligned_new<ValueType>(wssize);
  447. mat::svd(&JOBU, &JOBVT, &m, &n, M.begin(), &m, tS.begin(), tVT.begin(), &m, tU.begin(), &k, wsbuf, &wssize, &INFO);
  448. aligned_delete<ValueType>(wsbuf);
  449. if (INFO != 0) std::cout << INFO << '\n';
  450. assert(INFO == 0);
  451. for (Long i = 1; i < k; i++) {
  452. tS[i][i] = tS[0][i];
  453. tS[0][i] = 0;
  454. }
  455. // std::cout<<tU*tS*tVT-M_<<'\n';
  456. }
  457. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::pinv(ValueType eps) {
  458. if (eps < 0) {
  459. eps = 1.0;
  460. while (eps + (ValueType)1.0 > 1.0) eps *= 0.5;
  461. eps = sqrt<ValueType>(eps);
  462. }
  463. Matrix<ValueType> M_r(dim[1], dim[0]);
  464. mat::pinv(data_ptr, dim[0], dim[1], eps, M_r.data_ptr);
  465. this->ReInit(0, 0);
  466. return M_r;
  467. }
  468. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Permutation<ValueType>& P) {
  469. output << std::setprecision(4) << std::setiosflags(std::ios::left);
  470. Long size = P.perm.Dim();
  471. for (Long i = 0; i < size; i++) output << std::setw(10) << P.perm[i] << ' ';
  472. output << ";\n";
  473. for (Long i = 0; i < size; i++) output << std::setw(10) << P.scal[i] << ' ';
  474. output << ";\n";
  475. return output;
  476. }
  477. template <class ValueType> Permutation<ValueType>::Permutation(Long size) {
  478. perm.ReInit(size);
  479. scal.ReInit(size);
  480. for (Long i = 0; i < size; i++) {
  481. perm[i] = i;
  482. scal[i] = 1.0;
  483. }
  484. }
  485. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::RandPerm(Long size) {
  486. Permutation<ValueType> P(size);
  487. for (Long i = 0; i < size; i++) {
  488. P.perm[i] = rand() % size;
  489. for (Long j = 0; j < i; j++)
  490. if (P.perm[i] == P.perm[j]) {
  491. i--;
  492. break;
  493. }
  494. P.scal[i] = ((ValueType)rand()) / RAND_MAX;
  495. }
  496. return P;
  497. }
  498. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::GetMatrix() const {
  499. Long size = perm.Dim();
  500. Matrix<ValueType> M_r(size, size);
  501. for (Long i = 0; i < size; i++)
  502. for (Long j = 0; j < size; j++) M_r[i][j] = (perm[j] == i ? scal[j] : 0.0);
  503. return M_r;
  504. }
  505. template <class ValueType> Long Permutation<ValueType>::Dim() const { return perm.Dim(); }
  506. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::Transpose() {
  507. Long size = perm.Dim();
  508. Permutation<ValueType> P_r(size);
  509. Vector<Long>& perm_r = P_r.perm;
  510. Vector<ValueType>& scal_r = P_r.scal;
  511. for (Long i = 0; i < size; i++) {
  512. perm_r[perm[i]] = i;
  513. scal_r[perm[i]] = scal[i];
  514. }
  515. return P_r;
  516. }
  517. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator*=(ValueType s) {
  518. for (Long i = 0; i < scal.Dim(); i++) scal[i] *= s;
  519. return *this;
  520. }
  521. template <class ValueType> Permutation<ValueType>& Permutation<ValueType>::operator/=(ValueType s) {
  522. for (Long i = 0; i < scal.Dim(); i++) scal[i] /= s;
  523. return *this;
  524. }
  525. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(ValueType s) const {
  526. Permutation<ValueType> P = *this;
  527. P *= s;
  528. return P;
  529. }
  530. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator/(ValueType s) const {
  531. Permutation<ValueType> P = *this;
  532. P /= s;
  533. return P;
  534. }
  535. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(const Permutation<ValueType>& P) const {
  536. Long size = perm.Dim();
  537. SCTL_ASSERT(P.Dim() == size);
  538. Permutation<ValueType> P_r(size);
  539. Vector<Long>& perm_r = P_r.perm;
  540. Vector<ValueType>& scal_r = P_r.scal;
  541. for (Long i = 0; i < size; i++) {
  542. perm_r[i] = perm[P.perm[i]];
  543. scal_r[i] = scal[P.perm[i]] * P.scal[i];
  544. }
  545. return P_r;
  546. }
  547. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::operator*(const Matrix<ValueType>& M) const {
  548. if (Dim() == 0) return M;
  549. SCTL_ASSERT(M.Dim(0) == Dim());
  550. Long d0 = M.Dim(0);
  551. Long d1 = M.Dim(1);
  552. Matrix<ValueType> M_r(d0, d1);
  553. for (Long i = 0; i < d0; i++) {
  554. const ValueType s = scal[i];
  555. ConstIterator<ValueType> M_ = M[i];
  556. Iterator<ValueType> M_r_ = M_r[perm[i]];
  557. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  558. }
  559. return M_r;
  560. }
  561. template <class ValueType> Matrix<ValueType> operator*(const Matrix<ValueType>& M, const Permutation<ValueType>& P) {
  562. if (P.Dim() == 0) return M;
  563. SCTL_ASSERT(M.Dim(1) == P.Dim());
  564. Long d0 = M.Dim(0);
  565. Long d1 = M.Dim(1);
  566. Matrix<ValueType> M_r(d0, d1);
  567. for (Long i = 0; i < d0; i++) {
  568. ConstIterator<Long> perm_ = P.perm.begin();
  569. ConstIterator<ValueType> scal_ = P.scal.begin();
  570. ConstIterator<ValueType> M_ = M[i];
  571. Iterator<ValueType> M_r_ = M_r[i];
  572. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  573. }
  574. return M_r;
  575. }
  576. } // end namespace