matrix.txx 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. #include <omp.h>
  2. #include <cmath>
  3. #include <cstdlib>
  4. #include <cassert>
  5. #include <iostream>
  6. #include <iomanip>
  7. #include <pvfmm/mat_utils.hpp>
  8. #include <pvfmm/mem_mgr.hpp>
  9. #include <pvfmm/profile.hpp>
  10. namespace pvfmm {
  11. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Matrix<ValueType>& M) {
  12. std::ios::fmtflags f(std::cout.flags());
  13. output << std::fixed << std::setprecision(4) << std::setiosflags(std::ios::left);
  14. for (Long i = 0; i < M.Dim(0); i++) {
  15. for (Long j = 0; j < M.Dim(1); j++) {
  16. float f = ((float)M(i, j));
  17. if (pvfmm::fabs<ValueType>(f) < 1e-25) f = 0;
  18. output << std::setw(10) << ((double)f) << ' ';
  19. }
  20. output << ";\n";
  21. }
  22. std::cout.flags(f);
  23. return output;
  24. }
  25. template <class ValueType> Matrix<ValueType>::Matrix() {
  26. dim[0] = 0;
  27. dim[1] = 0;
  28. own_data = true;
  29. data_ptr = NULL;
  30. }
  31. template <class ValueType> Matrix<ValueType>::Matrix(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  32. dim[0] = dim1;
  33. dim[1] = dim2;
  34. own_data = own_data_;
  35. if (own_data) {
  36. if (dim[0] * dim[1] > 0) {
  37. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  38. if (data_ != NULL) {
  39. memcopy(data_ptr, data_, dim[0] * dim[1]);
  40. }
  41. } else
  42. data_ptr = NULL;
  43. } else
  44. data_ptr = data_;
  45. }
  46. template <class ValueType> Matrix<ValueType>::Matrix(const Matrix<ValueType>& M) {
  47. dim[0] = M.dim[0];
  48. dim[1] = M.dim[1];
  49. own_data = true;
  50. if (dim[0] * dim[1] > 0) {
  51. data_ptr = aligned_new<ValueType>(dim[0] * dim[1]);
  52. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  53. } else
  54. data_ptr = NULL;
  55. }
  56. template <class ValueType> Matrix<ValueType>::~Matrix() {
  57. if (own_data) {
  58. if (data_ptr != NULL) {
  59. aligned_delete(data_ptr);
  60. }
  61. }
  62. data_ptr = NULL;
  63. dim[0] = 0;
  64. dim[1] = 0;
  65. }
  66. template <class ValueType> void Matrix<ValueType>::Swap(Matrix<ValueType>& M) {
  67. StaticArray<Long, 2> dim_;
  68. dim_[0] = dim[0];
  69. dim_[1] = dim[1];
  70. Iterator<ValueType> data_ptr_ = data_ptr;
  71. bool own_data_ = own_data;
  72. dim[0] = M.dim[0];
  73. dim[1] = M.dim[1];
  74. data_ptr = M.data_ptr;
  75. own_data = M.own_data;
  76. M.dim[0] = dim_[0];
  77. M.dim[1] = dim_[1];
  78. M.data_ptr = data_ptr_;
  79. M.own_data = own_data_;
  80. }
  81. template <class ValueType> void Matrix<ValueType>::ReInit(Long dim1, Long dim2, Iterator<ValueType> data_, bool own_data_) {
  82. if (own_data_ && own_data && dim[0] * dim[1] >= dim1 * dim2) {
  83. dim[0] = dim1;
  84. dim[1] = dim2;
  85. if (data_ != NULL) {
  86. memcopy(data_ptr, data_, dim[0] * dim[1]);
  87. }
  88. } else {
  89. Matrix<ValueType> tmp(dim1, dim2, data_, own_data_);
  90. this->Swap(tmp);
  91. }
  92. }
  93. template <class ValueType> void Matrix<ValueType>::Write(const char* fname) const {
  94. FILE* f1 = fopen(fname, "wb+");
  95. if (f1 == NULL) {
  96. std::cout << "Unable to open file for writing:" << fname << '\n';
  97. return;
  98. }
  99. StaticArray<uint64_t, 2> dim_;
  100. dim_[0] = (uint64_t)dim[0];
  101. dim_[1] = (uint64_t)dim[1];
  102. fwrite(&dim_[0], sizeof(uint64_t), 2, f1);
  103. fwrite(data_ptr, sizeof(ValueType), dim[0] * dim[1], f1);
  104. fclose(f1);
  105. }
  106. template <class ValueType> void Matrix<ValueType>::Read(const char* fname) {
  107. FILE* f1 = fopen(fname, "r");
  108. if (f1 == NULL) {
  109. std::cout << "Unable to open file for reading:" << fname << '\n';
  110. return;
  111. }
  112. StaticArray<uint64_t, 2> dim_;
  113. Long readlen = fread(&dim_[0], sizeof(uint64_t), 2, f1);
  114. assert(readlen == 2);
  115. ReInit(dim_[0], dim_[1]);
  116. readlen = fread(data_ptr, sizeof(ValueType), dim[0] * dim[1], f1);
  117. assert(readlen == dim[0] * dim[1]);
  118. fclose(f1);
  119. }
  120. template <class ValueType> Long Matrix<ValueType>::Dim(Long i) const { return dim[i]; }
  121. template <class ValueType> void Matrix<ValueType>::SetZero() {
  122. if (dim[0] * dim[1]) pvfmm::memset(data_ptr, 0, dim[0] * dim[1]);
  123. }
  124. template <class ValueType> Iterator<ValueType> Matrix<ValueType>::Begin() { return data_ptr; }
  125. template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::Begin() const { return data_ptr; }
  126. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator=(const Matrix<ValueType>& M) {
  127. if (this != &M) {
  128. if (dim[0] * dim[1] < M.dim[0] * M.dim[1]) {
  129. ReInit(M.dim[0], M.dim[1]);
  130. }
  131. dim[0] = M.dim[0];
  132. dim[1] = M.dim[1];
  133. memcopy(data_ptr, M.data_ptr, dim[0] * dim[1]);
  134. }
  135. return *this;
  136. }
  137. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator+=(const Matrix<ValueType>& M) {
  138. assert(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  139. Profile::Add_FLOP(dim[0] * dim[1]);
  140. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] += M.data_ptr[i];
  141. return *this;
  142. }
  143. template <class ValueType> Matrix<ValueType>& Matrix<ValueType>::operator-=(const Matrix<ValueType>& M) {
  144. assert(M.Dim(0) == Dim(0) && M.Dim(1) == Dim(1));
  145. Profile::Add_FLOP(dim[0] * dim[1]);
  146. for (Long i = 0; i < M.Dim(0) * M.Dim(1); i++) data_ptr[i] -= M.data_ptr[i];
  147. return *this;
  148. }
  149. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator+(const Matrix<ValueType>& M2) const {
  150. const Matrix<ValueType>& M1 = *this;
  151. assert(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  152. Profile::Add_FLOP(dim[0] * dim[1]);
  153. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1), NULL);
  154. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] + M2[0][i];
  155. return M_r;
  156. }
  157. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator-(const Matrix<ValueType>& M2) const {
  158. const Matrix<ValueType>& M1 = *this;
  159. assert(M2.Dim(0) == M1.Dim(0) && M2.Dim(1) == M1.Dim(1));
  160. Profile::Add_FLOP(dim[0] * dim[1]);
  161. Matrix<ValueType> M_r(M1.Dim(0), M1.Dim(1), NULL);
  162. for (Long i = 0; i < M1.Dim(0) * M1.Dim(1); i++) M_r[0][i] = M1[0][i] - M2[0][i];
  163. return M_r;
  164. }
  165. template <class ValueType> inline ValueType& Matrix<ValueType>::operator()(Long i, Long j) {
  166. assert(i < dim[0] && j < dim[1]);
  167. return data_ptr[i * dim[1] + j];
  168. }
  169. template <class ValueType> inline const ValueType& Matrix<ValueType>::operator()(Long i, Long j) const {
  170. assert(i < dim[0] && j < dim[1]);
  171. return data_ptr[i * dim[1] + j];
  172. }
  173. template <class ValueType> inline Iterator<ValueType> Matrix<ValueType>::operator[](Long i) {
  174. assert(i < dim[0]);
  175. return data_ptr + i * dim[1];
  176. }
  177. template <class ValueType> inline ConstIterator<ValueType> Matrix<ValueType>::operator[](Long i) const {
  178. assert(i < dim[0]);
  179. return (data_ptr + i * dim[1]);
  180. }
  181. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::operator*(const Matrix<ValueType>& M) const {
  182. assert(dim[1] == M.dim[0]);
  183. Profile::Add_FLOP(2 * (((Long)dim[0]) * dim[1]) * M.dim[1]);
  184. Matrix<ValueType> M_r(dim[0], M.dim[1], NULL);
  185. if (M.Dim(0) * M.Dim(1) == 0 || this->Dim(0) * this->Dim(1) == 0) return M_r;
  186. mat::gemm<ValueType>('N', 'N', M.dim[1], dim[0], dim[1], 1.0, M.data_ptr, M.dim[1], data_ptr, dim[1], 0.0, M_r.data_ptr, M_r.dim[1]);
  187. return M_r;
  188. }
  189. template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  190. assert(A.dim[1] == B.dim[0]);
  191. assert(M_r.dim[0] == A.dim[0]);
  192. assert(M_r.dim[1] == B.dim[1]);
  193. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  194. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  195. mat::gemm<ValueType>('N', 'N', B.dim[1], A.dim[0], A.dim[1], 1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  196. }
  197. // cublasgemm wrapper
  198. #if defined(PVFMM_HAVE_CUDA)
  199. template <class ValueType> void Matrix<ValueType>::CUBLASGEMM(Matrix<ValueType>& M_r, const Matrix<ValueType>& A, const Matrix<ValueType>& B, ValueType beta) {
  200. if (A.Dim(0) * A.Dim(1) == 0 || B.Dim(0) * B.Dim(1) == 0) return;
  201. assert(A.dim[1] == B.dim[0]);
  202. assert(M_r.dim[0] == A.dim[0]);
  203. assert(M_r.dim[1] == B.dim[1]);
  204. Profile::Add_FLOP(2 * (((Long)A.dim[0]) * A.dim[1]) * B.dim[1]);
  205. mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1], (ValueType)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
  206. }
  207. #endif
  208. #define myswap(t, a, b) \
  209. { \
  210. t c = a; \
  211. a = b; \
  212. b = c; \
  213. }
  214. template <class ValueType> void Matrix<ValueType>::RowPerm(const Permutation<ValueType>& P) {
  215. Matrix<ValueType>& M = *this;
  216. if (P.Dim() == 0) return;
  217. assert(M.Dim(0) == P.Dim());
  218. Long d0 = M.Dim(0);
  219. Long d1 = M.Dim(1);
  220. #pragma omp parallel for
  221. for (Long i = 0; i < d0; i++) {
  222. Iterator<ValueType> M_ = M[i];
  223. const ValueType s = P.scal[i];
  224. for (Long j = 0; j < d1; j++) M_[j] *= s;
  225. }
  226. Permutation<ValueType> P_ = P;
  227. for (Long i = 0; i < d0; i++)
  228. while (P_.perm[i] != i) {
  229. Long a = P_.perm[i];
  230. Long b = i;
  231. Iterator<ValueType> M_a = M[a];
  232. Iterator<ValueType> M_b = M[b];
  233. myswap(Long, P_.perm[a], P_.perm[b]);
  234. for (Long j = 0; j < d1; j++) myswap(ValueType, M_a[j], M_b[j]);
  235. }
  236. }
  237. template <class ValueType> void Matrix<ValueType>::ColPerm(const Permutation<ValueType>& P) {
  238. Matrix<ValueType>& M = *this;
  239. if (P.Dim() == 0) return;
  240. assert(M.Dim(1) == P.Dim());
  241. Long d0 = M.Dim(0);
  242. Long d1 = M.Dim(1);
  243. Integer omp_p = omp_get_max_threads();
  244. Matrix<ValueType> M_buff(omp_p, d1);
  245. ConstIterator<Long> perm_ = P.perm.Begin();
  246. ConstIterator<ValueType> scal_ = P.scal.Begin();
  247. #pragma omp parallel for
  248. for (Long i = 0; i < d0; i++) {
  249. Integer pid = omp_get_thread_num();
  250. Iterator<ValueType> buff = M_buff[pid];
  251. Iterator<ValueType> M_ = M[i];
  252. for (Long j = 0; j < d1; j++) buff[j] = M_[j];
  253. for (Long j = 0; j < d1; j++) {
  254. M_[j] = buff[perm_[j]] * scal_[j];
  255. }
  256. }
  257. }
  258. #undef myswap
  259. #define B1 128
  260. #define B2 32
  261. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::Transpose() const {
  262. const Matrix<ValueType>& M = *this;
  263. Long d0 = M.dim[0];
  264. Long d1 = M.dim[1];
  265. Matrix<ValueType> M_r(d1, d0);
  266. const Long blk0 = ((d0 + B1 - 1) / B1);
  267. const Long blk1 = ((d1 + B1 - 1) / B1);
  268. const Long blks = blk0 * blk1;
  269. #pragma omp parallel for
  270. for (Long k = 0; k < blks; k++) {
  271. Long i = (k % blk0) * B1;
  272. Long j = (k / blk0) * B1;
  273. Long d0_ = i + B1;
  274. if (d0_ >= d0) d0_ = d0;
  275. Long d1_ = j + B1;
  276. if (d1_ >= d1) d1_ = d1;
  277. for (Long ii = i; ii < d0_; ii += B2)
  278. for (Long jj = j; jj < d1_; jj += B2) {
  279. Long d0__ = ii + B2;
  280. if (d0__ >= d0) d0__ = d0;
  281. Long d1__ = jj + B2;
  282. if (d1__ >= d1) d1__ = d1;
  283. for (Long iii = ii; iii < d0__; iii++)
  284. for (Long jjj = jj; jjj < d1__; jjj++) {
  285. M_r[jjj][iii] = M[iii][jjj];
  286. }
  287. }
  288. }
  289. return M_r;
  290. }
  291. template <class ValueType> void Matrix<ValueType>::Transpose(Matrix<ValueType>& M_r, const Matrix<ValueType>& M) {
  292. Long d0 = M.dim[0];
  293. Long d1 = M.dim[1];
  294. if (M_r.dim[0] != d1 || M_r.dim[1] != d0) M_r.ReInit(d1, d0);
  295. const Long blk0 = ((d0 + B1 - 1) / B1);
  296. const Long blk1 = ((d1 + B1 - 1) / B1);
  297. const Long blks = blk0 * blk1;
  298. #pragma omp parallel for
  299. for (Long k = 0; k < blks; k++) {
  300. Long i = (k % blk0) * B1;
  301. Long j = (k / blk0) * B1;
  302. Long d0_ = i + B1;
  303. if (d0_ >= d0) d0_ = d0;
  304. Long d1_ = j + B1;
  305. if (d1_ >= d1) d1_ = d1;
  306. for (Long ii = i; ii < d0_; ii += B2)
  307. for (Long jj = j; jj < d1_; jj += B2) {
  308. Long d0__ = ii + B2;
  309. if (d0__ >= d0) d0__ = d0;
  310. Long d1__ = jj + B2;
  311. if (d1__ >= d1) d1__ = d1;
  312. for (Long iii = ii; iii < d0__; iii++)
  313. for (Long jjj = jj; jjj < d1__; jjj++) {
  314. M_r[jjj][iii] = M[iii][jjj];
  315. }
  316. }
  317. }
  318. }
  319. #undef B2
  320. #undef B1
  321. template <class ValueType> void Matrix<ValueType>::SVD(Matrix<ValueType>& tU, Matrix<ValueType>& tS, Matrix<ValueType>& tVT) {
  322. pvfmm::Matrix<ValueType>& M = *this;
  323. pvfmm::Matrix<ValueType> M_ = M;
  324. int n = M.Dim(0);
  325. int m = M.Dim(1);
  326. int k = (m < n ? m : n);
  327. if (tU.Dim(0) != n || tU.Dim(1) != k) tU.ReInit(n, k);
  328. tU.SetZero();
  329. if (tS.Dim(0) != k || tS.Dim(1) != k) tS.ReInit(k, k);
  330. tS.SetZero();
  331. if (tVT.Dim(0) != k || tVT.Dim(1) != m) tVT.ReInit(k, m);
  332. tVT.SetZero();
  333. // SVD
  334. int INFO = 0;
  335. char JOBU = 'S';
  336. char JOBVT = 'S';
  337. int wssize = 3 * (m < n ? m : n) + (m > n ? m : n);
  338. int wssize1 = 5 * (m < n ? m : n);
  339. wssize = (wssize > wssize1 ? wssize : wssize1);
  340. Iterator<ValueType> wsbuf = aligned_new<ValueType>(wssize);
  341. pvfmm::mat::svd(&JOBU, &JOBVT, &m, &n, M.Begin(), &m, tS.Begin(), tVT.Begin(), &m, tU.Begin(), &k, wsbuf, &wssize, &INFO);
  342. aligned_delete<ValueType>(wsbuf);
  343. if (INFO != 0) std::cout << INFO << '\n';
  344. assert(INFO == 0);
  345. for (Long i = 1; i < k; i++) {
  346. tS[i][i] = tS[0][i];
  347. tS[0][i] = 0;
  348. }
  349. // std::cout<<tU*tS*tVT-M_<<'\n';
  350. }
  351. template <class ValueType> Matrix<ValueType> Matrix<ValueType>::pinv(ValueType eps) {
  352. if (eps < 0) {
  353. eps = 1.0;
  354. while (eps + (ValueType)1.0 > 1.0) eps *= 0.5;
  355. eps = pvfmm::sqrt<ValueType>(eps);
  356. }
  357. Matrix<ValueType> M_r(dim[1], dim[0]);
  358. mat::pinv(data_ptr, dim[0], dim[1], eps, M_r.data_ptr);
  359. this->ReInit(0, 0);
  360. return M_r;
  361. }
  362. template <class ValueType> std::ostream& operator<<(std::ostream& output, const Permutation<ValueType>& P) {
  363. output << std::setprecision(4) << std::setiosflags(std::ios::left);
  364. Long size = P.perm.Dim();
  365. for (Long i = 0; i < size; i++) output << std::setw(10) << P.perm[i] << ' ';
  366. output << ";\n";
  367. for (Long i = 0; i < size; i++) output << std::setw(10) << P.scal[i] << ' ';
  368. output << ";\n";
  369. return output;
  370. }
  371. template <class ValueType> Permutation<ValueType>::Permutation(Long size) {
  372. perm.ReInit(size);
  373. scal.ReInit(size);
  374. for (Long i = 0; i < size; i++) {
  375. perm[i] = i;
  376. scal[i] = 1.0;
  377. }
  378. }
  379. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::RandPerm(Long size) {
  380. Permutation<ValueType> P(size);
  381. for (Long i = 0; i < size; i++) {
  382. P.perm[i] = rand() % size;
  383. for (Long j = 0; j < i; j++)
  384. if (P.perm[i] == P.perm[j]) {
  385. i--;
  386. break;
  387. }
  388. P.scal[i] = ((ValueType)rand()) / RAND_MAX;
  389. }
  390. return P;
  391. }
  392. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::GetMatrix() const {
  393. Long size = perm.Dim();
  394. Matrix<ValueType> M_r(size, size, NULL);
  395. for (Long i = 0; i < size; i++)
  396. for (Long j = 0; j < size; j++) M_r[i][j] = (perm[j] == i ? scal[j] : 0.0);
  397. return M_r;
  398. }
  399. template <class ValueType> Long Permutation<ValueType>::Dim() const { return perm.Dim(); }
  400. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::Transpose() {
  401. Long size = perm.Dim();
  402. Permutation<ValueType> P_r(size);
  403. Vector<Long>& perm_r = P_r.perm;
  404. Vector<ValueType>& scal_r = P_r.scal;
  405. for (Long i = 0; i < size; i++) {
  406. perm_r[perm[i]] = i;
  407. scal_r[perm[i]] = scal[i];
  408. }
  409. return P_r;
  410. }
  411. template <class ValueType> Permutation<ValueType> Permutation<ValueType>::operator*(const Permutation<ValueType>& P) {
  412. Long size = perm.Dim();
  413. assert(P.Dim() == size);
  414. Permutation<ValueType> P_r(size);
  415. Vector<Long>& perm_r = P_r.perm;
  416. Vector<ValueType>& scal_r = P_r.scal;
  417. for (Long i = 0; i < size; i++) {
  418. perm_r[i] = perm[P.perm[i]];
  419. scal_r[i] = scal[P.perm[i]] * P.scal[i];
  420. }
  421. return P_r;
  422. }
  423. template <class ValueType> Matrix<ValueType> Permutation<ValueType>::operator*(const Matrix<ValueType>& M) {
  424. if (Dim() == 0) return M;
  425. assert(M.Dim(0) == Dim());
  426. Long d0 = M.Dim(0);
  427. Long d1 = M.Dim(1);
  428. Matrix<ValueType> M_r(d0, d1, NULL);
  429. for (Long i = 0; i < d0; i++) {
  430. const ValueType s = scal[i];
  431. ConstIterator<ValueType> M_ = M[i];
  432. Iterator<ValueType> M_r_ = M_r[perm[i]];
  433. for (Long j = 0; j < d1; j++) M_r_[j] = M_[j] * s;
  434. }
  435. return M_r;
  436. }
  437. template <class ValueType> Matrix<ValueType> operator*(const Matrix<ValueType>& M, const Permutation<ValueType>& P) {
  438. if (P.Dim() == 0) return M;
  439. assert(M.Dim(1) == P.Dim());
  440. Long d0 = M.Dim(0);
  441. Long d1 = M.Dim(1);
  442. Matrix<ValueType> M_r(d0, d1, NULL);
  443. for (Long i = 0; i < d0; i++) {
  444. ConstIterator<Long> perm_ = P.perm.Begin();
  445. ConstIterator<ValueType> scal_ = P.scal.Begin();
  446. ConstIterator<ValueType> M_ = M[i];
  447. Iterator<ValueType> M_r_ = M_r[i];
  448. for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
  449. }
  450. return M_r;
  451. }
  452. } // end namespace