/**
 * \file matrix.txx
 * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
 * \date 2-11-2011
 * \brief This file contains the implementation of the class Matrix.
 */

#include <omp.h>
#include <cmath>
#include <cstdlib>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <cstdint>
#include <iostream>
#include <iomanip>

#include <device_wrapper.hpp>
#include <mat_utils.hpp>
#include <mem_mgr.hpp>
#include <profile.hpp>

namespace pvfmm{
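
// Formatted output: prints the matrix row by row with fixed precision and a
// trailing ";\n" per row, restoring the stream's format flags afterwards.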
template <class T>
std::ostream& operator<<(std::ostream& output, const Matrix<T>& M){
  std::ios::fmtflags f(output.flags());
  output<<std::fixed<<std::setprecision(4)<<std::setiosflags(std::ios::left);
  for(size_t i=0;i<M.Dim(0);i++){
    for(size_t j=0;j<M.Dim(1);j++){
      float val=((float)M(i,j));
      if(pvfmm::fabs<T>(val)<1e-25) val=0;
      output<<std::setw(10)<<((double)val)<<' ';
    }
    output<<";\n";
  }
  output.flags(f);
  return output;
}
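
// Constructors and destructor. A Matrix either owns its storage (allocated
// through mem::aligned_new) or wraps an external buffer when own_data_ is
// false. Illustrative usage (not part of this file):
//   pvfmm::Matrix<double> A(3,3);             // owns a 3x3 aligned buffer
//   pvfmm::Matrix<double> B(3,3,ptr,false);   // wraps an existing buffer ptr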
template <class T>
Matrix<T>::Matrix(){
  dim[0]=0;
  dim[1]=0;
  own_data=true;
  data_ptr=NULL;
  dev.dev_ptr=(uintptr_t)NULL;
}

template <class T>
Matrix<T>::Matrix(size_t dim1, size_t dim2, T* data_, bool own_data_){
  dim[0]=dim1;
  dim[1]=dim2;
  own_data=own_data_;
  if(own_data){
    if(dim[0]*dim[1]>0){
      data_ptr=mem::aligned_new<T>(dim[0]*dim[1]);
      #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
      Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
      #endif
      if(data_!=NULL) mem::memcopy(data_ptr,data_,dim[0]*dim[1]*sizeof(T));
    }else data_ptr=NULL;
  }else
    data_ptr=data_;
  dev.dev_ptr=(uintptr_t)NULL;
}

template <class T>
Matrix<T>::Matrix(const Matrix<T>& M){
  dim[0]=M.dim[0];
  dim[1]=M.dim[1];
  own_data=true;
  if(dim[0]*dim[1]>0){
    data_ptr=mem::aligned_new<T>(dim[0]*dim[1]);
    #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
    Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
    #endif
    mem::memcopy(data_ptr,M.data_ptr,dim[0]*dim[1]*sizeof(T));
  }else
    data_ptr=NULL;
  dev.dev_ptr=(uintptr_t)NULL;
}

template <class T>
Matrix<T>::~Matrix(){
  FreeDevice(false);
  if(own_data){
    if(data_ptr!=NULL){
      mem::aligned_delete(data_ptr);
      #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
      Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
      #endif
    }
  }
  data_ptr=NULL;
  dim[0]=0;
  dim[1]=0;
}
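
// Swap exchanges dimensions, data pointer, ownership flag and device state
// between two matrices without copying any elements.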
template <class T>
void Matrix<T>::Swap(Matrix<T>& M){
  size_t dim_[2]={dim[0],dim[1]};
  T* data_ptr_=data_ptr;
  bool own_data_=own_data;
  Device dev_=dev;
  Vector<char> dev_sig_=dev_sig;

  dim[0]=M.dim[0];
  dim[1]=M.dim[1];
  data_ptr=M.data_ptr;
  own_data=M.own_data;
  dev=M.dev;
  dev_sig=M.dev_sig;

  M.dim[0]=dim_[0];
  M.dim[1]=dim_[1];
  M.data_ptr=data_ptr_;
  M.own_data=own_data_;
  M.dev=dev_;
  M.dev_sig=dev_sig_;
}
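
// ReInit resizes the matrix. When this matrix already owns a buffer that is
// large enough, the buffer is reused (optionally copying in new data);
// otherwise a temporary matrix is constructed and swapped in.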
template <class T>
void Matrix<T>::ReInit(size_t dim1, size_t dim2, T* data_, bool own_data_){
  if(own_data_ && own_data && dim[0]*dim[1]>=dim1*dim2){
    if(dim[0]*dim[1]!=dim1*dim2) FreeDevice(false);
    #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
    Profile::Add_MEM((dim1*dim2-dim[0]*dim[1])*sizeof(T));
    #endif
    dim[0]=dim1; dim[1]=dim2;
    if(data_) mem::memcopy(data_ptr,data_,dim[0]*dim[1]*sizeof(T));
  }else{
    Matrix<T> tmp(dim1,dim2,data_,own_data_);
    this->Swap(tmp);
  }
}
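
// Device memory management (co-processor/GPU offload through DeviceWrapper):
// AllocDevice allocates (and optionally copies) the matrix on the device,
// Device2Host starts a device-to-host transfer, Device2HostWait blocks until
// that transfer completes, and FreeDevice releases the device buffer.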
template <class T>
typename Matrix<T>::Device& Matrix<T>::AllocDevice(bool copy){
  size_t len=dim[0]*dim[1];
  if(dev.dev_ptr==(uintptr_t)NULL && len>0) // Allocate data on device.
    dev.dev_ptr=DeviceWrapper::alloc_device((char*)data_ptr, len*sizeof(T));
  if(dev.dev_ptr!=(uintptr_t)NULL && copy) // Copy data to device
    dev.lock_idx=DeviceWrapper::host2device((char*)data_ptr,(char*)data_ptr,dev.dev_ptr,len*sizeof(T));
  dev.dim[0]=dim[0];
  dev.dim[1]=dim[1];
  return dev;
}

template <class T>
void Matrix<T>::Device2Host(T* host_ptr){
  dev.lock_idx=DeviceWrapper::device2host((char*)data_ptr,dev.dev_ptr,(char*)(host_ptr==NULL?data_ptr:host_ptr),dim[0]*dim[1]*sizeof(T));
  //#if defined(PVFMM_HAVE_CUDA)
  //  cudaEventCreate(&lock);
  //  cudaEventRecord(lock, 0);
  //#endif
}

template <class T>
void Matrix<T>::Device2HostWait(){
  //#if defined(PVFMM_HAVE_CUDA)
  //  cudaEventSynchronize(lock);
  //  cudaEventDestroy(lock);
  //#endif
  DeviceWrapper::wait(dev.lock_idx);
  dev.lock_idx=-1;
}

template <class T>
void Matrix<T>::FreeDevice(bool copy){
  if(dev.dev_ptr==(uintptr_t)NULL) return;
  if(copy) DeviceWrapper::device2host((char*)data_ptr,dev.dev_ptr,(char*)data_ptr,dim[0]*dim[1]*sizeof(T));
  DeviceWrapper::free_device((char*)data_ptr, dev.dev_ptr);
  dev.dev_ptr=(uintptr_t)NULL;
  dev.dim[0]=0;
  dev.dim[1]=0;
}
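
// Binary file I/O. The on-disk format is two uint32_t dimensions followed by
// the row-major matrix data.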
template <class T>
void Matrix<T>::Write(const char* fname){
  FILE* f1=fopen(fname,"wb+");
  if(f1==NULL){
    std::cout<<"Unable to open file for writing:"<<fname<<'\n';
    return;
  }
  uint32_t dim_[2]={(uint32_t)dim[0],(uint32_t)dim[1]};
  fwrite(dim_,sizeof(uint32_t),2,f1);
  fwrite(data_ptr,sizeof(T),dim[0]*dim[1],f1);
  fclose(f1);
}
template <class T>
void Matrix<T>::Read(const char* fname){
  FILE* f1=fopen(fname,"rb"); // open in binary mode to match Write()
  if(f1==NULL){
    std::cout<<"Unable to open file for reading:"<<fname<<'\n';
    return;
  }
  uint32_t dim_[2];
  size_t readlen=fread(dim_, sizeof(uint32_t), 2, f1);
  assert(readlen==2);

  ReInit(dim_[0],dim_[1]);
  readlen=fread(data_ptr,sizeof(T),dim[0]*dim[1],f1);
  assert(readlen==dim[0]*dim[1]);
  fclose(f1);
}
template <class T>
size_t Matrix<T>::Dim(size_t i) const{
  return dim[i];
}

template <class T>
void Matrix<T>::Resize(size_t i, size_t j){
  ReInit(i,j);
}

template <class T>
void Matrix<T>::SetZero(){
  if(dim[0]*dim[1])
    memset(data_ptr,0,dim[0]*dim[1]*sizeof(T));
}

template <class ValueType>
ValueType* Matrix<ValueType>::Begin(){
  return data_ptr;
}

template <class ValueType>
const ValueType* Matrix<ValueType>::Begin() const{
  return data_ptr;
}
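
// Copy assignment and element-wise arithmetic. operator= reuses the existing
// buffer when it is large enough; +=, -=, + and - require matching
// dimensions (checked by assert).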
template <class T>
Matrix<T>& Matrix<T>::operator=(const Matrix<T>& M){
  if(this!=&M){
    if(dim[0]*dim[1]!=M.dim[0]*M.dim[1]) FreeDevice(false);
    if(dim[0]*dim[1]<M.dim[0]*M.dim[1]){
      ReInit(M.dim[0],M.dim[1]);
    }
    dim[0]=M.dim[0]; dim[1]=M.dim[1];
    mem::memcopy(data_ptr,M.data_ptr,dim[0]*dim[1]*sizeof(T));
  }
  return *this;
}

template <class T>
Matrix<T>& Matrix<T>::operator+=(const Matrix<T>& M){
  assert(M.Dim(0)==Dim(0) && M.Dim(1)==Dim(1));
  Profile::Add_FLOP(dim[0]*dim[1]);
  for(size_t i=0;i<M.Dim(0)*M.Dim(1);i++)
    data_ptr[i]+=M.data_ptr[i];
  return *this;
}

template <class T>
Matrix<T>& Matrix<T>::operator-=(const Matrix<T>& M){
  assert(M.Dim(0)==Dim(0) && M.Dim(1)==Dim(1));
  Profile::Add_FLOP(dim[0]*dim[1]);
  for(size_t i=0;i<M.Dim(0)*M.Dim(1);i++)
    data_ptr[i]-=M.data_ptr[i];
  return *this;
}

template <class T>
Matrix<T> Matrix<T>::operator+(const Matrix<T>& M2){
  Matrix<T>& M1=*this;
  assert(M2.Dim(0)==M1.Dim(0) && M2.Dim(1)==M1.Dim(1));
  Profile::Add_FLOP(dim[0]*dim[1]);

  Matrix<T> M_r(M1.Dim(0),M1.Dim(1),NULL);
  for(size_t i=0;i<M1.Dim(0)*M1.Dim(1);i++)
    M_r[0][i]=M1[0][i]+M2[0][i];
  return M_r;
}

template <class T>
Matrix<T> Matrix<T>::operator-(const Matrix<T>& M2){
  Matrix<T>& M1=*this;
  assert(M2.Dim(0)==M1.Dim(0) && M2.Dim(1)==M1.Dim(1));
  Profile::Add_FLOP(dim[0]*dim[1]);

  Matrix<T> M_r(M1.Dim(0),M1.Dim(1),NULL);
  for(size_t i=0;i<M1.Dim(0)*M1.Dim(1);i++)
    M_r[0][i]=M1[0][i]-M2[0][i];
  return M_r;
}

template <class T>
inline const T& Matrix<T>::operator()(size_t i,size_t j) const{
  assert(i<dim[0] && j<dim[1]);
  return data_ptr[i*dim[1]+j];
}

template <class T>
inline T* Matrix<T>::operator[](size_t i){
  assert(i<dim[0]);
  return &data_ptr[i*dim[1]];
}

template <class T>
inline const T* Matrix<T>::operator[](size_t i) const{
  assert(i<dim[0]);
  return &data_ptr[i*dim[1]];
}
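
// Matrix products. Both operator* and GEMM evaluate row-major products by
// calling the column-major kernel mat::gemm with the operands swapped,
// using (B^T A^T)^T = A B. GEMM writes A*B + beta*M_r into M_r.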
template <class T>
Matrix<T> Matrix<T>::operator*(const Matrix<T>& M){
  assert(dim[1]==M.dim[0]);
  Profile::Add_FLOP(2*(((long long)dim[0])*dim[1])*M.dim[1]);

  Matrix<T> M_r(dim[0],M.dim[1],NULL);
  if(M.Dim(0)*M.Dim(1)==0 || this->Dim(0)*this->Dim(1)==0) return M_r;
  mat::gemm<T>('N','N',M.dim[1],dim[0],dim[1],
      1.0,M.data_ptr,M.dim[1],data_ptr,dim[1],0.0,M_r.data_ptr,M_r.dim[1]);
  return M_r;
}

template <class T>
void Matrix<T>::GEMM(Matrix<T>& M_r, const Matrix<T>& A, const Matrix<T>& B, T beta){
  if(A.Dim(0)*A.Dim(1)==0 || B.Dim(0)*B.Dim(1)==0) return;
  assert(A.dim[1]==B.dim[0]);
  assert(M_r.dim[0]==A.dim[0]);
  assert(M_r.dim[1]==B.dim[1]);
  #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
  Profile::Add_FLOP(2*(((long long)A.dim[0])*A.dim[1])*B.dim[1]);
  #endif
  mat::gemm<T>('N','N',B.dim[1],A.dim[0],A.dim[1],
      1.0,B.data_ptr,B.dim[1],A.data_ptr,A.dim[1],beta,M_r.data_ptr,M_r.dim[1]);
}

// cublasgemm wrapper
#if defined(PVFMM_HAVE_CUDA)
template <class T>
void Matrix<T>::CUBLASGEMM(Matrix<T>& M_r, const Matrix<T>& A, const Matrix<T>& B, T beta){
  if(A.Dim(0)*A.Dim(1)==0 || B.Dim(0)*B.Dim(1)==0) return;
  assert(A.dim[1]==B.dim[0]);
  assert(M_r.dim[0]==A.dim[0]);
  assert(M_r.dim[1]==B.dim[1]);
  Profile::Add_FLOP(2*(((long long)A.dim[0])*A.dim[1])*B.dim[1]);
  mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1],
      (T)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
}
#endif
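
// In-place row/column permutation with scaling. RowPerm applies M <- P*M:
// row i of M, scaled by P.scal[i], is moved to row P.perm[i] (cycle swaps
// after an OpenMP-parallel scaling pass). ColPerm applies M <- M*P: column j
// becomes column P.perm[j] of the input scaled by P.scal[j], using one
// scratch row per OpenMP thread.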
#define myswap(t,a,b) {t c=a;a=b;b=c;}
template <class T>
void Matrix<T>::RowPerm(const Permutation<T>& P){
  Matrix<T>& M=*this;
  if(P.Dim()==0) return;
  assert(M.Dim(0)==P.Dim());
  size_t d0=M.Dim(0);
  size_t d1=M.Dim(1);

  #pragma omp parallel for
  for(size_t i=0;i<d0;i++){
    T* M_=M[i];
    const T s=P.scal[i];
    for(size_t j=0;j<d1;j++) M_[j]*=s;
  }

  Permutation<T> P_=P;
  for(size_t i=0;i<d0;i++)
    while(P_.perm[i]!=i){
      size_t a=P_.perm[i];
      size_t b=i;
      T* M_a=M[a];
      T* M_b=M[b];
      myswap(size_t,P_.perm[a],P_.perm[b]);
      for(size_t j=0;j<d1;j++)
        myswap(T,M_a[j],M_b[j]);
    }
}

template <class T>
void Matrix<T>::ColPerm(const Permutation<T>& P){
  Matrix<T>& M=*this;
  if(P.Dim()==0) return;
  assert(M.Dim(1)==P.Dim());
  size_t d0=M.Dim(0);
  size_t d1=M.Dim(1);

  int omp_p=omp_get_max_threads();
  Matrix<T> M_buff(omp_p,d1);

  const size_t* perm_=&(P.perm[0]);
  const T* scal_=&(P.scal[0]);
  #pragma omp parallel for
  for(size_t i=0;i<d0;i++){
    int pid=omp_get_thread_num();
    T* buff=&M_buff[pid][0];
    T* M_=M[i];
    for(size_t j=0;j<d1;j++)
      buff[j]=M_[j];
    for(size_t j=0;j<d1;j++){
      M_[j]=buff[perm_[j]]*scal_[j];
    }
  }
}
#undef myswap
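
// Cache-blocked transpose: the matrix is processed in B1 x B1 tiles, each of
// which is traversed in B2 x B2 sub-tiles, so that both source and
// destination accesses stay within cache.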
#define B1 128
#define B2 32

template <class T>
Matrix<T> Matrix<T>::Transpose(){
  Matrix<T>& M=*this;
  size_t d0=M.dim[0];
  size_t d1=M.dim[1];
  Matrix<T> M_r(d1,d0,NULL);

  const size_t blk0=((d0+B1-1)/B1);
  const size_t blk1=((d1+B1-1)/B1);
  const size_t blks=blk0*blk1;
  // #pragma omp parallel for
  for(size_t k=0;k<blks;k++){
    size_t i=(k%blk0)*B1;
    size_t j=(k/blk0)*B1;
    // for(size_t i=0;i<d0;i+=B1)
    // for(size_t j=0;j<d1;j+=B1){
    size_t d0_=i+B1; if(d0_>=d0) d0_=d0;
    size_t d1_=j+B1; if(d1_>=d1) d1_=d1;
    for(size_t ii=i;ii<d0_;ii+=B2)
      for(size_t jj=j;jj<d1_;jj+=B2){
        size_t d0__=ii+B2; if(d0__>=d0) d0__=d0;
        size_t d1__=jj+B2; if(d1__>=d1) d1__=d1;
        for(size_t iii=ii;iii<d0__;iii++)
          for(size_t jjj=jj;jjj<d1__;jjj++){
            M_r[jjj][iii]=M[iii][jjj];
          }
      }
  }
  // for(size_t i=0;i<d0;i++)
  // for(size_t j=0;j<d1;j++)
  //   M_r[j][i]=M[i][j];
  return M_r;
}

template <class T>
void Matrix<T>::Transpose(Matrix<T>& M_r, const Matrix<T>& M){
  size_t d0=M.dim[0];
  size_t d1=M.dim[1];
  M_r.Resize(d1, d0);

  const size_t blk0=((d0+B1-1)/B1);
  const size_t blk1=((d1+B1-1)/B1);
  const size_t blks=blk0*blk1;
  #pragma omp parallel for
  for(size_t k=0;k<blks;k++){
    size_t i=(k%blk0)*B1;
    size_t j=(k/blk0)*B1;
    // for(size_t i=0;i<d0;i+=B1)
    // for(size_t j=0;j<d1;j+=B1){
    size_t d0_=i+B1; if(d0_>=d0) d0_=d0;
    size_t d1_=j+B1; if(d1_>=d1) d1_=d1;
    for(size_t ii=i;ii<d0_;ii+=B2)
      for(size_t jj=j;jj<d1_;jj+=B2){
        size_t d0__=ii+B2; if(d0__>=d0) d0__=d0;
        size_t d1__=jj+B2; if(d1__>=d1) d1__=d1;
        for(size_t iii=ii;iii<d0__;iii++)
          for(size_t jjj=jj;jjj<d1__;jjj++){
            M_r[jjj][iii]=M[iii][jjj];
          }
      }
  }
}
#undef B2
#undef B1
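
// Thin SVD: M = tU * tS * tVT with tU (n x k), tS (k x k diagonal) and
// tVT (k x m), where k = min(m,n). The row-major matrix is handed to the
// column-major gesvd-style routine mat::svd by exchanging the roles of the
// U and VT outputs; the input matrix may be overwritten by the call. The
// returned singular values are then spread onto the diagonal of tS.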
template <class T>
void Matrix<T>::SVD(Matrix<T>& tU, Matrix<T>& tS, Matrix<T>& tVT){
  pvfmm::Matrix<T>& M=*this;
  pvfmm::Matrix<T> M_=M;
  int n=M.Dim(0);
  int m=M.Dim(1);
  int k = (m<n?m:n);
  tU.Resize(n,k); tU.SetZero();
  tS.Resize(k,k); tS.SetZero();
  tVT.Resize(k,m); tVT.SetZero();

  //SVD
  int INFO=0;
  char JOBU  = 'S';
  char JOBVT = 'S';
  int wssize = 3*(m<n?m:n)+(m>n?m:n);
  int wssize1 = 5*(m<n?m:n);
  wssize = (wssize>wssize1?wssize:wssize1);
  T* wsbuf = mem::aligned_new<T>(wssize);
  pvfmm::mat::svd(&JOBU, &JOBVT, &m, &n, &M[0][0], &m, &tS[0][0], &tVT[0][0], &m, &tU[0][0], &k, wsbuf, &wssize, &INFO);
  mem::aligned_delete<T>(wsbuf);
  if(INFO!=0) std::cout<<INFO<<'\n';
  assert(INFO==0);

  for(size_t i=1;i<(size_t)k;i++){
    tS[i][i]=tS[0][i];
    tS[0][i]=0;
  }
  //std::cout<<tU*tS*tVT-M_<<'\n';
}
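
// Pseudo-inverse via mat::pinv. A negative eps selects the default tolerance
// sqrt(machine epsilon). Note: this matrix is released (resized to 0 x 0)
// after the call.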
template <class T>
Matrix<T> Matrix<T>::pinv(T eps){
  if(eps<0){
    eps=1.0;
    while(eps+(T)1.0>1.0) eps*=0.5;
    eps=pvfmm::sqrt<T>(eps);
  }
  Matrix<T> M_r(dim[1],dim[0]);
  mat::pinv(data_ptr,dim[0],dim[1],eps,M_r.data_ptr);
  this->Resize(0,0);
  return M_r;
}
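
// Permutation class: represents a scaled permutation, i.e. a matrix whose
// only non-zero entry in column j is P.scal[j] at row P.perm[j] (see
// GetMatrix). The operators below compose permutations and apply them to
// matrices from the left (P*M) or the right (M*P).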
template <class T>
std::ostream& operator<<(std::ostream& output, const Permutation<T>& P){
  output<<std::setprecision(4)<<std::setiosflags(std::ios::left);
  size_t size=P.perm.Dim();
  for(size_t i=0;i<size;i++) output<<std::setw(10)<<P.perm[i]<<' ';
  output<<";\n";
  for(size_t i=0;i<size;i++) output<<std::setw(10)<<P.scal[i]<<' ';
  output<<";\n";
  return output;
}

template <class T>
Permutation<T>::Permutation(size_t size){
  perm.Resize(size);
  scal.Resize(size);
  for(size_t i=0;i<size;i++){
    perm[i]=i;
    scal[i]=1.0;
  }
}

template <class T>
Permutation<T> Permutation<T>::RandPerm(size_t size){
  Permutation<T> P(size);
  for(size_t i=0;i<size;i++){
    P.perm[i]=rand()%size;
    for(size_t j=0;j<i;j++)
      if(P.perm[i]==P.perm[j]){ i--; break; }
    P.scal[i]=((T)rand())/RAND_MAX;
  }
  return P;
}

template <class T>
Matrix<T> Permutation<T>::GetMatrix() const{
  size_t size=perm.Dim();
  Matrix<T> M_r(size,size,NULL);
  for(size_t i=0;i<size;i++)
    for(size_t j=0;j<size;j++)
      M_r[i][j]=(perm[j]==i?scal[j]:0.0);
  return M_r;
}

template <class T>
size_t Permutation<T>::Dim() const{
  return perm.Dim();
}

template <class T>
Permutation<T> Permutation<T>::Transpose(){
  size_t size=perm.Dim();
  Permutation<T> P_r(size);

  Vector<PERM_INT_T>& perm_r=P_r.perm;
  Vector<T>& scal_r=P_r.scal;
  for(size_t i=0;i<size;i++){
    perm_r[perm[i]]=i;
    scal_r[perm[i]]=scal[i];
  }
  return P_r;
}

template <class T>
Permutation<T> Permutation<T>::operator*(const Permutation<T>& P){
  size_t size=perm.Dim();
  assert(P.Dim()==size);

  Permutation<T> P_r(size);
  Vector<PERM_INT_T>& perm_r=P_r.perm;
  Vector<T>& scal_r=P_r.scal;
  for(size_t i=0;i<size;i++){
    perm_r[i]=perm[P.perm[i]];
    scal_r[i]=scal[P.perm[i]]*P.scal[i];
  }
  return P_r;
}

template <class T>
Matrix<T> Permutation<T>::operator*(const Matrix<T>& M){
  if(Dim()==0) return M;
  assert(M.Dim(0)==Dim());
  size_t d0=M.Dim(0);
  size_t d1=M.Dim(1);

  Matrix<T> M_r(d0,d1,NULL);
  for(size_t i=0;i<d0;i++){
    const T s=scal[i];
    const T* M_=M[i];
    T* M_r_=M_r[perm[i]];
    for(size_t j=0;j<d1;j++)
      M_r_[j]=M_[j]*s;
  }
  return M_r;
}

template <class T>
Matrix<T> operator*(const Matrix<T>& M, const Permutation<T>& P){
  if(P.Dim()==0) return M;
  assert(M.Dim(1)==P.Dim());
  size_t d0=M.Dim(0);
  size_t d1=M.Dim(1);

  Matrix<T> M_r(d0,d1,NULL);
  for(size_t i=0;i<d0;i++){
    const PERM_INT_T* perm_=&(P.perm[0]);
    const T* scal_=&(P.scal[0]);
    const T* M_=M[i];
    T* M_r_=M_r[i];
    for(size_t j=0;j<d1;j++)
      M_r_[j]=M_[perm_[j]]*scal_[j];
  }
  return M_r;
}

}//end namespace