matrix.txx 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. /**
  2. * \file matrix.txx
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 2-11-2011
  5. * \brief This file contains inplementation of the class Matrix.
  6. */
#include <omp.h>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <device_wrapper.hpp>
#include <mat_utils.hpp>
#include <mem_mgr.hpp>
#include <profile.hpp>
  17. namespace pvfmm{
  18. template <class T>
  19. std::ostream& operator<<(std::ostream& output, const Matrix<T>& M){
  20. std::ios::fmtflags f(std::cout.flags());
  21. output<<std::fixed<<std::setprecision(4)<<std::setiosflags(std::ios::left);
  22. for(size_t i=0;i<M.Dim(0);i++){
  23. for(size_t j=0;j<M.Dim(1);j++){
  24. float f=((float)M(i,j));
  25. if(fabs(f)<1e-25) f=0;
  26. output<<std::setw(10)<<((double)f)<<' ';
  27. }
  28. output<<";\n";
  29. }
  30. std::cout.flags(f);
  31. return output;
  32. }
  33. template <class T>
  34. Matrix<T>::Matrix(){
  35. dim[0]=0;
  36. dim[1]=0;
  37. own_data=true;
  38. data_ptr=NULL;
  39. dev.dev_ptr=(uintptr_t)NULL;
  40. }
  41. template <class T>
  42. Matrix<T>::Matrix(size_t dim1, size_t dim2, T* data_, bool own_data_){
  43. dim[0]=dim1;
  44. dim[1]=dim2;
  45. own_data=own_data_;
  46. if(own_data){
  47. if(dim[0]*dim[1]>0){
  48. data_ptr=mem::aligned_new<T>(dim[0]*dim[1]);
  49. #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
  50. Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
  51. #endif
  52. if(data_!=NULL) mem::memcopy(data_ptr,data_,dim[0]*dim[1]*sizeof(T));
  53. }else data_ptr=NULL;
  54. }else
  55. data_ptr=data_;
  56. dev.dev_ptr=(uintptr_t)NULL;
  57. }
  58. template <class T>
  59. Matrix<T>::Matrix(const Matrix<T>& M){
  60. dim[0]=M.dim[0];
  61. dim[1]=M.dim[1];
  62. own_data=true;
  63. if(dim[0]*dim[1]>0){
  64. data_ptr=mem::aligned_new<T>(dim[0]*dim[1]);
  65. #if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
  66. Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
  67. #endif
  68. mem::memcopy(data_ptr,M.data_ptr,dim[0]*dim[1]*sizeof(T));
  69. }else
  70. data_ptr=NULL;
  71. dev.dev_ptr=(uintptr_t)NULL;
  72. }
/**
 * Destructor: discards the device mirror (no copy-back) and, if this
 * matrix owns its host buffer, frees it and updates the memory profile.
 */
template <class T>
Matrix<T>::~Matrix(){
  FreeDevice(false); // drop device copy without syncing back to host
  if(own_data){
    if(data_ptr!=NULL){
      mem::aligned_delete(data_ptr);
#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
      // NOTE(review): -dim[0]*dim[1]*sizeof(T) is computed in unsigned
      // (size_t) arithmetic and wraps; presumably Add_MEM takes a signed
      // type so the conversion yields the intended negative delta --
      // confirm against profile.hpp.
      Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
#endif
    }
  }
  data_ptr=NULL;
  dim[0]=0;
  dim[1]=0;
}
  88. template <class T>
  89. void Matrix<T>::Swap(Matrix<T>& M){
  90. size_t dim_[2]={dim[0],dim[1]};
  91. T* data_ptr_=data_ptr;
  92. bool own_data_=own_data;
  93. Device dev_=dev;
  94. Vector<char> dev_sig_=dev_sig;
  95. dim[0]=M.dim[0];
  96. dim[1]=M.dim[1];
  97. data_ptr=M.data_ptr;
  98. own_data=M.own_data;
  99. dev=M.dev;
  100. dev_sig=M.dev_sig;
  101. M.dim[0]=dim_[0];
  102. M.dim[1]=dim_[1];
  103. M.data_ptr=data_ptr_;
  104. M.own_data=own_data_;
  105. M.dev=dev_;
  106. M.dev_sig=dev_sig_;
  107. }
  108. template <class T>
  109. void Matrix<T>::ReInit(size_t dim1, size_t dim2, T* data_, bool own_data_){
  110. Matrix<T> tmp(dim1,dim2,data_,own_data_);
  111. this->Swap(tmp);
  112. }
/**
 * Ensure a device-side buffer exists for this matrix, optionally uploading
 * the host data to it.
 * \param copy when true, copy the host buffer to the device buffer (the
 *        transfer's lock index is recorded in dev.lock_idx).
 * \return reference to the Device descriptor (device pointer + dims).
 */
template <class T>
typename Matrix<T>::Device& Matrix<T>::AllocDevice(bool copy){
  size_t len=dim[0]*dim[1];
  if(dev.dev_ptr==(uintptr_t)NULL && len>0) // Allocate data on device.
    dev.dev_ptr=DeviceWrapper::alloc_device((char*)data_ptr, len*sizeof(T));
  if(dev.dev_ptr!=(uintptr_t)NULL && copy) // Copy data to device
    dev.lock_idx=DeviceWrapper::host2device((char*)data_ptr,(char*)data_ptr,dev.dev_ptr,len*sizeof(T));
  dev.dim[0]=dim[0];
  dev.dim[1]=dim[1];
  return dev;
}
/**
 * Start a copy of the device buffer back to host memory.
 * \param host_ptr destination buffer; when NULL, the matrix's own host
 *        buffer is used.
 * The transfer's lock index is stored in dev.lock_idx; call
 * Device2HostWait() to block until it completes.
 */
template <class T>
void Matrix<T>::Device2Host(T* host_ptr){
  dev.lock_idx=DeviceWrapper::device2host((char*)data_ptr,dev.dev_ptr,(char*)(host_ptr==NULL?data_ptr:host_ptr),dim[0]*dim[1]*sizeof(T));
  //#if defined(PVFMM_HAVE_CUDA)
  // cudaEventCreate(&lock);
  // cudaEventRecord(lock, 0);
  //#endif
}
/**
 * Block until the transfer started by Device2Host() (tracked via
 * dev.lock_idx) has completed, then clear the lock index.
 */
template <class T>
void Matrix<T>::Device2HostWait(){
  //#if defined(PVFMM_HAVE_CUDA)
  // cudaEventSynchronize(lock);
  // cudaEventDestroy(lock);
  //#endif
  DeviceWrapper::wait(dev.lock_idx);
  dev.lock_idx=-1; // -1 marks "no pending transfer"
}
/**
 * Release the device buffer if one exists.
 * \param copy when true, copy the device data back to the host buffer
 *        before freeing.
 */
template <class T>
void Matrix<T>::FreeDevice(bool copy){
  if(dev.dev_ptr==(uintptr_t)NULL) return; // nothing allocated on device
  if(copy) DeviceWrapper::device2host((char*)data_ptr,dev.dev_ptr,(char*)data_ptr,dim[0]*dim[1]*sizeof(T));
  DeviceWrapper::free_device((char*)data_ptr, dev.dev_ptr);
  dev.dev_ptr=(uintptr_t)NULL;
  dev.dim[0]=0;
  dev.dim[1]=0;
}
  150. template <class T>
  151. void Matrix<T>::Write(const char* fname){
  152. FILE* f1=fopen(fname,"wb+");
  153. if(f1==NULL){
  154. std::cout<<"Unable to open file for writing:"<<fname<<'\n';
  155. return;
  156. }
  157. uint32_t dim_[2]={dim[0],dim[1]};
  158. fwrite(dim_,sizeof(uint32_t),2,f1);
  159. fwrite(data_ptr,sizeof(T),dim[0]*dim[1],f1);
  160. fclose(f1);
  161. }
  162. template <class T>
  163. void Matrix<T>::Read(const char* fname){
  164. FILE* f1=fopen(fname,"r");
  165. if(f1==NULL){
  166. std::cout<<"Unable to open file for reading:"<<fname<<'\n';
  167. return;
  168. }
  169. uint32_t dim_[2];
  170. size_t readlen=fread (dim_, sizeof(uint32_t), 2, f1);
  171. assert(readlen==2);
  172. ReInit(dim_[0],dim_[1]);
  173. readlen=fread(data_ptr,sizeof(T),dim[0]*dim[1],f1);
  174. assert(readlen==dim[0]*dim[1]);
  175. fclose(f1);
  176. }
  177. template <class T>
  178. size_t Matrix<T>::Dim(size_t i) const{
  179. return dim[i];
  180. }
/**
 * Resize to i x j. No-op when the shape already matches. The device copy
 * is discarded (not synced back). Existing element values are NOT
 * preserved; the new buffer is uninitialized.
 */
template <class T>
void Matrix<T>::Resize(size_t i, size_t j){
  if(dim[0]==i && dim[1]==j) return; // already the requested shape
  FreeDevice(false);
  if(own_data){
    if(data_ptr!=NULL){
      mem::aligned_delete(data_ptr);
#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
      // NOTE(review): unsigned negation -- see ~Matrix(); presumably
      // Add_MEM's signed parameter makes this the intended negative delta.
      Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
#endif
    }
  }
  dim[0]=i;
  dim[1]=j;
  if(own_data){
    if(dim[0]*dim[1]>0){
      data_ptr=mem::aligned_new<T>(dim[0]*dim[1]);
#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
      Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
#endif
    }else
      data_ptr=NULL;
  }
  // NOTE(review): when own_data is false, data_ptr keeps pointing at the
  // caller's buffer while dim[] changes; the aliased buffer must be large
  // enough for the new shape -- not checked here.
}
  205. template <class T>
  206. void Matrix<T>::SetZero(){
  207. if(dim[0]*dim[1]>0)
  208. memset(data_ptr,0,dim[0]*dim[1]*sizeof(T));
  209. }
/**
 * Deep-copy assignment. The owned buffer is reallocated only when the
 * total element count changes; the device copy is always discarded first.
 */
template <class T>
Matrix<T>& Matrix<T>::operator=(const Matrix<T>& M){
  if(this!=&M){ // guard against self-assignment
    FreeDevice(false);
    // Reallocate only if we own the buffer and the element count differs.
    if(own_data && dim[0]*dim[1]!=M.dim[0]*M.dim[1]){
      if(data_ptr!=NULL){
        mem::aligned_delete(data_ptr); data_ptr=NULL;
#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
        // NOTE(review): unsigned negation -- see ~Matrix(); confirm
        // Add_MEM's parameter type against profile.hpp.
        Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
#endif
      }
      if(M.dim[0]*M.dim[1]>0){
        data_ptr=mem::aligned_new<T>(M.dim[0]*M.dim[1]);
#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
        Profile::Add_MEM(M.dim[0]*M.dim[1]*sizeof(T));
#endif
      }
    }
    // NOTE(review): when own_data is false the aliased buffer is assumed
    // to hold at least M.dim[0]*M.dim[1] elements -- not checked here.
    dim[0]=M.dim[0];
    dim[1]=M.dim[1];
    mem::memcopy(data_ptr,M.data_ptr,dim[0]*dim[1]*sizeof(T));
  }
  return *this;
}
  234. template <class T>
  235. Matrix<T>& Matrix<T>::operator+=(const Matrix<T>& M){
  236. assert(M.Dim(0)==Dim(0) && M.Dim(1)==Dim(1));
  237. Profile::Add_FLOP(dim[0]*dim[1]);
  238. for(size_t i=0;i<M.Dim(0)*M.Dim(1);i++)
  239. data_ptr[i]+=M.data_ptr[i];
  240. return *this;
  241. }
  242. template <class T>
  243. Matrix<T>& Matrix<T>::operator-=(const Matrix<T>& M){
  244. assert(M.Dim(0)==Dim(0) && M.Dim(1)==Dim(1));
  245. Profile::Add_FLOP(dim[0]*dim[1]);
  246. for(size_t i=0;i<M.Dim(0)*M.Dim(1);i++)
  247. data_ptr[i]-=M.data_ptr[i];
  248. return *this;
  249. }
  250. template <class T>
  251. Matrix<T> Matrix<T>::operator+(const Matrix<T>& M2){
  252. Matrix<T>& M1=*this;
  253. assert(M2.Dim(0)==M1.Dim(0) && M2.Dim(1)==M1.Dim(1));
  254. Profile::Add_FLOP(dim[0]*dim[1]);
  255. Matrix<T> M_r(M1.Dim(0),M1.Dim(1),NULL);
  256. for(size_t i=0;i<M1.Dim(0)*M1.Dim(1);i++)
  257. M_r[0][i]=M1[0][i]+M2[0][i];
  258. return M_r;
  259. }
  260. template <class T>
  261. Matrix<T> Matrix<T>::operator-(const Matrix<T>& M2){
  262. Matrix<T>& M1=*this;
  263. assert(M2.Dim(0)==M1.Dim(0) && M2.Dim(1)==M1.Dim(1));
  264. Profile::Add_FLOP(dim[0]*dim[1]);
  265. Matrix<T> M_r(M1.Dim(0),M1.Dim(1),NULL);
  266. for(size_t i=0;i<M1.Dim(0)*M1.Dim(1);i++)
  267. M_r[0][i]=M1[0][i]-M2[0][i];
  268. return M_r;
  269. }
  270. template <class T>
  271. inline T& Matrix<T>::operator()(size_t i,size_t j) const{
  272. assert(i<dim[0] && j<dim[1]);
  273. return data_ptr[i*dim[1]+j];
  274. }
  275. template <class T>
  276. inline T* Matrix<T>::operator[](size_t i) const{
  277. assert(i<dim[0]);
  278. return &data_ptr[i*dim[1]];
  279. }
/**
 * Matrix-matrix product: returns (*this) * M.
 * When either operand is empty the (uninitialized) result is returned
 * without calling gemm. Operands are passed to mat::gemm in swapped
 * order -- presumably the standard row-major-as-column-major trick for a
 * column-major BLAS backend; confirm against mat_utils.hpp.
 */
template <class T>
Matrix<T> Matrix<T>::operator*(const Matrix<T>& M){
  assert(dim[1]==M.dim[0]); // inner dimensions must agree
  Profile::Add_FLOP(2*(((long long)dim[0])*dim[1])*M.dim[1]);
  Matrix<T> M_r(dim[0],M.dim[1],NULL);
  if(M.Dim(0)*M.Dim(1)==0 || this->Dim(0)*this->Dim(1)==0) return M_r;
  mat::gemm<T>('N','N',M.dim[1],dim[0],dim[1],
      1.0,M.data_ptr,M.dim[1],data_ptr,dim[1],0.0,M_r.data_ptr,M_r.dim[1]);
  return M_r;
}
/**
 * M_r = A*B + beta*M_r (beta=0 overwrites M_r).
 * Dimensions are assert-checked: A (m x k), B (k x n), M_r (m x n).
 * No-op when either operand is empty (M_r left untouched).
 * Operands are passed to mat::gemm in swapped order -- presumably the
 * row-major-as-column-major trick; confirm against mat_utils.hpp.
 */
template <class T>
void Matrix<T>::GEMM(Matrix<T>& M_r, const Matrix<T>& A, const Matrix<T>& B, T beta){
  if(A.Dim(0)*A.Dim(1)==0 || B.Dim(0)*B.Dim(1)==0) return;
  assert(A.dim[1]==B.dim[0]);
  assert(M_r.dim[0]==A.dim[0]);
  assert(M_r.dim[1]==B.dim[1]);
#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
  Profile::Add_FLOP(2*(((long long)A.dim[0])*A.dim[1])*B.dim[1]);
#endif
  mat::gemm<T>('N','N',B.dim[1],A.dim[0],A.dim[1],
      1.0,B.data_ptr,B.dim[1],A.data_ptr,A.dim[1],beta,M_r.data_ptr,M_r.dim[1]);
}
// cublasgemm wrapper
#if defined(PVFMM_HAVE_CUDA)
/**
 * GPU product: M_r = A*B + beta*M_r via cuBLAS.
 * Preconditions mirror GEMM(): A.dim[1]==B.dim[0], M_r is A.dim[0] x
 * B.dim[1]. No-op when either operand is empty. Operands are passed in
 * swapped order -- presumably to feed row-major data to cuBLAS's
 * column-major convention; confirm against mat_utils.hpp.
 */
template <class T>
void Matrix<T>::CUBLASGEMM(Matrix<T>& M_r, const Matrix<T>& A, const Matrix<T>& B, T beta){
  if(A.Dim(0)*A.Dim(1)==0 || B.Dim(0)*B.Dim(1)==0) return;
  assert(A.dim[1]==B.dim[0]);
  assert(M_r.dim[0]==A.dim[0]);
  assert(M_r.dim[1]==B.dim[1]);
  Profile::Add_FLOP(2*(((long long)A.dim[0])*A.dim[1])*B.dim[1]);
  mat::cublasgemm('N', 'N', B.dim[1], A.dim[0], A.dim[1],
      (T)1.0, B.data_ptr, B.dim[1], A.data_ptr, A.dim[1], beta, M_r.data_ptr, M_r.dim[1]);
}
#endif
#define myswap(t,a,b) {t c=a;a=b;b=c;}
/**
 * Apply the row permutation/scaling P in place: each row i is first
 * multiplied by P.scal[i] (rows processed in parallel), then rows are
 * reordered in place according to P.perm by following permutation cycles
 * on a mutable copy of the permutation -- each swap fixes one position,
 * so the reorder does O(d0) row swaps total.
 */
template <class T>
void Matrix<T>::RowPerm(const Permutation<T>& P){
  Matrix<T>& M=*this;
  if(P.Dim()==0) return; // empty permutation acts as identity
  assert(M.Dim(0)==P.Dim());
  size_t d0=M.Dim(0);
  size_t d1=M.Dim(1);
  // Phase 1: scale each row independently.
#pragma omp parallel for
  for(size_t i=0;i<d0;i++){
    T* M_=M[i];
    const T s=P.scal[i];
    for(size_t j=0;j<d1;j++) M_[j]*=s;
  }
  // Phase 2: in-place reorder; P_ tracks which row currently sits where.
  Permutation<T> P_=P;
  for(size_t i=0;i<d0;i++)
  while(P_.perm[i]!=i){ // keep swapping until position i holds its own index
    size_t a=P_.perm[i];
    size_t b=i;
    T* M_a=M[a];
    T* M_b=M[b];
    myswap(size_t,P_.perm[a],P_.perm[b]);
    for(size_t j=0;j<d1;j++)
      myswap(T,M_a[j],M_b[j]);
  }
}
  341. template <class T>
  342. void Matrix<T>::ColPerm(const Permutation<T>& P){
  343. Matrix<T>& M=*this;
  344. if(P.Dim()==0) return;
  345. assert(M.Dim(1)==P.Dim());
  346. size_t d0=M.Dim(0);
  347. size_t d1=M.Dim(1);
  348. int omp_p=omp_get_max_threads();
  349. Matrix<T> M_buff(omp_p,d1);
  350. const size_t* perm_=&(P.perm[0]);
  351. const T* scal_=&(P.scal[0]);
  352. #pragma omp parallel for
  353. for(size_t i=0;i<d0;i++){
  354. int pid=omp_get_thread_num();
  355. T* buff=&M_buff[pid][0];
  356. T* M_=M[i];
  357. for(size_t j=0;j<d1;j++)
  358. buff[j]=M_[j];
  359. for(size_t j=0;j<d1;j++){
  360. M_[j]=buff[perm_[j]]*scal_[j];
  361. }
  362. }
  363. }
  364. #undef myswap
#define B1 128
#define B2 32
/**
 * Return the transpose as a new matrix. The copy is cache-blocked at two
 * levels (B1 x B1 outer tiles, B2 x B2 inner tiles) so both the reads
 * from M and the strided writes to M_r stay cache friendly.
 */
template <class T>
Matrix<T> Matrix<T>::Transpose(){
  Matrix<T>& M=*this;
  size_t d0=M.dim[0];
  size_t d1=M.dim[1];
  Matrix<T> M_r(d1,d0,NULL);
  const size_t blk0=((d0+B1-1)/B1); // outer tile counts (ceil division)
  const size_t blk1=((d1+B1-1)/B1);
  const size_t blks=blk0*blk1;
// #pragma omp parallel for
  for(size_t k=0;k<blks;k++){
    // Recover this tile's origin from the linear tile index.
    size_t i=(k%blk0)*B1;
    size_t j=(k/blk0)*B1;
// for(size_t i=0;i<d0;i+=B1)
// for(size_t j=0;j<d1;j+=B1){
    size_t d0_=i+B1; if(d0_>=d0) d0_=d0; // clamp tile bounds to matrix edge
    size_t d1_=j+B1; if(d1_>=d1) d1_=d1;
    for(size_t ii=i;ii<d0_;ii+=B2)
    for(size_t jj=j;jj<d1_;jj+=B2){
      size_t d0__=ii+B2; if(d0__>=d0) d0__=d0; // inner-tile clamp
      size_t d1__=jj+B2; if(d1__>=d1) d1__=d1;
      for(size_t iii=ii;iii<d0__;iii++)
      for(size_t jjj=jj;jjj<d1__;jjj++){
        M_r[jjj][iii]=M[iii][jjj];
      }
    }
  }
// for(size_t i=0;i<d0;i++)
// for(size_t j=0;j<d1;j++)
// M_r[j][i]=M[i][j];
  return M_r;
}
  399. template <class T>
  400. void Matrix<T>::Transpose(Matrix<T>& M_r, const Matrix<T>& M){
  401. size_t d0=M.dim[0];
  402. size_t d1=M.dim[1];
  403. M_r.Resize(d1, d0);
  404. const size_t blk0=((d0+B1-1)/B1);
  405. const size_t blk1=((d1+B1-1)/B1);
  406. const size_t blks=blk0*blk1;
  407. #pragma omp parallel for
  408. for(size_t k=0;k<blks;k++){
  409. size_t i=(k%blk0)*B1;
  410. size_t j=(k/blk0)*B1;
  411. // for(size_t i=0;i<d0;i+=B1)
  412. // for(size_t j=0;j<d1;j+=B1){
  413. size_t d0_=i+B1; if(d0_>=d0) d0_=d0;
  414. size_t d1_=j+B1; if(d1_>=d1) d1_=d1;
  415. for(size_t ii=i;ii<d0_;ii+=B2)
  416. for(size_t jj=j;jj<d1_;jj+=B2){
  417. size_t d0__=ii+B2; if(d0__>=d0) d0__=d0;
  418. size_t d1__=jj+B2; if(d1__>=d1) d1__=d1;
  419. for(size_t iii=ii;iii<d0__;iii++)
  420. for(size_t jjj=jj;jjj<d1__;jjj++){
  421. M_r[jjj][iii]=M[iii][jjj];
  422. }
  423. }
  424. }
  425. }
  426. #undef B2
  427. #undef B1
/**
 * Singular value decomposition of this n x m matrix (n=Dim(0), m=Dim(1)):
 * (*this) = tU * tS * tVT with k = min(m,n).
 * tU is n x k, tS is k x k (diagonal), tVT is k x m.
 * WARNING: this matrix's data is overwritten by the SVD routine.
 */
template <class T>
void Matrix<T>::SVD(Matrix<T>& tU, Matrix<T>& tS, Matrix<T>& tVT){
  pvfmm::Matrix<T>& M=*this;
  pvfmm::Matrix<T> M_=M; // backup; only used by the commented-out check below
  int n=M.Dim(0);
  int m=M.Dim(1);
  int k = (m<n?m:n);
  tU.Resize(n,k); tU.SetZero();
  tS.Resize(k,k); tS.SetZero();
  tVT.Resize(k,m); tVT.SetZero();
  //SVD
  int INFO=0;
  char JOBU = 'S';
  char JOBVT = 'S';
  // LAPACK-style workspace size: max(3*min(m,n)+max(m,n), 5*min(m,n)).
  int wssize = 3*(m<n?m:n)+(m>n?m:n);
  int wssize1 = 5*(m<n?m:n);
  wssize = (wssize>wssize1?wssize:wssize1);
  T* wsbuf = mem::aligned_new<T>(wssize);
  // NOTE(review): U and VT arguments appear swapped relative to a plain
  // gesvd call -- presumably because the row-major data is presented to a
  // column-major routine as its transpose; confirm against mat_utils.hpp.
  pvfmm::mat::svd(&JOBU, &JOBVT, &m, &n, &M[0][0], &m, &tS[0][0], &tVT[0][0], &m, &tU[0][0], &k, wsbuf, &wssize, &INFO);
  mem::aligned_delete<T>(wsbuf);
  if(INFO!=0) std::cout<<INFO<<'\n'; // nonzero INFO: SVD did not converge
  assert(INFO==0);
  // Singular values come back in the first row of tS; move them onto
  // the diagonal and zero the temporaries.
  for(size_t i=1;i<k;i++){
    tS[i][i]=tS[0][i];
    tS[0][i]=0;
  }
  //std::cout<<tU*tS*tVT-M_<<'\n';
}
  456. template <class T>
  457. Matrix<T> Matrix<T>::pinv(T eps){
  458. if(eps<0){
  459. eps=1.0;
  460. while(eps+(T)1.0>1.0) eps*=0.5;
  461. eps=sqrt(eps);
  462. }
  463. Matrix<T> M_r(dim[1],dim[0]);
  464. mat::pinv(data_ptr,dim[0],dim[1],eps,M_r.data_ptr);
  465. this->Resize(0,0);
  466. return M_r;
  467. }
  468. template <class T>
  469. std::ostream& operator<<(std::ostream& output, const Permutation<T>& P){
  470. output<<std::setprecision(4)<<std::setiosflags(std::ios::left);
  471. size_t size=P.perm.Dim();
  472. for(size_t i=0;i<size;i++) output<<std::setw(10)<<P.perm[i]<<' ';
  473. output<<";\n";
  474. for(size_t i=0;i<size;i++) output<<std::setw(10)<<P.scal[i]<<' ';
  475. output<<";\n";
  476. return output;
  477. }
  478. template <class T>
  479. Permutation<T>::Permutation(size_t size){
  480. perm.Resize(size);
  481. scal.Resize(size);
  482. for(size_t i=0;i<size;i++){
  483. perm[i]=i;
  484. scal[i]=1.0;
  485. }
  486. }
  487. template <class T>
  488. Permutation<T> Permutation<T>::RandPerm(size_t size){
  489. Permutation<T> P(size);
  490. for(size_t i=0;i<size;i++){
  491. P.perm[i]=rand()%size;
  492. for(size_t j=0;j<i;j++)
  493. if(P.perm[i]==P.perm[j]){ i--; break; }
  494. P.scal[i]=((T)rand())/RAND_MAX;
  495. }
  496. return P;
  497. }
  498. template <class T>
  499. Matrix<T> Permutation<T>::GetMatrix() const{
  500. size_t size=perm.Dim();
  501. Matrix<T> M_r(size,size,NULL);
  502. for(size_t i=0;i<size;i++)
  503. for(size_t j=0;j<size;j++)
  504. M_r[i][j]=(perm[j]==i?scal[j]:0.0);
  505. return M_r;
  506. }
  507. template <class T>
  508. size_t Permutation<T>::Dim() const{
  509. return perm.Dim();
  510. }
  511. template <class T>
  512. Permutation<T> Permutation<T>::Transpose(){
  513. size_t size=perm.Dim();
  514. Permutation<T> P_r(size);
  515. Vector<PERM_INT_T>& perm_r=P_r.perm;
  516. Vector<T>& scal_r=P_r.scal;
  517. for(size_t i=0;i<size;i++){
  518. perm_r[perm[i]]=i;
  519. scal_r[perm[i]]=scal[i];
  520. }
  521. return P_r;
  522. }
  523. template <class T>
  524. Permutation<T> Permutation<T>::operator*(const Permutation<T>& P){
  525. size_t size=perm.Dim();
  526. assert(P.Dim()==size);
  527. Permutation<T> P_r(size);
  528. Vector<PERM_INT_T>& perm_r=P_r.perm;
  529. Vector<T>& scal_r=P_r.scal;
  530. for(size_t i=0;i<size;i++){
  531. perm_r[i]=perm[P.perm[i]];
  532. scal_r[i]=scal[P.perm[i]]*P.scal[i];
  533. }
  534. return P_r;
  535. }
  536. template <class T>
  537. Matrix<T> Permutation<T>::operator*(const Matrix<T>& M){
  538. if(Dim()==0) return M;
  539. assert(M.Dim(0)==Dim());
  540. size_t d0=M.Dim(0);
  541. size_t d1=M.Dim(1);
  542. Matrix<T> M_r(d0,d1,NULL);
  543. for(size_t i=0;i<d0;i++){
  544. const T s=scal[i];
  545. const T* M_=M[i];
  546. T* M_r_=M_r[perm[i]];
  547. for(size_t j=0;j<d1;j++)
  548. M_r_[j]=M_[j]*s;
  549. }
  550. return M_r;
  551. }
  552. template <class T>
  553. Matrix<T> operator*(const Matrix<T>& M, const Permutation<T>& P){
  554. if(P.Dim()==0) return M;
  555. assert(M.Dim(1)==P.Dim());
  556. size_t d0=M.Dim(0);
  557. size_t d1=M.Dim(1);
  558. Matrix<T> M_r(d0,d1,NULL);
  559. for(size_t i=0;i<d0;i++){
  560. const PERM_INT_T* perm_=&(P.perm[0]);
  561. const T* scal_=&(P.scal[0]);
  562. const T* M_=M[i];
  563. T* M_r_=M_r[i];
  564. for(size_t j=0;j<d1;j++)
  565. M_r_[j]=M_[perm_[j]]*scal_[j];
  566. }
  567. return M_r;
  568. }
  569. }//end namespace