precomp_mat.txx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. /**
  2. * \file precomp_mat.txx
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 3-07-2011
  5. * \brief This file contains the implementation of the PrecompMat class.
  6. * Handles storage of precomputed translation matrices.
  7. */
  8. #include <omp.h>
  9. #include <cassert>
  10. #include <stdint.h>
  11. #ifdef PVFMM_HAVE_SYS_STAT_H
  12. #include <sys/stat.h>
  13. #endif
  14. #include <mem_mgr.hpp>
  15. #include <profile.hpp>
  16. #include <vector.hpp>
  17. namespace pvfmm{
  18. #define PRECOMP_MIN_DEPTH 128
  19. #define PRECOMP_MAX_DEPTH 128
  20. template <class T>
  21. PrecompMat<T>::PrecompMat(bool scale_invar_): scale_invar(scale_invar_){
  22. if(!scale_invar) mat.resize((PRECOMP_MAX_DEPTH+PRECOMP_MIN_DEPTH)*Type_Count); //For each U,V,W,X
  23. else mat.resize(Type_Count);
  24. for(size_t i=0;i<mat.size();i++)
  25. mat[i].resize(500);
  26. perm.resize(Type_Count);
  27. for(size_t i=0;i<Type_Count;i++){
  28. perm[i].resize(Perm_Count);
  29. }
  30. perm_r.resize((PRECOMP_MAX_DEPTH+PRECOMP_MIN_DEPTH)*Type_Count);
  31. perm_c.resize((PRECOMP_MAX_DEPTH+PRECOMP_MIN_DEPTH)*Type_Count);
  32. for(size_t i=0;i<perm_r.size();i++){
  33. perm_r[i].resize(500);
  34. perm_c[i].resize(500);
  35. }
  36. }
  37. template <class T>
  38. Matrix<T>& PrecompMat<T>::Mat(int l, Mat_Type type, size_t indx){
  39. int level=(scale_invar?0:l+PRECOMP_MIN_DEPTH);
  40. assert(level*Type_Count+type<mat.size());
  41. //#pragma omp critical (PrecompMAT)
  42. if(indx>=mat[level*Type_Count+type].size()){
  43. mat[level*Type_Count+type].resize(indx+1);
  44. assert(false); //TODO: this is not thread safe.
  45. }
  46. return mat[level*Type_Count+type][indx];
  47. }
  48. template <class T>
  49. Permutation<T>& PrecompMat<T>::Perm_R(int l, Mat_Type type, size_t indx){
  50. int level=l+PRECOMP_MIN_DEPTH;
  51. assert(level*Type_Count+type<perm_r.size());
  52. //#pragma omp critical (PrecompMAT)
  53. if(indx>=perm_r[level*Type_Count+type].size()){
  54. perm_r[level*Type_Count+type].resize(indx+1);
  55. assert(false); //TODO: this is not thread safe.
  56. }
  57. return perm_r[level*Type_Count+type][indx];
  58. }
  59. template <class T>
  60. Permutation<T>& PrecompMat<T>::Perm_C(int l, Mat_Type type, size_t indx){
  61. int level=l+PRECOMP_MIN_DEPTH;
  62. assert(level*Type_Count+type<perm_c.size());
  63. //#pragma omp critical (PrecompMAT)
  64. if(indx>=perm_c[level*Type_Count+type].size()){
  65. perm_c[level*Type_Count+type].resize(indx+1);
  66. assert(false); //TODO: this is not thread safe.
  67. }
  68. return perm_c[level*Type_Count+type][indx];
  69. }
  70. template <class T>
  71. Permutation<T>& PrecompMat<T>::Perm(Mat_Type type, size_t indx){
  72. assert(indx<Perm_Count);
  73. return perm[type][indx];
  74. }
  75. template <class T>
  76. size_t PrecompMat<T>::CompactData(int level, Mat_Type type, Matrix<char>& comp_data, size_t offset){
  77. struct HeaderData{
  78. size_t total_size;
  79. size_t level;
  80. size_t mat_cnt ;
  81. size_t max_depth;
  82. };
  83. if(comp_data.Dim(0)*comp_data.Dim(1)>offset){
  84. char* indx_ptr=comp_data[0]+offset;
  85. HeaderData& header=*(HeaderData*)indx_ptr; indx_ptr+=sizeof(HeaderData);
  86. if(level==header.level){ // Data already exists.
  87. offset+=header.total_size;
  88. return offset;
  89. }
  90. }
  91. std::vector<Matrix<T> >& mat_=mat[(scale_invar?0:level+PRECOMP_MIN_DEPTH)*Type_Count+type];
  92. size_t mat_cnt=mat_.size();
  93. size_t indx_size=0;
  94. size_t mem_size=0;
  95. int omp_p=omp_get_max_threads();
  96. size_t l0=(scale_invar?0:level);
  97. size_t l1=(scale_invar?PRECOMP_MAX_DEPTH:level+1);
  98. { // Determine memory size.
  99. indx_size+=sizeof(HeaderData); // HeaderData
  100. indx_size+=mat_cnt*(1+(2+2)*(l1-l0))*sizeof(size_t); //Mat, Perm_R, Perm_C.
  101. indx_size=mem::align_ptr(indx_size);
  102. for(size_t j=0;j<mat_cnt;j++){
  103. Matrix <T>& M =Mat (level,type,j);
  104. if(M.Dim(0)>0 && M.Dim(1)>0){
  105. mem_size+=M.Dim(0)*M.Dim(1)*sizeof(T); mem_size=mem::align_ptr(mem_size);
  106. }
  107. for(size_t l=l0;l<l1;l++){
  108. Permutation<T>& Pr=Perm_R(l,type,j);
  109. Permutation<T>& Pc=Perm_C(l,type,j);
  110. if(Pr.Dim()>0){
  111. mem_size+=Pr.Dim()*sizeof(PERM_INT_T); mem_size=mem::align_ptr(mem_size);
  112. mem_size+=Pr.Dim()*sizeof(T); mem_size=mem::align_ptr(mem_size);
  113. }
  114. if(Pc.Dim()>0){
  115. mem_size+=Pc.Dim()*sizeof(PERM_INT_T); mem_size=mem::align_ptr(mem_size);
  116. mem_size+=Pc.Dim()*sizeof(T); mem_size=mem::align_ptr(mem_size);
  117. }
  118. }
  119. }
  120. }
  121. if(comp_data.Dim(0)*comp_data.Dim(1)<offset+indx_size+mem_size){ // Resize if needed.
  122. Matrix<char> old_data;
  123. if(offset>0) old_data=comp_data;
  124. comp_data.Resize(1,offset+indx_size+mem_size);
  125. if(offset>0){
  126. #pragma omp parallel for
  127. for(int tid=0;tid<omp_p;tid++){ // Copy data.
  128. size_t a=(offset*(tid+0))/omp_p;
  129. size_t b=(offset*(tid+1))/omp_p;
  130. mem::memcopy(comp_data[0]+a, old_data[0]+a, b-a);
  131. }
  132. }
  133. }
  134. { // Create indx.
  135. char* indx_ptr=comp_data[0]+offset;
  136. HeaderData& header=*(HeaderData*)indx_ptr; indx_ptr+=sizeof(HeaderData);
  137. Matrix<size_t> offset_indx(mat_cnt,1+(2+2)*(l1-l0), (size_t*)indx_ptr, false);
  138. header.total_size=indx_size+mem_size;
  139. header. level=level ;
  140. header. mat_cnt = mat_cnt ;
  141. header. max_depth=l1-l0 ;
  142. size_t data_offset=offset+indx_size;
  143. for(size_t j=0;j<mat_cnt;j++){
  144. Matrix <T>& M =Mat (level,type,j);
  145. offset_indx[j][0]=data_offset; indx_ptr+=sizeof(size_t);
  146. data_offset+=M.Dim(0)*M.Dim(1)*sizeof(T); mem_size=mem::align_ptr(mem_size);
  147. for(size_t l=l0;l<l1;l++){
  148. Permutation<T>& Pr=Perm_R(l,type,j);
  149. offset_indx[j][1+4*(l-l0)+0]=data_offset;
  150. data_offset+=Pr.Dim()*sizeof(PERM_INT_T); mem_size=mem::align_ptr(mem_size);
  151. offset_indx[j][1+4*(l-l0)+1]=data_offset;
  152. data_offset+=Pr.Dim()*sizeof(T); mem_size=mem::align_ptr(mem_size);
  153. Permutation<T>& Pc=Perm_C(l,type,j);
  154. offset_indx[j][1+4*(l-l0)+2]=data_offset;
  155. data_offset+=Pc.Dim()*sizeof(PERM_INT_T); mem_size=mem::align_ptr(mem_size);
  156. offset_indx[j][1+4*(l-l0)+3]=data_offset;
  157. data_offset+=Pc.Dim()*sizeof(T); mem_size=mem::align_ptr(mem_size);
  158. }
  159. }
  160. }
  161. #pragma omp parallel for
  162. for(int tid=0;tid<omp_p;tid++){ // Copy data.
  163. char* indx_ptr=comp_data[0]+offset;
  164. //HeaderData& header=*(HeaderData*)indx_ptr;
  165. indx_ptr+=sizeof(HeaderData);
  166. Matrix<size_t> offset_indx(mat_cnt,1+(2+2)*(l1-l0), (size_t*)indx_ptr, false);
  167. for(size_t j=0;j<mat_cnt;j++){
  168. Matrix <T>& M =Mat (level,type,j);
  169. if(M.Dim(0)>0 && M.Dim(1)>0){
  170. size_t a=(M.Dim(0)*M.Dim(1)* tid )/omp_p;
  171. size_t b=(M.Dim(0)*M.Dim(1)*(tid+1))/omp_p;
  172. mem::memcopy(comp_data[0]+offset_indx[j][0]+a*sizeof(T), &M[0][a], (b-a)*sizeof(T));
  173. }
  174. for(size_t l=l0;l<l1;l++){
  175. Permutation<T>& Pr=Perm_R(l,type,j);
  176. Permutation<T>& Pc=Perm_C(l,type,j);
  177. if(Pr.Dim()>0){
  178. size_t a=(Pr.Dim()* tid )/omp_p;
  179. size_t b=(Pr.Dim()*(tid+1))/omp_p;
  180. mem::memcopy(comp_data[0]+offset_indx[j][1+4*(l-l0)+0]+a*sizeof(PERM_INT_T), &Pr.perm[a], (b-a)*sizeof(PERM_INT_T));
  181. mem::memcopy(comp_data[0]+offset_indx[j][1+4*(l-l0)+1]+a*sizeof( T), &Pr.scal[a], (b-a)*sizeof( T));
  182. }
  183. if(Pc.Dim()>0){
  184. size_t a=(Pc.Dim()* tid )/omp_p;
  185. size_t b=(Pc.Dim()*(tid+1))/omp_p;
  186. mem::memcopy(comp_data[0]+offset_indx[j][1+4*(l-l0)+2]+a*sizeof(PERM_INT_T), &Pc.perm[a], (b-a)*sizeof(PERM_INT_T));
  187. mem::memcopy(comp_data[0]+offset_indx[j][1+4*(l-l0)+3]+a*sizeof( T), &Pc.scal[a], (b-a)*sizeof( T));
  188. }
  189. }
  190. }
  191. }
  192. return offset+indx_size+mem_size;
  193. }
  194. template <class T>
  195. void PrecompMat<T>::Save2File(const char* fname, bool replace){
  196. FILE* f=fopen(fname,"r");
  197. if(f!=NULL) { //File already exists.
  198. fclose(f);
  199. if(!replace) return;
  200. }
  201. f=fopen(fname,"wb");
  202. if(f==NULL) return;
  203. int tmp;
  204. tmp=sizeof(T);
  205. fwrite(&tmp,sizeof(int),1,f);
  206. tmp=(scale_invar?1:0);
  207. fwrite(&tmp,sizeof(int),1,f);
  208. for(size_t i=0;i<mat.size();i++){
  209. int n=mat[i].size();
  210. fwrite(&n,sizeof(int),1,f);
  211. for(int j=0;j<n;j++){
  212. Matrix<T>& M=mat[i][j];
  213. int n1=M.Dim(0);
  214. fwrite(&n1,sizeof(int),1,f);
  215. int n2=M.Dim(1);
  216. fwrite(&n2,sizeof(int),1,f);
  217. if(n1*n2>0)
  218. fwrite(&M[0][0],sizeof(T),n1*n2,f);
  219. }
  220. }
  221. fclose(f);
  222. }
  223. #define MY_FREAD(a,b,c,d) { \
  224. size_t r_cnt=fread(a,b,c,d); \
  225. if(r_cnt!=c){ \
  226. fputs ("Reading error ",stderr); \
  227. exit (-1); \
  228. } }
  229. template <class T>
  230. void PrecompMat<T>::LoadFile(const char* fname, MPI_Comm comm){
  231. Profile::Tic("LoadMatrices",&comm,true,3);
  232. Profile::Tic("ReadFile",&comm,true,4);
  233. size_t f_size=0;
  234. char* f_data=NULL;
  235. int np, myrank;
  236. MPI_Comm_size(comm,&np);
  237. MPI_Comm_rank(comm,&myrank);
  238. if(myrank==0){
  239. FILE* f=fopen(fname,"rb");
  240. if(f==NULL){
  241. f_size=0;
  242. }else{
  243. struct stat fileStat;
  244. if(stat(fname,&fileStat) < 0) f_size=0;
  245. else f_size=fileStat.st_size;
  246. //fseek (f, 0, SEEK_END);
  247. //f_size=ftell (f);
  248. }
  249. if(f_size>0){
  250. f_data=mem::aligned_new<char>(f_size);
  251. fseek (f, 0, SEEK_SET);
  252. MY_FREAD(f_data,sizeof(char),f_size,f);
  253. fclose(f);
  254. }
  255. }
  256. Profile::Toc();
  257. Profile::Tic("Broadcast",&comm,true,4);
  258. MPI_Bcast( &f_size, sizeof(size_t), MPI_BYTE, 0, comm );
  259. if(f_size==0){
  260. Profile::Toc();
  261. Profile::Toc();
  262. return;
  263. }
  264. if(f_data==NULL) f_data=mem::aligned_new<char>(f_size);
  265. char* f_ptr=f_data;
  266. int max_send_size=1000000000;
  267. while(f_size>0){
  268. if(f_size>(size_t)max_send_size){
  269. MPI_Bcast( f_ptr, max_send_size, MPI_BYTE, 0, comm );
  270. f_size-=max_send_size;
  271. f_ptr+=max_send_size;
  272. }else{
  273. MPI_Bcast( f_ptr, f_size, MPI_BYTE, 0, comm );
  274. f_size=0;
  275. }
  276. }
  277. f_ptr=f_data;
  278. {
  279. int tmp;
  280. tmp=*(int*)f_ptr; f_ptr+=sizeof(int);
  281. assert(tmp==sizeof(T));
  282. tmp=*(int*)f_ptr; f_ptr+=sizeof(int);
  283. scale_invar=tmp;
  284. size_t mat_size=(size_t)Type_Count*(scale_invar?1:PRECOMP_MAX_DEPTH+PRECOMP_MIN_DEPTH);
  285. if(mat.size()<mat_size){
  286. mat.resize(mat_size);
  287. }
  288. for(size_t i=0;i<mat_size;i++){
  289. int n;
  290. n=*(int*)f_ptr; f_ptr+=sizeof(int);
  291. if(mat[i].size()<(size_t)n)
  292. mat[i].resize(n);
  293. for(int j=0;j<n;j++){
  294. Matrix<T>& M=mat[i][j];
  295. int n1;
  296. n1=*(int*)f_ptr; f_ptr+=sizeof(int);
  297. int n2;
  298. n2=*(int*)f_ptr; f_ptr+=sizeof(int);
  299. if(n1*n2>0){
  300. M.Resize(n1,n2);
  301. mem::memcopy(&M[0][0], f_ptr, sizeof(T)*n1*n2); f_ptr+=sizeof(T)*n1*n2;
  302. }
  303. }
  304. }
  305. // Resize perm_r, perm_c
  306. perm_r.resize((PRECOMP_MAX_DEPTH+PRECOMP_MIN_DEPTH)*Type_Count);
  307. perm_c.resize((PRECOMP_MAX_DEPTH+PRECOMP_MIN_DEPTH)*Type_Count);
  308. for(size_t i=0;i<perm_r.size();i++){
  309. perm_r[i].resize(500);
  310. perm_c[i].resize(500);
  311. }
  312. }
  313. mem::aligned_delete<char>(f_data);
  314. Profile::Toc();
  315. Profile::Toc();
  316. }
  317. #undef MY_FREAD
  318. #undef PRECOMP_MIN_DEPTH
  319. template <class T>
  320. std::vector<T>& PrecompMat<T>::RelativeTrgCoord(){
  321. return rel_trg_coord;
  322. }
  323. template <class T>
  324. bool PrecompMat<T>::ScaleInvar(){
  325. return scale_invar;
  326. }
  327. }//end namespace