precomp_mat.txx 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. /**
  2. * \file precomp_mat.txx
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 3-07-2011
  5. * \brief This file contains the implementation of the PrecompMat class.
  6. * Handles storage of precomputed translation matrices.
  7. */
  8. #include <omp.h>
  9. #include <cassert>
  10. #include <stdint.h>
  11. #ifdef PVFMM_HAVE_SYS_STAT_H
  12. #include <sys/stat.h>
  13. #endif
  14. #include <mem_utils.hpp>
  15. #include <profile.hpp>
  16. #include <vector.hpp>
  17. namespace pvfmm{
  18. #define PRECOMP_MIN_DEPTH 40
  19. template <class T>
  20. PrecompMat<T>::PrecompMat(bool homogen, int max_d): homogeneous(homogen), max_depth(max_d){
  21. if(!homogen) mat.resize((max_d+PRECOMP_MIN_DEPTH)*Type_Count); //For each U,V,W,X
  22. else mat.resize(Type_Count);
  23. for(size_t i=0;i<mat.size();i++)
  24. mat[i].resize(500);
  25. perm.resize(Type_Count);
  26. for(size_t i=0;i<Type_Count;i++){
  27. perm[i].resize(Perm_Count);
  28. }
  29. perm_r.resize((max_d+PRECOMP_MIN_DEPTH)*Type_Count);
  30. perm_c.resize((max_d+PRECOMP_MIN_DEPTH)*Type_Count);
  31. for(size_t i=0;i<perm_r.size();i++){
  32. perm_r[i].resize(500);
  33. perm_c[i].resize(500);
  34. }
  35. }
  36. template <class T>
  37. Matrix<T>& PrecompMat<T>::Mat(int l, Mat_Type type, size_t indx){
  38. int level=(homogeneous?0:l+PRECOMP_MIN_DEPTH);
  39. #pragma omp critical (PrecompMAT)
  40. if(indx>=mat[level*Type_Count+type].size()){
  41. mat[level*Type_Count+type].resize(indx+1);
  42. assert(false); //TODO: this is not thread safe.
  43. }
  44. return mat[level*Type_Count+type][indx];
  45. }
  46. template <class T>
  47. Permutation<T>& PrecompMat<T>::Perm_R(int l, Mat_Type type, size_t indx){
  48. int level=l+PRECOMP_MIN_DEPTH;
  49. #pragma omp critical (PrecompMAT)
  50. if(indx>=perm_r[level*Type_Count+type].size()){
  51. perm_r[level*Type_Count+type].resize(indx+1);
  52. assert(false); //TODO: this is not thread safe.
  53. }
  54. return perm_r[level*Type_Count+type][indx];
  55. }
  56. template <class T>
  57. Permutation<T>& PrecompMat<T>::Perm_C(int l, Mat_Type type, size_t indx){
  58. int level=l+PRECOMP_MIN_DEPTH;
  59. #pragma omp critical (PrecompMAT)
  60. if(indx>=perm_c[level*Type_Count+type].size()){
  61. perm_c[level*Type_Count+type].resize(indx+1);
  62. assert(false); //TODO: this is not thread safe.
  63. }
  64. return perm_c[level*Type_Count+type][indx];
  65. }
  66. template <class T>
  67. Permutation<T>& PrecompMat<T>::Perm(Mat_Type type, size_t indx){
  68. assert(indx<Perm_Count);
  69. return perm[type][indx];
  70. }
  71. template <class T>
  72. size_t PrecompMat<T>::CompactData(int l, Mat_Type type, Matrix<char>& comp_data, size_t offset){
  73. std::vector<Matrix<T> >& mat_=mat[(homogeneous?0:l+PRECOMP_MIN_DEPTH)*Type_Count+type];
  74. size_t mat_cnt=mat_.size();
  75. size_t indx_size=0;
  76. size_t mem_size=0;
  77. int omp_p=omp_get_max_threads();
  78. { // Determine memory size.
  79. indx_size+=3*sizeof(size_t); //total_size, mat_cnt, max_depth
  80. indx_size+=mat_cnt*(1+(2+2)*max_depth)*sizeof(size_t); //Mat, Perm_R, Perm_C.
  81. indx_size=((uintptr_t)indx_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  82. for(size_t j=0;j<mat_cnt;j++){
  83. Matrix <T>& M =Mat (l,type,j);
  84. if(M.Dim(0)>0 && M.Dim(1)>0){
  85. mem_size+=M.Dim(0)*M.Dim(1)*sizeof(T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  86. }
  87. for(size_t l=0;l<max_depth;l++){
  88. Permutation<T>& Pr=Perm_R(l,type,j);
  89. Permutation<T>& Pc=Perm_C(l,type,j);
  90. if(Pr.Dim()>0){
  91. mem_size+=Pr.Dim()*sizeof(PERM_INT_T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  92. mem_size+=Pr.Dim()*sizeof(T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  93. }
  94. if(Pc.Dim()>0){
  95. mem_size+=Pc.Dim()*sizeof(PERM_INT_T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  96. mem_size+=Pc.Dim()*sizeof(T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  97. }
  98. }
  99. }
  100. }
  101. if(comp_data.Dim(0)*comp_data.Dim(1)<offset+indx_size+mem_size){ // Resize if needed.
  102. Matrix<char> old_data;
  103. if(offset>0) old_data=comp_data;
  104. comp_data.Resize(1,offset+indx_size+mem_size);
  105. if(offset>0) mem::memcopy(comp_data[0], old_data[0], offset); //TODO: This will affect NUMA.
  106. }
  107. { // Create indx.
  108. char* indx_ptr=comp_data[0]+offset;
  109. size_t data_offset=offset+indx_size;
  110. ((size_t*)indx_ptr)[0]=indx_size+mem_size; indx_ptr+=sizeof(size_t);
  111. ((size_t*)indx_ptr)[0]= mat_cnt ; indx_ptr+=sizeof(size_t);
  112. ((size_t*)indx_ptr)[0]= max_depth ; indx_ptr+=sizeof(size_t);
  113. for(size_t j=0;j<mat_cnt;j++){
  114. Matrix <T>& M =Mat (l,type,j);
  115. ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t);
  116. data_offset+=M.Dim(0)*M.Dim(1)*sizeof(T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  117. for(size_t l=0;l<max_depth;l++){
  118. Permutation<T>& Pr=Perm_R(l,type,j);
  119. ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t);
  120. data_offset+=Pr.Dim()*sizeof(PERM_INT_T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  121. ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t);
  122. data_offset+=Pr.Dim()*sizeof(T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  123. Permutation<T>& Pc=Perm_C(l,type,j);
  124. ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t);
  125. data_offset+=Pc.Dim()*sizeof(PERM_INT_T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  126. ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t);
  127. data_offset+=Pc.Dim()*sizeof(T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1);
  128. }
  129. }
  130. }
  131. { // Copy data.
  132. char* indx_ptr=comp_data[0]+offset;
  133. size_t& total_size=((size_t*)indx_ptr)[0]; indx_ptr+=sizeof(size_t);
  134. size_t& mat_cnt =((size_t*)indx_ptr)[0]; indx_ptr+=sizeof(size_t);
  135. size_t& max_depth=((size_t*)indx_ptr)[0]; indx_ptr+=sizeof(size_t);
  136. Matrix<size_t> data_offset(mat_cnt,1+(2+2)*max_depth, (size_t*)indx_ptr, false);
  137. offset+=total_size;
  138. for(size_t j=0;j<mat_cnt;j++){
  139. Matrix <T>& M =Mat (l,type,j);
  140. if(M.Dim(0)>0 && M.Dim(1)>0){
  141. #pragma omp parallel for
  142. for(int tid=0;tid<omp_p;tid++){
  143. size_t a=(M.Dim(0)*M.Dim(1)* tid )/omp_p;
  144. size_t b=(M.Dim(0)*M.Dim(1)*(tid+1))/omp_p;
  145. mem::memcopy(comp_data[0]+data_offset[j][0]+a*sizeof(T), &M[0][a], (b-a)*sizeof(T));
  146. }
  147. }
  148. for(size_t l=0;l<max_depth;l++){
  149. Permutation<T>& Pr=Perm_R(l,type,j);
  150. Permutation<T>& Pc=Perm_C(l,type,j);
  151. if(Pr.Dim()>0){
  152. mem::memcopy(comp_data[0]+data_offset[j][1+4*l+0], &Pr.perm[0], Pr.Dim()*sizeof(PERM_INT_T));
  153. mem::memcopy(comp_data[0]+data_offset[j][1+4*l+1], &Pr.scal[0], Pr.Dim()*sizeof( T));
  154. }
  155. if(Pc.Dim()>0){
  156. mem::memcopy(comp_data[0]+data_offset[j][1+4*l+2], &Pc.perm[0], Pc.Dim()*sizeof(PERM_INT_T));
  157. mem::memcopy(comp_data[0]+data_offset[j][1+4*l+3], &Pc.scal[0], Pc.Dim()*sizeof( T));
  158. }
  159. }
  160. }
  161. }
  162. return offset;
  163. }
  164. template <class T>
  165. void PrecompMat<T>::Save2File(const char* fname, bool replace){
  166. FILE* f=fopen(fname,"r");
  167. if(f!=NULL) { //File already exists.
  168. fclose(f);
  169. if(!replace) return;
  170. }
  171. f=fopen(fname,"wb");
  172. if(f==NULL) return;
  173. int tmp;
  174. tmp=sizeof(T);
  175. fwrite(&tmp,sizeof(int),1,f);
  176. tmp=(homogeneous?1:0);
  177. fwrite(&tmp,sizeof(int),1,f);
  178. tmp=max_depth;
  179. fwrite(&tmp,sizeof(int),1,f);
  180. for(size_t i=0;i<mat.size();i++){
  181. int n=mat[i].size();
  182. fwrite(&n,sizeof(int),1,f);
  183. for(int j=0;j<n;j++){
  184. Matrix<T>& M=mat[i][j];
  185. int n1=M.Dim(0);
  186. fwrite(&n1,sizeof(int),1,f);
  187. int n2=M.Dim(1);
  188. fwrite(&n2,sizeof(int),1,f);
  189. if(n1*n2>0)
  190. fwrite(&M[0][0],sizeof(T),n1*n2,f);
  191. }
  192. }
  193. fclose(f);
  194. }
  195. #define MY_FREAD(a,b,c,d) { \
  196. size_t r_cnt=fread(a,b,c,d); \
  197. if(r_cnt!=c){ \
  198. fputs ("Reading error ",stderr); \
  199. exit (-1); \
  200. } }
  201. template <class T>
  202. void PrecompMat<T>::LoadFile(const char* fname, MPI_Comm comm){
  203. Profile::Tic("LoadMatrices",&comm,true,3);
  204. Profile::Tic("ReadFile",&comm,true,4);
  205. size_t f_size=0;
  206. char* f_data=NULL;
  207. int np, myrank;
  208. MPI_Comm_size(comm,&np);
  209. MPI_Comm_rank(comm,&myrank);
  210. if(myrank==0){
  211. FILE* f=fopen(fname,"rb");
  212. if(f==NULL){
  213. f_size=0;
  214. }else{
  215. struct stat fileStat;
  216. if(stat(fname,&fileStat) < 0) f_size=0;
  217. else f_size=fileStat.st_size;
  218. //fseek (f, 0, SEEK_END);
  219. //f_size=ftell (f);
  220. }
  221. if(f_size>0){
  222. f_data=new char[f_size];
  223. fseek (f, 0, SEEK_SET);
  224. MY_FREAD(f_data,sizeof(char),f_size,f);
  225. fclose(f);
  226. }
  227. }
  228. Profile::Toc();
  229. Profile::Tic("Broadcast",&comm,true,4);
  230. MPI_Bcast( &f_size, sizeof(size_t), MPI_BYTE, 0, comm );
  231. if(f_size==0){
  232. Profile::Toc();
  233. Profile::Toc();
  234. return;
  235. }
  236. if(f_data==NULL) f_data=new char[f_size];
  237. char* f_ptr=f_data;
  238. int max_send_size=1000000000;
  239. while(f_size>0){
  240. if(f_size>(size_t)max_send_size){
  241. MPI_Bcast( f_ptr, max_send_size, MPI_BYTE, 0, comm );
  242. f_size-=max_send_size;
  243. f_ptr+=max_send_size;
  244. }else{
  245. MPI_Bcast( f_ptr, f_size, MPI_BYTE, 0, comm );
  246. f_size=0;
  247. }
  248. }
  249. f_ptr=f_data;
  250. {
  251. int tmp;
  252. tmp=*(int*)f_ptr; f_ptr+=sizeof(int);
  253. assert(tmp==sizeof(T));
  254. tmp=*(int*)f_ptr; f_ptr+=sizeof(int);
  255. homogeneous=tmp;
  256. int max_depth_;
  257. max_depth_=*(int*)f_ptr; f_ptr+=sizeof(int);
  258. size_t mat_size=(size_t)Type_Count*(homogeneous?1:max_depth_+PRECOMP_MIN_DEPTH);
  259. if(mat.size()<mat_size){
  260. max_depth=max_depth_;
  261. mat.resize(mat_size);
  262. }
  263. for(size_t i=0;i<mat_size;i++){
  264. int n;
  265. n=*(int*)f_ptr; f_ptr+=sizeof(int);
  266. if(mat[i].size()<(size_t)n)
  267. mat[i].resize(n);
  268. for(int j=0;j<n;j++){
  269. Matrix<T>& M=mat[i][j];
  270. int n1;
  271. n1=*(int*)f_ptr; f_ptr+=sizeof(int);
  272. int n2;
  273. n2=*(int*)f_ptr; f_ptr+=sizeof(int);
  274. if(n1*n2>0){
  275. M.Resize(n1,n2);
  276. mem::memcopy(&M[0][0], f_ptr, sizeof(T)*n1*n2); f_ptr+=sizeof(T)*n1*n2;
  277. }
  278. }
  279. }
  280. }
  281. if(f_data!=NULL) delete[] f_data;
  282. Profile::Toc();
  283. Profile::Toc();
  284. }
  285. #undef MY_FREAD
  286. #undef PRECOMP_MIN_DEPTH
  287. template <class T>
  288. std::vector<T>& PrecompMat<T>::RelativeTrgCoord(){
  289. return rel_trg_coord;
  290. }
  291. template <class T>
  292. bool PrecompMat<T>::Homogen(){
  293. return homogeneous;
  294. }
  295. }//end namespace