/** * \file precomp_mat.txx * \author Dhairya Malhotra, dhairya.malhotra@gmail.com * \date 3-07-2011 * \brief This file contains the implementation of the PrecompMat class. * Handles storage of precomputed translation matrices. */ #include #include namespace pvfmm{ #define PRECOMP_MIN_DEPTH 40 template PrecompMat::PrecompMat(bool homogen, int max_d): homogeneous(homogen), max_depth(max_d){ if(!homogen) mat.resize((max_d+PRECOMP_MIN_DEPTH)*Type_Count); //For each U,V,W,X else mat.resize(Type_Count); for(size_t i=0;i Matrix& PrecompMat::Mat(int l, Mat_Type type, size_t indx){ int level=(homogeneous?0:l+PRECOMP_MIN_DEPTH); #pragma omp critical (PrecompMAT) if(indx>=mat[level*Type_Count+type].size()){ mat[level*Type_Count+type].resize(indx+1); assert(false); //TODO: this is not thread safe. } return mat[level*Type_Count+type][indx]; } template Permutation& PrecompMat::Perm_R(Mat_Type type, size_t indx){ #pragma omp critical (PrecompMAT) if(indx>=perm_r[type].size()){ perm_r[type].resize(indx+1); assert(false); //TODO: this is not thread safe. } return perm_r[type][indx]; } template Permutation& PrecompMat::Perm_C(Mat_Type type, size_t indx){ #pragma omp critical (PrecompMAT) if(indx>=perm_c[type].size()){ perm_c[type].resize(indx+1); assert(false); //TODO: this is not thread safe. } return perm_c[type][indx]; } template Permutation& PrecompMat::Perm(Mat_Type type, size_t indx){ assert(indx size_t PrecompMat::CompactData(int l, Mat_Type type, Matrix& comp_data, size_t offset){ std::vector >& mat_=mat[(homogeneous?0:l+PRECOMP_MIN_DEPTH)*Type_Count+type]; size_t mat_cnt=mat_.size(); size_t indx_size=0; size_t mem_size=0; int omp_p=omp_get_max_threads(); { // Determine memory size. indx_size+=2*sizeof(size_t); //total_size, mat_cnt indx_size+=mat_cnt*(1+2+2)*sizeof(size_t); //Mat, Perm_R, Perm_C. indx_size=((uintptr_t)indx_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); for(size_t j=0;j& M =Mat( l,type,j); Permutation& Pr=Perm_R(type,j); Permutation& Pc=Perm_C(type,j); if(M.Dim(0)>0 && M.Dim(1)>0){ mem_size+=M.Dim(0)*M.Dim(1)*sizeof(T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); } if(Pr.Dim()>0){ mem_size+=Pr.Dim()*sizeof(PERM_INT_T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); mem_size+=Pr.Dim()*sizeof(T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); } if(Pc.Dim()>0){ mem_size+=Pc.Dim()*sizeof(PERM_INT_T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); mem_size+=Pc.Dim()*sizeof(T); mem_size=((uintptr_t)mem_size+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); } } } if(comp_data.Dim(0)*comp_data.Dim(1) old_data; if(offset>0) old_data=comp_data; comp_data.Resize(1,offset+indx_size+mem_size); if(offset>0) mem::memcopy(comp_data[0], old_data[0], offset); //TODO: This will affect NUMA. } { // Create indx. char* indx_ptr=comp_data[0]+offset; size_t data_offset=offset+indx_size; ((size_t*)indx_ptr)[0]=indx_size+mem_size; indx_ptr+=sizeof(size_t); ((size_t*)indx_ptr)[0]= mat_cnt ; indx_ptr+=sizeof(size_t); for(size_t j=0;j& M =Mat( l,type,j); Permutation& Pr=Perm_R(type,j); Permutation& Pc=Perm_C(type,j); ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t); data_offset+=M.Dim(0)*M.Dim(1)*sizeof(T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t); data_offset+=Pr.Dim()*sizeof(PERM_INT_T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t); data_offset+=Pr.Dim()*sizeof(T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t); data_offset+=Pc.Dim()*sizeof(PERM_INT_T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); ((size_t*)indx_ptr)[0]=data_offset; indx_ptr+=sizeof(size_t); data_offset+=Pc.Dim()*sizeof(T); data_offset=((uintptr_t)data_offset+(uintptr_t)(MEM_ALIGN-1)) & ~ (uintptr_t)(MEM_ALIGN-1); } } { // Copy data. char* indx_ptr=comp_data[0]+offset; size_t& total_size=((size_t*)indx_ptr)[0]; indx_ptr+=sizeof(size_t); size_t& mat_cnt =((size_t*)indx_ptr)[0]; indx_ptr+=sizeof(size_t); Vector data_offset((1+2+2)*mat_cnt, (size_t*)indx_ptr, false); offset+=total_size; for(size_t j=0;j& M =Mat( l,type,j); Permutation& Pr=Perm_R(type,j); Permutation& Pc=Perm_C(type,j); if(M.Dim(0)>0 && M.Dim(1)>0){ #pragma omp parallel for for(int tid=0;tid0){ mem::memcopy(comp_data[0]+data_offset[5*j+1], &Pr.perm[0], Pr.Dim()*sizeof(PERM_INT_T)); mem::memcopy(comp_data[0]+data_offset[5*j+2], &Pr.scal[0], Pr.Dim()*sizeof( T)); } if(Pc.Dim()>0){ mem::memcopy(comp_data[0]+data_offset[5*j+3], &Pc.perm[0], Pc.Dim()*sizeof(PERM_INT_T)); mem::memcopy(comp_data[0]+data_offset[5*j+4], &Pc.scal[0], Pc.Dim()*sizeof( T)); } } } return offset; } template void PrecompMat::Save2File(const char* fname, bool replace){ FILE* f=fopen(fname,"r"); if(f!=NULL) { //File already exists. fclose(f); if(!replace) return; } f=fopen(fname,"wb"); if(f==NULL) return; int tmp; tmp=sizeof(T); fwrite(&tmp,sizeof(int),1,f); tmp=(homogeneous?1:0); fwrite(&tmp,sizeof(int),1,f); tmp=max_depth; fwrite(&tmp,sizeof(int),1,f); for(size_t i=0;i& M=mat[i][j]; int n1=M.Dim(0); fwrite(&n1,sizeof(int),1,f); int n2=M.Dim(1); fwrite(&n2,sizeof(int),1,f); if(n1*n2>0) fwrite(&M[0][0],sizeof(T),n1*n2,f); } } fclose(f); } #define MY_FREAD(a,b,c,d) { \ size_t r_cnt=fread(a,b,c,d); \ if(r_cnt!=c){ \ fputs ("Reading error ",stderr); \ exit (-1); \ } } template void PrecompMat::LoadFile(const char* fname, MPI_Comm comm){ Profile::Tic("LoadMatrices",&comm,true,3); Profile::Tic("ReadFile",&comm,true,4); size_t f_size=0; char* f_data=NULL; int np, myrank; MPI_Comm_size(comm,&np); MPI_Comm_rank(comm,&myrank); if(myrank==0){ FILE* f=fopen(fname,"rb"); if(f==NULL){ f_size=0; }else{ struct stat fileStat; if(stat(fname,&fileStat) < 0) f_size=0; else f_size=fileStat.st_size; //fseek (f, 0, SEEK_END); //f_size=ftell (f); } if(f_size>0){ f_data=new char[f_size]; fseek (f, 0, SEEK_SET); MY_FREAD(f_data,sizeof(char),f_size,f); fclose(f); } } Profile::Toc(); Profile::Tic("Broadcast",&comm,true,4); MPI_Bcast( &f_size, sizeof(size_t), MPI_BYTE, 0, comm ); if(f_size==0){ Profile::Toc(); Profile::Toc(); return; } if(f_data==NULL) f_data=new char[f_size]; char* f_ptr=f_data; int max_send_size=1000000000; while(f_size>0){ if(f_size>(size_t)max_send_size){ MPI_Bcast( f_ptr, max_send_size, MPI_BYTE, 0, comm ); f_size-=max_send_size; f_ptr+=max_send_size; }else{ MPI_Bcast( f_ptr, f_size, MPI_BYTE, 0, comm ); f_size=0; } } f_ptr=f_data; { int tmp; tmp=*(int*)f_ptr; f_ptr+=sizeof(int); assert(tmp==sizeof(T)); tmp=*(int*)f_ptr; f_ptr+=sizeof(int); homogeneous=tmp; int max_depth_; max_depth_=*(int*)f_ptr; f_ptr+=sizeof(int); size_t mat_size=(size_t)Type_Count*(homogeneous?1:max_depth_+PRECOMP_MIN_DEPTH); if(mat.size()& M=mat[i][j]; int n1; n1=*(int*)f_ptr; f_ptr+=sizeof(int); int n2; n2=*(int*)f_ptr; f_ptr+=sizeof(int); if(n1*n2>0){ M.Resize(n1,n2); mem::memcopy(&M[0][0], f_ptr, sizeof(T)*n1*n2); f_ptr+=sizeof(T)*n1*n2; } } } } if(f_data!=NULL) delete[] f_data; Profile::Toc(); Profile::Toc(); } #undef MY_FREAD #undef PRECOMP_MIN_DEPTH template std::vector& PrecompMat::RelativeTrgCoord(){ return rel_trg_coord; } template bool PrecompMat::Homogen(){ return homogeneous; } }//end namespace