ソースを参照

Changes for Intel Phi native build.

Dhairya Malhotra 11 年 前
コミット
a05dc803a5
5 ファイル変更、25 行追加、22 行削除
  1. +4 -1
      INSTALL
  2. +2 -2
      include/fmm_pts.txx
  3. +1 -1
      include/fmm_tree.txx
  4. +8 -8
      include/matrix.txx
  5. +10 -10
      include/vector.txx

+ 4 - 1
INSTALL

@@ -86,10 +86,13 @@ are given below:
      module swap PrgEnv-pgi PrgEnv-intel
      ./configure MPICXX="CC" F77="ftn"
 
-`Stampede (TACC)'
+`Stampede (TACC)' (Offload build)
      module load fftw3
      ./configure CXXFLAGS="-mavx -wd3218 -wd2570" --with-fftw="$TACC_FFTW3_DIR" FLIBS=" "
 
+`Stampede (TACC)' (Native build)
+     ./configure --host=x86_64-k1om-linux CXXFLAGS="-mmic -wd3218 -wd2570 -I$TACC_MKL_INC -DFFTW3_MKL" F77=ifort FFLAGS="-mmic" FLIBS=" " --with-fftw-include="$TACC_MKL_INC/fftw" --with-fftw-lib="-mkl"
+
 `Ronaldo (ICES)'
      ./configure CXXFLAGS="-msse4" --with-fftw="$FFTW_DIR"
 

+ 2 - 2
include/fmm_pts.txx

@@ -23,7 +23,7 @@
 #ifdef __AVX__
 #include <immintrin.h>
 #endif
-#ifdef __INTEL_OFFLOAD
+#if defined(__INTEL_OFFLOAD) || defined(__MIC__)
 #include <immintrin.h>
 #endif
 
@@ -1232,7 +1232,7 @@ void FMM_Pts<FMMNode>::CollectNodeData(std::vector<FMMNode*>& node, std::vector<
 
 template <class FMMNode>
 void FMM_Pts<FMMNode>::SetupPrecomp(SetupData<Real_t>& setup_data, bool device){
-  if(setup_data.precomp_data==NULL) return;
+  if(setup_data.precomp_data==NULL || setup_data.level>MAX_DEPTH) return;
 
   Profile::Tic("SetupPrecomp",&this->comm,true,25);
   { // Build precomp_data

+ 1 - 1
include/fmm_tree.txx

@@ -185,8 +185,8 @@ void FMM_Tree<FMM_Mat_t>::ClearFMMData() {
       #pragma offload target(mic:0)
       {MIC_Lock::wait_lock(wait_lock_idx);}
     }
-    #endif
     MIC_Lock::init();
+    #endif
   }
 
   }Profile::Toc();

+ 8 - 8
include/matrix.txx

@@ -42,7 +42,7 @@ Matrix<T>::Matrix(size_t dim1, size_t dim2, T* data_, bool own_data_){
   if(own_data){
     if(dim[0]*dim[1]>0){
       data_ptr=mem::aligned_malloc<T>(dim[0]*dim[1]);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
 #endif
       if(data_!=NULL) mem::memcopy(data_ptr,data_,dim[0]*dim[1]*sizeof(T));
@@ -59,7 +59,7 @@ Matrix<T>::Matrix(const Matrix<T>& M){
   own_data=true;
   if(dim[0]*dim[1]>0){
     data_ptr=mem::aligned_malloc<T>(dim[0]*dim[1]);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
     Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
 #endif
     mem::memcopy(data_ptr,M.data_ptr,dim[0]*dim[1]*sizeof(T));
@@ -74,7 +74,7 @@ Matrix<T>::~Matrix(){
   if(own_data){
     if(data_ptr!=NULL){
       mem::aligned_free(data_ptr);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
 #endif
     }
@@ -172,7 +172,7 @@ void Matrix<T>::Resize(size_t i, size_t j){
   if(own_data){
     if(data_ptr!=NULL){
       mem::aligned_free(data_ptr);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
 #endif
 
@@ -183,7 +183,7 @@ void Matrix<T>::Resize(size_t i, size_t j){
   if(own_data){
     if(dim[0]*dim[1]>0){
       data_ptr=mem::aligned_malloc<T>(dim[0]*dim[1]);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(dim[0]*dim[1]*sizeof(T));
 #endif
     }else
@@ -204,13 +204,13 @@ Matrix<T>& Matrix<T>::operator=(const Matrix<T>& M){
     if(own_data && dim[0]*dim[1]!=M.dim[0]*M.dim[1]){
       if(data_ptr!=NULL){
         mem::aligned_free(data_ptr); data_ptr=NULL;
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
         Profile::Add_MEM(-dim[0]*dim[1]*sizeof(T));
 #endif
       }
       if(M.dim[0]*M.dim[1]>0){
         data_ptr=mem::aligned_malloc<T>(M.dim[0]*M.dim[1]);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
         Profile::Add_MEM(M.dim[0]*M.dim[1]*sizeof(T));
 #endif
       }
@@ -294,7 +294,7 @@ void Matrix<T>::DGEMM(Matrix<T>& M_r, const Matrix<T>& A, const Matrix<T>& B, T
   assert(A.dim[1]==B.dim[0]);
   assert(M_r.dim[0]==A.dim[0]);
   assert(M_r.dim[1]==B.dim[1]);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
   Profile::Add_FLOP(2*(((long long)A.dim[0])*A.dim[1])*B.dim[1]);
 #endif
   mat::gemm('N','N',B.dim[1],A.dim[0],A.dim[1],

+ 10 - 10
include/vector.txx

@@ -41,7 +41,7 @@ Vector<T>::Vector(size_t dim_, T* data_, bool own_data_){
   if(own_data){
     if(dim>0){
       data_ptr=mem::aligned_malloc<T>(capacity);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(capacity*sizeof(T));
 #endif
       if(data_!=NULL) mem::memcopy(data_ptr,data_,dim*sizeof(T));
@@ -58,7 +58,7 @@ Vector<T>::Vector(const Vector<T>& V){
   own_data=true;
   if(dim>0){
     data_ptr=mem::aligned_malloc<T>(capacity);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
     Profile::Add_MEM(capacity*sizeof(T));
 #endif
     mem::memcopy(data_ptr,V.data_ptr,dim*sizeof(T));
@@ -74,7 +74,7 @@ Vector<T>::Vector(const std::vector<T>& V){
   own_data=true;
   if(dim>0){
     data_ptr=mem::aligned_malloc<T>(capacity);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
     Profile::Add_MEM(capacity*sizeof(T));
 #endif
     mem::memcopy(data_ptr,&V[0],dim*sizeof(T));
@@ -89,7 +89,7 @@ Vector<T>::~Vector(){
   if(own_data){
     if(data_ptr!=NULL){
       mem::aligned_free(data_ptr);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(-capacity*sizeof(T));
 #endif
     }
@@ -188,14 +188,14 @@ void Vector<T>::Resize(size_t dim_,bool fit_size){
   {
     if(data_ptr!=NULL){
       mem::aligned_free(data_ptr); data_ptr=NULL;
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(-capacity*sizeof(T));
 #endif
     }
     capacity=dim_;
     if(capacity>0){
       data_ptr=mem::aligned_malloc<T>(capacity);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
       Profile::Add_MEM(capacity*sizeof(T));
 #endif
     }
@@ -218,14 +218,14 @@ Vector<T>& Vector<T>::operator=(const Vector<T>& V){
     if(own_data && capacity<V.dim){
       if(data_ptr!=NULL){
         mem::aligned_free(data_ptr); data_ptr=NULL;
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
         Profile::Add_MEM(-capacity*sizeof(T));
 #endif
       }
       capacity=V.dim;
       if(capacity>0){
         data_ptr=mem::aligned_malloc<T>(capacity);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
         Profile::Add_MEM(capacity*sizeof(T));
 #endif
       }
@@ -245,14 +245,14 @@ Vector<T>& Vector<T>::operator=(const std::vector<T>& V){
     if(own_data && capacity<V.size()){
       if(data_ptr!=NULL){
         mem::aligned_free(data_ptr); data_ptr=NULL;
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
         Profile::Add_MEM(-capacity*sizeof(T));
 #endif
       }
       capacity=V.size();
       if(capacity>0){
         data_ptr=mem::aligned_malloc<T>(capacity);
-#ifndef __MIC__
+#if !defined(__MIC__) || !defined(__INTEL_OFFLOAD)
         Profile::Add_MEM(capacity*sizeof(T));
 #endif
       }