Dhairya Malhotra 11 år sedan
förälder
incheckning
cf84ce3836
2 ändrade filer med 26 tillägg och 20 borttagningar
  1. 6 2
      include/device_wrapper.txx
  2. 20 18
      include/mat_utils.txx

+ 6 - 2
include/device_wrapper.txx

@@ -23,7 +23,7 @@ namespace DeviceWrapper{
 
   // CUDA functions
   inline uintptr_t alloc_device_cuda(size_t len) {
-    char *dev_ptr;
+    char *dev_ptr=NULL;
     #if defined(PVFMM_HAVE_CUDA)
     cudaError_t error;
     error = cudaMalloc((void**)&dev_ptr, len);
@@ -327,7 +327,7 @@ namespace DeviceWrapper{
 
   inline cublasHandle_t *CUDA_Lock::acquire_handle () {
     if (!cuda_init) init();
-    return &handle; 
+    return &handle;
   }
 
   inline void CUDA_Lock::wait (int idx) {
@@ -336,6 +336,10 @@ namespace DeviceWrapper{
     if (idx < NUM_STREAM) error = cudaStreamSynchronize(stream[idx]);
   }
 
+  cudaStream_t CUDA_Lock::stream[NUM_STREAM];
+  cublasHandle_t CUDA_Lock::handle;
+  bool CUDA_Lock::cuda_init;
+
   #endif
 
 }//end namespace

+ 20 - 18
include/mat_utils.txx

@@ -31,33 +31,35 @@ namespace mat{
       dgemm_(&TransA, &TransB, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc);
   }
 
+#if defined(PVFMM_HAVE_CUDA)
 // cublasDgemm wrapper
   inline void cublasXgemm(char TransA, char TransB,  int M,  int N,  int K,  double alpha,  double *A,  int lda,  double *B,  int ldb,  double beta, double *C,  int ldc){
-	cublasOperation_t cublasTransA, cublasTransB;
-	cublasStatus_t status;
-	cublasHandle_t *handle;
-	handle = DeviceWrapper::CUDA_Lock::acquire_handle();
-	/* Need exeception handling if (handle) */
-	if (TransA == 'T' || TransA == 't') cublasTransA = CUBLAS_OP_T;
-	else if (TransA == 'N' || TransA == 'n') cublasTransA = CUBLAS_OP_T;
-	if (TransB == 'T' || TransB == 't') cublasTransB = CUBLAS_OP_T;
-	else if (TransB == 'N' || TransB == 'n') cublasTransB = CUBLAS_OP_T;
+    cublasOperation_t cublasTransA, cublasTransB;
+    cublasStatus_t status;
+    cublasHandle_t *handle;
+    handle = CUDA_Lock::acquire_handle();
+    /* Need exeception handling if (handle) */
+    if (TransA == 'T' || TransA == 't') cublasTransA = CUBLAS_OP_T;
+    else if (TransA == 'N' || TransA == 'n') cublasTransA = CUBLAS_OP_T;
+    if (TransB == 'T' || TransB == 't') cublasTransB = CUBLAS_OP_T;
+    else if (TransB == 'N' || TransB == 'n') cublasTransB = CUBLAS_OP_T;
     status = cublasDgemm(*handle, cublasTransA, cublasTransB, M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc);
   }
 
 // cublasDgemm wrapper
   inline void cublasXgemm(char TransA, char TransB,  int M,  int N,  int K,  float alpha,  float *A,  int lda,  float *B,  int ldb,  float beta, float *C,  int ldc){
-	cublasOperation_t cublasTransA, cublasTransB;
-	cublasStatus_t status;
-	cublasHandle_t *handle;
-	handle = DeviceWrapper::CUDA_Lock::acquire_handle();
-	/* Need exeception handling if (handle) */
-	if (TransA == 'T' || TransA == 't') cublasTransA = CUBLAS_OP_T;
-	else if (TransA == 'N' || TransA == 'n') cublasTransA = CUBLAS_OP_T;
-	if (TransB == 'T' || TransB == 't') cublasTransB = CUBLAS_OP_T;
-	else if (TransB == 'N' || TransB == 'n') cublasTransB = CUBLAS_OP_T;
+    cublasOperation_t cublasTransA, cublasTransB;
+    cublasStatus_t status;
+    cublasHandle_t *handle;
+    handle = CUDA_Lock::acquire_handle();
+    /* Need exeception handling if (handle) */
+    if (TransA == 'T' || TransA == 't') cublasTransA = CUBLAS_OP_T;
+    else if (TransA == 'N' || TransA == 'n') cublasTransA = CUBLAS_OP_T;
+    if (TransB == 'T' || TransB == 't') cublasTransB = CUBLAS_OP_T;
+    else if (TransB == 'N' || TransB == 'n') cublasTransB = CUBLAS_OP_T;
     status = cublasSgemm(*handle, cublasTransA, cublasTransB, M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc);
   }
+#endif
 
   inline void svd(char *JOBU, char *JOBVT, int *M, int *N, float *A, int *LDA,
       float *S, float *U, int *LDU, float *VT, int *LDVT, float *WORK, int *LWORK,