Dhairya Malhotra 11 年之前
父节点
当前提交
43490daacd
共有 2 个文件被更改,包括 5 次插入和 5 次删除
  1. 1 1
      include/device_wrapper.txx
  2. 4 4
      include/matrix.txx

+ 1 - 1
include/device_wrapper.txx

@@ -212,7 +212,7 @@ namespace DeviceWrapper{
     #ifdef __INTEL_OFFLOAD
     wait_mic(lock_idx);
     #elif defined(PVFMM_HAVE_CUDA)
-    CUDA_Lock::wait(0);
+    //CUDA_Lock::wait(0);
     #else
     ;
     #endif

+ 4 - 4
include/matrix.txx

@@ -130,16 +130,16 @@ template <class T>
 void Matrix<T>::Device2Host(T* host_ptr){
   dev.lock_idx=DeviceWrapper::device2host((char*)data_ptr,dev.dev_ptr,(char*)(host_ptr==NULL?data_ptr:host_ptr),dim[0]*dim[1]*sizeof(T));
 #if defined(PVFMM_HAVE_CUDA)
-  //cudaEventCreate(&lock);
-  //cudaEventRecord(lock, 0);
+  cudaEventCreate(&lock);
+  cudaEventRecord(lock, 0);
 #endif
 }
 
 template <class T>
 void Matrix<T>::Device2HostWait(){
 #if defined(PVFMM_HAVE_CUDA)
-  //cudaEventSynchronize(lock);
-  //cudaEventDestroy(lock);
+  cudaEventSynchronize(lock);
+  cudaEventDestroy(lock);
 #endif
   DeviceWrapper::wait(dev.lock_idx);
   dev.lock_idx=-1;