Quellcode durchsuchen

temporary fix for Device2HostWait()

Dhairya Malhotra vor 11 Jahren
Ursprung
Commit
240427df6b
3 geänderte Dateien mit 19 neuen und 3 gelöschten Zeilen
  1. 8 3
      include/device_wrapper.txx
  2. 3 0
      include/matrix.hpp
  3. 8 0
      include/matrix.txx

+ 8 - 3
include/device_wrapper.txx

@@ -27,11 +27,12 @@ namespace DeviceWrapper{
 #if defined(PVFMM_HAVE_CUDA)
     cudaError_t error;
     error = cudaHostRegister(dev_handle, len, cudaHostRegisterPortable);
-    if(error != cudaSuccess){
-      std::cout<<len<<"\n";
-    }
+    if (error != cudaSuccess)
+      std::cout<<cudaGetErrorString(error)<< '\n';
     assert(error == cudaSuccess);
     error = cudaMalloc((void**)&dev_ptr, len);
+    if (error != cudaSuccess)
+      std::cout<<cudaGetErrorString(error)<< '\n';
     assert(error == cudaSuccess);
 #endif
     return (uintptr_t)dev_ptr;
@@ -55,6 +56,8 @@ namespace DeviceWrapper{
     cudaError_t error;
     cudaStream_t *stream = CUDA_Lock::acquire_stream(0);
     error = cudaMemcpyAsync(dev_ptr, host_ptr, len, cudaMemcpyHostToDevice, *stream);
+    if (error != cudaSuccess)
+      std::cout<<cudaGetErrorString(error)<< '\n';
     assert(error == cudaSuccess);
     return 0;
     #endif
@@ -65,6 +68,8 @@ namespace DeviceWrapper{
     cudaError_t error;
     cudaStream_t *stream = CUDA_Lock::acquire_stream(0);
     error = cudaMemcpyAsync(host_ptr, dev_ptr, len, cudaMemcpyDeviceToHost, *stream);
+    if (error != cudaSuccess)
+      std::cout<<cudaGetErrorString(error)<< '\n';
     assert(error == cudaSuccess);
     return 0;
     #endif

+ 3 - 0
include/matrix.hpp

@@ -120,6 +120,9 @@ class Matrix{
 
   Device dev;
   Vector<char> dev_sig;
+#if defined(PVFMM_HAVE_CUDA)
+  cudaEvent_t lock;
+#endif
 };
 
 

+ 8 - 0
include/matrix.txx

@@ -129,10 +129,18 @@ typename Matrix<T>::Device& Matrix<T>::AllocDevice(bool copy){
 template <class T>
 void Matrix<T>::Device2Host(T* host_ptr){ // Requests a device-to-host copy of dim[0]*dim[1]*sizeof(T) bytes; the destination is host_ptr, or data_ptr when host_ptr is NULL.
   dev.lock_idx=DeviceWrapper::device2host((char*)data_ptr,dev.dev_ptr,(char*)(host_ptr==NULL?data_ptr:host_ptr),dim[0]*dim[1]*sizeof(T)); // The returned value is kept in dev.lock_idx, which Device2HostWait() later passes to DeviceWrapper::wait().
+#if defined(PVFMM_HAVE_CUDA) // NOTE(review): both event calls below are commented out, so this guarded region currently contains no code.
+  //cudaEventCreate(&lock);
+  //cudaEventRecord(lock, 0);
+#endif
 }
 
 template <class T>
 void Matrix<T>::Device2HostWait(){ // Waits on the handle stored in dev.lock_idx by Device2Host(), then resets that handle.
+#if defined(PVFMM_HAVE_CUDA) // NOTE(review): the event synchronize/destroy calls below are commented out, so this guarded region currently contains no code.
+  //cudaEventSynchronize(lock);
+  //cudaEventDestroy(lock);
+#endif
   DeviceWrapper::wait(dev.lock_idx); // wait() is defined elsewhere; presumably it blocks until the pending transfer has finished (TODO confirm).
   dev.lock_idx=-1; // Presumably -1 means "no transfer pending"; verify against other users of lock_idx.
 }