/** * \file device_wrapper.txx * \author Dhairya Malhotra, dhairya.malhotra@gmail.com * \date 6-5-2013 * \brief This file contains implementation of DeviceWrapper. * * Modified: * editor Chenhan D. Yu * date Juan-28-2014 * Add Cuda support. Error handle is available if needed. */ #include #include // CUDA Stream #if defined(PVFMM_HAVE_CUDA) #endif namespace pvfmm{ namespace DeviceWrapper{ // CUDA functions inline uintptr_t alloc_device_cuda(char* dev_handle, size_t len) { char *dev_ptr=NULL; #if defined(PVFMM_HAVE_CUDA) cudaError_t error; error = cudaHostRegister(dev_handle, len, cudaHostRegisterPortable); if (error != cudaSuccess) std::cout<=0) lock_vec_[idx]=0; #endif } inline void MIC_Lock::wait_lock(int idx){ #ifdef __MIC__ if(idx>=0) while(lock_vec_[idx]==1){ _mm_delay_32(8192); } #else if(idx<0 || lock_vec[idx]==0) return; if(lock_vec[idx]==2){ while(lock_vec[idx]==2); return; } lock_vec[idx]=2; #ifdef __INTEL_OFFLOAD #pragma offload_wait target(mic:0) wait(&lock_vec[idx]) #endif lock_vec[idx]=0; #endif } Vector MIC_Lock::lock_vec; Vector::Device MIC_Lock::lock_vec_; int MIC_Lock::lock_idx; // Implementation of Simple CUDA_Lock #if defined(PVFMM_HAVE_CUDA) CUDA_Lock::CUDA_Lock () { cuda_init = false; } inline void CUDA_Lock::init () { cudaError_t error; cublasStatus_t status; if (!cuda_init) { for (int i = 0; i < NUM_STREAM; i++) { error = cudaStreamCreate(&(stream[i])); } status = cublasCreate(&handle); status = cublasSetStream(handle, stream[0]); cuda_init = true; } } inline void CUDA_Lock::terminate () { cudaError_t error; cublasStatus_t status; if (!cuda_init) init(); for (int i = 0; i < NUM_STREAM; i++) { error = cudaStreamDestroy(stream[i]); } status = cublasDestroy(handle); cuda_init = false; } inline cudaStream_t *CUDA_Lock::acquire_stream (int idx) { if (!cuda_init) init(); if (idx < NUM_STREAM) return &(stream[idx]); else return NULL; } inline cublasHandle_t *CUDA_Lock::acquire_handle () { if (!cuda_init) init(); return &handle; } inline void CUDA_Lock::wait (int idx) { cudaError_t error; if (!cuda_init) init(); if (idx < NUM_STREAM) error = cudaStreamSynchronize(stream[idx]); } cudaStream_t CUDA_Lock::stream[NUM_STREAM]; cublasHandle_t CUDA_Lock::handle; bool CUDA_Lock::cuda_init; #endif }//end namespace