// device_wrapper.hpp (2.9 KB)
  /**
   * \file device_wrapper.hpp
   * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
   * \date 6-5-2013
   * \brief This file contains the definition of DeviceWrapper.
   */
  7. #ifndef _PVFMM_DEVICE_WRAPPER_HPP_
  8. #define _PVFMM_DEVICE_WRAPPER_HPP_
  9. #ifdef __INTEL_OFFLOAD
  10. #pragma offload_attribute(push,target(mic))
  11. #endif
  12. // Cuda Header
  13. #if defined(PVFMM_HAVE_CUDA)
  14. #include <cuda_runtime_api.h>
  15. #include <cublas_v2.h>
  16. #endif
  17. #include <cstdlib>
  18. #include <cassert>
  19. #include <stdint.h>
  20. #include <pvfmm_common.hpp>
  21. namespace pvfmm{
  /**
   * \brief Free functions for device memory management and host/device data
   * transfer. Only the declarations live here; the definitions are expected
   * to come from device_wrapper.txx, which this header includes at the end.
   *
   * NOTE(review): the semantics described below are inferred from the
   * signatures alone -- confirm against the definitions.
   */
  namespace DeviceWrapper{

    /**
     * \brief Request a device buffer.
     * \param[in] dev_handle Opaque handle passed through to the backend
     * (presumably the host-side buffer associated with the allocation --
     * TODO confirm).
     * \param[in] len Requested size (presumably in bytes -- TODO confirm).
     * \return The device pointer, encoded as an integer.
     */
    uintptr_t alloc_device(char* dev_handle, size_t len);

    /**
     * \brief Release a device buffer previously obtained from alloc_device().
     * \param[in] dev_handle Handle that was used for the allocation.
     * \param[in] dev_ptr Device pointer returned by alloc_device().
     */
    void free_device(char* dev_handle, uintptr_t dev_ptr);

    /**
     * \brief Transfer len units of data from host_ptr to the device buffer
     * identified by (dev_handle, dev_ptr).
     * \return An int whose meaning is not visible in this header (presumably
     * a lock index usable with wait() -- TODO confirm).
     */
    int host2device(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len);

    /**
     * \brief Transfer len units of data from the device buffer identified by
     * (dev_handle, dev_ptr) to host_ptr.
     * \return An int whose meaning is not visible in this header (presumably
     * a lock index usable with wait() -- TODO confirm).
     */
    int device2host(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len);

    /**
     * \brief Wait on the operation identified by lock_idx.
     * \param[in] lock_idx Index of the lock/stream to wait on (presumably a
     * value returned by host2device()/device2host() -- TODO confirm).
     */
    void wait(int lock_idx);

  }//end namespace
  /*
  Usage of 'MIC_Lock' in Asynchronous Offloads
  --------------------------------------------
  Note: Any MIC offload section should look like this:

      int wait_lock_idx=MIC_Lock::curr_lock();
      int lock_idx=MIC_Lock::get_lock();
      #pragma offload target(mic:0) signal(&MIC_Lock::lock_vec[lock_idx])
      {
        MIC_Lock::wait_lock(wait_lock_idx);
        // Offload code here...
        MIC_Lock::release_lock(lock_idx);
      }
      #ifdef __MIC_ASYNCH__
      MIC_Lock::wait_lock(lock_idx);
      #endif

  This ensures the execution of offloaded code does not overlap with other
  asynchronous offloaded code and that data transfers from host to mic have
  completed before the data is accessed. You will, however, need to be careful
  not to overwrite data on mic which may be transferring to the host, or data on
  the host which may be transferring to the mic.

  On the host, to wait for the last asynchronous offload section or data
  transfer, use:

      int wait_lock_idx=MIC_Lock::curr_lock();
      MIC_Lock::wait_lock(wait_lock_idx);
  */
  54. #define NUM_LOCKS 1000000
  55. class MIC_Lock{
  56. public:
  57. static void init();
  58. static int get_lock();
  59. static void release_lock(int idx);
  60. static void wait_lock(int idx);
  61. static int curr_lock();
  62. static Vector<char> lock_vec;
  63. static Vector<char>::Device lock_vec_;
  64. private:
  65. MIC_Lock(){}; // private constructor for static class.
  66. static int lock_idx;
  67. };
  #if defined(PVFMM_HAVE_CUDA)

  // Number of CUDA streams held by CUDA_Lock (size of CUDA_Lock::stream).
  #define NUM_STREAM 2

  /**
   * \brief Static-only holder of the CUDA streams and the cuBLAS handle.
   * Only compiled when PVFMM_HAVE_CUDA is defined.
   *
   * All members are static and the constructor is private and declared
   * without an inline body, so the class is not meant to be instantiated.
   */
  class CUDA_Lock {

    public:

      /**
       * \brief Set up the CUDA state (presumably creates the streams and the
       * cuBLAS handle -- TODO confirm in device_wrapper.txx).
       */
      static void init();

      /**
       * \brief Tear down the CUDA state (presumably the counterpart of
       * init() -- TODO confirm).
       */
      static void terminate();

      /**
       * \brief Access a CUDA stream.
       * \param[in] idx Stream index (presumably in [0, NUM_STREAM) -- TODO
       * confirm whether it is range-checked).
       * \return Pointer to a CUDA stream.
       */
      static cudaStream_t *acquire_stream(int idx);

      /**
       * \brief Access the cuBLAS handle.
       * \return Pointer to a cuBLAS handle.
       */
      static cublasHandle_t *acquire_handle();

      /**
       * \brief Wait on the stream selected by idx (presumably synchronizes
       * stream[idx] -- TODO confirm).
       */
      static void wait(int idx);

    private:

      CUDA_Lock(); // private constructor: static-only class.

      // The NUM_STREAM CUDA streams.
      static cudaStream_t stream[NUM_STREAM];

      // The cuBLAS handle.
      static cublasHandle_t handle;

      // Presumably true once init() has run -- TODO confirm.
      static bool cuda_init;
  };

  #endif
  84. }//end namespace
  85. #ifdef __INTEL_OFFLOAD
  86. #pragma offload_attribute(pop)
  87. #endif
  88. #include <device_wrapper.txx>
  89. #endif //_PVFMM_DEVICE_WRAPPER_HPP_