device_wrapper.txx 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /**
  2. * \file device_wrapper.txx
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 6-5-2013
  5. * \brief This file contains implementation of DeviceWrapper.
  6. */
  7. #include <omp.h>
  8. #include <cassert>
  9. #include <cstdlib>
  10. namespace pvfmm{
  11. namespace DeviceWrapper{
  12. // MIC functions
// Shorthand for the alloc_if/free_if modifiers used in the offload pragmas
// of this file (semantics per the Intel offload documentation):
// ALLOC - allocate the device buffer on entry and keep it after the offload.
#define ALLOC alloc_if(1) free_if(0)
// FREE  - use the already-allocated device buffer and release it afterwards.
#define FREE alloc_if(0) free_if(1)
// REUSE - use the already-allocated device buffer and leave it allocated.
#define REUSE alloc_if(0) free_if(0)
  16. inline uintptr_t alloc_device_mic(char* dev_handle, size_t len){
  17. assert(dev_handle!=NULL);
  18. uintptr_t dev_ptr=(uintptr_t)NULL;
  19. #ifdef __INTEL_OFFLOAD
  20. #pragma offload target(mic:0) nocopy( dev_handle: length(len) ALLOC) out(dev_ptr)
  21. #endif
  22. {dev_ptr=(uintptr_t)dev_handle;}
  23. return dev_ptr;
  24. }
  25. inline void free_device_mic(char* dev_handle, uintptr_t dev_ptr){
  26. #ifdef __INTEL_OFFLOAD
  27. #pragma offload target(mic:0) in( dev_handle: length(0) FREE)
  28. {
  29. assert(dev_ptr==(uintptr_t)dev_handle);
  30. }
  31. #endif
  32. }
  33. inline int host2device_mic(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len){
  34. #ifdef __INTEL_OFFLOAD
  35. int wait_lock_idx=MIC_Lock::curr_lock();
  36. int lock_idx=MIC_Lock::get_lock();
  37. if(dev_handle==host_ptr){
  38. #pragma offload target(mic:0) in( dev_handle : length(len) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
  39. {
  40. assert(dev_ptr==(uintptr_t)dev_handle);
  41. MIC_Lock::wait_lock(wait_lock_idx);
  42. MIC_Lock::release_lock(lock_idx);
  43. }
  44. }else{
  45. #pragma offload target(mic:0) in(host_ptr [0:len] : into ( dev_handle[0:len]) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
  46. {
  47. assert(dev_ptr==(uintptr_t)dev_handle);
  48. MIC_Lock::wait_lock(wait_lock_idx);
  49. MIC_Lock::release_lock(lock_idx);
  50. }
  51. }
  52. return lock_idx;
  53. #endif
  54. return -1;
  55. }
  56. inline int device2host_mic(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len){
  57. #ifdef __INTEL_OFFLOAD
  58. int wait_lock_idx=MIC_Lock::curr_lock();
  59. int lock_idx=MIC_Lock::get_lock();
  60. if(dev_handle==host_ptr){
  61. #pragma offload target(mic:0) out( dev_handle : length(len) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
  62. {
  63. assert(dev_ptr==(uintptr_t)dev_handle);
  64. MIC_Lock::wait_lock(wait_lock_idx);
  65. MIC_Lock::release_lock(lock_idx);
  66. }
  67. }else{
  68. #pragma offload target(mic:0) out( dev_handle[0:len] : into (host_ptr [0:len]) REUSE ) signal(&MIC_Lock::lock_vec[lock_idx])
  69. {
  70. assert(dev_ptr==(uintptr_t)dev_handle);
  71. MIC_Lock::wait_lock(wait_lock_idx);
  72. MIC_Lock::release_lock(lock_idx);
  73. }
  74. }
  75. return lock_idx;
  76. #endif
  77. return -1;
  78. }
  79. inline void wait_mic(int lock_idx){
  80. #ifdef __INTEL_OFFLOAD
  81. MIC_Lock::wait_lock(lock_idx);
  82. #endif
  83. }
  84. // Wrapper functions
  85. inline uintptr_t alloc_device(char* dev_handle, size_t len){
  86. #ifdef __INTEL_OFFLOAD
  87. return alloc_device_mic(dev_handle,len);
  88. #else
  89. uintptr_t dev_ptr=(uintptr_t)NULL;
  90. {dev_ptr=(uintptr_t)dev_handle;}
  91. return dev_ptr;
  92. #endif
  93. }
  94. inline void free_device(char* dev_handle, uintptr_t dev_ptr){
  95. #ifdef __INTEL_OFFLOAD
  96. free_device_mic(dev_handle,dev_ptr);
  97. #else
  98. ;
  99. #endif
  100. }
  101. template <int SYNC=__DEVICE_SYNC__>
  102. inline int host2device(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len){
  103. int lock_idx=-1;
  104. #ifdef __INTEL_OFFLOAD
  105. lock_idx=host2device_mic(host_ptr,dev_handle,dev_ptr,len);
  106. if(SYNC){
  107. #pragma offload target(mic:0)
  108. {MIC_Lock::wait_lock(lock_idx);}
  109. }
  110. #else
  111. ;
  112. #endif
  113. return lock_idx;
  114. }
  115. template <int SYNC=__DEVICE_SYNC__>
  116. inline int device2host(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len){
  117. int lock_idx=-1;
  118. #ifdef __INTEL_OFFLOAD
  119. lock_idx=device2host_mic(dev_handle,dev_ptr, host_ptr, len);
  120. if(SYNC) MIC_Lock::wait_lock(lock_idx);
  121. #else
  122. ;
  123. #endif
  124. return lock_idx;
  125. }
  126. inline void wait(int lock_idx){
  127. #ifdef __INTEL_OFFLOAD
  128. wait_mic(lock_idx);
  129. #else
  130. ;
  131. #endif
  132. }
  133. }
  134. // Implementation of MIC_Lock
// have_mic is 1 when __MIC__ is defined and 0 otherwise. The MIC_Lock
// functions below use it to abort() when host-only entry points are reached
// in a build where __MIC__ is set (presumably the coprocessor compilation
// pass -- confirm against the compiler documentation).
#ifdef __MIC__
#define have_mic 1
#else
#define have_mic 0
#endif
  140. inline void MIC_Lock::init(){
  141. #ifdef __INTEL_OFFLOAD
  142. if(have_mic) abort();// Cannot be called from MIC.
  143. lock_idx=0;
  144. lock_vec.Resize(NUM_LOCKS);
  145. lock_vec.SetZero();
  146. lock_vec_=lock_vec.AllocDevice(false);
  147. {for(size_t i=0;i<NUM_LOCKS;i++) lock_vec [i]=1;}
  148. #pragma offload target(mic:0)
  149. {for(size_t i=0;i<NUM_LOCKS;i++) lock_vec_[i]=1;}
  150. #endif
  151. }
  152. inline int MIC_Lock::get_lock(){
  153. #ifdef __INTEL_OFFLOAD
  154. if(have_mic) abort();// Cannot be called from MIC.
  155. int idx;
  156. #pragma omp critical
  157. {
  158. if(lock_idx==NUM_LOCKS-1){
  159. int wait_lock_idx=-1;
  160. wait_lock_idx=MIC_Lock::curr_lock();
  161. MIC_Lock::wait_lock(wait_lock_idx);
  162. #pragma offload target(mic:0)
  163. {MIC_Lock::wait_lock(wait_lock_idx);}
  164. MIC_Lock::init();
  165. }
  166. idx=lock_idx;
  167. lock_idx++;
  168. assert(lock_idx<NUM_LOCKS);
  169. }
  170. return idx;
  171. #else
  172. return -1;
  173. #endif
  174. }
  175. inline int MIC_Lock::curr_lock(){
  176. #ifdef __INTEL_OFFLOAD
  177. if(have_mic) abort();// Cannot be called from MIC.
  178. return lock_idx-1;
  179. #else
  180. return -1;
  181. #endif
  182. }
  183. inline void MIC_Lock::release_lock(int idx){ // Only call from inside an offload section
  184. #ifdef __INTEL_OFFLOAD
  185. #ifdef __MIC__
  186. if(idx>=0) lock_vec_[idx]=0;
  187. #endif
  188. #endif
  189. }
  190. inline void MIC_Lock::wait_lock(int idx){
  191. #ifdef __INTEL_OFFLOAD
  192. #ifdef __MIC__
  193. if(idx>=0) while(lock_vec_[idx]==1){
  194. _mm_delay_32(8192);
  195. }
  196. #else
  197. if(idx<0 || lock_vec[idx]==0) return;
  198. if(lock_vec[idx]==2){
  199. while(lock_vec[idx]==2);
  200. return;
  201. }
  202. lock_vec[idx]=2;
  203. #pragma offload_wait target(mic:0) wait(&lock_vec[idx])
  204. lock_vec[idx]=0;
  205. #endif
  206. #endif
  207. }
  208. }//end namespace