mem_mgr.txx 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /**
  2. * \file mem_mgr.txx
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 9-21-2014
  5. * \brief This file contains the definition of a simple memory manager which
  6. * uses a pre-allocated buffer of size defined in call to the constructor.
  7. */
  8. #include <omp.h>
  9. #include <algorithm>
  10. #include <cstring>
  11. #include <cassert>
  12. #include <device_wrapper.hpp>
  13. namespace pvfmm{
  14. namespace mem{
  15. template <class T>
  16. uintptr_t TypeTraits<T>::ID(){
  17. return (uintptr_t)&ID;
  18. }
  19. template <class T>
  20. bool TypeTraits<T>::IsPOD(){
  21. return false;
  22. }
  23. #define PVFMMDefinePOD(type) template<> bool inline TypeTraits<type>::IsPOD(){return true;};
  24. PVFMMDefinePOD(char);
  25. PVFMMDefinePOD(float);
  26. PVFMMDefinePOD(double);
  27. PVFMMDefinePOD(int);
  28. PVFMMDefinePOD(long long);
  29. PVFMMDefinePOD(unsigned long);
  30. PVFMMDefinePOD(char*);
  31. PVFMMDefinePOD(float*);
  32. PVFMMDefinePOD(double*);
  33. #undef PVFMMDefinePOD
  34. MemoryManager::MemHead* MemoryManager::GetMemHead(void* p){
  35. static uintptr_t alignment=MEM_ALIGN-1;
  36. static uintptr_t header_size=(uintptr_t)(sizeof(MemoryManager::MemHead)+alignment) & ~(uintptr_t)alignment;
  37. return (MemHead*)(((char*)p)-header_size);
  38. }
  39. void* MemoryManager::malloc(const size_t& n_elem, const size_t& type_size, const uintptr_t& type_id) const{
  40. if(!n_elem) return NULL;
  41. static uintptr_t alignment=MEM_ALIGN-1;
  42. static uintptr_t header_size=(uintptr_t)(sizeof(MemHead)+alignment) & ~(uintptr_t)alignment;
  43. size_t size=n_elem*type_size+header_size;
  44. size=(uintptr_t)(size+alignment) & ~(uintptr_t)alignment;
  45. char* base=NULL;
  46. omp_set_lock(&omp_lock);
  47. std::multimap<size_t, size_t>::iterator it=free_map.lower_bound(size);
  48. size_t n_indx=(it!=free_map.end()?it->second:0);
  49. if(n_indx){ // Allocate from buff
  50. size_t n_free_indx=(it->first>size?new_node():0);
  51. MemNode& n=node_buff[n_indx-1];
  52. assert(n.size==it->first);
  53. assert(n.it==it);
  54. assert(n.free);
  55. if(n_free_indx){ // Create a node for the remaining free part.
  56. MemNode& n_free=node_buff[n_free_indx-1];
  57. n_free=n;
  58. n_free.size-=size;
  59. n_free.mem_ptr=(char*)n_free.mem_ptr+size;
  60. { // Insert n_free to the link list
  61. n_free.prev=n_indx;
  62. if(n_free.next){
  63. size_t n_next_indx=n_free.next;
  64. MemNode& n_next=node_buff[n_next_indx-1];
  65. n_next.prev=n_free_indx;
  66. }
  67. n.next=n_free_indx;
  68. }
  69. assert(n_free.free); // Insert n_free to free map
  70. n_free.it=free_map.insert(std::make_pair(n_free.size,n_free_indx));
  71. n.size=size; // Update n
  72. }
  73. n.free=false;
  74. free_map.erase(it);
  75. base = n.mem_ptr;
  76. }
  77. omp_unset_lock(&omp_lock);
  78. if(!base){ // Use system malloc
  79. size+=2+alignment;
  80. char* p = (char*)DeviceWrapper::host_malloc(size);
  81. base = (char*)((uintptr_t)(p+2+alignment) & ~(uintptr_t)alignment);
  82. ((uint16_t*)base)[-1] = (uint16_t)(base-p);
  83. }
  84. { // Check out-of-bounds write
  85. #ifndef NDEBUG
  86. if(n_indx){
  87. #pragma omp parallel for
  88. for(size_t i=0;i<size;i++) assert(base[i]==init_mem_val);
  89. }
  90. #endif
  91. }
  92. MemHead* mem_head=(MemHead*)base;
  93. { // Set mem_head
  94. mem_head->n_indx=n_indx;
  95. mem_head->n_elem=n_elem;
  96. mem_head->type_id=type_id;
  97. }
  98. { // Set header check_sum
  99. #ifndef NDEBUG
  100. size_t check_sum=0;
  101. mem_head->check_sum=0;
  102. for(size_t i=0;i<header_size;i++){
  103. check_sum+=base[i];
  104. }
  105. check_sum=check_sum & ((1UL << sizeof(mem_head->check_sum))-1);
  106. mem_head->check_sum=check_sum;
  107. #endif
  108. }
  109. return (void*)(base+header_size);
  110. }
  111. void MemoryManager::free(void* p, const size_t& type_size, const uintptr_t& type_id) const{
  112. if(!p) return;
  113. static uintptr_t alignment=MEM_ALIGN-1;
  114. static uintptr_t header_size=(uintptr_t)(sizeof(MemHead)+alignment) & ~(uintptr_t)alignment;
  115. char* base=(char*)((char*)p-header_size);
  116. MemHead* mem_head=(MemHead*)base;
  117. assert(mem_head->type_id==type_id);
  118. if(base<&buff[0] || base>=&buff[buff_size]){ // Use system free
  119. char* p=(char*)((uintptr_t)base-((uint16_t*)base)[-1]);
  120. return DeviceWrapper::host_free(p);
  121. }
  122. size_t n_indx=mem_head->n_indx;
  123. assert(n_indx>0 && n_indx<=node_buff.size());
  124. { // Verify header check_sum; set array to init_mem_val
  125. #ifndef NDEBUG
  126. { // Verify header check_sum
  127. size_t check_sum=0;
  128. for(size_t i=0;i<header_size;i++){
  129. check_sum+=base[i];
  130. }
  131. check_sum-=mem_head->check_sum;
  132. check_sum=check_sum & ((1UL << sizeof(mem_head->check_sum))-1);
  133. assert(check_sum==mem_head->check_sum);
  134. }
  135. size_t size=mem_head->n_elem*type_size;
  136. #pragma omp parallel for
  137. for(size_t i=0;i<size;i++) ((char*)p)[i]=init_mem_val;
  138. for(size_t i=0;i<sizeof(MemHead);i++) base[i]=init_mem_val;
  139. #endif
  140. }
  141. omp_set_lock(&omp_lock);
  142. MemNode& n=node_buff[n_indx-1];
  143. assert(!n.free && n.size>0 && n.mem_ptr==base);
  144. if(n.prev!=0 && node_buff[n.prev-1].free){
  145. size_t n_prev_indx=n.prev;
  146. MemNode& n_prev=node_buff[n_prev_indx-1];
  147. n.size+=n_prev.size;
  148. n.mem_ptr=n_prev.mem_ptr;
  149. n.prev=n_prev.prev;
  150. free_map.erase(n_prev.it);
  151. delete_node(n_prev_indx);
  152. if(n.prev){
  153. size_t n_prev_indx=n.prev;
  154. MemNode& n_prev=node_buff[n_prev_indx-1];
  155. n_prev.next=n_indx;
  156. }
  157. }
  158. if(n.next!=0 && node_buff[n.next-1].free){
  159. size_t n_next_indx=n.next;
  160. MemNode& n_next=node_buff[n_next_indx-1];
  161. n.size+=n_next.size;
  162. n.next=n_next.next;
  163. free_map.erase(n_next.it);
  164. delete_node(n_next_indx);
  165. if(n.next){
  166. size_t n_next_indx=n.next;
  167. MemNode& n_next=node_buff[n_next_indx-1];
  168. n_next.prev=n_indx;
  169. }
  170. }
  171. n.free=true; // Insert n to free_map
  172. n.it=free_map.insert(std::make_pair(n.size,n_indx));
  173. omp_unset_lock(&omp_lock);
  174. }
  175. size_t MemoryManager::new_node() const{
  176. if(node_stack.empty()){
  177. node_buff.resize(node_buff.size()+1);
  178. node_stack.push(node_buff.size());
  179. }
  180. size_t indx=node_stack.top();
  181. node_stack.pop();
  182. assert(indx);
  183. return indx;
  184. }
  185. void MemoryManager::delete_node(size_t indx) const{
  186. assert(indx);
  187. assert(indx<=node_buff.size());
  188. MemNode& n=node_buff[indx-1];
  189. n.free=false;
  190. n.size=0;
  191. n.prev=0;
  192. n.next=0;
  193. n.mem_ptr=NULL;
  194. node_stack.push(indx);
  195. }
  196. template <class T>
  197. T* aligned_new(size_t n_elem, const MemoryManager* mem_mgr){
  198. if(!n_elem) return NULL;
  199. static MemoryManager def_mem_mgr(0);
  200. if(!mem_mgr) mem_mgr=&def_mem_mgr;
  201. T* A=(T*)mem_mgr->malloc(n_elem, sizeof(T), TypeTraits<T>::ID());
  202. if(!TypeTraits<T>::IsPOD()){ // Call constructors
  203. //printf("%s\n", __PRETTY_FUNCTION__);
  204. #pragma omp parallel for
  205. for(size_t i=0;i<n_elem;i++){
  206. T* Ai=new(A+i) T();
  207. assert(Ai==(A+i));
  208. }
  209. }else{
  210. #ifndef NDEBUG
  211. #pragma omp parallel for
  212. for(size_t i=0;i<n_elem*sizeof(T);i++){
  213. ((char*)A)[i]=0;
  214. }
  215. #endif
  216. }
  217. assert(A);
  218. return A;
  219. }
  220. template <class T>
  221. void aligned_delete(T* A, const MemoryManager* mem_mgr){
  222. if (!A) return;
  223. if(!TypeTraits<T>::IsPOD()){ // Call destructors
  224. //printf("%s\n", __PRETTY_FUNCTION__);
  225. MemoryManager::MemHead* mem_head=MemoryManager::GetMemHead(A);
  226. size_t n_elem=mem_head->n_elem;
  227. for(size_t i=0;i<n_elem;i++){
  228. (A+i)->~T();
  229. }
  230. }else{
  231. #ifndef NDEBUG
  232. MemoryManager::MemHead* mem_head=MemoryManager::GetMemHead(A);
  233. size_t n_elem=mem_head->n_elem;
  234. #pragma omp parallel for
  235. for(size_t i=0;i<n_elem*sizeof(T);i++){
  236. ((char*)A)[i]=0;
  237. }
  238. #endif
  239. }
  240. static MemoryManager def_mem_mgr(0);
  241. if(!mem_mgr) mem_mgr=&def_mem_mgr;
  242. mem_mgr->free(A, sizeof(T), TypeTraits<T>::ID());
  243. }
  244. void* memcopy( void * destination, const void * source, size_t num){
  245. if(destination==source || num==0) return destination;
  246. return memcpy ( destination, source, num );
  247. }
  248. }//end namespace
  249. }//end namespace