profile.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. /**
  2. * \file profile.cpp
  3. * \author Dhairya Malhotra, dhairya.malhotra@gmail.com
  4. * \date 2-11-2011
  5. * \brief This file contains implementation of the class Profile.
  6. */
  7. #include <mpi.h>
  8. #include <omp.h>
  9. #include <iostream>
  10. #include <sstream>
  11. #include <iomanip>
  12. #include <cassert>
  13. #include <cstdlib>
  14. #include <string>
  15. #include <vector>
  16. #include <stack>
  17. #include <pvfmm_common.hpp>
  18. #include <profile.hpp>
  19. namespace pvfmm{
  20. long long Profile::Add_FLOP(long long inc){
  21. long long orig_val=FLOP;
  22. #if __PROFILE__ >= 0
  23. #pragma omp atomic update
  24. FLOP+=inc;
  25. #endif
  26. return orig_val;
  27. }
  28. long long Profile::Add_MEM(long long inc){
  29. long long orig_val=MEM;
  30. #if __PROFILE__ >= 0
  31. #pragma omp atomic update
  32. MEM+=inc;
  33. for(size_t i=0;i<max_mem.size();i++){
  34. if(max_mem[i]<MEM) max_mem[i]=MEM;
  35. }
  36. #endif
  37. return orig_val;
  38. }
  39. bool Profile::Enable(bool state){
  40. bool orig_val=enable_state;
  41. #if __PROFILE__ >= 0
  42. enable_state=state;
  43. #endif
  44. return orig_val;
  45. }
  46. void Profile::Tic(const char* name_, const MPI_Comm* comm_,bool sync_, int verbose){
  47. #if __PROFILE__ >= 0
  48. //sync_=true;
  49. if(!enable_state) return;
  50. if(verbose<=__PROFILE__ && verb_level.size()==enable_depth){
  51. if(comm_!=NULL && sync_) MPI_Barrier(*comm_);
  52. #ifdef __VERBOSE__
  53. int rank=0;
  54. if(comm_!=NULL) MPI_Comm_rank(*comm_,&rank);
  55. if(!rank){
  56. for(size_t i=0;i<name.size();i++) std::cout<<" ";
  57. std::cout << "\033[1;31m"<<name_<<"\033[0m {\n";
  58. }
  59. #endif
  60. name.push(name_);
  61. comm.push((MPI_Comm*)comm_);
  62. sync.push(sync_);
  63. max_mem.push_back(MEM);
  64. e_log.push_back(true);
  65. s_log.push_back(sync_);
  66. n_log.push_back(name.top());
  67. t_log.push_back(omp_get_wtime());
  68. f_log.push_back(FLOP);
  69. m_log.push_back(MEM);
  70. max_m_log.push_back(MEM);
  71. enable_depth++;
  72. }
  73. verb_level.push(verbose);
  74. #endif
  75. }
  76. void Profile::Toc(){
  77. #if __PROFILE__ >= 0
  78. if(!enable_state) return;
  79. ASSERT_WITH_MSG(!verb_level.empty(),"Unbalanced extra Toc()");
  80. if(verb_level.top()<=__PROFILE__ && verb_level.size()==enable_depth){
  81. ASSERT_WITH_MSG(!name.empty() && !comm.empty() && !sync.empty() && !max_mem.empty(),"Unbalanced extra Toc()");
  82. std::string name_=name.top();
  83. MPI_Comm* comm_=comm.top();
  84. bool sync_=sync.top();
  85. //sync_=true;
  86. e_log.push_back(false);
  87. s_log.push_back(sync_);
  88. n_log.push_back(name_);
  89. t_log.push_back(omp_get_wtime());
  90. f_log.push_back(FLOP);
  91. m_log.push_back(MEM);
  92. max_m_log.push_back(max_mem.back());
  93. #ifndef NDEBUG
  94. if(comm_!=NULL && sync_) MPI_Barrier(*comm_);
  95. #endif
  96. name.pop();
  97. comm.pop();
  98. sync.pop();
  99. max_mem.pop_back();
  100. #ifdef __VERBOSE__
  101. int rank=0;
  102. if(comm_!=NULL) MPI_Comm_rank(*comm_,&rank);
  103. if(!rank){
  104. for(size_t i=0;i<name.size();i++) std::cout<<" ";
  105. std::cout<<"}\n";
  106. }
  107. #endif
  108. enable_depth--;
  109. }
  110. verb_level.pop();
  111. #endif
  112. }
  113. void Profile::print(const MPI_Comm* comm_){
  114. #if __PROFILE__ >= 0
  115. ASSERT_WITH_MSG(name.empty(),"Missing balancing Toc()");
  116. int np, rank;
  117. MPI_Comm c_self=MPI_COMM_SELF;
  118. if(comm_==NULL) comm_=&c_self;
  119. MPI_Barrier(*comm_);
  120. MPI_Comm_size(*comm_,&np);
  121. MPI_Comm_rank(*comm_,&rank);
  122. std::stack<double> tt;
  123. std::stack<long long> ff;
  124. std::stack<long long> mm;
  125. int width=10;
  126. size_t level=0;
  127. if(!rank && e_log.size()>0){
  128. std::cout<<"\n"<<std::setw(width*3-2*level)<<" ";
  129. std::cout<<" "<<std::setw(width)<<"t_min";
  130. std::cout<<" "<<std::setw(width)<<"t_avg";
  131. std::cout<<" "<<std::setw(width)<<"t_max";
  132. std::cout<<" "<<std::setw(width)<<"f_min";
  133. std::cout<<" "<<std::setw(width)<<"f_avg";
  134. std::cout<<" "<<std::setw(width)<<"f_max";
  135. std::cout<<" "<<std::setw(width)<<"f/s_min";
  136. std::cout<<" "<<std::setw(width)<<"f/s_max";
  137. std::cout<<" "<<std::setw(width)<<"f/s_total";
  138. std::cout<<" "<<std::setw(width)<<"m_init";
  139. std::cout<<" "<<std::setw(width)<<"m_max";
  140. std::cout<<" "<<std::setw(width)<<"m_final"<<'\n';
  141. }
  142. std::stack<std::string> out_stack;
  143. std::string s;
  144. out_stack.push(s);
  145. for(size_t i=0;i<e_log.size();i++){
  146. if(e_log[i]){
  147. level++;
  148. tt.push(t_log[i]);
  149. ff.push(f_log[i]);
  150. mm.push(m_log[i]);
  151. std::string ss;
  152. out_stack.push(ss);
  153. }else{
  154. double t0=t_log[i]-tt.top();tt.pop();
  155. double f0=(double)(f_log[i]-ff.top())*1e-9;ff.pop();
  156. double fs0=f0/t0;
  157. double t_max, t_min, t_sum, t_avg;
  158. double f_max, f_min, f_sum, f_avg;
  159. double fs_max, fs_min, fs_sum;//, fs_avg;
  160. double m_init, m_max, m_final;
  161. MPI_Reduce(&t0, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0, *comm_);
  162. MPI_Reduce(&f0, &f_max, 1, MPI_DOUBLE, MPI_MAX, 0, *comm_);
  163. MPI_Reduce(&fs0, &fs_max, 1, MPI_DOUBLE, MPI_MAX, 0, *comm_);
  164. MPI_Reduce(&t0, &t_min, 1, MPI_DOUBLE, MPI_MIN, 0, *comm_);
  165. MPI_Reduce(&f0, &f_min, 1, MPI_DOUBLE, MPI_MIN, 0, *comm_);
  166. MPI_Reduce(&fs0, &fs_min, 1, MPI_DOUBLE, MPI_MIN, 0, *comm_);
  167. MPI_Reduce(&t0, &t_sum, 1, MPI_DOUBLE, MPI_SUM, 0, *comm_);
  168. MPI_Reduce(&f0, &f_sum, 1, MPI_DOUBLE, MPI_SUM, 0, *comm_);
  169. m_final=(double)m_log[i]*1e-9;
  170. m_init =(double)mm.top()*1e-9; mm.pop();
  171. m_max =(double)max_m_log[i]*1e-9;
  172. t_avg=t_sum/np;
  173. f_avg=f_sum/np;
  174. //fs_avg=f_avg/t_max;
  175. fs_sum=f_sum/t_max;
  176. if(!rank){
  177. std::string s0=out_stack.top();out_stack.pop();
  178. std::string s1=out_stack.top();out_stack.pop();
  179. std::stringstream ss(std::stringstream::in | std::stringstream::out);
  180. ss<<setiosflags(std::ios::fixed)<<std::setprecision(4)<<std::setiosflags(std::ios::left);
  181. for(size_t j=0;j<level-1;j++){
  182. size_t l=i+1;
  183. size_t k=level-1;
  184. while(k>j && l<e_log.size()){
  185. k+=(e_log[l]?1:-1);
  186. l++;
  187. }
  188. if(l<e_log.size()?e_log[l]:false)
  189. ss<<"| ";
  190. else
  191. ss<<" ";
  192. }
  193. ss<<"+-";
  194. ss<<std::setw(width*3-2*level)<<n_log[i];
  195. ss<<std::setiosflags(std::ios::right);
  196. ss<<" "<<std::setw(width)<<t_min;
  197. ss<<" "<<std::setw(width)<<t_avg;
  198. ss<<" "<<std::setw(width)<<t_max;
  199. ss<<" "<<std::setw(width)<<f_min;
  200. ss<<" "<<std::setw(width)<<f_avg;
  201. ss<<" "<<std::setw(width)<<f_max;
  202. ss<<" "<<std::setw(width)<<fs_min;
  203. //ss<<" "<<std::setw(width)<<fs_avg;
  204. ss<<" "<<std::setw(width)<<fs_max;
  205. ss<<" "<<std::setw(width)<<fs_sum;
  206. ss<<" "<<std::setw(width)<<m_init;
  207. ss<<" "<<std::setw(width)<<m_max;
  208. ss<<" "<<std::setw(width)<<m_final<<'\n';
  209. s1+=ss.str()+s0;
  210. if(!s0.empty() && (i+1<e_log.size()?e_log[i+1]:false)){
  211. for(size_t j=0;j<level;j++){
  212. size_t l=i+1;
  213. size_t k=level-1;
  214. while(k>j && l<e_log.size()){
  215. k+=(e_log[l]?1:-1);
  216. l++;
  217. }
  218. if(l<e_log.size()?e_log[l]:false) s1+="| ";
  219. else s1+=" ";
  220. }
  221. s1+="\n";
  222. }// */
  223. out_stack.push(s1);
  224. }
  225. level--;
  226. }
  227. }
  228. if(!rank)
  229. std::cout<<out_stack.top()<<'\n';
  230. reset();
  231. #endif
  232. }
  233. void Profile::reset(){
  234. FLOP=0;
  235. while(!sync.empty())sync.pop();
  236. while(!name.empty())name.pop();
  237. while(!comm.empty())comm.pop();
  238. e_log.clear();
  239. s_log.clear();
  240. n_log.clear();
  241. t_log.clear();
  242. f_log.clear();
  243. m_log.clear();
  244. max_m_log.clear();
  245. }
  246. long long Profile::FLOP=0;
  247. long long Profile::MEM=0;
  248. bool Profile::enable_state=false;
  249. std::stack<bool> Profile::sync;
  250. std::stack<std::string> Profile::name;
  251. std::stack<MPI_Comm*> Profile::comm;
  252. std::vector<long long> Profile::max_mem;
  253. unsigned int Profile::enable_depth=0;
  254. std::stack<int> Profile::verb_level;
  255. std::vector<bool> Profile::e_log;
  256. std::vector<bool> Profile::s_log;
  257. std::vector<std::string> Profile::n_log;
  258. std::vector<double> Profile::t_log;
  259. std::vector<long long> Profile::f_log;
  260. std::vector<long long> Profile::m_log;
  261. std::vector<long long> Profile::max_m_log;
  262. }//end namespace