profile.txx 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. #include SCTL_INCLUDE(comm.hpp)
  2. #include <omp.h>
  3. #include <iostream>
  4. #include <sstream>
  5. #include <iomanip>
  6. #include <cassert>
  7. #include <cstdlib>
  8. namespace SCTL_NAMESPACE {
  9. inline Long Profile::Add_FLOP(Long inc) {
  10. Long& FLOP = ProfData().FLOP;
  11. Long orig_val = FLOP;
  12. #if SCTL_PROFILE >= 0
  13. #pragma omp atomic update
  14. FLOP += inc;
  15. #endif
  16. return orig_val;
  17. }
  18. inline Long Profile::Add_MEM(Long inc) {
  19. std::vector<Long>& max_mem = ProfData().max_mem;
  20. Long& MEM = ProfData().MEM;
  21. Long orig_val = MEM;
  22. #if SCTL_PROFILE >= 0
  23. #pragma omp atomic update
  24. MEM += inc;
  25. for (size_t i = 0; i < max_mem.size(); i++) {
  26. if (max_mem[i] < MEM) max_mem[i] = MEM;
  27. }
  28. #endif
  29. return orig_val;
  30. }
  31. inline bool Profile::Enable(bool state) {
  32. bool& enable_state = ProfData().enable_state;
  33. bool orig_val = enable_state;
  34. #if SCTL_PROFILE >= 0
  35. enable_state = state;
  36. #endif
  37. return orig_val;
  38. }
  39. inline void Profile::Tic(const char* name_, const Comm* comm_, bool sync_, Integer verbose) {
  40. #if SCTL_PROFILE >= 0
  41. ProfileData& prof = ProfData();
  42. // sync_=true;
  43. if (!prof.enable_state) return;
  44. if (verbose <= SCTL_PROFILE && prof.verb_level.size() == prof.enable_depth) {
  45. if (comm_ != nullptr && sync_) comm_->Barrier();
  46. #ifdef SCTL_VERBOSE
  47. Integer rank = 0;
  48. if (comm_ != nullptr) rank = comm_->Rank();
  49. if (!rank) {
  50. for (size_t i = 0; i < prof.name.size(); i++) std::cout << " ";
  51. std::cout << "\033[1;31m" << name_ << "\033[0m {\n";
  52. }
  53. #endif
  54. prof.name.push(name_);
  55. prof.comm.push(comm_);
  56. prof.sync.push(sync_);
  57. prof.max_mem.push_back(prof.MEM);
  58. prof.e_log.push_back(true);
  59. prof.s_log.push_back(sync_);
  60. prof.n_log.push_back(prof.name.top());
  61. prof.t_log.push_back(omp_get_wtime());
  62. prof.f_log.push_back(prof.FLOP);
  63. prof.m_log.push_back(prof.MEM);
  64. prof.max_m_log.push_back(prof.MEM);
  65. prof.enable_depth++;
  66. }
  67. prof.verb_level.push(verbose);
  68. #endif
  69. }
  70. inline void Profile::Toc() {
  71. #if SCTL_PROFILE >= 0
  72. ProfileData& prof = ProfData();
  73. if (!prof.enable_state) return;
  74. SCTL_ASSERT_MSG(!prof.verb_level.empty(), "Unbalanced extra Toc()");
  75. if (prof.verb_level.top() <= SCTL_PROFILE && prof.verb_level.size() == prof.enable_depth) {
  76. SCTL_ASSERT_MSG(!prof.name.empty() && !prof.comm.empty() && !prof.sync.empty() && !prof.max_mem.empty(), "Unbalanced extra Toc()");
  77. std::string name_ = prof.name.top();
  78. const Comm* comm_ = prof.comm.top();
  79. bool sync_ = prof.sync.top();
  80. // sync_=true;
  81. prof.e_log.push_back(false);
  82. prof.s_log.push_back(sync_);
  83. prof.n_log.push_back(name_);
  84. prof.t_log.push_back(omp_get_wtime());
  85. prof.f_log.push_back(prof.FLOP);
  86. prof.m_log.push_back(prof.MEM);
  87. prof.max_m_log.push_back(prof.max_mem.back());
  88. #ifndef NDEBUG
  89. if (comm_ != nullptr && sync_) comm_->Barrier();
  90. #endif
  91. prof.name.pop();
  92. prof.comm.pop();
  93. prof.sync.pop();
  94. prof.max_mem.pop_back();
  95. #ifdef SCTL_VERBOSE
  96. Integer rank = 0;
  97. if (comm_ != nullptr) rank = comm_->Rank();
  98. if (!rank) {
  99. for (size_t i = 0; i < prof.name.size(); i++) std::cout << " ";
  100. std::cout << "}\n";
  101. }
  102. #endif
  103. prof.enable_depth--;
  104. }
  105. prof.verb_level.pop();
  106. #endif
  107. }
  108. inline void Profile::print(const Comm* comm_) {
  109. #if SCTL_PROFILE >= 0
  110. ProfileData& prof = ProfData();
  111. SCTL_ASSERT_MSG(prof.name.empty(), "Missing balancing Toc()");
  112. Comm c_self = Comm::Self();
  113. if (comm_ == nullptr) comm_ = &c_self;
  114. comm_->Barrier();
  115. Integer np, rank;
  116. np = comm_->Size();
  117. rank = comm_->Rank();
  118. std::stack<double> tt;
  119. std::stack<Long> ff;
  120. std::stack<Long> mm;
  121. Integer width = 10;
  122. size_t level = 0;
  123. if (!rank && prof.e_log.size() > 0) {
  124. std::cout << "\n" << std::setw(width * 3 - 2 * level) << " ";
  125. if (np == 1) {
  126. std::cout << " " << std::setw(width) << "t";
  127. std::cout << " " << std::setw(width) << "f";
  128. std::cout << " " << std::setw(width) << "f/s";
  129. } else {
  130. std::cout << " " << std::setw(width) << "t_min";
  131. std::cout << " " << std::setw(width) << "t_avg";
  132. std::cout << " " << std::setw(width) << "t_max";
  133. std::cout << " " << std::setw(width) << "f_min";
  134. std::cout << " " << std::setw(width) << "f_avg";
  135. std::cout << " " << std::setw(width) << "f_max";
  136. std::cout << " " << std::setw(width) << "f/s_min";
  137. std::cout << " " << std::setw(width) << "f/s_max";
  138. std::cout << " " << std::setw(width) << "f/s_total";
  139. }
  140. std::cout << " " << std::setw(width) << "m_init";
  141. std::cout << " " << std::setw(width) << "m_max";
  142. std::cout << " " << std::setw(width) << "m_final" << '\n';
  143. }
  144. std::stack<std::string> out_stack;
  145. std::string s;
  146. out_stack.push(s);
  147. for (size_t i = 0; i < prof.e_log.size(); i++) {
  148. if (prof.e_log[i]) {
  149. level++;
  150. tt.push(prof.t_log[i]);
  151. ff.push(prof.f_log[i]);
  152. mm.push(prof.m_log[i]);
  153. std::string ss;
  154. out_stack.push(ss);
  155. } else {
  156. double t0 = prof.t_log[i] - tt.top();
  157. tt.pop();
  158. double f0 = (double)(prof.f_log[i] - ff.top()) * 1e-9;
  159. ff.pop();
  160. double fs0 = f0 / t0;
  161. double t_max, t_min, t_sum, t_avg;
  162. double f_max, f_min, f_sum, f_avg;
  163. double fs_max, fs_min, fs_sum; //, fs_avg;
  164. double m_init, m_max, m_final;
  165. comm_->Allreduce(Ptr2ConstItr<double>(&t0, 1), Ptr2Itr<double>(&t_max, 1), 1, Comm::CommOp::MAX);
  166. comm_->Allreduce(Ptr2ConstItr<double>(&f0, 1), Ptr2Itr<double>(&f_max, 1), 1, Comm::CommOp::MAX);
  167. comm_->Allreduce(Ptr2ConstItr<double>(&fs0, 1), Ptr2Itr<double>(&fs_max, 1), 1, Comm::CommOp::MAX);
  168. comm_->Allreduce(Ptr2ConstItr<double>(&t0, 1), Ptr2Itr<double>(&t_min, 1), 1, Comm::CommOp::MIN);
  169. comm_->Allreduce(Ptr2ConstItr<double>(&f0, 1), Ptr2Itr<double>(&f_min, 1), 1, Comm::CommOp::MIN);
  170. comm_->Allreduce(Ptr2ConstItr<double>(&fs0, 1), Ptr2Itr<double>(&fs_min, 1), 1, Comm::CommOp::MIN);
  171. comm_->Allreduce(Ptr2ConstItr<double>(&t0, 1), Ptr2Itr<double>(&t_sum, 1), 1, Comm::CommOp::SUM);
  172. comm_->Allreduce(Ptr2ConstItr<double>(&f0, 1), Ptr2Itr<double>(&f_sum, 1), 1, Comm::CommOp::SUM);
  173. m_final = (double)prof.m_log[i] * 1e-9;
  174. m_init = (double)mm.top() * 1e-9;
  175. mm.pop();
  176. m_max = (double)prof.max_m_log[i] * 1e-9;
  177. t_avg = t_sum / np;
  178. f_avg = f_sum / np;
  179. // fs_avg=f_avg/t_max;
  180. fs_sum = f_sum / t_max;
  181. if (!rank) {
  182. std::string s0 = out_stack.top();
  183. out_stack.pop();
  184. std::string s1 = out_stack.top();
  185. out_stack.pop();
  186. std::stringstream ss(std::stringstream::in | std::stringstream::out);
  187. ss << setiosflags(std::ios::fixed) << std::setprecision(4) << std::setiosflags(std::ios::left);
  188. for (size_t j = 0; j < level - 1; j++) {
  189. size_t l = i + 1;
  190. size_t k = level - 1;
  191. while (k > j && l < prof.e_log.size()) {
  192. k += (prof.e_log[l] ? 1 : -1);
  193. l++;
  194. }
  195. if (l < prof.e_log.size() ? prof.e_log[l] : false)
  196. ss << "| ";
  197. else
  198. ss << " ";
  199. }
  200. ss << "+-";
  201. ss << std::setw(width * 3 - 2 * level) << prof.n_log[i];
  202. ss << std::setiosflags(std::ios::right);
  203. if (np == 1) {
  204. ss << " " << std::setw(width) << t_avg;
  205. ss << " " << std::setw(width) << f_avg;
  206. ss << " " << std::setw(width) << fs_sum;
  207. } else {
  208. ss << " " << std::setw(width) << t_min;
  209. ss << " " << std::setw(width) << t_avg;
  210. ss << " " << std::setw(width) << t_max;
  211. ss << " " << std::setw(width) << f_min;
  212. ss << " " << std::setw(width) << f_avg;
  213. ss << " " << std::setw(width) << f_max;
  214. ss << " " << std::setw(width) << fs_min;
  215. // ss<<" "<<std::setw(width)<<fs_avg;
  216. ss << " " << std::setw(width) << fs_max;
  217. ss << " " << std::setw(width) << fs_sum;
  218. }
  219. ss << " " << std::setw(width) << m_init;
  220. ss << " " << std::setw(width) << m_max;
  221. ss << " " << std::setw(width) << m_final << '\n';
  222. s1 += ss.str() + s0;
  223. if (!s0.empty() && (i + 1 < prof.e_log.size() ? prof.e_log[i + 1] : false)) {
  224. for (size_t j = 0; j < level; j++) {
  225. size_t l = i + 1;
  226. size_t k = level - 1;
  227. while (k > j && l < prof.e_log.size()) {
  228. k += (prof.e_log[l] ? 1 : -1);
  229. l++;
  230. }
  231. if (l < prof.e_log.size() ? prof.e_log[l] : false)
  232. s1 += "| ";
  233. else
  234. s1 += " ";
  235. }
  236. s1 += "\n";
  237. } // */
  238. out_stack.push(s1);
  239. }
  240. level--;
  241. }
  242. }
  243. if (!rank) std::cout << out_stack.top() << '\n';
  244. reset();
  245. #endif
  246. }
  247. inline void Profile::reset() {
  248. ProfileData& prof = ProfData();
  249. prof.MEM = 0;
  250. prof.FLOP = 0;
  251. while (!prof.sync.empty()) prof.sync.pop();
  252. while (!prof.name.empty()) prof.name.pop();
  253. while (!prof.comm.empty()) prof.comm.pop();
  254. prof.e_log.clear();
  255. prof.s_log.clear();
  256. prof.n_log.clear();
  257. prof.t_log.clear();
  258. prof.f_log.clear();
  259. prof.m_log.clear();
  260. prof.max_m_log.clear();
  261. }
  262. } // end namespace