Dhairya Malhotra 7 lat temu
rodzic
commit
8803395086
4 zmienionych plików z 35 dodań i 29 usunięć
  1. 4 4
      Makefile
  2. 7 7
      include/sctl.hpp
  3. 2 2
      include/sctl/math_utils.txx
  4. 22 16
      include/sctl/profile.txx

+ 4 - 4
Makefile

@@ -1,6 +1,6 @@
 
 CXX=mpic++
-CXXFLAGS = -std=c++11 -fopenmp # need C++11 and OpenMP
+CXXFLAGS = -std=c++11 -fopenmp -Wall -Wfloat-conversion # need C++11 and OpenMP
 
 #Optional flags
 CXXFLAGS += -O0 # debug build
@@ -15,7 +15,9 @@ endif
 CXXFLAGS += -DSCTL_MEMDEBUG # Enable memory checks
 CXXFLAGS += -DSCTL_GLOBAL_MEM_BUFF=0 # Global memory buffer size in MB
 
-CXXFLAGS += -DSCTL_QUAD_T=__float128 -Wfloat-conversion # Enable quadruple precision
+CXXFLAGS += -DSCTL_PROFILE=5 -DSCTL_VERBOSE # Enable profiling
+
+CXXFLAGS += -DSCTL_QUAD_T=__float128 # Enable quadruple precision
 
 #CXXFLAGS += -DSCTL_HAVE_MPI #use MPI
 
@@ -27,8 +29,6 @@ CXXFLAGS += -lfftw3 -DSCTL_HAVE_FFTW
 CXXFLAGS += -lfftw3f -DSCTL_HAVE_FFTWF
 CXXFLAGS += -lfftw3l -DSCTL_HAVE_FFTWL
 
-CXXFLAGS += -DSCTL_PROFILE=5 -DSCTL_VERBOSE # Enable profiling
-
 
 RM = rm -f
 MKDIRS = mkdir -p

+ 7 - 7
include/sctl.hpp

@@ -5,9 +5,10 @@
 
 #define SCTL_NAMESPACE sctl
 
-#define SCTL_QUOTEME(x) SCTL_QUOTEME_1(x)
-#define SCTL_QUOTEME_1(x) #x
-#define SCTL_INCLUDE(x) SCTL_QUOTEME(sctl/x)
+// Profiling parameters
+#ifndef SCTL_PROFILE
+#define SCTL_PROFILE -1 // Granularity level
+#endif
 
 // Parameters for memory manager
 #define SCTL_MEM_ALIGN 64
@@ -15,10 +16,9 @@
 #define SCTL_GLOBAL_MEM_BUFF 1024LL * 0LL  // in MB
 #endif
 
-// Profiling parameters
-#ifndef SCTL_PROFILE
-#define SCTL_PROFILE -1 // Granularity level
-#endif
+#define SCTL_QUOTEME(x) SCTL_QUOTEME_1(x)
+#define SCTL_QUOTEME_1(x) #x
+#define SCTL_INCLUDE(x) SCTL_QUOTEME(SCTL_NAMESPACE/x)
 
 // MPI Wrapper
 #include SCTL_INCLUDE(comm.hpp)

+ 2 - 2
include/sctl/math_utils.txx

@@ -298,11 +298,11 @@ template <> inline QuadReal log<QuadReal>(const QuadReal a) { return log_generic
 
 template <> inline QuadReal pow<QuadReal>(const QuadReal b, const QuadReal e) { return pow_generic(b, e); }
 
-inline std::ostream& operator<<(std::ostream& output, const QuadReal q) { ostream_insertion_generic(output, q); }
+inline std::ostream& operator<<(std::ostream& output, const QuadReal q) { return ostream_insertion_generic(output, q); }
 
 }  // end namespace
 
-inline std::ostream& operator<<(std::ostream& output, const SCTL_QUAD_T q) { SCTL_NAMESPACE::ostream_insertion_generic(output, q); }
+inline std::ostream& operator<<(std::ostream& output, const SCTL_QUAD_T q) { return SCTL_NAMESPACE::ostream_insertion_generic(output, q); }
 
 #endif  // SCTL_QUAD_T
 

+ 22 - 16
include/sctl/profile.txx

@@ -9,13 +9,13 @@
 
 namespace SCTL_NAMESPACE {
 
+#if SCTL_PROFILE >= 0
+
 inline Long Profile::Add_FLOP(Long inc) {
   Long& FLOP = ProfData().FLOP;
   Long orig_val = FLOP;
-#if SCTL_PROFILE >= 0
 #pragma omp atomic update
   FLOP += inc;
-#endif
   return orig_val;
 }
 
@@ -23,30 +23,23 @@ inline Long Profile::Add_MEM(Long inc) {
   std::vector<Long>& max_mem = ProfData().max_mem;
   Long& MEM = ProfData().MEM;
   Long orig_val = MEM;
-#if SCTL_PROFILE >= 0
 #pragma omp atomic update
   MEM += inc;
-  for (size_t i = 0; i < max_mem.size(); i++) {
-    if (max_mem[i] < MEM) max_mem[i] = MEM;
-  }
-#endif
+  for (Integer i = max_mem.size() - 1; i >= 0 && max_mem[i] < MEM; i--) max_mem[i] = MEM;
   return orig_val;
 }
 
 inline bool Profile::Enable(bool state) {
   bool& enable_state = ProfData().enable_state;
   bool orig_val = enable_state;
-#if SCTL_PROFILE >= 0
   enable_state = state;
-#endif
   return orig_val;
 }
 
 inline void Profile::Tic(const char* name_, const Comm* comm_, bool sync_, Integer verbose) {
-#if SCTL_PROFILE >= 0
   ProfileData& prof = ProfData();
-  // sync_=true;
   if (!prof.enable_state) return;
+  // sync_=true;
   if (verbose <= SCTL_PROFILE && prof.verb_level.size() == prof.enable_depth) {
     if (comm_ != nullptr && sync_) comm_->Barrier();
 #ifdef SCTL_VERBOSE
@@ -73,11 +66,9 @@ inline void Profile::Tic(const char* name_, const Comm* comm_, bool sync_, Integ
     prof.enable_depth++;
   }
   prof.verb_level.push(verbose);
-#endif
 }
 
 inline void Profile::Toc() {
-#if SCTL_PROFILE >= 0
   ProfileData& prof = ProfData();
   if (!prof.enable_state) return;
   SCTL_ASSERT_MSG(!prof.verb_level.empty(), "Unbalanced extra Toc()");
@@ -116,11 +107,9 @@ inline void Profile::Toc() {
     prof.enable_depth--;
   }
   prof.verb_level.pop();
-#endif
 }
 
 inline void Profile::print(const Comm* comm_) {
-#if SCTL_PROFILE >= 0
   ProfileData& prof = ProfData();
   SCTL_ASSERT_MSG(prof.name.empty(), "Missing balancing Toc()");
 
@@ -275,7 +264,6 @@ inline void Profile::print(const Comm* comm_) {
   if (!rank) std::cout << out_stack.top() << '\n';
 
   reset();
-#endif
 }
 
 inline void Profile::reset() {
@@ -295,4 +283,22 @@ inline void Profile::reset() {
   prof.max_m_log.clear();
 }
 
+#else
+
+inline Long Profile::Add_FLOP(Long inc) { return 0; }
+
+inline Long Profile::Add_MEM(Long inc) { return 0; }
+
+inline bool Profile::Enable(bool state) { return false; }
+
+inline void Profile::Tic(const char* name_, const Comm* comm_, bool sync_, Integer verbose) { }
+
+inline void Profile::Toc() { }
+
+inline void Profile::print(const Comm* comm_) { }
+
+inline void Profile::reset() { }
+
+#endif
+
 }  // end namespace