Parcourir la source

Add support for SVML

Dhairya Malhotra il y a 5 ans
Parent
commit
6125dd22e7
2 fichiers modifiés avec 11 ajouts et 4 suppressions
  1. Makefile (+2 −0)
  2. include/sctl/vec.hpp (+9 −4)

+ 2 - 0
Makefile

@@ -6,6 +6,8 @@ FFLAGS = -O3 -fopenmp -march=native -Wall
 CXX = g++
 CXXFLAGS = -O3 -fopenmp -march=native -Wall -std=c++11 # need C++11
 
+#CXXFLAGS += -DSCTL_HAVE_SVML -mkl  # enable SVML with FC=ifort and CXX=icpc
+
 #CXXFLAGS += -DSCTL_MEMDEBUG # Enable memory checks
 CXXFLAGS += -DSCTL_PROFILE=5 -DSCTL_VERBOSE # Enable profiling
 

+ 9 - 4
include/sctl/vec.hpp

@@ -576,12 +576,12 @@ namespace SCTL_NAMESPACE {
       //                                                    )
       //                                  ); // int_e2 = int_one + (int_x_ << SigBits);
       IntegerVec int_e2 = IntegerVec(int_one) + (int_x_ << SigBits);
-      e2 = RealVec::LoadAligned((double*)&int_e2);
 
       // Handle underflow
-      static constexpr Integer max_exp = (2UL<<((sizeof(Real)*8-SigBits-3)));
-      auto underflow_cond = (int_x_ > IntegerVec(-max_exp));
-      e2 &= RealVec::LoadAligned((const RealType*)&underflow_cond);
+      static constexpr IntegerType max_exp = -(1UL<<((sizeof(Real)*8-SigBits-2)));
+      int_e2 &= (int_x_ > IntegerVec(max_exp));
+
+      e2 = RealVec::LoadAligned((RealType*)&int_e2);
     }
 
     expx = e1 * e2;
@@ -783,6 +783,7 @@ namespace SCTL_NAMESPACE {
 
       template <class Vec> friend Vec RoundReal2Real(const Vec& x);
       template <class Vec> friend void sincos_intrin(Vec& sinx, Vec& cosx, const Vec& x);
+      template <class Vec> friend void exp_intrin(Vec& expx, const Vec& x);
 
     private:
 
@@ -800,6 +801,10 @@ namespace SCTL_NAMESPACE {
     sinx.v = _mm256_sin_pd(x.v);
     cosx.v = _mm256_cos_pd(x.v);
   }
+
+  template <> inline void exp_intrin(Vec<double,4>& expx, const Vec<double,4>& x) {
+    expx.v = _mm256_exp_pd(x.v);
+  }
   #endif
 
 #endif