Explorar el Código

fix bug due to implicit conversion from __mmask8 to Vec

Dhairya Malhotra hace 5 años
padre
commit
f9fce8682b
Se han modificado 3 ficheros con 17 adiciones y 9 borrados
  1. 2 2
      Makefile
  2. 7 5
      include/sctl/vec.hpp
  3. 8 2
      src/libkernels.cpp

+ 2 - 2
Makefile

@@ -1,10 +1,10 @@
 AR = ar cru
 
 FC = gfortran
-FFLAGS = -O3 -fopenmp -march=native -Wall
+FFLAGS = -Ofast -fopenmp -march=native -Wall
 
 CXX = g++
-CXXFLAGS = -O3 -fopenmp -march=native -Wall -std=c++11 # need C++11
+CXXFLAGS = -Ofast -fopenmp -march=native -Wall -std=c++11 # need C++11
 
 #CXXFLAGS += -DSCTL_HAVE_SVML -mkl  # enable SVML with FC=ifort and CXX=icpc
 

+ 7 - 5
include/sctl/vec.hpp

@@ -478,8 +478,8 @@ namespace SCTL_NAMESPACE {
       Real real_two;
     };
     Vec x_offset(real_zero);
-    Vec xAnd1 = (((x_+x_offset) & Vec(real_one)) == x_offset);
-    Vec xAnd2 = (((x_+x_offset) & Vec(real_two)) == x_offset);
+    auto xAnd1 = (((x_+x_offset) & Vec(real_one)) == x_offset);
+    auto xAnd2 = (((x_+x_offset) & Vec(real_two)) == x_offset);
 
     Vec s2 = AndNot( c1,xAnd1) | (s1 & xAnd1);
     Vec c2 = AndNot(-s1,xAnd1) | (c1 & xAnd1);
@@ -853,9 +853,7 @@ namespace SCTL_NAMESPACE {
         v = _mm512_set1_pd(a);
       }
 
-      //Vec(const __mmask8& a) {
-      //  v = _mm512_castsi512_pd(_mm512_movm_epi64(a));
-      //}
+      Vec(const __mmask8& a) = delete; // disallow implicit conversions
 
       void Store(ValueType* p) const {
         _mm512_storeu_pd(p, v);
@@ -969,6 +967,10 @@ namespace SCTL_NAMESPACE {
         lhs.v = _mm512_maskz_mov_pd(rhs, lhs.v);
         return lhs;
       }
+      friend Vec AndNot(Vec lhs, const __mmask8& rhs) {
+        lhs.v = _mm512_mask_mov_pd(lhs.v, rhs, _mm512_setzero_pd());
+        return lhs;
+      }
 
       // Assignment operators
       Vec& operator*=(const Vec& rhs) {

+ 8 - 2
src/libkernels.cpp

@@ -1,5 +1,11 @@
 #include <kernels.h>
 #include <template-kernels.hpp>
+#define VECDIM 4
+
+#ifdef __AVX512F__
+#undef VECDIM
+#define VECDIM 8
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -10,7 +16,7 @@ void helm3d_f_(const int32_t* nd, const float* zk, const float* sources, const f
 }
 
 void helm3d_vec_f_(const int32_t* nd, const float* zk, const float* sources, const float* charge, const int32_t* ns, const float* ztarg, const int32_t* nt, float* pot, const float* thresh) {
-  helm3d_vec<float,4>(nd, zk, sources, charge, ns, ztarg, nt, pot, thresh);
+  helm3d_vec<float,VECDIM>(nd, zk, sources, charge, ns, ztarg, nt, pot, thresh);
 }
 
 void helm3d_d_(const int32_t* nd, const double* zk, const double* sources, const double* charge, const int32_t* ns, const double* ztarg, const int32_t* nt, double* pot, const double* thresh) {
@@ -18,7 +24,7 @@ void helm3d_d_(const int32_t* nd, const double* zk, const double* sources, const
 }
 
 void helm3d_vec_d_(const int32_t* nd, const double* zk, const double* sources, const double* charge, const int32_t* ns, const double* ztarg, const int32_t* nt, double* pot, const double* thresh) {
-  helm3d_vec<double,4>(nd, zk, sources, charge, ns, ztarg, nt, pot, thresh);
+  helm3d_vec<double,VECDIM>(nd, zk, sources, charge, ns, ztarg, nt, pot, thresh);
 }
 
 #ifdef __cplusplus