Dhairya Malhotra 7 gadi atpakaļ
vecāks
revīzija
744ed981e2
1 mainītis faili ar 69 papildinājumiem un 56 dzēšanām
  1. 69 56
      include/sctl/vec.hpp

+ 69 - 56
include/sctl/vec.hpp

@@ -238,8 +238,8 @@ namespace SCTL_NAMESPACE {
         for (Integer i = 0; i < N; i++) os << in.v[i] << ' ';
         return os;
       }
-      friend Vec<ValueType,N> approx_rsqrt(const Vec<ValueType,N>& x) {
-        Vec<ValueType,N> r;
+      friend Vec approx_rsqrt(const Vec& x) {
+        Vec r;
         for (int i = 0; i < N; i++) r.v[i] = 1.0 / sqrt(x.v[i]);
         return r;
       }
@@ -267,7 +267,44 @@ namespace SCTL_NAMESPACE {
   };
 
   // Other operators
-  template <class Vec, Integer ORDER = 13> void sincos_intrin(Vec& sinx, Vec& cosx, const Vec& x) {
+  template <class RealVec, class IntVec> RealVec ConvertInt2Real(const IntVec& x) {
+    typedef typename RealVec::ScalarType Real;
+    typedef typename IntVec::ScalarType Int;
+    assert(sizeof(RealVec) == sizeof(IntVec));
+    assert(sizeof(Real) == sizeof(Int));
+    static constexpr Integer SigBits = TypeTraits<Real>::SigBits;
+    union {
+      Int Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
+      Real Creal;
+    };
+    IntVec l(x + IntVec(Cint));
+    return *(RealVec*)&l - RealVec(Creal);
+  }
+  template <class IntVec, class RealVec> IntVec RoundReal2Int(const RealVec& x) {
+    typedef typename RealVec::ScalarType Real;
+    typedef typename IntVec::ScalarType Int;
+    assert(sizeof(RealVec) == sizeof(IntVec));
+    assert(sizeof(Real) == sizeof(Int));
+    static constexpr Integer SigBits = TypeTraits<Real>::SigBits;
+    union {
+      Int Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
+      Real Creal;
+    };
+    RealVec d(x + RealVec(Creal));
+    return *(IntVec*)&d - IntVec(Cint);
+  }
+  template <class Vec> Vec RoundReal2Real(const Vec& x) {
+    typedef typename Vec::ScalarType Real;
+    static constexpr Integer SigBits = TypeTraits<Real>::SigBits;
+    union {
+      int64_t Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
+      Real Creal;
+    };
+    Vec Vreal(Creal);
+    return (x + Vreal) - Vreal;
+  }
+  template <class Vec> void sincos_intrin(Vec& sinx, Vec& cosx, const Vec& x) {
+    constexpr Integer ORDER = 13;
     // ORDER    ERROR
     //     1 8.81e-02
     //     3 2.45e-03
@@ -352,20 +389,20 @@ namespace SCTL_NAMESPACE {
     Vec c1 = cos_squared * inv_cos; // 1 - cycle
 
     union {
-      unsigned char int_one = 1;
-      Real real_one;
+      int64_t int_zero = 0 + (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
+      Real real_zero;
     };
     union {
-      unsigned char int_two = 2;
-      Real real_two;
+      int64_t int_one = 1 + (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
+      Real real_one;
     };
     union {
-      int64_t Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
-      Real Creal;
+      int64_t int_two = 2 + (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
+      Real real_two;
     };
-    Vec x_offset(Creal);
-    Vec xAnd1 = (((x_+x_offset) & Vec(real_one)) == Vec::Zero());
-    Vec xAnd2 = (((x_+x_offset) & Vec(real_two)) == Vec::Zero());
+    Vec x_offset(real_zero);
+    Vec xAnd1 = (((x_+x_offset) & Vec(real_one)) == x_offset);
+    Vec xAnd2 = (((x_+x_offset) & Vec(real_two)) == x_offset);
 
     Vec s2 = AndNot( c1,xAnd1) | (s1 & xAnd1);
     Vec c2 = AndNot(-s1,xAnd1) | (c1 & xAnd1);
@@ -375,45 +412,10 @@ namespace SCTL_NAMESPACE {
     sinx = s3;
     cosx = c3;
   }
-  template <class RealVec, class IntVec> RealVec ConvertInt2Real(const IntVec& x) {
-    typedef typename RealVec::ScalarType Real;
-    typedef typename IntVec::ScalarType Int;
-    assert(sizeof(RealVec) == sizeof(IntVec));
-    assert(sizeof(Real) == sizeof(Int));
-    static constexpr Integer SigBits = TypeTraits<Real>::SigBits;
-    union {
-      Int Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
-      Real Creal;
-    };
-    IntVec l(x + IntVec(Cint));
-    return *(RealVec*)&l - RealVec(Creal);
-  }
-  template <class IntVec, class RealVec> IntVec RoundReal2Int(const RealVec& x) {
-    typedef typename RealVec::ScalarType Real;
-    typedef typename IntVec::ScalarType Int;
-    assert(sizeof(RealVec) == sizeof(IntVec));
-    assert(sizeof(Real) == sizeof(Int));
-    static constexpr Integer SigBits = TypeTraits<Real>::SigBits;
-    union {
-      Int Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
-      Real Creal;
-    };
-    RealVec d(x + RealVec(Creal));
-    return *(IntVec*)&d - IntVec(Cint);
-  }
-  template <class Vec> Vec RoundReal2Real(const Vec& x) {
-    typedef typename Vec::ScalarType Real;
-    static constexpr Integer SigBits = TypeTraits<Real>::SigBits;
-    union {
-      int64_t Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
-      Real Creal;
-    };
-    Vec Vreal(Creal);
-    return (x + Vreal) - Vreal;
-  }
 
 #ifdef __AVX__
   template <> class alignas(sizeof(double)*4) Vec<double,4> {
+    typedef __m256d VecType;
     typedef double ValueType;
     static constexpr Integer N = 4;
     public:
@@ -478,7 +480,7 @@ namespace SCTL_NAMESPACE {
       // C-style cast
       template <class RetValueType> explicit operator Vec<RetValueType,N>() const {
         Vec<RetValueType,N> r;
-        __m256d& ret_v = *(__m256d*)&r.v;
+        VecType& ret_v = *(VecType*)&r.v;
         ret_v = v;
         return r;
       }
@@ -580,28 +582,39 @@ namespace SCTL_NAMESPACE {
 
       friend std::ostream& operator<<(std::ostream& os, const Vec& in) {
         union {
-          __m256d vec;
+          VecType vec;
           ValueType val[N];
         };
         vec = in.v;
         for (Integer i = 0; i < N; i++) os << val[i] << ' ';
         return os;
       }
-      friend Vec RoundReal2Real(const Vec& x) {
-        Vec r;
-        r.v = _mm256_round_pd(x.v,_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC);
-        return r;
-      }
       friend Vec approx_rsqrt(const Vec& x) {
         Vec r;
         r.v = _mm256_cvtps_pd(_mm_rsqrt_ps(_mm256_cvtpd_ps(x.v)));
         return r;
       }
 
+      template <class Vec> friend Vec RoundReal2Real(const Vec& x);
+
     private:
 
-      __m256d v;
+      VecType v;
   };
+
+  template <> Vec<double,4> RoundReal2Real(const Vec<double,4>& x) {
+    Vec<double,4> r;
+    r.v = _mm256_round_pd(x.v,_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC);
+    return r;
+  }
+
+  #ifdef SCTL_HAVE_SVML
+  template <> void sincos_intrin(Vec<double,4>& sinx, Vec<double,4>& cosx, const Vec<double,4>& x) {
+    sinx.v = _mm256_sin_pd(x.v);
+    cosx.v = _mm256_cos_pd(x.v);
+  }
+  #endif
+
 #endif
 
 }