před 5 roky · 58c573f43b
--- a/include/sctl/vec.hpp
+++ b/include/sctl/vec.hpp
@@ -26,7 +26,6 @@
 
				 #endif
			
 
				 
			
 
				 // TODO: Implement AVX versions of floats, int32_t, int64_t
			
 
				-// TODO: Add operators to reinterpret types
			
 
				 
			
 
				 // TODO: Check alignment when SCTL_MEMDEBUG is defined
			
 
				 // TODO: Replace pointers with iterators
			
@@ -127,7 +126,7 @@ namespace SCTL_NAMESPACE {
 
				         return r;
			
 
				       }
			
 
				 
			
 
				-      Vec() {}
			
 
				+      Vec() = default; // defaulted instead of user-provided; NOTE(review): presumably so Vec stays trivially default-constructible for the anonymous union in the reinterpret specialization - confirm
			
 
				 
			
 
				       Vec(const ValueType& a) {
			
 
				         for (Integer i = 0; i < N; i++) v[i] = a;
			
@@ -160,11 +159,11 @@ namespace SCTL_NAMESPACE {
 
				       }
			
 
				 
			
 
				       // C-style cast
			
 
				-      template <class RetValueType> explicit operator Vec<RetValueType,N>() const {
			
 
				-        Vec<RetValueType,N> r;
			
 
				-        for (Integer i = 0; i < N; i++) r.v[i] = (RetValueType)v[i];
			
 
				-        return r;
			
 
				-      }
			
 
				+      //template <class RetValueType> explicit operator Vec<RetValueType,N>() const {
			
 
				+      //  Vec<RetValueType,N> r;
			
 
				+      //  for (Integer i = 0; i < N; i++) r.v[i] = (RetValueType)v[i];
			
 
				+      //  return r;
			
 
				+      //}
			
 
				 
			
 
				       // Arithmetic operators
			
 
				       friend Vec operator*(Vec lhs, const Vec& rhs) {
			
@@ -313,6 +312,8 @@ namespace SCTL_NAMESPACE {
 
				         return r;
			
 
				       }
			
 
				 
			
 
				+      template <class Vec1, class Vec2> friend Vec1 reinterpret(const Vec2& x);
			
 
				+
			
 
				     private:
			
 
				 
			
 
				       static const ValueType const_zero() {
			
@@ -333,9 +334,16 @@ namespace SCTL_NAMESPACE {
 
				       }
			
 
				 
			
 
				       ValueType v[N];
			
 
				+      friend class Vec<IntegerType,N>;
			
 
				+      friend class Vec<RealType,N>;
			
 
				   };
			
 
				 
			
 
				   // Other operators
			
 
				+  template <class RetVec, class Vec> RetVec reinterpret(const Vec& v){ // returns, by value, the object representation of v viewed as a RetVec
			
 
				+    static_assert(sizeof(RetVec) == sizeof(Vec), "reinterpret: source and destination types must have the same size!"); // message form also compiles before C++17
			
 
				+    const RetVec& r = *reinterpret_cast<const RetVec*>(&v); // named cast keeps the const qualifier of v; the copy is made by the return below
			
 
				+    return r;
			
 
				+  }
			
 
				   template <class RealVec, class IntVec> RealVec ConvertInt2Real(const IntVec& x) {
			
 
				     typedef typename RealVec::ScalarType Real;
			
 
				     typedef typename IntVec::ScalarType Int;
			
@@ -349,34 +357,20 @@ namespace SCTL_NAMESPACE {
 
				     IntVec l(x + IntVec(Cint));
			
 
				     return *(RealVec*)&l - RealVec(Creal);
			
 
				   }
			
 
				-  //  union {
			
 
				-  //    Int Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(Real)*8 - SigBits - 2))-1)) << SigBits);
			
 
				-  //    Real Creal;
			
 
				-  //  };
			
 
				-  //  RealVec d(x + RealVec(Creal));
			
 
				-  //  return *(IntVec*)&d - IntVec(Cint);
			
 
				-  //}
			
 
				   template <class Vec> typename Vec::IntegerVec RoundReal2Int(const Vec& x) {
			
 
				     using IntegerType = typename Vec::IntegerType;
			
 
				     using RealType = typename Vec::RealType;
			
 
				     using IntegerVec = typename Vec::IntegerVec;
			
 
				     using RealVec = typename Vec::RealVec;
			
 
				+    static_assert(std::is_same<RealVec,Vec>::value, "RoundReal2Int: expected real input argument!"); // only real vectors are accepted; the integer pass-through branch removed below is now a compile-time error
			
 
				 
			
 
				-    if (std::is_same<IntegerVec,Vec>::value) {
			
 
				-      IntegerVec v = IntegerVec::LoadAligned((const IntegerType*)&x); // TODO: simplify: return x;
			
 
				-      return v;
			
 
				-    } else if (std::is_same<RealVec,Vec>::value) {
			
 
				-      static constexpr Integer SigBits = TypeTraits<RealType>::SigBits;
			
 
				-      union {
			
 
				-        IntegerType Cint = (1UL << (SigBits - 1)) + ((SigBits + ((1UL<<(sizeof(RealType)*8 - SigBits - 2))-1)) << SigBits);
			
 
				-        RealType Creal;
			
 
				-      };
			
 
				-      RealVec d(x + RealVec(Creal));
			
 
				-      return IntegerVec::LoadAligned((const IntegerType*)&d) - IntegerVec(Cint);
			
 
				-    } else {
			
 
				-      IntegerVec v;
			
 
				-      return v;
			
 
				-    }
			
 
				+    static constexpr Integer SigBits = TypeTraits<RealType>::SigBits; // NOTE(review): presumably the number of significand bits of RealType - confirm against TypeTraits
			
 
				+    union { // Cint and Creal share storage, so Creal is read as the real value whose bit pattern is Cint
			
 
				+      IntegerType Cint = (1ULL << (SigBits - 1)) + ((SigBits + ((1ULL<<(sizeof(RealType)*8 - SigBits - 2))-1)) << SigBits); // 1ULL is at least 64 bits wide, so the shifts stay defined where unsigned long is only 32 bits
			
 
				+      RealType Creal;
			
 
				+    };
			
 
				+    RealVec d = x + RealVec(Creal); // add the constant to every element of x
			
 
				+    return reinterpret<IntegerVec>(d) - IntegerVec(Cint); // view the sum's bits as integers and subtract the constant's bit pattern
			
 
				   }
			
 
				   template <class Vec> Vec RoundReal2Real(const Vec& x) {
			
 
				     typedef typename Vec::ScalarType Real;
			
@@ -626,7 +620,7 @@ namespace SCTL_NAMESPACE {
 
				         return r;
			
 
				       }
			
 
				 
			
 
				-      Vec() {}
			
 
				+      Vec() = default; // compiler-generated default constructor replaces the empty user-provided one
			
 
				 
			
 
				       Vec(const ValueType& a) {
			
 
				         v = _mm256_set1_pd(a);
			
@@ -656,12 +650,8 @@ namespace SCTL_NAMESPACE {
 
				       }
			
 
				 
			
 
				       // C-style cast
			
 
				-      template <class RetValueType> explicit operator Vec<RetValueType,N>() const {
			
 
				-        Vec<RetValueType,N> r;
			
 
				-        VecType& ret_v = *(VecType*)&r.v;
			
 
				-        ret_v = v;
			
 
				-        return r;
			
 
				-      }
			
 
				+      //template <class RetValueType> explicit operator Vec<RetValueType,N>() const {
			
 
				+      //}
			
 
				 
			
 
				       // Arithmetic operators
			
 
				       friend Vec operator*(Vec lhs, const Vec& rhs) {
			
@@ -781,6 +771,7 @@ namespace SCTL_NAMESPACE {
 
				         return r;
			
 
				       }
			
 
				 
			
 
				+      template <class Vec1, class Vec2> friend Vec1 reinterpret(const Vec2& x);
			
 
				       template <class Vec> friend Vec RoundReal2Real(const Vec& x);
			
 
				       template <class Vec> friend void sincos_intrin(Vec& sinx, Vec& cosx, const Vec& x);
			
 
				       template <class Vec> friend void exp_intrin(Vec& expx, const Vec& x);
			
@@ -790,6 +781,15 @@ namespace SCTL_NAMESPACE {
 
				       VecType v;
			
 
				   };
			
 
				 
			
 
				+  template <> inline Vec<int64_t,4> reinterpret<Vec<int64_t,4>,Vec<double,4>>(const Vec<double,4>& x){ // AVX overload: double x 4 -> int64_t x 4 with the bits left untouched
			
 
				+    union { // both members overlay the same storage
			
 
				+      Vec<int64_t,4> as_int64;
			
 
				+      __m256i raw_bits;
			
 
				+    };
			
 
				+    raw_bits = _mm256_castpd_si256(x.v); // register-type cast only; no numeric conversion takes place
			
 
				+    return as_int64; // hand the same bytes back as the integer vector
			
 
				+  }
			
 
				+
			
 
				   template <> inline Vec<double,4> RoundReal2Real(const Vec<double,4>& x) {
			
 
				     Vec<double,4> r;
			
 
				     r.v = _mm256_round_pd(x.v,_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC);