// SIMD vector wrapper header (include guard _SCTL_VEC_WRAPPER_HPP_, namespace SCTL_NAMESPACE).
//
// Contents, in order of appearance:
//  * DefaultVecLen(): returns 64, 32 or 16 divided by sizeof(ScalarType) when the
//    AVX512 (__AVX512__ / __AVX512F__), AVX (__AVX__) or SSE4.2 (__SSE4_2__) macros
//    are defined, and 1 otherwise. Each branch static_asserts that SCTL_ALIGN_BYTES
//    is at least 64 / 32 / 16 / 8 respectively.
//  * class Vec: a value type holding a single private VData member `v`, declared
//    with alignas(sizeof(ValueType) * N). Its members and friends forward to the
//    *_intrin helpers:
//      - Zero / Load1 / Load / LoadAligned build a Vec from zero_intrin,
//        load1_intrin, loadu_intrin and load_intrin.
//      - Store / StoreAligned / StreamStoreAligned call storeu_intrin,
//        store_intrin and stream_store_intrin.
//      - operator[] reads one element via extract_intrin; insert() writes one
//        element via insert_intrin.
//      - Arithmetic, bitwise, shift, min/max, FMA, select, sincos and exp map
//        one-to-one onto the correspondingly named *_intrin function.
//      - operator<<(std::ostream&) prints every element followed by a space.
//      - set() / get() give direct access to the underlying VData.
//      - The private InitVec helper casts each constructor argument to ScalarType
//        and finally hands the full argument list to set_intrin.
//  * Free conversion functions ConvertInt2Real, RoundReal2Int and convert2vec.
//  * approx_rsqrt: when digits == -1, digits_ is derived from
//    TypeTraits::SigBits * 0.3010299957 (0.3010299957 ~= log10(2); presumably this
//    converts significand bits to decimal digits - confirm). The result comes from
//    rsqrt_approx_intrin::eval, optionally with a mask.
//  * approx_sqrt: computed as x * approx_rsqrt(x) (masked overload likewise).
//  * approx_sincos: falls back to sincos() when digits == -1 or ORDER > 20,
//    otherwise calls approx_sincos_intrin.
//  * approx_exp: falls back to exp() when digits == -1 or ORDER > 13, otherwise
//    calls approx_exp_intrin.
//  * printb: its definition is cut off at the end of the visible text.
//
// NOTE(review): this copy of the file appears to have lost every template
// parameter/argument list and every #include target (e.g. "template constexpr
// Integer DefaultVecLen()", "#include #include"). For the same reason all six
// comparison operators read as the identical call comp_intrin(a.v, b.v);
// presumably each originally passed a distinct comparison selector as a
// template argument - confirm against the upstream source.
// NOTE(review): the file is also collapsed onto a few physical lines, so each
// `//` comment below swallows the code that follows it on the same line. Restore
// the original line breaks and angle-bracket contents before compiling.
#ifndef _SCTL_VEC_WRAPPER_HPP_ #define _SCTL_VEC_WRAPPER_HPP_ #include #include SCTL_INCLUDE(intrin-wrapper.hpp) #include #include #include namespace SCTL_NAMESPACE { #if defined(__AVX512__) || defined(__AVX512F__) static_assert(SCTL_ALIGN_BYTES >= 64, "Insufficient memory alignment for SIMD vector types"); template constexpr Integer DefaultVecLen() { return 64/sizeof(ScalarType); } #elif defined(__AVX__) static_assert(SCTL_ALIGN_BYTES >= 32, "Insufficient memory alignment for SIMD vector types"); template constexpr Integer DefaultVecLen() { return 32/sizeof(ScalarType); } #elif defined(__SSE4_2__) static_assert(SCTL_ALIGN_BYTES >= 16, "Insufficient memory alignment for SIMD vector types"); template constexpr Integer DefaultVecLen() { return 16/sizeof(ScalarType); } #else static_assert(SCTL_ALIGN_BYTES >= 8, "Insufficient memory alignment for SIMD vector types"); template constexpr Integer DefaultVecLen() { return 1; } #endif template ()> class alignas(sizeof(ValueType) * N) Vec { public: using ScalarType = ValueType; using VData = VecData; using MaskType = Mask; static constexpr Integer Size() { return N; } static inline Vec Zero() { Vec r; r.v = zero_intrin(); return r; } static inline Vec Load1(ScalarType const* p) { Vec r; r.v = load1_intrin(p); return r; } static inline Vec Load(ScalarType const* p) { Vec r; r.v = loadu_intrin(p); return r; } static inline Vec LoadAligned(ScalarType const* p) { Vec r; r.v = load_intrin(p); return r; } Vec() = default; Vec(const Vec&) = default; Vec& operator=(const Vec&) = default; ~Vec() = default; inline Vec(const VData& v_) : v(v_) {} inline Vec(const ScalarType& a) : Vec(set1_intrin(a)) {} template inline Vec(T x, T1... 
args) : Vec(InitVec::template apply((ScalarType)x,args...)) {} inline void Store(ScalarType* p) const { storeu_intrin(p,v); } inline void StoreAligned(ScalarType* p) const { store_intrin(p,v); } inline void StreamStoreAligned(ScalarType* p) const { stream_store_intrin(p,v); } // Conversion operators friend inline Mask convert2mask(const Vec& a) { return convert_vec2mask_intrin(a.v); } friend inline Vec RoundReal2Real(const Vec& x) { return round_real2real_intrin(x.v); } template friend IntVec RoundReal2Int(const RealVec& x); template friend RealVec ConvertInt2Real(const IntVec& x); // Element access inline ScalarType operator[](Integer i) const { return extract_intrin(v,i); } inline void insert(Integer i, ScalarType value) { insert_intrin(v,i,value); } // Arithmetic operators inline Vec operator+() const { return *this; } inline Vec operator-() const { return unary_minus_intrin(v); // Zero() - (*this); } friend inline Vec operator*(const Vec& a, const Vec& b) { return mul_intrin(a.v, b.v); } friend inline Vec operator/(const Vec& a, const Vec& b) { return div_intrin(a.v, b.v); } friend inline Vec operator+(const Vec& a, const Vec& b) { return add_intrin(a.v, b.v); } friend inline Vec operator-(const Vec& a, const Vec& b) { return sub_intrin(a.v, b.v); } friend inline Vec FMA(const Vec& a, const Vec& b, const Vec& c) { return fma_intrin(a.v, b.v, c.v); } // Comparison operators friend inline Mask operator< (const Vec& a, const Vec& b) { return comp_intrin(a.v, b.v); } friend inline Mask operator<=(const Vec& a, const Vec& b) { return comp_intrin(a.v, b.v); } friend inline Mask operator>=(const Vec& a, const Vec& b) { return comp_intrin(a.v, b.v); } friend inline Mask operator> (const Vec& a, const Vec& b) { return comp_intrin(a.v, b.v); } friend inline Mask operator==(const Vec& a, const Vec& b) { return comp_intrin(a.v, b.v); } friend inline Mask operator!=(const Vec& a, const Vec& b) { return comp_intrin(a.v, b.v); } friend inline Vec select(const Mask& m, const 
Vec& a, const Vec& b) { return select_intrin(m, a.v, b.v); } // Bitwise operators inline Vec operator~() const { return not_intrin(v); } friend inline Vec operator&(const Vec& a, const Vec& b) { return and_intrin(a.v, b.v); } friend inline Vec operator^(const Vec& a, const Vec& b) { return xor_intrin(a.v, b.v); } friend inline Vec operator|(const Vec& a, const Vec& b) { return or_intrin(a.v, b.v); } friend inline Vec AndNot(const Vec& a, const Vec& b) { // return a & ~b return andnot_intrin(a.v, b.v); } // Bitshift friend inline Vec operator<<(const Vec& lhs, const Integer& rhs) { return bitshiftleft_intrin(lhs.v, rhs); } friend inline Vec operator>>(const Vec& lhs, const Integer& rhs) { return bitshiftright_intrin(lhs.v, rhs); } // Assignment operators inline Vec& operator=(const ScalarType& a) { v = set1_intrin(a); return *this; } inline Vec& operator*=(const Vec& rhs) { v = mul_intrin(v, rhs.v); return *this; } inline Vec& operator/=(const Vec& rhs) { v = div_intrin(v, rhs.v); return *this; } inline Vec& operator+=(const Vec& rhs) { v = add_intrin(v, rhs.v); return *this; } inline Vec& operator-=(const Vec& rhs) { v = sub_intrin(v, rhs.v); return *this; } inline Vec& operator&=(const Vec& rhs) { v = and_intrin(v, rhs.v); return *this; } inline Vec& operator^=(const Vec& rhs) { v = xor_intrin(v, rhs.v); return *this; } inline Vec& operator|=(const Vec& rhs) { v = or_intrin(v, rhs.v); return *this; } // Other operators friend inline Vec max(const Vec& lhs, const Vec& rhs) { return max_intrin(lhs.v, rhs.v); } friend inline Vec min(const Vec& lhs, const Vec& rhs) { return min_intrin(lhs.v, rhs.v); } // Special functions template friend RealVec approx_rsqrt(const RealVec& x); template friend RealVec approx_rsqrt(const RealVec& x, const typename RealVec::MaskType& m); friend inline void sincos(Vec& sinx, Vec& cosx, const Vec& x) { sincos_intrin(sinx.v, cosx.v, x.v); } template friend void approx_sincos(RealVec& sinx, RealVec& cosx, const RealVec& x); friend inline Vec 
exp(const Vec& x) { return exp_intrin(x.v); } template friend RealVec approx_exp(const RealVec& x); //template friend Vec1 reinterpret(const Vec2& x); //template friend Vec RoundReal2Real(const Vec& x); //template friend void exp_intrin(Vec& expx, const Vec& x); // Print friend inline std::ostream& operator<<(std::ostream& os, const Vec& in) { for (Integer i = 0; i < Size(); i++) os << in[i] << ' '; return os; } inline void set(const VData& v_) { v = v_; } inline const VData& get() const { return v; } private: template struct InitVec { template static inline VData apply(T1... start, T x, T2... rest) { return InitVec::template apply(start..., (ScalarType)x, rest...); } }; template struct InitVec { template static inline VData apply(T1... start, T x) { return set_intrin(start..., (ScalarType)x); } }; VData v; }; // Conversion operators template inline RealVec ConvertInt2Real(const IntVec& x) { return convert_int2real_intrin(x.v); } template inline IntVec RoundReal2Int(const RealVec& x) { return round_real2int_intrin(x.v); } template inline Vec convert2vec(const MaskType& a) { return convert_mask2vec_intrin(a); } // Special functions template inline RealVec approx_rsqrt(const RealVec& x) { static constexpr Integer digits_ = (digits==-1 ? (Integer)(TypeTraits::SigBits*0.3010299957) : digits); return rsqrt_approx_intrin::eval(x.v); } template inline RealVec approx_rsqrt(const RealVec& x, const typename RealVec::MaskType& m) { static constexpr Integer digits_ = (digits==-1 ? 
(Integer)(TypeTraits::SigBits*0.3010299957) : digits); return rsqrt_approx_intrin::eval(x.v, m); } template inline RealVec approx_sqrt(const RealVec& x) { return x*approx_rsqrt(x); } template inline RealVec approx_sqrt(const RealVec& x, const typename RealVec::MaskType& m) { return x*approx_rsqrt(x, m); } template inline void approx_sincos(RealVec& sinx, RealVec& cosx, const RealVec& x) { constexpr Integer ORDER = (digits>1?digits>9?digits>14?digits>17?digits-1:digits:digits+1:digits+2:1); if (digits == -1 || ORDER > 20) sincos(sinx, cosx, x); else approx_sincos_intrin(sinx.v, cosx.v, x.v); } template inline RealVec approx_exp(const RealVec& x) { constexpr Integer ORDER = digits; if (digits == -1 || ORDER > 13) return exp(x); else return approx_exp_intrin(x.v); } // Other operators template inline void printb(const ValueType& x) { // print binary union { ValueType v; uint8_t c[sizeof(ValueType)]; } u = {x}; //std::cout<