vec-test.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. #ifndef _SCTL_VEC_TEST_HPP_
  2. #define _SCTL_VEC_TEST_HPP_
  3. #include <sctl/common.hpp>
  4. #include SCTL_INCLUDE(vec.hpp)
  5. #include SCTL_INCLUDE(vector.hpp)
  6. #include <cassert>
  7. #include <cstdint>
  8. #include <ostream>
  9. namespace SCTL_NAMESPACE {
  10. // Verify Vec class
  11. template <class ValueType = double, Integer N = 1> class VecTest {
  12. public:
  13. using VecType = Vec<ValueType,N>;
  14. using ScalarType = typename VecType::ScalarType;
  15. using MaskType = Mask<typename VecType::VData>;
  16. static void test() {
  17. for (Integer i = 0; i < 1000; i++) {
  18. VecTest<ScalarType, 1>::test_all_types();
  19. VecTest<ScalarType, 2>::test_all_types();
  20. VecTest<ScalarType, 4>::test_all_types();
  21. VecTest<ScalarType, 8>::test_all_types();
  22. VecTest<ScalarType,16>::test_all_types();
  23. VecTest<ScalarType,32>::test_all_types();
  24. VecTest<ScalarType,64>::test_all_types();
  25. }
  26. }
  27. static void test_all_types() {
  28. VecTest< int8_t,N>::test_all();
  29. VecTest<int16_t,N>::test_all();
  30. VecTest<int32_t,N>::test_all();
  31. VecTest<int64_t,N>::test_all();
  32. VecTest<float,N>::test_all();
  33. VecTest<float,N>::test_reals();
  34. VecTest<double,N>::test_all();
  35. VecTest<double,N>::test_reals();
  36. //VecTest<long double,N>::test_all();
  37. //VecTest<long double,N>::test_reals();
  38. #ifdef SCTL_QUAD_T
  39. VecTest<QuadReal,N>::test_all();
  40. VecTest<QuadReal,N>::test_reals();
  41. #endif
  42. }
  43. static void test_all() {
  44. if (N*sizeof(ScalarType)*8<=512) {
  45. test_init();
  46. test_bitwise(); // TODO: fails for 'long double'
  47. test_arithmetic();
  48. test_maxmin();
  49. test_mask(); // TODO: fails for 'long double'
  50. test_comparison(); // TODO: fails for 'long double'
  51. }
  52. }
  53. static void test_reals() {
  54. if (N*sizeof(ScalarType)*8<=512) {
  55. test_reals_convert(); // TODO: fails for 'long double'
  56. test_reals_specialfunc();
  57. test_reals_rsqrt();
  58. }
  59. }
  60. private:
  61. static void test_init() {
  62. sctl::Vector<ScalarType> x(N+1), y(N+1), z(N);
  63. // Constructor: Vec(v)
  64. VecType v1((ScalarType)2);
  65. for (Integer i = 0; i < N; i++) {
  66. SCTL_ASSERT(v1[i] == (ScalarType)2);
  67. }
  68. // Constructor: Vec(v1,..,vn)
  69. VecType v2 = InitVec<N>::apply();
  70. for (Integer i = 0; i < N; i++) {
  71. SCTL_ASSERT(v2[i] == (ScalarType)(i+1));
  72. }
  73. // insert, operator[]
  74. for (Integer i = 0; i < N; i++) {
  75. v1.insert(i, (ScalarType)(i+2));
  76. }
  77. for (Integer i = 0; i < N; i++) {
  78. SCTL_ASSERT(v1[i] == (ScalarType)(i+2));
  79. }
  80. // Load1
  81. for (Integer i = 0; i < N+1; i++) {
  82. x[i] = (ScalarType)(i+7);
  83. }
  84. v1 = VecType::Load1(&x[1]);
  85. for (Integer i = 0; i < N; i++) {
  86. SCTL_ASSERT(v1[i] == (ScalarType)8);
  87. }
  88. // Load, Store
  89. v1 = VecType::Load(&x[1]);
  90. v1.Store(&y[1]);
  91. for (Integer i = 0; i < N; i++) {
  92. SCTL_ASSERT(y[i+1] == (ScalarType)(i+8));
  93. }
  94. // LoadAligned, StoreAligned
  95. v1 = VecType::LoadAligned(&x[0]);
  96. v1.StoreAligned(&z[0]);
  97. for (Integer i = 0; i < N; i++) {
  98. SCTL_ASSERT(z[i] == (ScalarType)(i+7));
  99. }
  100. // SetZero
  101. v1 = VecType::Zero();
  102. for (Integer i = 0; i < N; i++) {
  103. SCTL_ASSERT(v1[i] == (ScalarType)0);
  104. }
  105. // Assignment operators
  106. v1 = (ScalarType)3;
  107. for (Integer i = 0; i < N; i++) {
  108. SCTL_ASSERT(v1[i] == (ScalarType)3);
  109. }
  110. //// get_low, get_high
  111. //auto v_low = v2.get_low();
  112. //auto v_high = v2.get_high();
  113. //for (Integer i = 0; i < N/2; i++) {
  114. // SCTL_ASSERT(v_low[i] == (ScalarType)(N-i));
  115. // SCTL_ASSERT(v_high[i] == (ScalarType)(N-(i+N/2)));
  116. //}
  117. //// Constructor: Vec(v1, v2)
  118. //VecType v3(v_low,v_high);
  119. //for (Integer i = 0; i < N; i++) {
  120. // SCTL_ASSERT(v3[i] == (ScalarType)(N-i));
  121. //}
  122. }
  123. static void test_bitwise() {
  124. UnionType u1, u2, u3, u4, u5, u6, u7;
  125. for (Integer i = 0; i < SizeBytes; i++) {
  126. u1.c[i] = rand();
  127. u2.c[i] = rand();
  128. }
  129. u3.v = ~u1.v;
  130. u4.v = u1.v & u2.v;
  131. u5.v = u1.v ^ u2.v;
  132. u6.v = u1.v | u2.v;
  133. u7.v = AndNot(u1.v, u2.v);
  134. for (Integer i = 0; i < SizeBytes; i++) {
  135. SCTL_ASSERT(u3.c[i] == (int8_t)~u1.c[i]);
  136. SCTL_ASSERT(u4.c[i] == (int8_t)(u1.c[i] & u2.c[i]));
  137. SCTL_ASSERT(u5.c[i] == (int8_t)(u1.c[i] ^ u2.c[i]));
  138. SCTL_ASSERT(u6.c[i] == (int8_t)(u1.c[i] | u2.c[i]));
  139. SCTL_ASSERT(u7.c[i] == (int8_t)(u1.c[i] & (~u2.c[i])));
  140. }
  141. }
  142. static void test_arithmetic() {
  143. UnionType u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16, u17;
  144. for (Integer i = 0; i < N; i++) {
  145. u1.x[i] = (ScalarType)(rand()%100)+1;
  146. u2.x[i] = (ScalarType)(rand()%100)+2;
  147. u3.x[i] = (ScalarType)(rand()%100)+5;
  148. }
  149. u4.v = -u1.v;
  150. u5.v = u1.v + u2.v;
  151. u6.v = u1.v - u2.v;
  152. u7.v = u1.v * u2.v;
  153. u8.v = u1.v / u2.v;
  154. u9.v = FMA(u1.v, u2.v, u3.v);
  155. u10.v = u1.v; u10.v += u2.v;
  156. u11.v = u1.v; u11.v -= u2.v;
  157. u12.v = u1.v; u12.v *= u2.v;
  158. u13.v = u1.v; u13.v /= u2.v;
  159. u14.v = u1.v; u14.v += u2.v[0];
  160. u15.v = u1.v; u15.v -= u2.v[0];
  161. u16.v = u1.v; u16.v *= u2.v[0];
  162. u17.v = u1.v; u17.v /= u2.v[0];
  163. for (Integer i = 0; i < N; i++) {
  164. SCTL_ASSERT(u4.x[i] == (ScalarType)-u1.x[i]);
  165. SCTL_ASSERT(u5.x[i] == (ScalarType)(u1.x[i] + u2.x[i]));
  166. SCTL_ASSERT(u6.x[i] == (ScalarType)(u1.x[i] - u2.x[i]));
  167. SCTL_ASSERT(u7.x[i] == (ScalarType)(u1.x[i] * u2.x[i]));
  168. SCTL_ASSERT(u8.x[i] == (ScalarType)(u1.x[i] / u2.x[i]));
  169. SCTL_ASSERT(u10.x[i] == (ScalarType)(u1.x[i] + u2.x[i]));
  170. SCTL_ASSERT(u11.x[i] == (ScalarType)(u1.x[i] - u2.x[i]));
  171. SCTL_ASSERT(u12.x[i] == (ScalarType)(u1.x[i] * u2.x[i]));
  172. SCTL_ASSERT(u13.x[i] == (ScalarType)(u1.x[i] / u2.x[i]));
  173. SCTL_ASSERT(u14.x[i] == (ScalarType)(u1.x[i] + u2.x[0]));
  174. SCTL_ASSERT(u15.x[i] == (ScalarType)(u1.x[i] - u2.x[0]));
  175. SCTL_ASSERT(u16.x[i] == (ScalarType)(u1.x[i] * u2.x[0]));
  176. SCTL_ASSERT(u17.x[i] == (ScalarType)(u1.x[i] / u2.x[0]));
  177. if (TypeTraits<ScalarType>::Type == DataType::Integer) {
  178. SCTL_ASSERT(u9.x[i] == (ScalarType)(u1.x[i]*u2.x[i] + u3.x[i]));
  179. } else {
  180. auto myabs = [](ScalarType a) {
  181. return (a < 0 ? -a : a);
  182. };
  183. auto machine_eps = [](){
  184. ScalarType eps = 1;
  185. while ((ScalarType)(1+eps/2) > 1) {
  186. eps = eps/2;
  187. }
  188. return eps;
  189. };
  190. static const ScalarType eps = machine_eps();
  191. ScalarType err = myabs(u9.x[i] - (ScalarType)(u1.x[i]*u2.x[i] + u3.x[i]));
  192. ScalarType max_val = myabs(u1.x[i]*u2.x[i]) + myabs(u3.x[i]);
  193. ScalarType rel_err = err / max_val;
  194. SCTL_ASSERT(rel_err < eps);
  195. }
  196. }
  197. }
  198. static void test_maxmin() {
  199. UnionType u1, u2, u3, u4;
  200. for (Integer i = 0; i < N; i++) {
  201. u1.x[i] = (ScalarType)rand();
  202. u2.x[i] = (ScalarType)rand();
  203. }
  204. u3.v = max(u1.v, u2.v);
  205. u4.v = min(u1.v, u2.v);
  206. for (Integer i = 0; i < N; i++) {
  207. SCTL_ASSERT(u3.x[i] == (u1.x[i] < u2.x[i] ? u2.x[i] : u1.x[i]));
  208. SCTL_ASSERT(u4.x[i] == (u1.x[i] < u2.x[i] ? u1.x[i] : u2.x[i]));
  209. }
  210. }
  211. static void test_mask() {
  212. union {
  213. MaskType v;
  214. int8_t c[sizeof(MaskType)];
  215. } u1, u2, u3, u4, u5, u6, u7;
  216. for (Integer i = 0; i < (Integer)sizeof(MaskType); i++) {
  217. u1.c[i] = rand();
  218. u2.c[i] = rand();
  219. }
  220. u3.v = ~u1.v;
  221. u4.v = u1.v & u2.v;
  222. u5.v = u1.v ^ u2.v;
  223. u6.v = u1.v | u2.v;
  224. u7.v = AndNot(u1.v, u2.v);
  225. for (Integer i = 0; i < (Integer)sizeof(MaskType); i++) {
  226. SCTL_ASSERT(u3.c[i] == (int8_t)~u1.c[i]);
  227. SCTL_ASSERT(u4.c[i] == (int8_t)(u1.c[i] & u2.c[i]));
  228. SCTL_ASSERT(u5.c[i] == (int8_t)(u1.c[i] ^ u2.c[i]));
  229. SCTL_ASSERT(u6.c[i] == (int8_t)(u1.c[i] | u2.c[i]));
  230. SCTL_ASSERT(u7.c[i] == (int8_t)(u1.c[i] & (~u2.c[i])));
  231. }
  232. }
  233. static void test_comparison() {
  234. UnionType u1, u2, u3, u4, u5, u6, u7, u8, u9, u10;
  235. for (Integer i = 0; i < SizeBytes; i++) {
  236. u1.c[i] = rand()%4;
  237. u2.c[i] = rand()%4;
  238. u3.c[i] = rand()%4;
  239. u4.c[i] = rand()%4;
  240. }
  241. u5 .v = select((u1.v < u2.v), u3.v, u4.v);
  242. u6 .v = select((u1.v <= u2.v), u3.v, u4.v);
  243. u7 .v = select((u1.v > u2.v), u3.v, u4.v);
  244. u8 .v = select((u1.v >= u2.v), u3.v, u4.v);
  245. u9 .v = select((u1.v == u2.v), u3.v, u4.v);
  246. u10.v = select((u1.v != u2.v), u3.v, u4.v);
  247. for (Integer i = 0; i < N; i++) {
  248. SCTL_ASSERT(u5 .x[i] == (u1.x[i] < u2.x[i] ? u3.x[i] : u4.x[i]));
  249. SCTL_ASSERT(u6 .x[i] == (u1.x[i] <= u2.x[i] ? u3.x[i] : u4.x[i]));
  250. SCTL_ASSERT(u7 .x[i] == (u1.x[i] > u2.x[i] ? u3.x[i] : u4.x[i]));
  251. SCTL_ASSERT(u8 .x[i] == (u1.x[i] >= u2.x[i] ? u3.x[i] : u4.x[i]));
  252. SCTL_ASSERT(u9 .x[i] == (u1.x[i] == u2.x[i] ? u3.x[i] : u4.x[i]));
  253. SCTL_ASSERT(u10.x[i] == (u1.x[i] != u2.x[i] ? u3.x[i] : u4.x[i]));
  254. }
  255. MaskType m0 = (u1.v < u2.v);
  256. VecType v1 = convert2vec(m0);
  257. MaskType m1 = convert2mask(v1);
  258. VecType v2 = select(m1, u3.v, u4.v);
  259. VecType v3 = (u3.v & v1) | AndNot(u4.v, v1);
  260. for (Integer i = 0; i < N; i++) {
  261. SCTL_ASSERT(v2[i] == (u1.x[i] < u2.x[i] ? u3.x[i] : u4.x[i]));
  262. SCTL_ASSERT(v3[i] == (u1.x[i] < u2.x[i] ? u3.x[i] : u4.x[i]));
  263. }
  264. }
  265. static void test_reals_convert() {
  266. using IntVec = Vec<typename IntegerType<sizeof(ScalarType)>::value,N>;
  267. using RealVec = Vec<ScalarType,N>;
  268. static_assert(TypeTraits<ScalarType>::Type == DataType::Real, "Expected real type!");
  269. RealVec a = RealVec::Zero();
  270. for (Integer i = 0; i < N; i++) a.insert(i, (ScalarType)(drand48()-0.5)*100);
  271. IntVec b = RoundReal2Int<IntVec>(a);
  272. RealVec c = RoundReal2Real(a);
  273. RealVec d = ConvertInt2Real<RealVec>(b);
  274. for (Integer i = 0; i < N; i++) {
  275. SCTL_ASSERT(b[i] == (typename IntVec::ScalarType)round(a[i]));
  276. SCTL_ASSERT(c[i] == (ScalarType)(typename IntVec::ScalarType)round(a[i]));
  277. SCTL_ASSERT(d[i] == (ScalarType)b[i]);
  278. }
  279. }
  280. static void test_reals_specialfunc() {
  281. VecType v0 = VecType::Zero(), v1, v2, v3;
  282. for (Integer i = 0; i < N; i++) {
  283. v0.insert(i, (ScalarType)(drand48()-0.5)*4*const_pi<ScalarType>());
  284. }
  285. sincos(v1, v2, v0);
  286. v3 = exp(v0);
  287. for (Integer i = 0; i < N; i++) {
  288. ScalarType err_tol = std::max<ScalarType>((ScalarType)1.77e-15, (pow<TypeTraits<ScalarType>::SigBits-3,ScalarType>((ScalarType)0.5))); // TODO: fix for accuracy greater than 1.77e-15
  289. SCTL_ASSERT(fabs(v1[i] - sin<ScalarType>(v0[i])) < err_tol);
  290. SCTL_ASSERT(fabs(v2[i] - cos<ScalarType>(v0[i])) < err_tol);
  291. SCTL_ASSERT(fabs(v3[i] - exp<ScalarType>(v0[i]))/fabs(exp<ScalarType>(v0[i])) < err_tol);
  292. }
  293. approx_sincos<3>(v1, v2, v0);
  294. v3 = approx_exp<3>(v0);
  295. for (Integer i = 0; i < N; i++) {
  296. ScalarType err_tol = (ScalarType)1e-3;
  297. SCTL_ASSERT(fabs(v1[i] - sin<ScalarType>(v0[i])) < err_tol);
  298. SCTL_ASSERT(fabs(v2[i] - cos<ScalarType>(v0[i])) < err_tol);
  299. SCTL_ASSERT(fabs(v3[i] - exp<ScalarType>(v0[i]))/fabs(exp<ScalarType>(v0[i])) < err_tol);
  300. }
  301. approx_sincos<5>(v1, v2, v0);
  302. v3 = approx_exp<5>(v0);
  303. for (Integer i = 0; i < N; i++) {
  304. ScalarType err_tol = (ScalarType)1e-5;
  305. SCTL_ASSERT(fabs(v1[i] - sin<ScalarType>(v0[i])) < err_tol);
  306. SCTL_ASSERT(fabs(v2[i] - cos<ScalarType>(v0[i])) < err_tol);
  307. SCTL_ASSERT(fabs(v3[i] - exp<ScalarType>(v0[i]))/fabs(exp<ScalarType>(v0[i])) < err_tol);
  308. }
  309. if (sizeof(ScalarType) < 16) return;
  310. approx_sincos<8>(v1, v2, v0);
  311. v3 = approx_exp<8>(v0);
  312. for (Integer i = 0; i < N; i++) {
  313. ScalarType err_tol = (ScalarType)1e-8;
  314. SCTL_ASSERT(fabs(v1[i] - sin<ScalarType>(v0[i])) < err_tol);
  315. SCTL_ASSERT(fabs(v2[i] - cos<ScalarType>(v0[i])) < err_tol);
  316. SCTL_ASSERT(fabs(v3[i] - exp<ScalarType>(v0[i]))/fabs(exp<ScalarType>(v0[i])) < err_tol);
  317. }
  318. approx_sincos<12>(v1, v2, v0);
  319. v3 = approx_exp<12>(v0);
  320. for (Integer i = 0; i < N; i++) {
  321. ScalarType err_tol = (ScalarType)1e-12;
  322. SCTL_ASSERT(fabs(v1[i] - sin<ScalarType>(v0[i])) < err_tol);
  323. SCTL_ASSERT(fabs(v2[i] - cos<ScalarType>(v0[i])) < err_tol);
  324. SCTL_ASSERT(fabs(v3[i] - exp<ScalarType>(v0[i]))/fabs(exp<ScalarType>(v0[i])) < err_tol);
  325. }
  326. }
  327. static void test_reals_rsqrt() {
  328. UnionType u1, b1, b2, u2, u3, u4, u5;
  329. for (Integer i = 0; i < N; i++) {
  330. u1.x[i] = (ScalarType)rand();
  331. b1.x[i] = (ScalarType)rand();
  332. b2.x[i] = (ScalarType)rand();
  333. }
  334. u2.v = approx_rsqrt<4>(u1.v);
  335. u3.v = approx_rsqrt<7>(u1.v);
  336. u4.v = approx_rsqrt<4>(u1.v, b1.v>b2.v);
  337. u5.v = approx_rsqrt<7>(u1.v, b1.v>b2.v);
  338. for (Integer i = 0; i < N; i++) {
  339. ScalarType err = fabs(u2.x[i] - 1/sqrt<ScalarType>(u1.x[i]));
  340. ScalarType max_val = fabs(1/sqrt<ScalarType>(u1.x[i]));
  341. ScalarType rel_err = err / max_val;
  342. SCTL_ASSERT(rel_err < (pow<11,ScalarType>((ScalarType)0.5)));
  343. err = u4.x[i] - (b1.x[i]>b2.x[i] ? u2.x[i] : 0);
  344. rel_err = err / max_val;
  345. SCTL_ASSERT(rel_err < (pow<11,ScalarType>((ScalarType)0.5)));
  346. SCTL_ASSERT(b1.x[i]>b2.x[i] || (u4.x[i]==0));
  347. }
  348. for (Integer i = 0; i < N; i++) {
  349. ScalarType err = fabs((ScalarType)(u3.x[i] - 1/sqrt((double)u1.x[i]))); // float is not accurate enough to compute reference solution with 7-digits
  350. ScalarType max_val = fabs(1/sqrt<ScalarType>(u1.x[i]));
  351. ScalarType rel_err = err / max_val;
  352. SCTL_ASSERT(rel_err < (pow<22,ScalarType>((ScalarType)0.5)));
  353. err = u5.x[i] - (b1.x[i]>b2.x[i] ? u3.x[i] : 0);
  354. rel_err = err / max_val;
  355. SCTL_ASSERT(rel_err < (pow<22,ScalarType>((ScalarType)0.5)));
  356. SCTL_ASSERT(b1.x[i]>b2.x[i] || (u5.x[i]==0));
  357. }
  358. }
  359. template <Integer k, class... T2> struct InitVec {
  360. static VecType apply(T2... rest) {
  361. return InitVec<k-1, ScalarType, T2...>::apply((ScalarType)k, rest...);
  362. }
  363. };
  364. template <class... T2> struct InitVec<0, T2...> {
  365. static VecType apply(T2... rest) {
  366. return VecType(rest...);
  367. }
  368. };
  369. static constexpr Integer SizeBytes = VecType::Size()*sizeof(ScalarType);
  370. union UnionType {
  371. VecType v;
  372. ScalarType x[N];
  373. int8_t c[SizeBytes];
  374. };
  375. };
  376. }
  377. #endif //_SCTL_VEC_TEST_HPP_