|
@@ -1070,7 +1070,43 @@
|
|
|
\end{frame}
|
|
|
%>>>
|
|
|
|
|
|
-\begin{frame} \frametitle{Vectorization - GEMM micro-kernel}{} %<<<
|
|
|
+\begin{frame} \frametitle{Libraries for special function evaluation} %<<<
|
|
|
+ % Fast function evaluation using polynomial approximation
|
|
|
+ % baobzi
|
|
|
+
|
|
|
+ % sf_benchmarks : https://github.com/flatironinstitute/sf_benchmarks
|
|
|
+ % Baobzi (adaptive fast function interpolator)
|
|
|
+ % Agner Fog's Vector Class Library
|
|
|
+ % SLEEF Vectorized Math Library
|
|
|
+ % FORTRAN native routines
|
|
|
+ % C++ Standard Library
|
|
|
+ % Eigen
|
|
|
+ % Boost
|
|
|
+ % AMD Math Library (LibM)
|
|
|
+ % GNU Scientific Library (GSL)
|
|
|
+ % Scientific Computing Template Library (SCTL)
|
|
|
+
|
|
|
+ % func name Mevals/s cycles/eval
|
|
|
+ % bessel_J0 baobzi 162.9 20.8
|
|
|
+ % bessel_J0 fort 16.9 200.9
|
|
|
+ % bessel_J0 gsl 6.7 504.5
|
|
|
+ % bessel_J0 boost 6.2 542.9
|
|
|
+ %
|
|
|
+ % func name Mevals/s cycles/eval
|
|
|
+ % sin agnerfog 1054.0 3.2
|
|
|
+ % sin sctl 951.6 3.6
|
|
|
+ % sin sleef 740.3 4.6
|
|
|
+ % sin amdlibm 490.9 6.9
|
|
|
+ % sin amdlibm 145.7 23.3
|
|
|
+ % sin stl 103.1 32.9
|
|
|
+ % sin eigen 102.5 33.1
|
|
|
+ % sin gsl 22.7 149.4
|
|
|
+
|
|
|
+\end{frame}
|
|
|
+%>>>
|
|
|
+
|
|
|
+
|
|
|
+\begin{frame} \frametitle{GEMM micro-kernel}{} %<<<
|
|
|
% show different ways of vectorizing that don't work
|
|
|
% most languages don't make it easy to specify when it is safe to vectorize (aliasing)
|
|
|
|
|
@@ -1113,11 +1149,4 @@
|
|
|
\end{frame}
|
|
|
%>>>
|
|
|
|
|
|
-\begin{frame} \frametitle{Optimized libraries for function evaluation and vectorization} %<<<
|
|
|
- % Fast function evaluation using polynomial evaluation
|
|
|
- % baobzi
|
|
|
- % sf_benchmarks :
|
|
|
-\end{frame}
|
|
|
-%>>>
|
|
|
-
|
|
|
|