|
@@ -317,7 +317,6 @@
|
|
|
\end{overprint}
|
|
|
\end{columns}
|
|
|
|
|
|
- % coding example
|
|
|
\end{frame}
|
|
|
%>>>
|
|
|
|
|
@@ -601,7 +600,6 @@
|
|
|
\end{overprint}
|
|
|
\end{columns}
|
|
|
|
|
|
- % coding example
|
|
|
\end{frame}
|
|
|
%>>>
|
|
|
|
|
@@ -955,7 +953,117 @@
|
|
|
\end{frame}
|
|
|
%>>>
|
|
|
|
|
|
-\begin{frame} \frametitle{Polynomial evaluation: actual performance} %<<<
|
|
|
+\begin{frame}[t,fragile] \frametitle{Polynomial evaluation: actual performance} %<<<
|
|
|
+
|
|
|
+ \vspace{-1em}
|
|
|
+ \begin{columns}[t]
|
|
|
+ \column{0.55\textwidth}
|
|
|
+ \footnotesize
|
|
|
+ \begin{overprint}
|
|
|
+
|
|
|
+ \onslide<1>%<<<
|
|
|
+ \begin{minted}[
|
|
|
+ frame=lines,
|
|
|
+ fontsize=\footnotesize,
|
|
|
+ linenos,
|
|
|
+ gobble=10,
|
|
|
+ mathescape
|
|
|
+ ]{C++}
|
|
|
+ // Horner's rule
|
|
|
+ for (long i = 0; i < 1000000000L; i++) {
|
|
|
+ x = ((((((a*x+b)*x+c)*x+d)*x+e)*x+f)*x+g)*x+h;
|
|
|
+ }
|
|
|
+ \end{minted}
|
|
|
+ \begin{minted}[
|
|
|
+ frame=lines,
|
|
|
+ fontsize=\footnotesize,
|
|
|
+ linenos,
|
|
|
+ gobble=10,
|
|
|
+ mathescape
|
|
|
+ ]{C++}
|
|
|
+ // Estrin's method
|
|
|
+ for (long i = 0; i < 1000000000L; i++) {
|
|
|
+ double x2 = x * x;
|
|
|
+ double x4 = x2 * x2;
|
|
|
+ x = ((a*x+b)*x2+(c*x+d))*x4+(e*x+f)*x2+(g*x+h);
|
|
|
+ }
|
|
|
+ \end{minted}
|
|
|
+ %>>>
|
|
|
+
|
|
|
+ \onslide<2>%<<<
|
|
|
+ \begin{minted}[
|
|
|
+ frame=lines,
|
|
|
+ fontsize=\footnotesize,
|
|
|
+ linenos,
|
|
|
+ gobble=10,
|
|
|
+ mathescape
|
|
|
+ ]{C++}
|
|
|
+ // Horner's rule
|
|
|
+ for (long i = 0; i < 1000000000L; i++) {
|
|
|
+ x = ((((((a*x+b)*x+c)*x+d)*x+e)*x+f)*x+g)*x+h;
|
|
|
+ }
|
|
|
+ \end{minted}
|
|
|
+ \begin{minted}[
|
|
|
+ frame=lines,
|
|
|
+ fontsize=\footnotesize,
|
|
|
+ linenos,
|
|
|
+ gobble=10,
|
|
|
+ mathescape
|
|
|
+ ]{C++}
|
|
|
+ // Estrin's method (unrolled)
|
|
|
+ for (long i = 0; i < 1000000000L; i++) {
|
|
|
+ double x2 = x * x;
|
|
|
+ double x4 = x2 * x2;
|
|
|
+ double u = a * x + b;
|
|
|
+ double v = c * x + d;
|
|
|
+ double w = e * x + f;
|
|
|
+ double p = g * x + h;
|
|
|
+ double q = u * x2 + v;
|
|
|
+ double r = w * x2 + p;
|
|
|
+ x = q * x4 + r;
|
|
|
+ }
|
|
|
+ \end{minted}
|
|
|
+ %>>>
|
|
|
+
|
|
|
+ \end{overprint}
|
|
|
+
|
|
|
+ \column{0.05\textwidth}
|
|
|
+ \column{0.35\textwidth}
|
|
|
+
|
|
|
+ \begin{overprint}
|
|
|
+ \onslide<1>%<<<
|
|
|
+ \begin{minted}[gobble=8,fontsize=\footnotesize]{text}
|
|
|
+
|
|
|
+ Using Horner's rule:
|
|
|
+ T = 8.82432
|
|
|
+ cycles/iter = 29.1203
|
|
|
+
|
|
|
+
|
|
|
+ Using Estrin's method:
|
|
|
+ T = 5.7813
|
|
|
+ cycles/iter = 19.0783
|
|
|
+ \end{minted}
|
|
|
+
|
|
|
+ \textcolor{red}{\qquad only $1.5\times$ speedup :(}
|
|
|
+ %>>>
|
|
|
+
|
|
|
+ \onslide<2>%<<<
|
|
|
+ \begin{minted}[gobble=8,fontsize=\footnotesize]{text}
|
|
|
+
|
|
|
+ Using Horner's rule:
|
|
|
+ T = 8.82432
|
|
|
+ cycles/iter = 29.1203
|
|
|
+
|
|
|
+
|
|
|
+ Using Estrin's method:
|
|
|
+ T = 4.5794
|
|
|
+ cycles/iter = 15.112
|
|
|
+ \end{minted}
|
|
|
+
|
|
|
+ \textcolor{red}{\qquad $1.9\times$ speedup!}
|
|
|
+ %>>>
|
|
|
+ \end{overprint}
|
|
|
+ \end{columns}
|
|
|
|
|
|
% perf - show stalled cycles
|
|
|
|
|
@@ -1005,7 +1113,7 @@
|
|
|
\end{frame}
|
|
|
%>>>
|
|
|
|
|
|
-\begin{frame} \frametitle{Optimized libraries for function evaluationa and vectorization} %<<<
|
|
|
+\begin{frame} \frametitle{Optimized libraries for function evaluation and vectorization} %<<<
|
|
|
% Fast function evaluation using polynomial evaluation
|
|
|
% baobzi
|
|
|
% sf_benchmarks :
|