|  | @@ -317,7 +317,6 @@
 | 
	
		
			
				|  |  |      \end{overprint}
 | 
	
		
			
				|  |  |    \end{columns}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  % coding example
 | 
	
		
			
				|  |  |  \end{frame}
 | 
	
		
			
				|  |  |  %>>>
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -601,7 +600,6 @@
 | 
	
		
			
				|  |  |      \end{overprint}
 | 
	
		
			
				|  |  |    \end{columns}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  % coding example
 | 
	
		
			
				|  |  |  \end{frame}
 | 
	
		
			
				|  |  |  %>>>
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -955,7 +953,117 @@
 | 
	
		
			
				|  |  |  \end{frame}
 | 
	
		
			
				|  |  |  %>>>
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -\begin{frame} \frametitle{Polynomial evaluation: actual performance} %<<<
 | 
	
		
			
				|  |  | +\begin{frame}[t,fragile] \frametitle{Polynomial evaluation: actual performance} %<<<
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  \vspace{-1em}
 | 
	
		
			
				|  |  | +  \begin{columns}[t]
 | 
	
		
			
				|  |  | +    \column{0.55\textwidth}
 | 
	
		
			
				|  |  | +    \footnotesize
 | 
	
		
			
				|  |  | +    \begin{overprint}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      \onslide<1>%<<<
 | 
	
		
			
				|  |  | +      \begin{minted}[
 | 
	
		
			
				|  |  | +          frame=lines,
 | 
	
		
			
				|  |  | +          fontsize=\footnotesize,
 | 
	
		
			
				|  |  | +          linenos,
 | 
	
		
			
				|  |  | +          gobble=10,
 | 
	
		
			
				|  |  | +          mathescape
 | 
	
		
			
				|  |  | +        ]{C++}
 | 
	
		
			
				|  |  | +          // Horner's rule
 | 
	
		
			
				|  |  | +          for (long i = 0; i < 1000000000L; i++) {
 | 
	
		
			
				|  |  | +            x = ((((((a*x+b)*x+c)*x+d)*x+e)*x+f)*x+g)*x+h;
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +      \end{minted}
 | 
	
		
			
				|  |  | +      \begin{minted}[
 | 
	
		
			
				|  |  | +          frame=lines,
 | 
	
		
			
				|  |  | +          fontsize=\footnotesize,
 | 
	
		
			
				|  |  | +          linenos,
 | 
	
		
			
				|  |  | +          gobble=10,
 | 
	
		
			
				|  |  | +          mathescape
 | 
	
		
			
				|  |  | +        ]{C++}
 | 
	
		
			
				|  |  | +          // Estrin's method
 | 
	
		
			
				|  |  | +          for (long i = 0; i < 1000000000L; i++) {
 | 
	
		
			
				|  |  | +            double x2 = x * x;
 | 
	
		
			
				|  |  | +            double x4 = x2 * x2;
 | 
	
		
			
				|  |  | +            x = ((a*x+b)*x2+(c*x+d))*x4+(e*x+f)*x2+(g*x+h);
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +      \end{minted}
 | 
	
		
			
				|  |  | +      %>>>
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      \onslide<2>%<<<
 | 
	
		
			
				|  |  | +      \begin{minted}[
 | 
	
		
			
				|  |  | +          frame=lines,
 | 
	
		
			
				|  |  | +          fontsize=\footnotesize,
 | 
	
		
			
				|  |  | +          linenos,
 | 
	
		
			
				|  |  | +          gobble=10,
 | 
	
		
			
				|  |  | +          mathescape
 | 
	
		
			
				|  |  | +        ]{C++}
 | 
	
		
			
				|  |  | +          // Horner's rule
 | 
	
		
			
				|  |  | +          for (long i = 0; i < 1000000000L; i++) {
 | 
	
		
			
				|  |  | +            x = ((((((a*x+b)*x+c)*x+d)*x+e)*x+f)*x+g)*x+h;
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +      \end{minted}
 | 
	
		
			
				|  |  | +      \begin{minted}[
 | 
	
		
			
				|  |  | +          frame=lines,
 | 
	
		
			
				|  |  | +          fontsize=\footnotesize,
 | 
	
		
			
				|  |  | +          linenos,
 | 
	
		
			
				|  |  | +          gobble=10,
 | 
	
		
			
				|  |  | +          mathescape
 | 
	
		
			
				|  |  | +        ]{C++}
 | 
	
		
			
				|  |  | +          // Estrin's method (unrolled)
 | 
	
		
			
				|  |  | +          for (long i = 0; i < 1000000000L; i++) {
 | 
	
		
			
				|  |  | +            double x2 = x * x;
 | 
	
		
			
				|  |  | +            double x4 = x2 * x2;
 | 
	
		
			
				|  |  | +            double u = a * x + b;
 | 
	
		
			
				|  |  | +            double v = c * x + d;
 | 
	
		
			
				|  |  | +            double w = e * x + f;
 | 
	
		
			
				|  |  | +            double p = g * x + h;
 | 
	
		
			
				|  |  | +            double q = u * x2 + v;
 | 
	
		
			
				|  |  | +            double r = w * x2 + p;
 | 
	
		
			
				|  |  | +            x = q * x4 + r;
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +      \end{minted}
 | 
	
		
			
				|  |  | +      %>>>
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    \end{overprint}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    \column{0.05\textwidth}
 | 
	
		
			
				|  |  | +    \column{0.35\textwidth}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    \begin{overprint}
 | 
	
		
			
				|  |  | +      \onslide<1>%<<<
 | 
	
		
			
				|  |  | +      \begin{minted}[gobble=8,fontsize=\footnotesize]{text}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        Using Horner's rule:
 | 
	
		
			
				|  |  | +        T = 8.82432
 | 
	
		
			
				|  |  | +        cycles/iter = 29.1203
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        
 | 
	
		
			
				|  |  | +        Using Estrin's method:
 | 
	
		
			
				|  |  | +        T = 5.7813
 | 
	
		
			
				|  |  | +        cycles/iter = 19.0783
 | 
	
		
			
				|  |  | +      \end{minted}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      \textcolor{red}{\qquad only $1.5\times$ speedup :(}
 | 
	
		
			
				|  |  | +      %>>>
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      \onslide<2>%<<<
 | 
	
		
			
				|  |  | +      \begin{minted}[gobble=8,fontsize=\footnotesize]{text}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        Using Horner's rule:
 | 
	
		
			
				|  |  | +        T = 8.82432
 | 
	
		
			
				|  |  | +        cycles/iter = 29.1203
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        
 | 
	
		
			
				|  |  | +        Using Estrin's method:
 | 
	
		
			
				|  |  | +        T = 4.5794
 | 
	
		
			
				|  |  | +        cycles/iter = 15.112
 | 
	
		
			
				|  |  | +      \end{minted}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      \textcolor{red}{\qquad $1.9\times$ speedup!}
 | 
	
		
			
				|  |  | +      %>>>
 | 
	
		
			
				|  |  | +    \end{overprint}
 | 
	
		
			
				|  |  | +  \end{columns}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    % perf - show stalled cycles
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -1005,7 +1113,7 @@
 | 
	
		
			
				|  |  |  \end{frame}
 | 
	
		
			
				|  |  |  %>>>
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -\begin{frame} \frametitle{Optimized libraries for function evaluationa and vectorization} %<<<
 | 
	
		
			
				|  |  | +\begin{frame} \frametitle{Optimized libraries for function evaluation and vectorization} %<<<
 | 
	
		
			
				|  |  |    % Fast function evaluation using polynomial evaluation
 | 
	
		
			
				|  |  |    % baobzi
 | 
	
		
			
				|  |  |    % sf_benchmarks :
 |