dmalhotra
/
2022-10-28-talk-fwam


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
							% vim: set foldmethod=marker foldmarker=<<<,>>>:

% Section grouping the slides on memory benchmarks, shared-memory pitfalls and NUMA.
\section{Memory/bandwidth optimization}

\begin{frame} \frametitle{Memory benchmarks}{} %<<<

  % NOTE: this frame has no typeset content yet; the comments below are the
  % planned outline for the slide.

  % Reference: Ulrich Drepper, "What every programmer should know about memory"
  % https://lwn.net/Articles/252125/

  % Planned plot: x-axis = size, y-axis = cycles, repeated while varying the
  % stride length.

  % Topic: spatial and temporal data locality.

  % Topic: hyper-threading -- shared cache -- useful for latency-bound code.

\end{frame}
%>>>

% vector vs linked list

\begin{frame} \frametitle{Shared memory pitfalls}{} %<<<

  % NOTE: this frame has no typeset content yet; the comments below are the
  % planned outline for the slide.

  % Theme: many ways to shoot yourself in the foot. Topics to cover:

  % - thread contention
  % - cache coherency
  % - thread pinning
  % - NUMA
  % - locks / atomics / synchronization

\end{frame}
%>>>

\begin{frame} \frametitle{Cache Coherent Non-uniform Memory Access}{} %<<<
% Slide listing the NUMA hardware terms (cores, sockets, per-socket DRAM,
% inter-socket network, first-touch page placement) above an illustrative figure.

\begin{itemize}
  \item \textbf{Cores:} individual processing units.
  \item \textbf{Sockets:} collection of cores on the same silicon die.
  \item Each socket is connected to its own DRAM.
  \item Sockets interconnected using a network: QPI (Intel), HT (AMD).
  \item Location of memory pages determined by first-touch policy.
\end{itemize}

% The figure credit uses \url so the link is typeset (and hyphenated) as a URL.
\includegraphics[width=0.7\textwidth]{figs/numa.png}
  \footnote{Figure from: \url{https://www.boost.org}}
\end{frame} %>>>


%>>>