12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- % vim: set foldmethod=marker foldmarker=<<<,>>>:
- \section{Memory/bandwidth optimization}
- \begin{frame} \frametitle{Memory benchmarks}{} %<<<
- % Reference: https://lwn.net/Articles/252125/
- % (Ulrich Drepper -- What every programmer should know about memory)
- % Planned plot: X axis = size, Y axis = cycles; repeat while varying the stride length.
- % Topic: spatial and temporal data locality.
- % Topic: hyper-threading -- shared cache -- useful for latency-bound code.
- \end{frame}
- %>>>
- % vector vs linked list
- \begin{frame} \frametitle{Shared memory pitfalls}{} %<<<
- % Outline -- many ways to shoot yourself in the foot with shared memory:
- %   * thread contention
- %   * cache coherency
- %   * thread pinning
- %   * NUMA (non-uniform memory access)
- %   * locks / atomics / synchronization
- \end{frame}
- %>>> closes the fold opened by the preceding frame, so the frame below is not nested inside it
- \begin{frame} \frametitle{Cache Coherent Non-uniform Memory Access}{} %<<<
- % ccNUMA overview: terminology bullets followed by a topology figure.
- \begin{itemize}
- \item \textbf{Cores:} individual processing units.
- \item \textbf{Sockets:} collection of cores on the same silicon die.
- \item Each socket is connected to its own DRAM.
- \item Sockets are interconnected using a network: QPI (Intel), HT (AMD).
- \item Location of memory pages is determined by the first-touch policy.
- \end{itemize}
- \includegraphics[width=0.7\textwidth]{figs/numa.png}
- % \url typesets the link verbatim and makes it clickable; it comes from
- % hyperref, which the beamer class loads by default.
- \footnote{figure from: \url{https://www.boost.org}}
- \end{frame}
- %>>>
|