mem.tex 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. % vim: set foldmethod=marker foldmarker=<<<,>>>:
  2. \section{Memory/bandwidth optimization} % section heading for the frames below
  3. \begin{frame} \frametitle{Memory benchmarks}{} %<<<
  4. % Planned content (frame body is still empty). Reference: https://lwn.net/Articles/252125/
  5. % Reference: Ulrich Drepper -- What every programmer should know about memory
  6. % Plot: x-axis = size, y-axis = cycles; repeat while varying the stride length
  7. % Topic: spatial and temporal data locality
  8. % Topic: hyper-threading -- shared cache -- useful for latency-bound code
  9. \end{frame}
  10. %>>>
  11. % vector vs linked list
  12. \begin{frame} \frametitle{Shared memory pitfalls}{} %<<<
  13. % Planned content (frame body is still empty) -- many ways to shoot yourself in the foot:
  14. %   - thread contention
  15. %   - cache coherency
  16. %   - thread pinning
  17. %   - NUMA
  18. %   - locks / atomics / synchronization
  19. \end{frame} % NOTE(review): the fold opened on this frame has no closing marker here; it is only closed after the next frame -- confirm the nesting is intended
  20. \begin{frame} \frametitle{Cache Coherent Non-uniform Memory Access}{} %<<<
  21. \begin{itemize} % ccNUMA overview: terminology first, then memory attachment and page placement
  22. \item \textbf{Cores:} individual processing units.
  23. \item \textbf{Sockets:} collection of cores on the same silicon die.
  24. \item Each socket is connected to its own DRAM.
  25. \item Sockets interconnected using a network: QPI (Intel), HT (AMD).
  26. \item Location of memory pages determined by first-touch policy.
  27. \end{itemize}
  28. \includegraphics[width=0.7\textwidth]{figs/numa.png}% line-ending % avoids a stray space before the footnote mark
  29. \footnote{Figure from: \url{https://www.boost.org}} % \url (beamer loads hyperref) gives a typeset, clickable link instead of a raw URL
  30. \end{frame} %>>>
  31. %>>>