mem.tex 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. % vim: set foldmethod=marker foldmarker=<<<,>>>:
  2. \section{Memory/bandwidth optimization}
  3. \begin{frame} \frametitle{Memory}{} %<<<
  4. \begin{columns}
  5. \column{0.5\textwidth}
  6. \begin{itemize}
  7. \item How does memory work?
  8. \end{itemize}
  9. Ulrich Drepper -- What every programmer should know about memory (2007)
  10. %https://lwn.net/Articles/252125/
  11. \column{0.5\textwidth}
  12. \centering
  13. \includegraphics[width=0.99\textwidth]{figs/cache-hierarchy}
  14. {\footnotesize Source: Intel Software Developer Manual}
  15. \end{columns}
  16. \end{frame}
  17. %>>>
  18. \begin{frame} \frametitle{Latency and bandwidth}{} %<<<
  19. % 1) (malloc, first-touch, bandwidth, free) for (writing to array)
  20. % 2) (bandwidth) for (reading array) [reduction]
  21. % 3) (flop,bandwidth) for (vector copy, vector-add) (write causes read -- unless streaming write)
  22. % 4) (latency) for (sequential access, strided access) (integer array with indices)
  23. % plot: X (size), Y (cycles) ---- vary stride length
  24. % spatial and temporal data locality
  25. % hyper threading - shared cache - useful for latency bound
  26. \end{frame}
  27. %>>>
  28. % vector vs linked list
  29. \begin{frame} \frametitle{Shared memory pitfalls}{} %<<<
  30. % many ways to shoot yourself in the foot:
  31. % thread contention
  32. % cache coherency
  33. % thread pinning
  34. % NUMA
  35. % locks / atomic / synchronization
  36. \end{frame}
  37. \begin{frame} \frametitle{Cache Coherent Non-uniform Memory Access}{} %<<<
  38. \begin{itemize}
  39. \item \textbf{Cores:} individual processing units.
  40. \item \textbf{Sockets:} collection of cores on the same silicon die.
  41. \item Each socket is connected to its own DRAM.
  42. \item Sockets interconnected using a network: QPI (Intel), HT (AMD).
  43. \item Location of memory pages determined by first-touch policy.
  44. \end{itemize}
  45. \centering
  46. \includegraphics[width=0.7\textwidth]{figs/numa.png}
  47. {\footnotesize Source: \url{https://www.boost.org}}
  48. \end{frame} %>>>
  49. %>>>