main.tex 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. % vim: set foldmethod=marker foldmarker=<<<,>>>:
  2. \input{ccmbeamer}
  3. %\usepackage{svg}
  4. \usetikzlibrary{graphdrawing.trees}
  5. \definecolor{c1} {rgb}{0,0,0} % c1..c12: named colors in the rgb model (components in 0..1); c1 is black
  6. \definecolor{c2} {rgb}{0.1216,0.4706,0.7059}
  7. \definecolor{c3} {rgb}{0.2000,0.6275,0.1725}
  8. \definecolor{c4} {rgb}{0.9843,0.6039,0.6000}
  9. \definecolor{c5} {rgb}{0.8902,0.1020,0.1098}
  10. \definecolor{c6} {rgb}{0.9922,0.7490,0.4353}
  11. \definecolor{c7} {rgb}{1.0000,0.4980, 0}
  12. \definecolor{c8} {rgb}{0.4157,0.2392,0.6039}
  13. \definecolor{c9} {rgb}{0.6941,0.3490,0.1569}
  14. \definecolor{c10}{rgb}{0.6510,0.8078,0.8902}
  15. \definecolor{c11}{rgb}{0.6980,0.8745,0.5412}
  16. \definecolor{c12}{rgb}{0.7922,0.6980,0.8392} % NOTE(review): this value is replaced by the next line, which reuses the name c12
  17. \definecolor{c12}{rgb}{1.0000,1.0000,0.6000} % NOTE(review): second definition of c12 wins; probably meant c13 -- check uses of c12 in the \input files before renaming
  18. \usepackage{minted}
  19. %\usemintedstyle{pastie}
  20. \usemintedstyle{emacs}
  21. \usepackage{fontspec}
  22. \usepackage[nott]{inconsolata}
  23. \usepackage{booktabs}
  24. %<<< title, author, institute
  25. \title % optional [..] argument first, then the full title; both carry the same text here (presumably beamer's short/long forms -- ccmbeamer is not shown)
  26. [What every programmer should know about \\ high performance computing]
  27. {What every programmer should know about \\ high performance computing}
  28. \author[Dhairya Malhotra]{Dhairya~Malhotra} % optional form uses a normal space, the full form a non-breaking "~"
  29. %\institute{Flatiron Institute\\ \mbox{} \\ \pgfuseimage{FIbig} }
  30. %\institute{\pgfuseimage{FIbig} }
  31. \institute{\Large $F_\omega(\alpha+m)!$} % the only active \institute: a math expression set in \Large; the two variants above are commented out
  32. \date[]{Oct 28, 2022} % the optional argument is deliberately empty
  33. %>>>
  34. %<<< packages
  35. \usepackage{tikz}
  36. \usetikzlibrary{fit,shapes.geometric,arrows,calc,shapes,decorations.pathreplacing,patterns}
  37. \usepackage{pgfplots,pgfplotstable}
  38. \pgfplotsset{compat=1.17}
  39. \usepackage{mathtools}
  40. \usepackage{multirow}
  41. \usepackage{multimedia}
  42. \usepackage{media9}
  43. %\usepackage{movie15} %(obsolete)
  44. \usepackage{animate}
  45. \usepackage{fp}
  46. %\usepackage{enumitem}
  47. \usepackage{bm}
  48. \beamertemplateballitem % Numbered bullets
  49. \usepackage{xstring}
  50. \usepackage{mathtools}% Loads amsmath
  51. \usepackage{stmaryrd}
  52. \newcommand{\vcenteredinclude}[1]{\begingroup\setbox0=\hbox{{#1}}\parbox{\wd0}{\box0}\endgroup} % sets #1 in box 0, then places that box in a \parbox of exactly its width (a \parbox without position argument is vertically centred on the line); the group keeps the box assignment local
  53. %%------------------------------------------------------------------------------
  54. %%- Latin-abbreviations: \latinabbrev{<abbreviation without its final dot>} supplies the final dot unless one follows
  55. %%------------------------------------------------------------------------------
  56. \usepackage{expl3}
  57. \ExplSyntaxOn
  58. \newcommand\latinabbrev[1]{
  59. \peek_meaning:NTF . {% Next token is a dot (same test as \@ifnextchar): add no dot of our own
  60. #1\@}% the dot already present in the input becomes the final dot
  61. { \peek_catcode:NTF a {% Next token has the catcode of 'a', i.e. is a letter: add the dot and a space
  62. #1.\@~}% the space must be written as ~ here; a literal space is ignored under \ExplSyntaxOn
  63. {#1.\@}}}% anything else (punctuation, braces, control sequences): add the dot only
  64. \ExplSyntaxOff
  65. % Omit the final dot in each definition: \latinabbrev adds it unless a dot already follows the macro.
  66. \def\eg{\latinabbrev{e.g}}
  67. \def\etal{\latinabbrev{et al}}
  68. \def\etc{\latinabbrev{etc}}
  69. \def\ie{\latinabbrev{i.e}}
  70. %>>>
  71. \begin{document}
  72. \setbeamercovered{transparent}% Dim out "inactive" elements
  73. %\begin{frame}[t]%<<< Title
  74. % \titlepage
  75. %\end{frame}%>>>
  76. \input{intro} % the body consists solely of these four \input files, read in this order
  77. \input{ilp}
  78. \input{mem}
  79. \input{openmp}
  80. \end{document} % nothing below this line is typeset; the remaining lines are notes
  81. % Examples:
  82. % Instruction level: polynomial evaluation, simple expressions (AXPY)
  83. % Compute bound: GEMM
  84. % Memory bound: AXPY, Gauss-Seidel / Gauss-Jacobi
  85. % Latency bound: sorting
  86. % Ideas to demonstrate:
  87. % Vectorization
  88. % Instruction latency, out-of-order execution, aliasing, loop-unrolling
  89. % Caching, blocking, memory bandwidth, memory latency, prefetching
  90. % Hyper threading
  91. % TOOLS:
  92. % godbolt (Compiler Explorer)
  93. % https://quick-bench.com/
  94. % Profiling: https://hpc-wiki.info/hpc/Compiler_Sanitizers
  95. % Debugging: -fsanitize=address
  96. % profile! profile! profile!
  97. %omp_get_wtime() / MPI_Wtime()
  98. % htop
  99. %NUMA:
  100. % numactl -H
  101. % export OMP_PLACES="{0},{1},{2},{3}"
  102. % numactl -l myBinary // local memory for each thread
  103. % Distributed memory
  104. % cost model
  105. % load balancing
  106. % minimizing communication
  107. %false sharing, caching,
  108. % GEMM cube volume and surface area
  109. % Programming languages: https://hpc-wiki.info/hpc/Programming_Languages
  110. % NUMA: https://hpc-wiki.info/hpc/Binding/Pinning
  111. % export OMP_PROC_BIND=close/spread
  112. % memory copy; OMP_NUM_THREADS=8
  113. % non-temporal writes
  114. % single thread can saturate memory bandwidth.
  115. % do not optimize single-threaded, it may not reflect parallel performance.
  116. % Diagnosing performance issues: https://hpc-wiki.info/hpc/Performance_Patterns
  117. % Runtime profiling: https://hpc-wiki.info/hpc/Runtime_profiling