main.tex 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. % vim: set foldmethod=marker foldmarker=<<<,>>>:
  2. \input{ccmbeamer}
  3. %\usepackage{svg}
  4. \usetikzlibrary{graphdrawing.trees}
  5. \usepackage{overpic}
  6. \definecolor{c1} {rgb}{0,0,0} % black; named colours c1..c13 are defined below
  7. \definecolor{c2} {rgb}{0.1216,0.4706,0.7059} % blue
  8. \definecolor{c3} {rgb}{0.2000,0.6275,0.1725} % green
  9. \definecolor{c4} {rgb}{0.9843,0.6039,0.6000} % light red
  10. \definecolor{c5} {rgb}{0.8902,0.1020,0.1098} % red
  11. \definecolor{c6} {rgb}{0.9922,0.7490,0.4353} % light orange
  12. \definecolor{c7} {rgb}{1.0000,0.4980,0.0000} % orange
  13. \definecolor{c8} {rgb}{0.4157,0.2392,0.6039} % purple
  14. \definecolor{c9} {rgb}{0.6941,0.3490,0.1569} % brown
  15. \definecolor{c10}{rgb}{0.6510,0.8078,0.8902} % light blue
  16. \definecolor{c11}{rgb}{0.6980,0.8745,0.5412} % light green
  17. \definecolor{c13}{rgb}{0.7922,0.6980,0.8392} % lavender; was a duplicate "c12" that the next line silently overwrote, so it was unreachable
  18. \definecolor{c12}{rgb}{1.0000,1.0000,0.6000} % pale yellow; c12 keeps the value it effectively had before the rename above
  19. \usepackage{minted}
  20. \usemintedstyle{vs}
  21. %\usemintedstyle{borland}
  22. %\usemintedstyle{emacs}
  23. %\usemintedstyle{perldoc}
  24. %\usemintedstyle{friendly}
  25. %%\usemintedstyle{pastie}
  26. %%\usemintedstyle{vim}
  27. \usepackage{fontspec}
  28. \usepackage[nott]{inconsolata}
  29. \usepackage{booktabs}
  30. %<<< title, author, institute
  31. \title % short form in [..], full form in {..}; both carry the same text here
  32. [What every programmer should know about \\ high performance computing]
  33. {What every programmer should know about \\ high performance computing}
  34. \subtitle{(instruction-level parallelism, memory)}
  35. \author[Dhairya Malhotra]{Codes: \url{https://github.com/dmalhotra/fwam2022} \\
  36. \phantom{.}\\
  37. Dhairya~Malhotra} % the full author field shows the code-repository link above the name; the short form is the name only
  38. %\institute{Flatiron Institute\\ \mbox{} \\ \pgfuseimage{FIbig} }
  39. %\institute{\pgfuseimage{FIbig} }
  40. \institute{\Large $F_\omega(\alpha+m)!$} % a formula is shown in the institute slot; the name/logo variants are commented out above
  41. \date[]{Oct 28, 2022} % the optional (short) form of the date is left empty
  42. %>>>
  43. %<<< packages
  44. \usepackage{tikz}
  45. \usetikzlibrary{fit,shapes.geometric,arrows,calc,shapes,decorations.pathreplacing,patterns}
  46. \usepackage{pgfplots,pgfplotstable}
  47. \pgfplotsset{compat=1.17}
  48. \usepackage{mathtools}
  49. \usepackage{multirow}
  50. \usepackage{multimedia}
  51. \usepackage{media9}
  52. %\usepackage{movie15} %(obsolete)
  53. \usepackage{animate}
  54. \usepackage{fp}
  55. %\usepackage{enumitem}
  56. \usepackage{bm}
  57. \beamertemplateballitem % Numbered bullets
  58. \usepackage{xstring}
  59. \usepackage{mathtools}% Loads amsmath
  60. \usepackage{stmaryrd}
  61. \newcommand{\vcenteredinclude}[1]{\begingroup\setbox0=\hbox{{#1}}\parbox{\wd0}{\box0}\endgroup} % typesets #1 into box 0, then places that box inside a \parbox of exactly its width (default \parbox alignment, i.e. vertically centred); the group keeps the box assignment local
  62. %%------------------------------------------------------------------------------
  63. %%- Latin-abbreviations
  64. %%------------------------------------------------------------------------------
  65. \usepackage{expl3}
  66. \ExplSyntaxOn
  67. \newcommand\latinabbrev[1]{% #1 = abbreviation WITHOUT its final dot; the dot is chosen from the token that follows
  68. \peek_meaning:NTF . {% Same as \@ifnextchar: a dot follows in the text, so reuse it instead of printing a second one
  69. #1\@}%
  70. { \peek_catcode:NTF a {% Check whether next char has same catcode as 'a', i.e., is a letter (a word follows directly)
  71. #1.\@~ }% Add the dot and an explicit space: ~ is a space under \ExplSyntaxOn, a literal space is ignored (the old code lost it, giving "e.g.word")
  72. {#1.\@}}}% Anything else (comma, brace, control space, ...): add the dot only; \@ resets the space factor after it
  73. \ExplSyntaxOff
  74. % Abbreviation shortcuts. Omit the final dot from each definition: \latinabbrev adds it, or reuses a dot that already follows in the text.
  75. \def\eg{\latinabbrev{e.g}} % "e.g."
  76. \def\etal{\latinabbrev{et al}} % "et al."
  77. \def\etc{\latinabbrev{etc}} % "etc."
  78. \def\ie{\latinabbrev{i.e}} % "i.e."
  79. %>>>
  80. \begin{document}
  81. \setbeamercovered{transparent}% Dim out "inactive" elements
  82. \begin{frame}[t]%<<< Title
  83. \titlepage
  84. \end{frame}%>>>
  85. \input{intro} % the slide content lives in three separate files, read in this order: intro, ilp, mem
  86. \input{ilp}
  87. \input{mem}
  88. \end{document} % LaTeX stops reading here; the lines below are leftover notes and topic ideas
  89. %%\input{openmp}
  90. % Examples:
  91. % Instruction level: polynomial evaluation, simple expressions (AXPY)
  92. % Compute bound: GEMM
  93. % Memory bound: AXPY, Gauss-Seidel / Gauss-Jacobi
  94. % Latency bound: sorting
  95. % Ideas to demonstrate:
  96. % Hyper threading
  97. % TOOLS:
  98. % godbolt (Compiler Explorer): https://godbolt.org/
  99. % https://quick-bench.com/
  100. % Profiling: https://hpc-wiki.info/hpc/Compiler_Sanitizers
  101. % Debugging: -fsanitize=address
  102. % profile! profile! profile!
  103. %omp_get_wtime() / MPI_Wtime()
  104. % htop
  105. %NUMA:
  106. % numactl -H
  107. % export OMP_PLACES="{0},{1},{2},{3}"
  108. % numactl -l myBinary // local memory for each thread
  109. % Distributed memory
  110. % cost model
  111. % load balancing
  112. % minimizing communication
  113. %false sharing, caching,
  114. % GEMM cube volume and surface area
  115. % Programming languages: https://hpc-wiki.info/hpc/Programming_Languages
  116. % NUMA: https://hpc-wiki.info/hpc/Binding/Pinning
  117. % export OMP_PROC_BIND=close/spread
  118. % memory copy; OMP_NUM_THREADS=8
  119. % non-temporal writes
  120. % single thread can saturate memory bandwidth.
  121. % do not optimize single-threaded, it may not reflect parallel performance.
  122. % Diagnosing performance issues: https://hpc-wiki.info/hpc/Performance_Patterns
  123. % Runtime profiling: https://hpc-wiki.info/hpc/Runtime_profiling