main.tex 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. % vim: set foldmethod=marker foldmarker=<<<,>>>:
  2. \input{ccmbeamer}
  3. %\usepackage{svg}
  4. \usetikzlibrary{graphdrawing.trees}
  5. \definecolor{c1} {rgb}{0,0,0}% Palette c1..c13: named colours given as rgb triples
  6. \definecolor{c2} {rgb}{0.1216,0.4706,0.7059}
  7. \definecolor{c3} {rgb}{0.2000,0.6275,0.1725}
  8. \definecolor{c4} {rgb}{0.9843,0.6039,0.6000}
  9. \definecolor{c5} {rgb}{0.8902,0.1020,0.1098}
  10. \definecolor{c6} {rgb}{0.9922,0.7490,0.4353}
  11. \definecolor{c7} {rgb}{1.0000,0.4980, 0}
  12. \definecolor{c8} {rgb}{0.4157,0.2392,0.6039}
  13. \definecolor{c9} {rgb}{0.6941,0.3490,0.1569}
  14. \definecolor{c10}{rgb}{0.6510,0.8078,0.8902}
  15. \definecolor{c11}{rgb}{0.6980,0.8745,0.5412}
  16. \definecolor{c12}{rgb}{0.7922,0.6980,0.8392}
  17. \definecolor{c13}{rgb}{1.0000,1.0000,0.6000}% Distinct name: a second c12 here would overwrite the line above
  18. \usepackage{minted}
  19. %\usemintedstyle{pastie}
  20. \usemintedstyle{emacs}
  21. \usepackage{fontspec}
  22. \usepackage[nott]{inconsolata}
  23. %<<< title, author, institute
  24. \title% The optional [..] and mandatory {..} arguments below carry the same text
  25. [What every programmer should know about \\ high performance computing]
  26. {What every programmer should know about \\ high performance computing}
  27. \author[Dhairya Malhotra]{Dhairya~Malhotra}% Optional form uses a plain space, main form a non-breaking "~"
  28. %\institute{Flatiron Institute\\ \mbox{} \\ \pgfuseimage{FIbig} }
  29. %\institute{\pgfuseimage{FIbig} }
  30. \institute{\Large $F_\omega(\alpha+m)!$}% Active variant; the two \institute lines above are disabled alternatives
  31. \date[]{Oct 28, 2022}% Optional argument is deliberately left empty
  32. %>>>
  33. %<<< packages
  34. \usepackage{tikz}
  35. \usetikzlibrary{fit,shapes.geometric,arrows,calc,shapes,decorations.pathreplacing,patterns}
  36. \usepackage{pgfplots,pgfplotstable}
  37. \pgfplotsset{compat=1.17}
  38. \usepackage{mathtools}
  39. \usepackage{multirow}
  40. \usepackage{multimedia}
  41. \usepackage{media9}
  42. %\usepackage{movie15} %(obsolete)
  43. \usepackage{animate}
  44. \usepackage{fp}
  45. %\usepackage{enumitem}
  46. \usepackage{bm}
  47. \beamertemplateballitem % Numbered bullets
  48. \usepackage{xstring}
  49. \usepackage{mathtools}% Loads amsmath
  50. \usepackage{stmaryrd}
  51. \newcommand{\vcenteredinclude}[1]{\begingroup\setbox0=\hbox{{#1}}\parbox{\wd0}{\box0}\endgroup}% Sets #1 in an \hbox (box 0), then places that box in a \parbox of the same width; the group keeps the box assignment local. Presumably used to centre the content vertically -- TODO confirm
  52. %%------------------------------------------------------------------------------
  53. %%- Latin-abbreviations
  54. %%------------------------------------------------------------------------------
  55. \usepackage{expl3}
  56. \ExplSyntaxOn% Literal spaces are ignored until \ExplSyntaxOff; "~" produces a space
  57. \newcommand\latinabbrev[1]{% #1: the abbreviation without its final dot
  58. \peek_meaning:NTF . {% Same as \@ifnextchar: a dot already follows, so add none
  59. #1\@}%
  60. { \peek_catcode:NTF a {% Next token has the same catcode as 'a', i.e., is a letter: add the dot and an explicit space
  61. #1.\@~}%
  62. {#1.\@}}}% Anything else (comma, brace, ...): add the dot only; \@ keeps the spacing that of an ordinary word
  63. \ExplSyntaxOff
  64. %Omit the final dot from each definition: \latinabbrev appends it unless a "." already follows.
  65. \def\eg{\latinabbrev{e.g}}% \def (not \newcommand): an existing \eg would be overwritten without an error
  66. \def\etal{\latinabbrev{et al}}
  67. \def\etc{\latinabbrev{etc}}
  68. \def\ie{\latinabbrev{i.e}}
  69. %>>>
  70. \begin{document}
  71. \setbeamercovered{transparent}% Dim out "inactive" elements
  72. %\begin{frame}[t]%<<< Title
  73. % \titlepage
  74. %\end{frame}%>>>
  75. %\input{intro}
  76. \input{ilp}% Only this part is currently included; the title frame and the intro, mem and openmp inputs are commented out
  77. %\input{mem}
  78. %\input{openmp}
  79. \end{document}% Everything below this line is commented-out planning notes
  80. % Examples:
  81. % Instruction level: polynomial evaluation, simple expressions (AXPY)
  82. % Compute bound: GEMM
  83. % Memory bound: AXPY, Gauss-Seidel / Gauss-Jacobi
  84. % Latency bound: sorting
  85. % Ideas to demonstrate:
  86. % Vectorization
  87. % Instruction latency, out-of-order execution, aliasing, loop-unrolling
  88. % Caching, blocking, memory bandwidth, memory latency, prefetching
  89. % Hyper threading
  90. % TOOLS:
  91. % godbolt (Compiler Explorer): https://godbolt.org/
  92. % https://quick-bench.com/
  93. % Profiling: https://hpc-wiki.info/hpc/Compiler_Sanitizers
  94. % Debugging: -fsanitize=address
  95. % profile! profile! profile!
  96. %omp_get_wtime() / MPI_Wtime()
  97. % htop
  98. %NUMA:
  99. % numactl -H
  100. % export OMP_PLACES="{0},{1},{2},{3}"
  101. % numactl -l myBinary // local memory for each thread
  102. % Distributed memory
  103. % cost model
  104. % load balancing
  105. % minimizing communication
  106. %false sharing, caching,
  107. % GEMM cube volume and surface area
  108. % Programming languages: https://hpc-wiki.info/hpc/Programming_Languages
  109. % NUMA: https://hpc-wiki.info/hpc/Binding/Pinning
  110. % export OMP_PROC_BIND=close/spread
  111. % memory copy; OMP_NUM_THREADS=8
  112. % non-temporal writes
  113. % single thread can saturate memory bandwidth.
  114. % do not optimize single-threaded, it may not reflect parallel performance.
  115. % Diagnosing performance issues: https://hpc-wiki.info/hpc/Performance_Patterns
  116. % Runtime profiling: https://hpc-wiki.info/hpc/Runtime_profiling