| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 | 
							- % vim: set foldmethod=marker foldmarker=<<<,>>>:
 
- \input{ccmbeamer}
 
- %\usepackage{svg}
 
- \usetikzlibrary{graphdrawing.trees}
 
- \usepackage{overpic}
 
- % Colour palette c1..c13; each entry is an rgb triple with components in [0,1].
 
- \definecolor{c1} {rgb}{0,0,0}
 
- \definecolor{c2} {rgb}{0.1216,0.4706,0.7059}
 
- \definecolor{c3} {rgb}{0.2000,0.6275,0.1725}
 
- \definecolor{c4} {rgb}{0.9843,0.6039,0.6000}
 
- \definecolor{c5} {rgb}{0.8902,0.1020,0.1098}
 
- \definecolor{c6} {rgb}{0.9922,0.7490,0.4353}
 
- \definecolor{c7} {rgb}{1.0000,0.4980,0.0000}
 
- \definecolor{c8} {rgb}{0.4157,0.2392,0.6039}
 
- \definecolor{c9} {rgb}{0.6941,0.3490,0.1569}
 
- \definecolor{c10}{rgb}{0.6510,0.8078,0.8902}
 
- \definecolor{c11}{rgb}{0.6980,0.8745,0.5412}
 
- \definecolor{c12}{rgb}{0.7922,0.6980,0.8392}
 
- \definecolor{c13}{rgb}{1.0000,1.0000,0.6000} % was a second "c12", which silently replaced the colour defined on the previous entry
 
- \usepackage{minted}
 
- \usemintedstyle{vs}
 
- %\usemintedstyle{borland}
 
- %\usemintedstyle{emacs}
 
- %\usemintedstyle{perldoc}
 
- %\usemintedstyle{friendly}
 
- %%\usemintedstyle{pastie}
 
- %%\usemintedstyle{vim}
 
- \usepackage{fontspec}
 
- \usepackage[nott]{inconsolata}
 
- \usepackage{booktabs}
 
- %<<< title, author, institute
 
-   \title % Beamer form: \title[short title]{full title}
 
-   [What every programmer should know about \\ high performance computing]
 
-   {What every programmer should know about \\ high performance computing}
 
-   \subtitle{(instruction-level parallelism, memory)}
 
-   \author[Dhairya Malhotra]{Codes: \url{https://github.com/dmalhotra/fwam2022} \\
 
-   \phantom{.}\\% invisible "." gives this spacer line some content
 
-   Dhairya~Malhotra}
 
-   %\institute{Flatiron Institute\\ \mbox{}  \\  \pgfuseimage{FIbig} }
 
-   %\institute{\pgfuseimage{FIbig} }
 
-   \institute{\Large $F_\omega(\alpha+m)!$} % NOTE(review): presumably spells "FWAM!" in math glyphs (cf. fwam2022 in the repo URL) -- confirm
 
-   \date[]{Oct 28, 2022} % empty optional argument = empty short date
 
- %>>>
 
- %<<< packages
 
-   \usepackage{tikz}
 
-   \usetikzlibrary{fit,shapes.geometric,arrows,calc,shapes,decorations.pathreplacing,patterns}
 
-   \usepackage{pgfplots,pgfplotstable}
 
-   \pgfplotsset{compat=1.17}
 
-   \usepackage{mathtools}
 
-   \usepackage{multirow}
 
-   \usepackage{multimedia}
 
-   \usepackage{media9}
 
-   %\usepackage{movie15} %(obsolete)
 
-   \usepackage{animate}
 
-   \usepackage{fp}
 
-   %\usepackage{enumitem}
 
-   \usepackage{bm}
 
-   \beamertemplateballitem % Numbered bullets
 
-   \usepackage{xstring}
 
-   \usepackage{mathtools}% Loads amsmath
 
-   \usepackage{stmaryrd}
 
-   \newcommand{\vcenteredinclude}[1]{\begingroup\setbox0=\hbox{{#1}}\parbox{\wd0}{\box0}\endgroup} % Typesets #1 into box 0, then places that box in a \parbox exactly as wide as the box (\wd0); the group keeps the box assignment local.
 
-   %%------------------------------------------------------------------------------
 
-   %%- Latin-abbreviations
 
-   %%------------------------------------------------------------------------------
 
-   \usepackage{expl3}
 
-   \ExplSyntaxOn
 
-   % \latinabbrev{<abbr>}: print <abbr> (passed WITHOUT its final dot) and supply
 
-   % the dot only when the input does not already continue with one, so that
 
-   % writing "\eg." does not produce a doubled dot.
 
-   \newcommand\latinabbrev[1]{
 
-     \peek_meaning:NTF . {% Same as \@ifnextchar: a dot follows, so reuse it
 
-       #1\@}%
 
-     {#1.\@}}% No dot follows: add one. (The former letter/non-letter test had two identical branches and was dropped.)
 
-   \ExplSyntaxOff
 
-   %Omit final dot from each def.
 
-   \def\eg{\latinabbrev{e.g}}
 
-   \def\etal{\latinabbrev{et al}}
 
-   \def\etc{\latinabbrev{etc}}
 
-   \def\ie{\latinabbrev{i.e}}
 
- %>>>
 
- \begin{document}
 
-   \setbeamercovered{transparent}% Dim out "inactive" elements
 
-   \begin{frame}[t]%<<< Title
 
-     \titlepage % title slide built from the \title/\subtitle/\author/\institute/\date set in the preamble
 
-   \end{frame}%>>>
 
-   \input{intro} % NOTE(review): file not visible here; presumably the introduction slides -- confirm
 
-   \input{ilp} % NOTE(review): presumably the instruction-level parallelism part named in \subtitle -- confirm
 
-   \input{mem} % NOTE(review): presumably the memory part named in \subtitle -- confirm
 
- \end{document}
 
- %%\input{openmp}
 
- % Examples:
 
- % Instruction level: polynomial evaluation, simple expressions (AXPY)
 
- % Compute bound: GEMM
 
- % Memory bound: AXPY, Gauss-Seidel / Gauss-Jacobi
 
- % Latency bound: sorting
 
- % Ideas to demonstrate:
 
- % Hyper threading
 
- % TOOLS:
 
- % godbolt (Compiler Explorer): https://godbolt.org/
 
- % https://quick-bench.com/
 
- % Profiling: https://hpc-wiki.info/hpc/Compiler_Sanitizers
 
- % Debugging: -fsanitize=address
 
- % profile! profile! profile!
 
- %omp_get_wtime() / MPI_Wtime()
 
- % htop
 
- %NUMA:
 
- % numactl -H
 
- % export OMP_PLACES="{0},{1},{2},{3}"
 
- % numactl -l myBinary // local memory for each thread
 
- % Distributed memory
 
- % cost model
 
- % load balancing
 
- % minimizing communication
 
- %false sharing, caching,
 
- % GEMM cube volume and surface area
 
- % Programming languages: https://hpc-wiki.info/hpc/Programming_Languages
 
- % NUMA: https://hpc-wiki.info/hpc/Binding/Pinning
 
- % export OMP_PROC_BIND=close/spread
 
- % memory copy; OMP_NUM_THREADS=8
 
- % non-temporal writes
 
- % single thread can saturate memory bandwidth.
 
- % do not optimize single-threaded, it may not reflect parallel performance.
 
- % Diagnosing performance issues: https://hpc-wiki.info/hpc/Performance_Patterns
 
- % Runtime profiling: https://hpc-wiki.info/hpc/Runtime_profiling
 
 
  |