123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- % vim: set foldmethod=marker foldmarker=<<<,>>>:
- \input{ccmbeamer}
- %\usepackage{svg}
- \usetikzlibrary{graphdrawing.trees}
- \usepackage{overpic}
- % Colour palette used throughout the slides: c1 is black, c2..c13 are accents.
- \definecolor{c1} {rgb}{0,0,0}
- \definecolor{c2} {rgb}{0.1216,0.4706,0.7059}
- \definecolor{c3} {rgb}{0.2000,0.6275,0.1725}
- \definecolor{c4} {rgb}{0.9843,0.6039,0.6000}
- \definecolor{c5} {rgb}{0.8902,0.1020,0.1098}
- \definecolor{c6} {rgb}{0.9922,0.7490,0.4353}
- \definecolor{c7} {rgb}{1.0000,0.4980, 0}
- \definecolor{c8} {rgb}{0.4157,0.2392,0.6039}
- \definecolor{c9} {rgb}{0.6941,0.3490,0.1569}
- \definecolor{c10}{rgb}{0.6510,0.8078,0.8902}
- \definecolor{c11}{rgb}{0.6980,0.8745,0.5412}
- \definecolor{c12}{rgb}{0.7922,0.6980,0.8392}
- % This entry used to be a second \definecolor{c12}, which silently replaced
- % the colour defined on the previous line. It is now c13 so both are usable.
- % NOTE(review): any slide that relied on c12 being this yellow must use c13.
- \definecolor{c13}{rgb}{1.0000,1.0000,0.6000}
- \usepackage{minted}
- \usemintedstyle{vs}
- %\usemintedstyle{borland}
- %\usemintedstyle{emacs}
- %\usemintedstyle{perldoc}
- %\usemintedstyle{friendly}
- %%\usemintedstyle{pastie}
- %%\usemintedstyle{vim}
- \usepackage{fontspec}
- \usepackage[nott]{inconsolata}
- \usepackage{booktabs}
- %<<< title, author, institute
- \title
- [What every programmer should know about \\ high performance computing]
- {What every programmer should know about \\ high performance computing}
- \subtitle{(instruction-level parallelism, memory)}
- \author[Dhairya Malhotra]{Codes: \url{https://github.com/dmalhotra/fwam2022} \\
- \phantom{.}\\
- Dhairya~Malhotra}
- %\institute{Flatiron Institute\\ \mbox{} \\ \pgfuseimage{FIbig} }
- %\institute{\pgfuseimage{FIbig} }
- \institute{\Large $F_\omega(\alpha+m)!$}
- \date[]{Oct 28, 2022}
- %>>>
- %<<< packages
- \usepackage{tikz}
- \usetikzlibrary{fit,shapes.geometric,arrows,calc,shapes,decorations.pathreplacing,patterns}
- \usepackage{pgfplots,pgfplotstable}
- \pgfplotsset{compat=1.17}
- \usepackage{mathtools}
- \usepackage{multirow}
- \usepackage{multimedia}
- \usepackage{media9}
- %\usepackage{movie15} %(obsolete)
- \usepackage{animate}
- \usepackage{fp}
- %\usepackage{enumitem}
- \usepackage{bm}
- \beamertemplateballitem % Numbered bullets
- \usepackage{xstring}
- \usepackage{mathtools}% Loads amsmath
- \usepackage{stmaryrd}
- % \vcenteredinclude{<content>}
- %   Typesets <content> into box register 0 as an \hbox, then places that box
- %   inside a \parbox whose width is the natural width of the box (\wd0).
- %   No position argument is passed to \parbox, so LaTeX's default (vertically
- %   centred) alignment applies. The \begingroup...\endgroup pair keeps the
- %   \setbox0 assignment local to this macro call.
- \newcommand{\vcenteredinclude}[1]{\begingroup\setbox0=\hbox{{#1}}\parbox{\wd0}{\box0}\endgroup}
- %%------------------------------------------------------------------------------
- %%- Latin-abbreviations
- %%------------------------------------------------------------------------------
- % \latinabbrev{<abbr>} prints <abbr>, which must be passed WITHOUT its final
- % dot, and peeks at the following token to decide whether to add the dot:
- %   * next token is "."  -> emit <abbr>\@ and let the dot already in the text
- %                           follow, so no doubled dot is produced;
- %   * any other token    -> emit <abbr>.\@
- % Spaces inside the definition are ignored under \ExplSyntaxOn, so the two
- % branches of the inner \peek_catcode:NTF test produce identical tokens.
- \usepackage{expl3}
- \ExplSyntaxOn
- \newcommand\latinabbrev[1]{
- \peek_meaning:NTF . {% Same as \@ifnextchar: is the next token a dot?
- #1\@}%
- { \peek_catcode:NTF a {% Check whether the next token has the same catcode as "a", i.e., is a letter
- #1.\@ }%
- {#1.\@}}}
- \ExplSyntaxOff
- % Shorthand macros. Omit the final dot from each definition; \latinabbrev
- % supplies it when the text does not already contain one.
- \def\eg{\latinabbrev{e.g}}
- \def\etal{\latinabbrev{et al}}
- \def\etc{\latinabbrev{etc}}
- \def\ie{\latinabbrev{i.e}}
- %>>>
- \begin{document}
- \setbeamercovered{transparent}% Dim out "inactive" elements
- \begin{frame}[t]%<<< Title
- \titlepage
- \end{frame}%>>>
- \input{intro}
- \input{ilp}
- \input{mem}
- \end{document}
- %%\input{openmp}
- % Examples:
- % Instruction level: polynomial evaluation, simple expressions (AXPY)
- % Compute bound: GEMM
- % Memory bound: AXPY, Gauss-Seidel / Gauss-Jacobi
- % Latency bound: sorting
- % Ideas to demonstrate:
- % Hyper threading
- % TOOLS:
- % godbolt
- % https://quick-bench.com/
- % Profiling: https://hpc-wiki.info/hpc/Compiler_Sanitizers
- % Debugging: -fsanitize=address
- % profile! profile! profile!
- %omp_get_wtime() / MPI_Wtime()
- % htop
- %NUMA:
- % numactl -H
- % export OMP_PLACES="{0},{1},{2},{3}"
- % numactl -l myBinary // local memory for each thread
- % Distributed memory
- % cost model
- % load balancing
- % minimizing communication
- %false sharing, caching,
- % GEMM cube volume and surface area
- % Programming languages: https://hpc-wiki.info/hpc/Programming_Languages
- % NUMA: https://hpc-wiki.info/hpc/Binding/Pinning
- % export OMP_PROC_BIND=close/spread
- % memory copy; OMP_NUM_THREADS=8
- % non-temporal writes
- % single thread can saturate memory bandwidth.
- % do not optimize single-threaded, it may not reflect parallel performance.
- % Diagnosing performance issues: https://hpc-wiki.info/hpc/Performance_Patterns
- % Runtime profiling: https://hpc-wiki.info/hpc/Runtime_profiling
|