#!/bin/bash
# single_node.sh -- run-parameter table for the single-node fmm_cheb runs.
#
# One run is described by the elements found at the same index in each of
# the arrays declared below.  The rest of this file appends groups of runs
# to those arrays, serializes them into exported strings and then calls
# scripts/.submit_jobs.sh.

# Value stored in the `cores` array for every run; the single-node
# performance sections also use it as the thread count.
CORES=16

# Executable handed to the submit script through the environment.
# NOTE(review): relative path -- presumably resolved against WORK_DIR, which
# the end of this file changes into; confirm in scripts/.submit_jobs.sh.
export EXEC=examples/bin/fmm_cheb

# Set run parameters
# Every array starts out empty so that values inherited from the calling
# environment cannot leak into the table.
declare -a nodes=()
declare -a cores=()
declare -a mpi_proc=()
declare -a threads=()
declare -a testcase=()
declare -a n_pts=()
# m_pts is appended to and exported like the others, so it is initialised
# here as well instead of being created implicitly by the first `+=`.
declare -a m_pts=()
declare -a m=()
declare -a q=()
declare -a tol=()
declare -a depth=()
declare -a unif=()
declare -a adap=()
declare -a max_time=()
  18. ###################################################################################################
  19. # UNIFORM OCTREE, LAPLACE KERNEL, OMP SCALABILITY RESULTS #
  20. ###################################################################################################
  21. # m=10, q=14, octants=512, threads={1,2,4,8,16}
  22. nodes+=( 1 1 1 1 1 :)
  23. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  24. mpi_proc+=( 1 1 1 1 1 :)
  25. threads+=( 1 2 4 8 16 :)
  26. testcase+=( 1 1 1 1 1 :)
  27. n_pts+=( $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
  28. m_pts+=( 1 1 1 1 1 :)
  29. m+=( 10 10 10 10 10 :)
  30. q+=( 14 14 14 14 14 :)
  31. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  32. depth+=( 15 15 15 15 15 :)
  33. unif+=( 1 1 1 1 1 :)
  34. adap+=( 0 0 0 0 0 :)
  35. max_time+=( 320 160 80 40 20 :)
  36. # m=10, q=14, octants=4096, threads={1,2,4,8,16}
  37. nodes+=( 1 1 1 1 1 :)
  38. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  39. mpi_proc+=( 1 1 1 1 1 :)
  40. threads+=( 1 2 4 8 16 :)
  41. testcase+=( 1 1 1 1 1 :)
  42. n_pts+=( $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
  43. m_pts+=( 1 1 1 1 1 :)
  44. m+=( 10 10 10 10 10 :)
  45. q+=( 14 14 14 14 14 :)
  46. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  47. depth+=( 15 15 15 15 15 :)
  48. unif+=( 1 1 1 1 1 :)
  49. adap+=( 0 0 0 0 0 :)
  50. max_time+=( 2560 1280 640 320 160 :)
  51. # m=10, q=14, octants=32768, threads={1,2,4,8,16}
  52. nodes+=( 1 1 1 1 1 : :)
  53. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  54. mpi_proc+=( 1 1 1 1 1 : :)
  55. threads+=( 1 2 4 8 16 : :)
  56. testcase+=( 1 1 1 1 1 : :)
  57. n_pts+=( $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
  58. m_pts+=( 1 1 1 1 1 : :)
  59. m+=( 10 10 10 10 10 : :)
  60. q+=( 14 14 14 14 14 : :)
  61. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  62. depth+=( 15 15 15 15 15 : :)
  63. unif+=( 1 1 1 1 1 : :)
  64. adap+=( 0 0 0 0 0 : :)
  65. max_time+=( 2560 1280 640 320 160 : :)
  66. ###################################################################################################
  67. # NON-UNIFORM OCTREE, LAPLACE KERNEL, OMP SCALABILITY RESULTS #
  68. ###################################################################################################
  69. # m=10, q=14, octants=939, threads={1,2,4,8,16}
  70. nodes+=( 1 1 1 1 1 :)
  71. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  72. mpi_proc+=( 1 1 1 1 1 :)
  73. threads+=( 1 2 4 8 16 :)
  74. testcase+=( 1 1 1 1 1 :)
  75. n_pts+=( 32 32 32 32 32 :)
  76. m_pts+=( 1 1 1 1 1 :)
  77. m+=( 10 10 10 10 10 :)
  78. q+=( 14 14 14 14 14 :)
  79. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  80. depth+=( 15 15 15 15 15 :)
  81. unif+=( 0 0 0 0 0 :)
  82. adap+=( 0 0 0 0 0 :)
  83. max_time+=( 320 160 80 40 20 :)
  84. # m=10, q=14, octants=5685, threads={1,2,4,8,16}
  85. nodes+=( 1 1 1 1 1 :)
  86. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  87. mpi_proc+=( 1 1 1 1 1 :)
  88. threads+=( 1 2 4 8 16 :)
  89. testcase+=( 1 1 1 1 1 :)
  90. n_pts+=( 256 256 256 256 256 :)
  91. m_pts+=( 1 1 1 1 1 :)
  92. m+=( 10 10 10 10 10 :)
  93. q+=( 14 14 14 14 14 :)
  94. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  95. depth+=( 15 15 15 15 15 :)
  96. unif+=( 0 0 0 0 0 :)
  97. adap+=( 0 0 0 0 0 :)
  98. max_time+=( 2560 1280 640 320 160 :)
  99. # m=10, q=14, octants=37416, threads={1,2,4,8,16}
  100. nodes+=( 1 1 1 1 1 : :)
  101. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  102. mpi_proc+=( 1 1 1 1 1 : :)
  103. threads+=( 1 2 4 8 16 : :)
  104. testcase+=( 1 1 1 1 1 : :)
  105. n_pts+=( 2048 2048 2048 2048 2048 : :)
  106. m_pts+=( 1 1 1 1 1 : :)
  107. m+=( 10 10 10 10 10 : :)
  108. q+=( 14 14 14 14 14 : :)
  109. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  110. depth+=( 15 15 15 15 15 : :)
  111. unif+=( 0 0 0 0 0 : :)
  112. adap+=( 0 0 0 0 0 : :)
  113. max_time+=( 2560 1280 640 320 160 : :)
  114. ###################################################################################################
  115. ###################################################################################################
  116. # UNIFORM OCTREE, STOKES KERNEL, OMP SCALABILITY RESULTS #
  117. ###################################################################################################
  118. # m=10, q=14, octants=512, threads={1,2,4,8,16}
  119. nodes+=( 1 1 1 1 1 :)
  120. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  121. mpi_proc+=( 1 1 1 1 1 :)
  122. threads+=( 1 2 4 8 16 :)
  123. testcase+=( 3 3 3 3 3 :)
  124. n_pts+=( $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
  125. m_pts+=( 1 1 1 1 1 :)
  126. m+=( 10 10 10 10 10 :)
  127. q+=( 14 14 14 14 14 :)
  128. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  129. depth+=( 15 15 15 15 15 :)
  130. unif+=( 1 1 1 1 1 :)
  131. adap+=( 0 0 0 0 0 :)
  132. max_time+=( 3080 1540 720 360 180 :)
  133. # m=10, q=14, octants=4096, threads={1,2,4,8,16}
  134. nodes+=( 1 1 1 1 1 :)
  135. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  136. mpi_proc+=( 1 1 1 1 1 :)
  137. threads+=( 1 2 4 8 16 :)
  138. testcase+=( 3 3 3 3 3 :)
  139. n_pts+=( $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
  140. m_pts+=( 1 1 1 1 1 :)
  141. m+=( 10 10 10 10 10 :)
  142. q+=( 14 14 14 14 14 :)
  143. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  144. depth+=( 15 15 15 15 15 :)
  145. unif+=( 1 1 1 1 1 :)
  146. adap+=( 0 0 0 0 0 :)
  147. max_time+=( 23040 11520 5760 2880 1440 :)
  148. # m=10, q=14, octants=32768, threads={1,2,4,8,16}
  149. nodes+=( 1 1 1 1 1 : :)
  150. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  151. mpi_proc+=( 1 1 1 1 1 : :)
  152. threads+=( 1 2 4 8 16 : :)
  153. testcase+=( 3 3 3 3 3 : :)
  154. n_pts+=( $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
  155. m_pts+=( 1 1 1 1 1 : :)
  156. m+=( 10 10 10 10 10 : :)
  157. q+=( 14 14 14 14 14 : :)
  158. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  159. depth+=( 15 15 15 15 15 : :)
  160. unif+=( 1 1 1 1 1 : :)
  161. adap+=( 0 0 0 0 0 : :)
  162. max_time+=( 184320 92160 46080 23040 11520 : :)
  163. ###################################################################################################
  164. # NON-UNIFORM OCTREE, STOKES KERNEL, OMP SCALABILITY RESULTS #
  165. ###################################################################################################
  166. # m=10, q=14, octants=939, threads={1,2,4,8,16}
  167. nodes+=( 1 1 1 1 1 :)
  168. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  169. mpi_proc+=( 1 1 1 1 1 :)
  170. threads+=( 1 2 4 8 16 :)
  171. testcase+=( 3 3 3 3 3 :)
  172. n_pts+=( 32 32 32 32 32 :)
  173. m_pts+=( 1 1 1 1 1 :)
  174. m+=( 10 10 10 10 10 :)
  175. q+=( 14 14 14 14 14 :)
  176. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  177. depth+=( 15 15 15 15 15 :)
  178. unif+=( 0 0 0 0 0 :)
  179. adap+=( 0 0 0 0 0 :)
  180. max_time+=( 3080 1540 720 360 180 :)
  181. # m=10, q=14, octants=5685, threads={1,2,4,8,16}
  182. nodes+=( 1 1 1 1 1 :)
  183. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  184. mpi_proc+=( 1 1 1 1 1 :)
  185. threads+=( 1 2 4 8 16 :)
  186. testcase+=( 3 3 3 3 3 :)
  187. n_pts+=( 256 256 256 256 256 :)
  188. m_pts+=( 1 1 1 1 1 :)
  189. m+=( 10 10 10 10 10 :)
  190. q+=( 14 14 14 14 14 :)
  191. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  192. depth+=( 15 15 15 15 15 :)
  193. unif+=( 0 0 0 0 0 :)
  194. adap+=( 0 0 0 0 0 :)
  195. max_time+=( 23040 11520 5760 2880 1440 :)
  196. # m=10, q=14, octants=37416, threads={1,2,4,8,16}
  197. nodes+=( 1 1 1 1 1 : : : :)
  198. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : : : :)
  199. mpi_proc+=( 1 1 1 1 1 : : : :)
  200. threads+=( 1 2 4 8 16 : : : :)
  201. testcase+=( 3 3 3 3 3 : : : :)
  202. n_pts+=( 2048 2048 2048 2048 2048 : : : :)
  203. m_pts+=( 1 1 1 1 1 : : : :)
  204. m+=( 10 10 10 10 10 : : : :)
  205. q+=( 14 14 14 14 14 : : : :)
  206. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : : : :)
  207. depth+=( 15 15 15 15 15 : : : :)
  208. unif+=( 0 0 0 0 0 : : : :)
  209. adap+=( 0 0 0 0 0 : : : :)
  210. max_time+=( 184320 92160 46080 23040 11520 : : : :)
  211. ###################################################################################################
  212. ###################################################################################################
  213. # UNIFORM OCTREE, LAPLACE KERNEL, SINGLE NODE PERFORMANCE #
  214. ###################################################################################################
  215. # m=6, q=9, octants={512,4096,3276}
  216. nodes+=( 1 1 1 :)
  217. cores+=( ${CORES} ${CORES} ${CORES} :)
  218. mpi_proc+=( 1 1 1 :)
  219. threads+=( ${CORES} ${CORES} ${CORES} :)
  220. testcase+=( 1 1 1 :)
  221. n_pts+=( $((8**3)) $((8**4)) $((8**5)) :)
  222. m_pts+=( 1 1 1 :)
  223. m+=( 6 6 6 :)
  224. q+=( 9 9 9 :)
  225. tol+=( 1e-0 1e-0 1e-0 :)
  226. depth+=( 15 15 15 :)
  227. unif+=( 1 1 1 :)
  228. adap+=( 0 0 0 :)
  229. max_time+=( 20 160 1280 :)
  230. # m=10, q=14, octants={512,4096,3276}
  231. nodes+=( 1 1 1 : :)
  232. cores+=( ${CORES} ${CORES} ${CORES} : :)
  233. mpi_proc+=( 1 1 1 : :)
  234. threads+=( ${CORES} ${CORES} ${CORES} : :)
  235. testcase+=( 1 1 1 : :)
  236. n_pts+=( $((8**3)) $((8**4)) $((8**5)) : :)
  237. m_pts+=( 1 1 1 : :)
  238. m+=( 10 10 10 : :)
  239. q+=( 14 14 14 : :)
  240. tol+=( 1e-0 1e-0 1e-0 : :)
  241. depth+=( 15 15 15 : :)
  242. unif+=( 1 1 1 : :)
  243. adap+=( 0 0 0 : :)
  244. max_time+=( 20 160 1280 : :)
  245. ###################################################################################################
  246. # NON-UNIFORM OCTREE, LAPLACE KERNEL, SINGLE NODE PERFORMANCE #
  247. ###################################################################################################
  248. # m=6, q=9, octants={512,4096,3276}
  249. nodes+=( 1 1 1 :)
  250. cores+=( ${CORES} ${CORES} ${CORES} :)
  251. mpi_proc+=( 1 1 1 :)
  252. threads+=( ${CORES} ${CORES} ${CORES} :)
  253. testcase+=( 1 1 1 :)
  254. n_pts+=( 32 256 2048 :)
  255. m_pts+=( 1 1 1 :)
  256. m+=( 6 6 6 :)
  257. q+=( 9 9 9 :)
  258. tol+=( 1e-0 1e-0 1e-0 :)
  259. depth+=( 15 15 15 :)
  260. unif+=( 0 0 0 :)
  261. adap+=( 0 0 0 :)
  262. max_time+=( 20 160 1280 :)
  263. # m=10, q=14, octants={512,4096,3276}
  264. nodes+=( 1 1 1 : :)
  265. cores+=( ${CORES} ${CORES} ${CORES} : :)
  266. mpi_proc+=( 1 1 1 : :)
  267. threads+=( ${CORES} ${CORES} ${CORES} : :)
  268. testcase+=( 1 1 1 : :)
  269. n_pts+=( 32 256 2048 : :)
  270. m_pts+=( 1 1 1 : :)
  271. m+=( 10 10 10 : :)
  272. q+=( 14 14 14 : :)
  273. tol+=( 1e-0 1e-0 1e-0 : :)
  274. depth+=( 15 15 15 : :)
  275. unif+=( 0 0 0 : :)
  276. adap+=( 0 0 0 : :)
  277. max_time+=( 20 160 1280 : :)
  278. ###################################################################################################
  279. # UNIFORM OCTREE, STOKES KERNEL, SINGLE NODE PERFORMANCE #
  280. ###################################################################################################
  281. # m=6, q=9, octants={512,4096,3276}
  282. nodes+=( 1 1 1 :)
  283. cores+=( ${CORES} ${CORES} ${CORES} :)
  284. mpi_proc+=( 1 1 1 :)
  285. threads+=( ${CORES} ${CORES} ${CORES} :)
  286. testcase+=( 3 3 3 :)
  287. n_pts+=( $((8**3)) $((8**4)) $((8**5)) :)
  288. m_pts+=( 1 1 1 :)
  289. m+=( 6 6 6 :)
  290. q+=( 9 9 9 :)
  291. tol+=( 1e-0 1e-0 1e-0 :)
  292. depth+=( 15 15 15 :)
  293. unif+=( 1 1 1 :)
  294. adap+=( 0 0 0 :)
  295. max_time+=( 180 1440 11520 :)
  296. # m=10, q=14, octants={512,4096,3276}
  297. nodes+=( 1 1 1 : :)
  298. cores+=( ${CORES} ${CORES} ${CORES} : :)
  299. mpi_proc+=( 1 1 1 : :)
  300. threads+=( ${CORES} ${CORES} ${CORES} : :)
  301. testcase+=( 3 3 3 : :)
  302. n_pts+=( $((8**3)) $((8**4)) $((8**5)) : :)
  303. m_pts+=( 1 1 1 : :)
  304. m+=( 10 10 10 : :)
  305. q+=( 14 14 14 : :)
  306. tol+=( 1e-0 1e-0 1e-0 : :)
  307. depth+=( 15 15 15 : :)
  308. unif+=( 1 1 1 : :)
  309. adap+=( 0 0 0 : :)
  310. max_time+=( 180 1440 11520 : :)
  311. ###################################################################################################
  312. # NON-UNIFORM OCTREE, STOKES KERNEL, SINGLE NODE PERFORMANCE #
  313. ###################################################################################################
  314. # m=6, q=9, octants={512,4096,3276}
  315. nodes+=( 1 1 1 :)
  316. cores+=( ${CORES} ${CORES} ${CORES} :)
  317. mpi_proc+=( 1 1 1 :)
  318. threads+=( ${CORES} ${CORES} ${CORES} :)
  319. testcase+=( 3 3 3 :)
  320. n_pts+=( 32 256 2048 :)
  321. m_pts+=( 1 1 1 :)
  322. m+=( 6 6 6 :)
  323. q+=( 9 9 9 :)
  324. tol+=( 1e-0 1e-0 1e-0 :)
  325. depth+=( 15 15 15 :)
  326. unif+=( 0 0 0 :)
  327. adap+=( 0 0 0 :)
  328. max_time+=( 180 1440 11520 :)
  329. # m=10, q=14, octants={512,4096,3276}
  330. nodes+=( 1 1 1 : :)
  331. cores+=( ${CORES} ${CORES} ${CORES} : :)
  332. mpi_proc+=( 1 1 1 : :)
  333. threads+=( ${CORES} ${CORES} ${CORES} : :)
  334. testcase+=( 3 3 3 : :)
  335. n_pts+=( 32 256 2048 : :)
  336. m_pts+=( 1 1 1 : :)
  337. m+=( 10 10 10 : :)
  338. q+=( 14 14 14 : :)
  339. tol+=( 1e-0 1e-0 1e-0 : :)
  340. depth+=( 15 15 15 : :)
  341. unif+=( 0 0 0 : :)
  342. adap+=( 0 0 0 : :)
  343. max_time+=( 180 1440 11520 : :)
  344. ###################################################################################################
  345. # Export arrays
  346. export nodes_="$(declare -p nodes)";
  347. export cores_="$(declare -p cores)";
  348. export mpi_proc_="$(declare -p mpi_proc)";
  349. export threads_="$(declare -p threads)";
  350. export testcase_="$(declare -p testcase)";
  351. export n_pts_="$(declare -p n_pts)";
  352. export m_pts_="$(declare -p m_pts)";
  353. export m_="$(declare -p m)";
  354. export q_="$(declare -p q)";
  355. export tol_="$(declare -p tol)";
  356. export depth_="$(declare -p depth)";
  357. export unif_="$(declare -p unif)";
  358. export adap_="$(declare -p adap)";
  359. export max_time_="$(declare -p max_time)";
  360. export RESULT_FNAME=$(basename ${0%.*}).out;
  361. export WORK_DIR=$(dirname ${PWD}/$0)/..
  362. cd ${WORK_DIR}
  363. TERM_WIDTH=$(stty size | cut -d ' ' -f 2)
  364. ./scripts/.submit_jobs.sh | cut -b -${TERM_WIDTH}