#!/bin/bash
# single_node.sh
#
# Builds a table of single-node benchmark runs for ${EXEC} (OpenMP thread
# scaling and full-node performance, Laplace and Stokes test cases, uniform
# and non-uniform octrees) and passes it to scripts/.submit_jobs.sh.
#
# The table is stored as parallel indexed arrays: element i of every array
# below describes the same run.

# Number of cores on one node; used for the cores column and for the
# full-node thread counts.
CORES=16
# Executable to run, given relative to the work directory.
export EXEC=examples/bin/fmm_cheb

# Set run parameters
declare -a nodes=()
declare -a cores=()
declare -a mpi_proc=()
declare -a threads=()
declare -a testcase=()
declare -a n_pts=()
declare -a m=()
declare -a q=()
declare -a tol=()
declare -a depth=()
declare -a unif=()
declare -a adap=()
declare -a max_time=()
  18. ###################################################################################################
  19. # UNIFORM OCTREE, LAPLACE KERNEL, OMP SCALABILITY RESULTS #
  20. ###################################################################################################
  21. # m=10, q=14, octants=512, threads={1,2,4,8,16}
  22. nodes+=( 1 1 1 1 1 :)
  23. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  24. mpi_proc+=( 1 1 1 1 1 :)
  25. threads+=( 1 2 4 8 16 :)
  26. testcase+=( 1 1 1 1 1 :)
  27. n_pts+=( $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
  28. m+=( 10 10 10 10 10 :)
  29. q+=( 14 14 14 14 14 :)
  30. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  31. depth+=( 15 15 15 15 15 :)
  32. unif+=( 1 1 1 1 1 :)
  33. adap+=( 0 0 0 0 0 :)
  34. max_time+=( 320 160 80 40 20 :)
  35. # m=10, q=14, octants=4096, threads={1,2,4,8,16}
  36. nodes+=( 1 1 1 1 1 :)
  37. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  38. mpi_proc+=( 1 1 1 1 1 :)
  39. threads+=( 1 2 4 8 16 :)
  40. testcase+=( 1 1 1 1 1 :)
  41. n_pts+=( $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
  42. m+=( 10 10 10 10 10 :)
  43. q+=( 14 14 14 14 14 :)
  44. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  45. depth+=( 15 15 15 15 15 :)
  46. unif+=( 1 1 1 1 1 :)
  47. adap+=( 0 0 0 0 0 :)
  48. max_time+=( 2560 1280 640 320 160 :)
  49. # m=10, q=14, octants=32768, threads={1,2,4,8,16}
  50. nodes+=( 1 1 1 1 1 : :)
  51. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  52. mpi_proc+=( 1 1 1 1 1 : :)
  53. threads+=( 1 2 4 8 16 : :)
  54. testcase+=( 1 1 1 1 1 : :)
  55. n_pts+=( $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
  56. m+=( 10 10 10 10 10 : :)
  57. q+=( 14 14 14 14 14 : :)
  58. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  59. depth+=( 15 15 15 15 15 : :)
  60. unif+=( 1 1 1 1 1 : :)
  61. adap+=( 0 0 0 0 0 : :)
  62. max_time+=( 2560 1280 640 320 160 : :)
  63. ###################################################################################################
  64. # NON-UNIFORM OCTREE, LAPLACE KERNEL, OMP SCALABILITY RESULTS #
  65. ###################################################################################################
  66. # m=10, q=14, octants=939, threads={1,2,4,8,16}
  67. nodes+=( 1 1 1 1 1 :)
  68. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  69. mpi_proc+=( 1 1 1 1 1 :)
  70. threads+=( 1 2 4 8 16 :)
  71. testcase+=( 1 1 1 1 1 :)
  72. n_pts+=( 32 32 32 32 32 :)
  73. m+=( 10 10 10 10 10 :)
  74. q+=( 14 14 14 14 14 :)
  75. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  76. depth+=( 15 15 15 15 15 :)
  77. unif+=( 0 0 0 0 0 :)
  78. adap+=( 0 0 0 0 0 :)
  79. max_time+=( 320 160 80 40 20 :)
  80. # m=10, q=14, octants=5685, threads={1,2,4,8,16}
  81. nodes+=( 1 1 1 1 1 :)
  82. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  83. mpi_proc+=( 1 1 1 1 1 :)
  84. threads+=( 1 2 4 8 16 :)
  85. testcase+=( 1 1 1 1 1 :)
  86. n_pts+=( 256 256 256 256 256 :)
  87. m+=( 10 10 10 10 10 :)
  88. q+=( 14 14 14 14 14 :)
  89. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  90. depth+=( 15 15 15 15 15 :)
  91. unif+=( 0 0 0 0 0 :)
  92. adap+=( 0 0 0 0 0 :)
  93. max_time+=( 2560 1280 640 320 160 :)
  94. # m=10, q=14, octants=37416, threads={1,2,4,8,16}
  95. nodes+=( 1 1 1 1 1 : :)
  96. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  97. mpi_proc+=( 1 1 1 1 1 : :)
  98. threads+=( 1 2 4 8 16 : :)
  99. testcase+=( 1 1 1 1 1 : :)
  100. n_pts+=( 2048 2048 2048 2048 2048 : :)
  101. m+=( 10 10 10 10 10 : :)
  102. q+=( 14 14 14 14 14 : :)
  103. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  104. depth+=( 15 15 15 15 15 : :)
  105. unif+=( 0 0 0 0 0 : :)
  106. adap+=( 0 0 0 0 0 : :)
  107. max_time+=( 2560 1280 640 320 160 : :)
  108. ###################################################################################################
  109. ###################################################################################################
  110. # UNIFORM OCTREE, STOKES KERNEL, OMP SCALABILITY RESULTS #
  111. ###################################################################################################
  112. # m=10, q=14, octants=512, threads={1,2,4,8,16}
  113. nodes+=( 1 1 1 1 1 :)
  114. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  115. mpi_proc+=( 1 1 1 1 1 :)
  116. threads+=( 1 2 4 8 16 :)
  117. testcase+=( 3 3 3 3 3 :)
  118. n_pts+=( $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
  119. m+=( 10 10 10 10 10 :)
  120. q+=( 14 14 14 14 14 :)
  121. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  122. depth+=( 15 15 15 15 15 :)
  123. unif+=( 1 1 1 1 1 :)
  124. adap+=( 0 0 0 0 0 :)
  125. max_time+=( 3080 1540 720 360 180 :)
  126. # m=10, q=14, octants=4096, threads={1,2,4,8,16}
  127. nodes+=( 1 1 1 1 1 :)
  128. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  129. mpi_proc+=( 1 1 1 1 1 :)
  130. threads+=( 1 2 4 8 16 :)
  131. testcase+=( 3 3 3 3 3 :)
  132. n_pts+=( $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
  133. m+=( 10 10 10 10 10 :)
  134. q+=( 14 14 14 14 14 :)
  135. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  136. depth+=( 15 15 15 15 15 :)
  137. unif+=( 1 1 1 1 1 :)
  138. adap+=( 0 0 0 0 0 :)
  139. max_time+=( 23040 11520 5760 2880 1440 :)
  140. # m=10, q=14, octants=32768, threads={1,2,4,8,16}
  141. nodes+=( 1 1 1 1 1 : :)
  142. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  143. mpi_proc+=( 1 1 1 1 1 : :)
  144. threads+=( 1 2 4 8 16 : :)
  145. testcase+=( 3 3 3 3 3 : :)
  146. n_pts+=( $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
  147. m+=( 10 10 10 10 10 : :)
  148. q+=( 14 14 14 14 14 : :)
  149. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  150. depth+=( 15 15 15 15 15 : :)
  151. unif+=( 1 1 1 1 1 : :)
  152. adap+=( 0 0 0 0 0 : :)
  153. max_time+=( 184320 92160 46080 23040 11520 : :)
  154. ###################################################################################################
  155. # NON-UNIFORM OCTREE, STOKES KERNEL, OMP SCALABILITY RESULTS #
  156. ###################################################################################################
  157. # m=10, q=14, octants=939, threads={1,2,4,8,16}
  158. nodes+=( 1 1 1 1 1 :)
  159. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  160. mpi_proc+=( 1 1 1 1 1 :)
  161. threads+=( 1 2 4 8 16 :)
  162. testcase+=( 3 3 3 3 3 :)
  163. n_pts+=( 32 32 32 32 32 :)
  164. m+=( 10 10 10 10 10 :)
  165. q+=( 14 14 14 14 14 :)
  166. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  167. depth+=( 15 15 15 15 15 :)
  168. unif+=( 0 0 0 0 0 :)
  169. adap+=( 0 0 0 0 0 :)
  170. max_time+=( 3080 1540 720 360 180 :)
  171. # m=10, q=14, octants=5685, threads={1,2,4,8,16}
  172. nodes+=( 1 1 1 1 1 :)
  173. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  174. mpi_proc+=( 1 1 1 1 1 :)
  175. threads+=( 1 2 4 8 16 :)
  176. testcase+=( 3 3 3 3 3 :)
  177. n_pts+=( 256 256 256 256 256 :)
  178. m+=( 10 10 10 10 10 :)
  179. q+=( 14 14 14 14 14 :)
  180. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  181. depth+=( 15 15 15 15 15 :)
  182. unif+=( 0 0 0 0 0 :)
  183. adap+=( 0 0 0 0 0 :)
  184. max_time+=( 23040 11520 5760 2880 1440 :)
  185. # m=10, q=14, octants=37416, threads={1,2,4,8,16}
  186. nodes+=( 1 1 1 1 1 : : : :)
  187. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : : : :)
  188. mpi_proc+=( 1 1 1 1 1 : : : :)
  189. threads+=( 1 2 4 8 16 : : : :)
  190. testcase+=( 3 3 3 3 3 : : : :)
  191. n_pts+=( 2048 2048 2048 2048 2048 : : : :)
  192. m+=( 10 10 10 10 10 : : : :)
  193. q+=( 14 14 14 14 14 : : : :)
  194. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : : : :)
  195. depth+=( 15 15 15 15 15 : : : :)
  196. unif+=( 0 0 0 0 0 : : : :)
  197. adap+=( 0 0 0 0 0 : : : :)
  198. max_time+=( 184320 92160 46080 23040 11520 : : : :)
  199. ###################################################################################################
  200. ###################################################################################################
  201. # UNIFORM OCTREE, LAPLACE KERNEL, SINGLE NODE PERFORMANCE #
  202. ###################################################################################################
  203. # m=6, q=9, octants={512,4096,3276}
  204. nodes+=( 1 1 1 :)
  205. cores+=( ${CORES} ${CORES} ${CORES} :)
  206. mpi_proc+=( 1 1 1 :)
  207. threads+=( ${CORES} ${CORES} ${CORES} :)
  208. testcase+=( 1 1 1 :)
  209. n_pts+=( $((8**3)) $((8**4)) $((8**5)) :)
  210. m+=( 6 6 6 :)
  211. q+=( 9 9 9 :)
  212. tol+=( 1e-0 1e-0 1e-0 :)
  213. depth+=( 15 15 15 :)
  214. unif+=( 1 1 1 :)
  215. adap+=( 0 0 0 :)
  216. max_time+=( 20 160 1280 :)
  217. # m=10, q=14, octants={512,4096,3276}
  218. nodes+=( 1 1 1 : :)
  219. cores+=( ${CORES} ${CORES} ${CORES} : :)
  220. mpi_proc+=( 1 1 1 : :)
  221. threads+=( ${CORES} ${CORES} ${CORES} : :)
  222. testcase+=( 1 1 1 : :)
  223. n_pts+=( $((8**3)) $((8**4)) $((8**5)) : :)
  224. m+=( 10 10 10 : :)
  225. q+=( 14 14 14 : :)
  226. tol+=( 1e-0 1e-0 1e-0 : :)
  227. depth+=( 15 15 15 : :)
  228. unif+=( 1 1 1 : :)
  229. adap+=( 0 0 0 : :)
  230. max_time+=( 20 160 1280 : :)
  231. ###################################################################################################
  232. # NON-UNIFORM OCTREE, LAPLACE KERNEL, SINGLE NODE PERFORMANCE #
  233. ###################################################################################################
  234. # m=6, q=9, octants={512,4096,3276}
  235. nodes+=( 1 1 1 :)
  236. cores+=( ${CORES} ${CORES} ${CORES} :)
  237. mpi_proc+=( 1 1 1 :)
  238. threads+=( ${CORES} ${CORES} ${CORES} :)
  239. testcase+=( 1 1 1 :)
  240. n_pts+=( 32 256 2048 :)
  241. m+=( 6 6 6 :)
  242. q+=( 9 9 9 :)
  243. tol+=( 1e-0 1e-0 1e-0 :)
  244. depth+=( 15 15 15 :)
  245. unif+=( 0 0 0 :)
  246. adap+=( 0 0 0 :)
  247. max_time+=( 20 160 1280 :)
  248. # m=10, q=14, octants={512,4096,3276}
  249. nodes+=( 1 1 1 : :)
  250. cores+=( ${CORES} ${CORES} ${CORES} : :)
  251. mpi_proc+=( 1 1 1 : :)
  252. threads+=( ${CORES} ${CORES} ${CORES} : :)
  253. testcase+=( 1 1 1 : :)
  254. n_pts+=( 32 256 2048 : :)
  255. m+=( 10 10 10 : :)
  256. q+=( 14 14 14 : :)
  257. tol+=( 1e-0 1e-0 1e-0 : :)
  258. depth+=( 15 15 15 : :)
  259. unif+=( 0 0 0 : :)
  260. adap+=( 0 0 0 : :)
  261. max_time+=( 20 160 1280 : :)
  262. ###################################################################################################
  263. # UNIFORM OCTREE, STOKES KERNEL, SINGLE NODE PERFORMANCE #
  264. ###################################################################################################
  265. # m=6, q=9, octants={512,4096,3276}
  266. nodes+=( 1 1 1 :)
  267. cores+=( ${CORES} ${CORES} ${CORES} :)
  268. mpi_proc+=( 1 1 1 :)
  269. threads+=( ${CORES} ${CORES} ${CORES} :)
  270. testcase+=( 3 3 3 :)
  271. n_pts+=( $((8**3)) $((8**4)) $((8**5)) :)
  272. m+=( 6 6 6 :)
  273. q+=( 9 9 9 :)
  274. tol+=( 1e-0 1e-0 1e-0 :)
  275. depth+=( 15 15 15 :)
  276. unif+=( 1 1 1 :)
  277. adap+=( 0 0 0 :)
  278. max_time+=( 180 1440 11520 :)
  279. # m=10, q=14, octants={512,4096,3276}
  280. nodes+=( 1 1 1 : :)
  281. cores+=( ${CORES} ${CORES} ${CORES} : :)
  282. mpi_proc+=( 1 1 1 : :)
  283. threads+=( ${CORES} ${CORES} ${CORES} : :)
  284. testcase+=( 3 3 3 : :)
  285. n_pts+=( $((8**3)) $((8**4)) $((8**5)) : :)
  286. m+=( 10 10 10 : :)
  287. q+=( 14 14 14 : :)
  288. tol+=( 1e-0 1e-0 1e-0 : :)
  289. depth+=( 15 15 15 : :)
  290. unif+=( 1 1 1 : :)
  291. adap+=( 0 0 0 : :)
  292. max_time+=( 180 1440 11520 : :)
  293. ###################################################################################################
  294. # NON-UNIFORM OCTREE, STOKES KERNEL, SINGLE NODE PERFORMANCE #
  295. ###################################################################################################
  296. # m=6, q=9, octants={512,4096,3276}
  297. nodes+=( 1 1 1 :)
  298. cores+=( ${CORES} ${CORES} ${CORES} :)
  299. mpi_proc+=( 1 1 1 :)
  300. threads+=( ${CORES} ${CORES} ${CORES} :)
  301. testcase+=( 3 3 3 :)
  302. n_pts+=( 32 256 2048 :)
  303. m+=( 6 6 6 :)
  304. q+=( 9 9 9 :)
  305. tol+=( 1e-0 1e-0 1e-0 :)
  306. depth+=( 15 15 15 :)
  307. unif+=( 0 0 0 :)
  308. adap+=( 0 0 0 :)
  309. max_time+=( 180 1440 11520 :)
  310. # m=10, q=14, octants={512,4096,3276}
  311. nodes+=( 1 1 1 : :)
  312. cores+=( ${CORES} ${CORES} ${CORES} : :)
  313. mpi_proc+=( 1 1 1 : :)
  314. threads+=( ${CORES} ${CORES} ${CORES} : :)
  315. testcase+=( 3 3 3 : :)
  316. n_pts+=( 32 256 2048 : :)
  317. m+=( 10 10 10 : :)
  318. q+=( 14 14 14 : :)
  319. tol+=( 1e-0 1e-0 1e-0 : :)
  320. depth+=( 15 15 15 : :)
  321. unif+=( 0 0 0 : :)
  322. adap+=( 0 0 0 : :)
  323. max_time+=( 180 1440 11520 : :)
  324. ###################################################################################################
  325. # Export arrays
  326. export nodes_="$(declare -p nodes)";
  327. export cores_="$(declare -p cores)";
  328. export mpi_proc_="$(declare -p mpi_proc)";
  329. export threads_="$(declare -p threads)";
  330. export testcase_="$(declare -p testcase)";
  331. export n_pts_="$(declare -p n_pts)";
  332. export m_="$(declare -p m)";
  333. export q_="$(declare -p q)";
  334. export tol_="$(declare -p tol)";
  335. export depth_="$(declare -p depth)";
  336. export unif_="$(declare -p unif)";
  337. export adap_="$(declare -p adap)";
  338. export max_time_="$(declare -p max_time)";
  339. export RESULT_FNAME=$(basename ${0%.*}).out;
  340. export WORK_DIR=$(dirname ${PWD}/$0)/..
  341. cd ${WORK_DIR}
  342. TERM_WIDTH=$(stty size | cut -d ' ' -f 2)
  343. ./scripts/.submit_jobs.sh | cut -b -${TERM_WIDTH}