wscal.sh 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. #!/bin/bash
  2. CORES=16;
  3. export EXEC=examples/bin/fmm_cheb
  4. # Set run parameters
  5. declare -a nodes=();
  6. declare -a cores=();
  7. declare -a mpi_proc=();
  8. declare -a threads=();
  9. declare -a testcase=();
  10. declare -a n_pts=();
  11. declare -a m=();
  12. declare -a q=();
  13. declare -a tol=();
  14. declare -a depth=();
  15. declare -a unif=();
  16. declare -a adap=();
  17. declare -a max_time=();
  18. ###################################################################################################
  19. # UNIFORM OCTREE, LAPLACE KERNEL, WEAK SCALABILITY #
  20. ###################################################################################################
  21. # m=10, q=14, octants=64k oct/node
  22. nodes+=( 4 32 256 2048 16384 :)
  23. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  24. mpi_proc+=( 4 32 256 2048 16384 :)
  25. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  26. testcase+=( 1 1 1 1 1 :)
  27. n_pts+=( $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
  28. m_pts+=( 1 1 1 1 1 :)
  29. m+=( 10 10 10 10 10 :)
  30. q+=( 14 14 14 14 14 :)
  31. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  32. depth+=( 15 15 15 15 15 :)
  33. unif+=( 1 1 1 1 1 :)
  34. adap+=( 0 0 0 0 0 :)
  35. max_time+=( 800 800 800 800 800 :)
  36. # m=10, q=14, octants=32k oct/node
  37. nodes+=( 1 8 64 512 4096 32768 :)
  38. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  39. mpi_proc+=( 1 8 64 512 4096 32768 :)
  40. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  41. testcase+=( 1 1 1 1 1 1 :)
  42. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
  43. m_pts+=( 1 1 1 1 1 1 :)
  44. m+=( 10 10 10 10 10 10 :)
  45. q+=( 14 14 14 14 14 14 :)
  46. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  47. depth+=( 15 15 15 15 15 15 :)
  48. unif+=( 1 1 1 1 1 1 :)
  49. adap+=( 0 0 0 0 0 0 :)
  50. max_time+=( 400 400 400 400 400 400 :)
  51. # m=10, q=14, octants=16k oct/node
  52. nodes+=( 2 16 128 1024 8192 :)
  53. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  54. mpi_proc+=( 2 16 128 1024 8192 :)
  55. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  56. testcase+=( 1 1 1 1 1 :)
  57. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
  58. m_pts+=( 1 1 1 1 1 :)
  59. m+=( 10 10 10 10 10 :)
  60. q+=( 14 14 14 14 14 :)
  61. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  62. depth+=( 15 15 15 15 15 :)
  63. unif+=( 1 1 1 1 1 :)
  64. adap+=( 0 0 0 0 0 :)
  65. max_time+=( 200 200 200 200 200 :)
  66. # m=10, q=14, octants=8k oct/node
  67. nodes+=( 4 32 256 2048 16384 : :)
  68. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  69. mpi_proc+=( 4 32 256 2048 16384 : :)
  70. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  71. testcase+=( 1 1 1 1 1 : :)
  72. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) : :)
  73. m_pts+=( 1 1 1 1 1 : :)
  74. m+=( 10 10 10 10 10 : :)
  75. q+=( 14 14 14 14 14 : :)
  76. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  77. depth+=( 15 15 15 15 15 : :)
  78. unif+=( 1 1 1 1 1 : :)
  79. adap+=( 0 0 0 0 0 : :)
  80. max_time+=( 100 100 100 100 100 : :)
  81. ###################################################################################################
  82. # NON-UNIFORM OCTREE, LAPLACE KERNEL, WEAK SCALABILITY #
  83. ###################################################################################################
  84. # m=10, q=13, octants=16k oct/node
  85. nodes+=( 1 4 16 64 256 1024 4096 :)
  86. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  87. mpi_proc+=( 1 4 16 64 256 1024 4096 :)
  88. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  89. testcase+=( 1 1 1 1 1 1 1 :)
  90. n_pts+=( $((2**20)) $((2**22)) $((2**24)) $((2**26)) $((2**28)) $((2**30)) $((2**32)) :)
  91. m_pts+=( 500 500 500 500 500 500 500 :)
  92. m+=( 10 10 10 10 10 10 10 :)
  93. q+=( 13 13 13 13 13 13 13 :)
  94. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  95. depth+=( 30 30 30 30 30 30 30 :)
  96. unif+=( 0 0 0 0 0 0 0 :)
  97. adap+=( 0 0 0 0 0 0 0 :)
  98. max_time+=( 500 500 500 500 500 500 500 :)
  99. # m=10, q=13, octants=32k oct/node
  100. nodes+=( 1 4 16 64 256 1024 4096 :)
  101. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  102. mpi_proc+=( 1 4 16 64 256 1024 4096 :)
  103. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  104. testcase+=( 1 1 1 1 1 1 1 :)
  105. n_pts+=( $((2**21)) $((2**23)) $((2**25)) $((2**27)) $((2**29)) $((2**31)) $((2**33)) :)
  106. m_pts+=( 500 500 500 500 500 500 500 :)
  107. m+=( 10 10 10 10 10 10 10 :)
  108. q+=( 13 13 13 13 13 13 13 :)
  109. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  110. depth+=( 30 30 30 30 30 30 30 :)
  111. unif+=( 0 0 0 0 0 0 0 :)
  112. adap+=( 0 0 0 0 0 0 0 :)
  113. max_time+=( 500 500 500 500 500 500 500 :)
  114. ###################################################################################################
  115. # UNIFORM OCTREE, STOKES KERNEL, WEAK SCALABILITY #
  116. ###################################################################################################
  117. # m=10, q=14, octants=32k oct/node
  118. nodes+=( 1 8 64 512 4096 32768 :)
  119. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  120. mpi_proc+=( 1 8 64 512 4096 32768 :)
  121. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  122. testcase+=( 3 3 3 3 3 3 :)
  123. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
  124. m_pts+=( 1 1 1 1 1 1 :)
  125. m+=( 10 10 10 10 10 10 :)
  126. q+=( 14 14 14 14 14 14 :)
  127. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  128. depth+=( 15 15 15 15 15 15 :)
  129. unif+=( 1 1 1 1 1 1 :)
  130. adap+=( 0 0 0 0 0 0 :)
  131. max_time+=( 2400 2400 2400 2400 2400 2400 :)
  132. # m=10, q=14, octants=16k oct/node
  133. nodes+=( 2 16 128 1024 8192 :)
  134. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  135. mpi_proc+=( 2 16 128 1024 8192 :)
  136. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  137. testcase+=( 3 3 3 3 3 :)
  138. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
  139. m_pts+=( 1 1 1 1 1 :)
  140. m+=( 10 10 10 10 10 :)
  141. q+=( 14 14 14 14 14 :)
  142. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  143. depth+=( 15 15 15 15 15 :)
  144. unif+=( 1 1 1 1 1 :)
  145. adap+=( 0 0 0 0 0 :)
  146. max_time+=( 1200 1200 1200 1200 1200 :)
  147. # m=10, q=14, octants=8k oct/node
  148. nodes+=( 4 32 256 2048 16384 : :)
  149. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  150. mpi_proc+=( 4 32 256 2048 16384 : :)
  151. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} : :)
  152. testcase+=( 3 3 3 3 3 : :)
  153. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) : :)
  154. m_pts+=( 1 1 1 1 1 : :)
  155. m+=( 10 10 10 10 10 : :)
  156. q+=( 14 14 14 14 14 : :)
  157. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 : :)
  158. depth+=( 15 15 15 15 15 : :)
  159. unif+=( 1 1 1 1 1 : :)
  160. adap+=( 0 0 0 0 0 : :)
  161. max_time+=( 600 600 600 600 600 : :)
  162. ###################################################################################################
  163. # UNIFORM OCTREE, HELMHOLTZ KERNEL, WEAK SCALABILITY #
  164. ###################################################################################################
  165. # m=10, q=14, octants=32k oct/node
  166. nodes+=( 1 8 64 512 4096 32768 :)
  167. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  168. mpi_proc+=( 1 8 64 512 4096 32768 :)
  169. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  170. testcase+=( 5 5 5 5 5 5 :)
  171. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
  172. m_pts+=( 1 1 1 1 1 1 :)
  173. m+=( 10 10 10 10 10 10 :)
  174. q+=( 14 14 14 14 14 14 :)
  175. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  176. depth+=( 15 15 15 15 15 15 :)
  177. unif+=( 1 1 1 1 1 1 :)
  178. adap+=( 0 0 0 0 0 0 :)
  179. max_time+=( 2400 2400 2400 2400 2400 2400 :)
  180. # m=10, q=14, octants=16k oct/node
  181. nodes+=( 2 16 128 1024 8192 :)
  182. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  183. mpi_proc+=( 2 16 128 1024 8192 :)
  184. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  185. testcase+=( 5 5 5 5 5 :)
  186. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
  187. m_pts+=( 1 1 1 1 1 :)
  188. m+=( 10 10 10 10 10 :)
  189. q+=( 14 14 14 14 14 :)
  190. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  191. depth+=( 15 15 15 15 15 :)
  192. unif+=( 1 1 1 1 1 :)
  193. adap+=( 0 0 0 0 0 :)
  194. max_time+=( 1200 1200 1200 1200 1200 :)
  195. # m=10, q=14, octants=8k oct/node
  196. nodes+=( 4 32 256 2048 16384 :)
  197. cores+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  198. mpi_proc+=( 4 32 256 2048 16384 :)
  199. threads+=( ${CORES} ${CORES} ${CORES} ${CORES} ${CORES} :)
  200. testcase+=( 5 5 5 5 5 :)
  201. n_pts+=( $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
  202. m_pts+=( 1 1 1 1 1 :)
  203. m+=( 10 10 10 10 10 :)
  204. q+=( 14 14 14 14 14 :)
  205. tol+=( 1e-0 1e-0 1e-0 1e-0 1e-0 :)
  206. depth+=( 15 15 15 15 15 :)
  207. unif+=( 1 1 1 1 1 :)
  208. adap+=( 0 0 0 0 0 :)
  209. max_time+=( 1200 1200 1200 1200 1200 :)
  210. ###################################################################################################
  211. # Export arrays
  212. export nodes_="$(declare -p nodes)";
  213. export cores_="$(declare -p cores)";
  214. export mpi_proc_="$(declare -p mpi_proc)";
  215. export threads_="$(declare -p threads)";
  216. export testcase_="$(declare -p testcase)";
  217. export n_pts_="$(declare -p n_pts)";
  218. export m_pts_="$(declare -p m_pts)";
  219. export m_="$(declare -p m)";
  220. export q_="$(declare -p q)";
  221. export tol_="$(declare -p tol)";
  222. export depth_="$(declare -p depth)";
  223. export unif_="$(declare -p unif)";
  224. export adap_="$(declare -p adap)";
  225. export max_time_="$(declare -p max_time)";
  226. export RESULT_FNAME=$(basename ${0%.*}).out;
  227. export WORK_DIR=$(dirname ${PWD}/$0)/..
  228. cd ${WORK_DIR}
  229. TERM_WIDTH=$(stty size | cut -d ' ' -f 2)
  230. ./scripts/.submit_jobs.sh | cut -b -${TERM_WIDTH}