.submit_jobs.sh

#!/bin/bash

# Build the executable and abort if the build did not produce it.
make ${EXEC} -j
if [ ! -f ${EXEC} ] ; then exit 1; fi;

# Completed runs are collected in ${RESULT_DIR}.
export RESULT_DIR=${WORK_DIR}/result
mkdir -p ${RESULT_DIR}
# Optionally clear empty placeholder files so those cases are re-submitted.
#find ${RESULT_DIR} -type f -size 0 -exec rm {} \;

# Use the system 'timeout' command if available; otherwise fall back to the
# bundled timeout script.
if command -v timeout >/dev/null; then
  export TIMEOUT="timeout";
else
  export TIMEOUT="scripts/.timeout3 -t ";
fi
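
# Note: TIMEOUT is only exported here; the per-case time limit is presumably
# applied inside the scripts/.job.* files, e.g. (hypothetical usage):
#   ${TIMEOUT} ${max_time[k]} ${EXEC} ${args[k]}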
# Recover the parameter arrays serialized by the calling script.
eval $nodes_;
eval $cores_;
eval $mpi_proc_;
eval $threads_;
eval $testcase_;
eval $n_pts_;
eval $m_;
eval $q_;
eval $tol_;
eval $depth_;
eval $unif_;
eval $adap_;
eval $max_time_;
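
# Each *_ variable above is expected to hold a bash array serialized with
# `declare -p`, mirroring how args_ and fname_ are exported further below.
# A minimal (hypothetical) driver sketch:
#   nodes=(1 2 4); mpi_proc=(16 32 64); threads=(16 16 16); ...
#   export nodes_="$(declare -p nodes)"
#   export mpi_proc_="$(declare -p mpi_proc)"
#   # ... export the remaining arrays the same way, set EXEC and WORK_DIR,
#   # then invoke this script.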
# Build the command-line arguments and the result-file name for each case.
declare -a args=();
declare -a fname=();
for (( k=0; k<${#nodes[@]}; k++ )) ; do
  if [ "${nodes[k]}" == ":" ]; then continue; fi;
  args[$k]="-omp ${threads[k]} -test ${testcase[k]} -N ${n_pts[k]} -m ${m[k]} -q ${q[k]} -d ${depth[k]} -tol ${tol[k]}";
  case $HOSTNAME in
    *titan*) #titan.ccs.ornl.gov
      fname[$k]="host_titan";
      ;;
    *stampede*) #stampede.tacc.utexas.edu
      fname[$k]="host_stampede";
      ;;
    *ls4*) #lonestar.tacc.utexas.edu
      fname[$k]="host_lonestar";
      ;;
    *ronaldo*) #ronaldo.ices.utexas.edu
      fname[$k]="host_ronaldo";
      ;;
    *) # none of the known machines
      fname[$k]="host_${HOSTNAME}";
  esac
  fname[$k]="${fname[$k]}_n${nodes[k]}_mpi${mpi_proc[k]}_omp${threads[k]}_test${testcase[k]}_N${n_pts[k]}_m${m[k]}_q${q[k]}_d${depth[k]}_tol${tol[k]}";
  if (( ${unif[k]} )) ; then
    args[$k]="${args[$k]} -unif";
    fname[$k]="${fname[$k]}_unif";
  fi;
  if (( ${adap[k]} )) ; then
    args[$k]="${args[$k]} -adap";
    fname[$k]="${fname[$k]}_adap";
  fi;
done
export args_="$(declare -p args)";
export fname_="$(declare -p fname)";
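
# args_ and fname_ are exported for the scripts/.job.* files, which can
# recover the arrays with eval (as this script does above). With hypothetical
# values nodes=4, mpi_proc=8, threads=16, testcase=1, N=1048576, m=10, q=14,
# d=15, tol=1e-6 on Stampede, a finished case would produce:
#   ${RESULT_DIR}/$(basename ${EXEC})_host_stampede_n4_mpi8_omp16_test1_N1048576_m10_q14_d15_tol1e-6.out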
# Submit one batch job per (nodes, mpi_proc) configuration; each job covers
# all pending cases with that configuration.
for (( k=0; k<${#nodes[@]}; k++ )) ; do
  # Skip separator entries and cases that already have an output file.
  if [ "${nodes[k]}" == ":" ] ||
     [ -f ${RESULT_DIR}/$(basename ${EXEC})_${fname[k]}.out ]; then
    continue;
  fi;
  # Skip if an earlier pending case has the same configuration;
  # that case's submission already covers this one.
  for (( j=0; j<$k; j++ )) ; do
    if [ "${nodes[k]}" == "${nodes[j]}" ] &&
       [ "${mpi_proc[k]}" == "${mpi_proc[j]}" ] &&
       [ ! -f ${RESULT_DIR}/$(basename ${EXEC})_${fname[j]}.out ]; then
      continue 2;
    fi
  done;
  # Request enough wall time for every pending case with this configuration.
  TOTAL_TIME=0;
  for (( j=0; j<${#nodes[@]}; j++ )) ; do
    if [ "${nodes[k]}" == "${nodes[j]}" ] &&
       [ "${mpi_proc[k]}" == "${mpi_proc[j]}" ] &&
       [ ! -f ${RESULT_DIR}/$(basename ${EXEC})_${fname[j]}.out ]; then
      TOTAL_TIME=$(( ${TOTAL_TIME} + ${max_time[j]} ))
    fi
  done;

  export NODES=${nodes[k]};       # Number of compute nodes.
  export CORES=${cores[k]};       # Number of cores per node.
  export MPI_PROC=${mpi_proc[k]}; # Number of MPI processes.
  export THREADS=${threads[k]};   # Number of threads per MPI process.
  export TESTCASE=${testcase[k]}; # Test case.
  export MULORDER=${m[k]};        # Multipole order.
  export CHBORDER=${q[k]};        # Chebyshev degree.
  export FNAME=${RESULT_DIR}/$(basename ${EXEC})_nds${NODES}_mpi${MPI_PROC}

  # Submit the job using the scheduler available on this machine.
  case $HOSTNAME in
    *titan*) #titan.ccs.ornl.gov (Portable Batch System)
      qsub -l nodes=${NODES} \
           -o ${FNAME}.out -e ${FNAME}.err \
           -l walltime=${TOTAL_TIME} \
           ./scripts/.job.titan
      ;;
    *stampede*) #stampede.tacc.utexas.edu (Slurm Batch)
      # Cap the requested wall time at 14400 seconds (4 hours).
      if (( ${TOTAL_TIME} > 14400 )); then TOTAL_TIME="14400"; fi
      #if (( ${NODES} > 128 )) ; then continue; fi;
      sbatch -N${NODES} -n${MPI_PROC} \
             -o ${FNAME}.out -e ${FNAME}.err -D ${PWD} \
             --time=00:00:${TOTAL_TIME} \
             ./scripts/.job.stampede
      ;;
    *ls4*) #lonestar.tacc.utexas.edu (Sun Grid Engine)
      qsub -pe $((${MPI_PROC}/${NODES}))way $((${NODES}*${CORES})) \
           -o ${FNAME}.out -e ${FNAME}.err \
           -l h_rt=${TOTAL_TIME} \
           ./scripts/.job.lonestar
      ;;
    *ronaldo*) #ronaldo.ices.utexas.edu (Portable Batch System)
      qsub -l nodes=${NODES}:ppn=$((${MPI_PROC}/${NODES})) \
           -o ${FNAME}.out -e ${FNAME}.err \
           -l walltime=${TOTAL_TIME} \
           ./scripts/.job.qsub
      ;;
    *) # none of the known machines
      if command -v qsub >/dev/null; then # Portable Batch System
        qsub -l nodes=${NODES}:ppn=$((${MPI_PROC}/${NODES})) \
             -o ${FNAME}.out -e ${FNAME}.err \
             -l walltime=${TOTAL_TIME} \
             ./scripts/.job.qsub
      elif command -v sbatch >/dev/null; then # Slurm Batch
        sbatch -N${NODES} -n${MPI_PROC} \
               -o ${FNAME}.out -e ${FNAME}.err -D ${PWD} \
               --time=${TOTAL_TIME} \
               ./scripts/.job.sbatch
      else # Shell
        ./scripts/.job.sh
      fi
  esac
  # Skip marking the cases if the job submission failed.
  if (( $? != 0 )) ; then continue; fi;
  # Mark every case covered by this submission as pending by touching its
  # output file, so it is not submitted again.
  for (( j=0; j<${#nodes[@]}; j++ )) ; do
    if [ "${nodes[k]}" == "${nodes[j]}" ] &&
       [ "${mpi_proc[k]}" == "${mpi_proc[j]}" ] &&
       [ ! -f ${RESULT_DIR}/$(basename ${EXEC})_${fname[j]}.out ]; then
      touch ${RESULT_DIR}/$(basename ${EXEC})_${fname[j]}.out;
    fi
  done;
done;

# Display results
./scripts/.results.sh