#!/bin/bash
# .results.sh (8.0 KB)
  # Import the run description handed over by the caller.
  #
  # Each variable named below (nodes_, cores_, ...) holds a piece of shell code
  # that is evaluated here, at top level, in the listed order.  Presumably each
  # one re-creates the matching variable/array (nodes, mpi_proc, threads, fname,
  # ...) that later sections index -- TODO confirm against the launching script.
  #
  # The text is passed to eval as ONE quoted word.  Left unquoted, the shell
  # would word-split and glob-expand it first, which collapses runs of
  # whitespace inside quoted values, turns newlines into spaces and may expand
  # characters such as '*' against the current directory.
  #
  # An unset variable evaluates to the empty string, which is a no-op.
  #
  # NOTE(review): eval executes whatever these variables contain; they must
  # only ever be set by a trusted caller.
  for _serialized in nodes_ cores_ mpi_proc_ threads_ testcase_ n_pts_ m_ q_ \
      tol_ depth_ unif_ adap_ max_time_ fname_ args_; do
    eval "${!_serialized-}"
  done
  unset _serialized
  # Scratch directory and result file.
  #
  # Reads:   WORK_DIR      - base directory; the scratch area is ${WORK_DIR}/tmp
  #          RESULT_FNAME  - on entry, the bare name of the result file
  # Exports: TMPDIR        - ${WORK_DIR}/tmp
  #          RESULT_FNAME  - rewritten to the full path inside TMPDIR
  #
  # All paths are quoted so that a WORK_DIR containing spaces works, and the
  # scratch directory is created if missing so the first write cannot fail on
  # a fresh work area.
  export TMPDIR="${WORK_DIR}/tmp"
  export RESULT_FNAME="${TMPDIR}/${RESULT_FNAME}"
  mkdir -p -- "${TMPDIR}"

  # Start from an empty result file; everything below appends to it.
  rm -f -- "${RESULT_FNAME}"

  # Banner: printed to stdout and appended to the result file in one pipeline.
  _banner_rule="#########################################################################################################"
  {
    printf '%s\n' "${_banner_rule}"
    printf '%s\n' "# CPU + MIC Results : Time (FLOP/s) [CPU Only, CPU+MIC, CPU+MIC (async)] #"
    printf '%s\n' "${_banner_rule}"
  } | tee -a "${RESULT_FNAME}"
  unset _banner_rule
  ################### Field tables ###################
  # Every table is written as one ';'-separated string (exported) and then
  # split into an array of the same order.  The *STR/*ARR pairs are:
  #   PARAMSTR / PARAMARR             - labels searched for in a run's output
  #   PARAMHEADERSTR / PARAMHEADERARR - column titles for those parameters
  #   COLSTR / COLARR                 - timing labels; also used as column titles
  #   ERRSTR / ERRARR                 - error labels (grep patterns, hence the
  #                                     escaped brackets)
  #   ERRHEADERSTR / ERRHEADERARR     - column titles for the error values
  # Nparam, N and Nerr hold the number of parameter, timing and error columns.

  # Split the ';'-separated string $2 into the (global) array named by $1.
  # IFS is changed for the read only; backslashes are kept literally.
  _split_fields() {
    IFS=';' read -r -a "$1" <<< "$2"
  }

  ################### Input Parameter Fields ###################
  export PARAMSTR="FMM Kernel name;Order of multipole expansions;Order of Chebyshev polynomials;Maximum Tree Depth;Chebyshev Tolerance"
  export PARAMHEADERSTR="kernel;m;q;max_d;tol"
  ################### Time (FLOP/s) Fields ###################
  export COLSTR="RunFMM;UpwardPass;ReduceBcast;DownwardPass;U-List;V-List;W-List;X-List;D2H_Wait:Trg;D2D;D2T"
  ################### Error Fields ###################
  export ERRSTR="Maximum Relative Error \[Input\];Relative L2 Error \[Input\];Relative L2 Error \[Output\];Maximum Relative Error \[Output\];Relative L2 Error \[OutputGrad\];Maximum Relative Error \[OutputGrad\]"
  export ERRHEADERSTR="Linf(f);L2(f);L2(u);Linf(u);L2(grad_u);Linf(grad_u)"

  _split_fields PARAMARR "${PARAMSTR}"
  _split_fields PARAMHEADERARR "${PARAMHEADERSTR}"
  _split_fields COLARR "${COLSTR}"
  _split_fields ERRARR "${ERRSTR}"
  _split_fields ERRHEADERARR "${ERRHEADERSTR}"

  Nparam="${#PARAMARR[@]}"
  N="${#COLARR[@]}"
  Nerr="${#ERRARR[@]}"
  ################### Print Column Headers ###################
  # Writes the table header (rule, one row of column titles, rule) to stdout
  # and appends the same text to ${RESULT_FNAME}.
  #
  # Reads: Nparam/PARAMHEADERARR, N/COLARR, Nerr/ERRHEADERARR, RESULT_FNAME.
  #
  # The whole header is produced inside one group feeding a single tee, so the
  # result file is opened once and its (quoted) name may contain spaces.
  # Loop bounds are written as plain arithmetic names so an unset count is
  # treated as 0 instead of producing a syntax error.

  # Width of the horizontal rules: 11 characters per parameter column and per
  # fixed column (MPI_PROC, THREADS, NODES, OCT/NODE), 18 per timing column,
  # 14 per error column, plus 16.
  # NOTE(review): with the separators as written below (" |", four of them)
  # the title row is 8 characters shorter than this rule -- possibly the
  # separator literals lost padding at some point; confirm before changing.
  _rule_width=$(( 16 + (Nparam + 4) * 11 + N * 18 + Nerr * 14 ))

  HEADER_FORMAT="%10s "
  {
    printf "%${_rule_width}s\n" "" | tr " " "=" #=================================================
    #-----------------------------------------------------------
    # Parameter columns.
    for (( i = 0; i < Nparam; i++ )); do
      printf "%10s " "${PARAMHEADERARR[i]}"
    done
    printf " |"
    #-----------------------------------------------------------
    # Fixed run-configuration columns.
    for _title in MPI_PROC THREADS NODES OCT/NODE; do
      printf "${HEADER_FORMAT}" "${_title}"
    done
    printf " |"
    #-----------------------------------------------------------
    # Timing columns: the search labels double as titles.
    for (( i = 0; i < N; i++ )); do
      printf " %17s" "${COLARR[i]}"
    done
    printf " |"
    #-----------------------------------------------------------
    # Error columns.
    for (( i = 0; i < Nerr; i++ )); do
      printf " %13s" "${ERRHEADERARR[i]}"
    done
    printf " |\n"
    #-----------------------------------------------------------
    printf "%${_rule_width}s\n" "" | tr " " "=" #=================================================
  } | tee -a "${RESULT_FNAME}"
  #===========================================================
  ################### Loop over all runs ###################
  # For every run l (one entry of the nodes/mpi_proc/threads/fname arrays) up
  # to three output files in ${TMPDIR} are examined, named after
  # $(basename ${EXEC}) and ${fname[l]}:
  #     <exec>_<fname>.out         (FNAME_NOMIC)
  #     <exec>_mic_<fname>.out     (FNAME_MIC)
  #     <exec>_async_<fname>.out   (FNAME_ASYNC)
  # Each file that exists yields one table row (a "sub-row" of run l) holding
  # the parsed parameters, the run configuration, one "time (FLOP/s)" pair per
  # COLARR label and the parsed error values.
  #
  # Every run is processed in a background subshell that writes to its own
  # file ${RESULT_FNAME}_<l>.  After every 10th run and after the last one the
  # script waits for the subshells and appends the per-run files, in run
  # order, to ${RESULT_FNAME} (echoing them to stdout) and deletes them.
  #
  # Reads: nodes, mpi_proc, threads, fname, EXEC, TMPDIR, RESULT_FNAME,
  #        PARAMARR/Nparam, COLARR/N, ERRARR/Nerr.
  # Needs: grep, tail, rev, cut, tr, bc.

  # Width of the horizontal rules (same formula as the table header).
  _rule_width=$(( 16 + (Nparam + 4) * 11 + N * 18 + Nerr * 14 ))

  # Print a horizontal rule of the table width made of the character $1.
  _print_rule() {
    printf "%${_rule_width}s\n" "" | tr " " "$1"
  }

  # Print the last line of file $1 that matches the regex $2, ignoring case.
  _last_match() {
    grep -hi -e "$2" -- "$1" | tail -n 1
  }

  # Filter: print the $1-th space-separated field of stdin, counted from the
  # end of the line.
  _field_from_end() {
    rev | cut -d ' ' -f "$1" | rev
  }

  for (( l = 0; l < ${#nodes[@]}; l++ )); do
    ( # Begin parallel subshell
      # Private output file of this run; the reassignment is local to the
      # subshell.
      RESULT_FNAME="${RESULT_FNAME}_${l}"

      # File names.
      _exec_name=$(basename -- "${EXEC}")
      FNAME_NOMIC="${TMPDIR}/${_exec_name}_${fname[l]}.out"
      FNAME_MIC="${TMPDIR}/${_exec_name}_mic_${fname[l]}.out"
      FNAME_ASYNC="${TMPDIR}/${_exec_name}_async_${fname[l]}.out"

      subrow_cnt=0
      for FNAME in "${FNAME_NOMIC}" "${FNAME_MIC}" "${FNAME_ASYNC}"; do
        # A variant that was not run simply produces no row.
        [[ -f "${FNAME}" ]] || continue
        subrow_cnt=$(( subrow_cnt + 1 ))

        # Start every sub-row from empty tables so that a value missing in
        # this file is printed as 0 rather than as the value parsed from the
        # previous file of the same run.
        PARAM=() T_MAX=() FP_AVG=() FLOPS=() ERR=()

        ######################### Parse Data #################################
        # Parse Data: Parameters (last word of the last matching line)
        for (( i = 0; i < Nparam; i++ )); do
          PARAM[i]=$(_last_match "${FNAME}" "${PARAMARR[i]}" | _field_from_end 1)
        done
        #---------------------------------------------------------------------
        # Parse Data: Leaf Count (sum of all numbers after 'Leaf Nodes:')
        NODE_LIST=$(_last_match "${FNAME}" 'Leaf Nodes:' | cut -d " " -f 3-)
        NODES=0
        # shellcheck disable=SC2086 -- word splitting of the list is intended
        for _cnt in ${NODE_LIST}; do
          NODES=$(( NODES + _cnt ))
        done
        #---------------------------------------------------------------------
        # Parse Data: Time, Flop, Flop/s
        for (( i = 0; i < N; i++ )); do
          x="${COLARR[i]} " # trailing space: the label must end a word
          # T_MAX is the 10th field from the end of the matching line.
          T_MAX[i]=$(_last_match "${FNAME}" "$x" | tr -s ' ' | _field_from_end 10)
          [[ -n "${T_MAX[i]}" ]] || continue
          # FP_AVG (8th field from the end) is always taken from the NOMIC
          # file, whichever file the row is for.
          FP_AVG[i]=$(_last_match "${FNAME_NOMIC}" "$x" | tr -s ' ' | _field_from_end 8)
          # The 0.0001 keeps the division defined for a time of 0; bc errors
          # are discarded on purpose and leave FLOPS empty.
          FLOPS[i]=$(echo "scale=10;${FP_AVG[i]}/(${T_MAX[i]}+0.0001)" | bc 2> /dev/null)
          if [[ -n "${FLOPS[i]}" && -f "${FNAME_MIC}" && -f "${FNAME_ASYNC}" && -f "${FNAME_NOMIC}" ]]; then
            T_MAX_NOASYNC[i]=$(_last_match "${FNAME_MIC}" "$x" | tr -s ' ' | _field_from_end 10)
            # Without a MIC timing only the comparison is skipped; the
            # per-node normalisation below must still happen so that all
            # printed rates use the same unit.
            if [[ -n "${T_MAX_NOASYNC[i]}" ]]; then
              compare_result1=$(echo "${T_MAX[i]}<0.5*${T_MAX_NOASYNC[i]}" | bc 2> /dev/null)
              compare_result2=$(echo "${T_MAX[i]}<0.01" | bc 2> /dev/null)
              # A time below 0.01 that is also less than half the MIC-file
              # time: base the rate on the MIC-file time instead.
              if [[ "${compare_result1}" == 1 && "${compare_result2}" == 1 ]]; then
                FLOPS[i]=$(echo "scale=10;${FP_AVG[i]}/${T_MAX_NOASYNC[i]}" | bc 2> /dev/null)
              fi
            fi
          fi
          # Scale by MPI processes per node.
          FLOPS[i]=$(echo "scale=10;${FLOPS[i]}*${mpi_proc[l]}/${nodes[l]}" | bc 2> /dev/null)
        done
        #---------------------------------------------------------------------
        # Parse Data: Error (last word of the last matching line)
        for (( i = 0; i < Nerr; i++ )); do
          ERR[i]=$(_last_match "${FNAME}" "${ERRARR[i]}" | _field_from_end 1)
        done
        #=====================================================================
        ######################### Print Data #################################
        PARAM_FORMAT="%10s "
        TIMING_FORMAT=" %9.3f (%5.1f)"
        ERR_FORMAT=" %1.2e"
        {
          for (( i = 0; i < Nparam; i++ )); do
            printf "${PARAM_FORMAT}" "${PARAM[i]}"
          done
          printf " |"
          #-------------------------------------------------------------------
          printf "${PARAM_FORMAT}" "${mpi_proc[l]}"
          printf "${PARAM_FORMAT}" "${threads[l]}"
          printf "${PARAM_FORMAT}" "${nodes[l]}"
          printf "${PARAM_FORMAT}" "$(( ${NODES} / ${nodes[l]} ))"
          printf " |"
          #-------------------------------------------------------------------
          for (( i = 0; i < N; i++ )); do
            printf "${TIMING_FORMAT}" "${T_MAX[i]}" "${FLOPS[i]}"
          done
          printf " |"
          #-------------------------------------------------------------------
          for (( i = 0; i < Nerr; i++ )); do
            printf "${ERR_FORMAT}" "${ERR[i]}"
          done
          printf " |\n"
        } >> "${RESULT_FNAME}"
        #=====================================================================
      done

      # Group separator: '=' after the last run or when this entry is the ':'
      # marker, '-' after a run that produced more than one row.  The count is
      # compared numerically.
      if (( l == ${#nodes[@]} - 1 )) || [[ "${nodes[l]}" == ":" ]]; then
        _print_rule "=" >> "${RESULT_FNAME}" #=================================================
      elif (( subrow_cnt > 1 )); then
        _print_rule "-" >> "${RESULT_FNAME}" #-------------------------------------------------
      fi
    ) & # End parallel subshell

    # Combine results
    if (( (l + 1) % 10 == 0 )) || (( l == ${#nodes[@]} - 1 )); then
      wait
      for (( i = 0; i < ${#nodes[@]}; i++ )); do
        RESULT_FNAME_="${RESULT_FNAME}_${i}"
        if [[ -f "${RESULT_FNAME_}" ]]; then
          tee -a "${RESULT_FNAME}" < "${RESULT_FNAME_}"
          rm -- "${RESULT_FNAME_}"
        fi
      done
    fi
  done