Ver Fonte

update scripts.

Dhairya Malhotra há 11 anos atrás
pai
commit
201d276277
7 ficheiros alterados com 148 adições e 31 exclusões
  1. 36 3
      examples/src/fmm_cheb.cpp
  2. 3 2
      scripts/.submit_jobs.sh
  3. 11 0
      scripts/conv.sh
  4. 21 0
      scripts/single_node.sh
  5. 16 13
      scripts/sscal.sh
  6. 15 13
      scripts/test.sh
  7. 46 0
      scripts/wscal.sh

+ 36 - 3
examples/src/fmm_cheb.cpp

@@ -210,7 +210,7 @@ void fn_poten_t5(Real_t* coord, int n, Real_t* out){
 ///////////////////////////////////////////////////////////////////////////////
 
 template <class Real_t>
-void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg, int depth, bool adap, Real_t tol, MPI_Comm comm){
+void fmm_test(int test_case, size_t N, size_t M, bool unif, int mult_order, int cheb_deg, int depth, bool adap, Real_t tol, MPI_Comm comm){
   typedef pvfmm::FMM_Node<pvfmm::Cheb_Node<Real_t> > FMMNode_t;
   typedef pvfmm::FMM_Cheb<FMMNode_t> FMM_Mat_t;
   typedef pvfmm::FMM_Tree<FMM_Mat_t> FMM_Tree_t;
@@ -290,7 +290,7 @@ void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg,
   std::vector<Real_t> pt_coord;
   if(unif) pt_coord=point_distrib<Real_t>(UnifGrid,N,comm);
   else pt_coord=point_distrib<Real_t>(RandElps,N,comm); //RandElps, RandGaus
-  tree_data.max_pts=1; // Points per octant.
+  tree_data.max_pts=M; // Points per octant.
   tree_data.pt_coord=pt_coord;
 
   //Print various parameters.
@@ -342,6 +342,7 @@ void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg,
     }
     delete tree;
     tree_data.pt_coord=pt_coord;
+    tree_data.max_pts=1; // Points per octant.
   }
   pvfmm::Profile::Toc();
 
@@ -438,12 +439,44 @@ void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg,
 
 int main(int argc, char **argv){
   MPI_Init(&argc, &argv);
+
   MPI_Comm comm=MPI_COMM_WORLD;
+  if(1){ // Remove slow processors.
+    MPI_Comm comm_=MPI_COMM_WORLD;
+    size_t N=2048;
+    pvfmm::Matrix<double> A(N,N);
+    pvfmm::Matrix<double> B(N,N);
+    pvfmm::Matrix<double> C(N,N);
+    for(int i=0;i<N;i++)
+    for(int j=0;j<N;j++){
+      A[i][j]=i+j;
+      B[i][j]=i-j;
+    }
+    C=A*B;
+    double t=-omp_get_wtime();
+    C=A*B;
+    t+=omp_get_wtime();
+
+    double tt;
+    int myrank, np;
+    MPI_Comm_size(comm_,&np);
+    MPI_Comm_rank(comm_,&myrank);
+    MPI_Allreduce(&t, &tt, 1, pvfmm::par::Mpi_datatype<double>::value(), MPI_SUM, comm_);
+    tt=tt/np;
+
+    int clr=(t<tt*1.05?0:1);
+    MPI_Comm_split(comm_, clr, myrank, &comm );
+    if(clr){
+      MPI_Finalize();
+      return 0;
+    }
+  }
 
   // Read command line options.
   commandline_option_start(argc, argv);
   omp_set_num_threads( atoi(commandline_option(argc, argv,  "-omp",     "1", false, "-omp  <int> = (1)    : Number of OpenMP threads."          )));
   size_t   N=(size_t)strtod(commandline_option(argc, argv,    "-N",     "1",  true, "-N    <int>          : Number of point sources."           ),NULL);
+  size_t   M=(size_t)strtod(commandline_option(argc, argv,    "-M",     "1", false, "-M    <int>          : Number of points per octant."       ),NULL);
   bool  unif=              (commandline_option(argc, argv, "-unif",    NULL, false, "-unif                : Uniform point distribution."        )!=NULL);
   int      m=       strtoul(commandline_option(argc, argv,    "-m",    "10", false, "-m    <int> = (10)   : Multipole order (+ve even integer)."),NULL,10);
   int      q=       strtoul(commandline_option(argc, argv,    "-q",    "14", false, "-q    <int> = (14)   : Chebyshev order (+ve integer)."     ),NULL,10);
@@ -460,7 +493,7 @@ int main(int argc, char **argv){
 
   // Run FMM with above options.
   pvfmm::Profile::Tic("FMM_Test",&comm,true);
-  fmm_test<double>(test, N,unif, m,q, d, adap,tol, comm);
+  fmm_test<double>(test, N,M,unif, m,q, d, adap,tol, comm);
   pvfmm::Profile::Toc();
 
   //Output Profiling results.

+ 3 - 2
scripts/.submit_jobs.sh

@@ -19,6 +19,7 @@ eval $mpi_proc_;
 eval  $threads_;
 eval $testcase_;
 eval    $n_pts_;
+eval    $m_pts_;
 eval        $m_;
 eval        $q_;
 eval      $tol_;
@@ -31,7 +32,7 @@ declare -a     args=();
 declare -a    fname=();
 for (( k=0; k<${#nodes[@]}; k++ )) ; do
   if [ "${nodes[k]}" == ":" ]; then continue; fi;
-  args[$k]="-omp ${threads[k]} -test ${testcase[k]} -N ${n_pts[k]} -m ${m[k]} -q ${q[k]} -d ${depth[k]} -tol ${tol[k]}";
+  args[$k]="-omp ${threads[k]} -test ${testcase[k]} -N ${n_pts[k]} -M ${m_pts[k]} -m ${m[k]} -q ${q[k]} -d ${depth[k]} -tol ${tol[k]}";
   case $HOSTNAME in
     *titan*) #titan.ccs.ornl.gov
         fname[$k]="host_titan";
@@ -48,7 +49,7 @@ for (( k=0; k<${#nodes[@]}; k++ )) ; do
     *) # none of the known machines
         fname[$k]="host_${HOSTNAME}";
   esac
-  fname[$k]="${fname[$k]}_n${nodes[k]}_mpi${mpi_proc[k]}_omp${threads[k]}_test${testcase[k]}_N${n_pts[k]}_m${m[k]}_q${q[k]}_d${depth[k]}_tol${tol[k]}";
+  fname[$k]="${fname[$k]}_n${nodes[k]}_mpi${mpi_proc[k]}_omp${threads[k]}_test${testcase[k]}_N${n_pts[k]}_M${m_pts[k]}_m${m[k]}_q${q[k]}_d${depth[k]}_tol${tol[k]}";
   if (( ${unif[k]} )) ; then
     args[$k]="${args[$k]} -unif";
     fname[$k]="${fname[$k]}_unif";

+ 11 - 0
scripts/conv.sh

@@ -30,6 +30,7 @@ mpi_proc+=(         1         1         1         1         1         1
 threads+=(          1         1         1         1         1         1         1 :)
 testcase+=(         1         1         1         1         1         1         1 :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
+m_pts+=(            1         1         1         1         1         1         1 :)
 m+=(               10        10        10        10        10        10        10 :)
 q+=(                9         9         9         9         9         9         9 :)
 tol+=(           1e-0      1e-1      1e-2      1e-3      1e-4      1e-5      1e-6 :)
@@ -45,6 +46,7 @@ mpi_proc+=(         1         1         1         1         1         1
 threads+=(          1         1         1         1         1         1         1         1 :)
 testcase+=(         1         1         1         1         1         1         1         1 :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
+m_pts+=(            1         1         1         1         1         1         1         1 :)
 m+=(               10        10        10        10        10        10        10        10 :)
 q+=(               14        14        14        14        14        14        14        14 :)
 tol+=(           1e-0      1e-1      1e-2      1e-3      1e-4      1e-5      1e-6      1e-7 :)
@@ -60,6 +62,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         1         1         1         1 :)
 testcase+=(         1         1         1         1         1 :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(                4         6         8        10        12 :)
 q+=(                9         9         9         9         9 :)
 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 :)
@@ -75,6 +78,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 threads+=(          1         1         1         1         1 : :)
 testcase+=(         1         1         1         1         1 : :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(                4         6         8        10        12 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 : :)
@@ -95,6 +99,7 @@ mpi_proc+=(         1         1         1         1         1         1 :)
 threads+=(          1         1         1         1         1         1 :)
 testcase+=(         2         2         2         2         2         2 :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
+m_pts+=(            1         1         1         1         1         1 :)
 m+=(               10        10        10        10        10        10 :)
 q+=(                9         9         9         9         9         9 :)
 tol+=(           1e-6      1e-6      1e-6      1e-6      1e-6      1e-6 :)
@@ -110,6 +115,7 @@ mpi_proc+=(         1         1         1         1         1         1 : :)
 threads+=(          1         1         1         1         1         1 : :)
 testcase+=(         2         2         2         2         2         2 : :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
+m_pts+=(            1         1         1         1         1         1 : :)
 m+=(               10        10        10        10        10        10 : :)
 q+=(               14        14        14        14        14        14 : :)
 tol+=(           1e-6      1e-6      1e-6      1e-6      1e-6      1e-6 : :)
@@ -130,6 +136,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         1         1         1         1 :)
 testcase+=(         3         3         3         3         3 :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(                4         6         8        10        12 :)
 q+=(                9         9         9         9         9 :)
 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 :)
@@ -145,6 +152,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 threads+=(          1         1         1         1         1 : :)
 testcase+=(         3         3         3         3         3 : :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(                4         6         8        10        12 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 : :)
@@ -165,6 +173,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         1         1         1         1 :)
 testcase+=(         5         5         5         5         5 :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(                4         6         8        10        12 :)
 q+=(                9         9         9         9         9 :)
 tol+=(           1e-5      1e-5      1e-5      1e-5      1e-5 :)
@@ -180,6 +189,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 threads+=(          1         1         1         1         1 : :)
 testcase+=(         5         5         5         5         5 : :)
 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(                4         6         8        10        12 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-5      1e-5      1e-5      1e-5      1e-5 : :)
@@ -198,6 +208,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 export  threads_="$(declare -p  threads)";
 export testcase_="$(declare -p testcase)";
 export    n_pts_="$(declare -p    n_pts)";
+export    m_pts_="$(declare -p    m_pts)";
 export        m_="$(declare -p        m)";
 export        q_="$(declare -p        q)";
 export      tol_="$(declare -p      tol)";

+ 21 - 0
scripts/single_node.sh

@@ -30,6 +30,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         1         1         1         1         1 :)
 n_pts+=(    $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -45,6 +46,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         1         1         1         1         1 :)
 n_pts+=(    $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -60,6 +62,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 threads+=(          1         2         4         8        16 : :)
 testcase+=(         1         1         1         1         1 : :)
 n_pts+=(    $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(               10        10        10        10        10 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
@@ -81,6 +84,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         1         1         1         1         1 :)
 n_pts+=(           32        32        32        32        32 :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -96,6 +100,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         1         1         1         1         1 :)
 n_pts+=(          256       256       256       256       256 :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -111,6 +116,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 threads+=(          1         2         4         8        16 : :)
 testcase+=(         1         1         1         1         1 : :)
 n_pts+=(         2048      2048      2048      2048      2048 : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(               10        10        10        10        10 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
@@ -133,6 +139,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         3         3         3         3         3 :)
 n_pts+=(    $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -148,6 +155,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         3         3         3         3         3 :)
 n_pts+=(    $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -163,6 +171,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 threads+=(          1         2         4         8        16 : :)
 testcase+=(         3         3         3         3         3 : :)
 n_pts+=(    $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(               10        10        10        10        10 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
@@ -184,6 +193,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         3         3         3         3         3 :)
 n_pts+=(           32        32        32        32        32 :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -199,6 +209,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 threads+=(          1         2         4         8        16 :)
 testcase+=(         3         3         3         3         3 :)
 n_pts+=(          256       256       256       256       256 :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -214,6 +225,7 @@ mpi_proc+=(         1         1         1         1         1 : : : :)
 threads+=(          1         2         4         8        16 : : : :)
 testcase+=(         3         3         3         3         3 : : : :)
 n_pts+=(         2048      2048      2048      2048      2048 : : : :)
+m_pts+=(            1         1         1         1         1 : : : :)
 m+=(               10        10        10        10        10 : : : :)
 q+=(               14        14        14        14        14 : : : :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : : : :)
@@ -236,6 +248,7 @@ mpi_proc+=(         1         1         1 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         1         1         1 :)
 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) :)
+m_pts+=(            1         1         1 :)
 m+=(                6         6         6 :)
 q+=(                9         9         9 :)
 tol+=(           1e-0      1e-0      1e-0 :)
@@ -251,6 +264,7 @@ mpi_proc+=(         1         1         1 : :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
 testcase+=(         1         1         1 : :)
 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) : :)
+m_pts+=(            1         1         1 : :)
 m+=(               10        10        10 : :)
 q+=(               14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0 : :)
@@ -272,6 +286,7 @@ mpi_proc+=(         1         1         1 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         1         1         1 :)
 n_pts+=(           32       256      2048 :)
+m_pts+=(            1         1         1 :)
 m+=(                6         6         6 :)
 q+=(                9         9         9 :)
 tol+=(           1e-0      1e-0      1e-0 :)
@@ -287,6 +302,7 @@ mpi_proc+=(         1         1         1 : :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
 testcase+=(         1         1         1 : :)
 n_pts+=(           32       256      2048 : :)
+m_pts+=(            1         1         1 : :)
 m+=(               10        10        10 : :)
 q+=(               14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0 : :)
@@ -306,6 +322,7 @@ mpi_proc+=(         1         1         1 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         3         3         3 :)
 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) :)
+m_pts+=(            1         1         1 :)
 m+=(                6         6         6 :)
 q+=(                9         9         9 :)
 tol+=(           1e-0      1e-0      1e-0 :)
@@ -321,6 +338,7 @@ mpi_proc+=(         1         1         1 : :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
 testcase+=(         3         3         3 : :)
 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) : :)
+m_pts+=(            1         1         1 : :)
 m+=(               10        10        10 : :)
 q+=(               14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0 : :)
@@ -342,6 +360,7 @@ mpi_proc+=(         1         1         1 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         3         3         3 :)
 n_pts+=(           32       256      2048 :)
+m_pts+=(            1         1         1 :)
 m+=(                6         6         6 :)
 q+=(                9         9         9 :)
 tol+=(           1e-0      1e-0      1e-0 :)
@@ -357,6 +376,7 @@ mpi_proc+=(         1         1         1 : :)
 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
 testcase+=(         3         3         3 : :)
 n_pts+=(           32       256      2048 : :)
+m_pts+=(            1         1         1 : :)
 m+=(               10        10        10 : :)
 q+=(               14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0 : :)
@@ -376,6 +396,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 export  threads_="$(declare -p  threads)";
 export testcase_="$(declare -p testcase)";
 export    n_pts_="$(declare -p    n_pts)";
+export    m_pts_="$(declare -p    m_pts)";
 export        m_="$(declare -p        m)";
 export        q_="$(declare -p        q)";
 export      tol_="$(declare -p      tol)";

+ 16 - 13
scripts/sscal.sh

@@ -30,6 +30,7 @@ mpi_proc+=(         4        32       256      2048     16384     16384 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         1         1         1         1         1         1 :)
 n_pts+=(    $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) :)
+m_pts+=(            1         1         1         1         1         1 :)
 m+=(               10        10        10        10        10        10 :)
 q+=(               14        14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -44,19 +45,20 @@ max_time+=(       800       800       800       800       800       800 :)
 ###################################################################################################
 
 # m=10, q=14, octants=
-nodes+=(            1         8        64       512      4096      32768 :)
-cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
-mpi_proc+=(         1         8        64       512      4096      32768 :)
-threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
-testcase+=(         3         3         3         3         3          3 :)
-n_pts+=(    $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7))  $((8**7)) :)
-m+=(               10        10        10        10        10         10 :)
-q+=(               14        14        14        14        14         14 :)
-tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0       1e-0 :)
-depth+=(           15        15        15        15        15         15 :)
-unif+=(             0         0         0         0         0          0 :)
-adap+=(             0         0         0         0         0          0 :)
-max_time+=(      2400      2400      2400      2400      2400       2400 :)
+nodes+=(            1         8        64       512      4096     32768 :)
+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+mpi_proc+=(         1         8        64       512      4096     32768 :)
+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+testcase+=(         3         3         3         3         3         3 :)
+n_pts+=(    $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) :)
+m_pts+=(            1         1         1         1         1         1 :)
+m+=(               10        10        10        10        10        10 :)
+q+=(               14        14        14        14        14        14 :)
+tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0      1e-0 :)
+depth+=(           15        15        15        15        15        15 :)
+unif+=(             0         0         0         0         0         0 :)
+adap+=(             0         0         0         0         0         0 :)
+max_time+=(      2400      2400      2400      2400      2400      2400 :)
 
 
 ###################################################################################################
@@ -68,6 +70,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 export  threads_="$(declare -p  threads)";
 export testcase_="$(declare -p testcase)";
 export    n_pts_="$(declare -p    n_pts)";
+export    m_pts_="$(declare -p    m_pts)";
 export        m_="$(declare -p        m)";
 export        q_="$(declare -p        q)";
 export      tol_="$(declare -p      tol)";

+ 15 - 13
scripts/test.sh

@@ -18,19 +18,20 @@ declare -a     unif=();
 declare -a     adap=();
 declare -a max_time=();
 
-nodes+=(            1         1         1         1)
-cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES})
-mpi_proc+=(         1         1         1         1)
-threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES})
-testcase+=(         1         1         1         1)
-n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)))
-m+=(               10        10        10        10)
-q+=(               14        14        14        14)
-tol+=(           1e-4      1e-5      1e-6      1e-7)
-depth+=(           15        15        15        15)
-unif+=(             0         0         0         0)
-adap+=(             1         1         1         1)
-max_time+=(   1000000   1000000   1000000   1000000)
+nodes+=(            1         1         1         1 ) # Number of compute nodes
+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES} ) # Number of CPU cores / node
+mpi_proc+=(         1         1         1         1 ) # Number of MPI processes
+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES} ) # Number of OpenMP threads / MPI process
+testcase+=(         1         1         1         1 ) # test case: 1) Laplace (smooth) 2) Laplace (discontinuous) ...
+n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) ) # Total number of points for tree construction
+m_pts+=(            1         1         1         1 ) # Maximum number of points per octant
+m+=(               10        10        10        10 ) # Multipole order
+q+=(               14        14        14        14 ) # Chebyshev order
+tol+=(           1e-4      1e-5      1e-6      1e-7 ) # Refinement tolerance
+depth+=(           15        15        15        15 ) # Octree maximum depth
+unif+=(             0         0         0         0 ) # Uniform point distribution
+adap+=(             1         1         1         1 ) # Adaptive refinement
+max_time+=(   1000000   1000000   1000000   1000000 ) # Maximum run time
 
 # Export arrays
 export    nodes_="$(declare -p    nodes)";
@@ -39,6 +40,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 export  threads_="$(declare -p  threads)";
 export testcase_="$(declare -p testcase)";
 export    n_pts_="$(declare -p    n_pts)";
+export    m_pts_="$(declare -p    m_pts)";
 export        m_="$(declare -p        m)";
 export        q_="$(declare -p        q)";
 export      tol_="$(declare -p      tol)";

+ 46 - 0
scripts/wscal.sh

@@ -30,6 +30,7 @@ mpi_proc+=(         4        32       256      2048      16384 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
 testcase+=(         1         1         1         1          1 :)
 n_pts+=(    $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
+m_pts+=(            1         1         1         1          1 :)
 m+=(               10        10        10        10         10 :)
 q+=(               14        14        14        14         14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0       1e-0 :)
@@ -45,6 +46,7 @@ mpi_proc+=(         1         8        64       512      4096      32768 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
 testcase+=(         1         1         1         1         1          1 :)
 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
+m_pts+=(            1         1         1         1         1          1 :)
 m+=(               10        10        10        10        10         10 :)
 q+=(               14        14        14        14        14         14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0       1e-0 :)
@@ -60,6 +62,7 @@ mpi_proc+=(         2        16       128      1024      8192 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         1         1         1         1         1 :)
 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -75,6 +78,7 @@ mpi_proc+=(         4        32       256      2048     16384 : :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} : :)
 testcase+=(         1         1         1         1         1 : :)
 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(               10        10        10        10        10 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
@@ -85,6 +89,44 @@ max_time+=(       100       100       100       100       100 : :)
 
 
 
+###################################################################################################
+#                    NON-UNIFORM OCTREE, LAPLACE KERNEL, WEAK SCALABILITY                         #
+###################################################################################################
+
+# m=10, q=13, octants=16k oct/node
+nodes+=(             1          4         16         64        256       1024       4096 :)
+cores+=(      ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
+mpi_proc+=(          1          4         16         64        256       1024       4096 :)
+threads+=(    ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
+testcase+=(          1          1          1          1          1          1          1 :)
+n_pts+=(    $((2**20)) $((2**22)) $((2**24)) $((2**26)) $((2**28)) $((2**30)) $((2**32)) :)
+m_pts+=(           500        500        500        500        500        500        500 :)
+m+=(                10         10         10         10         10         10         10 :)
+q+=(                13         13         13         13         13         13         13 :)
+tol+=(            1e-0       1e-0       1e-0       1e-0       1e-0       1e-0       1e-0 :)
+depth+=(            30         30         30         30         30         30         30 :)
+unif+=(              0          0          0          0          0          0          0 :)
+adap+=(              0          0          0          0          0          0          0 :)
+max_time+=(        500        500        500        500        500        500        500 :)
+
+# m=10, q=13, octants=32k oct/node
+nodes+=(             1          4         16         64        256       1024       4096 :)
+cores+=(      ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
+mpi_proc+=(          1          4         16         64        256       1024       4096 :)
+threads+=(    ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
+testcase+=(          1          1          1          1          1          1          1 :)
+n_pts+=(    $((2**21)) $((2**23)) $((2**25)) $((2**27)) $((2**29)) $((2**31)) $((2**33)) :)
+m_pts+=(           500        500        500        500        500        500        500 :)
+m+=(                10         10         10         10         10         10         10 :)
+q+=(                13         13         13         13         13         13         13 :)
+tol+=(            1e-0       1e-0       1e-0       1e-0       1e-0       1e-0       1e-0 :)
+depth+=(            30         30         30         30         30         30         30 :)
+unif+=(              0          0          0          0          0          0          0 :)
+adap+=(              0          0          0          0          0          0          0 :)
+max_time+=(        500        500        500        500        500        500        500 :)
+
+
+
 ###################################################################################################
 #                      UNIFORM OCTREE, STOKES KERNEL, WEAK SCALABILITY                            #
 ###################################################################################################
@@ -96,6 +138,7 @@ mpi_proc+=(         1         8        64       512      4096      32768 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
 testcase+=(         3         3         3         3         3          3 :)
 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
+m_pts+=(            1         1         1         1         1          1 :)
 m+=(               10        10        10        10        10         10 :)
 q+=(               14        14        14        14        14         14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0       1e-0 :)
@@ -111,6 +154,7 @@ mpi_proc+=(         2        16       128      1024      8192 :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
 testcase+=(         3         3         3         3         3 :)
 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
+m_pts+=(            1         1         1         1         1 :)
 m+=(               10        10        10        10        10 :)
 q+=(               14        14        14        14        14 :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
@@ -126,6 +170,7 @@ mpi_proc+=(         4        32       256      2048     16384 : :)
 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} : :)
 testcase+=(         3         3         3         3         3 : :)
 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) : :)
+m_pts+=(            1         1         1         1         1 : :)
 m+=(               10        10        10        10        10 : :)
 q+=(               14        14        14        14        14 : :)
 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
@@ -145,6 +190,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 export  threads_="$(declare -p  threads)";
 export testcase_="$(declare -p testcase)";
 export    n_pts_="$(declare -p    n_pts)";
+export    m_pts_="$(declare -p    m_pts)";
 export        m_="$(declare -p        m)";
 export        q_="$(declare -p        q)";
 export      tol_="$(declare -p      tol)";