há 11 anos atrás · 201d276277
--- a/examples/src/fmm_cheb.cpp
+++ b/examples/src/fmm_cheb.cpp
@@ -210,7 +210,7 @@ void fn_poten_t5(Real_t* coord, int n, Real_t* out){
 
				 ///////////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				 template <class Real_t>
			
 
				-void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg, int depth, bool adap, Real_t tol, MPI_Comm comm){
			
 
				+void fmm_test(int test_case, size_t N, size_t M, bool unif, int mult_order, int cheb_deg, int depth, bool adap, Real_t tol, MPI_Comm comm){
			
 
				   typedef pvfmm::FMM_Node<pvfmm::Cheb_Node<Real_t> > FMMNode_t;
			
 
				   typedef pvfmm::FMM_Cheb<FMMNode_t> FMM_Mat_t;
			
 
				   typedef pvfmm::FMM_Tree<FMM_Mat_t> FMM_Tree_t;
			
@@ -290,7 +290,7 @@ void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg,
 
				   std::vector<Real_t> pt_coord;
			
 
				   if(unif) pt_coord=point_distrib<Real_t>(UnifGrid,N,comm);
			
 
				   else pt_coord=point_distrib<Real_t>(RandElps,N,comm); //RandElps, RandGaus
			
 
				-  tree_data.max_pts=1; // Points per octant.
			
 
				+  tree_data.max_pts=M; // Points per octant.
			
 
				   tree_data.pt_coord=pt_coord;
			
 
				 
			
 
				   //Print various parameters.
			
@@ -342,6 +342,7 @@ void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg,
 
				     }
			
 
				     delete tree;
			
 
				     tree_data.pt_coord=pt_coord;
			
 
				+    tree_data.max_pts=1; // Points per octant.
			
 
				   }
			
 
				   pvfmm::Profile::Toc();
			
 
				 
			
@@ -438,12 +439,44 @@ void fmm_test(int test_case, size_t N, bool unif, int mult_order, int cheb_deg,
 
				 
			
 
				 int main(int argc, char **argv){
			
 
				   MPI_Init(&argc, &argv);
			
 
				+
			
 
				   MPI_Comm comm=MPI_COMM_WORLD;
			
 
				+  if(1){ // Remove slow processors.
			
 
				+    MPI_Comm comm_=MPI_COMM_WORLD;
			
 
				+    size_t N=2048;
			
 
				+    pvfmm::Matrix<double> A(N,N);
			
 
				+    pvfmm::Matrix<double> B(N,N);
			
 
				+    pvfmm::Matrix<double> C(N,N);
			
 
				+    for(int i=0;i<N;i++)
			
 
				+    for(int j=0;j<N;j++){
			
 
				+      A[i][j]=i+j;
			
 
				+      B[i][j]=i-j;
			
 
				+    }
			
 
				+    C=A*B;
			
 
				+    double t=-omp_get_wtime();
			
 
				+    C=A*B;
			
 
				+    t+=omp_get_wtime();
			
 
				+
			
 
				+    double tt;
			
 
				+    int myrank, np;
			
 
				+    MPI_Comm_size(comm_,&np);
			
 
				+    MPI_Comm_rank(comm_,&myrank);
			
 
				+    MPI_Allreduce(&t, &tt, 1, pvfmm::par::Mpi_datatype<double>::value(), MPI_SUM, comm_);
			
 
				+    tt=tt/np;
			
 
				+
			
 
				+    int clr=(t<tt*1.05?0:1);
			
 
				+    MPI_Comm_split(comm_, clr, myrank, &comm );
			
 
				+    if(clr){
			
 
				+      MPI_Finalize();
			
 
				+      return 0;
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				   // Read command line options.
			
 
				   commandline_option_start(argc, argv);
			
 
				   omp_set_num_threads( atoi(commandline_option(argc, argv,  "-omp",     "1", false, "-omp  <int> = (1)    : Number of OpenMP threads."          )));
			
 
				   size_t   N=(size_t)strtod(commandline_option(argc, argv,    "-N",     "1",  true, "-N    <int>          : Number of point sources."           ),NULL);
			
 
				+  size_t   M=(size_t)strtod(commandline_option(argc, argv,    "-M",     "1", false, "-M    <int>          : Number of points per octant."       ),NULL);
			
 
				   bool  unif=              (commandline_option(argc, argv, "-unif",    NULL, false, "-unif                : Uniform point distribution."        )!=NULL);
			
 
				   int      m=       strtoul(commandline_option(argc, argv,    "-m",    "10", false, "-m    <int> = (10)   : Multipole order (+ve even integer)."),NULL,10);
			
 
				   int      q=       strtoul(commandline_option(argc, argv,    "-q",    "14", false, "-q    <int> = (14)   : Chebyshev order (+ve integer)."     ),NULL,10);
			
@@ -460,7 +493,7 @@ int main(int argc, char **argv){
 
				 
			
 
				   // Run FMM with above options.
			
 
				   pvfmm::Profile::Tic("FMM_Test",&comm,true);
			
 
				-  fmm_test<double>(test, N,unif, m,q, d, adap,tol, comm);
			
 
				+  fmm_test<double>(test, N,M,unif, m,q, d, adap,tol, comm);
			
 
				   pvfmm::Profile::Toc();
			
 
				 
			
 
				   //Output Profiling results.
			
--- a/scripts/.submit_jobs.sh
+++ b/scripts/.submit_jobs.sh
@@ -19,6 +19,7 @@ eval $mpi_proc_;
 
				 eval  $threads_;
			
 
				 eval $testcase_;
			
 
				 eval    $n_pts_;
			
 
				+eval    $m_pts_;
			
 
				 eval        $m_;
			
 
				 eval        $q_;
			
 
				 eval      $tol_;
			
@@ -31,7 +32,7 @@ declare -a     args=();
 
				 declare -a    fname=();
			
 
				 for (( k=0; k<${#nodes[@]}; k++ )) ; do
			
 
				   if [ "${nodes[k]}" == ":" ]; then continue; fi;
			
 
				-  args[$k]="-omp ${threads[k]} -test ${testcase[k]} -N ${n_pts[k]} -m ${m[k]} -q ${q[k]} -d ${depth[k]} -tol ${tol[k]}";
			
 
				+  args[$k]="-omp ${threads[k]} -test ${testcase[k]} -N ${n_pts[k]} -M ${m_pts[k]} -m ${m[k]} -q ${q[k]} -d ${depth[k]} -tol ${tol[k]}";
			
 
				   case $HOSTNAME in
			
 
				     *titan*) #titan.ccs.ornl.gov
			
 
				         fname[$k]="host_titan";
			
@@ -48,7 +49,7 @@ for (( k=0; k<${#nodes[@]}; k++ )) ; do
 
				     *) # none of the known machines
			
 
				         fname[$k]="host_${HOSTNAME}";
			
 
				   esac
			
 
				-  fname[$k]="${fname[$k]}_n${nodes[k]}_mpi${mpi_proc[k]}_omp${threads[k]}_test${testcase[k]}_N${n_pts[k]}_m${m[k]}_q${q[k]}_d${depth[k]}_tol${tol[k]}";
			
 
				+  fname[$k]="${fname[$k]}_n${nodes[k]}_mpi${mpi_proc[k]}_omp${threads[k]}_test${testcase[k]}_N${n_pts[k]}_M${m_pts[k]}_m${m[k]}_q${q[k]}_d${depth[k]}_tol${tol[k]}";
			
 
				   if (( ${unif[k]} )) ; then
			
 
				     args[$k]="${args[$k]} -unif";
			
 
				     fname[$k]="${fname[$k]}_unif";
			
--- a/scripts/conv.sh
+++ b/scripts/conv.sh
@@ -30,6 +30,7 @@ mpi_proc+=(         1         1         1         1         1         1
 
				 threads+=(          1         1         1         1         1         1         1 :)
			
 
				 testcase+=(         1         1         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
			
 
				+m_pts+=(            1         1         1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10        10        10 :)
			
 
				 q+=(                9         9         9         9         9         9         9 :)
			
 
				 tol+=(           1e-0      1e-1      1e-2      1e-3      1e-4      1e-5      1e-6 :)
			
@@ -45,6 +46,7 @@ mpi_proc+=(         1         1         1         1         1         1
 
				 threads+=(          1         1         1         1         1         1         1         1 :)
			
 
				 testcase+=(         1         1         1         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
			
 
				+m_pts+=(            1         1         1         1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-1      1e-2      1e-3      1e-4      1e-5      1e-6      1e-7 :)
			
@@ -60,6 +62,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         1         1         1         1 :)
			
 
				 testcase+=(         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(                4         6         8        10        12 :)
			
 
				 q+=(                9         9         9         9         9 :)
			
 
				 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 :)
			
@@ -75,6 +78,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 
				 threads+=(          1         1         1         1         1 : :)
			
 
				 testcase+=(         1         1         1         1         1 : :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(                4         6         8        10        12 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 : :)
			
@@ -95,6 +99,7 @@ mpi_proc+=(         1         1         1         1         1         1 :)
 
				 threads+=(          1         1         1         1         1         1 :)
			
 
				 testcase+=(         2         2         2         2         2         2 :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
			
 
				+m_pts+=(            1         1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10        10 :)
			
 
				 q+=(                9         9         9         9         9         9 :)
			
 
				 tol+=(           1e-6      1e-6      1e-6      1e-6      1e-6      1e-6 :)
			
@@ -110,6 +115,7 @@ mpi_proc+=(         1         1         1         1         1         1 : :)
 
				 threads+=(          1         1         1         1         1         1 : :)
			
 
				 testcase+=(         2         2         2         2         2         2 : :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
			
 
				+m_pts+=(            1         1         1         1         1         1 : :)
			
 
				 m+=(               10        10        10        10        10        10 : :)
			
 
				 q+=(               14        14        14        14        14        14 : :)
			
 
				 tol+=(           1e-6      1e-6      1e-6      1e-6      1e-6      1e-6 : :)
			
@@ -130,6 +136,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         1         1         1         1 :)
			
 
				 testcase+=(         3         3         3         3         3 :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(                4         6         8        10        12 :)
			
 
				 q+=(                9         9         9         9         9 :)
			
 
				 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 :)
			
@@ -145,6 +152,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 
				 threads+=(          1         1         1         1         1 : :)
			
 
				 testcase+=(         3         3         3         3         3 : :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(                4         6         8        10        12 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-4      1e-4      1e-4      1e-4      1e-4 : :)
			
@@ -165,6 +173,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         1         1         1         1 :)
			
 
				 testcase+=(         5         5         5         5         5 :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(                4         6         8        10        12 :)
			
 
				 q+=(                9         9         9         9         9 :)
			
 
				 tol+=(           1e-5      1e-5      1e-5      1e-5      1e-5 :)
			
@@ -180,6 +189,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 
				 threads+=(          1         1         1         1         1 : :)
			
 
				 testcase+=(         5         5         5         5         5 : :)
			
 
				 n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) $((8**1)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(                4         6         8        10        12 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-5      1e-5      1e-5      1e-5      1e-5 : :)
			
@@ -198,6 +208,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 
				 export  threads_="$(declare -p  threads)";
			
 
				 export testcase_="$(declare -p testcase)";
			
 
				 export    n_pts_="$(declare -p    n_pts)";
			
 
				+export    m_pts_="$(declare -p    m_pts)";
			
 
				 export        m_="$(declare -p        m)";
			
 
				 export        q_="$(declare -p        q)";
			
 
				 export      tol_="$(declare -p      tol)";
			
--- a/scripts/single_node.sh
+++ b/scripts/single_node.sh
@@ -30,6 +30,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -45,6 +46,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -60,6 +62,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 
				 threads+=(          1         2         4         8        16 : :)
			
 
				 testcase+=(         1         1         1         1         1 : :)
			
 
				 n_pts+=(    $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(               10        10        10        10        10 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
			
@@ -81,6 +84,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         1         1         1         1         1 :)
			
 
				 n_pts+=(           32        32        32        32        32 :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -96,6 +100,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         1         1         1         1         1 :)
			
 
				 n_pts+=(          256       256       256       256       256 :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -111,6 +116,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 
				 threads+=(          1         2         4         8        16 : :)
			
 
				 testcase+=(         1         1         1         1         1 : :)
			
 
				 n_pts+=(         2048      2048      2048      2048      2048 : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(               10        10        10        10        10 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
			
@@ -133,6 +139,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         3         3         3         3         3 :)
			
 
				 n_pts+=(    $((8**3)) $((8**3)) $((8**3)) $((8**3)) $((8**3)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -148,6 +155,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         3         3         3         3         3 :)
			
 
				 n_pts+=(    $((8**4)) $((8**4)) $((8**4)) $((8**4)) $((8**4)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -163,6 +171,7 @@ mpi_proc+=(         1         1         1         1         1 : :)
 
				 threads+=(          1         2         4         8        16 : :)
			
 
				 testcase+=(         3         3         3         3         3 : :)
			
 
				 n_pts+=(    $((8**5)) $((8**5)) $((8**5)) $((8**5)) $((8**5)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(               10        10        10        10        10 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
			
@@ -184,6 +193,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         3         3         3         3         3 :)
			
 
				 n_pts+=(           32        32        32        32        32 :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -199,6 +209,7 @@ mpi_proc+=(         1         1         1         1         1 :)
 
				 threads+=(          1         2         4         8        16 :)
			
 
				 testcase+=(         3         3         3         3         3 :)
			
 
				 n_pts+=(          256       256       256       256       256 :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -214,6 +225,7 @@ mpi_proc+=(         1         1         1         1         1 : : : :)
 
				 threads+=(          1         2         4         8        16 : : : :)
			
 
				 testcase+=(         3         3         3         3         3 : : : :)
			
 
				 n_pts+=(         2048      2048      2048      2048      2048 : : : :)
			
 
				+m_pts+=(            1         1         1         1         1 : : : :)
			
 
				 m+=(               10        10        10        10        10 : : : :)
			
 
				 q+=(               14        14        14        14        14 : : : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : : : :)
			
@@ -236,6 +248,7 @@ mpi_proc+=(         1         1         1 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         1         1         1 :)
			
 
				 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) :)
			
 
				+m_pts+=(            1         1         1 :)
			
 
				 m+=(                6         6         6 :)
			
 
				 q+=(                9         9         9 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 :)
			
@@ -251,6 +264,7 @@ mpi_proc+=(         1         1         1 : :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
			
 
				 testcase+=(         1         1         1 : :)
			
 
				 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) : :)
			
 
				+m_pts+=(            1         1         1 : :)
			
 
				 m+=(               10        10        10 : :)
			
 
				 q+=(               14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 : :)
			
@@ -272,6 +286,7 @@ mpi_proc+=(         1         1         1 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         1         1         1 :)
			
 
				 n_pts+=(           32       256      2048 :)
			
 
				+m_pts+=(            1         1         1 :)
			
 
				 m+=(                6         6         6 :)
			
 
				 q+=(                9         9         9 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 :)
			
@@ -287,6 +302,7 @@ mpi_proc+=(         1         1         1 : :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
			
 
				 testcase+=(         1         1         1 : :)
			
 
				 n_pts+=(           32       256      2048 : :)
			
 
				+m_pts+=(            1         1         1 : :)
			
 
				 m+=(               10        10        10 : :)
			
 
				 q+=(               14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 : :)
			
@@ -306,6 +322,7 @@ mpi_proc+=(         1         1         1 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         3         3         3 :)
			
 
				 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) :)
			
 
				+m_pts+=(            1         1         1 :)
			
 
				 m+=(                6         6         6 :)
			
 
				 q+=(                9         9         9 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 :)
			
@@ -321,6 +338,7 @@ mpi_proc+=(         1         1         1 : :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
			
 
				 testcase+=(         3         3         3 : :)
			
 
				 n_pts+=(    $((8**3)) $((8**4)) $((8**5)) : :)
			
 
				+m_pts+=(            1         1         1 : :)
			
 
				 m+=(               10        10        10 : :)
			
 
				 q+=(               14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 : :)
			
@@ -342,6 +360,7 @@ mpi_proc+=(         1         1         1 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         3         3         3 :)
			
 
				 n_pts+=(           32       256      2048 :)
			
 
				+m_pts+=(            1         1         1 :)
			
 
				 m+=(                6         6         6 :)
			
 
				 q+=(                9         9         9 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 :)
			
@@ -357,6 +376,7 @@ mpi_proc+=(         1         1         1 : :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES} : :)
			
 
				 testcase+=(         3         3         3 : :)
			
 
				 n_pts+=(           32       256      2048 : :)
			
 
				+m_pts+=(            1         1         1 : :)
			
 
				 m+=(               10        10        10 : :)
			
 
				 q+=(               14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0 : :)
			
@@ -376,6 +396,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 
				 export  threads_="$(declare -p  threads)";
			
 
				 export testcase_="$(declare -p testcase)";
			
 
				 export    n_pts_="$(declare -p    n_pts)";
			
 
				+export    m_pts_="$(declare -p    m_pts)";
			
 
				 export        m_="$(declare -p        m)";
			
 
				 export        q_="$(declare -p        q)";
			
 
				 export      tol_="$(declare -p      tol)";
			
--- a/scripts/sscal.sh
+++ b/scripts/sscal.sh
@@ -30,6 +30,7 @@ mpi_proc+=(         4        32       256      2048     16384     16384 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         1         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) :)
			
 
				+m_pts+=(            1         1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -44,19 +45,20 @@ max_time+=(       800       800       800       800       800       800 :)
 
				 ###################################################################################################
			
 
				 
			
 
				 # m=10, q=14, octants=
			
 
				-nodes+=(            1         8        64       512      4096      32768 :)
			
 
				-cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
			
 
				-mpi_proc+=(         1         8        64       512      4096      32768 :)
			
 
				-threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
			
 
				-testcase+=(         3         3         3         3         3          3 :)
			
 
				-n_pts+=(    $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7))  $((8**7)) :)
			
 
				-m+=(               10        10        10        10        10         10 :)
			
 
				-q+=(               14        14        14        14        14         14 :)
			
 
				-tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0       1e-0 :)
			
 
				-depth+=(           15        15        15        15        15         15 :)
			
 
				-unif+=(             0         0         0         0         0          0 :)
			
 
				-adap+=(             0         0         0         0         0          0 :)
			
 
				-max_time+=(      2400      2400      2400      2400      2400       2400 :)
			
 
				+nodes+=(            1         8        64       512      4096     32768 :)
			
 
				+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+mpi_proc+=(         1         8        64       512      4096     32768 :)
			
 
				+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+testcase+=(         3         3         3         3         3         3 :)
			
 
				+n_pts+=(    $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) $((8**7)) :)
			
 
				+m_pts+=(            1         1         1         1         1         1 :)
			
 
				+m+=(               10        10        10        10        10        10 :)
			
 
				+q+=(               14        14        14        14        14        14 :)
			
 
				+tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
 
				+depth+=(           15        15        15        15        15        15 :)
			
 
				+unif+=(             0         0         0         0         0         0 :)
			
 
				+adap+=(             0         0         0         0         0         0 :)
			
 
				+max_time+=(      2400      2400      2400      2400      2400      2400 :)
			
 
				 
			
 
				 
			
 
				 ###################################################################################################
			
@@ -68,6 +70,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 
				 export  threads_="$(declare -p  threads)";
			
 
				 export testcase_="$(declare -p testcase)";
			
 
				 export    n_pts_="$(declare -p    n_pts)";
			
 
				+export    m_pts_="$(declare -p    m_pts)";
			
 
				 export        m_="$(declare -p        m)";
			
 
				 export        q_="$(declare -p        q)";
			
 
				 export      tol_="$(declare -p      tol)";
			
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -18,19 +18,20 @@ declare -a     unif=();
 
				 declare -a     adap=();
			
 
				 declare -a max_time=();
			
 
				 
			
 
				-nodes+=(            1         1         1         1)
			
 
				-cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES})
			
 
				-mpi_proc+=(         1         1         1         1)
			
 
				-threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES})
			
 
				-testcase+=(         1         1         1         1)
			
 
				-n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)))
			
 
				-m+=(               10        10        10        10)
			
 
				-q+=(               14        14        14        14)
			
 
				-tol+=(           1e-4      1e-5      1e-6      1e-7)
			
 
				-depth+=(           15        15        15        15)
			
 
				-unif+=(             0         0         0         0)
			
 
				-adap+=(             1         1         1         1)
			
 
				-max_time+=(   1000000   1000000   1000000   1000000)
			
 
				+nodes+=(            1         1         1         1 ) # Number of compute nodes
			
 
				+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES} ) # Number of CPU cores / node
			
 
				+mpi_proc+=(         1         1         1         1 ) # Number of MPI processes
			
 
				+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES} ) # Number of OpenMP threads / MPI process
			
 
				+testcase+=(         1         1         1         1 ) # test case: 1) Laplace (smooth) 2) Laplace (discontinuous) ...
			
 
				+n_pts+=(    $((8**1)) $((8**1)) $((8**1)) $((8**1)) ) # Total number of points for tree construction
			
 
				+m_pts+=(            1         1         1         1 ) # Maximum number of points per octant
			
 
				+m+=(               10        10        10        10 ) # Multipole order
			
 
				+q+=(               14        14        14        14 ) # Chebyshev order
			
 
				+tol+=(           1e-4      1e-5      1e-6      1e-7 ) # Refinement tolerance
			
 
				+depth+=(           15        15        15        15 ) # Octree maximum depth
			
 
				+unif+=(             0         0         0         0 ) # Uniform point distribution
			
 
				+adap+=(             1         1         1         1 ) # Adaptive refinement
			
 
				+max_time+=(   1000000   1000000   1000000   1000000 ) # Maximum run time
			
 
				 
			
 
				 # Export arrays
			
 
				 export    nodes_="$(declare -p    nodes)";
			
@@ -39,6 +40,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 
				 export  threads_="$(declare -p  threads)";
			
 
				 export testcase_="$(declare -p testcase)";
			
 
				 export    n_pts_="$(declare -p    n_pts)";
			
 
				+export    m_pts_="$(declare -p    m_pts)";
			
 
				 export        m_="$(declare -p        m)";
			
 
				 export        q_="$(declare -p        q)";
			
 
				 export      tol_="$(declare -p      tol)";
			
--- a/scripts/wscal.sh
+++ b/scripts/wscal.sh
@@ -30,6 +30,7 @@ mpi_proc+=(         4        32       256      2048      16384 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
			
 
				 testcase+=(         1         1         1         1          1 :)
			
 
				 n_pts+=(    $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
			
 
				+m_pts+=(            1         1         1         1          1 :)
			
 
				 m+=(               10        10        10        10         10 :)
			
 
				 q+=(               14        14        14        14         14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0       1e-0 :)
			
@@ -45,6 +46,7 @@ mpi_proc+=(         1         8        64       512      4096      32768 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
			
 
				 testcase+=(         1         1         1         1         1          1 :)
			
 
				 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
			
 
				+m_pts+=(            1         1         1         1         1          1 :)
			
 
				 m+=(               10        10        10        10        10         10 :)
			
 
				 q+=(               14        14        14        14        14         14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0       1e-0 :)
			
@@ -60,6 +62,7 @@ mpi_proc+=(         2        16       128      1024      8192 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         1         1         1         1         1 :)
			
 
				 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -75,6 +78,7 @@ mpi_proc+=(         4        32       256      2048     16384 : :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} : :)
			
 
				 testcase+=(         1         1         1         1         1 : :)
			
 
				 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(               10        10        10        10        10 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
			
@@ -85,6 +89,44 @@ max_time+=(       100       100       100       100       100 : :)
 
				 
			
 
				 
			
 
				 
			
 
				+###################################################################################################
			
 
				+#                    NON-UNIFORM OCTREE, LAPLACE KERNEL, WEAK SCALABILITY                         #
			
 
				+###################################################################################################
			
 
				+
			
 
				+# m=10, q=13, octants=16k oct/node
			
 
				+nodes+=(             1          4         16         64        256       1024       4096 :)
			
 
				+cores+=(      ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
			
 
				+mpi_proc+=(          1          4         16         64        256       1024       4096 :)
			
 
				+threads+=(    ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
			
 
				+testcase+=(          1          1          1          1          1          1          1 :)
			
 
				+n_pts+=(    $((2**20)) $((2**22)) $((2**24)) $((2**26)) $((2**28)) $((2**30)) $((2**32)) :)
			
 
				+m_pts+=(           500        500        500        500        500        500        500 :)
			
 
				+m+=(                10         10         10         10         10         10         10 :)
			
 
				+q+=(                13         13         13         13         13         13         13 :)
			
 
				+tol+=(            1e-0       1e-0       1e-0       1e-0       1e-0       1e-0       1e-0 :)
			
 
				+depth+=(            30         30         30         30         30         30         30 :)
			
 
				+unif+=(              0          0          0          0          0          0          0 :)
			
 
				+adap+=(              0          0          0          0          0          0          0 :)
			
 
				+max_time+=(        500        500        500        500        500        500        500 :)
			
 
				+
			
 
				+# m=10, q=13, octants=32k oct/node
			
 
				+nodes+=(             1          4         16         64        256       1024       4096 :)
			
 
				+cores+=(      ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
			
 
				+mpi_proc+=(          1          4         16         64        256       1024       4096 :)
			
 
				+threads+=(    ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES}   ${CORES} :)
			
 
				+testcase+=(          1          1          1          1          1          1          1 :)
			
 
				+n_pts+=(    $((2**21)) $((2**23)) $((2**25)) $((2**27)) $((2**29)) $((2**31)) $((2**33)) :)
			
 
				+m_pts+=(           500        500        500        500        500        500        500 :)
			
 
				+m+=(                10         10         10         10         10         10         10 :)
			
 
				+q+=(                13         13         13         13         13         13         13 :)
			
 
				+tol+=(            1e-0       1e-0       1e-0       1e-0       1e-0       1e-0       1e-0 :)
			
 
				+depth+=(            30         30         30         30         30         30         30 :)
			
 
				+unif+=(              0          0          0          0          0          0          0 :)
			
 
				+adap+=(              0          0          0          0          0          0          0 :)
			
 
				+max_time+=(        500        500        500        500        500        500        500 :)
			
 
				+
			
 
				+
			
 
				+
			
 
				 ###################################################################################################
			
 
				 #                      UNIFORM OCTREE, STOKES KERNEL, WEAK SCALABILITY                            #
			
 
				 ###################################################################################################
			
@@ -96,6 +138,7 @@ mpi_proc+=(         1         8        64       512      4096      32768 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}   ${CORES} :)
			
 
				 testcase+=(         3         3         3         3         3          3 :)
			
 
				 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) $((8**10)) :)
			
 
				+m_pts+=(            1         1         1         1         1          1 :)
			
 
				 m+=(               10        10        10        10        10         10 :)
			
 
				 q+=(               14        14        14        14        14         14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0       1e-0 :)
			
@@ -111,6 +154,7 @@ mpi_proc+=(         2        16       128      1024      8192 :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				 testcase+=(         3         3         3         3         3 :)
			
 
				 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) :)
			
 
				+m_pts+=(            1         1         1         1         1 :)
			
 
				 m+=(               10        10        10        10        10 :)
			
 
				 q+=(               14        14        14        14        14 :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 :)
			
@@ -126,6 +170,7 @@ mpi_proc+=(         4        32       256      2048     16384 : :)
 
				 threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} : :)
			
 
				 testcase+=(         3         3         3         3         3 : :)
			
 
				 n_pts+=(    $((8**5)) $((8**6)) $((8**7)) $((8**8)) $((8**9)) : :)
			
 
				+m_pts+=(            1         1         1         1         1 : :)
			
 
				 m+=(               10        10        10        10        10 : :)
			
 
				 q+=(               14        14        14        14        14 : :)
			
 
				 tol+=(           1e-0      1e-0      1e-0      1e-0      1e-0 : :)
			
@@ -145,6 +190,7 @@ export mpi_proc_="$(declare -p mpi_proc)";
 
				 export  threads_="$(declare -p  threads)";
			
 
				 export testcase_="$(declare -p testcase)";
			
 
				 export    n_pts_="$(declare -p    n_pts)";
			
 
				+export    m_pts_="$(declare -p    m_pts)";
			
 
				 export        m_="$(declare -p        m)";
			
 
				 export        q_="$(declare -p        q)";
			
 
				 export      tol_="$(declare -p      tol)";