10 jaren geleden · abc80e75b2
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -23,6 +23,7 @@ INCDIR = ./include
 
				 TARGET_BIN = \
			
 
				        $(BINDIR)/example1 \
			
 
				        $(BINDIR)/example2 \
			
 
				+       $(BINDIR)/fmm_pts \
			
 
				        $(BINDIR)/fmm_cheb
			
 
				 
			
 
				 all : $(TARGET_BIN)
			
--- a/examples/include/utils.txx
+++ b/examples/include/utils.txx
@@ -5,7 +5,7 @@
 
				  */
			
 
				 
			
 
				 template <class FMM_Mat_t>
			
 
				-void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, const pvfmm::Kernel<typename FMM_Mat_t::Real_t>* mykernel){
			
 
				+void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, const pvfmm::Kernel<typename FMM_Mat_t::Real_t>* mykernel, std::string t_name){
			
 
				   if(mykernel==NULL) return;
			
 
				 
			
 
				   // Find out number of OMP thereads.
			
@@ -102,8 +102,8 @@ void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, const pvfmm::Kernel<type
 
				     MPI_Reduce(&max_   , &glb_max    , 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
			
 
				     MPI_Reduce(&max_err, &glb_max_err, 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
			
 
				     if(!myrank){
			
 
				-      std::cout<<"Maximum Absolute Error ["<<mykernel->ker_name<<"] :  "<<std::scientific<<glb_max_err<<'\n';
			
 
				-      std::cout<<"Maximum Relative Error ["<<mykernel->ker_name<<"] :  "<<std::scientific<<glb_max_err/glb_max<<'\n';
			
 
				+      std::cout<<"Maximum Absolute Error ["<<t_name<<"] :  "<<std::scientific<<glb_max_err<<'\n';
			
 
				+      std::cout<<"Maximum Relative Error ["<<t_name<<"] :  "<<std::scientific<<glb_max_err/glb_max<<'\n';
			
 
				     }
			
 
				   }
			
 
				 }
			
--- a/examples/src/fmm_pts.cpp
+++ b/examples/src/fmm_pts.cpp
@@ -0,0 +1,187 @@
 
				+#include <mpi.h>
			
 
				+#include <pvfmm_common.hpp>
			
 
				+#include <cstdlib>
			
 
				+#include <iostream>
			
 
				+#include <omp.h>
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#include <profile.hpp>
			
 
				+#include <fmm_pts.hpp>
			
 
				+#include <fmm_node.hpp>
			
 
				+#include <fmm_tree.hpp>
			
 
				+#include <utils.hpp>
			
 
				+
			
 
				+template <class Real_t>
			
 
				+void fmm_test(int ker, size_t N, size_t M, Real_t b, int dist, int mult_order, int depth, MPI_Comm comm){
			
 
				+  typedef pvfmm::FMM_Node<pvfmm::MPI_Node<Real_t> > FMMNode_t;
			
 
				+  typedef pvfmm::FMM_Pts<FMMNode_t> FMM_Mat_t;
			
 
				+  typedef pvfmm::FMM_Tree<FMM_Mat_t> FMM_Tree_t;
			
 
				+
			
 
				+  //Set kernel.
			
 
				+  const pvfmm::Kernel<Real_t>* mykernel=NULL;
			
 
				+  pvfmm::BoundaryType bndry=pvfmm::FreeSpace;
			
 
				+
			
 
				+  switch (ker){
			
 
				+    case 1:
			
 
				+      mykernel     =&pvfmm::LaplaceKernel<Real_t>::potential();
			
 
				+      break;
			
 
				+    case 2:
			
 
				+      mykernel     =&pvfmm::LaplaceKernel<Real_t>::gradient();
			
 
				+      break;
			
 
				+    case 3:
			
 
				+      mykernel     =&pvfmm::HelmholtzKernel<Real_t>::potential();
			
 
				+      break;
			
 
				+    default:
			
 
				+      break;
			
 
				+  }
			
 
				+
			
 
				+  // Find out number of OMP thereads.
			
 
				+  int omp_p=omp_get_max_threads();
			
 
				+
			
 
				+  // Find out my identity in the default communicator
			
 
				+  int myrank, p;
			
 
				+  MPI_Comm_rank(comm, &myrank);
			
 
				+  MPI_Comm_size(comm,&p);
			
 
				+
			
 
				+  //Various parameters.
			
 
				+  typename FMMNode_t::NodeData tree_data;
			
 
				+  tree_data.dim=COORD_DIM;
			
 
				+  tree_data.max_depth=depth;
			
 
				+  tree_data.max_pts=M; // Points per octant.
			
 
				+
			
 
				+  //Set source coordinates and values.
			
 
				+  std::vector<Real_t> src_coord, src_value;
			
 
				+  src_coord=point_distrib<Real_t>((dist==0?UnifGrid:(dist==1?RandSphr:RandElps)),N,comm);
			
 
				+  for(size_t i=0;i<src_coord.size();i++) src_coord[i]*=b;
			
 
				+  for(size_t i=0;i<src_coord.size()*mykernel->ker_dim[0]/COORD_DIM;i++) src_value.push_back(drand48());
			
 
				+  tree_data.pt_coord=src_coord;
			
 
				+  tree_data.src_coord=src_coord;
			
 
				+  tree_data.src_value=src_value;
			
 
				+
			
 
				+  //Set target coordinates.
			
 
				+  tree_data.trg_coord=tree_data.src_coord;
			
 
				+
			
 
				+  //Print various parameters.
			
 
				+  if(!myrank){
			
 
				+    std::cout<<std::setprecision(2)<<std::scientific;
			
 
				+    std::cout<<"Number of MPI processes: "<<p<<'\n';
			
 
				+    std::cout<<"Number of OpenMP threads: "<<omp_p<<'\n';
			
 
				+    std::cout<<"Order of multipole expansions: "<<mult_order<<'\n';
			
 
				+    std::cout<<"FMM Kernel name: "<<mykernel->ker_name<<'\n';
			
 
				+    std::cout<<"Number of point samples: "<<N<<'\n';
			
 
				+    std::cout<<"Point distribution: "<<(dist==0?"Unif":(dist==1?"Sphere":"Ellipse"))<<'\n';
			
 
				+    std::cout<<"Maximum points per octant: "<<tree_data.max_pts<<'\n';
			
 
				+    std::cout<<"Maximum Tree Depth: "<<depth<<'\n';
			
 
				+    std::cout<<"BoundaryType: "<<(bndry==pvfmm::Periodic?"Periodic":"FreeSpace")<<'\n';
			
 
				+  }
			
 
				+
			
 
				+  //Initialize FMM_Mat.
			
 
				+  FMM_Mat_t* fmm_mat=new FMM_Mat_t;
			
 
				+  fmm_mat->Initialize(mult_order,comm,mykernel);
			
 
				+
			
 
				+  //Create Tree and initialize with input data.
			
 
				+  FMM_Tree_t* tree=new FMM_Tree_t(comm);
			
 
				+  tree->Initialize(&tree_data);
			
 
				+
			
 
				+  //Initialize FMM Tree
			
 
				+  tree->InitFMM_Tree(false,bndry);
			
 
				+
			
 
				+  { //Output max tree depth.
			
 
				+    long nleaf=0, maxdepth=0;
			
 
				+    std::vector<size_t> all_nodes(MAX_DEPTH+1,0);
			
 
				+    std::vector<size_t> leaf_nodes(MAX_DEPTH+1,0);
			
 
				+    std::vector<FMMNode_t*>& nodes=tree->GetNodeList();
			
 
				+    for(size_t i=0;i<nodes.size();i++){
			
 
				+      FMMNode_t* n=nodes[i];
			
 
				+      if(!n->IsGhost()) all_nodes[n->Depth()]++;
			
 
				+      if(!n->IsGhost() && n->IsLeaf()){
			
 
				+        leaf_nodes[n->Depth()]++;
			
 
				+        if(maxdepth<n->Depth()) maxdepth=n->Depth();
			
 
				+        nleaf++;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    if(!myrank) std::cout<<"All  Nodes: ";
			
 
				+    for(int i=0;i<MAX_DEPTH;i++){
			
 
				+      int local_size=all_nodes[i];
			
 
				+      int global_size;
			
 
				+      MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
			
 
				+      if(!myrank) std::cout<<global_size<<' ';
			
 
				+    }
			
 
				+    if(!myrank) std::cout<<'\n';
			
 
				+
			
 
				+    if(!myrank) std::cout<<"Leaf Nodes: ";
			
 
				+    for(int i=0;i<MAX_DEPTH;i++){
			
 
				+      int local_size=leaf_nodes[i];
			
 
				+      int global_size;
			
 
				+      MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
			
 
				+      if(!myrank) std::cout<<global_size<<' ';
			
 
				+    }
			
 
				+    if(!myrank) std::cout<<'\n';
			
 
				+
			
 
				+    long nleaf_glb=0, maxdepth_glb=0;
			
 
				+    { // MPI_Reduce
			
 
				+      MPI_Allreduce(&nleaf, &nleaf_glb, 1, MPI_INT, MPI_SUM, comm);
			
 
				+      MPI_Allreduce(&maxdepth, &maxdepth_glb, 1, MPI_INT, MPI_MAX, comm);
			
 
				+    }
			
 
				+    if(!myrank) std::cout<<"Number of Leaf Nodes: "<<nleaf_glb<<'\n';
			
 
				+    if(!myrank) std::cout<<"Tree Depth: "<<maxdepth_glb<<'\n';
			
 
				+  }
			
 
				+
			
 
				+  // Setup FMM
			
 
				+  tree->SetupFMM(fmm_mat);
			
 
				+  tree->RunFMM();
			
 
				+
			
 
				+  //Re-run FMM
			
 
				+  tree->ClearFMMData();
			
 
				+  tree->RunFMM();
			
 
				+
			
 
				+  //Find error in FMM output.
			
 
				+  CheckFMMOutput<FMM_Mat_t>(tree, mykernel, "Output");
			
 
				+
			
 
				+  //Write2File
			
 
				+  //tree->Write2File("result/output");
			
 
				+
			
 
				+  //Delete matrices.
			
 
				+  delete fmm_mat;
			
 
				+
			
 
				+  //Delete the tree.
			
 
				+  delete tree;
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char **argv){
			
 
				+  MPI_Init(&argc, &argv);
			
 
				+
			
 
				+  MPI_Comm comm=MPI_COMM_WORLD;
			
 
				+
			
 
				+  // Read command line options.
			
 
				+  commandline_option_start(argc, argv);
			
 
				+  omp_set_num_threads( atoi(commandline_option(argc, argv,  "-omp",     "1", false, "-omp  <int> =  (1)   : Number of OpenMP threads."          )));
			
 
				+  size_t   N=(size_t)strtod(commandline_option(argc, argv,    "-N",     "1",  true, "-N    <int>          : Number of points."                  ),NULL);
			
 
				+  size_t   M=(size_t)strtod(commandline_option(argc, argv,    "-M",   "350", false, "-M    <int>          : Number of points per octant."       ),NULL);
			
 
				+  double   b=        strtod(commandline_option(argc, argv,    "-b",     "1", false, "-b    <int> =  (1)   : Bounding-box length (0 < b <= 1)"   ),NULL);
			
 
				+  int      m=       strtoul(commandline_option(argc, argv,    "-m",    "10", false, "-m    <int> = (10)   : Multipole order (+ve even integer)."),NULL,10);
			
 
				+  int      d=       strtoul(commandline_option(argc, argv,    "-d",    "15", false, "-d    <int> = (15)   : Maximum tree depth."                ),NULL,10);
			
 
				+  bool    sp=   (1==strtoul(commandline_option(argc, argv,   "-sp",     "0", false, "-sp   <0/1> =  (0)   : Single Precision."                  ),NULL,10));
			
 
				+  int   dist=       strtoul(commandline_option(argc, argv, "-dist",     "0", false, "-dist <int> =  (0)   : 0) Unif 1) Sphere 2) Ellipse"       ),NULL,10);
			
 
				+  int    ker=       strtoul(commandline_option(argc, argv,  "-ker",     "1", false,
			
 
				+       "-ker  <int> =  (1)   : 1) Laplace potential\n\
			
 
				+                               2) Laplace gradient\n\
			
 
				+                               3) Helmholtz"),NULL,10);
			
 
				+  commandline_option_end(argc, argv);
			
 
				+  pvfmm::Profile::Enable(true);
			
 
				+
			
 
				+  // Run FMM with above options.
			
 
				+  pvfmm::Profile::Tic("FMM_Test",&comm,true);
			
 
				+  if(sp) fmm_test<float >(ker, N,M,b, dist,m,d,comm);
			
 
				+  else   fmm_test<double>(ker, N,M,b, dist,m,d,comm);
			
 
				+  pvfmm::Profile::Toc();
			
 
				+
			
 
				+  //Output Profiling results.
			
 
				+  pvfmm::Profile::print(&comm);
			
 
				+
			
 
				+  // Shut down MPI
			
 
				+  MPI_Finalize();
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
--- a/scripts/conv_pts.sh
+++ b/scripts/conv_pts.sh
@@ -0,0 +1,110 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+CORES=16;
			
 
				+export EXEC=examples/bin/fmm_pts
			
 
				+
			
 
				+# List arrays and corresponding executable option prefix
			
 
				+declare -a opt_array=(nodes cores mpi_proc threads ker n_pts m_pts b_len dist m depth sin_pr max_time);
			
 
				+declare -a opt_names=(    -     -        -     omp ker     N     M     b dist m     d     sp        -);
			
 
				+for (( i=0; i<${#opt_names[@]}; i++ )) ; do # Declare arrays
			
 
				+  eval "declare -a ${opt_array[$i]}=()";
			
 
				+done
			
 
				+
			
 
				+
			
 
				+###################################################################################################
			
 
				+#                    Convergence Laplace kernel, 1M points, uniform distribution                  #
			
 
				+###################################################################################################
			
 
				+nodes+=(            1         1         1         1         1         1         1         1         1 :)
			
 
				+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+mpi_proc+=(         1         1         1         1         1         1         1         1         1 :)
			
 
				+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+ker+=(              1         1         1         1         1         1         1         1         1 :)
			
 
				+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
			
 
				+m_pts+=(          300       300       300       300       300       300       300       300       300 :)
			
 
				+b_len+=(       0.8125    0.8125      0.75      0.75     0.625       1.0       1.0     0.875      0.75 :)
			
 
				+dist+=(             0         0         0         0         0         0         0         0         0 :)
			
 
				+m+=(                2         4         6         6         8        10        12        14        16 :)
			
 
				+depth+=(            6         6         6         6         6         5         5         5         5 :)
			
 
				+sin_pr+=(           1         1         1         0         0         0         0         0         0 :)
			
 
				+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
			
 
				+
			
 
				+
			
 
				+###################################################################################################
			
 
				+#                  Convergence Laplace kernel, 1M points, non-uniform distribution (sphere)       #
			
 
				+###################################################################################################
			
 
				+nodes+=(            1         1         1         1         1         1         1         1         1 :)
			
 
				+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+mpi_proc+=(         1         1         1         1         1         1         1         1         1 :)
			
 
				+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+ker+=(              1         1         1         1         1         1         1         1         1 :)
			
 
				+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
			
 
				+m_pts+=(          180       190       200       180       200       700       750       750      1500 :)
			
 
				+b_len+=(            1         1         1         1         1         1         1         1         1 :)
			
 
				+dist+=(             1         1         1         1         1         1         1         1         1 :)
			
 
				+m+=(                2         4         6         6         8        10        12        14        16 :)
			
 
				+depth+=(           15        15        15        15        15        15        15        15        15 :)
			
 
				+sin_pr+=(           1         1         1         0         0         0         0         0         0 :)
			
 
				+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
			
 
				+
			
 
				+
			
 
				+###################################################################################################
			
 
				+#                  Convergence Laplace kernel, 1M points, non-uniform distribution (ellipse)      #
			
 
				+###################################################################################################
			
 
				+nodes+=(            1         1         1         1         1         1         1         1         1 :)
			
 
				+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+mpi_proc+=(         1         1         1         1         1         1         1         1         1 :)
			
 
				+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+ker+=(              1         1         1         1         1         1         1         1         1 :)
			
 
				+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
			
 
				+m_pts+=(          190       270       370       260       370       430       640      1000      1400 :)
			
 
				+b_len+=(            1         1         1         1         1         1         1         1         1 :)
			
 
				+dist+=(             2         2         2         2         2         2         2         2         2 :)
			
 
				+m+=(                2         4         6         6         8        10        12        14        16 :)
			
 
				+depth+=(           15        15        15        15        15        15        15        15        15 :)
			
 
				+sin_pr+=(           1         1         1         0         0         0         0         0         0 :)
			
 
				+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
			
 
				+
			
 
				+
			
 
				+###################################################################################################
			
 
				+#    Convergence Helmholtz kernel (wave-number=10), 1M points, uniform distribution (ellipse)     #
			
 
				+###################################################################################################
			
 
				+nodes+=(            1         1         1         1         1         1         1         1 :)
			
 
				+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+mpi_proc+=(         1         1         1         1         1         1         1         1 :)
			
 
				+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
			
 
				+ker+=(              3         3         3         3         3         3         3         3 :)
			
 
				+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
			
 
				+m_pts+=(          370       430       640       370       430       640       640       640 :)
			
 
				+m_pts+=(          150       150       150       200       300       400       400       400 :)
			
 
				+b_len+=(            1         1         1         1         1         1         1         1 :)
			
 
				+dist+=(             0         0         0         0         0         0         0         0 :)
			
 
				+m+=(                8        10        10        12        14        16        18        20 :)
			
 
				+depth+=(           15        15        15        15        15        15        15        15 :)
			
 
				+sin_pr+=(           1         1         0         0         0         0         0         0 :)
			
 
				+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
			
 
				+
			
 
				+
			
 
				+RESULT_HEADER=" Script: $0          Convergence with multipole order for 1M points"
			
 
				+
			
 
				+declare -a RESULT_FIELDS=()
			
 
				+RESULT_FIELDS+=("FMM Kernel name"                    "kernel" )
			
 
				+RESULT_FIELDS+=("Point distribution"                 "dist"   )
			
 
				+RESULT_FIELDS+=("Number of Leaf Nodes"               "Noct"   )
			
 
				+RESULT_FIELDS+=("Tree Depth"                         "d"      )
			
 
				+RESULT_FIELDS+=("Maximum points per octant"          "M"      )
			
 
				+RESULT_FIELDS+=("Order of multipole expansions"      "m"      )
			
 
				+RESULT_FIELDS+=("|"                                  "|"      )
			
 
				+RESULT_FIELDS+=("Maximum Relative Error \[Output\]"  "Linf(e)")
			
 
				+
			
 
				+declare -a PROF_FIELDS=()
			
 
				+PROF_FIELDS+=("InitTree"    )
			
 
				+PROF_FIELDS+=("SetupFMM"    )
			
 
				+PROF_FIELDS+=("RunFMM"      )
			
 
				+#PROF_FIELDS+=("UpwardPass"  )
			
 
				+#PROF_FIELDS+=("ReduceBcast" )
			
 
				+#PROF_FIELDS+=("DownwardPass")
			
 
				+
			
 
				+WORK_DIR=$(dirname ${PWD}/$0)/..
			
 
				+TERM_WIDTH=$(stty size | cut -d ' ' -f 2)
			
 
				+source ${WORK_DIR}/scripts/.submit_jobs.sh | cut -b -${TERM_WIDTH}
			
 
				+