Bladeren bron

New example and script for profiling particle FMM

Dhairya Malhotra 10 jaren geleden
bovenliggende
commit
abc80e75b2
4 gewijzigde bestanden met toevoegingen van 301 en 3 verwijderingen
  1. 1 0
      examples/Makefile
  2. 3 3
      examples/include/utils.txx
  3. 187 0
      examples/src/fmm_pts.cpp
  4. 110 0
      scripts/conv_pts.sh

+ 1 - 0
examples/Makefile

@@ -23,6 +23,7 @@ INCDIR = ./include
 TARGET_BIN = \
        $(BINDIR)/example1 \
        $(BINDIR)/example2 \
+       $(BINDIR)/fmm_pts \
        $(BINDIR)/fmm_cheb
 
 all : $(TARGET_BIN)

+ 3 - 3
examples/include/utils.txx

@@ -5,7 +5,7 @@
  */
 
 template <class FMM_Mat_t>
-void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, const pvfmm::Kernel<typename FMM_Mat_t::Real_t>* mykernel){
+void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, const pvfmm::Kernel<typename FMM_Mat_t::Real_t>* mykernel, std::string t_name){
   if(mykernel==NULL) return;
 
   // Find out number of OMP thereads.
@@ -102,8 +102,8 @@ void CheckFMMOutput(pvfmm::FMM_Tree<FMM_Mat_t>* mytree, const pvfmm::Kernel<type
     MPI_Reduce(&max_   , &glb_max    , 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
     MPI_Reduce(&max_err, &glb_max_err, 1, pvfmm::par::Mpi_datatype<Real_t>::value(), pvfmm::par::Mpi_datatype<Real_t>::max(), 0, c1);
     if(!myrank){
-      std::cout<<"Maximum Absolute Error ["<<mykernel->ker_name<<"] :  "<<std::scientific<<glb_max_err<<'\n';
-      std::cout<<"Maximum Relative Error ["<<mykernel->ker_name<<"] :  "<<std::scientific<<glb_max_err/glb_max<<'\n';
+      std::cout<<"Maximum Absolute Error ["<<t_name<<"] :  "<<std::scientific<<glb_max_err<<'\n';
+      std::cout<<"Maximum Relative Error ["<<t_name<<"] :  "<<std::scientific<<glb_max_err/glb_max<<'\n';
     }
   }
 }

+ 187 - 0
examples/src/fmm_pts.cpp

@@ -0,0 +1,187 @@
+#include <mpi.h>
+#include <pvfmm_common.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <omp.h>
+#include <stdio.h>
+
+#include <profile.hpp>
+#include <fmm_pts.hpp>
+#include <fmm_node.hpp>
+#include <fmm_tree.hpp>
+#include <utils.hpp>
+
+template <class Real_t>
+void fmm_test(int ker, size_t N, size_t M, Real_t b, int dist, int mult_order, int depth, MPI_Comm comm){
+  typedef pvfmm::FMM_Node<pvfmm::MPI_Node<Real_t> > FMMNode_t;
+  typedef pvfmm::FMM_Pts<FMMNode_t> FMM_Mat_t;
+  typedef pvfmm::FMM_Tree<FMM_Mat_t> FMM_Tree_t;
+
+  //Set kernel.
+  const pvfmm::Kernel<Real_t>* mykernel=NULL;
+  pvfmm::BoundaryType bndry=pvfmm::FreeSpace;
+
+  switch (ker){
+    case 1:
+      mykernel     =&pvfmm::LaplaceKernel<Real_t>::potential();
+      break;
+    case 2:
+      mykernel     =&pvfmm::LaplaceKernel<Real_t>::gradient();
+      break;
+    case 3:
+      mykernel     =&pvfmm::HelmholtzKernel<Real_t>::potential();
+      break;
+    default:
+      break;
+  }
+
+  // Find out number of OMP thereads.
+  int omp_p=omp_get_max_threads();
+
+  // Find out my identity in the default communicator
+  int myrank, p;
+  MPI_Comm_rank(comm, &myrank);
+  MPI_Comm_size(comm,&p);
+
+  //Various parameters.
+  typename FMMNode_t::NodeData tree_data;
+  tree_data.dim=COORD_DIM;
+  tree_data.max_depth=depth;
+  tree_data.max_pts=M; // Points per octant.
+
+  //Set source coordinates and values.
+  std::vector<Real_t> src_coord, src_value;
+  src_coord=point_distrib<Real_t>((dist==0?UnifGrid:(dist==1?RandSphr:RandElps)),N,comm);
+  for(size_t i=0;i<src_coord.size();i++) src_coord[i]*=b;
+  for(size_t i=0;i<src_coord.size()*mykernel->ker_dim[0]/COORD_DIM;i++) src_value.push_back(drand48());
+  tree_data.pt_coord=src_coord;
+  tree_data.src_coord=src_coord;
+  tree_data.src_value=src_value;
+
+  //Set target coordinates.
+  tree_data.trg_coord=tree_data.src_coord;
+
+  //Print various parameters.
+  if(!myrank){
+    std::cout<<std::setprecision(2)<<std::scientific;
+    std::cout<<"Number of MPI processes: "<<p<<'\n';
+    std::cout<<"Number of OpenMP threads: "<<omp_p<<'\n';
+    std::cout<<"Order of multipole expansions: "<<mult_order<<'\n';
+    std::cout<<"FMM Kernel name: "<<mykernel->ker_name<<'\n';
+    std::cout<<"Number of point samples: "<<N<<'\n';
+    std::cout<<"Point distribution: "<<(dist==0?"Unif":(dist==1?"Sphere":"Ellipse"))<<'\n';
+    std::cout<<"Maximum points per octant: "<<tree_data.max_pts<<'\n';
+    std::cout<<"Maximum Tree Depth: "<<depth<<'\n';
+    std::cout<<"BoundaryType: "<<(bndry==pvfmm::Periodic?"Periodic":"FreeSpace")<<'\n';
+  }
+
+  //Initialize FMM_Mat.
+  FMM_Mat_t* fmm_mat=new FMM_Mat_t;
+  fmm_mat->Initialize(mult_order,comm,mykernel);
+
+  //Create Tree and initialize with input data.
+  FMM_Tree_t* tree=new FMM_Tree_t(comm);
+  tree->Initialize(&tree_data);
+
+  //Initialize FMM Tree
+  tree->InitFMM_Tree(false,bndry);
+
+  { //Output max tree depth.
+    long nleaf=0, maxdepth=0;
+    std::vector<size_t> all_nodes(MAX_DEPTH+1,0);
+    std::vector<size_t> leaf_nodes(MAX_DEPTH+1,0);
+    std::vector<FMMNode_t*>& nodes=tree->GetNodeList();
+    for(size_t i=0;i<nodes.size();i++){
+      FMMNode_t* n=nodes[i];
+      if(!n->IsGhost()) all_nodes[n->Depth()]++;
+      if(!n->IsGhost() && n->IsLeaf()){
+        leaf_nodes[n->Depth()]++;
+        if(maxdepth<n->Depth()) maxdepth=n->Depth();
+        nleaf++;
+      }
+    }
+
+    if(!myrank) std::cout<<"All  Nodes: ";
+    for(int i=0;i<MAX_DEPTH;i++){
+      int local_size=all_nodes[i];
+      int global_size;
+      MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
+      if(!myrank) std::cout<<global_size<<' ';
+    }
+    if(!myrank) std::cout<<'\n';
+
+    if(!myrank) std::cout<<"Leaf Nodes: ";
+    for(int i=0;i<MAX_DEPTH;i++){
+      int local_size=leaf_nodes[i];
+      int global_size;
+      MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm);
+      if(!myrank) std::cout<<global_size<<' ';
+    }
+    if(!myrank) std::cout<<'\n';
+
+    long nleaf_glb=0, maxdepth_glb=0;
+    { // MPI_Reduce
+      MPI_Allreduce(&nleaf, &nleaf_glb, 1, MPI_INT, MPI_SUM, comm);
+      MPI_Allreduce(&maxdepth, &maxdepth_glb, 1, MPI_INT, MPI_MAX, comm);
+    }
+    if(!myrank) std::cout<<"Number of Leaf Nodes: "<<nleaf_glb<<'\n';
+    if(!myrank) std::cout<<"Tree Depth: "<<maxdepth_glb<<'\n';
+  }
+
+  // Setup FMM
+  tree->SetupFMM(fmm_mat);
+  tree->RunFMM();
+
+  //Re-run FMM
+  tree->ClearFMMData();
+  tree->RunFMM();
+
+  //Find error in FMM output.
+  CheckFMMOutput<FMM_Mat_t>(tree, mykernel, "Output");
+
+  //Write2File
+  //tree->Write2File("result/output");
+
+  //Delete matrices.
+  delete fmm_mat;
+
+  //Delete the tree.
+  delete tree;
+}
+
+int main(int argc, char **argv){
+  MPI_Init(&argc, &argv);
+
+  MPI_Comm comm=MPI_COMM_WORLD;
+
+  // Read command line options.
+  commandline_option_start(argc, argv);
+  omp_set_num_threads( atoi(commandline_option(argc, argv,  "-omp",     "1", false, "-omp  <int> =  (1)   : Number of OpenMP threads."          )));
+  size_t   N=(size_t)strtod(commandline_option(argc, argv,    "-N",     "1",  true, "-N    <int>          : Number of points."                  ),NULL);
+  size_t   M=(size_t)strtod(commandline_option(argc, argv,    "-M",   "350", false, "-M    <int>          : Number of points per octant."       ),NULL);
+  double   b=        strtod(commandline_option(argc, argv,    "-b",     "1", false, "-b    <int> =  (1)   : Bounding-box length (0 < b <= 1)"   ),NULL);
+  int      m=       strtoul(commandline_option(argc, argv,    "-m",    "10", false, "-m    <int> = (10)   : Multipole order (+ve even integer)."),NULL,10);
+  int      d=       strtoul(commandline_option(argc, argv,    "-d",    "15", false, "-d    <int> = (15)   : Maximum tree depth."                ),NULL,10);
+  bool    sp=   (1==strtoul(commandline_option(argc, argv,   "-sp",     "0", false, "-sp   <0/1> =  (0)   : Single Precision."                  ),NULL,10));
+  int   dist=       strtoul(commandline_option(argc, argv, "-dist",     "0", false, "-dist <int> =  (0)   : 0) Unif 1) Sphere 2) Ellipse"       ),NULL,10);
+  int    ker=       strtoul(commandline_option(argc, argv,  "-ker",     "1", false,
+       "-ker  <int> =  (1)   : 1) Laplace potential\n\
+                               2) Laplace gradient\n\
+                               3) Helmholtz"),NULL,10);
+  commandline_option_end(argc, argv);
+  pvfmm::Profile::Enable(true);
+
+  // Run FMM with above options.
+  pvfmm::Profile::Tic("FMM_Test",&comm,true);
+  if(sp) fmm_test<float >(ker, N,M,b, dist,m,d,comm);
+  else   fmm_test<double>(ker, N,M,b, dist,m,d,comm);
+  pvfmm::Profile::Toc();
+
+  //Output Profiling results.
+  pvfmm::Profile::print(&comm);
+
+  // Shut down MPI
+  MPI_Finalize();
+  return 0;
+}
+

+ 110 - 0
scripts/conv_pts.sh

@@ -0,0 +1,110 @@
+#!/bin/bash
+
+CORES=16;
+export EXEC=examples/bin/fmm_pts
+
+# List arrays and corresponding executable option prefix
+declare -a opt_array=(nodes cores mpi_proc threads ker n_pts m_pts b_len dist m depth sin_pr max_time);
+declare -a opt_names=(    -     -        -     omp ker     N     M     b dist m     d     sp        -);
+for (( i=0; i<${#opt_names[@]}; i++ )) ; do # Declare arrays
+  eval "declare -a ${opt_array[$i]}=()";
+done
+
+
+###################################################################################################
+#                    Convergence Laplace kernel, 1M points, uniform distribution                  #
+###################################################################################################
+nodes+=(            1         1         1         1         1         1         1         1         1 :)
+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+mpi_proc+=(         1         1         1         1         1         1         1         1         1 :)
+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+ker+=(              1         1         1         1         1         1         1         1         1 :)
+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
+m_pts+=(          300       300       300       300       300       300       300       300       300 :)
+b_len+=(       0.8125    0.8125      0.75      0.75     0.625       1.0       1.0     0.875      0.75 :)
+dist+=(             0         0         0         0         0         0         0         0         0 :)
+m+=(                2         4         6         6         8        10        12        14        16 :)
+depth+=(            6         6         6         6         6         5         5         5         5 :)
+sin_pr+=(           1         1         1         0         0         0         0         0         0 :)
+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
+
+
+###################################################################################################
+#                  Convergence Laplace kernel, 1M points, non-uniform distribution (sphere)       #
+###################################################################################################
+nodes+=(            1         1         1         1         1         1         1         1         1 :)
+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+mpi_proc+=(         1         1         1         1         1         1         1         1         1 :)
+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+ker+=(              1         1         1         1         1         1         1         1         1 :)
+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
+m_pts+=(          180       190       200       180       200       700       750       750      1500 :)
+b_len+=(            1         1         1         1         1         1         1         1         1 :)
+dist+=(             1         1         1         1         1         1         1         1         1 :)
+m+=(                2         4         6         6         8        10        12        14        16 :)
+depth+=(           15        15        15        15        15        15        15        15        15 :)
+sin_pr+=(           1         1         1         0         0         0         0         0         0 :)
+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
+
+
+###################################################################################################
+#                  Convergence Laplace kernel, 1M points, non-uniform distribution (ellipse)      #
+###################################################################################################
+nodes+=(            1         1         1         1         1         1         1         1         1 :)
+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+mpi_proc+=(         1         1         1         1         1         1         1         1         1 :)
+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+ker+=(              1         1         1         1         1         1         1         1         1 :)
+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
+m_pts+=(          190       270       370       260       370       430       640      1000      1400 :)
+b_len+=(            1         1         1         1         1         1         1         1         1 :)
+dist+=(             2         2         2         2         2         2         2         2         2 :)
+m+=(                2         4         6         6         8        10        12        14        16 :)
+depth+=(           15        15        15        15        15        15        15        15        15 :)
+sin_pr+=(           1         1         1         0         0         0         0         0         0 :)
+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
+
+
+###################################################################################################
+#    Convergence Helmholtz kernel (wave-number=10), 1M points, uniform distribution (ellipse)     #
+###################################################################################################
+nodes+=(            1         1         1         1         1         1         1         1 :)
+cores+=(     ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+mpi_proc+=(         1         1         1         1         1         1         1         1 :)
+threads+=(   ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES}  ${CORES} :)
+ker+=(              3         3         3         3         3         3         3         3 :)
+n_pts+=(         1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6      1e+6 :)
+m_pts+=(          370       430       640       370       430       640       640       640 :)
+m_pts+=(          150       150       150       200       300       400       400       400 :)
+b_len+=(            1         1         1         1         1         1         1         1 :)
+dist+=(             0         0         0         0         0         0         0         0 :)
+m+=(                8        10        10        12        14        16        18        20 :)
+depth+=(           15        15        15        15        15        15        15        15 :)
+sin_pr+=(           1         1         0         0         0         0         0         0 :)
+max_time+=(   1000000   1000000   1000000   1000000   1000000   1000000   1000000   1000000 :)
+
+
+RESULT_HEADER=" Script: $0          Convergence with multipole order for 1M points"
+
+declare -a RESULT_FIELDS=()
+RESULT_FIELDS+=("FMM Kernel name"                    "kernel" )
+RESULT_FIELDS+=("Point distribution"                 "dist"   )
+RESULT_FIELDS+=("Number of Leaf Nodes"               "Noct"   )
+RESULT_FIELDS+=("Tree Depth"                         "d"      )
+RESULT_FIELDS+=("Maximum points per octant"          "M"      )
+RESULT_FIELDS+=("Order of multipole expansions"      "m"      )
+RESULT_FIELDS+=("|"                                  "|"      )
+RESULT_FIELDS+=("Maximum Relative Error \[Output\]"  "Linf(e)")
+
+declare -a PROF_FIELDS=()
+PROF_FIELDS+=("InitTree"    )
+PROF_FIELDS+=("SetupFMM"    )
+PROF_FIELDS+=("RunFMM"      )
+#PROF_FIELDS+=("UpwardPass"  )
+#PROF_FIELDS+=("ReduceBcast" )
+#PROF_FIELDS+=("DownwardPass")
+
+WORK_DIR=$(dirname ${PWD}/$0)/..
+TERM_WIDTH=$(stty size | cut -d ' ' -f 2)
+source ${WORK_DIR}/scripts/.submit_jobs.sh | cut -b -${TERM_WIDTH}
+