Dhairya Malhotra 7 jaren geleden
bovenliggende
commit
32ec2adaad

+ 50 - 50
include/sctl/cheb_utils.hpp

@@ -61,11 +61,11 @@ template <class ValueType, class Derived> class BasisInterface {
           Vector<ValueType> x, p;
           Derived::Nodes1D(order, x);
           Derived::EvalBasis1D(order, x, p);
-          Matrix<ValueType> Mp1(order, order, p.Begin(), false);
+          Matrix<ValueType> Mp1(order, order, p.begin(), false);
           Mp1.pinv().Swap(precomp[order]);
         }
       }
-      Mp.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].Begin(), false);
+      Mp.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].begin(), false);
     }
 
     Integer order_DIM = pow<Integer>(order, DIM);
@@ -76,12 +76,12 @@ template <class ValueType, class Derived> class BasisInterface {
     // Create work buffers
     Long buff_size = dof * order_DIM;
     Vector<ValueType> buff(2 * buff_size);
-    Iterator<ValueType> buff1 = buff.Begin() + buff_size * 0;
-    Iterator<ValueType> buff2 = buff.Begin() + buff_size * 1;
+    Iterator<ValueType> buff1 = buff.begin() + buff_size * 0;
+    Iterator<ValueType> buff2 = buff.begin() + buff_size * 1;
 
-    Vector<ValueType> fn(order_DIM * dof, (Iterator<ValueType>)fn_v.Begin(), false);
+    Vector<ValueType> fn(order_DIM * dof, (Iterator<ValueType>)fn_v.begin(), false);
     for (Integer k = 0; k < DIM; k++) {  // Apply Mp along k-dimension
-      Matrix<ValueType> Mi(dof * order_DIM_, order, fn.Begin(), false);
+      Matrix<ValueType> Mi(dof * order_DIM_, order, fn.begin(), false);
       Matrix<ValueType> Mo(dof * order_DIM_, order, buff2, false);
       Matrix<ValueType>::GEMM(Mo, Mi, Mp);
 
@@ -112,11 +112,11 @@ template <class ValueType, class Derived> class BasisInterface {
           Derived::Nodes1D(order, x);
           for (Integer i = 0; i < order; i++) x[i] = (x[i] - 0.5) * scale + 0.5;
           Derived::EvalBasis1D(order, x, p);
-          Matrix<ValueType> Mp1(order, order, p.Begin(), false);
+          Matrix<ValueType> Mp1(order, order, p.begin(), false);
           Mp1.pinv().Swap(precomp[order]);
         }
       }
-      Mp.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].Begin(), false);
+      Mp.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].begin(), false);
     }
 
     Integer order_DIM = pow<Integer>(order, DIM);
@@ -127,12 +127,12 @@ template <class ValueType, class Derived> class BasisInterface {
     // Create work buffers
     Long buff_size = dof * order_DIM;
     Vector<ValueType> buff(2 * buff_size);
-    Iterator<ValueType> buff1 = buff.Begin() + buff_size * 0;
-    Iterator<ValueType> buff2 = buff.Begin() + buff_size * 1;
+    Iterator<ValueType> buff1 = buff.begin() + buff_size * 0;
+    Iterator<ValueType> buff2 = buff.begin() + buff_size * 1;
 
-    Vector<ValueType> fn(order_DIM * dof, (Iterator<ValueType>)fn_v.Begin(), false);
+    Vector<ValueType> fn(order_DIM * dof, (Iterator<ValueType>)fn_v.begin(), false);
     for (Integer k = 0; k < DIM; k++) {  // Apply Mp along k-dimension
-      Matrix<ValueType> Mi(dof * order_DIM_, order, fn.Begin(), false);
+      Matrix<ValueType> Mi(dof * order_DIM_, order, fn.begin(), false);
       Matrix<ValueType> Mo(dof * order_DIM_, order, buff2, false);
       Matrix<ValueType>::GEMM(Mo, Mi, Mp);
 
@@ -172,15 +172,15 @@ template <class ValueType, class Derived> class BasisInterface {
       Integer n = in_x[i].Dim();
       if (!n) return;
       Mp[i].ReInit(order, n);
-      Vector<ValueType> p(order * n, Mp[i].Begin(), false);
+      Vector<ValueType> p(order * n, Mp[i].begin(), false);
       Derived::EvalBasis1D(order, in_x[i], p);
       buff_size *= std::max(order, n);
     }
 
     // Create work buffers
     Vector<ValueType> buff(2 * buff_size);
-    Iterator<ValueType> buff1 = buff.Begin() + buff_size * 0;
-    Iterator<ValueType> buff2 = buff.Begin() + buff_size * 1;
+    Iterator<ValueType> buff1 = buff.begin() + buff_size * 0;
+    Iterator<ValueType> buff2 = buff.begin() + buff_size * 1;
 
     {  // Rearrange coefficients into a tensor.
       Vector<ValueType> tensor(dof * pow<Integer>(order, DIM), buff1, false);
@@ -202,7 +202,7 @@ template <class ValueType, class Derived> class BasisInterface {
       Matrix<ValueType>::GEMM(Mo, Mi, Mp[k]);
 
       Matrix<ValueType> Mo_t(in_x[k].Dim(), dof * order_DIM, buff1, false);
-      if (k == DIM - 1) Mo_t.ReInit(in_x[k].Dim(), dof * order_DIM, out.Begin(), false);
+      if (k == DIM - 1) Mo_t.ReInit(in_x[k].Dim(), dof * order_DIM, out.begin(), false);
       for (Long i = 0; i < Mo.Dim(0); i++) {
         for (Long j = 0; j < Mo.Dim(1); j++) {
           Mo_t[j][i] = Mo[i][j];
@@ -280,16 +280,16 @@ template <class ValueType, class Derived> class BasisInterface {
           M.Swap(precomp[order]);
         }
       }
-      Mdiff.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].Begin(), false);
+      Mdiff.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].begin(), false);
     }
 
     // Create work buffers
     Long buff_size = dof * pow<Integer>(order, DIM);
     Vector<ValueType> buff((3 + DIM) * buff_size);
-    Vector<ValueType> buff0(buff_size * 1, buff.Begin() + buff_size * 0, false);
-    Vector<ValueType> buff1(buff_size * 1, buff.Begin() + buff_size * 1, false);
-    Vector<ValueType> buff2(buff_size * 1, buff.Begin() + buff_size * 2, false);
-    Vector<ValueType> buff3(buff_size * DIM, buff.Begin() + buff_size * 3, false);
+    Vector<ValueType> buff0(buff_size * 1, buff.begin() + buff_size * 0, false);
+    Vector<ValueType> buff1(buff_size * 1, buff.begin() + buff_size * 1, false);
+    Vector<ValueType> buff2(buff_size * 1, buff.begin() + buff_size * 2, false);
+    Vector<ValueType> buff3(buff_size * DIM, buff.begin() + buff_size * 3, false);
 
     {  // buff0 <-- coeff2tensor(coeff_in);
       coeff2tensor<DIM>(order, coeff_in, buff0);
@@ -311,8 +311,8 @@ template <class ValueType, class Derived> class BasisInterface {
       }
 
       {  // buff2 <-- buff1 * Mdiff
-        Matrix<ValueType> Mi(dof * N0 * N2, N1, buff1.Begin(), false);
-        Matrix<ValueType> Mo(dof * N0 * N2, N1, buff2.Begin(), false);
+        Matrix<ValueType> Mi(dof * N0 * N2, N1, buff1.begin(), false);
+        Matrix<ValueType> Mo(dof * N0 * N2, N1, buff2.begin(), false);
         Matrix<ValueType>::GEMM(Mo, Mi, Mdiff);
       }
 
@@ -521,7 +521,7 @@ template <class ValueType, class Derived> class BasisInterface {
         M_diff.SetZero();
         StaticArray<Vector<ValueType>, DIM> nodes_;
         for (Integer i = 0; i < DIM; i++) { // Set nodes_
-          nodes_[i].ReInit(nodes.Dim(), nodes.Begin(), false);
+          nodes_[i].ReInit(nodes.Dim(), nodes.begin(), false);
         }
         Vector<ValueType> nodes0, nodes1;
         nodes0.PushBack(0);
@@ -533,15 +533,15 @@ template <class ValueType, class Derived> class BasisInterface {
         for (Integer i = 0; i < Ncoeff; i++) {
           coeff[i]=0.5;
           value.ReInit(Ngrid, M_diff[i + Ncoeff * 0], false);
-          nodes_[dir0/2].ReInit(1, (dir0 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
+          nodes_[dir0/2].ReInit(1, (dir0 & 1 ? nodes1.begin() : nodes0.begin()), false);
           Eval<DIM>(order, coeff, nodes_, value);
-          nodes_[dir0/2].ReInit(nodes.Dim(), nodes.Begin(), false);
+          nodes_[dir0/2].ReInit(nodes.Dim(), nodes.begin(), false);
 
           coeff[i]=-0.5;
           value.ReInit(Ngrid, M_diff[i + Ncoeff * 1], false);
-          nodes_[dir1/2].ReInit(1, (dir1 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
+          nodes_[dir1/2].ReInit(1, (dir1 & 1 ? nodes1.begin() : nodes0.begin()), false);
           Eval<DIM>(order, coeff, nodes_, value);
-          nodes_[dir1/2].ReInit(nodes.Dim(), nodes.Begin(), false);
+          nodes_[dir1/2].ReInit(nodes.Dim(), nodes.begin(), false);
 
           coeff[i]=0;
         }
@@ -660,7 +660,7 @@ template <class ValueType, class Derived> class BasisInterface {
       //{ // Set Mgrid2coeff
       //  StaticArray<Vector<ValueType>, DIM> nodes_;
       //  for (Integer i = 0; i < DIM; i++) { // Set nodes_
-      //    nodes_[i].ReInit(nodes.Dim(), nodes.Begin(), false);
+      //    nodes_[i].ReInit(nodes.Dim(), nodes.begin(), false);
       //  }
 
       //  Integer Ngrid = pow<Integer>(order, DIM);
@@ -693,7 +693,7 @@ template <class ValueType, class Derived> class BasisInterface {
       //  Matrix<ValueType> M1_(Ngrid, 2 * Ngrid0, MM[2 * Ngrid0 + Ngrid * 1], false); M1_ = (Mgrid2coeff * M_grad * M_diff).Transpose();
       //  for (Long i=0;i<2*Ngrid*2*Ngrid0;i++) MM[0][2*Ngrid0*2*Ngrid0 +i] *= 10000;
       //  MM = MM.Transpose().pinv();
-      //  M[dir].ReInit(2 * Ngrid0, 2 * Ngrid0, MM.Begin());
+      //  M[dir].ReInit(2 * Ngrid0, 2 * Ngrid0, MM.begin());
       //  M[dir] = Mcoeff2grid * M[dir] * Mgrid2coeff;
       //} else {
       //  SCTL_ASSERT(DIM==2);
@@ -830,7 +830,7 @@ template <class ValueType, class Derived> class BasisInterface {
         M_diff.SetZero();
         StaticArray<Vector<ValueType>, DIM> nodes_;
         for (Integer i = 0; i < DIM; i++) { // Set nodes_
-          nodes_[i].ReInit(nodes.Dim(), nodes.Begin(), false);
+          nodes_[i].ReInit(nodes.Dim(), nodes.begin(), false);
         }
         Vector<ValueType> nodes0, nodes1;
         nodes0.PushBack(0);
@@ -842,15 +842,15 @@ template <class ValueType, class Derived> class BasisInterface {
         for (Integer i = 0; i < Ncoeff; i++) {
           coeff[i]=0.5;
           value.ReInit(Ngrid, M_diff[i + Ncoeff * 0], false);
-          nodes_[dir0/2].ReInit(1, (dir0 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
+          nodes_[dir0/2].ReInit(1, (dir0 & 1 ? nodes1.begin() : nodes0.begin()), false);
           Eval<DIM>(order, coeff, nodes_, value);
-          nodes_[dir0/2].ReInit(nodes.Dim(), nodes.Begin(), false);
+          nodes_[dir0/2].ReInit(nodes.Dim(), nodes.begin(), false);
 
           coeff[i]=-0.5;
           value.ReInit(Ngrid, M_diff[i + Ncoeff * 1], false);
-          nodes_[dir1/2].ReInit(1, (dir1 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
+          nodes_[dir1/2].ReInit(1, (dir1 & 1 ? nodes1.begin() : nodes0.begin()), false);
           Eval<DIM>(order, coeff, nodes_, value);
-          nodes_[dir1/2].ReInit(nodes.Dim(), nodes.Begin(), false);
+          nodes_[dir1/2].ReInit(nodes.Dim(), nodes.begin(), false);
 
           coeff[i]=0;
         }
@@ -1078,7 +1078,7 @@ template <class ValueType, class Derived> class BasisInterface {
       Vector<ValueType> V_cheb(order * order);
       cheb_basis_1d(order, x_, V_cheb);
       for (size_t i = 0; i < order; i++) V_cheb[i] /= 2.0;
-      Matrix<ValueType> M(order, order, V_cheb.Begin());
+      Matrix<ValueType> M(order, order, V_cheb.begin());
 
       Vector<ValueType> w_sample(order);
       w_sample.SetZero();
@@ -1146,7 +1146,7 @@ template <class ValueType, class Derived> class BasisInterface {
         r_.PushBack(fabs(trg[i] - 0.0));
         r_.PushBack(fabs(trg[i] - side));
       }
-      std::sort(r_.Begin(), r_.Begin() + r_.Dim());
+      std::sort(r_.begin(), r_.begin() + r_.Dim());
 
       ValueType r0, r1 = r_[r_.Dim() - 1];
       r0 = (r1 > side ? r1 - side : 0.0);
@@ -1296,14 +1296,14 @@ template <class ValueType, class Derived> class BasisInterface {
             for (Integer j = 0; j < kdim[0]; j++) {  // Evaluate ker
               for (Integer k = 0; k < kdim[0]; k++) v_src[k] = 0.0;
               v_src[j] = 1.0;
-              Vector<ValueType> ker_value(N * kdim[1], kern_value.Begin() + j * N * kdim[1], false);
+              Vector<ValueType> ker_value(N * kdim[1], kern_value.begin() + j * N * kdim[1], false);
               ker(r_src, n_src, v_src, eval_coord, ker_value);
             }
             {  // Transpose
               v0.ReInit(kern_value.Dim());
               for (Integer k = 0; k < v0.Dim(); k++) v0[k] = kern_value[k];
-              Matrix<ValueType> M0(kdim[0], N * kdim[1], v0.Begin(), false);
-              Matrix<ValueType> M1(N * kdim[1], kdim[0], kern_value.Begin(), false);
+              Matrix<ValueType> M0(kdim[0], N * kdim[1], v0.begin(), false);
+              Matrix<ValueType> M1(N * kdim[1], kdim[0], kern_value.begin(), false);
               for (Integer l0 = 0; l0 < M1.Dim(0); l0++) {  // Transpose
                 for (Integer l1 = 0; l1 < M1.Dim(1); l1++) {
                   M1[l0][l1] = M0[l1][l0];
@@ -1312,16 +1312,16 @@ template <class ValueType, class Derived> class BasisInterface {
             }
           }
           {  // Set Update M
-            Matrix<ValueType> Mkern(eval_mesh[SUBDIM - 1].Dim(), kern_value.Dim() / eval_mesh[SUBDIM - 1].Dim(), kern_value.Begin(), false);
+            Matrix<ValueType> Mkern(eval_mesh[SUBDIM - 1].Dim(), kern_value.Dim() / eval_mesh[SUBDIM - 1].Dim(), kern_value.begin(), false);
             for (Integer k = SUBDIM - 1; k >= 0; k--) {  // Compute v0
-              Matrix<ValueType> Mpoly(order, eval_mesh[k].Dim(), eval_poly[k].Begin(), false);
+              Matrix<ValueType> Mpoly(order, eval_mesh[k].Dim(), eval_poly[k].begin(), false);
 
               v1.ReInit(Mpoly.Dim(0) * Mkern.Dim(1));
-              Matrix<ValueType> Mcoef(Mpoly.Dim(0), Mkern.Dim(1), v1.Begin(), false);
+              Matrix<ValueType> Mcoef(Mpoly.Dim(0), Mkern.Dim(1), v1.begin(), false);
               Matrix<ValueType>::GEMM(Mcoef, Mpoly, Mkern);
 
               v0.ReInit(v1.Dim());
-              Matrix<ValueType> Mt(Mkern.Dim(1), Mpoly.Dim(0), v0.Begin(), false);
+              Matrix<ValueType> Mt(Mkern.Dim(1), Mpoly.Dim(0), v0.begin(), false);
               for (Integer l0 = 0; l0 < Mt.Dim(0); l0++) {  // Transpose
                 for (Integer l1 = 0; l1 < Mt.Dim(1); l1++) {
                   Mt[l0][l1] = Mcoef[l1][l0];
@@ -1329,7 +1329,7 @@ template <class ValueType, class Derived> class BasisInterface {
               }
 
               if (k > 0) {  // Reinit Mkern
-                Mkern.ReInit(eval_mesh[k - 1].Dim(), v0.Dim() / eval_mesh[k - 1].Dim(), v0.Begin(), false);
+                Mkern.ReInit(eval_mesh[k - 1].Dim(), v0.Dim() / eval_mesh[k - 1].Dim(), v0.begin(), false);
               }
             }
 
@@ -1348,8 +1348,8 @@ template <class ValueType, class Derived> class BasisInterface {
       if (Mcoeff.Dim(0) != kdim[1] || Mcoeff.Dim(1) != kdim[0] * Ncoeff) {
         Mcoeff.ReInit(kdim[1], kdim[0] * Ncoeff);
       }
-      Vector<ValueType> Mtensor_(Mtensor.Dim(0) * Mtensor.Dim(1), Mtensor.Begin(), false);
-      Vector<ValueType> Mcoeff_(Mcoeff.Dim(0) * Mcoeff.Dim(1), Mcoeff.Begin(), false);
+      Vector<ValueType> Mtensor_(Mtensor.Dim(0) * Mtensor.Dim(1), Mtensor.begin(), false);
+      Vector<ValueType> Mcoeff_(Mcoeff.Dim(0) * Mcoeff.Dim(1), Mcoeff.begin(), false);
       tensor2coeff<SUBDIM>(order, Mtensor_, Mcoeff_);
     }
   }
@@ -1383,12 +1383,12 @@ template <class ValueType, class Derived> class BasisInterface {
 
     Vector<ValueType> p;
     Derived::EvalBasis1D(order, nodes, p);
-    Matrix<ValueType> Mp(order, N, p.Begin(), false);
+    Matrix<ValueType> Mp(order, N, p.begin(), false);
     M0 = Mp * M0;
 
     Vector<ValueType> coeff;
-    Approx<1>(order, Vector<ValueType>(M0.Dim(0) * M0.Dim(1), M0.Begin(), false), coeff);
-    (*M) = Matrix<ValueType>(M0.Dim(0), coeff.Dim() / M0.Dim(0), coeff.Begin(), false);
+    Approx<1>(order, Vector<ValueType>(M0.Dim(0) * M0.Dim(1), M0.begin(), false), coeff);
+    (*M) = Matrix<ValueType>(M0.Dim(0), coeff.Dim() / M0.Dim(0), coeff.begin(), false);
   }
 
   friend Derived;

+ 67 - 67
include/sctl/comm.txx

@@ -174,7 +174,7 @@ template <class SType, class RType> void Comm::Allgatherv(ConstIterator<SType> s
     rbuf[0];
     rbuf[rcount_sum - 1];
   }
-  MPI_Allgatherv((scount ? &sbuf[0] : NULL), scount, CommDatatype<SType>::value(), (rcount_sum ? &rbuf[0] : NULL), &rcounts_.Begin()[0], &rdispls_.Begin()[0], CommDatatype<RType>::value(), mpi_comm_);
+  MPI_Allgatherv((scount ? &sbuf[0] : NULL), scount, CommDatatype<SType>::value(), (rcount_sum ? &rbuf[0] : NULL), &rcounts_.begin()[0], &rdispls_.begin()[0], CommDatatype<RType>::value(), mpi_comm_);
 #else
   memcopy((Iterator<char>)(rbuf + rdispls[0]), (ConstIterator<char>)sbuf, scount * sizeof(SType));
 #endif
@@ -347,7 +347,7 @@ template <class Type> void Comm::PartitionW(Vector<Type>& nodeList, const Vector
     }
     localWt = nlSize;
   } else {
-    wts.ReInit(nlSize, (Iterator<Long>)wts_->Begin(), false);
+    wts.ReInit(nlSize, (Iterator<Long>)wts_->begin(), false);
 #pragma omp parallel for reduction(+ : localWt)
     for (Long i = 0; i < nlSize; i++) {
       localWt += wts[i];
@@ -365,7 +365,7 @@ template <class Type> void Comm::PartitionW(Vector<Type>& nodeList, const Vector
   if (nlSize) {  // perform a local scan on the weights first ...
     lscn.ReInit(nlSize);
     lscn[0] = off1;
-    omp_par::scan(wts.Begin(), lscn.Begin(), nlSize);
+    omp_par::scan(wts.begin(), lscn.begin(), nlSize);
   }
 
   Vector<Long> sendSz, recvSz, sendOff, recvOff;
@@ -385,8 +385,8 @@ template <class Type> void Comm::PartitionW(Vector<Type>& nodeList, const Vector
     for (Integer i = pid1; i < pid2; i++) {
       Long wt1 = (totalWt * (i)) / npes;
       Long wt2 = (totalWt * (i + 1)) / npes;
-      Long start = std::lower_bound(lscn.Begin(), lscn.Begin() + nlSize, wt1, std::less<Long>()) - lscn.Begin();
-      Long end = std::lower_bound(lscn.Begin(), lscn.Begin() + nlSize, wt2, std::less<Long>()) - lscn.Begin();
+      Long start = std::lower_bound(lscn.begin(), lscn.begin() + nlSize, wt1, std::less<Long>()) - lscn.begin();
+      Long end = std::lower_bound(lscn.begin(), lscn.begin() + nlSize, wt2, std::less<Long>()) - lscn.begin();
       if (i == 0) start = 0;
       if (i == npes - 1) end = nlSize;
       sendSz[i] = end - start;
@@ -396,20 +396,20 @@ template <class Type> void Comm::PartitionW(Vector<Type>& nodeList, const Vector
   }
 
   // Exchange sendSz, recvSz
-  Alltoall<Long>(sendSz.Begin(), 1, recvSz.Begin(), 1);
+  Alltoall<Long>(sendSz.begin(), 1, recvSz.begin(), 1);
 
   {  // Compute sendOff, recvOff
     sendOff[0] = 0;
-    omp_par::scan(sendSz.Begin(), sendOff.Begin(), npes);
+    omp_par::scan(sendSz.begin(), sendOff.begin(), npes);
     recvOff[0] = 0;
-    omp_par::scan(recvSz.Begin(), recvOff.Begin(), npes);
+    omp_par::scan(recvSz.begin(), recvOff.begin(), npes);
     assert(sendOff[npes - 1] + sendSz[npes - 1] == nlSize);
   }
 
   // perform All2All  ...
   Vector<Type> newNodes;
   newNodes.ReInit(recvSz[npes - 1] + recvOff[npes - 1]);
-  void* mpi_req = Ialltoallv_sparse<Type>(nodeList.Begin(), sendSz.Begin(), sendOff.Begin(), newNodes.Begin(), recvSz.Begin(), recvOff.Begin());
+  void* mpi_req = Ialltoallv_sparse<Type>(nodeList.begin(), sendSz.begin(), sendOff.begin(), newNodes.begin(), recvSz.begin(), recvOff.begin());
   Wait(mpi_req);
 
   // reset the pointer ...
@@ -426,15 +426,15 @@ template <class Type> void Comm::PartitionN(Vector<Type>& v, Long N) const {
   {  // Set v_cnt, v_dsp
     v_dsp[0] = 0;
     Long cnt = v.Dim();
-    Allgather(Ptr2ConstItr<Long>(&cnt, 1), 1, v_cnt.Begin(), 1);
-    omp_par::scan(v_cnt.Begin(), v_dsp.Begin(), np);
+    Allgather(Ptr2ConstItr<Long>(&cnt, 1), 1, v_cnt.begin(), 1);
+    omp_par::scan(v_cnt.begin(), v_dsp.begin(), np);
     v_dsp[np] = v_cnt[np - 1] + v_dsp[np - 1];
   }
   {  // Set N_cnt, N_dsp
     N_dsp[0] = 0;
     Long cnt = N;
-    Allgather(Ptr2ConstItr<Long>(&cnt, 1), 1, N_cnt.Begin(), 1);
-    omp_par::scan(N_cnt.Begin(), N_dsp.Begin(), np);
+    Allgather(Ptr2ConstItr<Long>(&cnt, 1), 1, N_cnt.begin(), 1);
+    omp_par::scan(N_cnt.begin(), N_dsp.begin(), np);
     N_dsp[np] = N_cnt[np - 1] + N_dsp[np - 1];
   }
   {  // Adjust for dof
@@ -476,11 +476,11 @@ template <class Type> void Comm::PartitionN(Vector<Type>& v, Long N) const {
       }
     }
     sdsp[0] = 0;
-    omp_par::scan(scnt.Begin(), sdsp.Begin(), np);
+    omp_par::scan(scnt.begin(), sdsp.begin(), np);
     rdsp[0] = 0;
-    omp_par::scan(rcnt.Begin(), rdsp.Begin(), np);
+    omp_par::scan(rcnt.begin(), rdsp.begin(), np);
 
-    void* mpi_request = Ialltoallv_sparse(v.Begin(), scnt.Begin(), sdsp.Begin(), v_.Begin(), rcnt.Begin(), rdsp.Begin());
+    void* mpi_request = Ialltoallv_sparse(v.begin(), scnt.begin(), sdsp.begin(), v_.begin(), rcnt.begin(), rdsp.begin());
     Wait(mpi_request);
   }
   v.Swap(v_);
@@ -491,14 +491,14 @@ template <class Type> void Comm::PartitionS(Vector<Type>& nodeList, const Type&
   if (npes == 1) return;
 
   Vector<Type> mins(npes);
-  Allgather(Ptr2ConstItr<Type>(&splitter, 1), 1, mins.Begin(), 1);
+  Allgather(Ptr2ConstItr<Type>(&splitter, 1), 1, mins.begin(), 1);
 
   Vector<Long> scnt(npes), sdsp(npes);
   Vector<Long> rcnt(npes), rdsp(npes);
   {  // Compute scnt, sdsp
 #pragma omp parallel for schedule(static)
     for (Integer i = 0; i < npes; i++) {
-      sdsp[i] = std::lower_bound(nodeList.Begin(), nodeList.Begin() + nodeList.Dim(), mins[i]) - nodeList.Begin();
+      sdsp[i] = std::lower_bound(nodeList.begin(), nodeList.begin() + nodeList.Dim(), mins[i]) - nodeList.begin();
     }
 #pragma omp parallel for schedule(static)
     for (Integer i = 0; i < npes - 1; i++) {
@@ -508,12 +508,12 @@ template <class Type> void Comm::PartitionS(Vector<Type>& nodeList, const Type&
   }
   {  // Compute rcnt, rdsp
     rdsp[0] = 0;
-    Alltoall(scnt.Begin(), 1, rcnt.Begin(), 1);
-    omp_par::scan(rcnt.Begin(), rdsp.Begin(), npes);
+    Alltoall(scnt.begin(), 1, rcnt.begin(), 1);
+    omp_par::scan(rcnt.begin(), rdsp.begin(), npes);
   }
   {  // Redistribute nodeList
     Vector<Type> nodeList_(rdsp[npes - 1] + rcnt[npes - 1]);
-    void* mpi_request = Ialltoallv_sparse(nodeList.Begin(), scnt.Begin(), sdsp.Begin(), nodeList_.Begin(), rcnt.Begin(), rdsp.Begin());
+    void* mpi_request = Ialltoallv_sparse(nodeList.begin(), scnt.begin(), sdsp.begin(), nodeList_.begin(), rcnt.begin(), rdsp.begin());
     Wait(mpi_request);
     nodeList.Swap(nodeList_);
   }
@@ -541,7 +541,7 @@ template <class Type> void Comm::SortScatterIndex(const Vector<Type>& key, Vecto
 
   if (npes > 1 && split_key_ != NULL) {  // Partition data
     Vector<Type> split_key(npes);
-    Allgather(Ptr2ConstItr<Type>(split_key_, 1), 1, split_key.Begin(), 1);
+    Allgather(Ptr2ConstItr<Type>(split_key_, 1), 1, split_key.begin(), 1);
 
     Vector<Long> sendSz(npes);
     Vector<Long> recvSz(npes);
@@ -552,8 +552,8 @@ template <class Type> void Comm::SortScatterIndex(const Vector<Type>& key, Vecto
 
     if (nlSize > 0) {  // Compute sendSz
       // Determine processor range.
-      Long pid1 = std::lower_bound(split_key.Begin(), split_key.Begin() + npes, psorted[0].key) - split_key.Begin() - 1;
-      Long pid2 = std::upper_bound(split_key.Begin(), split_key.Begin() + npes, psorted[nlSize - 1].key) - split_key.Begin() + 1;
+      Long pid1 = std::lower_bound(split_key.begin(), split_key.begin() + npes, psorted[0].key) - split_key.begin() - 1;
+      Long pid2 = std::upper_bound(split_key.begin(), split_key.begin() + npes, psorted[nlSize - 1].key) - split_key.begin() + 1;
       pid1 = (pid1 < 0 ? 0 : pid1);
       pid2 = (pid2 > npes ? npes : pid2);
 
@@ -563,8 +563,8 @@ template <class Type> void Comm::SortScatterIndex(const Vector<Type>& key, Vecto
         p1.key = split_key[i];
         Pair_t p2;
         p2.key = split_key[i + 1 < npes ? i + 1 : i];
-        Long start = std::lower_bound(psorted.Begin(), psorted.Begin() + nlSize, p1, std::less<Pair_t>()) - psorted.Begin();
-        Long end = std::lower_bound(psorted.Begin(), psorted.Begin() + nlSize, p2, std::less<Pair_t>()) - psorted.Begin();
+        Long start = std::lower_bound(psorted.begin(), psorted.begin() + nlSize, p1, std::less<Pair_t>()) - psorted.begin();
+        Long end = std::lower_bound(psorted.begin(), psorted.begin() + nlSize, p2, std::less<Pair_t>()) - psorted.begin();
         if (i == 0) start = 0;
         if (i == npes - 1) end = nlSize;
         sendSz[i] = end - start;
@@ -572,20 +572,20 @@ template <class Type> void Comm::SortScatterIndex(const Vector<Type>& key, Vecto
     }
 
     // Exchange sendSz, recvSz
-    Alltoall<Long>(sendSz.Begin(), 1, recvSz.Begin(), 1);
+    Alltoall<Long>(sendSz.begin(), 1, recvSz.begin(), 1);
 
     // compute offsets ...
     {  // Compute sendOff, recvOff
       sendOff[0] = 0;
-      omp_par::scan(sendSz.Begin(), sendOff.Begin(), npes);
+      omp_par::scan(sendSz.begin(), sendOff.begin(), npes);
       recvOff[0] = 0;
-      omp_par::scan(recvSz.Begin(), recvOff.Begin(), npes);
+      omp_par::scan(recvSz.begin(), recvOff.begin(), npes);
       assert(sendOff[npes - 1] + sendSz[npes - 1] == nlSize);
     }
 
     // perform All2All  ...
     Vector<Pair_t> newNodes(recvSz[npes - 1] + recvOff[npes - 1]);
-    void* mpi_req = Ialltoallv_sparse<Pair_t>(psorted.Begin(), sendSz.Begin(), sendOff.Begin(), newNodes.Begin(), recvSz.Begin(), recvOff.Begin());
+    void* mpi_req = Ialltoallv_sparse<Pair_t>(psorted.begin(), sendSz.begin(), sendOff.begin(), newNodes.begin(), recvSz.begin(), recvOff.begin());
     Wait(mpi_req);
 
     // reset the pointer ...
@@ -638,7 +638,7 @@ template <class Type> void Comm::ScatterForward(Vector<Type>& data_, const Vecto
     Long glb_rank = 0;
     Scan(Ptr2ConstItr<Long>(&send_size, 1), Ptr2Itr<Long>(&glb_rank, 1), 1, CommOp::SUM);
     glb_rank -= send_size;
-    Allgather(Ptr2ConstItr<Long>(&glb_rank, 1), 1, glb_scan.Begin(), 1);
+    Allgather(Ptr2ConstItr<Long>(&glb_rank, 1), 1, glb_scan.begin(), 1);
   }
 
   Vector<Pair_t> psorted;
@@ -649,7 +649,7 @@ template <class Type> void Comm::ScatterForward(Vector<Type>& data_, const Vecto
       psorted[i].key = scatter_index[i];
       psorted[i].data = i;
     }
-    omp_par::merge_sort(psorted.Begin(), psorted.Begin() + recv_size);
+    omp_par::merge_sort(psorted.begin(), psorted.begin() + recv_size);
   }
 
   Vector<Long> recv_indx(recv_size);
@@ -666,18 +666,18 @@ template <class Type> void Comm::ScatterForward(Vector<Type>& data_, const Vecto
 
 #pragma omp parallel for schedule(static)
     for (Integer i = 0; i < npes; i++) {
-      Long start = std::lower_bound(recv_indx.Begin(), recv_indx.Begin() + recv_size, glb_scan[i]) - recv_indx.Begin();
-      Long end = (i + 1 < npes ? std::lower_bound(recv_indx.Begin(), recv_indx.Begin() + recv_size, glb_scan[i + 1]) - recv_indx.Begin() : recv_size);
+      Long start = std::lower_bound(recv_indx.begin(), recv_indx.begin() + recv_size, glb_scan[i]) - recv_indx.begin();
+      Long end = (i + 1 < npes ? std::lower_bound(recv_indx.begin(), recv_indx.begin() + recv_size, glb_scan[i + 1]) - recv_indx.begin() : recv_size);
       recvSz[i] = end - start;
       recvOff[i] = start;
     }
 
-    Alltoall(recvSz.Begin(), 1, sendSz.Begin(), 1);
+    Alltoall(recvSz.begin(), 1, sendSz.begin(), 1);
     sendOff[0] = 0;
-    omp_par::scan(sendSz.Begin(), sendOff.Begin(), npes);
+    omp_par::scan(sendSz.begin(), sendOff.begin(), npes);
     assert(sendOff[npes - 1] + sendSz[npes - 1] == send_size);
 
-    Alltoallv(recv_indx.Begin(), recvSz.Begin(), recvOff.Begin(), send_indx.Begin(), sendSz.Begin(), sendOff.Begin());
+    Alltoallv(recv_indx.begin(), recvSz.begin(), recvOff.begin(), send_indx.begin(), sendSz.begin(), sendOff.begin());
 #pragma omp parallel for schedule(static)
     for (Long i = 0; i < send_size; i++) {
       assert(send_indx[i] >= glb_scan[rank]);
@@ -689,7 +689,7 @@ template <class Type> void Comm::ScatterForward(Vector<Type>& data_, const Vecto
   Vector<Type> send_buff;
   {  // Prepare send buffer
     send_buff.ReInit(send_size * data_dim);
-    ConstIterator<Type> data = data_.Begin();
+    ConstIterator<Type> data = data_.begin();
 #pragma omp parallel for schedule(static)
     for (Long i = 0; i < send_size; i++) {
       Long src_indx = send_indx[i] * data_dim;
@@ -708,12 +708,12 @@ template <class Type> void Comm::ScatterForward(Vector<Type>& data_, const Vecto
       recvSz[i] *= data_dim;
       recvOff[i] *= data_dim;
     }
-    Alltoallv(send_buff.Begin(), sendSz.Begin(), sendOff.Begin(), recv_buff.Begin(), recvSz.Begin(), recvOff.Begin());
+    Alltoallv(send_buff.begin(), sendSz.begin(), sendOff.begin(), recv_buff.begin(), recvSz.begin(), recvOff.begin());
   }
 
   {  // Build output data.
     data_.ReInit(recv_size * data_dim);
-    Iterator<Type> data = data_.Begin();
+    Iterator<Type> data = data_.begin();
 #pragma omp parallel for schedule(static)
     for (Long i = 0; i < recv_size; i++) {
       Long src_indx = i * data_dim;
@@ -779,8 +779,8 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
 
     Vector<Long> glb_scan0(npes + 1);
     Vector<Long> glb_scan1(npes + 1);
-    Allgather(glb_rank + 0, 1, glb_scan0.Begin(), 1);
-    Allgather(glb_rank + 1, 1, glb_scan1.Begin(), 1);
+    Allgather(glb_rank + 0, 1, glb_scan0.begin(), 1);
+    Allgather(glb_rank + 1, 1, glb_scan1.begin(), 1);
     glb_scan0[npes] = glb_size[0];
     glb_scan1[npes] = glb_size[1];
 
@@ -806,10 +806,10 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
         // if(recv_cnt[i] && i!=rank) commCnt++;
       }
 
-      void* mpi_req = Ialltoallv_sparse<Long>(scatter_index_.Begin(), send_cnt.Begin(), send_dsp.Begin(), scatter_index.Begin(), recv_cnt.Begin(), recv_dsp.Begin(), 0);
+      void* mpi_req = Ialltoallv_sparse<Long>(scatter_index_.begin(), send_cnt.begin(), send_dsp.begin(), scatter_index.begin(), recv_cnt.begin(), recv_dsp.begin(), 0);
       Wait(mpi_req);
     } else {
-      scatter_index.ReInit(scatter_index_.Dim(), (Iterator<Long>)scatter_index_.Begin(), false);
+      scatter_index.ReInit(scatter_index_.Dim(), (Iterator<Long>)scatter_index_.begin(), false);
     }
   }
 
@@ -818,7 +818,7 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
     Long glb_rank = 0;
     Scan(Ptr2ConstItr<Long>(&recv_size, 1), Ptr2Itr<Long>(&glb_rank, 1), 1, CommOp::SUM);
     glb_rank -= recv_size;
-    Allgather(Ptr2ConstItr<Long>(&glb_rank, 1), 1, glb_scan.Begin(), 1);
+    Allgather(Ptr2ConstItr<Long>(&glb_rank, 1), 1, glb_scan.begin(), 1);
   }
 
   Vector<Pair_t> psorted(send_size);
@@ -828,7 +828,7 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
       psorted[i].key = scatter_index[i];
       psorted[i].data = i;
     }
-    omp_par::merge_sort(psorted.Begin(), psorted.Begin() + send_size);
+    omp_par::merge_sort(psorted.begin(), psorted.begin() + send_size);
   }
 
   Vector<Long> recv_indx(recv_size);
@@ -845,18 +845,18 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
 
 #pragma omp parallel for schedule(static)
     for (Integer i = 0; i < npes; i++) {
-      Long start = std::lower_bound(send_indx.Begin(), send_indx.Begin() + send_size, glb_scan[i]) - send_indx.Begin();
-      Long end = (i + 1 < npes ? std::lower_bound(send_indx.Begin(), send_indx.Begin() + send_size, glb_scan[i + 1]) - send_indx.Begin() : send_size);
+      Long start = std::lower_bound(send_indx.begin(), send_indx.begin() + send_size, glb_scan[i]) - send_indx.begin();
+      Long end = (i + 1 < npes ? std::lower_bound(send_indx.begin(), send_indx.begin() + send_size, glb_scan[i + 1]) - send_indx.begin() : send_size);
       sendSz[i] = end - start;
       sendOff[i] = start;
     }
 
-    Alltoall(sendSz.Begin(), 1, recvSz.Begin(), 1);
+    Alltoall(sendSz.begin(), 1, recvSz.begin(), 1);
     recvOff[0] = 0;
-    omp_par::scan(recvSz.Begin(), recvOff.Begin(), npes);
+    omp_par::scan(recvSz.begin(), recvOff.begin(), npes);
     assert(recvOff[npes - 1] + recvSz[npes - 1] == recv_size);
 
-    Alltoallv(send_indx.Begin(), sendSz.Begin(), sendOff.Begin(), recv_indx.Begin(), recvSz.Begin(), recvOff.Begin());
+    Alltoallv(send_indx.begin(), sendSz.begin(), sendOff.begin(), recv_indx.begin(), recvSz.begin(), recvOff.begin());
 #pragma omp parallel for schedule(static)
     for (Long i = 0; i < recv_size; i++) {
       assert(recv_indx[i] >= glb_scan[rank]);
@@ -868,7 +868,7 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
   Vector<Type> send_buff;
   {  // Prepare send buffer
     send_buff.ReInit(send_size * data_dim);
-    ConstIterator<Type> data = data_.Begin();
+    ConstIterator<Type> data = data_.begin();
 #pragma omp parallel for schedule(static)
     for (Long i = 0; i < send_size; i++) {
       Long src_indx = psorted[i].data * data_dim;
@@ -887,12 +887,12 @@ template <class Type> void Comm::ScatterReverse(Vector<Type>& data_, const Vecto
       recvSz[i] *= data_dim;
       recvOff[i] *= data_dim;
     }
-    Alltoallv(send_buff.Begin(), sendSz.Begin(), sendOff.Begin(), recv_buff.Begin(), recvSz.Begin(), recvOff.Begin());
+    Alltoallv(send_buff.begin(), sendSz.begin(), sendOff.begin(), recv_buff.begin(), recvSz.begin(), recvOff.begin());
   }
 
   {  // Build output data.
     data_.ReInit(recv_size * data_dim);
-    Iterator<Type> data = data_.Begin();
+    Iterator<Type> data = data_.begin();
 #pragma omp parallel for schedule(static)
     for (Long i = 0; i < recv_size; i++) {
       Long src_indx = i * data_dim;
@@ -956,14 +956,14 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
 
   if (npes == 1) {  // SortedElem <--- local_sort(arr_)
     SortedElem = arr_;
-    omp_par::merge_sort(SortedElem.Begin(), SortedElem.Begin() + nelem);
+    omp_par::merge_sort(SortedElem.begin(), SortedElem.begin() + nelem);
     return;
   }
 
   Vector<Type> arr;
   {  // arr <-- local_sort(arr_)
     arr = arr_;
-    omp_par::merge_sort(arr.Begin(), arr.Begin() + nelem);
+    omp_par::merge_sort(arr.begin(), arr.begin() + nelem);
   }
 
   Vector<Type> nbuff, nbuff_ext, rbuff, rbuff_ext;  // Allocate memory.
@@ -994,7 +994,7 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
         {  // Set glb_splt_count, glb_splt_cnts, glb_splt_disp
           MPI_Allgather(&splt_count, 1, CommDatatype<Integer>::value(), &glb_splt_cnts[0], 1, CommDatatype<Integer>::value(), comm);
           glb_splt_disp[0] = 0;
-          omp_par::scan(glb_splt_cnts.Begin(), glb_splt_disp.Begin(), npes);
+          omp_par::scan(glb_splt_cnts.begin(), glb_splt_disp.begin(), npes);
           glb_splt_count = glb_splt_cnts[npes - 1] + glb_splt_disp[npes - 1];
           SCTL_ASSERT(glb_splt_count);
         }
@@ -1015,7 +1015,7 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
       {  // Compute local rank
 #pragma omp parallel for schedule(static)
         for (Integer i = 0; i < glb_splt_count; i++) {
-          lrank[i] = std::lower_bound(arr.Begin(), arr.Begin() + nelem, glb_splitters[i]) - arr.Begin();
+          lrank[i] = std::lower_bound(arr.begin(), arr.begin() + nelem, glb_splitters[i]) - arr.begin();
         }
       }
 
@@ -1025,13 +1025,13 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
       }
 
       {  // Determine split_key, totSize_new
-        ConstIterator<Long> split_disp = grank.Begin();
+        ConstIterator<Long> split_disp = grank.begin();
         for (Integer i = 0; i < glb_splt_count; i++) {
           if (labs(grank[i] - totSize / 2) < labs(*split_disp - totSize / 2)) {
-            split_disp = grank.Begin() + i;
+            split_disp = grank.begin() + i;
           }
         }
-        split_key = glb_splitters[split_disp - grank.Begin()];
+        split_key = glb_splitters[split_disp - grank.begin()];
 
         if (myrank <= (npes - 1) / 2)
           totSize_new = split_disp[0];
@@ -1060,11 +1060,11 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
       Long ssize = 0, lsize = 0;
       ConstIterator<Type> sbuff, lbuff;
       {  // Set ssize, lsize, sbuff, lbuff
-        Long split_indx = std::lower_bound(arr.Begin(), arr.Begin() + nelem, split_key) - arr.Begin();
+        Long split_indx = std::lower_bound(arr.begin(), arr.begin() + nelem, split_key) - arr.begin();
         ssize = (myrank > split_id ? split_indx : nelem - split_indx);
-        sbuff = (myrank > split_id ? arr.Begin() : arr.Begin() + split_indx);
+        sbuff = (myrank > split_id ? arr.begin() : arr.begin() + split_indx);
         lsize = (myrank <= split_id ? split_indx : nelem - split_indx);
-        lbuff = (myrank <= split_id ? arr.Begin() : arr.Begin() + split_indx);
+        lbuff = (myrank <= split_id ? arr.begin() : arr.begin() + split_indx);
       }
 
       Long rsize = 0, ext_rsize = 0;
@@ -1086,10 +1086,10 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
       Long nbuff_size = lsize + rsize + ext_rsize;
       {  // nbuff <-- merge(lbuff, rbuff, rbuff_ext)
         nbuff.ReInit(lsize + rsize);
-        omp_par::merge<ConstIterator<Type>>(lbuff, (lbuff + lsize), rbuff.Begin(), rbuff.Begin() + rsize, nbuff.Begin(), omp_p, std::less<Type>());
+        omp_par::merge<ConstIterator<Type>>(lbuff, (lbuff + lsize), rbuff.begin(), rbuff.begin() + rsize, nbuff.begin(), omp_p, std::less<Type>());
         if (ext_rsize > 0 && nbuff.Dim() > 0) {
           nbuff_ext.ReInit(nbuff_size);
-          omp_par::merge(nbuff.Begin(), nbuff.Begin() + (lsize + rsize), rbuff_ext.Begin(), rbuff_ext.Begin() + ext_rsize, nbuff_ext.Begin(), omp_p, std::less<Type>());
+          omp_par::merge(nbuff.begin(), nbuff.begin() + (lsize + rsize), rbuff_ext.begin(), rbuff_ext.begin() + ext_rsize, nbuff_ext.begin(), omp_p, std::less<Type>());
           nbuff.Swap(nbuff_ext);
           nbuff_ext.ReInit(0);
         }
@@ -1119,7 +1119,7 @@ template <class Type> void Comm::HyperQuickSort(const Vector<Type>& arr_, Vector
   PartitionW<Type>(SortedElem);
 #else
   SortedElem = arr_;
-  std::sort(SortedElem.Begin(), SortedElem.Begin() + SortedElem.Dim());
+  std::sort(SortedElem.begin(), SortedElem.begin() + SortedElem.Dim());
 #endif
 }
 

+ 14 - 13
include/sctl/fft_wrapper.hpp

@@ -145,32 +145,32 @@ template <class ValueType> class FFT {
 
     if (plan.fft_type == FFT_Type::C2R) {
       const Matrix<ValueType>& M = plan.M[rank - 1];
-      transpose<ComplexType>(buff0.Begin(), in.Begin(), N / M.Dim(0), M.Dim(0) / 2);
+      transpose<ComplexType>(buff0.begin(), in.begin(), N / M.Dim(0), M.Dim(0) / 2);
 
       for (Long i = 0; i < rank - 1; i++) {
         const Matrix<ValueType>& M = plan.M[i];
-        Matrix<ValueType> vi(N / M.Dim(0), M.Dim(0), buff0.Begin(), false);
-        Matrix<ValueType> vo(N / M.Dim(0), M.Dim(1), buff1.Begin(), false);
+        Matrix<ValueType> vi(N / M.Dim(0), M.Dim(0), buff0.begin(), false);
+        Matrix<ValueType> vo(N / M.Dim(0), M.Dim(1), buff1.begin(), false);
         Matrix<ValueType>::GEMM(vo, vi, M);
         N = N * M.Dim(1) / M.Dim(0);
-        transpose<ComplexType>(buff0.Begin(), buff1.Begin(), N / M.Dim(1), M.Dim(1) / 2);
+        transpose<ComplexType>(buff0.begin(), buff1.begin(), N / M.Dim(1), M.Dim(1) / 2);
       }
-      transpose<ComplexType>(buff1.Begin(), buff0.Begin(), N / howmany / 2, howmany);
+      transpose<ComplexType>(buff1.begin(), buff0.begin(), N / howmany / 2, howmany);
 
-      Matrix<ValueType> vi(N / M.Dim(0), M.Dim(0), buff1.Begin(), false);
-      Matrix<ValueType> vo(N / M.Dim(0), M.Dim(1), out.Begin(), false);
+      Matrix<ValueType> vi(N / M.Dim(0), M.Dim(0), buff1.begin(), false);
+      Matrix<ValueType> vo(N / M.Dim(0), M.Dim(1), out.begin(), false);
       Matrix<ValueType>::GEMM(vo, vi, M);
     } else {
-      memcopy(buff0.Begin(), in.Begin(), in.Dim());
+      memcopy(buff0.begin(), in.begin(), in.Dim());
       for (Long i = 0; i < rank; i++) {
         const Matrix<ValueType>& M = plan.M[i];
-        Matrix<ValueType> vi(N / M.Dim(0), M.Dim(0), buff0.Begin(), false);
-        Matrix<ValueType> vo(N / M.Dim(0), M.Dim(1), buff1.Begin(), false);
+        Matrix<ValueType> vi(N / M.Dim(0), M.Dim(0), buff0.begin(), false);
+        Matrix<ValueType> vo(N / M.Dim(0), M.Dim(1), buff1.begin(), false);
         Matrix<ValueType>::GEMM(vo, vi, M);
         N = N * M.Dim(1) / M.Dim(0);
-        transpose<ComplexType>(buff0.Begin(), buff1.Begin(), N / M.Dim(1), M.Dim(1) / 2);
+        transpose<ComplexType>(buff0.begin(), buff1.begin(), N / M.Dim(1), M.Dim(1) / 2);
       }
-      transpose<ComplexType>(out.Begin(), buff0.Begin(), N / howmany / 2, howmany);
+      transpose<ComplexType>(out.begin(), buff0.begin(), N / howmany / 2, howmany);
     }
   }
 
@@ -244,11 +244,12 @@ template <class ValueType> class FFT {
     ValueType s = 1.0 / sqrt<ValueType>(N0);
     Long N1 = (N0 / 2 + 1);
     Matrix<ValueType> M(2 * N1, N0);
-    for (Long i = 0; i < N1; i++)
+    for (Long i = 0; i < N1; i++) {
       for (Long j = 0; j < N0; j++) {
         M[2 * i + 0][j] = 2 * cos<ValueType>(j * i * (1.0 / N0) * 2.0 * const_pi<ValueType>())*s;
         M[2 * i + 1][j] = 2 * sin<ValueType>(j * i * (1.0 / N0) * 2.0 * const_pi<ValueType>())*s;
       }
+    }
     if (N1 > 0) {
       for (Long j = 0; j < N0; j++) {
         M[0][j] = M[0][j] * 0.5;

+ 7 - 3
include/sctl/matrix.hpp

@@ -35,9 +35,13 @@ template <class ValueType> class Matrix {
 
   void SetZero();
 
-  Iterator<ValueType> Begin();
+  Iterator<ValueType> begin();
 
-  ConstIterator<ValueType> Begin() const;
+  ConstIterator<ValueType> begin() const;
+
+  Iterator<ValueType> end();
+
+  ConstIterator<ValueType> end() const;
 
   // Matrix-Matrix operations
 
@@ -114,7 +118,7 @@ template <class ValueType> class Matrix {
 template <class ValueType> std::ostream& operator<<(std::ostream& output, const Matrix<ValueType>& M);
 
 template <class ValueType> Matrix<ValueType> operator+(ValueType s, const Matrix<ValueType>& M) { return M + s; }
-template <class ValueType> Matrix<ValueType> operator-(ValueType s, const Matrix<ValueType>& M) { return s + (-1.0 * M); }
+template <class ValueType> Matrix<ValueType> operator-(ValueType s, const Matrix<ValueType>& M) { return s + (M * -1.0); }
 template <class ValueType> Matrix<ValueType> operator*(ValueType s, const Matrix<ValueType>& M) { return M * s; }
 
 /**

+ 15 - 11
include/sctl/matrix.txx

@@ -138,9 +138,13 @@ template <class ValueType> void Matrix<ValueType>::SetZero() {
   if (dim[0] * dim[1]) memset(data_ptr, 0, dim[0] * dim[1]);
 }
 
-template <class ValueType> Iterator<ValueType> Matrix<ValueType>::Begin() { return data_ptr; }
+template <class ValueType> Iterator<ValueType> Matrix<ValueType>::begin() { return data_ptr; }
 
-template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::Begin() const { return data_ptr; }
+template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::begin() const { return data_ptr; }
+
+template <class ValueType> Iterator<ValueType> Matrix<ValueType>::end() { return data_ptr + dim[0] * dim[1]; }
+
+template <class ValueType> ConstIterator<ValueType> Matrix<ValueType>::end() const { return data_ptr + dim[0] * dim[1]; }
 
 // Matrix-Matrix operations
 
@@ -248,16 +252,16 @@ template <class ValueType> void Matrix<ValueType>::GEMM(Matrix<ValueType>& M_r,
 
   if (beta == 0) {
     for (Long i = 0; i < d0; i++) {
-      ConstIterator<Long> perm_ = P.perm.Begin();
-      ConstIterator<ValueType> scal_ = P.scal.Begin();
+      ConstIterator<Long> perm_ = P.perm.begin();
+      ConstIterator<ValueType> scal_ = P.scal.begin();
       ConstIterator<ValueType> M_ = M[i];
       Iterator<ValueType> M_r_ = M_r[i];
       for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];
     }
   } else {
     for (Long i = 0; i < d0; i++) {
-      ConstIterator<Long> perm_ = P.perm.Begin();
-      ConstIterator<ValueType> scal_ = P.scal.Begin();
+      ConstIterator<Long> perm_ = P.perm.begin();
+      ConstIterator<ValueType> scal_ = P.scal.begin();
       ConstIterator<ValueType> M_ = M[i];
       Iterator<ValueType> M_r_ = M_r[i];
       for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j] + M_r_[j] * beta;
@@ -413,8 +417,8 @@ template <class ValueType> void Matrix<ValueType>::ColPerm(const Permutation<Val
   Integer omp_p = omp_get_max_threads();
   Matrix<ValueType> M_buff(omp_p, d1);
 
-  ConstIterator<Long> perm_ = P.perm.Begin();
-  ConstIterator<ValueType> scal_ = P.scal.Begin();
+  ConstIterator<Long> perm_ = P.perm.begin();
+  ConstIterator<ValueType> scal_ = P.scal.begin();
 #pragma omp parallel for schedule(static)
   for (Long i = 0; i < d0; i++) {
     Integer pid = omp_get_thread_num();
@@ -518,7 +522,7 @@ template <class ValueType> void Matrix<ValueType>::SVD(Matrix<ValueType>& tU, Ma
   wssize = (wssize > wssize1 ? wssize : wssize1);
 
   Iterator<ValueType> wsbuf = aligned_new<ValueType>(wssize);
-  mat::svd(&JOBU, &JOBVT, &m, &n, M.Begin(), &m, tS.Begin(), tVT.Begin(), &m, tU.Begin(), &k, wsbuf, &wssize, &INFO);
+  mat::svd(&JOBU, &JOBVT, &m, &n, M.begin(), &m, tS.begin(), tVT.begin(), &m, tU.begin(), &k, wsbuf, &wssize, &INFO);
   aligned_delete<ValueType>(wsbuf);
 
   if (INFO != 0) std::cout << INFO << '\n';
@@ -659,8 +663,8 @@ template <class ValueType> Matrix<ValueType> operator*(const Matrix<ValueType>&
 
   Matrix<ValueType> M_r(d0, d1, NULL);
   for (Long i = 0; i < d0; i++) {
-    ConstIterator<Long> perm_ = P.perm.Begin();
-    ConstIterator<ValueType> scal_ = P.scal.Begin();
+    ConstIterator<Long> perm_ = P.perm.begin();
+    ConstIterator<ValueType> scal_ = P.scal.begin();
     ConstIterator<ValueType> M_ = M[i];
     Iterator<ValueType> M_r_ = M_r[i];
     for (Long j = 0; j < d1; j++) M_r_[j] = M_[perm_[j]] * scal_[j];

+ 3 - 3
include/sctl/ompUtils.txx

@@ -61,12 +61,12 @@ template <class ConstIter, class Iter, class Int, class StrictWeakOrdering> inli
   for (int i = 1; i < p; i++) {
     _DiffType req_size = (i * (N1 + N2)) / p;
 
-    int j = std::lower_bound(split_size.Begin(), split_size.Begin() + p * n, req_size, std::less<_DiffType>()) - split_size.Begin();
+    int j = std::lower_bound(split_size.begin(), split_size.begin() + p * n, req_size, std::less<_DiffType>()) - split_size.begin();
     if (j >= p * n) j = p * n - 1;
     _ValType split1 = split[j];
     _DiffType split_size1 = split_size[j];
 
-    j = (std::lower_bound(split_size.Begin() + p * n, split_size.Begin() + p * n * 2, req_size, std::less<_DiffType>()) - split_size.Begin() + p * n) + p * n;
+    j = (std::lower_bound(split_size.begin() + p * n, split_size.begin() + p * n * 2, req_size, std::less<_DiffType>()) - split_size.begin() + p * n) + p * n;
     if (j >= 2 * p * n) j = 2 * p * n - 1;
     if (abs(split_size[j] - req_size) < abs(split_size1 - req_size)) {
       split1 = split[j];
@@ -117,7 +117,7 @@ template <class T, class StrictWeakOrdering> inline void omp_par::merge_sort(T A
   Vector<_ValType> B;
   B.ReInit(N);
   Iterator<_ValType> A_ = Ptr2Itr<_ValType>(&A[0], N);
-  Iterator<_ValType> B_ = B.Begin();
+  Iterator<_ValType> B_ = B.begin();
   for (int j = 1; j < p; j = j * 2) {
     for (int i = 0; i < p; i = i + 2 * j) {
       if (i + j < p) {

+ 14 - 11
include/sctl/vector.hpp

@@ -12,7 +12,6 @@ namespace SCTL_NAMESPACE {
 template <class ValueType> class Vector {
 
  public:
-  typedef ValueType ValType;
 
   Vector();
 
@@ -38,9 +37,13 @@ template <class ValueType> class Vector {
 
   void SetZero();
 
-  Iterator<ValueType> Begin();
+  Iterator<ValueType> begin();
 
-  ConstIterator<ValueType> Begin() const;
+  ConstIterator<ValueType> begin() const;
+
+  Iterator<ValueType> end();
+
+  ConstIterator<ValueType> end() const;
 
   void PushBack(const ValueType& x);
 
@@ -64,13 +67,13 @@ template <class ValueType> class Vector {
 
   Vector& operator/=(const Vector& V);
 
-  Vector operator+(const Vector& V) const ;
+  Vector operator+(const Vector& V) const;
 
-  Vector operator-(const Vector& V) const ;
+  Vector operator-(const Vector& V) const;
 
-  Vector operator*(const Vector& V) const ;
+  Vector operator*(const Vector& V) const;
 
-  Vector operator/(const Vector& V) const ;
+  Vector operator/(const Vector& V) const;
 
   // Vector-Scalar operations
 
@@ -84,13 +87,13 @@ template <class ValueType> class Vector {
 
   Vector& operator/=(ValueType s);
 
-  Vector operator+(ValueType s) const ;
+  Vector operator+(ValueType s) const;
 
-  Vector operator-(ValueType s) const ;
+  Vector operator-(ValueType s) const;
 
-  Vector operator*(ValueType s) const ;
+  Vector operator*(ValueType s) const;
 
-  Vector operator/(ValueType s) const ;
+  Vector operator/(ValueType s) const;
 
  private:
   Long dim;

+ 6 - 2
include/sctl/vector.txx

@@ -130,9 +130,13 @@ template <class ValueType> void Vector<ValueType>::SetZero() {
   if (dim > 0) memset<ValueType>(data_ptr, 0, dim);
 }
 
-template <class ValueType> Iterator<ValueType> Vector<ValueType>::Begin() { return data_ptr; }
+template <class ValueType> Iterator<ValueType> Vector<ValueType>::begin() { return data_ptr; }
 
-template <class ValueType> ConstIterator<ValueType> Vector<ValueType>::Begin() const { return ConstIterator<ValueType>(data_ptr); }
+template <class ValueType> ConstIterator<ValueType> Vector<ValueType>::begin() const { return data_ptr; }
+
+template <class ValueType> Iterator<ValueType> Vector<ValueType>::end() { return data_ptr + dim; }
+
+template <class ValueType> ConstIterator<ValueType> Vector<ValueType>::end() const { return data_ptr + dim; }
 
 template <class ValueType> void Vector<ValueType>::PushBack(const ValueType& x) {
   if (capacity > dim) {