#include #include #include namespace pvfmm{ template void omp_par::merge(T A_,T A_last,T B_,T B_last,T C_,int p,StrictWeakOrdering comp){ typedef typename std::iterator_traits::difference_type _DiffType; typedef typename std::iterator_traits::value_type _ValType; _DiffType N1=A_last-A_; _DiffType N2=B_last-B_; if(N1==0 && N2==0) return; if(N1==0 || N2==0){ _ValType* A=(N1==0? &B_[0]: &A_[0]); _DiffType N=(N1==0? N2 : N1 ); #pragma omp parallel for for(int i=0;i())-&split_size[0]; if(j>=p*n) j=p*n-1; _ValType split1 =split [j]; _DiffType split_size1=split_size[j]; j=(std::lower_bound(&split_size[p*n],&split_size[p*n*2],req_size,std::less<_DiffType>())-&split_size[p*n])+p*n; if(j>=2*p*n) j=2*p*n-1; if(abs(split_size[j]-req_size) void omp_par::merge_sort(T A,T A_last,StrictWeakOrdering comp){ typedef typename std::iterator_traits::difference_type _DiffType; typedef typename std::iterator_traits::value_type _ValType; int p=omp_get_max_threads(); _DiffType N=A_last-A; if(N<2*p){ std::sort(A,A_last,comp); return; } //Split the array A into p equal parts. _DiffType* split=new _DiffType[p+1]; split[p]=N; #pragma omp parallel for for(int id=0;id void omp_par::merge_sort(T A,T A_last){ typedef typename std::iterator_traits::value_type _ValType; omp_par::merge_sort(A,A_last,std::less<_ValType>()); } template T omp_par::reduce(T* A, I cnt){ T sum=0; #pragma omp parallel for reduction(+:sum) for(I i = 0; i < cnt; i++) sum+=A[i]; return sum; } template void omp_par::scan(T* A, T* B,I cnt){ int p=omp_get_max_threads(); if(cnt<(I)100*p){ for(I i=1;i