|
@@ -5,11 +5,12 @@
|
|
#include <omp.h>
|
|
#include <omp.h>
|
|
|
|
|
|
#define CPU_clockrate 3.3 // GHz
|
|
#define CPU_clockrate 3.3 // GHz
|
|
|
|
+constexpr int VecLen = sctl::DefaultVecLen<double>();
|
|
|
|
|
|
-template <class Type, int K> void test_add() {
|
|
|
|
|
|
+template <class Type, int K> void test_add() { // add K elements of Type
|
|
Type x[K], one = 1.0;
|
|
Type x[K], one = 1.0;
|
|
for (long k = 0; k < K; k++)
|
|
for (long k = 0; k < K; k++)
|
|
- x[k] = 3.14 + k;
|
|
|
|
|
|
+ x[k] = 3.14 + k; // initialize x[k]
|
|
|
|
|
|
double T = -omp_get_wtime();
|
|
double T = -omp_get_wtime();
|
|
for (long i = 0; i < 1000000000L; i++)
|
|
for (long i = 0; i < 1000000000L; i++)
|
|
@@ -20,17 +21,16 @@ template <class Type, int K> void test_add() {
|
|
std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
|
|
std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
|
|
|
|
|
|
// print the result otherwise the
|
|
// print the result otherwise the
|
|
- // compiler optimize out everything
|
|
|
|
|
|
+ // compiler optimizes everything away
|
|
Type sum = 0.;
|
|
Type sum = 0.;
|
|
- for (long k = 0; k < K; k++)
|
|
|
|
- sum += x[k];
|
|
|
|
|
|
+ for (long k = 0; k < K; k++) sum += x[k];
|
|
std::cout<<"Result = "<<sum<<'\n';
|
|
std::cout<<"Result = "<<sum<<'\n';
|
|
}
|
|
}
|
|
|
|
|
|
-template <class Type, int K> void test_division() {
|
|
|
|
|
|
+template <class Type, int K> void test_division() { // divide K elements of Type
|
|
Type x[K], one = 1.0;
|
|
Type x[K], one = 1.0;
|
|
for (long k = 0; k < K; k++)
|
|
for (long k = 0; k < K; k++)
|
|
- x[k] = 3.14 + k;
|
|
|
|
|
|
+ x[k] = 3.14 + k; // initialize x[k]
|
|
|
|
|
|
double T = -omp_get_wtime();
|
|
double T = -omp_get_wtime();
|
|
for (long i = 0; i < 1000000000L; i++)
|
|
for (long i = 0; i < 1000000000L; i++)
|
|
@@ -41,10 +41,9 @@ template <class Type, int K> void test_division() {
|
|
std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
|
|
std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
|
|
|
|
|
|
// print the result otherwise the
|
|
// print the result otherwise the
|
|
- // compiler optimize out everything
|
|
|
|
|
|
+ // compiler optimizes everything away
|
|
Type sum = 0.;
|
|
Type sum = 0.;
|
|
- for (long k = 0; k < K; k++)
|
|
|
|
- sum += x[k];
|
|
|
|
|
|
+ for (long k = 0; k < K; k++) sum += x[k];
|
|
std::cout<<"Result = "<<sum<<'\n';
|
|
std::cout<<"Result = "<<sum<<'\n';
|
|
}
|
|
}
|
|
|
|
|
|
@@ -58,11 +57,11 @@ int main(int argc, char** argv) {
|
|
std::cout<<"\n\nAdding 32 doubles at a time:\n";
|
|
std::cout<<"\n\nAdding 32 doubles at a time:\n";
|
|
test_add<double, 32>();
|
|
test_add<double, 32>();
|
|
|
|
|
|
- std::cout<<"\n\nAdding 8 Vec<doubles,8> at a time:\n";
|
|
|
|
- test_add<sctl::Vec<double,8>, 8>();
|
|
|
|
|
|
+ std::cout<<"\n\nAdding 8 Vec<doubles,"<<VecLen<<"> at a time:\n";
|
|
|
|
+ test_add<sctl::Vec<double,VecLen>, 8>();
|
|
|
|
|
|
- std::cout<<"\n\nDividing 8 Vec<doubles,8> at a time:\n";
|
|
|
|
- test_division<sctl::Vec<double,8>, 8>();
|
|
|
|
|
|
+ std::cout<<"\n\nDividing 8 Vec<doubles,"<<VecLen<<"> at a time:\n";
|
|
|
|
+ test_division<sctl::Vec<double,VecLen>, 8>();
|
|
|
|
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|