instruction.cpp 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. // Example code demonstrating instruction latency and throughput.
  2. #include <iostream>
  3. #include <sctl.hpp>
  4. #include <omp.h>
  5. #define CPU_clockrate 3.3 // GHz; multiplied by the measured seconds to report "cycles/iter" (the timed loops run 10^9 outer iterations)
  6. template <class Type, int K> void test_add() {
  7. Type x[K], one = 1.0;
  8. for (long k = 0; k < K; k++)
  9. x[k] = 3.14 + k;
  10. double T = -omp_get_wtime();
  11. for (long i = 0; i < 1000000000L; i++)
  12. for (long k = 0; k < K; k++)
  13. x[k] = one + x[k];
  14. T += omp_get_wtime();
  15. std::cout<<"T = "<< T <<'\n';
  16. std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
  17. // print the result otherwise the
  18. // compiler optimize out everything
  19. Type sum = 0.;
  20. for (long k = 0; k < K; k++)
  21. sum += x[k];
  22. std::cout<<"Result = "<<sum<<'\n';
  23. }
  24. template <class Type, int K> void test_division() {
  25. Type x[K], one = 1.0;
  26. for (long k = 0; k < K; k++)
  27. x[k] = 3.14 + k;
  28. double T = -omp_get_wtime();
  29. for (long i = 0; i < 1000000000L; i++)
  30. for (long k = 0; k < K; k++)
  31. x[k] = one / x[k];
  32. T += omp_get_wtime();
  33. std::cout<<"T = "<< T <<'\n';
  34. std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
  35. // print the result otherwise the
  36. // compiler optimize out everything
  37. Type sum = 0.;
  38. for (long k = 0; k < K; k++)
  39. sum += x[k];
  40. std::cout<<"Result = "<<sum<<'\n';
  41. }
  42. int main(int argc, char** argv) {
  43. std::cout<<"\n\nCPU clockrate = "<<CPU_clockrate<<"\n";
  44. std::cout<<"\n\nAdding one doubles at a time:\n";
  45. test_add<double, 1>();
  46. std::cout<<"\n\nAdding 32 doubles at a time:\n";
  47. test_add<double, 32>();
  48. std::cout<<"\n\nAdding 8 Vec<doubles,8> at a time:\n";
  49. test_add<sctl::Vec<double,8>, 8>();
  50. std::cout<<"\n\nDividing 8 Vec<doubles,8> at a time:\n";
  51. test_division<sctl::Vec<double,8>, 8>();
  52. return 0;
  53. }