Dhairya Malhotra 2 년 전
부모
커밋
6d44a4ed57
3개의 변경된 파일143개의 추가작업 그리고 2개의 파일을 삭제
  1. 2 2
      code/Makefile
  2. 72 0
      code/src/instruction.cpp
  3. 69 0
      code/src/poly-eval.cpp

+ 2 - 2
code/Makefile

@@ -17,14 +17,14 @@ OBJDIR = ./obj
 INCDIR = ./SCTL/include
 
 TARGET_BIN = \
-       $(BINDIR)/ilp
+       $(BINDIR)/ilp \
+       $(BINDIR)/poly-eval
 
 all : $(TARGET_BIN)
 
 $(BINDIR)/%: $(OBJDIR)/%.o
 	-@$(MKDIRS) $(dir $@)
 	$(CXX) $^ $(CXXFLAGS) $(LDLIBS) -o $@
-	./$@
 
 $(OBJDIR)/%.o: $(SRCDIR)/%.cpp
 	-@$(MKDIRS) $(dir $@)

+ 72 - 0
code/src/instruction.cpp

@@ -0,0 +1,72 @@
+// Example code demonstrating instruction latency and throughput
+
+#include <iostream>
+#include <sctl.hpp>
+#include <omp.h>
+
+#define CPU_clockrate 3.3 // GHz
+
+// Addition benchmark.
+//
+// Keeps K values of type Type, each updated only from itself, and adds a
+// constant to every one of them in each of 1e9 outer iterations.
+//
+// Template parameters:
+//   Type  element type (main() instantiates it with double and
+//         sctl::Vec<double,8>)
+//   K     number of values updated per outer iteration
+//
+// Prints the elapsed wall-clock time, CPU_clockrate*T, and the sum of the
+// final values to stdout.
+template <class Type, int K> void test_add() {
+  const long n_repeat = 1000000000L;
+
+  Type acc[K];
+  Type unit = 1.0;
+  for (long j = 0; j < K; j++)
+    acc[j] = 3.14 + j;
+
+  const double t_begin = omp_get_wtime();
+  for (long rep = 0; rep < n_repeat; rep++)
+    for (Type& v : acc)
+      v = unit + v;
+  const double T = omp_get_wtime() - t_begin;
+
+  std::cout << "T = " << T << '\n';
+  // With 1e9 outer iterations and the clock rate given in GHz, the two
+  // factors of 1e9 cancel, leaving cycles per outer iteration.
+  std::cout << "cycles/iter = " << CPU_clockrate*T << '\n';
+
+  // Print a value derived from the results; otherwise the compiler
+  // may optimize the whole computation away.
+  Type total = 0.;
+  for (Type& v : acc)
+    total += v;
+  std::cout << "Result = " << total << '\n';
+}
+
+// Division benchmark.
+//
+// Keeps K values of type Type, each updated only from itself, and replaces
+// every one of them by its reciprocal in each of 1e9 outer iterations.
+//
+// Template parameters:
+//   Type  element type (main() instantiates it with sctl::Vec<double,8>)
+//   K     number of values updated per outer iteration
+//
+// Prints the elapsed wall-clock time, CPU_clockrate*T, and the sum of the
+// final values to stdout.
+template <class Type, int K> void test_division() {
+  const long n_repeat = 1000000000L;
+
+  Type val[K];
+  Type unit = 1.0;
+  for (long j = 0; j < K; j++)
+    val[j] = 3.14 + j;
+
+  const double t_begin = omp_get_wtime();
+  for (long rep = 0; rep < n_repeat; rep++)
+    for (Type& v : val)
+      v = unit / v;
+  const double T = omp_get_wtime() - t_begin;
+
+  std::cout << "T = " << T << '\n';
+  // With 1e9 outer iterations and the clock rate given in GHz, the two
+  // factors of 1e9 cancel, leaving cycles per outer iteration.
+  std::cout << "cycles/iter = " << CPU_clockrate*T << '\n';
+
+  // Print a value derived from the results; otherwise the compiler
+  // may optimize the whole computation away.
+  Type total = 0.;
+  for (Type& v : val)
+    total += v;
+  std::cout << "Result = " << total << '\n';
+}
+
+// Entry point: prints the assumed clock rate, then runs the addition
+// benchmark in three configurations followed by the division benchmark.
+// argc and argv are accepted but not used.
+int main(int argc, char** argv) {
+  // Writes a section heading to stdout exactly as given.
+  auto heading = [](const char* text) { std::cout << text; };
+
+  std::cout << "\n\nCPU clockrate = " << CPU_clockrate << "\n";
+
+  heading("\n\nAdding one doubles at a time:\n");
+  test_add<double, 1>();
+
+  heading("\n\nAdding 32 doubles at a time:\n");
+  test_add<double, 32>();
+
+  heading("\n\nAdding 8 Vec<doubles,8> at a time:\n");
+  test_add<sctl::Vec<double,8>, 8>();
+
+  heading("\n\nDividing 8 Vec<doubles,8> at a time:\n");
+  test_division<sctl::Vec<double,8>, 8>();
+
+  return 0;
+}
+
+
+
+

+ 69 - 0
code/src/poly-eval.cpp

@@ -0,0 +1,69 @@
+// Example code showing the effect of pipelining when evaluating a polynomial
+
+#include <iostream>
+#include <sctl.hpp>
+#include <omp.h>
+
+#define CPU_clockrate 3.3 // GHz
+
+// Polynomial-evaluation benchmark.
+//
+// Times three formulations of the same degree-7 polynomial
+//   p(x) = a*x^7 + b*x^6 + c*x^5 + d*x^4 + e*x^3 + f*x^2 + g*x + h
+// by iterating x = p(x) for 1e9 iterations each:
+//   1. Horner's rule (fully nested; each step uses the previous result),
+//   2. Estrin's method written as a single expression,
+//   3. Estrin's method with the sub-expressions spelled out.
+// The value of x carries over from one timed section to the next, and the
+// final x is printed so the compiler cannot discard the computation.
+//
+// Template parameter:
+//   Type  value type used for x and the coefficients (main() instantiates
+//         it with sctl::Vec<double,8>)
+//
+// For each section, prints the elapsed wall-clock time and CPU_clockrate*T
+// to stdout.
+template <class Type> void test_polynomial() {
+  const long n_iter = 1000000000L;
+
+  Type a,b,c,d,e,f,g,h; // coefficients, highest degree first
+  a =  2.3515e-07;
+  b =  9.8697e-04;
+  c = -1.8656e-02;
+  d =  1.0716e-01;
+  e = -1.1821e-01;
+  f = -3.9467e-01;
+  g = -3.8480e-02;
+  h =  1.0033e+00;
+  Type x = drand48();
+
+  std::cout<<"\n\nEvaluating polynomials using Horner's rule:\n";
+  double T = -omp_get_wtime();
+  for (long i = 0; i < n_iter; i++) {
+    // Fully nested form. The previous expression lacked the parentheses
+    // around the "+f" step ("...+e)*x+f*x+g)..."), which evaluated a
+    // different, degree-6 polynomial than the Estrin variants below.
+    x = ((((((a*x+b)*x+c)*x+d)*x+e)*x+f)*x+g)*x+h;
+  }
+  T += omp_get_wtime();
+  std::cout<<"T = "<< T <<'\n';
+  // With 1e9 iterations and the clock rate given in GHz, the two factors
+  // of 1e9 cancel, leaving cycles per iteration.
+  std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
+
+  std::cout<<"\n\nEvaluating polynomials using Estrin's method:\n";
+  T = -omp_get_wtime();
+  for (long i = 0; i < n_iter; i++) {
+    Type x2 = x * x;
+    Type x4 = x2 * x2;
+    // Pairs of coefficients are combined first, then merged using x^2
+    // and x^4, so the four inner terms do not depend on each other.
+    x = ((a*x+b)*x2+(c*x+d))*x4+(e*x+f)*x2+(g*x+h);
+  }
+  T += omp_get_wtime();
+  std::cout<<"T = "<< T <<'\n';
+  std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
+
+  std::cout<<"\n\nEvaluating polynomials using Estrin's method (unrolled):\n";
+  T = -omp_get_wtime();
+  for (long i = 0; i < n_iter; i++) {
+    Type x2 = x * x;
+    Type x4 = x2 * x2;
+    // First level: four terms that depend only on x
+    Type u = a * x + b;
+    Type v = c * x + d;
+    Type w = e * x + f;
+    Type p = g * x + h;
+    // Second level: combine pairs using x^2
+    Type q = u * x2 + v;
+    Type r = w * x2 + p;
+    // Final level: combine using x^4
+    x = q * x4 + r;
+  }
+  T += omp_get_wtime();
+  std::cout<<"T = "<< T <<'\n';
+  std::cout<<"cycles/iter = "<< CPU_clockrate*T <<'\n';
+
+  std::cout<<"Result = "<<x<<"\n\n\n";
+}
+
+// Entry point: prints the assumed clock rate, then runs the polynomial
+// benchmark on 8-wide double vectors. argc and argv are accepted but unused.
+int main(int argc, char** argv) {
+  using VecType = sctl::Vec<double,8>;
+
+  std::cout << "\n\nCPU clockrate = " << CPU_clockrate << "\n";
+
+  test_polynomial<VecType>();
+
+  return 0;
+}
+