add solutions

27ed452f · Mactavish · 2f1289a7 · 27ed452f · 27ed452f · 27ed452f
Commit 27ed452f authored 2 years ago by Mactavish
--- a/exercises/OpenMP/Makefile
+++ b/exercises/OpenMP/Makefile
 # copy to make.def
 UNAME := $(shell uname)
+VPATH += solutions
 CC          = gcc
 CFLAGS      = -fopenmp
 LIBS        =
-BINS = hello pi #pi_spmd_simple_soln pi_spmd_final_soln pi_loop_soln
+BINS = hello pi pi_spmd_simple pi_spmd_optimized pi_spmd_atomic pi_loop
 ifeq ($(UNAME), Darwin)
 	CFLAGS = -Xpreprocessor -fopenmp
@@ -20,7 +21,11 @@ $(BINS): %: %.o
 test: $(BINS)
 	./hello 
-	./pi 
+	./pi
+	./pi_spmd_simple
+	./pi_spmd_optimized
+	./pi_spmd_atomic
+	./pi_loop
 clean:
 	$(RM) $(BINS) *.o
--- a/exercises/OpenMP/solutions/pi_loop.c
+++ b/exercises/OpenMP/solutions/pi_loop.c
+/*
+This program will numerically compute the integral of
+                  4/(1+x*x)
+from 0 to 1.  The value of this integral is pi -- which
+is great since it gives us an easy way to check the answer.
+The program was parallelized using OpenMP by adding just
+four lines
+(1) A line to include omp.h -- the include file that
+contains OpenMP's function prototypes and constants.
+(2) A pragma that tells OpenMP to create a team of threads
+(3) A pragma to cause one of the threads to print the
+number of threads being used by the program.
+(4) A pragma to split up loop iterations among the team
+of threads.  This pragma includes 2 clauses to (1) create a
+private variable and (2) to cause the threads to compute their
+sums locally and then combine their local sums into a
+single global value.
+History: Written by Tim Mattson, 11/99.
+*/
+#include <omp.h>
+#include <stdio.h>
+#define MAX_THREADS 8
+/* Number of rectangles used by the midpoint-rule integration. */
+static long num_steps = 100000000;
+/* Width of one rectangle; assigned once in main() before any parallel region. */
+double step;
+/*
+ * Integrates 4/(1+x*x) over [0,1] with a worksharing loop and a sum
+ * reduction, repeating the run for 1..MAX_THREADS requested threads and
+ * printing the result and wall-clock time of each run.
+ *
+ * The requested thread count and the integration index are separate
+ * variables: the thread count is a function-scope int, while the index is
+ * a long declared inside the parallel region, so it is private to each
+ * thread and has the same type as num_steps.
+ */
+int main() {
+  int nthreads;
+  double pi, sum = 0.0;
+  double start_time, run_time;
+  step = 1.0 / (double)num_steps;
+  for (nthreads = 1; nthreads <= MAX_THREADS; nthreads++) {
+    sum = 0.0; // the reduction adds into this value, so reset it per run
+    omp_set_num_threads(nthreads);
+    start_time = omp_get_wtime();
+    #pragma omp parallel
+    {
+      long i;
+      double x;
+      #pragma omp single
+      printf(" num_threads = %d", omp_get_num_threads());
+      #pragma omp for reduction(+:sum)
+      for (i = 1; i <= num_steps; i++) {
+        x = (i - 0.5) * step; // midpoint of the i-th rectangle
+        sum = sum + 4.0 / (1.0 + x * x);
+      }
+    }
+    pi = step * sum;
+    run_time = omp_get_wtime() - start_time;
+    printf("\n pi is %f in %f seconds and %d threads\n", pi, run_time,
+           nthreads);
+  }
+  return 0;
+}
--- a/exercises/OpenMP/solutions/pi_spmd_atomic.c
+++ b/exercises/OpenMP/solutions/pi_spmd_atomic.c
+/*
+NAME:   PI SPMD final version without false sharing
+This program will numerically compute the integral of
+                  4/(1+x*x)
+from 0 to 1.  The value of this integral is pi -- which
+is great since it gives us an easy way to check the answer.
+The program was parallelized using OpenMP and an SPMD
+algorithm.  The following OpenMP specific lines were
+added:
+(1) A line to include omp.h -- the include file that
+contains OpenMP's function prototypes and constants.
+(2) A pragma that tells OpenMP to create a team of threads
+with an integer variable i being created for each thread.
+(3) two function calls: one to get the thread ID (ranging
+from 0 to one less than the number of threads), and the other
+returning the total number of threads.
+(4) A "single" construct so only one thread prints the number
+of threads.
+(5) A cyclic distribution of the loop by changing loop control
+expressions to run from the thread ID incremented by the number
+of threads.  Local sums accumulated into a private scalar
+(partial_sum) rather than a shared sum[id] array.
+(6) An "atomic" construct so each thread adds its private partial
+sum into the single global sum without a data race.
+Note that this program avoids the false sharing problem
+by storing partial sums into a private scalar.
+History: Written by Tim Mattson, 11/99.
+*/
+#include <omp.h>
+#include <stdio.h>
+#define MAX_THREADS 8
+/* Number of rectangles used by the midpoint-rule integration. */
+static long num_steps = 100000000;
+/* Width of one rectangle; assigned once in main() before any parallel region. */
+double step;
+/*
+ * Integrates 4/(1+x*x) over [0,1] with an SPMD decomposition, repeating
+ * the run for 1..MAX_THREADS requested threads and printing the result and
+ * wall-clock time of each run.
+ *
+ * Each thread accumulates into its own partial_sum and then adds that
+ * value to the shared full_sum under an atomic construct, so no shared
+ * array of per-thread sums is needed.
+ */
+int main() {
+  int j;
+  double pi, full_sum = 0.0;
+  double start_time, run_time;
+  step = 1.0 / (double)num_steps;
+  for (j = 1; j <= MAX_THREADS; j++) {
+    omp_set_num_threads(j);
+    full_sum = 0.0; // threads add into this value, so reset it per run
+    start_time = omp_get_wtime();
+    #pragma omp parallel
+    {
+      // Everything declared inside the region is private to each thread.
+      long i;
+      int id = omp_get_thread_num();
+      int numthreads = omp_get_num_threads();
+      double x;
+      double partial_sum = 0;
+      #pragma omp single
+        printf(" num_threads = %d", numthreads);
+      // Cyclic distribution: thread id handles steps id, id+numthreads, ...
+      for (i = id; i < num_steps; i += numthreads) {
+        x = (i + 0.5) * step; // midpoint of the i-th rectangle
+        partial_sum += 4.0 / (1.0 + x * x);
+      }
+      // One protected update per thread instead of one per iteration.
+      #pragma omp atomic
+      full_sum += partial_sum;
+    }
+    pi = step * full_sum;
+    run_time = omp_get_wtime() - start_time;
+    printf("\n pi is %f in %f seconds %d threads \n", pi, run_time, j);
+  }
+  return 0;
+}
--- a/exercises/OpenMP/solutions/pi_spmd_optimized.c
+++ b/exercises/OpenMP/solutions/pi_spmd_optimized.c
+/*
+NAME: PI SPMD ... the simple version with a padded sum array.
+This program will numerically compute the integral of
+                  4/(1+x*x)
+from 0 to 1.  The value of this integral is pi -- which
+is great since it gives us an easy way to check the answer.
+The program was parallelized using OpenMP and an SPMD
+algorithm.  The following OpenMP specific lines were
+added:
+(1) A line to include omp.h -- the include file that
+contains OpenMP's function prototypes and constants.
+(2) A pragma that tells OpenMP to create a team of threads
+with an integer variable i being created for each thread.
+(3) two function calls: one to get the thread ID (ranging
+from 0 to one less than the number of threads), and the other
+returning the total number of threads.
+(4) A cyclic distribution of the loop by changing loop control
+expressions to run from the thread ID incremented by the number
+of threads.  Local sums accumulated into sum[id][0].
+Note that the simple version of this program shows low performance
+due to false sharing.  Here each row of sum is padded to PAD doubles
+(64 bytes, the assumed L1 cache line size), so the partial sums of
+different threads never share a cache line.
+History: Written by Tim Mattson, 11/99.
+*/
+#include <omp.h>
+#include <stdio.h>
+#define MAX_THREADS 8
+#define PAD 8 // assume 64 byte L1 cache line size
+/* Number of rectangles used by the midpoint-rule integration. */
+static long num_steps = 100000000;
+/* Width of one rectangle; assigned once in main() before any parallel region. */
+double step;
+/*
+ * Integrates 4/(1+x*x) over [0,1] with an SPMD decomposition, repeating
+ * the run for 1..MAX_THREADS requested threads and printing the result and
+ * wall-clock time of each run.
+ *
+ * Each thread accumulates into sum[id][0]; rows are PAD doubles wide so
+ * that the slots of different threads are PAD * sizeof(double) bytes apart.
+ *
+ * The runtime may start fewer threads than requested, in which case the
+ * higher slots of sum are never written.  Thread 0 therefore records the
+ * actual team size, and only that many slots are added up afterwards.
+ */
+int main() {
+  int i, j;
+  int team_size;
+  double pi, full_sum = 0.0;
+  double start_time, run_time;
+  double sum[MAX_THREADS][PAD];
+  step = 1.0 / (double)num_steps;
+  // run the test for 1 to max_threads
+  for (j = 1; j <= MAX_THREADS; j++) {
+    omp_set_num_threads(j);
+    team_size = 0;
+    start_time = omp_get_wtime();
+    #pragma omp parallel
+    {
+      long k;
+      int id = omp_get_thread_num();
+      int numthreads = omp_get_num_threads();
+      double x;
+      sum[id][0] = 0.0;
+      if (id == 0) {
+        // Only thread 0 writes team_size; it is read after the region ends.
+        team_size = numthreads;
+        printf(" num_threads = %d", numthreads);
+      }
+      // Cyclic distribution: thread id handles steps id, id+numthreads, ...
+      for (k = id; k < num_steps; k += numthreads) {
+        x = (k + 0.5) * step; // midpoint of the k-th rectangle
+        sum[id][0] = sum[id][0] + 4.0 / (1.0 + x * x);
+      }
+    }
+    // Combine only the slots that a thread actually initialised.
+    for (full_sum = 0.0, i = 0; i < team_size; i++)
+      full_sum += sum[i][0];
+    pi = step * full_sum;
+    run_time = omp_get_wtime() - start_time;
+    printf("\n pi is %f in %f seconds %d threads \n", pi, run_time, j);
+  }
+  return 0;
+}
--- a/exercises/OpenMP/solutions/pi_spmd_simple.c
+++ b/exercises/OpenMP/solutions/pi_spmd_simple.c
+/*
+NAME: PI SPMD ... a simple version.
+This program will numerically compute the integral of
+                  4/(1+x*x)
+from 0 to 1.  The value of this integral is pi -- which
+is great since it gives us an easy way to check the answer.
+The program was parallelized using OpenMP and an SPMD
+algorithm.  The following OpenMP specific lines were
+added:
+(1) A line to include omp.h -- the include file that
+contains OpenMP's function prototypes and constants.
+(2) A pragma that tells OpenMP to create a team of threads
+with an integer variable i being created for each thread.
+(3) two function calls: one to get the thread ID (ranging
+from 0 to one less than the number of threads), and the other
+returning the total number of threads.
+(4) A cyclic distribution of the loop by changing loop control
+expressions to run from the thread ID incremented by the number
+of threads.  Local sums accumulated into sum[id].
+Note that this program will show low performance due to
+false sharing.  In particular, sum[id] is unique to each
+thread, but adjacent values of this array share a cache line
+causing cache thrashing as the program runs.
+History: Written by Tim Mattson, 11/99.
+*/
+#include <omp.h>
+#include <stdio.h>
+#define MAX_THREADS 8
+/* Number of rectangles used by the midpoint-rule integration. */
+static long num_steps = 100000000;
+/* Width of one rectangle; assigned once in main() before any parallel region. */
+double step;
+/*
+ * Integrates 4/(1+x*x) over [0,1] with an SPMD decomposition, repeating
+ * the run for 1..MAX_THREADS requested threads and printing the result and
+ * wall-clock time of each run.
+ *
+ * Each thread accumulates directly into its own element of the shared
+ * array sum, and the elements are added up serially after the region.
+ *
+ * The runtime may start fewer threads than requested, in which case the
+ * higher elements of sum are never written.  Thread 0 therefore records
+ * the actual team size, and only that many elements are added up.
+ */
+int main() {
+  int i, j;
+  int team_size;
+  double pi, full_sum = 0.0;
+  double start_time, run_time;
+  double sum[MAX_THREADS];
+  step = 1.0 / (double)num_steps;
+  // run the test for 1 to max_threads
+  for (j = 1; j <= MAX_THREADS; j++) {
+    omp_set_num_threads(j);
+    team_size = 0;
+    start_time = omp_get_wtime();
+    #pragma omp parallel
+    {
+      long k;
+      int id = omp_get_thread_num();
+      int numthreads = omp_get_num_threads();
+      double x;
+      sum[id] = 0.0;
+      if (id == 0) {
+        // Only thread 0 writes team_size; it is read after the region ends.
+        team_size = numthreads;
+        printf(" num_threads = %d", numthreads);
+      }
+      // Cyclic distribution: thread id handles steps id, id+numthreads, ...
+      for (k = id; k < num_steps; k += numthreads) {
+        x = (k + 0.5) * step; // midpoint of the k-th rectangle
+        sum[id] = sum[id] + 4.0 / (1.0 + x * x);
+      }
+    }
+    // Combine only the elements that a thread actually initialised.
+    for (full_sum = 0.0, i = 0; i < team_size; i++)
+      full_sum += sum[i];
+    pi = step * full_sum;
+    run_time = omp_get_wtime() - start_time;
+    printf("\n pi is %f in %f seconds %d threads \n", pi, run_time, j);
+  }
+  return 0;
+}