Skip to content
Snippets Groups Projects
Commit 27ed452f authored by Mactavish's avatar Mactavish
Browse files

add solutions

parent 2f1289a7
No related branches found
No related tags found
No related merge requests found
# copy to make.def # copy to make.def
UNAME := $(shell uname) UNAME := $(shell uname)
VPATH += solutions
CC = gcc CC = gcc
CFLAGS = -fopenmp CFLAGS = -fopenmp
LIBS = LIBS =
BINS = hello pi #pi_spmd_simple_soln pi_spmd_final_soln pi_loop_soln BINS = hello pi pi_spmd_simple pi_spmd_optimized pi_spmd_atomic pi_loop
ifeq ($(UNAME), Darwin) ifeq ($(UNAME), Darwin)
CFLAGS = -Xpreprocessor -fopenmp CFLAGS = -Xpreprocessor -fopenmp
...@@ -20,7 +21,11 @@ $(BINS): %: %.o ...@@ -20,7 +21,11 @@ $(BINS): %: %.o
test: $(BINS) test: $(BINS)
./hello ./hello
./pi ./pi
./pi_spmd_simple
./pi_spmd_optimized
./pi_spmd_atomic
./pi_loop
clean: clean:
$(RM) $(BINS) *.o $(RM) $(BINS) *.o
/*
This program will numerically compute the integral of
4/(1+x*x)
from 0 to 1. The value of this integral is pi -- which
is great since it gives us an easy way to check the answer.
The program was parallelized using OpenMP by adding just
four lines
(1) A line to include omp.h -- the include file that
contains OpenMP's function prototypes and constants.
(2) A pragma that tells OpenMP to create a team of threads
(3) A pragma to cause one of the threads to print the
number of threads being used by the program.
(4) A pragma to split up loop iterations among the team
of threads. This pragma includes a reduction clause that causes
the threads to compute their sums locally and then combine their
local sums into a single global value. (The temporary x is
private to each thread because it is declared inside the
parallel region.)
History: Written by Tim Mattson, 11/99.
*/
#include <omp.h>
#include <stdio.h>
/* Largest team size tried: main() times the computation with 1..MAX_THREADS threads. */
#define MAX_THREADS 8
/* Number of rectangles used by the midpoint-rule integration in main(). */
static long num_steps = 100000000;
/* Width of one rectangle; main() sets it to 1.0 / num_steps before integrating. */
double step;
/*
 * Times the midpoint-rule approximation of pi (integral of 4/(1+x*x) over
 * [0,1]) for requested team sizes 1..MAX_THREADS.
 *
 * For every requested team size the loop over num_steps rectangles is split
 * across the team with a work-sharing "for", and the per-thread partial sums
 * are combined by the reduction clause.  Each run prints the team size that
 * was actually created, then the result, the elapsed wall-clock time and the
 * requested thread count.
 *
 * Returns 0.
 */
int main(void) {
    step = 1.0 / (double)num_steps;

    /*
     * The thread-count loop has its own counter.  Sharing one variable with
     * the work-shared loop below would only work because OpenMP implicitly
     * privatises the index of a work-shared loop.
     */
    for (int nthreads = 1; nthreads <= MAX_THREADS; nthreads++) {
        double sum = 0.0;

        omp_set_num_threads(nthreads);
        double start_time = omp_get_wtime();

#pragma omp parallel
        {
#pragma omp single
            printf(" num_threads = %d", omp_get_num_threads());

            /* The index has the same type as num_steps, so raising num_steps
             * beyond the range of int cannot overflow the counter. */
#pragma omp for reduction(+:sum)
            for (long i = 1; i <= num_steps; i++) {
                double x = (i - 0.5) * step; /* midpoint of rectangle i */
                sum = sum + 4.0 / (1.0 + x * x);
            }
        }

        double pi = step * sum;
        double run_time = omp_get_wtime() - start_time;
        printf("\n pi is %f in %f seconds and %d threads\n", pi, run_time, nthreads);
    }

    return 0;
}
/*
NAME: PI SPMD final version without false sharing
This program will numerically compute the integral of
4/(1+x*x)
from 0 to 1. The value of this integral is pi -- which
is great since it gives us an easy way to check the answer.
The program was parallelized using OpenMP and an SPMD
algorithm. The following OpenMP specific lines were
added:
(1) A line to include omp.h -- the include file that
contains OpenMP's function prototypes and constants.
(2) A pragma that tells OpenMP to create a team of threads
with an integer variable i being created for each thread.
(3) two function calls: one to get the thread ID (ranging
from 0 to one less than the number of threads), and the other
returning the total number of threads.
(4) A "single" construct so only one thread prints the number
of threads.
(5) A cyclic distribution of the loop by changing loop control
expressions to run from the thread ID incremented by the number
of threads. Local sums are accumulated into the private scalar
partial_sum.
(6) An "atomic" construct so that each thread adds its partial
sum to the shared global sum without a data race.
Note that this program avoids the false sharing problem
by storing partial sums into a private scalar.
History: Written by Tim Mattson, 11/99.
*/
#include <omp.h>
#include <stdio.h>
/* Largest team size tried: main() times the computation with 1..MAX_THREADS threads. */
#define MAX_THREADS 8
/* Number of rectangles used by the midpoint-rule integration in main(). */
static long num_steps = 100000000;
/* Width of one rectangle; main() sets it to 1.0 / num_steps before integrating. */
double step;
/*
 * Times the SPMD midpoint-rule approximation of pi for requested team sizes
 * 1..MAX_THREADS.
 *
 * Inside the parallel region every thread walks the rectangles cyclically
 * (id, id + numthreads, id + 2*numthreads, ...), accumulates into a private
 * scalar and finally adds that scalar to the shared total under an atomic
 * update.  Each run prints the team size that was actually created, then the
 * result, the elapsed wall-clock time and the requested thread count.
 *
 * Returns 0.
 */
int main(void) {
    step = 1.0 / (double)num_steps;

    for (int requested = 1; requested <= MAX_THREADS; requested++) {
        omp_set_num_threads(requested);

        double full_sum = 0.0;
        double start_time = omp_get_wtime();

#pragma omp parallel
        {
            int id = omp_get_thread_num();
            int numthreads = omp_get_num_threads();
            double partial_sum = 0.0; /* private: declared inside the region */

#pragma omp single
            printf(" num_threads = %d", numthreads);

            /* The index has the same type as num_steps, so "i += numthreads"
             * cannot overflow when num_steps approaches the int range. */
            for (long i = id; i < num_steps; i += numthreads) {
                double x = (i + 0.5) * step; /* midpoint of rectangle i */
                partial_sum += 4.0 / (1.0 + x * x);
            }

            /* One protected update per thread instead of one per iteration. */
#pragma omp atomic
            full_sum += partial_sum;
        }

        double pi = step * full_sum;
        double run_time = omp_get_wtime() - start_time;
        printf("\n pi is %f in %f seconds %d threads \n", pi, run_time, requested);
    }

    return 0;
}
/*
NAME: PI SPMD ... a simple version with a padded sum array.
This program will numerically compute the integral of
4/(1+x*x)
from 0 to 1. The value of this integral is pi -- which
is great since it gives us an easy way to check the answer.
The program was parallelized using OpenMP and an SPMD
algorithm. The following OpenMP specific lines were
added:
(1) A line to include omp.h -- the include file that
contains OpenMP's function prototypes and constants.
(2) A pragma that tells OpenMP to create a team of threads
with an integer variable i being created for each thread.
(3) two function calls: one to get the thread ID (ranging
from 0 to one less than the number of threads), and the other
returning the total number of threads.
(4) A cyclic distribution of the loop by changing loop control
expressions to run from the thread ID incremented by the number
of threads. Local sums accumulated into sum[id][0].
Note that a plain sum[id] array would show low performance due to
false sharing: each element is unique to a thread, but adjacent
values of the array share a cache line, causing cache thrashing
as the program runs. Here each thread's slot is padded out to PAD
doubles (assuming a 64 byte cache line) so that the partial sums
of different threads land on different cache lines.
History: Written by Tim Mattson, 11/99.
*/
#include <omp.h>
#include <stdio.h>
/* Largest team size tried: main() times the computation with 1..MAX_THREADS threads. */
#define MAX_THREADS 8
/*
 * Second dimension of the sum array in main(); only column 0 of each row is
 * used, so consecutive threads' partial sums are PAD doubles apart.
 */
#define PAD 8 // assume 64 byte L1 cache line size (and 8-byte doubles -- TODO confirm for the target)
/* Number of rectangles used by the midpoint-rule integration in main(). */
static long num_steps = 100000000;
/* Width of one rectangle; main() sets it to 1.0 / num_steps before integrating. */
double step;
/*
 * Times the SPMD midpoint-rule approximation of pi for requested team sizes
 * 1..MAX_THREADS, keeping each thread's partial sum in its own padded row of
 * a shared array.
 *
 * Every thread walks the rectangles cyclically and accumulates into
 * sum[id][0]; after the parallel region the rows are added up serially.
 * Each run prints the team size that was actually created, then the result,
 * the elapsed wall-clock time and the requested thread count.
 *
 * Returns 0.
 */
int main(void) {
    double sum[MAX_THREADS][PAD];

    step = 1.0 / (double)num_steps;

    // run the test for 1 to max_threads
    for (int j = 1; j <= MAX_THREADS; j++) {
        /*
         * Team size actually granted by the runtime.  It may be smaller than
         * the j requested below, so the combine step must use this value;
         * summing j rows would read rows that no thread wrote in this run.
         */
        int team_size = 0;

        omp_set_num_threads(j);
        double start_time = omp_get_wtime();

#pragma omp parallel
        {
            int id = omp_get_thread_num();
            int numthreads = omp_get_num_threads();

            sum[id][0] = 0.0;
            if (id == 0) {
                /* Written by one thread only; read after the region ends. */
                team_size = numthreads;
                printf(" num_threads = %d", numthreads);
            }

            /* The index has the same type as num_steps, so "i += numthreads"
             * cannot overflow when num_steps approaches the int range. */
            for (long i = id; i < num_steps; i += numthreads) {
                double x = (i + 0.5) * step; /* midpoint of rectangle i */
                sum[id][0] = sum[id][0] + 4.0 / (1.0 + x * x);
            }
        }

        double full_sum = 0.0;
        for (int t = 0; t < team_size; t++) {
            full_sum += sum[t][0];
        }

        double pi = step * full_sum;
        double run_time = omp_get_wtime() - start_time;
        printf("\n pi is %f in %f seconds %d threads \n", pi, run_time, j);
    }

    return 0;
}
/*
NAME: PI SPMD ... a simple version.
This program will numerically compute the integral of
4/(1+x*x)
from 0 to 1. The value of this integral is pi -- which
is great since it gives us an easy way to check the answer.
The program was parallelized using OpenMP and an SPMD
algorithm. The following OpenMP specific lines were
added:
(1) A line to include omp.h -- the include file that
contains OpenMP's function prototypes and constants.
(2) A pragma that tells OpenMP to create a team of threads
with an integer variable i being created for each thread.
(3) two function calls: one to get the thread ID (ranging
from 0 to one less than the number of threads), and the other
returning the total number of threads.
(4) A cyclic distribution of the loop by changing loop control
expressions to run from the thread ID incremented by the number
of threads. Local sums accumulated into sum[id].
Note that this program will show low performance due to
false sharing. In particular, sum[id] is unique to each
thread, but adjacent values of this array share a cache line,
causing cache thrashing as the program runs.
History: Written by Tim Mattson, 11/99.
*/
#include <omp.h>
#include <stdio.h>
/* Largest team size tried: main() times the computation with 1..MAX_THREADS threads. */
#define MAX_THREADS 8
/* Number of rectangles used by the midpoint-rule integration in main(). */
static long num_steps = 100000000;
/* Width of one rectangle; main() sets it to 1.0 / num_steps before integrating. */
double step;
/*
 * Times the simple SPMD midpoint-rule approximation of pi for requested team
 * sizes 1..MAX_THREADS, keeping each thread's partial sum in sum[id].
 *
 * Every thread walks the rectangles cyclically and accumulates into its own
 * array element; after the parallel region the elements are added up
 * serially.  Each run prints the team size that was actually created, then
 * the result, the elapsed wall-clock time and the requested thread count.
 *
 * Returns 0.
 */
int main(void) {
    double sum[MAX_THREADS];

    step = 1.0 / (double)num_steps;

    // run the test for 1 to max_threads
    for (int j = 1; j <= MAX_THREADS; j++) {
        /*
         * Team size actually granted by the runtime.  It may be smaller than
         * the j requested below, so the combine step must use this value;
         * summing j elements would read elements that no thread wrote in
         * this run.
         */
        int team_size = 0;

        omp_set_num_threads(j);
        double start_time = omp_get_wtime();

#pragma omp parallel
        {
            int id = omp_get_thread_num();
            int numthreads = omp_get_num_threads();

            sum[id] = 0.0;
            if (id == 0) {
                /* Written by one thread only; read after the region ends. */
                team_size = numthreads;
                printf(" num_threads = %d", numthreads);
            }

            /* The index has the same type as num_steps, so "i += numthreads"
             * cannot overflow when num_steps approaches the int range. */
            for (long i = id; i < num_steps; i += numthreads) {
                double x = (i + 0.5) * step; /* midpoint of rectangle i */
                sum[id] = sum[id] + 4.0 / (1.0 + x * x);
            }
        }

        double full_sum = 0.0;
        for (int t = 0; t < team_size; t++) {
            full_sum += sum[t];
        }

        double pi = step * full_sum;
        double run_time = omp_get_wtime() - start_time;
        printf("\n pi is %f in %f seconds %d threads \n", pi, run_time, j);
    }

    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment