Added SSE Implementation
This commit is contained in:
parent
ec073db31d
commit
600e5eb149
10
Makefile
10
Makefile
|
@ -6,7 +6,7 @@ SRC=src
|
||||||
BUILD=build
|
BUILD=build
|
||||||
BIN=bin
|
BIN=bin
|
||||||
|
|
||||||
all: sequential mpi_line mpi_line_async omp
|
all: sequential mpi_line mpi_line_async omp sse
|
||||||
|
|
||||||
sequential: config utils main
|
sequential: config utils main
|
||||||
${CC} ${CFLAGS} \
|
${CC} ${CFLAGS} \
|
||||||
|
@ -16,6 +16,14 @@ sequential: config utils main
|
||||||
${SRC}/impl/sequential.c \
|
${SRC}/impl/sequential.c \
|
||||||
-o ${BIN}/jacobi_sequential
|
-o ${BIN}/jacobi_sequential
|
||||||
|
|
||||||
|
sse: config utils main
|
||||||
|
${CC_OMP} ${CFLAGS} \
|
||||||
|
${BUILD}/config.o \
|
||||||
|
${BUILD}/utils.o \
|
||||||
|
${BUILD}/main.o \
|
||||||
|
${SRC}/impl/sse.c \
|
||||||
|
-o ${BIN}/jacobi_sse
|
||||||
|
|
||||||
omp: config utils main
|
omp: config utils main
|
||||||
${CC_OMP} ${CFLAGS} \
|
${CC_OMP} ${CFLAGS} \
|
||||||
${BUILD}/config.o \
|
${BUILD}/config.o \
|
||||||
|
|
|
@ -39,9 +39,9 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
|
||||||
*iterations = 0;
|
*iterations = 0;
|
||||||
do {
|
do {
|
||||||
max_diff = 0;
|
max_diff = 0;
|
||||||
#pragma omp parallel for schedule(static) \
|
#pragma omp parallel for \
|
||||||
reduction (max:max_diff) \
|
reduction (max:max_diff) \
|
||||||
default(none) private(new_value, j) firstprivate(n, nb) shared(x, new_x)
|
default(none) private(new_value, j) shared(x, new_x, n, nb)
|
||||||
for (i = 1; i <= n; i++) {
|
for (i = 1; i <= n; i++) {
|
||||||
for (j = 1; j <= n; j++) {
|
for (j = 1; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
|
|
70
src/impl/sse.c
Normal file
70
src/impl/sse.c
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* SSE version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include "../config.h"
|
||||||
|
#include "../utils.h"
|
||||||
|
|
||||||
|
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
|
||||||
|
double *x;
|
||||||
|
double *new_x;
|
||||||
|
double *tmp_x;
|
||||||
|
double max_diff, new_value;
|
||||||
|
int i, j;
|
||||||
|
int nb = n + 2; // n plus the border
|
||||||
|
int n_mult = (n % 2 == 0) ? n : n - 1;
|
||||||
|
/* Initialize boundary regions */
|
||||||
|
x = create_sa_matrix(nb, nb);
|
||||||
|
new_x = create_sa_matrix(nb, nb);
|
||||||
|
for (i = 0; i < nb; i++) {
|
||||||
|
x[IDX(nb, 0, i)] = b.north;
|
||||||
|
x[IDX(nb, n + 1, i)] = b.south;
|
||||||
|
x[IDX(nb, i, 0)] = b.west;
|
||||||
|
x[IDX(nb, i, n + 1)] = b.east;
|
||||||
|
new_x[IDX(nb, 0, i)] = b.north;
|
||||||
|
new_x[IDX(nb, n + 1, i)] = b.south;
|
||||||
|
new_x[IDX(nb, i, 0)] = b.west;
|
||||||
|
new_x[IDX(nb, i, n + 1)] = b.east;
|
||||||
|
}
|
||||||
|
/* Initialize the rest of the matrix */
|
||||||
|
for (i = 1; i <= n; i++) {
|
||||||
|
for (j = 1; j <= n; j++) {
|
||||||
|
x[IDX(nb, i, j)] = init_value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Iterative refinement of x until values converge */
|
||||||
|
*iterations = 0;
|
||||||
|
do {
|
||||||
|
max_diff = 0;
|
||||||
|
for (i = 1; i <= n; i++) {
|
||||||
|
for (j = 1; j <= n_mult; j += 2) {
|
||||||
|
__m128d new_value_vec, tmp_vec;
|
||||||
|
new_value_vec = _mm_loadu_pd(&x[IDX(nb, i - 1, j)]);
|
||||||
|
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i + 1, j)]);
|
||||||
|
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
|
||||||
|
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i, j - 1)]);
|
||||||
|
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
|
||||||
|
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i, j + 1)]);
|
||||||
|
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
|
||||||
|
tmp_vec = _mm_set1_pd(0.25);
|
||||||
|
new_value_vec = _mm_mul_pd(new_value_vec, tmp_vec);
|
||||||
|
_mm_storeu_pd(&new_x[IDX(nb, i, j)], new_value_vec);
|
||||||
|
max_diff = (double) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
|
||||||
|
max_diff = (double) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
|
||||||
|
}
|
||||||
|
for (j = n_mult; j <= n; j++) {
|
||||||
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
|
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
||||||
|
new_x[IDX(nb, i, j)] = new_value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tmp_x = new_x;
|
||||||
|
new_x = x;
|
||||||
|
x = tmp_x;
|
||||||
|
(*iterations)++;
|
||||||
|
} while (max_diff > threshold);
|
||||||
|
return x;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user