Replaced doubles with floats

This commit is contained in:
Fabio Salvini
2016-12-13 18:43:33 +01:00
parent 600e5eb149
commit ef2e49659d
11 changed files with 123 additions and 121 deletions

View File

@@ -12,12 +12,12 @@
#define TAG_BORDER 0
#define TAG_MATRIX 1
double *compute_jacobi(int rank, int numprocs, int n, double init_value, double threshold, borders b, int *iterations) {
double *complete_x;
double *x;
double *new_x;
double *tmp_x;
double max_diff, global_max_diff, new_value;
float *compute_jacobi(int rank, int numprocs, int n, float init_value, float threshold, borders b, int *iterations) {
float *complete_x;
float *x;
float *new_x;
float *tmp_x;
float max_diff, global_max_diff, new_value;
int i, j;
int nb = n + 2; // n plus the border
int rows, rows_to_transmit;
@@ -67,7 +67,7 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
for (i = 1; i <= rows; i++) {
for (j = 1; j <= n; j++) {
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
new_x[IDX(nb, i, j)] = new_value;
}
}
@@ -77,40 +77,40 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
if (rank % 2 == 0) {
if (rank != numprocs - 1) {
// Send and receive south border
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
if (rank != 0) {
// Send and receive north border
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
} else {
// Receive and send north border
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
if (rank != numprocs - 1) {
// Receive and send south border
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
}
}
/* LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff)); */
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
(*iterations)++;
} while (global_max_diff > threshold);
if (rank == 0) {
complete_x = create_sa_matrix(nb, nb);
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(double));
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(float));
rows_to_transmit = n / numprocs;
receive_pos = rows + 1;
for (i = 1; i < numprocs; i++) {
if (i == numprocs - 1) {
rows_to_transmit++;
}
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_DOUBLE, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_FLOAT, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
receive_pos += n / numprocs;
}
} else {
@@ -119,7 +119,7 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
if (rank == numprocs - 1) {
rows_to_transmit++;
}
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_DOUBLE, 0, TAG_MATRIX, MPI_COMM_WORLD);
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_FLOAT, 0, TAG_MATRIX, MPI_COMM_WORLD);
}
return complete_x;

View File

@@ -12,12 +12,12 @@
#define TAG_BORDER 0
#define TAG_MATRIX 1
double *compute_jacobi(int rank, int numprocs, int n, double init_value, double threshold, borders b, int *iterations) {
double *complete_x;
double *x;
double *new_x;
double *tmp_x;
double max_diff, global_max_diff, new_value;
float *compute_jacobi(int rank, int numprocs, int n, float init_value, float threshold, borders b, int *iterations) {
float *complete_x;
float *x;
float *new_x;
float *tmp_x;
float max_diff, global_max_diff, new_value;
int i, j;
int nb = n + 2; // n plus the border
int rows, rows_to_transmit;
@@ -66,18 +66,18 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
do {
if (rank != numprocs - 1) {
// Send south border
MPI_Isend(&x[IDX(nb, rows, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &request_south);
MPI_Isend(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &request_south);
}
if (rank != 0) {
// Send north border
MPI_Isend(&x[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &request_north);
MPI_Isend(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &request_north);
}
max_diff = 0;
global_max_diff = 0;
for (i = 1; i <= rows; i++) {
for (j = 1; j <= n; j++) {
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
new_x[IDX(nb, i, j)] = new_value;
}
}
@@ -86,28 +86,28 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
x = tmp_x;
if (rank != numprocs - 1) {
// Receive south border
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
if (rank != 0) {
// Receive north border
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff));
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
(*iterations)++;
} while (global_max_diff > threshold);
if (rank == 0) {
complete_x = create_sa_matrix(nb, nb);
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(double));
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(float));
rows_to_transmit = n / numprocs;
receive_pos = rows + 1;
for (i = 1; i < numprocs; i++) {
if (i == numprocs - 1) {
rows_to_transmit++;
}
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_DOUBLE, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_FLOAT, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
receive_pos += n / numprocs;
}
} else {
@@ -116,7 +116,7 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
if (rank == numprocs - 1) {
rows_to_transmit++;
}
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_DOUBLE, 0, TAG_MATRIX, MPI_COMM_WORLD);
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_FLOAT, 0, TAG_MATRIX, MPI_COMM_WORLD);
}
return complete_x;

View File

@@ -8,11 +8,11 @@
#include "../config.h"
#include "../utils.h"
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
double *x;
double *new_x;
double *tmp_x;
double max_diff, new_value;
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
float *x;
float *new_x;
float *tmp_x;
float max_diff, new_value;
int i, j;
int nb = n + 2; // n plus the border
@@ -45,7 +45,7 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
for (i = 1; i <= n; i++) {
for (j = 1; j <= n; j++) {
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
new_x[IDX(nb, i, j)] = new_value;
}
}

View File

@@ -8,11 +8,11 @@
#include "../config.h"
#include "../utils.h"
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
double *x;
double *new_x;
double *tmp_x;
double max_diff, new_value;
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
float *x;
float *new_x;
float *tmp_x;
float max_diff, new_value;
int i, j;
int nb = n + 2; // n plus the border
@@ -42,7 +42,7 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
for (i = 1; i <= n; i++) {
for (j = 1; j <= n; j++) {
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
new_x[IDX(nb, i, j)] = new_value;
}
}

View File

@@ -8,14 +8,15 @@
#include "../config.h"
#include "../utils.h"
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
double *x;
double *new_x;
double *tmp_x;
double max_diff, new_value;
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
float *x;
float *new_x;
float *tmp_x;
float max_diff, new_value;
int i, j;
int nb = n + 2; // n plus the border
int n_mult = (n % 2 == 0) ? n : n - 1;
int n_mult = (n / 4) * 4;
__m128 new_value_vec, tmp_vec;
/* Initialize boundary regions */
x = create_sa_matrix(nb, nb);
new_x = create_sa_matrix(nb, nb);
@@ -40,24 +41,25 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
do {
max_diff = 0;
for (i = 1; i <= n; i++) {
for (j = 1; j <= n_mult; j += 2) {
__m128d new_value_vec, tmp_vec;
new_value_vec = _mm_loadu_pd(&x[IDX(nb, i - 1, j)]);
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i + 1, j)]);
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i, j - 1)]);
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i, j + 1)]);
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
tmp_vec = _mm_set1_pd(0.25);
new_value_vec = _mm_mul_pd(new_value_vec, tmp_vec);
_mm_storeu_pd(&new_x[IDX(nb, i, j)], new_value_vec);
max_diff = (double) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
max_diff = (double) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
for (j = 1; j <= n_mult; j += 4) {
new_value_vec = _mm_loadu_ps(&x[IDX(nb, i - 1, j)]);
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i + 1, j)]);
new_value_vec = _mm_add_ps(new_value_vec, tmp_vec);
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j - 1)]);
new_value_vec = _mm_add_ps(new_value_vec, tmp_vec);
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j + 1)]);
new_value_vec = _mm_add_ps(new_value_vec, tmp_vec);
tmp_vec = _mm_set1_ps(0.25);
new_value_vec = _mm_mul_ps(new_value_vec, tmp_vec);
_mm_storeu_ps(&new_x[IDX(nb, i, j)], new_value_vec);
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 2)] - x[IDX(nb, i, j + 2)]));
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 3)] - x[IDX(nb, i, j + 3)]));
}
for (j = n_mult; j <= n; j++) {
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
new_x[IDX(nb, i, j)] = new_value;
}
}