Replaced doubles with floats
This commit is contained in:
parent
600e5eb149
commit
ef2e49659d
24
src/config.c
24
src/config.c
|
@ -3,12 +3,12 @@
|
||||||
|
|
||||||
typedef struct configuration {
|
typedef struct configuration {
|
||||||
int n;
|
int n;
|
||||||
double north;
|
float north;
|
||||||
double east;
|
float east;
|
||||||
double south;
|
float south;
|
||||||
double west;
|
float west;
|
||||||
double init_value;
|
float init_value;
|
||||||
double threshold;
|
float threshold;
|
||||||
} configuration;
|
} configuration;
|
||||||
|
|
||||||
int load_config(configuration *config) {
|
int load_config(configuration *config) {
|
||||||
|
@ -38,17 +38,17 @@ int load_config(configuration *config) {
|
||||||
if (strcmp(property, "N") == 0) {
|
if (strcmp(property, "N") == 0) {
|
||||||
sscanf(value, "%d", &(config->n));
|
sscanf(value, "%d", &(config->n));
|
||||||
} else if (strcmp(property, "NORTH") == 0) {
|
} else if (strcmp(property, "NORTH") == 0) {
|
||||||
sscanf(value, "%lf", &(config->north));
|
sscanf(value, "%f", &(config->north));
|
||||||
} else if (strcmp(property, "EAST") == 0) {
|
} else if (strcmp(property, "EAST") == 0) {
|
||||||
sscanf(value, "%lf", &(config->east));
|
sscanf(value, "%f", &(config->east));
|
||||||
} else if (strcmp(property, "SOUTH") == 0) {
|
} else if (strcmp(property, "SOUTH") == 0) {
|
||||||
sscanf(value, "%lf", &(config->south));
|
sscanf(value, "%f", &(config->south));
|
||||||
} else if (strcmp(property, "WEST") == 0) {
|
} else if (strcmp(property, "WEST") == 0) {
|
||||||
sscanf(value, "%lf", &(config->west));
|
sscanf(value, "%f", &(config->west));
|
||||||
} else if (strcmp(property, "INIT_VALUE") == 0) {
|
} else if (strcmp(property, "INIT_VALUE") == 0) {
|
||||||
sscanf(value, "%lf", &(config->init_value));
|
sscanf(value, "%f", &(config->init_value));
|
||||||
} else if (strcmp(property, "THRESHOLD") == 0) {
|
} else if (strcmp(property, "THRESHOLD") == 0) {
|
||||||
sscanf(value, "%lf", &(config->threshold));
|
sscanf(value, "%f", &(config->threshold));
|
||||||
} else {
|
} else {
|
||||||
printf("Unknown property %s\n", property);
|
printf("Unknown property %s\n", property);
|
||||||
}
|
}
|
||||||
|
|
12
src/config.h
12
src/config.h
|
@ -1,11 +1,11 @@
|
||||||
typedef struct configuration {
|
typedef struct configuration {
|
||||||
int n;
|
int n;
|
||||||
double north;
|
float north;
|
||||||
double east;
|
float east;
|
||||||
double south;
|
float south;
|
||||||
double west;
|
float west;
|
||||||
double init_value;
|
float init_value;
|
||||||
double threshold;
|
float threshold;
|
||||||
} configuration;
|
} configuration;
|
||||||
|
|
||||||
int load_config(configuration *config);
|
int load_config(configuration *config);
|
||||||
|
|
|
@ -12,12 +12,12 @@
|
||||||
#define TAG_BORDER 0
|
#define TAG_BORDER 0
|
||||||
#define TAG_MATRIX 1
|
#define TAG_MATRIX 1
|
||||||
|
|
||||||
double *compute_jacobi(int rank, int numprocs, int n, double init_value, double threshold, borders b, int *iterations) {
|
float *compute_jacobi(int rank, int numprocs, int n, float init_value, float threshold, borders b, int *iterations) {
|
||||||
double *complete_x;
|
float *complete_x;
|
||||||
double *x;
|
float *x;
|
||||||
double *new_x;
|
float *new_x;
|
||||||
double *tmp_x;
|
float *tmp_x;
|
||||||
double max_diff, global_max_diff, new_value;
|
float max_diff, global_max_diff, new_value;
|
||||||
int i, j;
|
int i, j;
|
||||||
int nb = n + 2; // n plus the border
|
int nb = n + 2; // n plus the border
|
||||||
int rows, rows_to_transmit;
|
int rows, rows_to_transmit;
|
||||||
|
@ -67,7 +67,7 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
|
||||||
for (i = 1; i <= rows; i++) {
|
for (i = 1; i <= rows; i++) {
|
||||||
for (j = 1; j <= n; j++) {
|
for (j = 1; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
||||||
new_x[IDX(nb, i, j)] = new_value;
|
new_x[IDX(nb, i, j)] = new_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -77,40 +77,40 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
|
||||||
if (rank % 2 == 0) {
|
if (rank % 2 == 0) {
|
||||||
if (rank != numprocs - 1) {
|
if (rank != numprocs - 1) {
|
||||||
// Send and receive south border
|
// Send and receive south border
|
||||||
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
|
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
|
||||||
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
}
|
}
|
||||||
if (rank != 0) {
|
if (rank != 0) {
|
||||||
// Send and receive north border
|
// Send and receive north border
|
||||||
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
|
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
|
||||||
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Receive and send north border
|
// Receive and send north border
|
||||||
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
|
MPI_Send(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD);
|
||||||
if (rank != numprocs - 1) {
|
if (rank != numprocs - 1) {
|
||||||
// Receive and send south border
|
// Receive and send south border
|
||||||
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
|
MPI_Send(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff)); */
|
/* LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff)); */
|
||||||
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
|
||||||
/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
|
/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
|
||||||
(*iterations)++;
|
(*iterations)++;
|
||||||
} while (global_max_diff > threshold);
|
} while (global_max_diff > threshold);
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
complete_x = create_sa_matrix(nb, nb);
|
complete_x = create_sa_matrix(nb, nb);
|
||||||
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(double));
|
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(float));
|
||||||
rows_to_transmit = n / numprocs;
|
rows_to_transmit = n / numprocs;
|
||||||
receive_pos = rows + 1;
|
receive_pos = rows + 1;
|
||||||
for (i = 1; i < numprocs; i++) {
|
for (i = 1; i < numprocs; i++) {
|
||||||
if (i == numprocs - 1) {
|
if (i == numprocs - 1) {
|
||||||
rows_to_transmit++;
|
rows_to_transmit++;
|
||||||
}
|
}
|
||||||
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_DOUBLE, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_FLOAT, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
receive_pos += n / numprocs;
|
receive_pos += n / numprocs;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -119,7 +119,7 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
|
||||||
if (rank == numprocs - 1) {
|
if (rank == numprocs - 1) {
|
||||||
rows_to_transmit++;
|
rows_to_transmit++;
|
||||||
}
|
}
|
||||||
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_DOUBLE, 0, TAG_MATRIX, MPI_COMM_WORLD);
|
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_FLOAT, 0, TAG_MATRIX, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
return complete_x;
|
return complete_x;
|
||||||
|
|
|
@ -12,12 +12,12 @@
|
||||||
#define TAG_BORDER 0
|
#define TAG_BORDER 0
|
||||||
#define TAG_MATRIX 1
|
#define TAG_MATRIX 1
|
||||||
|
|
||||||
double *compute_jacobi(int rank, int numprocs, int n, double init_value, double threshold, borders b, int *iterations) {
|
float *compute_jacobi(int rank, int numprocs, int n, float init_value, float threshold, borders b, int *iterations) {
|
||||||
double *complete_x;
|
float *complete_x;
|
||||||
double *x;
|
float *x;
|
||||||
double *new_x;
|
float *new_x;
|
||||||
double *tmp_x;
|
float *tmp_x;
|
||||||
double max_diff, global_max_diff, new_value;
|
float max_diff, global_max_diff, new_value;
|
||||||
int i, j;
|
int i, j;
|
||||||
int nb = n + 2; // n plus the border
|
int nb = n + 2; // n plus the border
|
||||||
int rows, rows_to_transmit;
|
int rows, rows_to_transmit;
|
||||||
|
@ -66,18 +66,18 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
|
||||||
do {
|
do {
|
||||||
if (rank != numprocs - 1) {
|
if (rank != numprocs - 1) {
|
||||||
// Send south border
|
// Send south border
|
||||||
MPI_Isend(&x[IDX(nb, rows, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &request_south);
|
MPI_Isend(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &request_south);
|
||||||
}
|
}
|
||||||
if (rank != 0) {
|
if (rank != 0) {
|
||||||
// Send north border
|
// Send north border
|
||||||
MPI_Isend(&x[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &request_north);
|
MPI_Isend(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &request_north);
|
||||||
}
|
}
|
||||||
max_diff = 0;
|
max_diff = 0;
|
||||||
global_max_diff = 0;
|
global_max_diff = 0;
|
||||||
for (i = 1; i <= rows; i++) {
|
for (i = 1; i <= rows; i++) {
|
||||||
for (j = 1; j <= n; j++) {
|
for (j = 1; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
||||||
new_x[IDX(nb, i, j)] = new_value;
|
new_x[IDX(nb, i, j)] = new_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -86,28 +86,28 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
|
||||||
x = tmp_x;
|
x = tmp_x;
|
||||||
if (rank != numprocs - 1) {
|
if (rank != numprocs - 1) {
|
||||||
// Receive south border
|
// Receive south border
|
||||||
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
}
|
}
|
||||||
if (rank != 0) {
|
if (rank != 0) {
|
||||||
// Receive north border
|
// Receive north border
|
||||||
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
}
|
}
|
||||||
LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff));
|
LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff));
|
||||||
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
|
||||||
/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
|
/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
|
||||||
(*iterations)++;
|
(*iterations)++;
|
||||||
} while (global_max_diff > threshold);
|
} while (global_max_diff > threshold);
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
complete_x = create_sa_matrix(nb, nb);
|
complete_x = create_sa_matrix(nb, nb);
|
||||||
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(double));
|
memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(float));
|
||||||
rows_to_transmit = n / numprocs;
|
rows_to_transmit = n / numprocs;
|
||||||
receive_pos = rows + 1;
|
receive_pos = rows + 1;
|
||||||
for (i = 1; i < numprocs; i++) {
|
for (i = 1; i < numprocs; i++) {
|
||||||
if (i == numprocs - 1) {
|
if (i == numprocs - 1) {
|
||||||
rows_to_transmit++;
|
rows_to_transmit++;
|
||||||
}
|
}
|
||||||
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_DOUBLE, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_FLOAT, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
receive_pos += n / numprocs;
|
receive_pos += n / numprocs;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -116,7 +116,7 @@ double *compute_jacobi(int rank, int numprocs, int n, double init_value, double
|
||||||
if (rank == numprocs - 1) {
|
if (rank == numprocs - 1) {
|
||||||
rows_to_transmit++;
|
rows_to_transmit++;
|
||||||
}
|
}
|
||||||
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_DOUBLE, 0, TAG_MATRIX, MPI_COMM_WORLD);
|
MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_FLOAT, 0, TAG_MATRIX, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
return complete_x;
|
return complete_x;
|
||||||
|
|
|
@ -8,11 +8,11 @@
|
||||||
#include "../config.h"
|
#include "../config.h"
|
||||||
#include "../utils.h"
|
#include "../utils.h"
|
||||||
|
|
||||||
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
|
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
|
||||||
double *x;
|
float *x;
|
||||||
double *new_x;
|
float *new_x;
|
||||||
double *tmp_x;
|
float *tmp_x;
|
||||||
double max_diff, new_value;
|
float max_diff, new_value;
|
||||||
int i, j;
|
int i, j;
|
||||||
int nb = n + 2; // n plus the border
|
int nb = n + 2; // n plus the border
|
||||||
|
|
||||||
|
@ -45,7 +45,7 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
|
||||||
for (i = 1; i <= n; i++) {
|
for (i = 1; i <= n; i++) {
|
||||||
for (j = 1; j <= n; j++) {
|
for (j = 1; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
||||||
new_x[IDX(nb, i, j)] = new_value;
|
new_x[IDX(nb, i, j)] = new_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,11 +8,11 @@
|
||||||
#include "../config.h"
|
#include "../config.h"
|
||||||
#include "../utils.h"
|
#include "../utils.h"
|
||||||
|
|
||||||
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
|
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
|
||||||
double *x;
|
float *x;
|
||||||
double *new_x;
|
float *new_x;
|
||||||
double *tmp_x;
|
float *tmp_x;
|
||||||
double max_diff, new_value;
|
float max_diff, new_value;
|
||||||
int i, j;
|
int i, j;
|
||||||
int nb = n + 2; // n plus the border
|
int nb = n + 2; // n plus the border
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
|
||||||
for (i = 1; i <= n; i++) {
|
for (i = 1; i <= n; i++) {
|
||||||
for (j = 1; j <= n; j++) {
|
for (j = 1; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
||||||
new_x[IDX(nb, i, j)] = new_value;
|
new_x[IDX(nb, i, j)] = new_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,14 +8,15 @@
|
||||||
#include "../config.h"
|
#include "../config.h"
|
||||||
#include "../utils.h"
|
#include "../utils.h"
|
||||||
|
|
||||||
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations) {
|
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
|
||||||
double *x;
|
float *x;
|
||||||
double *new_x;
|
float *new_x;
|
||||||
double *tmp_x;
|
float *tmp_x;
|
||||||
double max_diff, new_value;
|
float max_diff, new_value;
|
||||||
int i, j;
|
int i, j;
|
||||||
int nb = n + 2; // n plus the border
|
int nb = n + 2; // n plus the border
|
||||||
int n_mult = (n % 2 == 0) ? n : n - 1;
|
int n_mult = (n / 4) * 4;
|
||||||
|
__m128 new_value_vec, tmp_vec;
|
||||||
/* Initialize boundary regions */
|
/* Initialize boundary regions */
|
||||||
x = create_sa_matrix(nb, nb);
|
x = create_sa_matrix(nb, nb);
|
||||||
new_x = create_sa_matrix(nb, nb);
|
new_x = create_sa_matrix(nb, nb);
|
||||||
|
@ -40,24 +41,25 @@ double *compute_jacobi(int n, double init_value, double threshold, borders b, in
|
||||||
do {
|
do {
|
||||||
max_diff = 0;
|
max_diff = 0;
|
||||||
for (i = 1; i <= n; i++) {
|
for (i = 1; i <= n; i++) {
|
||||||
for (j = 1; j <= n_mult; j += 2) {
|
for (j = 1; j <= n_mult; j += 4) {
|
||||||
__m128d new_value_vec, tmp_vec;
|
new_value_vec = _mm_loadu_ps(&x[IDX(nb, i - 1, j)]);
|
||||||
new_value_vec = _mm_loadu_pd(&x[IDX(nb, i - 1, j)]);
|
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i + 1, j)]);
|
||||||
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i + 1, j)]);
|
new_value_vec = _mm_add_ps(new_value_vec, tmp_vec);
|
||||||
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
|
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j - 1)]);
|
||||||
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i, j - 1)]);
|
new_value_vec = _mm_add_ps(new_value_vec, tmp_vec);
|
||||||
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
|
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j + 1)]);
|
||||||
tmp_vec = _mm_loadu_pd(&x[IDX(nb, i, j + 1)]);
|
new_value_vec = _mm_add_ps(new_value_vec, tmp_vec);
|
||||||
new_value_vec = _mm_add_pd(new_value_vec, tmp_vec);
|
tmp_vec = _mm_set1_ps(0.25);
|
||||||
tmp_vec = _mm_set1_pd(0.25);
|
new_value_vec = _mm_mul_ps(new_value_vec, tmp_vec);
|
||||||
new_value_vec = _mm_mul_pd(new_value_vec, tmp_vec);
|
_mm_storeu_ps(&new_x[IDX(nb, i, j)], new_value_vec);
|
||||||
_mm_storeu_pd(&new_x[IDX(nb, i, j)], new_value_vec);
|
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
|
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
|
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 2)] - x[IDX(nb, i, j + 2)]));
|
||||||
|
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 3)] - x[IDX(nb, i, j + 3)]));
|
||||||
}
|
}
|
||||||
for (j = n_mult; j <= n; j++) {
|
for (j = n_mult; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
max_diff = (double) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
max_diff = (float) fmax(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
|
||||||
new_x[IDX(nb, i, j)] = new_value;
|
new_x[IDX(nb, i, j)] = new_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,15 +5,15 @@
|
||||||
#include "../config.h"
|
#include "../config.h"
|
||||||
#include "../utils.h"
|
#include "../utils.h"
|
||||||
|
|
||||||
double *compute_jacobi(int n, double init_value, double threshold, borders b, int *iterations);
|
float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations);
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
int n;
|
int n;
|
||||||
double init_value, threshold;
|
float init_value, threshold;
|
||||||
borders b;
|
borders b;
|
||||||
int config_loaded;
|
int config_loaded;
|
||||||
configuration config;
|
configuration config;
|
||||||
double *x;
|
float *x;
|
||||||
int iterations;
|
int iterations;
|
||||||
struct timeval start, end;
|
struct timeval start, end;
|
||||||
long secs_used, micros_used;
|
long secs_used, micros_used;
|
||||||
|
@ -36,7 +36,7 @@ int main(int argc, char* argv[]) {
|
||||||
|
|
||||||
secs_used = (end.tv_sec - start.tv_sec);
|
secs_used = (end.tv_sec - start.tv_sec);
|
||||||
micros_used = ((secs_used * 1000000) + end.tv_usec) - (start.tv_usec);
|
micros_used = ((secs_used * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||||
printf("Wall clock time: %fs\n", (double) micros_used / 1000000);
|
printf("Wall clock time: %fs\n", (float) micros_used / 1000000);
|
||||||
printf("Iterations: %d\n", iterations);
|
printf("Iterations: %d\n", iterations);
|
||||||
if (n < 10) {
|
if (n < 10) {
|
||||||
print_sa_matrix(x, n + 2, n + 2);
|
print_sa_matrix(x, n + 2, n + 2);
|
||||||
|
|
|
@ -5,18 +5,18 @@
|
||||||
#include "../config.h"
|
#include "../config.h"
|
||||||
#include "../utils.h"
|
#include "../utils.h"
|
||||||
|
|
||||||
double *compute_jacobi(int rank, int numprocs, int n, double init_value, double threshold, borders b, int *iterations);
|
float *compute_jacobi(int rank, int numprocs, int n, float init_value, float threshold, borders b, int *iterations);
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
int rank;
|
int rank;
|
||||||
int numprocs;
|
int numprocs;
|
||||||
int n;
|
int n;
|
||||||
double init_value, threshold;
|
float init_value, threshold;
|
||||||
double north, south, east, west;
|
float north, south, east, west;
|
||||||
borders b;
|
borders b;
|
||||||
int config_loaded;
|
int config_loaded;
|
||||||
configuration config;
|
configuration config;
|
||||||
double *x;
|
float *x;
|
||||||
double startwtime = 0.0, endwtime;
|
double startwtime = 0.0, endwtime;
|
||||||
int iterations;
|
int iterations;
|
||||||
|
|
||||||
|
@ -39,12 +39,12 @@ int main(int argc, char* argv[]) {
|
||||||
west = config.west;
|
west = config.west;
|
||||||
}
|
}
|
||||||
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
||||||
MPI_Bcast(&init_value, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&init_value, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
|
||||||
MPI_Bcast(&threshold, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&threshold, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
|
||||||
MPI_Bcast(&north, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&north, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
|
||||||
MPI_Bcast(&south, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&south, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
|
||||||
MPI_Bcast(&east, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&east, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
|
||||||
MPI_Bcast(&west, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
MPI_Bcast(&west, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
b.north = north;
|
b.north = north;
|
||||||
b.south = south;
|
b.south = south;
|
||||||
|
|
22
src/utils.c
22
src/utils.c
|
@ -2,18 +2,18 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
/*
 * Allocate a rows x cols matrix of floats stored in one contiguous array.
 *
 * The contents are left uninitialized. Returns NULL when a dimension is
 * negative, when the total byte count would not fit in size_t, or when
 * malloc fails. The caller owns the returned buffer.
 */
float *create_sa_matrix(int rows, int cols) {
    float *x;
    size_t n_rows, n_cols;

    if (rows < 0 || cols < 0) {
        return NULL;
    }
    n_rows = (size_t) rows;
    n_cols = (size_t) cols;

    /*
     * Do the size arithmetic in size_t: multiplying rows * cols as int
     * overflows (undefined behaviour) for large grids before the result is
     * ever widened by sizeof.
     */
    if (n_cols != 0 && n_rows > ((size_t) -1 / sizeof *x) / n_cols) {
        return NULL;
    }

    x = malloc(n_rows * n_cols * sizeof *x);
    return x;
}
|
||||||
|
|
||||||
void destroy_sa_matrix(double *x) {
|
void destroy_sa_matrix(float *x) {
|
||||||
free(x);
|
free(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_sa_matrix(double *x, int rows, int cols) {
|
void print_sa_matrix(float *x, int rows, int cols) {
|
||||||
int i, j;
|
int i, j;
|
||||||
for (i = 0; i < rows; i++) {
|
for (i = 0; i < rows; i++) {
|
||||||
for (j = 0; j < cols; j++) {
|
for (j = 0; j < cols; j++) {
|
||||||
|
@ -24,18 +24,18 @@ void print_sa_matrix(double *x, int rows, int cols) {
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Allocate a rows x cols matrix of floats as an array of row pointers, where
 * every row is a separate heap allocation. The contents are uninitialized.
 *
 * Returns NULL when a dimension is negative or an allocation fails; rows
 * that were already allocated are released first so nothing leaks. The
 * caller owns the returned matrix.
 */
float **create_matrix(int rows, int cols) {
    int i;
    float **x;

    if (rows < 0 || cols < 0) {
        return NULL;
    }

    /*
     * The outer array stores pointers, so it must be sized by the pointer
     * type (sizeof *x), not by sizeof(float): on LP64 a float is 4 bytes but
     * a float * is 8, and the smaller size makes x[i] overrun the buffer.
     */
    x = malloc((size_t) rows * sizeof *x);
    if (x == NULL) {
        return NULL;
    }

    for (i = 0; i < rows; i++) {
        x[i] = malloc((size_t) cols * sizeof *x[i]);
        /* malloc(0) may legitimately return NULL, so only cols > 0 is a failure. */
        if (x[i] == NULL && cols > 0) {
            while (i > 0) {
                free(x[--i]);
            }
            free(x);
            return NULL;
        }
    }
    return x;
}
|
||||||
|
|
||||||
void destroy_matrix(double **x, int rows) {
|
void destroy_matrix(float **x, int rows) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < rows; i++) {
|
for (i = 0; i < rows; i++) {
|
||||||
|
@ -44,7 +44,7 @@ void destroy_matrix(double **x, int rows) {
|
||||||
free(x);
|
free(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_matrix(double **x, int rows, int cols) {
|
void print_matrix(float **x, int rows, int cols) {
|
||||||
int i, j;
|
int i, j;
|
||||||
for (i = 0; i < rows; i++) {
|
for (i = 0; i < rows; i++) {
|
||||||
for (j = 0; j < cols; j++) {
|
for (j = 0; j < cols; j++) {
|
||||||
|
|
20
src/utils.h
20
src/utils.h
|
@ -17,30 +17,30 @@
|
||||||
#define IDX(cols, r, c) ((r) * (cols) + (c))
|
#define IDX(cols, r, c) ((r) * (cols) + (c))
|
||||||
|
|
||||||
typedef struct borders {
|
typedef struct borders {
|
||||||
double north;
|
float north;
|
||||||
double east;
|
float east;
|
||||||
double south;
|
float south;
|
||||||
double west;
|
float west;
|
||||||
} borders;
|
} borders;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a matrix stored in a single array.
|
* Create a matrix stored in a single array.
|
||||||
*/
|
*/
|
||||||
double *create_sa_matrix(int rows, int cols);
|
float *create_sa_matrix(int rows, int cols);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Destroy a single array matrix.
|
* Destroy a single array matrix.
|
||||||
*/
|
*/
|
||||||
void destroy_sa_matrix(double *x);
|
void destroy_sa_matrix(float *x);
|
||||||
|
|
||||||
int sa_index(int cols, int r, int c);
|
int sa_index(int cols, int r, int c);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Print a single array matrix.
|
* Print a single array matrix.
|
||||||
*/
|
*/
|
||||||
void print_sa_matrix(double *x, int rows, int cols);
|
void print_sa_matrix(float *x, int rows, int cols);
|
||||||
|
|
||||||
double **create_matrix(int rows, int cols);
|
float **create_matrix(int rows, int cols);
|
||||||
void destroy_matrix(double **x, int rows);
|
void destroy_matrix(float **x, int rows);
|
||||||
void print_matrix(double **x, int rows, int cols);
|
void print_matrix(float **x, int rows, int cols);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user