diff --git a/Makefile b/Makefile
index a120085..b65f2a1 100644
--- a/Makefile
+++ b/Makefile
@@ -4,14 +4,23 @@ SRC=src
 BUILD=build
 BIN=bin
 
-all: sequential mpi_line
+all: sequential mpi_line mpi_line_async
 
 sequential: config utils main
-	${CC} ${CFLAGS} ${BUILD}/config.o ${BUILD}/utils.o ${BUILD}/main.o ${SRC}/impl/sequential.c -o ${BIN}/jacobi_sequential
+	${CC} ${CFLAGS} \
+		${BUILD}/config.o \
+		${BUILD}/utils.o \
+		${BUILD}/main.o \
+		${SRC}/impl/sequential.c \
+		-o ${BIN}/jacobi_sequential
 
 mpi_line: config utils main_mpi
 	${CC} ${CFLAGS} ${BUILD}/config.o ${BUILD}/utils.o ${BUILD}/main_mpi.o ${SRC}/impl/mpi_line.c -o ${BIN}/jacobi_mpi_line
 
+mpi_line_async: config utils main_mpi
+	${CC} ${CFLAGS} ${BUILD}/config.o ${BUILD}/utils.o ${BUILD}/main_mpi.o ${SRC}/impl/mpi_line_async.c \
+		-o ${BIN}/jacobi_mpi_line_async
+
 main: ${SRC}/main.c
 	${CC} -c ${CFLAGS} ${SRC}/main.c -o ${BUILD}/main.o
 
diff --git a/jacobi.conf b/jacobi.conf
index 3e7fd02..37689f1 100644
--- a/jacobi.conf
+++ b/jacobi.conf
@@ -1,7 +1,7 @@
 # Configuration file for the Jacobi project.
 
 # The size of the matrix (borders excluded).
-N 5
+N 10000
 
 # The value at each border.
 NORTH 0.0
diff --git a/src/impl/mpi_line_async.c b/src/impl/mpi_line_async.c
new file mode 100644
index 0000000..787f60e
--- /dev/null
+++ b/src/impl/mpi_line_async.c
@@ -0,0 +1,164 @@
+/*
+ * MPI version with the matrix subdivided by "lines".
+ * Border rows are exchanged with non-blocking sends so that the transfer can
+ * overlap with the local sweep.
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <string.h>
+#include "../config.h"
+#include "../utils.h"
+
+#define TAG_BORDER 0
+#define TAG_MATRIX 1
+
+/*
+ * Iteratively relaxes an n x n grid (plus a one-cell border) split into
+ * contiguous row blocks, one per MPI process, until the largest per-cell change
+ * across all processes is <= threshold.
+ *
+ * rank, numprocs: rank of this process and number of processes taking part in
+ *                 the MPI_COMM_WORLD exchanges.
+ * n:              grid size, borders excluded.
+ * init_value:     initial value of every interior cell.
+ * threshold:      the loop stops once the global maximum change is not
+ *                 greater than this value.
+ * b:              fixed values for the north/south/west/east borders.
+ * iterations:     out parameter, receives the number of sweeps performed.
+ *
+ * Rank 0 takes n / numprocs rows plus the remainder; every other rank takes
+ * n / numprocs rows. Cells are updated in place, so within a sweep a cell
+ * already sees the new values of its north and west neighbours.
+ *
+ * Returns the assembled (n + 2) x (n + 2) matrix on rank 0 and NULL on every
+ * other rank.
+ */
+double *compute_jacobi(int rank, int numprocs, int n, double init_value, double threshold, borders b, int *iterations) {
+    double *complete_x;
+    double *x;
+    double *send_buf;
+    double max_diff, global_max_diff, new_x;
+    int i, j;
+    int nb = n + 2; // n plus the border
+    int rows, rows_to_transmit;
+    int receive_pos;
+    int has_north = (rank != 0);
+    int has_south = (rank != numprocs - 1);
+    MPI_Status status;
+    MPI_Request request_north;
+    MPI_Request request_south;
+
+    if (rank == 0) {
+        rows = n - (n / numprocs) * (numprocs - 1);
+    } else {
+        rows = n / numprocs;
+    }
+    LOG(printf("[Process %d/%d] rows: %d\n", rank, numprocs, rows));
+
+    /* Initialize the matrix */
+    x = create_sa_matrix(rows + 2, nb);
+    for (i = 0; i < rows + 2; i++) {
+        for (j = 1; j <= n; j++) {
+            x[IDX(nb, i, j)] = init_value;
+        }
+    }
+
+    /* Initialize boundary regions */
+    for (i = 0; i < rows + 2; i++) {
+        x[IDX(nb, i, 0)] = b.west;
+        x[IDX(nb, i, n + 1)] = b.east;
+    }
+    if (!has_north) {
+        /* Columns 1..n only: the corner cells keep the west/east values. */
+        for (j = 1; j <= n; j++) {
+            x[IDX(nb, 0, j)] = b.north;
+        }
+    }
+    if (!has_south) {
+        for (j = 1; j <= n; j++) {
+            x[IDX(nb, rows + 1, j)] = b.south;
+        }
+    }
+
+    /*
+     * Staging area for the outgoing border rows (row 0: north, row 1: south).
+     * The sweep below overwrites the rows of x being sent, and a send buffer
+     * must not be modified before the non-blocking send has completed.
+     */
+    send_buf = create_sa_matrix(2, nb);
+
+    /* Iterative refinement of x until values converge */
+    *iterations = 0;
+    do {
+        if (has_south) {
+            // Send south border
+            memcpy(&send_buf[IDX(nb, 1, 0)], &x[IDX(nb, rows, 0)], (size_t) nb * sizeof(double));
+            MPI_Isend(&send_buf[IDX(nb, 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &request_south);
+        }
+        if (has_north) {
+            // Send north border
+            memcpy(&send_buf[IDX(nb, 0, 0)], &x[IDX(nb, 1, 0)], (size_t) nb * sizeof(double));
+            MPI_Isend(&send_buf[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &request_north);
+        }
+
+        max_diff = 0;
+        for (i = 1; i <= rows; i++) {
+            for (j = 1; j <= n; j++) {
+                new_x = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
+                max_diff = fmax(max_diff, fabs(new_x - x[IDX(nb, i, j)]));
+                x[IDX(nb, i, j)] = new_x;
+            }
+        }
+
+        if (has_south) {
+            // Receive south border
+            MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_DOUBLE, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &status);
+        }
+        if (has_north) {
+            // Receive north border
+            MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_DOUBLE, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &status);
+        }
+
+        /* Complete the sends: releases the requests and frees send_buf for reuse. */
+        if (has_south) {
+            MPI_Wait(&request_south, &status);
+        }
+        if (has_north) {
+            MPI_Wait(&request_north, &status);
+        }
+
+        LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff));
+        MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+        (*iterations)++;
+    } while (global_max_diff > threshold);
+
+    destroy_sa_matrix(send_buf);
+
+    /* Gather the row blocks on rank 0 */
+    if (rank == 0) {
+        complete_x = create_sa_matrix(nb, nb);
+        /* Own rows plus the north border; the south border too when running alone. */
+        memcpy(complete_x, x, (size_t) (rows + (has_south ? 1 : 2)) * nb * sizeof(double));
+        rows_to_transmit = n / numprocs;
+        receive_pos = rows + 1;
+        for (i = 1; i < numprocs; i++) {
+            if (i == numprocs - 1) {
+                rows_to_transmit++; /* the last rank also sends the south border row */
+            }
+            MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_DOUBLE, i, TAG_MATRIX, MPI_COMM_WORLD, &status);
+            receive_pos += n / numprocs;
+        }
+    } else {
+        complete_x = NULL;
+        rows_to_transmit = rows;
+        if (rank == numprocs - 1) {
+            rows_to_transmit++;
+        }
+        MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_DOUBLE, 0, TAG_MATRIX, MPI_COMM_WORLD);
+    }
+
+    destroy_sa_matrix(x);
+    return complete_x;
+}
diff --git a/src/main_mpi.c b/src/main_mpi.c
index f7994ed..53cedef 100644
--- a/src/main_mpi.c
+++ b/src/main_mpi.c
@@ -61,7 +61,7 @@ int main(int argc, char* argv[]) {
         endwtime = MPI_Wtime();
         printf("Wall clock time: %fs\n", endwtime - startwtime);
         printf("Iterations: %d\n", iterations);
-        print_sa_matrix(x, n + 2, n + 2);
+        /* print_sa_matrix(x, n + 2, n + 2); */
     }
 
     destroy_sa_matrix(x);