Last commit: a3be6d5298 (parent 7f60341812)
Makefile (45 lines changed)
@@ -1,12 +1,14 @@
 CC=gcc
 CC_OMP=gcc -fopenmp
 CC_MPI=mpicc
+CC_CUDA=nvcc
 CFLAGS=-Wall -lm -std=c99
 SRC=src
 BUILD=build
+BUILD_CUDA=build_cuda
 BIN=bin
 
-all: sequential mpi_line mpi_line_async omp sse
+all: sequential mpi omp sse cuda
 
 sequential: config utils main
 	${CC} ${CFLAGS} \
@@ -16,8 +18,16 @@ sequential: config utils main
 		${SRC}/impl/sequential.c \
 		-o ${BIN}/jacobi_sequential
 
+cuda: config_cuda utils_cuda main_cuda
+	${CC_CUDA} \
+		${BUILD_CUDA}/config.o \
+		${BUILD_CUDA}/utils.o \
+		${BUILD_CUDA}/main.o \
+		${SRC}/impl/cuda.cu \
+		-o ${BIN}/jacobi_cuda
+
 sse: config utils main
-	${CC_OMP} ${CFLAGS} \
+	${CC} ${CFLAGS} \
 		${BUILD}/config.o \
 		${BUILD}/utils.o \
 		${BUILD}/main.o \
@@ -32,21 +42,13 @@ omp: config utils main
 		${SRC}/impl/omp.c \
 		-o ${BIN}/jacobi_omp
 
-mpi_line: config utils main_mpi
+mpi: config utils main_mpi
 	${CC_MPI} ${CFLAGS} \
 		${BUILD}/config.o \
 		${BUILD}/utils.o \
 		${BUILD}/main_mpi.o \
-		${SRC}/impl/mpi_line.c \
-		-o ${BIN}/jacobi_mpi_line
-
-mpi_line_async: config utils main_mpi
-	${CC_MPI} ${CFLAGS} \
-		${BUILD}/config.o \
-		${BUILD}/utils.o \
-		${BUILD}/main_mpi.o \
-		${SRC}/impl/mpi_line_async.c \
-		-o ${BIN}/jacobi_mpi_line_async
+		${SRC}/impl/mpi.c \
+		-o ${BIN}/jacobi_mpi
 
 main: ${SRC}/main/main.c
 	${CC_MPI} -c ${CFLAGS} \
@@ -58,16 +60,31 @@ main_mpi: ${SRC}/main/main_mpi.c
 		${SRC}/main/main_mpi.c \
 		-o ${BUILD}/main_mpi.o
 
+main_cuda: ${SRC}/main/main.cu
+	${CC_CUDA} -c \
+		${SRC}/main/main.cu \
+		-o ${BUILD_CUDA}/main.o
+
 config: ${SRC}/config.c
 	${CC_MPI} -c ${CFLAGS} \
 		${SRC}/config.c \
 		-o ${BUILD}/config.o
 
+config_cuda: ${SRC}/config.cu
+	${CC_CUDA} -c \
+		${SRC}/config.cu \
+		-o ${BUILD_CUDA}/config.o
+
 utils: ${SRC}/utils.c
 	${CC_MPI} -c ${CFLAGS} \
 		${SRC}/utils.c \
 		-o ${BUILD}/utils.o
 
+utils_cuda: ${SRC}/utils.cu
+	${CC_CUDA} -c \
+		${SRC}/utils.cu \
+		-o ${BUILD_CUDA}/utils.o
+
 .PHONY: clean
 clean:
-	rm build/[^.]* bin/[^.]*
+	rm build/[^.]* build_cuda/[^.]* bin/[^.]*
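Note: the new CUDA rules invoke ${CC_CUDA} without ${CFLAGS}, presumably because nvcc's C++ front end does not accept C-only flags such as -std=c99; the cuda target also compiles ${SRC}/impl/cuda.cu directly in the link step rather than producing an object under build_cuda. Object files for the CUDA build land in the new build_cuda directory, which the updated clean rule now empties as well.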
benchmarks/b1/cuda.sh (new file, 3 lines)
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+../../bin/jacobi_cuda > results/stdout_cuda.txt

benchmarks/b1/jacobi.conf (new file, 16 lines)
@@ -0,0 +1,16 @@
+# Configuration file for the Jacobi project.
+
+# The size of the matrix (borders excluded).
+N 1000
+
+# The value at each border.
+NORTH 0.0
+EAST 0.0
+SOUTH 300.0
+WEST 0.0
+
+# The initial value to assign to each internal cell.
+INIT_VALUE 0.0
+
+# The threshold that determines convergence.
+THRESHOLD 1.0
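THRESHOLD sets the stopping rule shared by all implementations in this commit: the solver keeps sweeping until no cell changes by more than the threshold in one iteration. A minimal C sketch of that rule, borrowing the names (fmaxf, IDX, nb) used in the sources below; a hedged illustration, not the committed code:

    /* Sketch of the convergence test implied by THRESHOLD: keep sweeping
     * until the largest per-cell change is no bigger than the threshold. */
    float max_diff;
    int iterations = 0;
    do {
        max_diff = 0;
        /* ... one Jacobi sweep writing new_x from x ... */
        for (int i = 1; i <= n; i++) {
            for (int j = 1; j <= n; j++) {
                max_diff = fmaxf(max_diff, fabsf(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
            }
        }
        iterations++;
    } while (max_diff > threshold);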
benchmarks/b1/mpi_1_1.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_1.txt
+#SBATCH -e results/stderr_mpi_1_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b1/mpi_1_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_10.txt
+#SBATCH -e results/stderr_mpi_1_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b1/mpi_2_5.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_2_5.txt
+#SBATCH -e results/stderr_mpi_2_5.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 2
+#SBATCH --tasks-per-node=5
+#SBATCH --nodelist=c6,c7
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b1/omp_1.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_1.txt
+#SBATCH -e results/stderr_omp_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=1
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b1/omp_10.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_10.txt
+#SBATCH -e results/stderr_omp_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=10
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b1/sequential.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sequential
+#SBATCH -o results/stdout_sequential.txt
+#SBATCH -e results/stderr_sequential.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sequential

benchmarks/b1/sse.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sse
+#SBATCH -o results/stdout_sse.txt
+#SBATCH -e results/stderr_sse.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sse
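Note on running the benchmarks: load_config (src/config.cu below) opens jacobi.conf relative to the working directory, and cuda.sh calls ../../bin/jacobi_cuda, so each script is meant to be launched from its own benchmarks/bN directory — the SLURM scripts via sbatch (e.g. sbatch sequential.sh), cuda.sh directly with sh. The b2–b5 directories that follow repeat the same scripts, varying only the matrix size N.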
benchmarks/b2/cuda.sh (new file, 3 lines)
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+../../bin/jacobi_cuda > results/stdout_cuda.txt

benchmarks/b2/jacobi.conf (new file, 16 lines)
@@ -0,0 +1,16 @@
+# Configuration file for the Jacobi project.
+
+# The size of the matrix (borders excluded).
+N 2000
+
+# The value at each border.
+NORTH 0.0
+EAST 0.0
+SOUTH 300.0
+WEST 0.0
+
+# The initial value to assign to each internal cell.
+INIT_VALUE 0.0
+
+# The threshold that determines convergence.
+THRESHOLD 1.0

benchmarks/b2/mpi_1_1.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_1.txt
+#SBATCH -e results/stderr_mpi_1_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b2/mpi_1_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_10.txt
+#SBATCH -e results/stderr_mpi_1_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b2/mpi_2_5.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_2_5.txt
+#SBATCH -e results/stderr_mpi_2_5.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 2
+#SBATCH --tasks-per-node=5
+#SBATCH --nodelist=c6,c7
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b2/omp_1.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_1.txt
+#SBATCH -e results/stderr_omp_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=1
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b2/omp_10.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_10.txt
+#SBATCH -e results/stderr_omp_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=10
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b2/sequential.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sequential
+#SBATCH -o results/stdout_sequential.txt
+#SBATCH -e results/stderr_sequential.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sequential

benchmarks/b2/sse.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sse
+#SBATCH -o results/stdout_sse.txt
+#SBATCH -e results/stderr_sse.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sse
benchmarks/b3/cuda.sh (new file, 3 lines)
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+../../bin/jacobi_cuda > results/stdout_cuda.txt

benchmarks/b3/jacobi.conf (new file, 16 lines)
@@ -0,0 +1,16 @@
+# Configuration file for the Jacobi project.
+
+# The size of the matrix (borders excluded).
+N 5000
+
+# The value at each border.
+NORTH 0.0
+EAST 0.0
+SOUTH 300.0
+WEST 0.0
+
+# The initial value to assign to each internal cell.
+INIT_VALUE 0.0
+
+# The threshold that determines convergence.
+THRESHOLD 1.0

benchmarks/b3/mpi_1_1.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_1.txt
+#SBATCH -e results/stderr_mpi_1_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b3/mpi_1_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_10.txt
+#SBATCH -e results/stderr_mpi_1_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b3/mpi_2_5.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_2_5.txt
+#SBATCH -e results/stderr_mpi_2_5.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 2
+#SBATCH --tasks-per-node=5
+#SBATCH --nodelist=c6,c7
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b3/omp_1.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_1.txt
+#SBATCH -e results/stderr_omp_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=1
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b3/omp_10.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_10.txt
+#SBATCH -e results/stderr_omp_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=10
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b3/sequential.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sequential
+#SBATCH -o results/stdout_sequential.txt
+#SBATCH -e results/stderr_sequential.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sequential

benchmarks/b3/sse.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sse
+#SBATCH -o results/stdout_sse.txt
+#SBATCH -e results/stderr_sse.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sse
benchmarks/b4/cuda.sh (new file, 3 lines)
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+../../bin/jacobi_cuda > results/stdout_cuda.txt

benchmarks/b4/jacobi.conf (new file, 16 lines)
@@ -0,0 +1,16 @@
+# Configuration file for the Jacobi project.
+
+# The size of the matrix (borders excluded).
+N 10000
+
+# The value at each border.
+NORTH 0.0
+EAST 0.0
+SOUTH 300.0
+WEST 0.0
+
+# The initial value to assign to each internal cell.
+INIT_VALUE 0.0
+
+# The threshold that determines convergence.
+THRESHOLD 1.0

benchmarks/b4/mpi_1_1.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_1.txt
+#SBATCH -e results/stderr_mpi_1_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b4/mpi_1_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_10.txt
+#SBATCH -e results/stderr_mpi_1_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b4/mpi_5_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_5_10.txt
+#SBATCH -e results/stderr_mpi_5_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 5
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c2,c3,c4,c6,c7
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b4/omp_1.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_1.txt
+#SBATCH -e results/stderr_omp_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=1
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b4/omp_10.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_10.txt
+#SBATCH -e results/stderr_omp_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=10
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b4/sequential.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sequential
+#SBATCH -o results/stdout_sequential.txt
+#SBATCH -e results/stderr_sequential.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sequential

benchmarks/b4/sse.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sse
+#SBATCH -o results/stdout_sse.txt
+#SBATCH -e results/stderr_sse.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sse
benchmarks/b5/cuda.sh (new file, 3 lines)
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+../../bin/jacobi_cuda > results/stdout_cuda.txt

benchmarks/b5/jacobi.conf (new file, 16 lines)
@@ -0,0 +1,16 @@
+# Configuration file for the Jacobi project.
+
+# The size of the matrix (borders excluded).
+N 20000
+
+# The value at each border.
+NORTH 0.0
+EAST 0.0
+SOUTH 300.0
+WEST 0.0
+
+# The initial value to assign to each internal cell.
+INIT_VALUE 0.0
+
+# The threshold that determines convergence.
+THRESHOLD 1.0

benchmarks/b5/mpi_1_1.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_1.txt
+#SBATCH -e results/stderr_mpi_1_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b5/mpi_1_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_1_10.txt
+#SBATCH -e results/stderr_mpi_1_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c6
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b5/mpi_5_10.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J mpi
+#SBATCH -o results/stdout_mpi_5_10.txt
+#SBATCH -e results/stderr_mpi_5_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 5
+#SBATCH --tasks-per-node=10
+#SBATCH --nodelist=c2,c3,c4,c6,c7
+
+mpirun ~/JacobiHPC/bin/jacobi_mpi

benchmarks/b5/omp_1.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_1.txt
+#SBATCH -e results/stderr_omp_1.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=1
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b5/omp_10.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/sh
+#SBATCH -J omp
+#SBATCH -o results/stdout_omp_10.txt
+#SBATCH -e results/stderr_omp_10.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+export OMP_NUM_THREADS=10
+
+~/JacobiHPC/bin/jacobi_omp

benchmarks/b5/sequential.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sequential
+#SBATCH -o results/stdout_sequential.txt
+#SBATCH -e results/stderr_sequential.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sequential

benchmarks/b5/sse.sh (new file, 10 lines)
@@ -0,0 +1,10 @@
+#!/bin/sh
+#SBATCH -J sse
+#SBATCH -o results/stdout_sse.txt
+#SBATCH -e results/stderr_sse.txt
+#SBATCH -t 00:30:00
+#SBATCH -N 1
+#SBATCH --tasks-per-node=1
+#SBATCH --nodelist=c6
+
+~/JacobiHPC/bin/jacobi_sse
build_cuda/.gitignore (new file, vendored, 4 lines)
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
src/config.c
@@ -11,6 +11,7 @@ typedef struct configuration {
 	float threshold;
 } configuration;
 
+
 int load_config(configuration *config) {
 	char property[100];
 	char *value;
src/config.cu (new file, 79 lines)
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+typedef struct configuration {
+	int n;
+	float north;
+	float east;
+	float south;
+	float west;
+	float init_value;
+	float threshold;
+} configuration;
+
+__host__ int old_load_config(configuration *config) {
+	config->n = 5;
+	config->north = 0.0;
+	config->east = 0.0;
+	config->west = 0.0;
+	config->south = 300.0;
+	config->init_value = 0.0;
+	config->threshold = 1.0;
+	return 0;
+}
+
+__host__ int load_config(configuration *config) {
+	char property[100];
+	char *value;
+	FILE *fp;
+
+	fp = fopen("jacobi.conf", "r");
+	if (fp == NULL) {
+		perror("Error opening file jacobi.conf");
+		return 1;
+	}
+	while (fgets(property, 100, fp) != NULL) {
+		if (property[0] == '\n' || property[0] == '#') {
+			/* Skip empty lines and comments */
+			continue;
+		}
+		value = strchr(property, ' ');
+		if (value == NULL) {
+			fclose(fp);
+			perror("Error reading file jacobi.conf");
+			return 1;
+		}
+		value[0] = '\0';
+		value += sizeof(char);
+		value[strlen(value) - 1] = '\0';
+		if (strcmp(property, "N") == 0) {
+			sscanf(value, "%d", &(config->n));
+		}
+		else if (strcmp(property, "NORTH") == 0) {
+			sscanf(value, "%f", &(config->north));
+		}
+		else if (strcmp(property, "EAST") == 0) {
+			sscanf(value, "%f", &(config->east));
+		}
+		else if (strcmp(property, "SOUTH") == 0) {
+			sscanf(value, "%f", &(config->south));
+		}
+		else if (strcmp(property, "WEST") == 0) {
+			sscanf(value, "%f", &(config->west));
+		}
+		else if (strcmp(property, "INIT_VALUE") == 0) {
+			sscanf(value, "%f", &(config->init_value));
+		}
+		else if (strcmp(property, "THRESHOLD") == 0) {
+			sscanf(value, "%f", &(config->threshold));
+		}
+		else {
+			printf("Unknown property %s\n", property);
+		}
+	}
+	fclose(fp);
+	return 0;
+}
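A minimal host-side sketch of how load_config is consumed (this mirrors what src/main/main.cu below does; compiling with nvcc and linking against build_cuda/config.o is assumed):

    #include <stdio.h>
    #include "config.cuh"

    int main(void) {
        configuration config;
        /* load_config reads jacobi.conf from the current working directory */
        if (load_config(&config) != 0) {
            return 1;
        }
        printf("N=%d SOUTH=%.1f THRESHOLD=%.1f\n", config.n, config.south, config.threshold);
        return 0;
    }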
src/config.cuh (new file, 14 lines)
@@ -0,0 +1,14 @@
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+typedef struct configuration {
+	int n;
+	float north;
+	float east;
+	float south;
+	float west;
+	float init_value;
+	float threshold;
+} configuration;
+
+__host__ int load_config(configuration *config);
src/impl/cuda.cu (new file, 118 lines)
@@ -0,0 +1,118 @@
+/*
+ * CUDA version.
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include "../config.cuh"
+#include "../utils.cuh"
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#define THREADS_BLOCK 256
+
+__host__ void check_status(cudaError_t cuda_status, char *msg) {
+	if (cuda_status != cudaSuccess) {
+		fprintf(stderr, "%s", msg);
+		fprintf(stderr, ": ");
+		fprintf(stderr, "%s", cudaGetErrorString(cuda_status));
+		fprintf(stderr, " (error code: %d)\n", cuda_status);
+		exit(EXIT_FAILURE);
+	}
+}
+
+__global__ void initialize_matrix_on_gpu(float *x, int n, float init_value, borders b, cudaError_t *cuda_status) {
+	int i, j;
+	int nb = n + 2;
+
+	/* Initialize borders */
+	for (i = 0; i < nb; i++) {
+		x[IDX(nb, 0, i)] = b.north;
+		x[IDX(nb, n + 1, i)] = b.south;
+		x[IDX(nb, i, 0)] = b.west;
+		x[IDX(nb, i, n + 1)] = b.east;
+	}
+	/* Initialize the rest of the matrix */
+	for (i = 1; i <= n; i++) {
+		for (j = 1; j <= n; j++) {
+			x[IDX(nb, i, j)] = init_value;
+		}
+	}
+}
+
+__global__ void iterate(int n, float *x, float *new_x) {
+	int idx, nb;
+	int i, j;
+
+	nb = n + 2;
+	idx = blockDim.x * blockIdx.x + threadIdx.x;
+	i = idx / nb;
+	j = idx % nb;
+	if (i >= 1 && i <= n && j >= 1 && j <= n) {
+		new_x[idx] = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
+	}
+}
+
+__host__ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations) {
+	float *x, *new_x;
+	float *x_gpu, *new_x_gpu;
+	float *tmp_x;
+	float max_diff;
+	int i, j;
+	int nb = n + 2; // n plus the border
+	int blocks_number;
+	int threads_block = THREADS_BLOCK;
+	cudaError_t cuda_status;
+
+	// Select the GPU
+	check_status(cudaSetDevice(0), "cudaSetDevice failed!");
+
+	/* Create the matrices on the GPU */
+	x_gpu = create_sa_matrix_on_gpu(nb, nb, &cuda_status);
+	check_status(cuda_status, "create_sa_matrix_on_gpu failed!");
+	new_x_gpu = create_sa_matrix_on_gpu(nb, nb, &cuda_status);
+	check_status(cuda_status, "create_sa_matrix_on_gpu failed!");
+
+	/* Initialize the matrices */
+	initialize_matrix_on_gpu<<<1, 1>>>(x_gpu, n, init_value, b, &cuda_status);
+	check_status(cuda_status, "initialize_matrix_on_gpu failed!");
+	initialize_matrix_on_gpu<<<1, 1>>>(new_x_gpu, n, init_value, b, &cuda_status);
+	check_status(cuda_status, "initialize_matrix_on_gpu failed!");
+
+	/* Iterative refinement of x until values converge */
+	x = retrieve_sa_matrix_from_gpu(x_gpu, nb, nb, &cuda_status);
+	check_status(cuda_status, "retrieve_sa_matrix_from_gpu failed!");
+
+	blocks_number = nb * nb / threads_block + 1;	/* one thread per cell of the bordered matrix */
+	*iterations = 0;
+	do {
+		iterate<<<blocks_number, threads_block>>>(n, x_gpu, new_x_gpu);
+		new_x = retrieve_sa_matrix_from_gpu(new_x_gpu, nb, nb, &cuda_status);
+		check_status(cuda_status, "retrieve_sa_matrix_from_gpu failed!");
+		max_diff = 0;
+		for (i = 1; i <= n; i++) {
+			for (j = 1; j <= n; j++) {
+				max_diff = fmaxf(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
+			}
+		}
+
+		tmp_x = new_x;
+		new_x = x;
+		x = tmp_x;
+
+		tmp_x = new_x_gpu;
+		new_x_gpu = x_gpu;
+		x_gpu = tmp_x;
+
+		(*iterations)++;
+	} while (max_diff > threshold);
+
+	x = retrieve_sa_matrix_from_gpu(x_gpu, nb, nb, &cuda_status);
+	check_status(cuda_status, "retrieve_sa_matrix_from_gpu failed!");
+
+	destroy_sa_matrix_on_gpu(x_gpu);
+	destroy_sa_matrix_on_gpu(new_x_gpu);
+
+	return x;
+}
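Note: initialize_matrix_on_gpu receives a host pointer cuda_status that a __global__ kernel cannot meaningfully write, so the check_status calls after the <<<1, 1>>> launches test a value the kernel never set; the single-thread launch also serializes initialization onto one GPU thread. A hedged sketch of the conventional launch check, using only standard CUDA runtime calls and the names from this file:

    /* Sketch: standard error checking around a kernel launch.
     * cudaGetLastError() catches launch-configuration errors;
     * cudaDeviceSynchronize() surfaces errors raised while the kernel runs. */
    initialize_matrix_on_gpu<<<1, 1>>>(x_gpu, n, init_value, b, NULL);
    check_status(cudaGetLastError(), "initialize_matrix_on_gpu launch failed!");
    check_status(cudaDeviceSynchronize(), "initialize_matrix_on_gpu failed!");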
(deleted file, 123 lines)
@@ -1,123 +0,0 @@
-/*
- * MPI version with the matrix subdivided by "lines".
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <mpi.h>
-#include "../config.h"
-#include "../utils.h"
-
-#define TAG_BORDER 0
-#define TAG_MATRIX 1
-
-float *compute_jacobi(int rank, int numprocs, int n, float init_value, float threshold, borders b, int *iterations) {
-	float *complete_x;
-	float *x;
-	float *new_x;
-	float *tmp_x;
-	float max_diff, global_max_diff, new_value;
-	int i, j;
-	int nb = n + 2; // n plus the border
-	int rows, rows_to_transmit;
-	int receive_pos;
-	MPI_Request request_north;
-	MPI_Request request_south;
-
-	if (rank == 0) {
-		rows = n - (n / numprocs) * (numprocs - 1);
-	} else {
-		rows = n / numprocs;
-	}
-	LOG(printf("[Process %d/%d] rows: %d\n", rank, numprocs, rows));
-	/* LOG(printf("[Process %d/%d] initializing matrix\n", rank, numprocs)); */
-	/* Initialize the matrix */
-	x = create_sa_matrix(rows + 2, nb);
-	new_x = create_sa_matrix(rows + 2, nb);
-	for (i = 0; i < rows + 2; i++) {
-		for (j = 1; j <= n; j++) {
-			x[IDX(nb, i, j)] = init_value;
-			new_x[IDX(nb, i, j)] = init_value;
-		}
-	}
-	/* Initialize boundary regions */
-	for (i = 0; i < rows + 2; i++) {
-		x[IDX(nb, i, 0)] = b.west;
-		x[IDX(nb, i, n + 1)] = b.east;
-		new_x[IDX(nb, i, 0)] = b.west;
-		new_x[IDX(nb, i, n + 1)] = b.east;
-	}
-	if (rank == 0) {
-		for (i = 1; i <= n + 1; i++) {
-			x[IDX(nb, 0, i)] = b.north;
-			new_x[IDX(nb, 0, i)] = b.north;
-		}
-	}
-	if (rank == numprocs - 1){
-		for (i = 1; i < n + 1; i++) {
-			x[IDX(nb, rows + 1, i)] = b.south;
-			new_x[IDX(nb, rows + 1, i)] = b.south;
-		}
-	}
-	/* LOG(printf("[Process %d/%d] matrix initialized\n", rank, numprocs)); */
-	/* Iterative refinement of x until values converge */
-	*iterations = 0;
-	do {
-		if (rank != numprocs - 1) {
-			// Send south border
-			MPI_Isend(&x[IDX(nb, rows, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, &request_south);
-		}
-		if (rank != 0) {
-			// Send north border
-			MPI_Isend(&x[IDX(nb, 1, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, &request_north);
-		}
-		max_diff = 0;
-		global_max_diff = 0;
-		for (i = 1; i <= rows; i++) {
-			for (j = 1; j <= n; j++) {
-				new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
-				max_diff = fmaxf(max_diff, fabs(new_value - x[IDX(nb, i, j)]));
-				new_x[IDX(nb, i, j)] = new_value;
-			}
-		}
-		tmp_x = new_x;
-		new_x = x;
-		x = tmp_x;
-		if (rank != numprocs - 1) {
-			// Receive south border
-			MPI_Recv(&x[IDX(nb, rows + 1, 0)], nb, MPI_FLOAT, rank + 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-		}
-		if (rank != 0) {
-			// Receive north border
-			MPI_Recv(&x[IDX(nb, 0, 0)], nb, MPI_FLOAT, rank - 1, TAG_BORDER, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-		}
-		LOG(printf("[Process %d/%d] max_diff: %f\n", rank, numprocs, max_diff));
-		MPI_Allreduce(&max_diff, &global_max_diff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
-		/* LOG(printf("[Process %d/%d] global_max_diff: %f\n", rank, numprocs, global_max_diff)); */
-		(*iterations)++;
-	} while (global_max_diff > threshold);
-
-	if (rank == 0) {
-		complete_x = create_sa_matrix(nb, nb);
-		memcpy(complete_x, x, (rows + ((rank == numprocs - 1) ? 2 : 1)) * (nb) * sizeof(float));
-		rows_to_transmit = n / numprocs;
-		receive_pos = rows + 1;
-		for (i = 1; i < numprocs; i++) {
-			if (i == numprocs - 1) {
-				rows_to_transmit++;
-			}
-			MPI_Recv(&complete_x[IDX(nb, receive_pos, 0)], rows_to_transmit * (nb), MPI_FLOAT, i, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-			receive_pos += n / numprocs;
-		}
-	} else {
-		complete_x = NULL;
-		rows_to_transmit = rows;
-		if (rank == numprocs - 1) {
-			rows_to_transmit++;
-		}
-		MPI_Send(&x[IDX(nb, 1, 0)], rows_to_transmit * (nb), MPI_FLOAT, 0, TAG_MATRIX, MPI_COMM_WORLD);
-	}
-
-	return complete_x;
-}
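The row split in the deleted implementation gives every rank n / numprocs rows and lets rank 0 absorb the integer-division remainder: rows = n - (n / numprocs) * (numprocs - 1). A quick worked check in plain C (values are illustrative):

    #include <stdio.h>

    /* Worked check of the row partition used above: ranks 1..numprocs-1
     * each get n / numprocs rows; rank 0 takes whatever is left. */
    int main(void) {
        int n = 1000, numprocs = 3;
        int base = n / numprocs;                  /* 333 rows for ranks 1..2 */
        int rank0 = n - base * (numprocs - 1);    /* 334 rows for rank 0 */
        printf("rank 0: %d rows, others: %d rows, total: %d\n",
               rank0, base, rank0 + base * (numprocs - 1));
        return 0;
    }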
src/main/main.cu (new file, 52 lines)
@@ -0,0 +1,52 @@
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include "../config.cuh"
+#include "../utils.cuh"
+
+__host__ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *iterations);
+
+__host__ int main(int argc, char* argv[]) {
+	int n;
+	float init_value, threshold;
+	borders b;
+	int config_loaded;
+	configuration config;
+	float *x;
+	int iterations;
+	struct timeval start, end;
+	long secs_used, micros_used;
+
+	config_loaded = load_config(&config);
+	if (config_loaded != 0) {
+		return 1;
+	}
+	n = config.n;
+	threshold = config.threshold;
+	init_value = config.init_value;
+	b.north = config.north;
+	b.south = config.south;
+	b.east = config.east;
+	b.west = config.west;
+
+	gettimeofday(&start, NULL);
+	x = compute_jacobi(n, init_value, threshold, b, &iterations);
+	gettimeofday(&end, NULL);
+
+	secs_used = (end.tv_sec - start.tv_sec);
+	micros_used = ((secs_used * 1000000) + end.tv_usec) - (start.tv_usec);
+	printf("Wall clock time: %fs\n", (float)micros_used / 1000000);
+	printf("Iterations: %d\n", iterations);
+	if (n < 10) {
+		print_sa_matrix(x, n + 2, n + 2);
+	}
+	destroy_sa_matrix(x);
+
+	return 0;
+}
src/utils.c
@@ -54,3 +54,12 @@ void print_matrix(float **x, int rows, int cols)
 	}
 	fflush(stdout);
 }
+
+
+float fmaxf(float a, float b) {
+	if (a > b) {
+		return a;
+	} else {
+		return b;
+	}
+}
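Note: fmaxf is already a standard C99 function declared in <math.h>, which this project compiles against (CFLAGS uses -std=c99 -lm), so this definition was presumably added because the target toolchain did not expose it; on toolchains that do, the competing definition may collide with the library declaration. A hedged alternative that sidesteps the name:

    /* A differently named helper avoids redefining the C99 fmaxf from <math.h>. */
    static inline float max_float(float a, float b) {
        return (a > b) ? a : b;
    }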
src/utils.cu (new file, 79 lines)
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "utils.cuh"
+
+__host__ float *create_sa_matrix(int rows, int cols) {
+	float *x;
+
+	x = (float *)malloc(rows * cols * sizeof(float));
+	return x;
+}
+
+__host__ float *create_sa_matrix_on_gpu(int rows, int cols, cudaError_t *cuda_status) {
+	float *x = NULL;
+
+	*cuda_status = cudaMalloc((void**)&x, rows * cols * sizeof(float));
+	return x;
+}
+
+__host__ void destroy_sa_matrix(float *x) {
+	free(x);
+}
+
+__host__ void destroy_sa_matrix_on_gpu(float *x) {
+	cudaFree(x);
+}
+
+__host__ void initialize_matrix_on_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
+	*cuda_status = cudaMemset(x, 0, rows * cols * sizeof(float));
+}
+
+__host__ float *retrieve_sa_matrix_from_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
+	float *x_host;
+
+	x_host = create_sa_matrix(rows, cols);
+	*cuda_status = cudaMemcpy(x_host, x, rows * cols * sizeof(float), cudaMemcpyDeviceToHost);
+	return x_host;
+}
+
+__host__ void print_sa_matrix(float *x, int rows, int cols) {
+	int i, j;
+	for (i = 0; i < rows; i++) {
+		for (j = 0; j < cols; j++) {
+			printf("%f\t", x[IDX(cols, i, j)]);
+		}
+		printf("\n");
+	}
+	fflush(stdout);
+}
+
+__host__ float **create_matrix(int rows, int cols) {
+	int i;
+	float **x;
+
+	x = (float **)malloc(rows * sizeof(float *));	/* sizeof(float *): an array of row pointers */
+	for (i = 0; i < rows; i++) {
+		x[i] = (float *)malloc(cols * sizeof(float));
+	}
+	return x;
+}
+
+__host__ void destroy_matrix(float **x, int rows) {
+	int i;
+
+	for (i = 0; i < rows; i++) {
+		free(x[i]);
+	}
+	free(x);
+}
+
+__host__ void print_matrix(float **x, int rows, int cols) {
+	int i, j;
+	for (i = 0; i < rows; i++) {
+		for (j = 0; j < cols; j++) {
+			printf("%f\t", x[i][j]);
+		}
+		printf("\n");
+	}
+	fflush(stdout);
+}
src/utils.cuh (new file, 56 lines)
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+/* #define ENABLE_LOG */
+
+#ifdef ENABLE_LOG
+#	define LOG(x) x
+#else
+#	define LOG(x) (void) 0
+#endif
+
+/*
+ * Macro used with single array matrices to
+ * get the array index given the number of columns,
+ * the row index and the column index.
+ */
+#define IDX(cols, r, c) ((r) * (cols) + (c))
+
+typedef struct borders {
+	float north;
+	float east;
+	float south;
+	float west;
+} borders;
+
+
+/*
+ * Create a matrix stored in a single array.
+ */
+__host__ float *create_sa_matrix(int rows, int cols);
+
+__host__ float *create_sa_matrix_on_gpu(int rows, int cols, cudaError_t *cuda_status);
+
+/*
+ * Destroy a single array matrix.
+ */
+__host__ void destroy_sa_matrix(float *x);
+
+__host__ void destroy_sa_matrix_on_gpu(float *x);
+
+__host__ void initialize_matrix_on_gpu(float *x, int rows, int cols, cudaError_t *cuda_status);
+
+__host__ float *retrieve_sa_matrix_from_gpu(float *x, int rows, int cols, cudaError_t *cuda_status);
+
+
+/*
+ * Print a single array matrix.
+ */
+__host__ void print_sa_matrix(float *x, int rows, int cols);
+
+__host__ float **create_matrix(int rows, int cols);
+__host__ void destroy_matrix(float **x, int rows);
+__host__ void print_matrix(float **x, int rows, int cols);
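The IDX macro above linearizes a row-major (rows x cols) matrix stored in one array. A minimal host-side sketch of how the rest of the code uses it (sizes are illustrative):

    /* Sketch: addressing the bordered (n+2) x (n+2) matrix through IDX. */
    int n = 4;
    int nb = n + 2;                      /* n plus the border */
    float *x = create_sa_matrix(nb, nb);
    x[IDX(nb, 0, 3)] = 300.0f;           /* row 0, column 3: a north-border cell */
    x[IDX(nb, 2, 1)] = 0.25f * (x[IDX(nb, 1, 1)] + x[IDX(nb, 2, 2)]
                              + x[IDX(nb, 3, 1)] + x[IDX(nb, 2, 0)]);  /* Jacobi stencil */
    destroy_sa_matrix(x);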
src/utils.h
@@ -44,3 +44,5 @@ void print_sa_matrix(float *x, int rows, int cols);
 float **create_matrix(int rows, int cols);
 void destroy_matrix(float **x, int rows);
 void print_matrix(float **x, int rows, int cols);
+
+float fmaxf(float a, float b);