JacobiHPC/src/utils.cu

80 lines
1.7 KiB
Plaintext

#include <stdio.h>
#include <stdlib.h>
#include "utils.cuh"
/**
 * Allocates an uninitialized host buffer holding rows * cols floats as a
 * single contiguous malloc'ed block.
 *
 * @param rows Number of rows; negative values are rejected.
 * @param cols Number of columns; negative values are rejected.
 * @return Pointer to the buffer, or NULL if a dimension is negative or the
 *         allocation fails. The caller releases it with free().
 */
__host__ float *create_sa_matrix(int rows, int cols) {
    size_t count;

    if (rows < 0 || cols < 0) {
        return NULL;
    }

    /* Widen before multiplying: rows * cols evaluated in int overflows for
     * large matrices before it is ever converted to size_t. */
    count = (size_t)rows * (size_t)cols;
    return (float *)malloc(count * sizeof(float));
}
/**
 * Allocates a buffer of rows * cols floats with cudaMalloc.
 *
 * @param rows        Number of rows; negative values are rejected.
 * @param cols        Number of columns; negative values are rejected.
 * @param cuda_status Out-parameter receiving the cudaMalloc result, or
 *                    cudaErrorInvalidValue when a dimension is negative.
 *                    Must not be NULL.
 * @return Pointer produced by cudaMalloc, or NULL whenever *cuda_status is
 *         not cudaSuccess.
 */
__host__ float *create_sa_matrix_on_gpu(int rows, int cols, cudaError_t *cuda_status) {
    float *x = NULL;
    size_t bytes;

    if (rows < 0 || cols < 0) {
        *cuda_status = cudaErrorInvalidValue;
        return NULL;
    }

    /* Widen before multiplying: rows * cols evaluated in int overflows for
     * large matrices before it is ever converted to size_t. */
    bytes = (size_t)rows * (size_t)cols * sizeof(float);

    *cuda_status = cudaMalloc((void **)&x, bytes);
    if (*cuda_status != cudaSuccess) {
        /* Never hand back a pointer the caller might mistake for valid. */
        x = NULL;
    }
    return x;
}
/**
 * Releases a host buffer by passing it to free().
 *
 * @param x Pointer to release; it is forwarded to free() unchanged.
 */
__host__ void destroy_sa_matrix(float *x) {
    void *block = x;
    free(block);
}
/**
 * Releases a buffer by passing it to cudaFree().
 *
 * @param x Pointer to release; it is forwarded to cudaFree() unchanged.
 *
 * The status returned by cudaFree() is not reported to the caller.
 */
__host__ void destroy_sa_matrix_on_gpu(float *x) {
    /* The function returns void, so the status is discarded explicitly. */
    (void)cudaFree(x);
}
/**
 * Sets every byte of a rows * cols float buffer to zero with cudaMemset.
 *
 * @param x           Buffer to clear; passed to cudaMemset as-is.
 * @param rows        Number of rows; negative values are rejected.
 * @param cols        Number of columns; negative values are rejected.
 * @param cuda_status Out-parameter receiving the cudaMemset result, or
 *                    cudaErrorInvalidValue when a dimension is negative.
 *                    Must not be NULL.
 */
__host__ void initialize_matrix_on_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
    size_t bytes;

    if (rows < 0 || cols < 0) {
        *cuda_status = cudaErrorInvalidValue;
        return;
    }

    /* Widen before multiplying: rows * cols evaluated in int overflows for
     * large matrices before it is ever converted to size_t. */
    bytes = (size_t)rows * (size_t)cols * sizeof(float);

    *cuda_status = cudaMemset(x, 0, bytes);
}
/**
 * Allocates a host buffer with create_sa_matrix() and fills it with
 * rows * cols floats copied from x using cudaMemcpyDeviceToHost.
 *
 * @param x           Source pointer for the copy.
 * @param rows        Number of rows; negative values are rejected.
 * @param cols        Number of columns; negative values are rejected.
 * @param cuda_status Out-parameter receiving the outcome: the cudaMemcpy
 *                    result, cudaErrorInvalidValue for a negative dimension,
 *                    or cudaErrorMemoryAllocation when the host buffer could
 *                    not be allocated. Must not be NULL.
 * @return The newly allocated host buffer, or NULL when the dimensions are
 *         invalid or the host allocation fails. If only the copy fails the
 *         buffer is still returned (contents unspecified) and the caller
 *         remains responsible for releasing it.
 */
__host__ float *retrieve_sa_matrix_from_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
    float *x_host;
    size_t bytes;

    if (rows < 0 || cols < 0) {
        *cuda_status = cudaErrorInvalidValue;
        return NULL;
    }

    /* Widen before multiplying: rows * cols evaluated in int overflows for
     * large matrices before it is ever converted to size_t. */
    bytes = (size_t)rows * (size_t)cols * sizeof(float);

    x_host = create_sa_matrix(rows, cols);
    if (x_host == NULL && bytes > 0) {
        /* Report the real cause instead of handing NULL to cudaMemcpy. */
        *cuda_status = cudaErrorMemoryAllocation;
        return NULL;
    }

    *cuda_status = cudaMemcpy(x_host, x, bytes, cudaMemcpyDeviceToHost);
    return x_host;
}
/**
 * Writes a matrix stored in a single array to stdout, one row per line.
 *
 * Element (r, c) is located through the IDX macro, each value is printed
 * with "%f" followed by a tab, and stdout is flushed at the end.
 *
 * @param x    Array holding the matrix elements.
 * @param rows Number of rows to print.
 * @param cols Number of columns to print; also passed to IDX.
 */
__host__ void print_sa_matrix(float *x, int rows, int cols) {
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            const float value = x[IDX(cols, r, c)];
            printf("%f\t", value);
        }
        printf("\n");
    }
    fflush(stdout);
}
/**
 * Allocates a matrix as an array of row pointers, each row being its own
 * uninitialized malloc'ed array of cols floats.
 *
 * @param rows Number of rows; negative values are rejected.
 * @param cols Number of columns; negative values are rejected.
 * @return The row-pointer table, or NULL if a dimension is negative or any
 *         allocation fails (rows already allocated are released first).
 */
__host__ float **create_matrix(int rows, int cols) {
    int i;
    float **x;

    if (rows < 0 || cols < 0) {
        return NULL;
    }

    /* The table stores pointers, so it must be sized with sizeof(float *);
     * sizeof(float) under-allocates wherever pointers are wider than float. */
    x = (float **)malloc((size_t)rows * sizeof(float *));
    if (x == NULL) {
        return NULL;
    }

    for (i = 0; i < rows; i++) {
        x[i] = (float *)malloc((size_t)cols * sizeof(float));
        /* malloc(0) may legitimately return NULL, so only a NULL for a
         * non-empty row counts as failure. */
        if (x[i] == NULL && cols > 0) {
            while (i-- > 0) {
                free(x[i]);
            }
            free(x);
            return NULL;
        }
    }
    return x;
}
/**
 * Releases a matrix stored as an array of row pointers: every row is freed,
 * then the pointer table itself.
 *
 * @param x    Row-pointer table to release; NULL is accepted and ignored.
 * @param rows Number of row pointers stored in x.
 */
__host__ void destroy_matrix(float **x, int rows) {
    int i;

    /* Mirror free(NULL): without this guard x[i] would dereference NULL. */
    if (x == NULL) {
        return;
    }

    for (i = 0; i < rows; i++) {
        free(x[i]);
    }
    free(x);
}
/**
 * Writes a matrix stored as an array of row pointers to stdout, one row per
 * line.
 *
 * Each value is printed with "%f" followed by a tab, and stdout is flushed
 * at the end.
 *
 * @param x    Row-pointer table; x[r][c] is read for every printed element.
 * @param rows Number of rows to print.
 * @param cols Number of columns to print.
 */
__host__ void print_matrix(float **x, int rows, int cols) {
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            const float value = x[r][c];
            printf("%f\t", value);
        }
        printf("\n");
    }
    fflush(stdout);
}