// JacobiHPC/src/utils.cu

#include <stdio.h>
#include <stdlib.h>
#include "utils.cuh"

/*
 * Allocates an uninitialized host buffer holding rows * cols floats in one
 * contiguous malloc'ed block.
 *
 * rows, cols: matrix dimensions; both must be non-negative.
 *
 * Returns the buffer (release it with destroy_sa_matrix / free), or NULL when
 * a dimension is negative or malloc fails.
 */
__host__ float *create_sa_matrix(int rows, int cols) {
	/* Two negative dimensions would otherwise multiply to a positive size. */
	if (rows < 0 || cols < 0) {
		return NULL;
	}

	/* Widen to size_t before multiplying so the element count is not
	 * computed in int, where large matrices would overflow. */
	return (float *)malloc((size_t)rows * (size_t)cols * sizeof(float));
}

/*
 * Allocates rows * cols floats with cudaMalloc.
 *
 * rows, cols:   matrix dimensions; both must be non-negative.
 * cuda_status:  out-parameter receiving the cudaMalloc result, or
 *               cudaErrorInvalidValue when a dimension is negative.
 *
 * Returns the pointer produced by cudaMalloc (release it with
 * destroy_sa_matrix_on_gpu / cudaFree), or NULL when *cuda_status is not
 * cudaSuccess.
 */
__host__ float *create_sa_matrix_on_gpu(int rows, int cols, cudaError_t *cuda_status) {
	float *x = NULL;

	if (rows < 0 || cols < 0) {
		*cuda_status = cudaErrorInvalidValue;
		return NULL;
	}

	/* Widen to size_t before multiplying so the element count is not
	 * computed in int, where large matrices would overflow. */
	*cuda_status = cudaMalloc((void **)&x, (size_t)rows * (size_t)cols * sizeof(float));
	if (*cuda_status != cudaSuccess) {
		return NULL;
	}
	return x;
}

/*
 * Releases a host buffer by passing it to free().
 *
 * x: pointer to release; NULL is accepted and does nothing.
 */
__host__ void destroy_sa_matrix(float *x) {
	if (x != NULL) {
		free(x);
	}
}

/*
 * Releases a buffer by passing it to cudaFree().
 *
 * x: pointer to release.
 *
 * The function returns void, so a cudaFree failure cannot be handed back to
 * the caller; it is reported on stderr instead of being silently dropped.
 */
__host__ void destroy_sa_matrix_on_gpu(float *x) {
	cudaError_t status = cudaFree(x);

	if (status != cudaSuccess) {
		fprintf(stderr, "destroy_sa_matrix_on_gpu: cudaFree failed: %s\n",
			cudaGetErrorString(status));
	}
}

/*
 * Zero-fills a rows * cols float buffer using cudaMemset.
 *
 * x:            buffer handed to cudaMemset as the destination.
 * rows, cols:   matrix dimensions; both must be non-negative.
 * cuda_status:  out-parameter receiving the cudaMemset result, or
 *               cudaErrorInvalidValue when a dimension is negative.
 *
 * cudaMemset writes bytes, so every byte of the buffer is set to 0.
 */
__host__ void initialize_matrix_on_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
	if (rows < 0 || cols < 0) {
		*cuda_status = cudaErrorInvalidValue;
		return;
	}

	/* Widen to size_t before multiplying so the byte count is not derived
	 * from an int product that could overflow. */
	*cuda_status = cudaMemset(x, 0, (size_t)rows * (size_t)cols * sizeof(float));
}

/*
 * Copies a rows * cols float buffer into a newly malloc'ed host buffer using
 * cudaMemcpy with cudaMemcpyDeviceToHost.
 *
 * x:            source pointer for the copy.
 * rows, cols:   matrix dimensions; both must be non-negative.
 * cuda_status:  out-parameter receiving the outcome: the cudaMemcpy result,
 *               cudaErrorInvalidValue for a negative dimension, or
 *               cudaErrorMemoryAllocation when the host allocation fails.
 *
 * Returns the host copy (release it with destroy_sa_matrix / free), or NULL
 * whenever *cuda_status is not cudaSuccess, so the caller never receives a
 * buffer of uninitialized data.
 */
__host__ float *retrieve_sa_matrix_from_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
	float *x_host;
	size_t bytes;

	if (rows < 0 || cols < 0) {
		*cuda_status = cudaErrorInvalidValue;
		return NULL;
	}

	/* Widen to size_t before multiplying so the byte count is not derived
	 * from an int product that could overflow. */
	bytes = (size_t)rows * (size_t)cols * sizeof(float);

	x_host = (float *)malloc(bytes);
	/* malloc(0) may legitimately return NULL; only a non-empty request
	 * that yields NULL is an allocation failure. */
	if (x_host == NULL && bytes != 0) {
		*cuda_status = cudaErrorMemoryAllocation;
		return NULL;
	}

	*cuda_status = cudaMemcpy(x_host, x, bytes, cudaMemcpyDeviceToHost);
	if (*cuda_status != cudaSuccess) {
		/* Do not leak, and do not hand back garbage, when the copy failed. */
		free(x_host);
		return NULL;
	}
	return x_host;
}

/*
 * Prints a rows x cols matrix stored in a single float buffer to stdout.
 *
 * Each element is written with "%f" followed by a tab, and each row ends
 * with a newline. Element (r, c) is located through the IDX macro as
 * IDX(cols, r, c). stdout is flushed once after the last row.
 *
 * x:          buffer to print.
 * rows, cols: matrix dimensions.
 */
__host__ void print_sa_matrix(float *x, int rows, int cols) {
	for (int r = 0; r < rows; ++r) {
		for (int c = 0; c < cols; ++c) {
			printf("%f\t", x[IDX(cols, r, c)]);
		}
		printf("\n");
	}

	fflush(stdout);
}

/*
 * Allocates a host matrix as an array of `rows` row pointers, each pointing
 * to its own malloc'ed array of `cols` uninitialized floats.
 *
 * rows, cols: matrix dimensions; both must be non-negative.
 *
 * Returns the row-pointer array (release it with destroy_matrix), or NULL
 * when a dimension is negative or any allocation fails. On a partial failure
 * every row allocated so far is freed before returning.
 */
__host__ float **create_matrix(int rows, int cols) {
	int i;
	float **x;

	if (rows < 0 || cols < 0) {
		return NULL;
	}

	/* The outer array stores pointers, so it must be sized with
	 * sizeof(float *); sizeof(float) under-allocates wherever pointers are
	 * wider than float. */
	x = (float **)malloc((size_t)rows * sizeof(float *));
	if (x == NULL) {
		return NULL;
	}

	for (i = 0; i < rows; i++) {
		x[i] = (float *)malloc((size_t)cols * sizeof(float));
		/* malloc(0) may legitimately return NULL; only treat NULL as a
		 * failure when a non-empty row was requested. */
		if (x[i] == NULL && cols > 0) {
			while (i-- > 0) {
				free(x[i]);
			}
			free(x);
			return NULL;
		}
	}
	return x;
}

/*
 * Frees a matrix laid out as an array of separately allocated rows: each of
 * the first `rows` row pointers is freed, then the row-pointer array itself.
 *
 * x:    row-pointer array to release; NULL is accepted and does nothing,
 *       mirroring free(NULL).
 * rows: number of row pointers stored in x.
 */
__host__ void destroy_matrix(float **x, int rows) {
	int i;

	/* Without this guard a NULL matrix would be dereferenced below. */
	if (x == NULL) {
		return;
	}

	for (i = 0; i < rows; i++) {
		free(x[i]);
	}
	free(x);
}

/*
 * Prints a rows x cols matrix, given as an array of row pointers, to stdout.
 *
 * Each element is written with "%f" followed by a tab, and each row ends
 * with a newline. stdout is flushed once after the last row.
 *
 * x:          array of row pointers; x[r][c] is element (r, c).
 * rows, cols: matrix dimensions.
 */
__host__ void print_matrix(float **x, int rows, int cols) {
	for (int r = 0; r < rows; ++r) {
		for (int c = 0; c < cols; ++c) {
			printf("%f\t", x[r][c]);
		}
		printf("\n");
	}

	fflush(stdout);
}