80 lines
1.7 KiB
Plaintext
80 lines
1.7 KiB
Plaintext
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "utils.cuh"
|
|
|
|
/*
 * Allocate an uninitialized host buffer holding rows * cols floats in one
 * contiguous block.
 *
 * rows, cols: matrix dimensions; negative values are rejected.
 *
 * Returns the malloc'ed buffer, or NULL when a dimension is negative, the
 * byte count does not fit in size_t, or malloc fails. The buffer comes from
 * malloc, so it must be released with free().
 */
__host__ float *create_sa_matrix(int rows, int cols) {
    size_t n_rows;
    size_t n_cols;

    /* A negative dimension would wrap to a huge value once converted to size_t. */
    if (rows < 0 || cols < 0) {
        return NULL;
    }
    n_rows = (size_t)rows;
    n_cols = (size_t)cols;

    /* Refuse sizes whose byte count cannot be represented in size_t. */
    if (n_cols != 0 && n_rows > ((size_t)-1) / sizeof(float) / n_cols) {
        return NULL;
    }

    /* Multiply in size_t: the int product rows * cols can overflow for large matrices. */
    return (float *)malloc(n_rows * n_cols * sizeof(float));
}
|
|
|
|
/*
 * Allocate device memory for rows * cols floats with cudaMalloc.
 *
 * rows, cols:   matrix dimensions; negative values are rejected.
 * cuda_status:  out-parameter (must not be NULL) receiving the cudaMalloc
 *               result, or cudaErrorInvalidValue for a negative dimension.
 *
 * Returns the device pointer on success and NULL on any failure. The
 * contents of the allocation are not initialized by this function.
 */
__host__ float *create_sa_matrix_on_gpu(int rows, int cols, cudaError_t *cuda_status) {
    float *device_matrix = NULL;
    size_t byte_count;

    /* A negative dimension would otherwise turn into an enormous size_t request. */
    if (rows < 0 || cols < 0) {
        *cuda_status = cudaErrorInvalidValue;
        return NULL;
    }

    /* Widen before multiplying so the int product rows * cols cannot overflow. */
    byte_count = (size_t)rows * (size_t)cols * sizeof(float);

    *cuda_status = cudaMalloc((void **)&device_matrix, byte_count);
    if (*cuda_status != cudaSuccess) {
        return NULL;
    }
    return device_matrix;
}
|
|
|
|
/*
 * Release a host buffer with free().
 *
 * x: pointer to release; passing NULL is a no-op, exactly as with free().
 */
__host__ void destroy_sa_matrix(float *x) {
    if (x != NULL) {
        free(x);
    }
}
|
|
|
|
/*
 * Release device memory with cudaFree.
 *
 * x: device pointer to release.
 *
 * The function returns void, so a cudaFree failure cannot be handed back to
 * the caller; it is reported on stderr instead of being silently dropped.
 */
__host__ void destroy_sa_matrix_on_gpu(float *x) {
    cudaError_t status = cudaFree(x);

    if (status != cudaSuccess) {
        fprintf(stderr, "destroy_sa_matrix_on_gpu: cudaFree failed: %s\n",
                cudaGetErrorString(status));
    }
}
|
|
|
|
/*
 * Set rows * cols floats starting at x to zero bytes with cudaMemset.
 *
 * x:            pointer handed to cudaMemset (presumably a device allocation
 *               of at least rows * cols floats; not checked here).
 * rows, cols:   matrix dimensions; negative values are rejected.
 * cuda_status:  out-parameter (must not be NULL) receiving the cudaMemset
 *               result, or cudaErrorInvalidValue for a negative dimension.
 */
__host__ void initialize_matrix_on_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
    size_t byte_count;

    /* A negative dimension would otherwise wrap to an enormous byte count. */
    if (rows < 0 || cols < 0) {
        *cuda_status = cudaErrorInvalidValue;
        return;
    }

    /* Widen before multiplying so the int product rows * cols cannot overflow. */
    byte_count = (size_t)rows * (size_t)cols * sizeof(float);

    *cuda_status = cudaMemset(x, 0, byte_count);
}
|
|
|
|
/*
 * Copy rows * cols floats from x into a freshly allocated host buffer.
 *
 * x:            source pointer for a cudaMemcpyDeviceToHost copy.
 * rows, cols:   matrix dimensions; negative values are rejected.
 * cuda_status:  out-parameter (must not be NULL). Receives the cudaMemcpy
 *               result, cudaErrorInvalidValue for a negative dimension, or
 *               cudaErrorMemoryAllocation when the host buffer cannot be
 *               allocated.
 *
 * Returns the host buffer obtained from create_sa_matrix; the caller owns it.
 * When cudaMemcpy fails the buffer is still returned (contents unspecified),
 * so the caller must check *cuda_status before using the data. Returns NULL
 * when the dimensions are negative or the host allocation fails.
 */
__host__ float *retrieve_sa_matrix_from_gpu(float *x, int rows, int cols, cudaError_t *cuda_status) {
    float *host_copy;
    size_t byte_count;

    if (rows < 0 || cols < 0) {
        *cuda_status = cudaErrorInvalidValue;
        return NULL;
    }

    host_copy = create_sa_matrix(rows, cols);

    /* A NULL result is only an error for a non-empty matrix: a zero-byte
       allocation is allowed to come back as NULL. */
    if (host_copy == NULL && rows > 0 && cols > 0) {
        *cuda_status = cudaErrorMemoryAllocation;
        return NULL;
    }

    /* Widen before multiplying so the int product rows * cols cannot overflow. */
    byte_count = (size_t)rows * (size_t)cols * sizeof(float);

    *cuda_status = cudaMemcpy(host_copy, x, byte_count, cudaMemcpyDeviceToHost);
    return host_copy;
}
|
|
|
|
/*
 * Print a rows x cols matrix stored in a single flat array to stdout.
 *
 * Each element is written with "%f" followed by a tab, each row ends with a
 * newline, and stdout is flushed once at the end.
 *
 * x:          flat array of matrix elements, read on the host.
 * rows, cols: matrix dimensions.
 *
 * NOTE(review): IDX is not defined in this file; presumably it comes from
 * utils.cuh and maps (cols, row, col) to a flat offset - confirm there.
 */
__host__ void print_sa_matrix(float *x, int rows, int cols) {
    for (int row = 0; row < rows; ++row) {
        for (int col = 0; col < cols; ++col) {
            const float value = x[IDX(cols, row, col)];
            printf("%f\t", value);
        }
        printf("\n");
    }
    fflush(stdout);
}
|
|
|
|
/*
 * Allocate a host matrix as an array of `rows` row pointers, each pointing
 * to its own uninitialized array of `cols` floats.
 *
 * rows, cols: matrix dimensions; negative values are rejected.
 *
 * Returns the row-pointer table, or NULL when a dimension is negative or any
 * allocation fails. On failure everything allocated so far is released, so
 * the result is all-or-nothing. Rows and the table come from malloc and must
 * be released row by row, then the table itself.
 */
__host__ float **create_matrix(int rows, int cols) {
    int i;
    float **x;

    if (rows < 0 || cols < 0) {
        return NULL;
    }

    /* The table stores pointers, so each slot is sizeof(float *), not
       sizeof(float); the smaller size under-allocates where pointers are
       wider than float and the loop below would write past the buffer. */
    x = (float **)malloc((size_t)rows * sizeof(float *));
    if (x == NULL) {
        return NULL;
    }

    for (i = 0; i < rows; i++) {
        x[i] = (float *)malloc((size_t)cols * sizeof(float));
        /* With cols == 0 a NULL row is a legal zero-byte allocation result. */
        if (x[i] == NULL && cols > 0) {
            /* Undo the rows allocated before the failure. */
            while (i-- > 0) {
                free(x[i]);
            }
            free(x);
            return NULL;
        }
    }
    return x;
}
|
|
|
|
/*
 * Release a host matrix built as a table of row pointers: every row is
 * freed first, then the table itself.
 *
 * x:    row-pointer table; NULL is accepted and ignored, mirroring free().
 * rows: number of row pointers stored in x.
 */
__host__ void destroy_matrix(float **x, int rows) {
    int i;

    /* Without this guard a NULL table would be dereferenced in the loop. */
    if (x == NULL) {
        return;
    }

    for (i = 0; i < rows; i++) {
        free(x[i]);
    }
    free(x);
}
|
|
|
|
/*
 * Print a rows x cols matrix stored as an array of row pointers to stdout.
 *
 * Each element is written with "%f" followed by a tab, each row ends with a
 * newline, and stdout is flushed once after the last row.
 *
 * x:          table of row pointers, read on the host.
 * rows, cols: matrix dimensions.
 */
__host__ void print_matrix(float **x, int rows, int cols) {
    for (int row = 0; row < rows; ++row) {
        const float *row_values = x[row];
        for (int col = 0; col < cols; ++col) {
            printf("%f\t", row_values[col]);
        }
        printf("\n");
    }
    fflush(stdout);
}
|