SSE: diffs with vector instructions

This commit is contained in:
Fabio Salvini 2016-12-13 18:52:49 +01:00
parent ef2e49659d
commit 4f80d09da9

View File

@ -13,10 +13,11 @@ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *
float *new_x;
float *tmp_x;
float max_diff, new_value;
int i, j;
int i, j, k;
int nb = n + 2; // n plus the border
int n_mult = (n / 4) * 4;
__m128 new_value_vec, tmp_vec;
__m128 new_value_vec, tmp_vec, diff_vec;
float diffs[] = {0.0, 0.0, 0.0, 0.0};
/* Initialize boundary regions */
x = create_sa_matrix(nb, nb);
new_x = create_sa_matrix(nb, nb);
@ -52,10 +53,14 @@ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *
tmp_vec = _mm_set1_ps(0.25);
new_value_vec = _mm_mul_ps(new_value_vec, tmp_vec);
_mm_storeu_ps(&new_x[IDX(nb, i, j)], new_value_vec);
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 2)] - x[IDX(nb, i, j + 2)]));
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 3)] - x[IDX(nb, i, j + 3)]));
diff_vec = _mm_loadu_ps(&new_x[IDX(nb, i, j)]);
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j)]);
diff_vec = _mm_sub_ps(diff_vec, tmp_vec);
_mm_storeu_ps(diffs, diff_vec);
for (k = 0; k < 4; k++) {
max_diff = (float) fmax(max_diff, fabs(diffs[k]));
}
}
for (j = n_mult; j <= n; j++) {
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);