SSE: diffs with vector instructions
This commit is contained in:
parent
ef2e49659d
commit
4f80d09da9
|
@ -13,10 +13,11 @@ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *
|
||||||
float *new_x;
|
float *new_x;
|
||||||
float *tmp_x;
|
float *tmp_x;
|
||||||
float max_diff, new_value;
|
float max_diff, new_value;
|
||||||
int i, j;
|
int i, j, k;
|
||||||
int nb = n + 2; // n plus the border
|
int nb = n + 2; // n plus the border
|
||||||
int n_mult = (n / 4) * 4;
|
int n_mult = (n / 4) * 4;
|
||||||
__m128 new_value_vec, tmp_vec;
|
__m128 new_value_vec, tmp_vec, diff_vec;
|
||||||
|
float diffs[] = {0.0, 0.0, 0.0, 0.0};
|
||||||
/* Initialize boundary regions */
|
/* Initialize boundary regions */
|
||||||
x = create_sa_matrix(nb, nb);
|
x = create_sa_matrix(nb, nb);
|
||||||
new_x = create_sa_matrix(nb, nb);
|
new_x = create_sa_matrix(nb, nb);
|
||||||
|
@ -52,10 +53,14 @@ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *
|
||||||
tmp_vec = _mm_set1_ps(0.25);
|
tmp_vec = _mm_set1_ps(0.25);
|
||||||
new_value_vec = _mm_mul_ps(new_value_vec, tmp_vec);
|
new_value_vec = _mm_mul_ps(new_value_vec, tmp_vec);
|
||||||
_mm_storeu_ps(&new_x[IDX(nb, i, j)], new_value_vec);
|
_mm_storeu_ps(&new_x[IDX(nb, i, j)], new_value_vec);
|
||||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
|
|
||||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
|
diff_vec = _mm_loadu_ps(&new_x[IDX(nb, i, j)]);
|
||||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 2)] - x[IDX(nb, i, j + 2)]));
|
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j)]);
|
||||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 3)] - x[IDX(nb, i, j + 3)]));
|
diff_vec = _mm_sub_ps(diff_vec, tmp_vec);
|
||||||
|
_mm_storeu_ps(diffs, diff_vec);
|
||||||
|
for (k = 0; k < 4; k++) {
|
||||||
|
max_diff = (float) fmax(max_diff, fabs(diffs[k]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (j = n_mult; j <= n; j++) {
|
for (j = n_mult; j <= n; j++) {
|
||||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user