SSE: diffs with vector instructions
This commit is contained in:
parent
ef2e49659d
commit
4f80d09da9
|
@ -13,10 +13,11 @@ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *
|
|||
float *new_x;
|
||||
float *tmp_x;
|
||||
float max_diff, new_value;
|
||||
int i, j;
|
||||
int i, j, k;
|
||||
int nb = n + 2; // n plus the border
|
||||
int n_mult = (n / 4) * 4;
|
||||
__m128 new_value_vec, tmp_vec;
|
||||
__m128 new_value_vec, tmp_vec, diff_vec;
|
||||
float diffs[] = {0.0, 0.0, 0.0, 0.0};
|
||||
/* Initialize boundary regions */
|
||||
x = create_sa_matrix(nb, nb);
|
||||
new_x = create_sa_matrix(nb, nb);
|
||||
|
@ -52,10 +53,14 @@ float *compute_jacobi(int n, float init_value, float threshold, borders b, int *
|
|||
tmp_vec = _mm_set1_ps(0.25);
|
||||
new_value_vec = _mm_mul_ps(new_value_vec, tmp_vec);
|
||||
_mm_storeu_ps(&new_x[IDX(nb, i, j)], new_value_vec);
|
||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j)] - x[IDX(nb, i, j)]));
|
||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 1)] - x[IDX(nb, i, j + 1)]));
|
||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 2)] - x[IDX(nb, i, j + 2)]));
|
||||
max_diff = (float) fmax(max_diff, fabs(new_x[IDX(nb, i, j + 3)] - x[IDX(nb, i, j + 3)]));
|
||||
|
||||
diff_vec = _mm_loadu_ps(&new_x[IDX(nb, i, j)]);
|
||||
tmp_vec = _mm_loadu_ps(&x[IDX(nb, i, j)]);
|
||||
diff_vec = _mm_sub_ps(diff_vec, tmp_vec);
|
||||
_mm_storeu_ps(diffs, diff_vec);
|
||||
for (k = 0; k < 4; k++) {
|
||||
max_diff = (float) fmax(max_diff, fabs(diffs[k]));
|
||||
}
|
||||
}
|
||||
for (j = n_mult; j <= n; j++) {
|
||||
new_value = 0.25 * (x[IDX(nb, i - 1, j)] + x[IDX(nb, i, j + 1)] + x[IDX(nb, i + 1, j)] + x[IDX(nb, i, j - 1)]);
|
||||
|
|
Loading…
Reference in New Issue
Block a user