Tejas Belagod <[email protected]> writes:
> Fix the udr-sve.c target test to check for the correct results based on the
> OpenMP clauses used. The test was first written with a misunderstanding of
> how the reduction clause works.
>
> Tested with aarch64-linux-gnu. OK for trunk?
>
> libgomp/ChangeLog:
>
> * testsuite/libgomp.c-target/aarch64/udr-sve.c: Fix test.
Thanks for the update. OK if Jakub has no further comments by Monday.
Richard
> ---
> .../libgomp.c-target/aarch64/udr-sve.c | 58 +++++++++++++++----
> 1 file changed, 47 insertions(+), 11 deletions(-)
>
> diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
> index 03d93cc44b2..02e02dc04b6 100644
> --- a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
> +++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
> @@ -9,8 +9,8 @@
> void __attribute__ ((noipa))
> parallel_reduction ()
> {
> - int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
> - int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
> + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
> + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
> svint32_t va = svld1_s32 (svptrue_b32 (), b);
> int i = 0;
> int64_t res;
> @@ -30,8 +30,8 @@ parallel_reduction ()
> void __attribute__ ((noipa))
> for_reduction ()
> {
> - int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
> - int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
> + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
> + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
> svint32_t va = svld1_s32 (svptrue_b32 (), b);
> int j;
> int64_t res;
> @@ -58,13 +58,13 @@ simd_reduction ()
> for (j = 0; j < 8; j++)
> a[j] = 1;
>
> - #pragma omp simd reduction (+:va, i)
> + #pragma omp simd reduction (+:va)
> for (j = 0; j < 16; j++)
> - va = svld1_s32 (svptrue_b32 (), a);
> + va += svld1_s32 (svptrue_b32 (), a);
>
> res = svaddv_s32 (svptrue_b32 (), va);
>
> - if (res != 8)
> + if (res != 128)
> __builtin_abort ();
> }
>
> @@ -72,22 +72,57 @@ void __attribute__ ((noipa))
> inscan_reduction_incl ()
> {
> svint32_t va = svindex_s32 (0, 0);
> + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
> + int b[64] = { 0 };
> int j;
> int64_t res = 0;
>
> - #pragma omp parallel
> - #pragma omp for reduction (inscan,+:va) firstprivate (res) lastprivate (res)
> + #pragma omp parallel for reduction (inscan, +:va)
> for (j = 0; j < 8; j++)
> {
> - va = svindex_s32 (1, 0);
> + va += svld1_s32 (svptrue_b32 (), a);
> #pragma omp scan inclusive (va)
> - res += svaddv_s32 (svptrue_b32 (), va);
> + svst1_s32 (svptrue_b32 (), b + j * 8, va);
> + }
> +
> + res = svaddv_s32 (svptrue_b32 (), va);
> +
> + if (res != 64)
> + __builtin_abort ();
> +
> + for (j = 0; j < 64; j+=8)
> + if (b[j] != (j / 8 + 1))
> + __builtin_abort ();
> +}
> +
> +void __attribute__ ((noipa))
> +inscan_reduction_excl ()
> +{
> + svint32_t va = svindex_s32 (0, 0);
> + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
> + int b[64] = { 0 };
> + int j;
> + int64_t res = 0;
> +
> + #pragma omp parallel for reduction (inscan, +:va)
> + for (j = 0; j < 8; j++)
> + {
> + svst1_s32 (svptrue_b32 (), b + j * 8, va);
> + #pragma omp scan exclusive (va)
> + va += svld1_s32 (svptrue_b32 (), a);
> }
>
> + res = svaddv_s32 (svptrue_b32 (), va);
> +
> if (res != 64)
> __builtin_abort ();
> +
> + for (j = 0; j < 64; j+=8)
> + if (b[j] != j / 8)
> + __builtin_abort ();
> }
>
> +
> int
> main ()
> {
> @@ -95,4 +130,5 @@ main ()
> for_reduction ();
> simd_reduction ();
> inscan_reduction_incl ();
> + inscan_reduction_excl ();
> }