Tejas Belagod <tejas.bela...@arm.com> writes: > Fix udr-sve.c target test to check for the correct results based on the > OpenMP clauses used. The test was first written with a misunderstanding of the > functionality of the reduction clause. > > Tested with aarch64-linux-gnu. OK for trunk? > > libgomp/ChangeLog: > > * testsuite/libgomp.c-target/aarch64/udr-sve.c: Fix test.
Thanks for the update. OK if Jakub has no further comments by Monday. Richard > --- > .../libgomp.c-target/aarch64/udr-sve.c | 58 +++++++++++++++---- > 1 file changed, 47 insertions(+), 11 deletions(-) > > diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c > b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c > index 03d93cc44b2..02e02dc04b6 100644 > --- a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c > +++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c > @@ -9,8 +9,8 @@ > void __attribute__ ((noipa)) > parallel_reduction () > { > - int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1}; > - int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0}; > + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; > + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; > svint32_t va = svld1_s32 (svptrue_b32 (), b); > int i = 0; > int64_t res; > @@ -30,8 +30,8 @@ parallel_reduction () > void __attribute__ ((noipa)) > for_reduction () > { > - int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1}; > - int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0}; > + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; > + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; > svint32_t va = svld1_s32 (svptrue_b32 (), b); > int j; > int64_t res; > @@ -58,13 +58,13 @@ simd_reduction () > for (j = 0; j < 8; j++) > a[j] = 1; > > - #pragma omp simd reduction (+:va, i) > + #pragma omp simd reduction (+:va) > for (j = 0; j < 16; j++) > - va = svld1_s32 (svptrue_b32 (), a); > + va += svld1_s32 (svptrue_b32 (), a); > > res = svaddv_s32 (svptrue_b32 (), va); > > - if (res != 8) > + if (res != 128) > __builtin_abort (); > } > > @@ -72,22 +72,57 @@ void __attribute__ ((noipa)) > inscan_reduction_incl () > { > svint32_t va = svindex_s32 (0, 0); > + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; > + int b[64] = { 0 }; > int j; > int64_t res = 0; > > - #pragma omp parallel > - #pragma omp for reduction (inscan,+:va) firstprivate (res) lastprivate > (res) > + #pragma omp parallel for reduction (inscan, +:va) > for (j = 0; j < 8; j++) > { > - va = svindex_s32 (1, 0); > + va += svld1_s32 (svptrue_b32 (), a); > 
#pragma omp scan inclusive (va) > - res += svaddv_s32 (svptrue_b32 (), va); > + svst1_s32 (svptrue_b32 (), b + j * 8, va); > + } > + > + res = svaddv_s32 (svptrue_b32 (), va); > + > + if (res != 64) > + __builtin_abort (); > + > + for (j = 0; j < 64; j+=8) > + if (b[j] != (j / 8 + 1)) > + __builtin_abort (); > +} > + > +void __attribute__ ((noipa)) > +inscan_reduction_excl () > +{ > + svint32_t va = svindex_s32 (0, 0); > + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; > + int b[64] = { 0 }; > + int j; > + int64_t res = 0; > + > + #pragma omp parallel for reduction (inscan, +:va) > + for (j = 0; j < 8; j++) > + { > + svst1_s32 (svptrue_b32 (), b + j * 8, va); > + #pragma omp scan exclusive (va) > + va += svld1_s32 (svptrue_b32 (), a); > } > > + res = svaddv_s32 (svptrue_b32 (), va); > + > if (res != 64) > __builtin_abort (); > + > + for (j = 0; j < 64; j+=8) > + if (b[j] != j / 8) > + __builtin_abort (); > } > > + > int > main () > { > @@ -95,4 +130,5 @@ main () > for_reduction (); > simd_reduction (); > inscan_reduction_incl (); > + inscan_reduction_excl (); > }