Hi! lastprivate or conditional lastprivate could be modified either in the input phase, or in the scan phase (but not both), and as we don't really know in which one it is, we need to copy the value from the first simd into simd lanes of the second simd.
Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-06 Jakub Jelinek <ja...@redhat.com> * omp-low.c (lower_rec_input_clauses): For lastprivate clauses in ctx->for_simd_scan_phase simd copy the outer var to the privatized variable(s). For conditional lastprivate look through outer GIMPLE_OMP_SCAN context. (lower_omp_1): For conditional lastprivate look through outer GIMPLE_OMP_SCAN context. * testsuite/libgomp.c/scan-19.c: New test. * testsuite/libgomp.c/scan-20.c: New test. --- gcc/omp-low.c.jj 2019-07-06 16:48:02.373495843 +0200 +++ gcc/omp-low.c 2019-07-06 18:36:32.268367658 +0200 @@ -5006,6 +5006,17 @@ lower_rec_input_clauses (tree clauses, g lower_omp (&tseq, ctx->outer); gimple_seq_add_seq (&llist[1], tseq); } + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && ctx->for_simd_scan_phase) + { + x = unshare_expr (ivar); + tree orig_v + = build_outer_var_ref (var, ctx, + OMP_CLAUSE_LASTPRIVATE); + x = lang_hooks.decls.omp_clause_assign_op (c, x, + orig_v); + gimplify_and_add (x, &llist[0]); + } if (y) { y = lang_hooks.decls.omp_clause_dtor (c, ivar); @@ -5035,6 +5046,16 @@ lower_rec_input_clauses (tree clauses, g } if (nx) gimplify_and_add (nx, ilist); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && is_simd + && ctx->for_simd_scan_phase) + { + tree orig_v = build_outer_var_ref (var, ctx, + OMP_CLAUSE_LASTPRIVATE); + x = lang_hooks.decls.omp_clause_assign_op (c, new_var, + orig_v); + gimplify_and_add (x, ilist); + } /* FALLTHRU */ do_dtor: @@ -5709,11 +5730,12 @@ lower_rec_input_clauses (tree clauses, g && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) { tree o = lookup_decl (OMP_CLAUSE_DECL (c), ctx); - tree *v - = ctx->lastprivate_conditional_map->get (o); - tree po = lookup_decl (OMP_CLAUSE_DECL (c), ctx->outer); - tree *pv - = ctx->outer->lastprivate_conditional_map->get (po); + omp_context *outer = ctx->outer; + if (gimple_code (outer->stmt) == GIMPLE_OMP_SCAN) + outer = outer->outer; + tree *v = ctx->lastprivate_conditional_map->get (o); + tree po = lookup_decl (OMP_CLAUSE_DECL (c), outer); + tree *pv = outer->lastprivate_conditional_map->get (po); *v = *pv; } } @@ -12421,7 +12443,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p { tree clauses; if (up->combined_into_simd_safelen1) - up = up->outer; + { + up = up->outer; + if (gimple_code (up->stmt) == GIMPLE_OMP_SCAN) + up = up->outer; + } if (gimple_code (up->stmt) == GIMPLE_OMP_FOR) clauses = gimple_omp_for_clauses (up->stmt); else --- libgomp/testsuite/libgomp.c/scan-19.c.jj 2019-07-06 11:12:15.284732446 +0200 +++ libgomp/testsuite/libgomp.c/scan-19.c 2019-07-06 19:23:18.189268880 +0200 @@ -0,0 +1,119 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */ + +extern void abort (void); +int r, a[1024], b[1024], x, y, z; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for simd reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y) + for (int i = 0; i < 1024; i++) + { + { y = a[i]; r += y + x + 12; } + #pragma omp scan inclusive(r) + { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; } + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for simd reduction (inscan, +:s) firstprivate (x) private (y) lastprivate (z) + for (int i = 0; i < 1024; i++) + { + { y = 2 * a[i]; s += y; z = y; } + #pragma omp scan inclusive(s) + { y = s; b[i] = y + x + 12; } + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for simd reduction (inscan, +:r) firstprivate (x) lastprivate (x) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + { r += a[i]; if (i == 1023) x = 29; } + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for simd simdlen (1) reduction (inscan, +:s) lastprivate (conditional: x, y) + for (int i = 0; i < 1024; i++) + { + { s += 2 * a[i]; if ((a[i] & 1) == 1 && i < 825) x = a[i]; } + #pragma omp scan inclusive(s) + { b[i] = s; if ((a[i] & 1) == 0 && i < 829) y = a[i]; } + } + return s; +} + +int +main () +{ + int s = 0; + x = -12; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2 || x != -12 || z != b[936]) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023 || x != -12 || z != 2 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2 || x != 29) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023 || x != 823 || y != 828) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} --- libgomp/testsuite/libgomp.c/scan-20.c.jj 2019-07-06 11:12:25.213572759 +0200 +++ libgomp/testsuite/libgomp.c/scan-20.c 2019-07-06 19:23:35.237005367 +0200 @@ -0,0 +1,119 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */ + +extern void abort (void); +int r, a[1024], b[1024], x, y, z; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for simd reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y) simdlen(1) + for (int i = 0; i < 1024; i++) + { + { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; } + #pragma omp scan exclusive(r) + { y = a[i]; r += y + x + 12; } + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for simd reduction (inscan, +:s) firstprivate (x) private (y) lastprivate (z) if (0) + for (int i = 0; i < 1024; i++) + { + { y = s; b[i] = y + x + 12; } + #pragma omp scan exclusive(s) + { y = 2 * a[i]; s += y; z = y; } + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for simd reduction (inscan, +:r) firstprivate (x) lastprivate (x) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + { r += a[i]; if (i == 1023) x = 29; } + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for simd reduction (inscan, +:s) lastprivate (conditional: x, y) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; if ((a[i] & 1) == 0 && i < 829) y = a[i]; } + #pragma omp scan exclusive(s) + { s += 2 * a[i]; if ((a[i] & 1) == 1 && i < 825) x = a[i]; } + } + return s; +} + +int +main () +{ + int s = 0; + x = -12; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2 || x != -12 || z != b[936]) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023 || x != -12 || z != 2 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2 || x != 29) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023 || x != 823 || y != 828) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} Jakub