Hi!

A lastprivate or conditional lastprivate variable could be modified either in
the input phase or in the scan phase (but not both), and as we don't really
know which one it is, we need to copy the value from the first simd into the
simd lanes of the second simd.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-07-06  Jakub Jelinek  <ja...@redhat.com>

        * omp-low.c (lower_rec_input_clauses): For lastprivate clauses in
        ctx->for_simd_scan_phase simd copy the outer var to the privatized
        variable(s).  For conditional lastprivate look through outer
        GIMPLE_OMP_SCAN context.
        (lower_omp_1): For conditional lastprivate look through outer
        GIMPLE_OMP_SCAN context.

        * testsuite/libgomp.c/scan-19.c: New test.
        * testsuite/libgomp.c/scan-20.c: New test.

--- gcc/omp-low.c.jj    2019-07-06 16:48:02.373495843 +0200
+++ gcc/omp-low.c       2019-07-06 18:36:32.268367658 +0200
@@ -5006,6 +5006,17 @@ lower_rec_input_clauses (tree clauses, g
                            lower_omp (&tseq, ctx->outer);
                          gimple_seq_add_seq (&llist[1], tseq);
                        }
+                     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+                         && ctx->for_simd_scan_phase)
+                       {
+                         x = unshare_expr (ivar);
+                         tree orig_v
+                           = build_outer_var_ref (var, ctx,
+                                                  OMP_CLAUSE_LASTPRIVATE);
+                         x = lang_hooks.decls.omp_clause_assign_op (c, x,
+                                                                    orig_v);
+                         gimplify_and_add (x, &llist[0]);
+                       }
                      if (y)
                        {
                          y = lang_hooks.decls.omp_clause_dtor (c, ivar);
@@ -5035,6 +5046,16 @@ lower_rec_input_clauses (tree clauses, g
                }
              if (nx)
                gimplify_and_add (nx, ilist);
+             if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+                 && is_simd
+                 && ctx->for_simd_scan_phase)
+               {
+                 tree orig_v = build_outer_var_ref (var, ctx,
+                                                    OMP_CLAUSE_LASTPRIVATE);
+                 x = lang_hooks.decls.omp_clause_assign_op (c, new_var,
+                                                            orig_v);
+                 gimplify_and_add (x, ilist);
+               }
              /* FALLTHRU */
 
            do_dtor:
@@ -5709,11 +5730,12 @@ lower_rec_input_clauses (tree clauses, g
                    && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
                  {
                    tree o = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
-                   tree *v
-                     = ctx->lastprivate_conditional_map->get (o);
-                   tree po = lookup_decl (OMP_CLAUSE_DECL (c), ctx->outer);
-                   tree *pv
-                     = ctx->outer->lastprivate_conditional_map->get (po);
+                   omp_context *outer = ctx->outer;
+                   if (gimple_code (outer->stmt) == GIMPLE_OMP_SCAN)
+                     outer = outer->outer;
+                   tree *v = ctx->lastprivate_conditional_map->get (o);
+                   tree po = lookup_decl (OMP_CLAUSE_DECL (c), outer);
+                   tree *pv = outer->lastprivate_conditional_map->get (po);
                    *v = *pv;
                  }
            }
@@ -12421,7 +12443,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p
              {
                tree clauses;
                if (up->combined_into_simd_safelen1)
-                 up = up->outer;
+                 {
+                   up = up->outer;
+                   if (gimple_code (up->stmt) == GIMPLE_OMP_SCAN)
+                     up = up->outer;
+                 }
                if (gimple_code (up->stmt) == GIMPLE_OMP_FOR)
                  clauses = gimple_omp_for_clauses (up->stmt);
                else
--- libgomp/testsuite/libgomp.c/scan-19.c.jj    2019-07-06 11:12:15.284732446 +0200
+++ libgomp/testsuite/libgomp.c/scan-19.c       2019-07-06 19:23:18.189268880 +0200
@@ -0,0 +1,119 @@
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */
+
+extern void abort (void);
+int r, a[1024], b[1024], x, y, z;
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+  #pragma omp for simd reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y)
+  for (int i = 0; i < 1024; i++)
+    {
+      { y = a[i]; r += y + x + 12; }
+      #pragma omp scan inclusive(r)
+      { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; }
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int s = 0;
+  #pragma omp parallel
+  #pragma omp for simd reduction (inscan, +:s) firstprivate (x) private (y) lastprivate (z)
+  for (int i = 0; i < 1024; i++)
+    {
+      { y = 2 * a[i]; s += y; z = y; }
+      #pragma omp scan inclusive(s)
+      { y = s; b[i] = y + x + 12; }
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+  #pragma omp parallel for simd reduction (inscan, +:r) firstprivate (x) lastprivate (x) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      { r += a[i]; if (i == 1023) x = 29; }
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int s = 0;
+  #pragma omp parallel for simd simdlen (1) reduction (inscan, +:s) lastprivate (conditional: x, y)
+  for (int i = 0; i < 1024; i++)
+    {
+      { s += 2 * a[i]; if ((a[i] & 1) == 1 && i < 825) x = a[i]; }
+      #pragma omp scan inclusive(s)
+      { b[i] = s; if ((a[i] & 1) == 0 && i < 829) y = a[i]; }
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  x = -12;
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  #pragma omp parallel
+  foo (a, b);
+  if (r != 1024 * 1023 / 2 || x != -12 || z != b[936])
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = 25;
+    }
+  if (bar () != 1024 * 1023 || x != -12 || z != 2 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -1;
+    }
+  r = 0;
+  baz (a, b);
+  if (r != 1024 * 1023 / 2 || x != 29)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -25;
+    }
+  if (qux () != 1024 * 1023 || x != 823 || y != 828)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+       abort ();
+    }
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/scan-20.c.jj    2019-07-06 11:12:25.213572759 +0200
+++ libgomp/testsuite/libgomp.c/scan-20.c       2019-07-06 19:23:35.237005367 +0200
@@ -0,0 +1,119 @@
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */
+
+extern void abort (void);
+int r, a[1024], b[1024], x, y, z;
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+  #pragma omp for simd reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y) simdlen(1)
+  for (int i = 0; i < 1024; i++)
+    {
+      { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; }
+      #pragma omp scan exclusive(r)
+      { y = a[i]; r += y + x + 12; }
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int s = 0;
+  #pragma omp parallel
+  #pragma omp for simd reduction (inscan, +:s) firstprivate (x) private (y) lastprivate (z) if (0)
+  for (int i = 0; i < 1024; i++)
+    {
+      { y = s; b[i] = y + x + 12; }
+      #pragma omp scan exclusive(s)
+      { y = 2 * a[i]; s += y; z = y; }
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+  #pragma omp parallel for simd reduction (inscan, +:r) firstprivate (x) lastprivate (x)
+  for (int i = 0; i < 1024; i++)
+    {
+      b[i] = r;
+      #pragma omp scan exclusive(r)
+      { r += a[i]; if (i == 1023) x = 29; }
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int s = 0;
+  #pragma omp parallel for simd reduction (inscan, +:s) lastprivate (conditional: x, y)
+  for (int i = 0; i < 1024; i++)
+    {
+      { b[i] = s; if ((a[i] & 1) == 0 && i < 829) y = a[i]; }
+      #pragma omp scan exclusive(s)
+      { s += 2 * a[i]; if ((a[i] & 1) == 1 && i < 825) x = a[i]; }
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  x = -12;
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  #pragma omp parallel
+  foo (a, b);
+  if (r != 1024 * 1023 / 2 || x != -12 || z != b[936])
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = 25;
+      s += i;
+    }
+  if (bar () != 1024 * 1023 || x != -12 || z != 2 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -1;
+      s += 2 * i;
+    }
+  r = 0;
+  baz (a, b);
+  if (r != 1024 * 1023 / 2 || x != 29)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -25;
+      s += i;
+    }
+  if (qux () != 1024 * 1023 || x != 823 || y != 828)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      if (b[i] != s)
+       abort ();
+      s += 2 * i;
+    }
+  return 0;
+}

        Jakub

Reply via email to