On Wed, 3 Jul 2019 at 07:11, Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The following patch implements roughly the
> https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01330.html
> design for worksharing loops (so far not for composite for simd, that will
> be the next larger task).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
>
> 2019-07-03  Jakub Jelinek  <ja...@redhat.com>
>
>         * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_
>         clause.
>         * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of
>         OMP_CLAUSE__CONDTEMP_ as range's upper bound.
>         (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define.
>         * tree.c (omp_clause_num_ops, omp_clause_code_name): Add
>         OMP_CLAUSE__SCANTEMP_ entry.
>         (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_.
>         * tree-pretty-print.c (dump_omp_clause): Likewise.
>         * tree-nested.c (convert_nonlocal_omp_clauses,
>         convert_local_omp_clauses): Likewise.
>         * omp-general.h (struct omp_for_data): Add have_scantemp and
>         have_nonctrl_scantemp members.
>         * omp-general.c (omp_extract_for_data): Initialize them.
>         * omp-low.c (struct omp_context): Add scan_exclusive member.
>         (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind
>         result again with GF_OMP_FOR_KIND_MASK.  Initialize also
>         ctx->scan_exclusive.
>         (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead
>         of !ctx->scan_inclusive.
>         (lower_rec_input_clauses): Simplify gimplification of dtors using
>         gimplify_and_add.  For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN
>         rather than rvarp.  Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing
>         loops.  Don't add barrier for reduction_omp_orig_ref if
>         ctx->scan_??xclusive.
>         (lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive.
>         (lower_omp_scan): Use ctx->scan_exclusive instead
>         of !ctx->scan_inclusive.  Handle worksharing loops with inscan
>         reductions.  Use new_vard != new_var instead of repeated
>         omp_is_reference calls.
>         (omp_find_scan, lower_omp_for_scan): New functions.
>         (lower_omp_for): Call lower_omp_for_scan for worksharing loops with
>         inscan reductions.
>         * omp-expand.c (expand_omp_scantemp_alloc): New function.
>         (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp
>         and fd->have_scantemp.
>
>         * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message.
>         * c-c++-common/gomp/scan-5.c (foo): Likewise.
>

Hi Jakub,

This patch leads to new failures on arm:
FAIL: c-c++-common/gomp/scan-3.c (internal compiler error)
FAIL: c-c++-common/gomp/scan-5.c (internal compiler error)


The logs say:
spawn -ignore SIGHUP
/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/xgcc
-B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/
/gcc/testsuite/c-c++-common/gomp/scan-3.c -fno-diagnostics-show-caret
-fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fopenmp
-Wno-hsa -S -o scan-3.s
during RTL pass: expand
/gcc/testsuite/c-c++-common/gomp/scan-3.c: In function 'f1':
/gcc/testsuite/c-c++-common/gomp/scan-3.c:4:1: internal compiler
error: tree check: expected ssa_name, have var_decl in single_imm_use,
at ssa-iterators.h:421
0x5b43ad tree_check_failed(tree_node const*, char const*, int, char const*, ...)
        /gcc/tree.c:9902
0x9c18c5 tree_check(tree_node const*, char const*, int, char const*, tree_code)
        /gcc/tree.h:3473
0x9c18c5 single_imm_use
        /gcc/ssa-iterators.h:421
0x9c18c5 expand_mul_overflow
        /gcc/internal-fn.c:1590
0x9c209b expand_arith_overflow
        /gcc/internal-fn.c:2318
0x75f807 expand_call_stmt
        /gcc/cfgexpand.c:2638
0x75f807 expand_gimple_stmt_1
        /gcc/cfgexpand.c:3708
0x75f807 expand_gimple_stmt
        /gcc/cfgexpand.c:3867
0x765c73 expand_gimple_basic_block
        /gcc/cfgexpand.c:5907
0x7682b6 execute
        /gcc/cfgexpand.c:6530
Please submit a full bug report,


Christophe


>         * testsuite/libgomp.c++/scan-1.C: New test.
>         * testsuite/libgomp.c++/scan-2.C: New test.
>         * testsuite/libgomp.c++/scan-3.C: New test.
>         * testsuite/libgomp.c++/scan-4.C: New test.
>         * testsuite/libgomp.c++/scan-5.C: New test.
>         * testsuite/libgomp.c++/scan-6.C: New test.
>         * testsuite/libgomp.c++/scan-7.C: New test.
>         * testsuite/libgomp.c++/scan-8.C: New test.
>         * testsuite/libgomp.c/scan-1.c: New test.
>         * testsuite/libgomp.c/scan-2.c: New test.
>         * testsuite/libgomp.c/scan-3.c: New test.
>         * testsuite/libgomp.c/scan-4.c: New test.
>         * testsuite/libgomp.c/scan-5.c: New test.
>         * testsuite/libgomp.c/scan-6.c: New test.
>         * testsuite/libgomp.c/scan-7.c: New test.
>         * testsuite/libgomp.c/scan-8.c: New test.
>
> --- gcc/tree-core.h.jj  2019-07-02 12:57:30.283555122 +0200
> +++ gcc/tree-core.h     2019-07-02 13:08:01.387672125 +0200
> @@ -352,6 +352,9 @@ enum omp_clause_code {
>    /* Internal clause: temporary for lastprivate(conditional:).  */
>    OMP_CLAUSE__CONDTEMP_,
>
> +  /* Internal clause: temporary for inscan reductions.  */
> +  OMP_CLAUSE__SCANTEMP_,
> +
>    /* OpenACC/OpenMP clause: if (scalar-expression).  */
>    OMP_CLAUSE_IF,
>
> --- gcc/tree.h.jj       2019-07-02 12:57:30.284555106 +0200
> +++ gcc/tree.h  2019-07-02 13:08:01.385672156 +0200
> @@ -1449,7 +1449,7 @@ class auto_suppress_location_wrappers
>  #define OMP_CLAUSE_DECL(NODE)                                          \
>    OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \
>                                               OMP_CLAUSE_PRIVATE,       \
> -                                             OMP_CLAUSE__CONDTEMP_), 0)
> +                                             OMP_CLAUSE__SCANTEMP_), 0)
>  #define OMP_CLAUSE_HAS_LOCATION(NODE) \
>    (LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus)              
>   \
>    != UNKNOWN_LOCATION)
> @@ -1761,6 +1761,17 @@ class auto_suppress_location_wrappers
>  #define OMP_CLAUSE__CONDTEMP__ITER(NODE) \
>    (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag)
>
> +/* _SCANTEMP_ holding temporary with pointer to thread's local array;
> +   allocation.  */
> +#define OMP_CLAUSE__SCANTEMP__ALLOC(NODE) \
> +  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)->base.public_flag)
> +
> +/* _SCANTEMP_ holding temporary with a control variable for deallocation;
> +   one boolean_type_node for test whether alloca was used, another one
> +   to pass to __builtin_stack_restore or free.  */
> +#define OMP_CLAUSE__SCANTEMP__CONTROL(NODE) \
> +  TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_))
> +
>  /* SSA_NAME accessors.  */
>
>  /* Whether SSA_NAME NODE is a virtual operand.  This simply caches the
> --- gcc/tree.c.jj       2019-07-02 12:57:30.284555106 +0200
> +++ gcc/tree.c  2019-07-02 13:08:01.382672202 +0200
> @@ -311,6 +311,7 @@ unsigned const char omp_clause_num_ops[]
>    1, /* OMP_CLAUSE__LOOPTEMP_  */
>    1, /* OMP_CLAUSE__REDUCTEMP_  */
>    1, /* OMP_CLAUSE__CONDTEMP_  */
> +  1, /* OMP_CLAUSE__SCANTEMP_  */
>    1, /* OMP_CLAUSE_IF  */
>    1, /* OMP_CLAUSE_NUM_THREADS  */
>    1, /* OMP_CLAUSE_SCHEDULE  */
> @@ -391,6 +392,7 @@ const char * const omp_clause_code_name[
>    "_looptemp_",
>    "_reductemp_",
>    "_condtemp_",
> +  "_scantemp_",
>    "if",
>    "num_threads",
>    "schedule",
> @@ -12316,6 +12318,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func
>         case OMP_CLAUSE__LOOPTEMP_:
>         case OMP_CLAUSE__REDUCTEMP_:
>         case OMP_CLAUSE__CONDTEMP_:
> +       case OMP_CLAUSE__SCANTEMP_:
>         case OMP_CLAUSE__SIMDUID_:
>           WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
>           /* FALLTHRU */
> --- gcc/tree-pretty-print.c.jj  2019-07-02 12:57:30.409553135 +0200
> +++ gcc/tree-pretty-print.c     2019-07-02 13:08:01.383672187 +0200
> @@ -483,6 +483,9 @@ dump_omp_clause (pretty_printer *pp, tre
>      case OMP_CLAUSE__CONDTEMP_:
>        name = "_condtemp_";
>        goto print_remap;
> +    case OMP_CLAUSE__SCANTEMP_:
> +      name = "_scantemp_";
> +      goto print_remap;
>      case OMP_CLAUSE_TO_DECLARE:
>        name = "to";
>        goto print_remap;
> --- gcc/tree-nested.c.jj        2019-07-02 12:57:30.282555137 +0200
> +++ gcc/tree-nested.c   2019-07-02 13:08:01.386672141 +0200
> @@ -1349,6 +1349,7 @@ convert_nonlocal_omp_clauses (tree *pcla
>         case OMP_CLAUSE_IF_PRESENT:
>         case OMP_CLAUSE_FINALIZE:
>         case OMP_CLAUSE__CONDTEMP_:
> +       case OMP_CLAUSE__SCANTEMP_:
>           break;
>
>           /* The following clause belongs to the OpenACC cache directive, 
> which
> @@ -2078,6 +2079,7 @@ convert_local_omp_clauses (tree *pclause
>         case OMP_CLAUSE_IF_PRESENT:
>         case OMP_CLAUSE_FINALIZE:
>         case OMP_CLAUSE__CONDTEMP_:
> +       case OMP_CLAUSE__SCANTEMP_:
>           break;
>
>           /* The following clause belongs to the OpenACC cache directive, 
> which
> --- gcc/omp-general.h.jj        2019-07-02 12:57:30.282555137 +0200
> +++ gcc/omp-general.h   2019-07-02 13:08:01.387672125 +0200
> @@ -63,7 +63,7 @@ struct omp_for_data
>    int collapse;  /* Collapsed loops, 1 for a non-collapsed loop.  */
>    int ordered;
>    bool have_nowait, have_ordered, simd_schedule, have_reductemp;
> -  bool have_pointer_condtemp;
> +  bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp;
>    int lastprivate_conditional;
>    unsigned char sched_modifiers;
>    enum omp_clause_schedule_kind sched_kind;
> --- gcc/omp-general.c.jj        2019-07-02 12:57:30.282555137 +0200
> +++ gcc/omp-general.c   2019-07-02 13:08:01.385672156 +0200
> @@ -169,6 +169,8 @@ omp_extract_for_data (gomp_for *for_stmt
>    fd->have_ordered = false;
>    fd->have_reductemp = false;
>    fd->have_pointer_condtemp = false;
> +  fd->have_scantemp = false;
> +  fd->have_nonctrl_scantemp = false;
>    fd->lastprivate_conditional = 0;
>    fd->tiling = NULL_TREE;
>    fd->collapse = 1;
> @@ -231,6 +233,12 @@ omp_extract_for_data (gomp_for *for_stmt
>         if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
>           fd->have_pointer_condtemp = true;
>         break;
> +      case OMP_CLAUSE__SCANTEMP_:
> +       fd->have_scantemp = true;
> +       if (!OMP_CLAUSE__SCANTEMP__ALLOC (t)
> +           && !OMP_CLAUSE__SCANTEMP__CONTROL (t))
> +         fd->have_nonctrl_scantemp = true;
> +       break;
>        default:
>         break;
>        }
> --- gcc/omp-low.c.jj    2019-07-02 12:57:30.285555090 +0200
> +++ gcc/omp-low.c       2019-07-02 16:37:26.229374596 +0200
> @@ -144,6 +144,9 @@ struct omp_context
>
>    /* True if there is nested scan context with inclusive clause.  */
>    bool scan_inclusive;
> +
> +  /* True if there is nested scan context with exclusive clause.  */
> +  bool scan_exclusive;
>  };
>
>  static splay_tree all_contexts;
> @@ -3316,8 +3319,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *g
>        break;
>
>      case GIMPLE_OMP_FOR:
> -      if (((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
> -           & GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD)
> +      if ((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
> +          == GF_OMP_FOR_KIND_SIMD)
>           && omp_maybe_offloaded_ctx (ctx)
>           && omp_max_simt_vf ())
>         scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx);
> @@ -3335,8 +3338,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *g
>
>      case GIMPLE_OMP_SCAN:
>        if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
> -       if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
> -         ctx->scan_inclusive = true;
> +       {
> +         if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
> +           ctx->scan_inclusive = true;
> +         else if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_EXCLUSIVE)
> +           ctx->scan_exclusive = true;
> +       }
>        /* FALLTHRU */
>      case GIMPLE_OMP_SECTION:
>      case GIMPLE_OMP_MASTER:
> @@ -3769,7 +3776,7 @@ lower_rec_simd_input_clauses (tree new_v
>                           sctx->lastlane, NULL_TREE, NULL_TREE);
>           TREE_THIS_NOTRAP (*rvar) = 1;
>
> -         if (!ctx->scan_inclusive)
> +         if (ctx->scan_exclusive)
>             {
>               /* And for exclusive scan yet another one, which will
>                  hold the value during the scan phase.  */
> @@ -3854,7 +3861,7 @@ static void
>  lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
>                          omp_context *ctx, struct omp_for_data *fd)
>  {
> -  tree c, dtor, copyin_seq, x, ptr;
> +  tree c, copyin_seq, x, ptr;
>    bool copyin_by_ref = false;
>    bool lastprivate_firstprivate = false;
>    bool reduction_omp_orig_ref = false;
> @@ -4541,12 +4548,7 @@ lower_rec_input_clauses (tree clauses, g
>                       x = lang_hooks.decls.omp_clause_dtor
>                                                 (c, build_simple_mem_ref 
> (y2));
>                       if (x)
> -                       {
> -                         gimple_seq tseq = NULL;
> -                         dtor = x;
> -                         gimplify_stmt (&dtor, &tseq);
> -                         gimple_seq_add_seq (dlist, tseq);
> -                       }
> +                       gimplify_and_add (x, dlist);
>                     }
>                 }
>               else
> @@ -4913,13 +4915,7 @@ lower_rec_input_clauses (tree clauses, g
>                         {
>                           y = lang_hooks.decls.omp_clause_dtor (c, ivar);
>                           if (y)
> -                           {
> -                             gimple_seq tseq = NULL;
> -
> -                             dtor = y;
> -                             gimplify_stmt (&dtor, &tseq);
> -                             gimple_seq_add_seq (&llist[1], tseq);
> -                           }
> +                           gimplify_and_add (y, &llist[1]);
>                         }
>                       break;
>                     }
> @@ -4949,13 +4945,7 @@ lower_rec_input_clauses (tree clauses, g
>             do_dtor:
>               x = lang_hooks.decls.omp_clause_dtor (c, new_var);
>               if (x)
> -               {
> -                 gimple_seq tseq = NULL;
> -
> -                 dtor = x;
> -                 gimplify_stmt (&dtor, &tseq);
> -                 gimple_seq_add_seq (dlist, tseq);
> -               }
> +               gimplify_and_add (x, dlist);
>               break;
>
>             case OMP_CLAUSE_LINEAR:
> @@ -5103,13 +5093,7 @@ lower_rec_input_clauses (tree clauses, g
>                       gimplify_and_add (x, &llist[0]);
>                       x = lang_hooks.decls.omp_clause_dtor (c, ivar);
>                       if (x)
> -                       {
> -                         gimple_seq tseq = NULL;
> -
> -                         dtor = x;
> -                         gimplify_stmt (&dtor, &tseq);
> -                         gimple_seq_add_seq (&llist[1], tseq);
> -                       }
> +                       gimplify_and_add (x, &llist[1]);
>                       break;
>                     }
>                   if (omp_is_reference (var))
> @@ -5282,12 +5266,7 @@ lower_rec_input_clauses (tree clauses, g
>
>                               x = lang_hooks.decls.omp_clause_dtor (c, nv);
>                               if (x)
> -                               {
> -                                 tseq = NULL;
> -                                 dtor = x;
> -                                 gimplify_stmt (&dtor, &tseq);
> -                                 gimple_seq_add_seq (dlist, tseq);
> -                               }
> +                               gimplify_and_add (x, dlist);
>                             }
>
>                           tree ref = build_outer_var_ref (var, ctx);
> @@ -5310,34 +5289,19 @@ lower_rec_input_clauses (tree clauses, g
>
>                           x = lang_hooks.decls.omp_clause_dtor (c, ivar);
>                           if (x)
> -                           {
> -                             tseq = NULL;
> -                             dtor = x;
> -                             gimplify_stmt (&dtor, &tseq);
> -                             gimple_seq_add_seq (&llist[1], tseq);
> -                           }
> +                           gimplify_and_add (x, &llist[1]);
>
>                           tree ivar2 = unshare_expr (lvar);
>                           TREE_OPERAND (ivar2, 1) = sctx.idx;
>                           x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
>                           if (x)
> -                           {
> -                             tseq = NULL;
> -                             dtor = x;
> -                             gimplify_stmt (&dtor, &tseq);
> -                             gimple_seq_add_seq (&llist[1], tseq);
> -                           }
> +                           gimplify_and_add (x, &llist[1]);
>
>                           if (rvar2)
>                             {
>                               x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
>                               if (x)
> -                               {
> -                                 tseq = NULL;
> -                                 dtor = x;
> -                                 gimplify_stmt (&dtor, &tseq);
> -                                 gimple_seq_add_seq (&llist[1], tseq);
> -                               }
> +                               gimplify_and_add (x, &llist[1]);
>                             }
>                           break;
>                         }
> @@ -5362,12 +5326,7 @@ lower_rec_input_clauses (tree clauses, g
>                                              build_fold_addr_expr (lvar));
>                       x = lang_hooks.decls.omp_clause_dtor (c, ivar);
>                       if (x)
> -                       {
> -                         tseq = NULL;
> -                         dtor = x;
> -                         gimplify_stmt (&dtor, &tseq);
> -                         gimple_seq_add_seq (&llist[1], tseq);
> -                       }
> +                       gimplify_and_add (x, &llist[1]);
>                       break;
>                     }
>                   /* If this is a reference to constant size reduction var
> @@ -5409,16 +5368,19 @@ lower_rec_input_clauses (tree clauses, g
>                   if (x)
>                     gimplify_and_add (x, ilist);
>
> -                 if (rvarp)
> +                 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> +                     && OMP_CLAUSE_REDUCTION_INSCAN (c))
>                     {
> -                     if (x)
> +                     if (x || (!is_simd
> +                               && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)))
>                         {
>                           tree nv = create_tmp_var_raw (TREE_TYPE (new_var));
>                           gimple_add_tmp_var (nv);
>                           ctx->cb.decl_map->put (new_vard, nv);
>                           x = lang_hooks.decls.omp_clause_default_ctor
>                                 (c, nv, build_outer_var_ref (var, ctx));
> -                         gimplify_and_add (x, ilist);
> +                         if (x)
> +                           gimplify_and_add (x, ilist);
>                           if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
>                             {
>                               tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
> @@ -5433,7 +5395,7 @@ lower_rec_input_clauses (tree clauses, g
>                               gimple_seq_add_seq (ilist, tseq);
>                             }
>                           OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> -                         if (!ctx->scan_inclusive)
> +                         if (is_simd && ctx->scan_exclusive)
>                             {
>                               tree nv2
>                                 = create_tmp_var_raw (TREE_TYPE (new_var));
> @@ -5444,23 +5406,14 @@ lower_rec_input_clauses (tree clauses, g
>                               gimplify_and_add (x, ilist);
>                               x = lang_hooks.decls.omp_clause_dtor (c, nv2);
>                               if (x)
> -                               {
> -                                 tseq = NULL;
> -                                 dtor = x;
> -                                 gimplify_stmt (&dtor, &tseq);
> -                                 gimple_seq_add_seq (dlist, tseq);
> -                               }
> +                               gimplify_and_add (x, dlist);
>                             }
>                           x = lang_hooks.decls.omp_clause_dtor (c, nv);
>                           if (x)
> -                           {
> -                             tseq = NULL;
> -                             dtor = x;
> -                             gimplify_stmt (&dtor, &tseq);
> -                             gimple_seq_add_seq (dlist, tseq);
> -                           }
> +                           gimplify_and_add (x, dlist);
>                         }
> -                     else if (!ctx->scan_inclusive
> +                     else if (is_simd
> +                              && ctx->scan_exclusive
>                                && TREE_ADDRESSABLE (TREE_TYPE (new_var)))
>                         {
>                           tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
> @@ -5468,12 +5421,7 @@ lower_rec_input_clauses (tree clauses, g
>                           ctx->cb.decl_map->put (new_vard, nv2);
>                           x = lang_hooks.decls.omp_clause_dtor (c, nv2);
>                           if (x)
> -                           {
> -                             tseq = NULL;
> -                             dtor = x;
> -                             gimplify_stmt (&dtor, &tseq);
> -                             gimple_seq_add_seq (dlist, tseq);
> -                           }
> +                           gimplify_and_add (x, dlist);
>                         }
>                       DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
>                       goto do_dtor;
> @@ -5611,7 +5559,8 @@ lower_rec_input_clauses (tree clauses, g
>                     {
>                       if (omp_is_reference (var) && is_simd)
>                         handle_simd_reference (clause_loc, new_vard, ilist);
> -                     if (rvarp)
> +                     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> +                         && OMP_CLAUSE_REDUCTION_INSCAN (c))
>                         break;
>                       gimplify_assign (new_var, x, ilist);
>                       if (is_simd)
> @@ -5815,7 +5764,10 @@ lower_rec_input_clauses (tree clauses, g
>       lastprivate clauses we need to ensure the lastprivate copying
>       happens after firstprivate copying in all threads.  And similarly
>       for UDRs if initializer expression refers to omp_orig.  */
> -  if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
> +  if (copyin_by_ref || lastprivate_firstprivate
> +      || (reduction_omp_orig_ref
> +         && !ctx->scan_inclusive
> +         && !ctx->scan_exclusive))
>      {
>        /* Don't add any barrier for #pragma omp simd or
>          #pragma omp distribute.  */
> @@ -6464,6 +6416,10 @@ lower_reduction_clauses (tree clauses, g
>        && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
>      return;
>
> +  /* inscan reductions are handled elsewhere.  */
> +  if (ctx->scan_inclusive || ctx->scan_exclusive)
> +    return;
> +
>    /* First see if there is exactly one reduction clause.  Use OMP_ATOMIC
>       update in that case, otherwise use a lock.  */
>    for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
> @@ -8650,7 +8606,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>    gimple_seq before = NULL;
>    omp_context *octx = ctx->outer;
>    gcc_assert (octx);
> -  if (!octx->scan_inclusive && !has_clauses)
> +  if (octx->scan_exclusive && !has_clauses)
>      {
>        gimple_stmt_iterator gsi2 = *gsi_p;
>        gsi_next (&gsi2);
> @@ -8672,23 +8628,29 @@ lower_omp_scan (gimple_stmt_iterator *gs
>      }
>
>    bool input_phase = has_clauses ^ octx->scan_inclusive;
> -  if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
> -      && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
> -      && !gimple_omp_for_combined_into_p (octx->stmt))
> -    {
> -      if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
> -                                   OMP_CLAUSE__SIMDUID_))
> -       {
> -         tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
> -         lane = create_tmp_var (unsigned_type_node);
> -         tree t = build_int_cst (integer_type_node,
> -                                 input_phase ? 1
> -                                 : octx->scan_inclusive ? 2 : 3);
> -         gimple *g
> -           = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
> -         gimple_call_set_lhs (g, lane);
> -         gimple_seq_add_stmt (&before, g);
> -       }
> +  bool is_simd = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
> +                 && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
> +                 && !gimple_omp_for_combined_into_p (octx->stmt));
> +  bool is_for = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
> +                && gimple_omp_for_kind (octx->stmt) == GF_OMP_FOR_KIND_FOR
> +                && !gimple_omp_for_combined_p (octx->stmt));
> +  if (is_simd)
> +    if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
> +                                 OMP_CLAUSE__SIMDUID_))
> +      {
> +       tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
> +       lane = create_tmp_var (unsigned_type_node);
> +       tree t = build_int_cst (integer_type_node,
> +                               input_phase ? 1
> +                               : octx->scan_inclusive ? 2 : 3);
> +       gimple *g
> +         = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
> +       gimple_call_set_lhs (g, lane);
> +       gimple_seq_add_stmt (&before, g);
> +      }
> +
> +  if (is_simd || is_for)
> +    {
>        for (tree c = gimple_omp_for_clauses (octx->stmt);
>            c; c = OMP_CLAUSE_CHAIN (c))
>         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> @@ -8711,7 +8673,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>             if (DECL_HAS_VALUE_EXPR_P (new_vard))
>               {
>                 val = DECL_VALUE_EXPR (new_vard);
> -               if (omp_is_reference (var))
> +               if (new_vard != new_var)
>                   {
>                     gcc_assert (TREE_CODE (val) == ADDR_EXPR);
>                     val = TREE_OPERAND (val, 0);
> @@ -8727,7 +8689,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                         lane0 = TREE_OPERAND (val, 1);
>                         TREE_OPERAND (val, 1) = lane;
>                         var2 = lookup_decl (v, octx);
> -                       if (!octx->scan_inclusive)
> +                       if (octx->scan_exclusive)
>                           var4 = lookup_decl (var2, octx);
>                         if (input_phase
>                             && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> @@ -8737,7 +8699,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                             var2 = build4 (ARRAY_REF, TREE_TYPE (val),
>                                            var2, lane, NULL_TREE, NULL_TREE);
>                             TREE_THIS_NOTRAP (var2) = 1;
> -                           if (!octx->scan_inclusive)
> +                           if (octx->scan_exclusive)
>                               {
>                                 var4 = build4 (ARRAY_REF, TREE_TYPE (val),
>                                                var4, lane, NULL_TREE,
> @@ -8759,7 +8721,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                     var3 = maybe_lookup_decl (new_vard, octx);
>                     if (var3 == new_vard || var3 == NULL_TREE)
>                       var3 = NULL_TREE;
> -                   else if (!octx->scan_inclusive && !input_phase)
> +                   else if (is_simd && octx->scan_exclusive && !input_phase)
>                       {
>                         var4 = maybe_lookup_decl (var3, octx);
>                         if (var4 == var3 || var4 == NULL_TREE)
> @@ -8774,7 +8736,10 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                           }
>                       }
>                   }
> -               if (!octx->scan_inclusive && !input_phase && var4 == 
> NULL_TREE)
> +               if (is_simd
> +                   && octx->scan_exclusive
> +                   && !input_phase
> +                   && var4 == NULL_TREE)
>                   var4 = create_tmp_var (TREE_TYPE (val));
>               }
>             if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> @@ -8794,12 +8759,14 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                       {
>                         /* Otherwise, assign to it the identity element.  */
>                         gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT 
> (c);
> +                       if (is_for)
> +                         tseq = copy_gimple_seq_and_replace_locals (tseq);
>                         tree ref = build_outer_var_ref (var, octx);
>                         tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
>                                   ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
>                         if (x)
>                           {
> -                           if (omp_is_reference (var))
> +                           if (new_vard != new_var)
>                               val = build_fold_addr_expr_loc (clause_loc, 
> val);
>                             SET_DECL_VALUE_EXPR (new_vard, val);
>                           }
> @@ -8811,13 +8778,14 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                         SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
>                         DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
>                         gimple_seq_add_seq (&before, tseq);
> -                       OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> +                       if (is_simd)
> +                         OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
>                       }
>                   }
> -               else
> +               else if (is_simd)
>                   {
>                     tree x;
> -                   if (!octx->scan_inclusive)
> +                   if (octx->scan_exclusive)
>                       {
>                         tree v4 = unshare_expr (var4);
>                         tree v2 = unshare_expr (var2);
> @@ -8828,7 +8796,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                     x = (DECL_HAS_VALUE_EXPR_P (new_vard)
>                          ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
>                     tree vexpr = val;
> -                   if (x && omp_is_reference (var))
> +                   if (x && new_vard != new_var)
>                       vexpr = build_fold_addr_expr_loc (clause_loc, val);
>                     if (x)
>                       SET_DECL_VALUE_EXPR (new_vard, vexpr);
> @@ -8864,7 +8832,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                     tree x = omp_reduction_init (c, TREE_TYPE (new_var));
>                     gimplify_assign (val, x, &before);
>                   }
> -               else
> +               else if (is_simd)
>                   {
>                     /* scan phase.  */
>                     enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
> @@ -8888,11 +8856,11 @@ lower_omp_scan (gimple_stmt_iterator *gs
>                       }
>                   }
>               }
> -           if (!octx->scan_inclusive && !input_phase && lane0)
> +           if (octx->scan_exclusive && !input_phase && lane0)
>               {
>                 tree vexpr = unshare_expr (var4);
>                 TREE_OPERAND (vexpr, 1) = lane0;
> -               if (omp_is_reference (var))
> +               if (new_vard != new_var)
>                   vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
>                 SET_DECL_VALUE_EXPR (new_vard, vexpr);
>               }
> @@ -8901,9 +8869,17 @@ lower_omp_scan (gimple_stmt_iterator *gs
>    else if (has_clauses)
>      sorry_at (gimple_location (stmt),
>               "%<#pragma omp scan%> not supported yet");
> -  gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
> -  gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
> -  gsi_replace (gsi_p, gimple_build_nop (), true);
> +  if (!is_for)
> +    {
> +      gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
> +      gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
> +      gsi_replace (gsi_p, gimple_build_nop (), true);
> +    }
> +  else if (before)
> +    {
> +      gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt));
> +      gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT);
> +    }
>  }
>
>
> @@ -9124,6 +9100,712 @@ lower_omp_for_lastprivate (struct omp_fo
>      }
>  }
>
> +/* Callback for walk_gimple_seq.  Find #pragma omp scan statement.  */
> +
> +tree
> +omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
> +              struct walk_stmt_info *wi)
> +{
> +  gimple *stmt = gsi_stmt (*gsi_p);
> +
> +  *handled_ops_p = true;
> +  switch (gimple_code (stmt))
> +    {
> +    WALK_SUBSTMTS;
> +
> +    case GIMPLE_OMP_SCAN:
> +      *(gimple_stmt_iterator *) (wi->info) = *gsi_p;
> +      return integer_zero_node;
> +    default:
> +      break;
> +    }
> +  return NULL;
> +}
> +
> +/* Helper function for lower_omp_for, add transformations for a worksharing
> +   loop with scan directives inside of it.
> +   For worksharing loop not combined with simd, transform:
> +   #pragma omp for reduction(inscan,+:r) private(i)
> +   for (i = 0; i < n; i = i + 1)
> +     {
> +       {
> +        update (r);
> +       }
> +       #pragma omp scan inclusive(r)
> +       {
> +        use (r);
> +       }
> +     }
> +
> +   into two worksharing loops + code to merge results:
> +
> +   num_threads = omp_get_num_threads ();
> +   thread_num = omp_get_thread_num ();
> +   if (thread_num == 0) goto <D.2099>; else goto <D.2100>;
> +   <D.2099>:
> +   var2 = r;
> +   goto <D.2101>;
> +   <D.2100>:
> +   // For UDRs this is UDR init, or if ctors are needed, copy from
> +   // var3 that has been constructed to contain the neutral element.
> +   var2 = 0;
> +   <D.2101>:
> +   ivar = 0;
> +   // The _scantemp_ clauses will arrange for rpriva to be initialized to
> +   // a shared array with num_threads elements and rprivb to a local array
> +   // number of elements equal to the number of (contiguous) iterations the
> +   // current thread will perform.  controlb and controlp variables are
> +   // temporaries to handle deallocation of rprivb at the end of second
> +   // GOMP_FOR.
> +   #pragma omp for _scantemp_(rpriva) _scantemp_(rprivb) 
> _scantemp_(controlb) \
> +     _scantemp_(controlp) reduction(inscan,+:r) private(i) nowait
> +   for (i = 0; i < n; i = i + 1)
> +     {
> +       {
> +        // For UDRs this is UDR init or copy from var3.
> +        r = 0;
> +        // This is the input phase from user code.
> +        update (r);
> +       }
> +       {
> +        // For UDRs this is UDR merge.
> +        var2 = var2 + r;
> +        // Rather than handing it over to the user, save to local thread's
> +        // array.
> +        rprivb[ivar] = var2;
> +        // For exclusive scan, the above two statements are swapped.
> +        ivar = ivar + 1;
> +       }
> +     }
> +   // And remember the final value from this thread's into the shared
> +   // rpriva array.
> +   rpriva[(sizetype) thread_num] = var2;
> +   // If more than one thread, compute using Work-Efficient prefix sum
> +   // the inclusive parallel scan of the rpriva array.
> +   if (num_threads > 1) goto <D.2102>; else goto <D.2103>;
> +   <D.2102>:
> +   GOMP_barrier ();
> +   down = 0;
> +   k = 1;
> +   num_threadsu = (unsigned int) num_threads;
> +   thread_numup1 = (unsigned int) thread_num + 1;
> +   <D.2108>:
> +   twok = k << 1;
> +   if (twok > num_threadsu) goto <D.2110>; else goto <D.2111>;
> +   <D.2110>:
> +   down = 4294967295;
> +   k = k >> 1;
> +   if (k == num_threadsu) goto <D.2112>; else goto <D.2111>;
> +   <D.2112>:
> +   k = k >> 1;
> +   <D.2111>:
> +   twok = k << 1;
> +   cplx = .MUL_OVERFLOW (thread_nump1, twok);
> +   mul = REALPART_EXPR <cplx>;
> +   ovf = IMAGPART_EXPR <cplx>;
> +   if (ovf == 0) goto <D.2116>; else goto <D.2117>;
> +   <D.2116>:
> +   andv = k & down;
> +   andvm1 = andv + 4294967295;
> +   l = mul + andvm1;
> +   if (l < num_threadsu) goto <D.2120>; else goto <D.2117>;
> +   <D.2120>:
> +   // For UDRs this is UDR merge, performed using var2 variable as temporary,
> +   // i.e. var2 = rpriva[l - k]; UDR merge (var2, rpriva[l]); rpriva[l] = 
> var2;
> +   rpriva[l] = rpriva[l - k] + rpriva[l];
> +   <D.2117>:
> +   if (down == 0) goto <D.2121>; else goto <D.2122>;
> +   <D.2121>:
> +   k = k << 1;
> +   goto <D.2123>;
> +   <D.2122>:
> +   k = k >> 1;
> +   <D.2123>:
> +   GOMP_barrier ();
> +   if (k != 0) goto <D.2108>; else goto <D.2103>;
> +   <D.2103>:
> +   if (thread_num == 0) goto <D.2124>; else goto <D.2125>;
> +   <D.2124>:
> +   // For UDRs this is UDR init or copy from var3.
> +   var2 = 0;
> +   goto <D.2126>;
> +   <D.2125>:
> +   var2 = rpriva[thread_num - 1];
> +   <D.2126>:
> +   ivar = 0;
> +   #pragma omp for _scantemp_(controlb) _scantemp_(controlp) \
> +     reduction(inscan,+:r) private(i)
> +   for (i = 0; i < n; i = i + 1)
> +     {
> +       {
> +        // For UDRs, this is UDR merge (rprivb[ivar], var2); r = 
> rprivb[ivar];
> +        r = rprivb[ivar] + var2;
> +       }
> +       {
> +        // This is the scan phase from user code.
> +        use (r);
> +        // Plus a bump of the iterator.
> +        ivar = ivar + 1;
> +       }
> +     }  */
> +
> +static void
> +lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
> +                   struct omp_for_data *fd, omp_context *ctx)
> +{
> +  gcc_assert (ctx->scan_inclusive || ctx->scan_exclusive);
> +
> +  gimple_seq body = gimple_omp_body (stmt);
> +  gimple_stmt_iterator input1_gsi = gsi_none ();
> +  struct walk_stmt_info wi;
> +  memset (&wi, 0, sizeof (wi));
> +  wi.val_only = true;
> +  wi.info = (void *) &input1_gsi;
> +  walk_gimple_seq_mod (&body, omp_find_scan, NULL, &wi);
> +  gcc_assert (!gsi_end_p (input1_gsi));
> +
> +  gimple *input_stmt1 = gsi_stmt (input1_gsi);
> +  gimple_stmt_iterator gsi = input1_gsi;
> +  gsi_next (&gsi);
> +  gimple_stmt_iterator scan1_gsi = gsi;
> +  gimple *scan_stmt1 = gsi_stmt (gsi);
> +  gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN);
> +
> +  gimple_seq input_body = gimple_omp_body (input_stmt1);
> +  gimple_seq scan_body = gimple_omp_body (scan_stmt1);
> +  gimple_omp_set_body (input_stmt1, NULL);
> +  gimple_omp_set_body (scan_stmt1, NULL);
> +  gimple_omp_set_body (stmt, NULL);
> +
> +  gomp_for *new_stmt = as_a <gomp_for *> (gimple_copy (stmt));
> +  gimple_seq new_body = copy_gimple_seq_and_replace_locals (body);
> +  gimple_omp_set_body (stmt, body);
> +  gimple_omp_set_body (input_stmt1, input_body);
> +
> +  gimple_stmt_iterator input2_gsi = gsi_none ();
> +  memset (&wi, 0, sizeof (wi));
> +  wi.val_only = true;
> +  wi.info = (void *) &input2_gsi;
> +  walk_gimple_seq_mod (&new_body, omp_find_scan, NULL, &wi);
> +  gcc_assert (!gsi_end_p (input2_gsi));
> +
> +  gimple *input_stmt2 = gsi_stmt (input2_gsi);
> +  gsi = input2_gsi;
> +  gsi_next (&gsi);
> +  gimple_stmt_iterator scan2_gsi = gsi;
> +  gimple *scan_stmt2 = gsi_stmt (gsi);
> +  gcc_assert (scan_stmt2 && gimple_code (scan_stmt2) == GIMPLE_OMP_SCAN);
> +  gimple_omp_set_body (scan_stmt2, scan_body);
> +
> +  tree num_threads = create_tmp_var (integer_type_node);
> +  tree thread_num = create_tmp_var (integer_type_node);
> +  tree nthreads_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
> +  tree threadnum_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
> +  gimple *g = gimple_build_call (nthreads_decl, 0);
> +  gimple_call_set_lhs (g, num_threads);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_call (threadnum_decl, 0);
> +  gimple_call_set_lhs (g, thread_num);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree ivar = create_tmp_var (sizetype);
> +  tree new_clauses1 = NULL_TREE, new_clauses2 = NULL_TREE;
> +  tree *cp1 = &new_clauses1, *cp2 = &new_clauses2;
> +  tree k = create_tmp_var (unsigned_type_node);
> +  tree l = create_tmp_var (unsigned_type_node);
> +
> +  gimple_seq clist = NULL, mdlist = NULL;
> +  gimple_seq thr01_list = NULL, thrn1_list = NULL;
> +  gimple_seq thr02_list = NULL, thrn2_list = NULL;
> +  gimple_seq scan1_list = NULL, input2_list = NULL;
> +  gimple_seq last_list = NULL, reduc_list = NULL;
> +  for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
> +    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> +       && OMP_CLAUSE_REDUCTION_INSCAN (c))
> +      {
> +       location_t clause_loc = OMP_CLAUSE_LOCATION (c);
> +       tree var = OMP_CLAUSE_DECL (c);
> +       tree new_var = lookup_decl (var, ctx);
> +       tree var3 = NULL_TREE;
> +       tree new_vard = new_var;
> +       if (omp_is_reference (var))
> +         new_var = build_simple_mem_ref_loc (clause_loc, new_var);
> +       if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> +         {
> +           var3 = maybe_lookup_decl (new_vard, ctx);
> +           if (var3 == new_vard)
> +             var3 = NULL_TREE;
> +         }
> +
> +       tree ptype = build_pointer_type (TREE_TYPE (new_var));
> +       tree rpriva = create_tmp_var (ptype);
> +       tree nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
> +       OMP_CLAUSE_DECL (nc) = rpriva;
> +       *cp1 = nc;
> +       cp1 = &OMP_CLAUSE_CHAIN (nc);
> +
> +       tree rprivb = create_tmp_var (ptype);
> +       nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
> +       OMP_CLAUSE_DECL (nc) = rprivb;
> +       OMP_CLAUSE__SCANTEMP__ALLOC (nc) = 1;
> +       *cp1 = nc;
> +       cp1 = &OMP_CLAUSE_CHAIN (nc);
> +
> +       tree var2 = create_tmp_var_raw (TREE_TYPE (new_var));
> +       if (new_vard != new_var)
> +         TREE_ADDRESSABLE (var2) = 1;
> +       gimple_add_tmp_var (var2);
> +
> +       tree x = fold_convert_loc (clause_loc, sizetype, thread_num);
> +       x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> +                            TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> +       x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> +       tree rpriva_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> +       x = fold_build2_loc (clause_loc, PLUS_EXPR, integer_type_node,
> +                            thread_num, integer_minus_one_node);
> +       x = fold_convert_loc (clause_loc, sizetype, x);
> +       x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> +                            TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> +       x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> +       tree rprivam1_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> +       x = fold_convert_loc (clause_loc, sizetype, l);
> +       x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> +                            TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> +       x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> +       tree rprival_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> +       x = fold_build2_loc (clause_loc, MINUS_EXPR, unsigned_type_node, l, 
> k);
> +       x = fold_convert_loc (clause_loc, sizetype, x);
> +       x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> +                            TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> +       x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> +       tree rprivalmk_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> +       x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, ivar,
> +                            TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> +       x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rprivb), rprivb, x);
> +       tree rprivb_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> +       if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> +         {
> +           tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
> +           tree val = var2;
> +           if (new_vard != new_var)
> +             val = build_fold_addr_expr_loc (clause_loc, val);
> +
> +           x = lang_hooks.decls.omp_clause_default_ctor
> +                   (c, var2, build_outer_var_ref (var, ctx));
> +           if (x)
> +             gimplify_and_add (x, &clist);
> +
> +           x = build_outer_var_ref (var, ctx);
> +           x = lang_hooks.decls.omp_clause_assign_op (c, var2, x);
> +           gimplify_and_add (x, &thr01_list);
> +
> +           tree y = (DECL_HAS_VALUE_EXPR_P (new_vard)
> +                     ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
> +           if (var3)
> +             {
> +               x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
> +               gimplify_and_add (x, &thrn1_list);
> +               x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
> +               gimplify_and_add (x, &thr02_list);
> +             }
> +           else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
> +             {
> +               /* Otherwise, assign to it the identity element.  */
> +               gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
> +               tseq = copy_gimple_seq_and_replace_locals (tseq);
> +               SET_DECL_VALUE_EXPR (new_vard, val);
> +               DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
> +               SET_DECL_VALUE_EXPR (placeholder, error_mark_node);
> +               DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
> +               lower_omp (&tseq, ctx);
> +               gimple_seq_add_seq (&thrn1_list, tseq);
> +               tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
> +               lower_omp (&tseq, ctx);
> +               gimple_seq_add_seq (&thr02_list, tseq);
> +               SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
> +               DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> +               OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> +               if (y)
> +                 SET_DECL_VALUE_EXPR (new_vard, y);
> +               else
> +                 {
> +                   DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
> +                   SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
> +                 }
> +             }
> +
> +           x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivam1_ref);
> +           gimplify_and_add (x, &thrn2_list);
> +
> +           if (ctx->scan_exclusive)
> +             {
> +               x = unshare_expr (rprivb_ref);
> +               x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
> +               gimplify_and_add (x, &scan1_list);
> +             }
> +
> +           gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> +           tseq = copy_gimple_seq_and_replace_locals (tseq);
> +           SET_DECL_VALUE_EXPR (placeholder, var2);
> +           DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
> +           lower_omp (&tseq, ctx);
> +           gimple_seq_add_seq (&scan1_list, tseq);
> +
> +           if (ctx->scan_inclusive)
> +             {
> +               x = unshare_expr (rprivb_ref);
> +               x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
> +               gimplify_and_add (x, &scan1_list);
> +             }
> +
> +           x = unshare_expr (rpriva_ref);
> +           x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
> +           gimplify_and_add (x, &mdlist);
> +
> +           tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> +           tseq = copy_gimple_seq_and_replace_locals (tseq);
> +           SET_DECL_VALUE_EXPR (new_vard, val);
> +           DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
> +           SET_DECL_VALUE_EXPR (placeholder, rprivb_ref);
> +           lower_omp (&tseq, ctx);
> +           if (y)
> +             SET_DECL_VALUE_EXPR (new_vard, y);
> +           else
> +             {
> +               DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
> +               SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
> +             }
> +           gimple_seq_add_seq (&input2_list, tseq);
> +
> +           x = unshare_expr (new_var);
> +           x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref);
> +           gimplify_and_add (x, &input2_list);
> +
> +           x = build_outer_var_ref (var, ctx);
> +           x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref);
> +           gimplify_and_add (x, &last_list);
> +
> +           x = lang_hooks.decls.omp_clause_assign_op (c, var2, 
> rprivalmk_ref);
> +           gimplify_and_add (x, &reduc_list);
> +           tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> +           tseq = copy_gimple_seq_and_replace_locals (tseq);
> +           val = rprival_ref;
> +           if (new_vard != new_var)
> +             val = build_fold_addr_expr_loc (clause_loc, val);
> +           SET_DECL_VALUE_EXPR (new_vard, val);
> +           DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
> +           SET_DECL_VALUE_EXPR (placeholder, var2);
> +           lower_omp (&tseq, ctx);
> +           OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
> +           SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
> +           DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> +           if (y)
> +             SET_DECL_VALUE_EXPR (new_vard, y);
> +           else
> +             {
> +               DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
> +               SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
> +             }
> +           gimple_seq_add_seq (&reduc_list, tseq);
> +           x = lang_hooks.decls.omp_clause_assign_op (c, rprival_ref, var2);
> +           gimplify_and_add (x, &reduc_list);
> +
> +           x = lang_hooks.decls.omp_clause_dtor (c, var2);
> +           if (x)
> +             gimplify_and_add (x, dlist);
> +         }
> +       else
> +         {
> +           x = build_outer_var_ref (var, ctx);
> +           gimplify_assign (var2, x, &thr01_list);
> +
> +           x = omp_reduction_init (c, TREE_TYPE (new_var));
> +           gimplify_assign (var2, unshare_expr (x), &thrn1_list);
> +           gimplify_assign (var2, x, &thr02_list);
> +
> +           gimplify_assign (var2, rprivam1_ref, &thrn2_list);
> +
> +           enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
> +           if (code == MINUS_EXPR)
> +             code = PLUS_EXPR;
> +
> +           if (ctx->scan_exclusive)
> +             gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
> +           x = build2 (code, TREE_TYPE (new_var), var2, new_var);
> +           gimplify_assign (var2, x, &scan1_list);
> +           if (ctx->scan_inclusive)
> +             gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
> +
> +           gimplify_assign (unshare_expr (rpriva_ref), var2, &mdlist);
> +
> +           x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2);
> +           gimplify_assign (new_var, x, &input2_list);
> +
> +           gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref,
> +                            &last_list);
> +
> +           x = build2 (code, TREE_TYPE (new_var), rprivalmk_ref,
> +                       unshare_expr (rprival_ref));
> +           gimplify_assign (rprival_ref, x, &reduc_list);
> +         }
> +      }
> +
> +  g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
> +  gimple_seq_add_stmt (&scan1_list, g);
> +  g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
> +  gimple_seq_add_stmt (gimple_omp_body_ptr (scan_stmt2), g);
> +
> +  tree controlb = create_tmp_var (boolean_type_node);
> +  tree controlp = create_tmp_var (ptr_type_node);
> +  tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> +  OMP_CLAUSE_DECL (nc) = controlb;
> +  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> +  *cp1 = nc;
> +  cp1 = &OMP_CLAUSE_CHAIN (nc);
> +  nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> +  OMP_CLAUSE_DECL (nc) = controlp;
> +  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> +  *cp1 = nc;
> +  cp1 = &OMP_CLAUSE_CHAIN (nc);
> +  nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> +  OMP_CLAUSE_DECL (nc) = controlb;
> +  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> +  *cp2 = nc;
> +  cp2 = &OMP_CLAUSE_CHAIN (nc);
> +  nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> +  OMP_CLAUSE_DECL (nc) = controlp;
> +  OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> +  *cp2 = nc;
> +  cp2 = &OMP_CLAUSE_CHAIN (nc);
> +
> +  *cp1 = gimple_omp_for_clauses (stmt);
> +  gimple_omp_for_set_clauses (stmt, new_clauses1);
> +  *cp2 = gimple_omp_for_clauses (new_stmt);
> +  gimple_omp_for_set_clauses (new_stmt, new_clauses2);
> +
> +  gimple_omp_set_body (scan_stmt1, scan1_list);
> +  gimple_omp_set_body (input_stmt2, input2_list);
> +
> +  gsi_insert_seq_after (&input1_gsi, gimple_omp_body (input_stmt1),
> +                       GSI_SAME_STMT);
> +  gsi_remove (&input1_gsi, true);
> +  gsi_insert_seq_after (&scan1_gsi, gimple_omp_body (scan_stmt1),
> +                       GSI_SAME_STMT);
> +  gsi_remove (&scan1_gsi, true);
> +  gsi_insert_seq_after (&input2_gsi, gimple_omp_body (input_stmt2),
> +                       GSI_SAME_STMT);
> +  gsi_remove (&input2_gsi, true);
> +  gsi_insert_seq_after (&scan2_gsi, gimple_omp_body (scan_stmt2),
> +                       GSI_SAME_STMT);
> +  gsi_remove (&scan2_gsi, true);
> +
> +  gimple_seq_add_seq (body_p, clist);
> +
> +  tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab3 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab1);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_seq (body_p, thr01_list);
> +  g = gimple_build_goto (lab3);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_seq (body_p, thrn1_list);
> +  g = gimple_build_label (lab3);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_assign (ivar, size_zero_node);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  gimple_seq_add_stmt (body_p, stmt);
> +  gimple_seq_add_seq (body_p, body);
> +  gimple_seq_add_stmt (body_p, gimple_build_omp_continue (fd->loop.v,
> +                                                         fd->loop.v));
> +
> +  g = gimple_build_omp_return (true);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_seq (body_p, mdlist);
> +
> +  lab1 = create_artificial_label (UNKNOWN_LOCATION);
> +  lab2 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (GT_EXPR, num_threads, integer_one_node, lab1, lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab1);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = omp_build_barrier (NULL);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree down = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (down, build_zero_cst (unsigned_type_node));
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_assign (k, build_one_cst (unsigned_type_node));
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree num_threadsu = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (num_threadsu, NOP_EXPR, num_threads);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree thread_numu = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (thread_numu, NOP_EXPR, thread_num);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree thread_nump1 = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (thread_nump1, PLUS_EXPR, thread_numu,
> +                          build_int_cst (unsigned_type_node, 1));
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  lab3 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_label (lab3);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree twok = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree lab4 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab6 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (GT_EXPR, twok, num_threadsu, lab4, lab5);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab4);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_assign (down, build_all_ones_cst (unsigned_type_node));
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_cond (EQ_EXPR, k, num_threadsu, lab6, lab5);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab6);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_label (lab5);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree cplx = create_tmp_var (build_complex_type (unsigned_type_node, 
> false));
> +  g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok);
> +  gimple_call_set_lhs (g, cplx);
> +  gimple_seq_add_stmt (body_p, g);
> +  tree mul = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (mul, REALPART_EXPR,
> +                          build1 (REALPART_EXPR, unsigned_type_node, cplx));
> +  gimple_seq_add_stmt (body_p, g);
> +  tree ovf = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (ovf, IMAGPART_EXPR,
> +                          build1 (IMAGPART_EXPR, unsigned_type_node, cplx));
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree lab7 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab8 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (EQ_EXPR, ovf, build_zero_cst (unsigned_type_node),
> +                        lab7, lab8);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab7);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree andv = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (andv, BIT_AND_EXPR, k, down);
> +  gimple_seq_add_stmt (body_p, g);
> +  tree andvm1 = create_tmp_var (unsigned_type_node);
> +  g = gimple_build_assign (andvm1, PLUS_EXPR, andv,
> +                          build_minus_one_cst (unsigned_type_node));
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_assign (l, PLUS_EXPR, mul, andvm1);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree lab9 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (LT_EXPR, l, num_threadsu, lab9, lab8);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab9);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_seq (body_p, reduc_list);
> +  g = gimple_build_label (lab8);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  tree lab10 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab11 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree lab12 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (EQ_EXPR, down, build_zero_cst (unsigned_type_node),
> +                        lab10, lab11);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab10);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_assign (k, LSHIFT_EXPR, k, integer_one_node);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_goto (lab12);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab11);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab12);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = omp_build_barrier (NULL);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_cond (NE_EXPR, k, build_zero_cst (unsigned_type_node),
> +                        lab3, lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_label (lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  lab1 = create_artificial_label (UNKNOWN_LOCATION);
> +  lab2 = create_artificial_label (UNKNOWN_LOCATION);
> +  lab3 = create_artificial_label (UNKNOWN_LOCATION);
> +  g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab1);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_seq (body_p, thr02_list);
> +  g = gimple_build_goto (lab3);
> +  gimple_seq_add_stmt (body_p, g);
> +  g = gimple_build_label (lab2);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_seq (body_p, thrn2_list);
> +  g = gimple_build_label (lab3);
> +  gimple_seq_add_stmt (body_p, g);
> +
> +  g = gimple_build_assign (ivar, size_zero_node);
> +  gimple_seq_add_stmt (body_p, g);
> +  gimple_seq_add_stmt (body_p, new_stmt);
> +  gimple_seq_add_seq (body_p, new_body);
> +
> +  gimple_seq new_dlist = NULL;
> +  lab1 = create_artificial_label (UNKNOWN_LOCATION);
> +  lab2 = create_artificial_label (UNKNOWN_LOCATION);
> +  tree num_threadsm1 = create_tmp_var (integer_type_node);
> +  g = gimple_build_assign (num_threadsm1, PLUS_EXPR, num_threads,
> +                          integer_minus_one_node);
> +  gimple_seq_add_stmt (&new_dlist, g);
> +  g = gimple_build_cond (EQ_EXPR, thread_num, num_threadsm1, lab1, lab2);
> +  gimple_seq_add_stmt (&new_dlist, g);
> +  g = gimple_build_label (lab1);
> +  gimple_seq_add_stmt (&new_dlist, g);
> +  gimple_seq_add_seq (&new_dlist, last_list);
> +  g = gimple_build_label (lab2);
> +  gimple_seq_add_stmt (&new_dlist, g);
> +  gimple_seq_add_seq (&new_dlist, *dlist);
> +  *dlist = new_dlist;
> +}
>
>  /* Lower code for an OMP loop directive.  */
>
> @@ -9317,9 +9999,18 @@ lower_omp_for (gimple_stmt_iterator *gsi
>
>    bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
>                      && gimple_omp_for_grid_phony (stmt));
> -  if (!phony_loop)
> -    gimple_seq_add_stmt (&body, stmt);
> -  gimple_seq_add_seq (&body, gimple_omp_body (stmt));
> +  if ((ctx->scan_inclusive || ctx->scan_exclusive)
> +      && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
> +    {
> +      gcc_assert (!phony_loop);
> +      lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx);
> +    }
> +  else
> +    {
> +      if (!phony_loop)
> +       gimple_seq_add_stmt (&body, stmt);
> +      gimple_seq_add_seq (&body, gimple_omp_body (stmt));
> +    }
>
>    if (!phony_loop)
>      gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
> --- gcc/omp-expand.c.jj 2019-07-02 12:58:01.459063560 +0200
> +++ gcc/omp-expand.c    2019-07-02 13:08:01.388672109 +0200
> @@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_regio
>      }
>  }
>
> +/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
> +   compute needed allocation size.  If !ALLOC of team allocations,
> +   if ALLOC of thread allocation.  SZ is the initial needed size for
> +   other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
> +   CNT number of elements of each array, for !ALLOC this is
> +   omp_get_num_threads (), for ALLOC number of iterations handled by the
> +   current thread.  If PTR is non-NULL, it is the start of the allocation
> +   and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
> +   clauses pointers to the corresponding arrays.  */
> +
> +static tree
> +expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
> +                          unsigned HOST_WIDE_INT alloc_align, tree cnt,
> +                          gimple_stmt_iterator *gsi, bool alloc)
> +{
> +  tree eltsz = NULL_TREE;
> +  unsigned HOST_WIDE_INT preval = 0;
> +  if (ptr && sz)
> +    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
> +                      ptr, size_int (sz));
> +  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> +    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> +       && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
> +       && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
> +      {
> +       tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
> +       unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
> +       if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
> +         {
> +           unsigned HOST_WIDE_INT szl
> +             = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
> +           szl = least_bit_hwi (szl);
> +           if (szl)
> +             al = MIN (al, szl);
> +         }
> +       if (ptr == NULL_TREE)
> +         {
> +           if (eltsz == NULL_TREE)
> +             eltsz = TYPE_SIZE_UNIT (pointee_type);
> +           else
> +             eltsz = size_binop (PLUS_EXPR, eltsz,
> +                                 TYPE_SIZE_UNIT (pointee_type));
> +         }
> +       if (preval == 0 && al <= alloc_align)
> +         {
> +           unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
> +           sz += diff;
> +           if (diff && ptr)
> +             ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
> +                                ptr, size_int (diff));
> +         }
> +       else if (al > preval)
> +         {
> +           if (ptr)
> +             {
> +               ptr = fold_convert (pointer_sized_int_node, ptr);
> +               ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
> +                                  build_int_cst (pointer_sized_int_node,
> +                                                 al - 1));
> +               ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
> +                                  build_int_cst (pointer_sized_int_node,
> +                                                 -(HOST_WIDE_INT) al));
> +               ptr = fold_convert (ptr_type_node, ptr);
> +             }
> +           else
> +             sz += al - 1;
> +         }
> +       if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
> +         preval = al;
> +       else
> +         preval = 1;
> +       if (ptr)
> +         {
> +           expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
> +           ptr = OMP_CLAUSE_DECL (c);
> +           ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
> +                              size_binop (MULT_EXPR, cnt,
> +                                          TYPE_SIZE_UNIT (pointee_type)));
> +         }
> +      }
> +
> +  if (ptr == NULL_TREE)
> +    {
> +      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
> +      if (sz)
> +       eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
> +      return eltsz;
> +    }
> +  else
> +    return ptr;
> +}
> +
>  /* A subroutine of expand_omp_for.  Generate code for a parallel
>     loop with static schedule and no specified chunk size.  Given
>     parameters:
> @@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct om
>                                struct omp_for_data *fd,
>                                gimple *inner_stmt)
>  {
> -  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
> +  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
>    tree type, itype, vmain, vback;
>    basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
>    basic_block body_bb, cont_bb, collapse_bb = NULL;
> -  basic_block fin_bb;
> +  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
> +  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
>    gimple_stmt_iterator gsi, gsip;
>    edge ep;
>    bool broken_loop = region->cont == NULL;
> @@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct om
>        c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
>        cond_var = OMP_CLAUSE_DECL (c);
>      }
> -  if (fd->have_reductemp || fd->have_pointer_condtemp)
> +  if (fd->have_reductemp
> +      || fd->have_pointer_condtemp
> +      || fd->have_nonctrl_scantemp)
>      {
>        tree t1 = build_int_cst (long_integer_type_node, 0);
>        tree t2 = build_int_cst (long_integer_type_node, 1);
> @@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct om
>        gimple_stmt_iterator gsi2 = gsi_none ();
>        gimple *g = NULL;
>        tree mem = null_pointer_node, memv = NULL_TREE;
> +      unsigned HOST_WIDE_INT condtemp_sz = 0;
> +      unsigned HOST_WIDE_INT alloc_align = 0;
>        if (fd->have_reductemp)
>         {
> +         gcc_assert (!fd->have_nonctrl_scantemp);
>           tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
>           reductions = OMP_CLAUSE_DECL (c);
>           gcc_assert (TREE_CODE (reductions) == SSA_NAME);
> @@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct om
>             gsi2 = gsip;
>           reductions = null_pointer_node;
>         }
> -      if (fd->have_pointer_condtemp)
> +      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
>         {
> -         tree type = TREE_TYPE (condtemp);
> +         tree type;
> +         if (fd->have_pointer_condtemp)
> +           type = TREE_TYPE (condtemp);
> +         else
> +           type = ptr_type_node;
>           memv = create_tmp_var (type);
>           TREE_ADDRESSABLE (memv) = 1;
> -         unsigned HOST_WIDE_INT sz
> -           = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
> -         sz *= fd->lastprivate_conditional;
> -         expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
> -                                  false);
> +         unsigned HOST_WIDE_INT sz = 0;
> +         tree size = NULL_TREE;
> +         if (fd->have_pointer_condtemp)
> +           {
> +             sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
> +             sz *= fd->lastprivate_conditional;
> +             condtemp_sz = sz;
> +           }
> +         if (fd->have_nonctrl_scantemp)
> +           {
> +             nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
> +             gimple *g = gimple_build_call (nthreads, 0);
> +             nthreads = create_tmp_var (integer_type_node);
> +             gimple_call_set_lhs (g, nthreads);
> +             gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
> +             nthreads = fold_convert (sizetype, nthreads);
> +             alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
> +             size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
> +                                               alloc_align, nthreads, NULL,
> +                                               false);
> +             size = fold_convert (type, size);
> +           }
> +         else
> +           size = build_int_cst (type, sz);
> +         expand_omp_build_assign (&gsi2, memv, size, false);
>           mem = build_fold_addr_expr (memv);
>         }
>        tree t
> @@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct om
>                                 true, GSI_SAME_STMT);
>        if (fd->have_pointer_condtemp)
>         expand_omp_build_assign (&gsi2, condtemp, memv, false);
> +      if (fd->have_nonctrl_scantemp)
> +       {
> +         tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
> +         expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
> +                                    alloc_align, nthreads, &gsi2, false);
> +       }
>        if (fd->have_reductemp)
>         {
>           gsi_remove (&gsi2, true);
> @@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct om
>    gsi = gsi_last_nondebug_bb (third_bb);
>    gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
>
> +  if (fd->have_nonctrl_scantemp)
> +    {
> +      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
> +      tree controlp = NULL_TREE, controlb = NULL_TREE;
> +      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> +       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> +           && OMP_CLAUSE__SCANTEMP__CONTROL (c))
> +         {
> +           if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
> +             controlb = OMP_CLAUSE_DECL (c);
> +           else
> +             controlp = OMP_CLAUSE_DECL (c);
> +           if (controlb && controlp)
> +             break;
> +         }
> +      gcc_assert (controlp && controlb);
> +      tree cnt = create_tmp_var (sizetype);
> +      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
> +      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
> +                                          alloc_align, cnt, NULL, true);
> +      tree size = create_tmp_var (sizetype);
> +      expand_omp_build_assign (&gsi, size, sz, false);
> +      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
> +                             size, size_int (16384));
> +      expand_omp_build_assign (&gsi, controlb, cmp);
> +      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
> +                            NULL_TREE, NULL_TREE);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      fourth_bb = split_block (third_bb, g)->dest;
> +      gsi = gsi_last_nondebug_bb (fourth_bb);
> +      /* FIXME: Once we have allocators, this should use allocator.  */
> +      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, 
> size);
> +      gimple_call_set_lhs (g, controlp);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
> +                                &gsi, true);
> +      gsi_prev (&gsi);
> +      g = gsi_stmt (gsi);
> +      fifth_bb = split_block (fourth_bb, g)->dest;
> +      gsi = gsi_last_nondebug_bb (fifth_bb);
> +
> +      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
> +      gimple_call_set_lhs (g, controlp);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
> +      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> +       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> +           && OMP_CLAUSE__SCANTEMP__ALLOC (c))
> +         {
> +           tree tmp = create_tmp_var (sizetype);
> +           tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
> +           g = gimple_build_assign (tmp, MULT_EXPR, cnt,
> +                                    TYPE_SIZE_UNIT (pointee_type));
> +           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +           g = gimple_build_call (alloca_decl, 2, tmp,
> +                                  size_int (TYPE_ALIGN (pointee_type)));
> +           gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
> +           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +         }
> +
> +      sixth_bb = split_block (fifth_bb, g)->dest;
> +      gsi = gsi_last_nondebug_bb (sixth_bb);
> +    }
> +
>    t = build2 (MULT_EXPR, itype, q, threadid);
>    t = build2 (PLUS_EXPR, itype, t, tt);
>    s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 
> GSI_SAME_STMT);
> @@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct om
>    if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
>      {
>        t = gimple_omp_return_lhs (gsi_stmt (gsi));
> -      if (fd->have_reductemp || fd->have_pointer_condtemp)
> +      if (fd->have_reductemp
> +         || ((fd->have_pointer_condtemp || fd->have_scantemp)
> +             && !fd->have_nonctrl_scantemp))
>         {
>           tree fn;
>           if (t)
> @@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct om
>        gcall *g = gimple_build_call (fn, 0);
>        gsi_insert_after (&gsi, g, GSI_SAME_STMT);
>      }
> +  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
> +    {
> +      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
> +      tree controlp = NULL_TREE, controlb = NULL_TREE;
> +      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> +       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> +           && OMP_CLAUSE__SCANTEMP__CONTROL (c))
> +         {
> +           if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
> +             controlb = OMP_CLAUSE_DECL (c);
> +           else
> +             controlp = OMP_CLAUSE_DECL (c);
> +           if (controlb && controlp)
> +             break;
> +         }
> +      gcc_assert (controlp && controlb);
> +      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
> +                                    NULL_TREE, NULL_TREE);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      exit1_bb = split_block (exit_bb, g)->dest;
> +      gsi = gsi_after_labels (exit1_bb);
> +      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
> +                            controlp);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      exit2_bb = split_block (exit1_bb, g)->dest;
> +      gsi = gsi_after_labels (exit2_bb);
> +      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 
> 1,
> +                            controlp);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      exit3_bb = split_block (exit2_bb, g)->dest;
> +      gsi = gsi_after_labels (exit3_bb);
> +    }
>    gsi_remove (&gsi, true);
>
>    /* Connect all the blocks.  */
> @@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct om
>    ep = find_edge (entry_bb, second_bb);
>    ep->flags = EDGE_TRUE_VALUE;
>    ep->probability = profile_probability::guessed_always ().apply_scale (1, 
> 4);
> -  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
> -  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
> +  if (fourth_bb)
> +    {
> +      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
> +      ep->probability
> +       = profile_probability::guessed_always ().apply_scale (1, 2);
> +      ep = find_edge (third_bb, fourth_bb);
> +      ep->flags = EDGE_TRUE_VALUE;
> +      ep->probability
> +       = profile_probability::guessed_always ().apply_scale (1, 2);
> +      ep = find_edge (fourth_bb, fifth_bb);
> +      redirect_edge_and_branch (ep, sixth_bb);
> +    }
> +  else
> +    sixth_bb = third_bb;
> +  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
> +  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
> +  if (exit1_bb)
> +    {
> +      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
> +      ep->probability
> +       = profile_probability::guessed_always ().apply_scale (1, 2);
> +      ep = find_edge (exit_bb, exit1_bb);
> +      ep->flags = EDGE_TRUE_VALUE;
> +      ep->probability
> +       = profile_probability::guessed_always ().apply_scale (1, 2);
> +      ep = find_edge (exit1_bb, exit2_bb);
> +      redirect_edge_and_branch (ep, exit3_bb);
> +    }
>
>    if (!broken_loop)
>      {
> @@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct om
>
>    set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
>    set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
> -  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
> +  if (fourth_bb)
> +    {
> +      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
> +      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
> +    }
> +  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
>
>    set_immediate_dominator (CDI_DOMINATORS, body_bb,
>                            recompute_dominator (CDI_DOMINATORS, body_bb));
>    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
>                            recompute_dominator (CDI_DOMINATORS, fin_bb));
> +  if (exit1_bb)
> +    {
> +      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
> +      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
> +    }
>
>    struct loop *loop = body_bb->loop_father;
>    if (loop != entry_bb->loop_father)
> --- gcc/testsuite/c-c++-common/gomp/scan-3.c.jj 2019-07-02 12:57:30.366553813 
> +0200
> +++ gcc/testsuite/c-c++-common/gomp/scan-3.c    2019-07-02 13:08:01.383672187 
> +0200
> @@ -8,7 +8,7 @@ f1 (int *c, int *d)
>    for (i = 0; i < 64; i++)
>      {
>        d[i] = a;
> -      #pragma omp scan inclusive (a)           /* { dg-message "sorry, 
> unimplemented: '#pragma omp scan' not supported yet" } */
> +      #pragma omp scan inclusive (a)
>        a += c[i];
>      }
>  }
> --- gcc/testsuite/c-c++-common/gomp/scan-5.c.jj 2019-07-02 13:01:48.959476445 
> +0200
> +++ gcc/testsuite/c-c++-common/gomp/scan-5.c    2019-07-02 13:08:24.532311809 
> +0200
> @@ -6,7 +6,7 @@ foo (int *a, int *b)
>    for (int i = 0; i < 64; i++)
>      {
>        r += a[i];
> -      #pragma omp scan inclusive (r)   /* { dg-message "sorry, 
> unimplemented: '#pragma omp scan' not supported yet" } */
> +      #pragma omp scan inclusive (r)
>        b[i] = r;
>      }
>    return r;
> --- libgomp/testsuite/libgomp.c++/scan-1.C.jj   2019-07-02 13:16:12.144014405 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-1.C      2019-07-02 13:17:41.705604254 
> +0200
> @@ -0,0 +1,151 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +struct S {
> +  inline S ();
> +  inline ~S ();
> +  inline S (const S &);
> +  inline S & operator= (const S &);
> +  int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> +  s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> +  s = x.s;
> +  return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> +  x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer 
> (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r.s += a[i].s;
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +bar (void)
> +{
> +  S s;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s.s += 2 * a[i].s;
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return S (s);
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r.s += a[i].s;
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +qux (void)
> +{
> +  S s;
> +  #pragma omp parallel for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s.s += 2 * a[i].s;
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return S (s);
> +}
> +
> +int
> +main ()
> +{
> +  S s;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i].s = i;
> +      b[i].s = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += i;
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +    }
> +  if (bar ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += 2 * i;
> +      if (b[i].s != s.s)
> +       abort ();
> +    }
> +  r.s = 0;
> +  baz (a, b);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += i;
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +    }
> +  if (qux ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += 2 * i;
> +      if (b[i].s != s.s)
> +       abort ();
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c++/scan-2.C.jj   2019-07-02 13:16:12.151014294 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-2.C      2019-07-02 13:29:26.250606680 
> +0200
> @@ -0,0 +1,116 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> +  #pragma omp for reduction (inscan, +:r) nowait
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, +:s) nowait
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp parallel for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +    }
> +  r = 0;
> +  baz (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +    }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-3.C.jj   2019-07-02 13:16:12.157014200 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-3.C      2019-07-02 13:29:33.038500894 
> +0200
> @@ -0,0 +1,119 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer 
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> +  #pragma omp for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> +  #pragma omp parallel for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp parallel for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +    }
> +  r = 0;
> +  baz (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c++/scan-4.C.jj   2019-07-02 13:16:12.559007871 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-4.C      2019-07-02 13:52:26.973993019 
> +0200
> @@ -0,0 +1,150 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +struct S {
> +  inline S ();
> +  inline ~S ();
> +  inline S (const S &);
> +  inline S & operator= (const S &);
> +  int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> +  s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> +  s = x.s;
> +  return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> +  x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer 
> (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b, S &r)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r.s += a[i].s;
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +bar ()
> +{
> +  S s;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s.s += 2 * a[i].s;
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b, S &r)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r.s += a[i].s;
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +qux ()
> +{
> +  S s;
> +  #pragma omp parallel for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s.s += 2 * a[i].s;
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  S s;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i].s = i;
> +      b[i].s = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, r);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += i;
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +    }
> +  if (bar ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += 2 * i;
> +      if (b[i].s != s.s)
> +       abort ();
> +    }
> +  r.s = 0;
> +  baz (a, b, r);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += i;
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +    }
> +  if (qux ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s.s += 2 * i;
> +      if (b[i].s != s.s)
> +       abort ();
> +    }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-5.C.jj   2019-07-02 13:16:12.567007745 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-5.C      2019-07-02 13:29:08.895877145 
> +0200
> @@ -0,0 +1,158 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +template <typename T>
> +struct S {
> +  inline S ();
> +  inline ~S ();
> +  inline S (const S &);
> +  inline S & operator= (const S &);
> +  T s;
> +};
> +
> +template <typename T>
> +S<T>::S () : s (0)
> +{
> +}
> +
> +template <typename T>
> +S<T>::~S ()
> +{
> +}
> +
> +template <typename T>
> +S<T>::S (const S &x)
> +{
> +  s = x.s;
> +}
> +
> +template <typename T>
> +S<T> &
> +S<T>::operator= (const S &x)
> +{
> +  s = x.s;
> +  return *this;
> +}
> +
> +template <typename T>
> +static inline void
> +ini (S<T> &x)
> +{
> +  x.s = 0;
> +}
> +
> +S<int> r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) 
> initializer (ini (omp_priv))
> +
> +template <typename T>
> +__attribute__((noipa)) void
> +foo (S<T> *a, S<T> *b)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) S<T>
> +bar (void)
> +{
> +  S<T> s;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return S<T> (s);
> +}
> +
> +__attribute__((noipa)) void
> +baz (S<int> *a, S<int> *b)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +__attribute__((noipa)) S<int>
> +qux (void)
> +{
> +  S<int> s;
> +  #pragma omp parallel for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return S<int> (s);
> +}
> +
> +int
> +main ()
> +{
> +  S<int> s;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i].s = i;
> +      b[i].s = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (bar<int> ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +  r.s = 0;
> +  baz (a, b);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (qux ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-6.C.jj   2019-07-02 13:16:12.574007634 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-6.C      2019-07-02 13:31:59.909212015 
> +0200
> @@ -0,0 +1,120 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +template <typename T, typename U>
> +__attribute__((noipa)) void
> +foo (T a, T b, U r)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) T
> +bar ()
> +{
> +  T &s = q;
> +  q = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) void
> +baz (T *a, T *b, T &r)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (T i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) int
> +qux ()
> +{
> +  T s = q;
> +  q = 0;
> +  #pragma omp parallel for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo<int *, int &> (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar<int> () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz<int> (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux<int &> () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-7.C.jj   2019-07-02 13:16:12.581007523 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-7.C      2019-07-02 13:33:29.036823011 
> +0200
> @@ -0,0 +1,118 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer 
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> +  #pragma omp for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, foo:s) nowait
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> +  #pragma omp parallel for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int &s = q;
> +  q = 0;
> +  #pragma omp parallel for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz (a, b, r);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-8.C.jj   2019-07-02 13:16:12.587007429 
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-8.C      2019-07-02 13:43:27.518448406 
> +0200
> @@ -0,0 +1,150 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +struct S {
> +  inline S ();
> +  inline ~S ();
> +  inline S (const S &);
> +  inline S & operator= (const S &);
> +  int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> +  s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> +  s = x.s;
> +  return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> +  x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer 
> (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b, S &r)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +bar (void)
> +{
> +  S s;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b, S &r)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r.s += a[i].s;
> +    }
> +}
> +
> +__attribute__((noipa)) S
> +qux (void)
> +{
> +  S s;
> +  #pragma omp parallel for reduction (inscan, plus:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s.s += 2 * a[i].s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  S s;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i].s = i;
> +      b[i].s = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, r);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (bar ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +  r.s = 0;
> +  baz (a, b, r);
> +  if (r.s != 1024 * 1023 / 2)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      else
> +       b[i].s = 25;
> +      s.s += i;
> +    }
> +  if (qux ().s != 1024 * 1023)
> +    abort ();
> +  s.s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i].s != s.s)
> +       abort ();
> +      s.s += 2 * i;
> +    }
> +}
> --- libgomp/testsuite/libgomp.c/scan-1.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-1.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,115 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +    }
> +  r = 0;
> +  baz (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-2.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-2.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,117 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer 
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> +  #pragma omp for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> +  #pragma omp parallel for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r += a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s += 2 * a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +    }
> +  r = 0;
> +  baz (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s)
> +       abort ();
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-3.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-3.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,88 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +float r = 1.0f, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (float *a, float *b)
> +{
> +  #pragma omp for reduction (inscan, *:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      r *= a[i];
> +      #pragma omp scan inclusive(r)
> +      b[i] = r;
> +    }
> +}
> +
> +__attribute__((noipa)) float
> +bar (void)
> +{
> +  float s = -__builtin_inff ();
> +  #pragma omp parallel for reduction (inscan, max:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      s = s > a[i] ? s : a[i];
> +      #pragma omp scan inclusive(s)
> +      b[i] = s;
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  float s = 1.0f;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (i < 80)
> +       a[i] = (i & 1) ? 0.25f : 0.5f;
> +      else if (i < 200)
> +       a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> +      else if (i < 280)
> +       a[i] = (i & 1) ? 0.25f : 0.5f;
> +      else if (i < 380)
> +       a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> +      else
> +       switch (i % 6)
> +         {
> +         case 0: a[i] = 0.25f; break;
> +         case 1: a[i] = 2.0f; break;
> +         case 2: a[i] = -1.0f; break;
> +         case 3: a[i] = -4.0f; break;
> +         case 4: a[i] = 0.5f; break;
> +         case 5: a[i] = 1.0f; break;
> +         default: a[i] = 0.0f; break;
> +         }
> +      b[i] = -19.0f;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r * 16384.0f != 0.125f)
> +    abort ();
> +  float m = -175.25f;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s *= a[i];
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       {
> +         a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
> +         b[i] = -231.75f;
> +         m += 0.75f;
> +       }
> +    }
> +  if (bar () != 592.0f)
> +    abort ();
> +  s = -__builtin_inff ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (s < a[i])
> +       s = a[i];
> +      if (b[i] != s)
> +       abort ();
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-4.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-4.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,179 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +unsigned short r2, b2[1024];
> +unsigned char r3, b3[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> +  #pragma omp for reduction (inscan, +:r, r2, r3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      { r += a[i]; r2 += a[i]; r3 += a[i]; }
> +      #pragma omp scan inclusive(r, r2, r3)
> +      {
> +       b[i] = r;
> +       b2[i] = r2;
> +       b3[i] = r3;
> +      }
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (unsigned short *s2p, unsigned char *s3p)
> +{
> +  int s = 0;
> +  unsigned short s2 = 0;
> +  unsigned char s3 = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, +:s, s2, s3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      {
> +       s += 2 * a[i];
> +       s2 += 2 * a[i];
> +       s3 += 2 * a[i];
> +      }
> +      #pragma omp scan inclusive(s, s2, s3)
> +      { b[i] = s; b2[i] = s2; b3[i] = s3; }
> +    }
> +  *s2p = s2;
> +  *s3p = s3;
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r, r2, r3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      {
> +       r += a[i];
> +       r2 += a[i];
> +       r3 += a[i];
> +      }
> +      #pragma omp scan inclusive(r, r2, r3)
> +      {
> +       b[i] = r;
> +       b2[i] = r2;
> +       b3[i] = r3;
> +      }
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (unsigned short *s2p, unsigned char *s3p)
> +{
> +  int s = 0;
> +  unsigned short s2 = 0;
> +  unsigned char s3 = 0;
> +  #pragma omp parallel for reduction (inscan, +:s, s2, s3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
> +      #pragma omp scan inclusive(s, s2, s3)
> +      { b[i] = s; b2[i] = s2; b3[i] = s3; }
> +    }
> +  *s2p = s2;
> +  *s3p = s3;
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  unsigned short s2;
> +  unsigned char s3;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      b2[i] = -1;
> +      b3[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, b2, b3);
> +  if (r != 1024 * 1023 / 2
> +      || r2 != (unsigned short) r
> +      || r3 != (unsigned char) r)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = 25;
> +         b2[i] = 24;
> +         b3[i] = 26;
> +       }
> +    }
> +  if (bar (&s2, &s3) != 1024 * 1023)
> +    abort ();
> +  if (s2 != (unsigned short) (1024 * 1023)
> +      || s3 != (unsigned char) (1024 * 1023))
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = -1;
> +         b2[i] = -1;
> +         b3[i] = -1;
> +       }
> +    }
> +  r = 0;
> +  r2 = 0;
> +  r3 = 0;
> +  baz (a, b, b2, b3);
> +  if (r != 1024 * 1023 / 2
> +      || r2 != (unsigned short) r
> +      || r3 != (unsigned char) r)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += i;
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = 25;
> +         b2[i] = 24;
> +         b3[i] = 26;
> +       }
> +    }
> +  s2 = 0;
> +  s3 = 0;
> +  if (qux (&s2, &s3) != 1024 * 1023)
> +    abort ();
> +  if (s2 != (unsigned short) (1024 * 1023)
> +      || s3 != (unsigned char) (1024 * 1023))
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      s += 2 * i;
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-5.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-5.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,115 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> +  #pragma omp for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel for reduction (inscan, +:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-6.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-6.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,117 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer 
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> +  #pragma omp for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> +  #pragma omp parallel for reduction (inscan, foo:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r += a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> +  int s = 0;
> +  #pragma omp parallel for reduction (inscan, foo:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s += 2 * a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = 25;
> +      s += i;
> +    }
> +  if (bar () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -1;
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  baz (a, b);
> +  if (r != 1024 * 1023 / 2)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -25;
> +      s += i;
> +    }
> +  if (qux () != 1024 * 1023)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-7.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-7.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,86 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +float r = 1.0f, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (float *a, float *b)
> +{
> +  #pragma omp for reduction (inscan, *:r)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = r;
> +      #pragma omp scan exclusive(r)
> +      r *= a[i];
> +    }
> +}
> +
> +__attribute__((noipa)) float
> +bar (void)
> +{
> +  float s = -__builtin_inff ();
> +  #pragma omp parallel for reduction (inscan, max:s)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      b[i] = s;
> +      #pragma omp scan exclusive(s)
> +      s = s > a[i] ? s : a[i];
> +    }
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  float s = 1.0f;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (i < 80)
> +       a[i] = (i & 1) ? 0.25f : 0.5f;
> +      else if (i < 200)
> +       a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> +      else if (i < 280)
> +       a[i] = (i & 1) ? 0.25f : 0.5f;
> +      else if (i < 380)
> +       a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> +      else
> +       switch (i % 6)
> +         {
> +         case 0: a[i] = 0.25f; break;
> +         case 1: a[i] = 2.0f; break;
> +         case 2: a[i] = -1.0f; break;
> +         case 3: a[i] = -4.0f; break;
> +         case 4: a[i] = 0.5f; break;
> +         case 5: a[i] = 1.0f; break;
> +         default: a[i] = 0.0f; break;
> +         }
> +      b[i] = -19.0f;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b);
> +  if (r * 16384.0f != 0.125f)
> +    abort ();
> +  float m = -175.25f;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      else
> +       b[i] = -231.75f;
> +      s *= a[i];
> +      a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
> +      m += 0.75f;
> +    }
> +  if (bar () != 592.0f)
> +    abort ();
> +  s = -__builtin_inff ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s)
> +       abort ();
> +      if (s < a[i])
> +       s = a[i];
> +    }
> +  return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-8.c.jj     2019-07-02 13:08:01.390672078 
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-8.c        2019-07-02 13:08:01.390672078 
> +0200
> @@ -0,0 +1,179 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +unsigned short r2, b2[1024];
> +unsigned char r3, b3[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> +  #pragma omp for reduction (inscan, +:r, r2, r3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      {
> +       b[i] = r;
> +       b2[i] = r2;
> +       b3[i] = r3;
> +      }
> +      #pragma omp scan exclusive(r, r2, r3)
> +      { r += a[i]; r2 += a[i]; r3 += a[i]; }
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +bar (unsigned short *s2p, unsigned char *s3p)
> +{
> +  int s = 0;
> +  unsigned short s2 = 0;
> +  unsigned char s3 = 0;
> +  #pragma omp parallel
> +  #pragma omp for reduction (inscan, +:s, s2, s3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      { b[i] = s; b2[i] = s2; b3[i] = s3; }
> +      #pragma omp scan exclusive(s, s2, s3)
> +      {
> +       s += 2 * a[i];
> +       s2 += 2 * a[i];
> +       s3 += 2 * a[i];
> +      }
> +    }
> +  *s2p = s2;
> +  *s3p = s3;
> +  return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> +  #pragma omp parallel for reduction (inscan, +:r, r2, r3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      {
> +       b[i] = r;
> +       b2[i] = r2;
> +       b3[i] = r3;
> +      }
> +      #pragma omp scan exclusive(r, r2, r3)
> +      {
> +       r += a[i];
> +       r2 += a[i];
> +       r3 += a[i];
> +      }
> +    }
> +}
> +
> +__attribute__((noipa)) int
> +qux (unsigned short *s2p, unsigned char *s3p)
> +{
> +  int s = 0;
> +  unsigned short s2 = 0;
> +  unsigned char s3 = 0;
> +  #pragma omp parallel for reduction (inscan, +:s, s2, s3)
> +  for (int i = 0; i < 1024; i++)
> +    {
> +      { b[i] = s; b2[i] = s2; b3[i] = s3; }
> +      #pragma omp scan exclusive(s, s2, s3)
> +      { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
> +    }
> +  *s2p = s2;
> +  *s3p = s3;
> +  return s;
> +}
> +
> +int
> +main ()
> +{
> +  int s = 0;
> +  unsigned short s2;
> +  unsigned char s3;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      a[i] = i;
> +      b[i] = -1;
> +      b2[i] = -1;
> +      b3[i] = -1;
> +      asm ("" : "+g" (i));
> +    }
> +  #pragma omp parallel
> +  foo (a, b, b2, b3);
> +  if (r != 1024 * 1023 / 2
> +      || r2 != (unsigned short) r
> +      || r3 != (unsigned char) r)
> +    abort ();
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = 25;
> +         b2[i] = 24;
> +         b3[i] = 26;
> +       }
> +      s += i;
> +    }
> +  if (bar (&s2, &s3) != 1024 * 1023)
> +    abort ();
> +  if (s2 != (unsigned short) (1024 * 1023)
> +      || s3 != (unsigned char) (1024 * 1023))
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = -1;
> +         b2[i] = -1;
> +         b3[i] = -1;
> +       }
> +      s += 2 * i;
> +    }
> +  r = 0;
> +  r2 = 0;
> +  r3 = 0;
> +  baz (a, b, b2, b3);
> +  if (r != 1024 * 1023 / 2
> +      || r2 != (unsigned short) r
> +      || r3 != (unsigned char) r)
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      else
> +       {
> +         b[i] = 25;
> +         b2[i] = 24;
> +         b3[i] = 26;
> +       }
> +      s += i;
> +    }
> +  s2 = 0;
> +  s3 = 0;
> +  if (qux (&s2, &s3) != 1024 * 1023)
> +    abort ();
> +  if (s2 != (unsigned short) (1024 * 1023)
> +      || s3 != (unsigned char) (1024 * 1023))
> +    abort ();
> +  s = 0;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (b[i] != s
> +         || b2[i] != (unsigned short) s
> +         || b3[i] != (unsigned char) s)
> +       abort ();
> +      s += 2 * i;
> +    }
> +  return 0;
> +}
>
>         Jakub

Reply via email to