On Wed, 3 Jul 2019 at 07:11, Jakub Jelinek <[email protected]> wrote:
>
> Hi!
>
> The following patch implements roughly the
> https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01330.html
> design for worksharing loops (so far not for composite for simd, that will
> be the next larger task).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
>
> 2019-07-03 Jakub Jelinek <[email protected]>
>
> * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_
> clause.
> * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of
> OMP_CLAUSE__CONDTEMP_ as range's upper bound.
> (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define.
> * tree.c (omp_clause_num_ops, omp_clause_code_name): Add
> OMP_CLAUSE__SCANTEMP_ entry.
> (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_.
> * tree-pretty-print.c (dump_omp_clause): Likewise.
> * tree-nested.c (convert_nonlocal_omp_clauses,
> convert_local_omp_clauses): Likewise.
> * omp-general.h (struct omp_for_data): Add have_scantemp and
> have_nonctrl_scantemp members.
> * omp-general.c (omp_extract_for_data): Initialize them.
> * omp-low.c (struct omp_context): Add scan_exclusive member.
> (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind
> result again with GF_OMP_FOR_KIND_MASK. Initialize also
> ctx->scan_exclusive.
> (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead
> of !ctx->scan_inclusive.
> (lower_rec_input_clauses): Simplify gimplification of dtors using
> gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN
> rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing
> loops. Don't add barrier for reduction_omp_orig_ref if
> ctx->scan_??xclusive.
> (lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive.
> (lower_omp_scan): Use ctx->scan_exclusive instead
> of !ctx->scan_inclusive. Handle worksharing loops with inscan
> reductions. Use new_vard != new_var instead of repeated
> omp_is_reference calls.
> (omp_find_scan, lower_omp_for_scan): New functions.
> (lower_omp_for): Call lower_omp_for_scan for worksharing loops with
> inscan reductions.
> * omp-expand.c (expand_omp_scantemp_alloc): New function.
> (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp
> and fd->have_scantemp.
>
> * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message.
> * c-c++-common/gomp/scan-5.c (foo): Likewise.
>
Hi Jakub,
This patch leads to new failures on arm:
FAIL: c-c++-common/gomp/scan-3.c (internal compiler error)
FAIL: c-c++-common/gomp/scan-5.c (internal compiler error)
The logs say:
spawn -ignore SIGHUP
/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/xgcc
-B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/
/gcc/testsuite/c-c++-common/gomp/scan-3.c -fno-diagnostics-show-caret
-fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fopenmp
-Wno-hsa -S -o scan-3.s
during RTL pass: expand
/gcc/testsuite/c-c++-common/gomp/scan-3.c: In function 'f1':
/gcc/testsuite/c-c++-common/gomp/scan-3.c:4:1: internal compiler
error: tree check: expected ssa_name, have var_decl in single_imm_use,
at ssa-iterators.h:421
0x5b43ad tree_check_failed(tree_node const*, char const*, int, char const*, ...)
/gcc/tree.c:9902
0x9c18c5 tree_check(tree_node const*, char const*, int, char const*, tree_code)
/gcc/tree.h:3473
0x9c18c5 single_imm_use
/gcc/ssa-iterators.h:421
0x9c18c5 expand_mul_overflow
/gcc/internal-fn.c:1590
0x9c209b expand_arith_overflow
/gcc/internal-fn.c:2318
0x75f807 expand_call_stmt
/gcc/cfgexpand.c:2638
0x75f807 expand_gimple_stmt_1
/gcc/cfgexpand.c:3708
0x75f807 expand_gimple_stmt
/gcc/cfgexpand.c:3867
0x765c73 expand_gimple_basic_block
/gcc/cfgexpand.c:5907
0x7682b6 execute
/gcc/cfgexpand.c:6530
Please submit a full bug report,
Christophe
> * testsuite/libgomp.c++/scan-1.C: New test.
> * testsuite/libgomp.c++/scan-2.C: New test.
> * testsuite/libgomp.c++/scan-3.C: New test.
> * testsuite/libgomp.c++/scan-4.C: New test.
> * testsuite/libgomp.c++/scan-5.C: New test.
> * testsuite/libgomp.c++/scan-6.C: New test.
> * testsuite/libgomp.c++/scan-7.C: New test.
> * testsuite/libgomp.c++/scan-8.C: New test.
> * testsuite/libgomp.c/scan-1.c: New test.
> * testsuite/libgomp.c/scan-2.c: New test.
> * testsuite/libgomp.c/scan-3.c: New test.
> * testsuite/libgomp.c/scan-4.c: New test.
> * testsuite/libgomp.c/scan-5.c: New test.
> * testsuite/libgomp.c/scan-6.c: New test.
> * testsuite/libgomp.c/scan-7.c: New test.
> * testsuite/libgomp.c/scan-8.c: New test.
>
> --- gcc/tree-core.h.jj 2019-07-02 12:57:30.283555122 +0200
> +++ gcc/tree-core.h 2019-07-02 13:08:01.387672125 +0200
> @@ -352,6 +352,9 @@ enum omp_clause_code {
> /* Internal clause: temporary for lastprivate(conditional:). */
> OMP_CLAUSE__CONDTEMP_,
>
> + /* Internal clause: temporary for inscan reductions. */
> + OMP_CLAUSE__SCANTEMP_,
> +
> /* OpenACC/OpenMP clause: if (scalar-expression). */
> OMP_CLAUSE_IF,
>
> --- gcc/tree.h.jj 2019-07-02 12:57:30.284555106 +0200
> +++ gcc/tree.h 2019-07-02 13:08:01.385672156 +0200
> @@ -1449,7 +1449,7 @@ class auto_suppress_location_wrappers
> #define OMP_CLAUSE_DECL(NODE) \
> OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \
> OMP_CLAUSE_PRIVATE, \
> - OMP_CLAUSE__CONDTEMP_), 0)
> + OMP_CLAUSE__SCANTEMP_), 0)
> #define OMP_CLAUSE_HAS_LOCATION(NODE) \
> (LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus)
> \
> != UNKNOWN_LOCATION)
> @@ -1761,6 +1761,17 @@ class auto_suppress_location_wrappers
> #define OMP_CLAUSE__CONDTEMP__ITER(NODE) \
> (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag)
>
> +/* _SCANTEMP_ holding temporary with pointer to thread's local array;
> + allocation. */
> +#define OMP_CLAUSE__SCANTEMP__ALLOC(NODE) \
> + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)->base.public_flag)
> +
> +/* _SCANTEMP_ holding temporary with a control variable for deallocation;
> + one boolean_type_node for test whether alloca was used, another one
> + to pass to __builtin_stack_restore or free. */
> +#define OMP_CLAUSE__SCANTEMP__CONTROL(NODE) \
> + TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_))
> +
> /* SSA_NAME accessors. */
>
> /* Whether SSA_NAME NODE is a virtual operand. This simply caches the
> --- gcc/tree.c.jj 2019-07-02 12:57:30.284555106 +0200
> +++ gcc/tree.c 2019-07-02 13:08:01.382672202 +0200
> @@ -311,6 +311,7 @@ unsigned const char omp_clause_num_ops[]
> 1, /* OMP_CLAUSE__LOOPTEMP_ */
> 1, /* OMP_CLAUSE__REDUCTEMP_ */
> 1, /* OMP_CLAUSE__CONDTEMP_ */
> + 1, /* OMP_CLAUSE__SCANTEMP_ */
> 1, /* OMP_CLAUSE_IF */
> 1, /* OMP_CLAUSE_NUM_THREADS */
> 1, /* OMP_CLAUSE_SCHEDULE */
> @@ -391,6 +392,7 @@ const char * const omp_clause_code_name[
> "_looptemp_",
> "_reductemp_",
> "_condtemp_",
> + "_scantemp_",
> "if",
> "num_threads",
> "schedule",
> @@ -12316,6 +12318,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func
> case OMP_CLAUSE__LOOPTEMP_:
> case OMP_CLAUSE__REDUCTEMP_:
> case OMP_CLAUSE__CONDTEMP_:
> + case OMP_CLAUSE__SCANTEMP_:
> case OMP_CLAUSE__SIMDUID_:
> WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
> /* FALLTHRU */
> --- gcc/tree-pretty-print.c.jj 2019-07-02 12:57:30.409553135 +0200
> +++ gcc/tree-pretty-print.c 2019-07-02 13:08:01.383672187 +0200
> @@ -483,6 +483,9 @@ dump_omp_clause (pretty_printer *pp, tre
> case OMP_CLAUSE__CONDTEMP_:
> name = "_condtemp_";
> goto print_remap;
> + case OMP_CLAUSE__SCANTEMP_:
> + name = "_scantemp_";
> + goto print_remap;
> case OMP_CLAUSE_TO_DECLARE:
> name = "to";
> goto print_remap;
> --- gcc/tree-nested.c.jj 2019-07-02 12:57:30.282555137 +0200
> +++ gcc/tree-nested.c 2019-07-02 13:08:01.386672141 +0200
> @@ -1349,6 +1349,7 @@ convert_nonlocal_omp_clauses (tree *pcla
> case OMP_CLAUSE_IF_PRESENT:
> case OMP_CLAUSE_FINALIZE:
> case OMP_CLAUSE__CONDTEMP_:
> + case OMP_CLAUSE__SCANTEMP_:
> break;
>
> /* The following clause belongs to the OpenACC cache directive,
> which
> @@ -2078,6 +2079,7 @@ convert_local_omp_clauses (tree *pclause
> case OMP_CLAUSE_IF_PRESENT:
> case OMP_CLAUSE_FINALIZE:
> case OMP_CLAUSE__CONDTEMP_:
> + case OMP_CLAUSE__SCANTEMP_:
> break;
>
> /* The following clause belongs to the OpenACC cache directive,
> which
> --- gcc/omp-general.h.jj 2019-07-02 12:57:30.282555137 +0200
> +++ gcc/omp-general.h 2019-07-02 13:08:01.387672125 +0200
> @@ -63,7 +63,7 @@ struct omp_for_data
> int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */
> int ordered;
> bool have_nowait, have_ordered, simd_schedule, have_reductemp;
> - bool have_pointer_condtemp;
> + bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp;
> int lastprivate_conditional;
> unsigned char sched_modifiers;
> enum omp_clause_schedule_kind sched_kind;
> --- gcc/omp-general.c.jj 2019-07-02 12:57:30.282555137 +0200
> +++ gcc/omp-general.c 2019-07-02 13:08:01.385672156 +0200
> @@ -169,6 +169,8 @@ omp_extract_for_data (gomp_for *for_stmt
> fd->have_ordered = false;
> fd->have_reductemp = false;
> fd->have_pointer_condtemp = false;
> + fd->have_scantemp = false;
> + fd->have_nonctrl_scantemp = false;
> fd->lastprivate_conditional = 0;
> fd->tiling = NULL_TREE;
> fd->collapse = 1;
> @@ -231,6 +233,12 @@ omp_extract_for_data (gomp_for *for_stmt
> if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
> fd->have_pointer_condtemp = true;
> break;
> + case OMP_CLAUSE__SCANTEMP_:
> + fd->have_scantemp = true;
> + if (!OMP_CLAUSE__SCANTEMP__ALLOC (t)
> + && !OMP_CLAUSE__SCANTEMP__CONTROL (t))
> + fd->have_nonctrl_scantemp = true;
> + break;
> default:
> break;
> }
> --- gcc/omp-low.c.jj 2019-07-02 12:57:30.285555090 +0200
> +++ gcc/omp-low.c 2019-07-02 16:37:26.229374596 +0200
> @@ -144,6 +144,9 @@ struct omp_context
>
> /* True if there is nested scan context with inclusive clause. */
> bool scan_inclusive;
> +
> + /* True if there is nested scan context with exclusive clause. */
> + bool scan_exclusive;
> };
>
> static splay_tree all_contexts;
> @@ -3316,8 +3319,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *g
> break;
>
> case GIMPLE_OMP_FOR:
> - if (((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
> - & GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD)
> + if ((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
> + == GF_OMP_FOR_KIND_SIMD)
> && omp_maybe_offloaded_ctx (ctx)
> && omp_max_simt_vf ())
> scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx);
> @@ -3335,8 +3338,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *g
>
> case GIMPLE_OMP_SCAN:
> if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
> - if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
> - ctx->scan_inclusive = true;
> + {
> + if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
> + ctx->scan_inclusive = true;
> + else if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_EXCLUSIVE)
> + ctx->scan_exclusive = true;
> + }
> /* FALLTHRU */
> case GIMPLE_OMP_SECTION:
> case GIMPLE_OMP_MASTER:
> @@ -3769,7 +3776,7 @@ lower_rec_simd_input_clauses (tree new_v
> sctx->lastlane, NULL_TREE, NULL_TREE);
> TREE_THIS_NOTRAP (*rvar) = 1;
>
> - if (!ctx->scan_inclusive)
> + if (ctx->scan_exclusive)
> {
> /* And for exclusive scan yet another one, which will
> hold the value during the scan phase. */
> @@ -3854,7 +3861,7 @@ static void
> lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
> omp_context *ctx, struct omp_for_data *fd)
> {
> - tree c, dtor, copyin_seq, x, ptr;
> + tree c, copyin_seq, x, ptr;
> bool copyin_by_ref = false;
> bool lastprivate_firstprivate = false;
> bool reduction_omp_orig_ref = false;
> @@ -4541,12 +4548,7 @@ lower_rec_input_clauses (tree clauses, g
> x = lang_hooks.decls.omp_clause_dtor
> (c, build_simple_mem_ref
> (y2));
> if (x)
> - {
> - gimple_seq tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (dlist, tseq);
> - }
> + gimplify_and_add (x, dlist);
> }
> }
> else
> @@ -4913,13 +4915,7 @@ lower_rec_input_clauses (tree clauses, g
> {
> y = lang_hooks.decls.omp_clause_dtor (c, ivar);
> if (y)
> - {
> - gimple_seq tseq = NULL;
> -
> - dtor = y;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (&llist[1], tseq);
> - }
> + gimplify_and_add (y, &llist[1]);
> }
> break;
> }
> @@ -4949,13 +4945,7 @@ lower_rec_input_clauses (tree clauses, g
> do_dtor:
> x = lang_hooks.decls.omp_clause_dtor (c, new_var);
> if (x)
> - {
> - gimple_seq tseq = NULL;
> -
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (dlist, tseq);
> - }
> + gimplify_and_add (x, dlist);
> break;
>
> case OMP_CLAUSE_LINEAR:
> @@ -5103,13 +5093,7 @@ lower_rec_input_clauses (tree clauses, g
> gimplify_and_add (x, &llist[0]);
> x = lang_hooks.decls.omp_clause_dtor (c, ivar);
> if (x)
> - {
> - gimple_seq tseq = NULL;
> -
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (&llist[1], tseq);
> - }
> + gimplify_and_add (x, &llist[1]);
> break;
> }
> if (omp_is_reference (var))
> @@ -5282,12 +5266,7 @@ lower_rec_input_clauses (tree clauses, g
>
> x = lang_hooks.decls.omp_clause_dtor (c, nv);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (dlist, tseq);
> - }
> + gimplify_and_add (x, dlist);
> }
>
> tree ref = build_outer_var_ref (var, ctx);
> @@ -5310,34 +5289,19 @@ lower_rec_input_clauses (tree clauses, g
>
> x = lang_hooks.decls.omp_clause_dtor (c, ivar);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (&llist[1], tseq);
> - }
> + gimplify_and_add (x, &llist[1]);
>
> tree ivar2 = unshare_expr (lvar);
> TREE_OPERAND (ivar2, 1) = sctx.idx;
> x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (&llist[1], tseq);
> - }
> + gimplify_and_add (x, &llist[1]);
>
> if (rvar2)
> {
> x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (&llist[1], tseq);
> - }
> + gimplify_and_add (x, &llist[1]);
> }
> break;
> }
> @@ -5362,12 +5326,7 @@ lower_rec_input_clauses (tree clauses, g
> build_fold_addr_expr (lvar));
> x = lang_hooks.decls.omp_clause_dtor (c, ivar);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (&llist[1], tseq);
> - }
> + gimplify_and_add (x, &llist[1]);
> break;
> }
> /* If this is a reference to constant size reduction var
> @@ -5409,16 +5368,19 @@ lower_rec_input_clauses (tree clauses, g
> if (x)
> gimplify_and_add (x, ilist);
>
> - if (rvarp)
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> + && OMP_CLAUSE_REDUCTION_INSCAN (c))
> {
> - if (x)
> + if (x || (!is_simd
> + && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)))
> {
> tree nv = create_tmp_var_raw (TREE_TYPE (new_var));
> gimple_add_tmp_var (nv);
> ctx->cb.decl_map->put (new_vard, nv);
> x = lang_hooks.decls.omp_clause_default_ctor
> (c, nv, build_outer_var_ref (var, ctx));
> - gimplify_and_add (x, ilist);
> + if (x)
> + gimplify_and_add (x, ilist);
> if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
> {
> tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
> @@ -5433,7 +5395,7 @@ lower_rec_input_clauses (tree clauses, g
> gimple_seq_add_seq (ilist, tseq);
> }
> OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> - if (!ctx->scan_inclusive)
> + if (is_simd && ctx->scan_exclusive)
> {
> tree nv2
> = create_tmp_var_raw (TREE_TYPE (new_var));
> @@ -5444,23 +5406,14 @@ lower_rec_input_clauses (tree clauses, g
> gimplify_and_add (x, ilist);
> x = lang_hooks.decls.omp_clause_dtor (c, nv2);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (dlist, tseq);
> - }
> + gimplify_and_add (x, dlist);
> }
> x = lang_hooks.decls.omp_clause_dtor (c, nv);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (dlist, tseq);
> - }
> + gimplify_and_add (x, dlist);
> }
> - else if (!ctx->scan_inclusive
> + else if (is_simd
> + && ctx->scan_exclusive
> && TREE_ADDRESSABLE (TREE_TYPE (new_var)))
> {
> tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
> @@ -5468,12 +5421,7 @@ lower_rec_input_clauses (tree clauses, g
> ctx->cb.decl_map->put (new_vard, nv2);
> x = lang_hooks.decls.omp_clause_dtor (c, nv2);
> if (x)
> - {
> - tseq = NULL;
> - dtor = x;
> - gimplify_stmt (&dtor, &tseq);
> - gimple_seq_add_seq (dlist, tseq);
> - }
> + gimplify_and_add (x, dlist);
> }
> DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> goto do_dtor;
> @@ -5611,7 +5559,8 @@ lower_rec_input_clauses (tree clauses, g
> {
> if (omp_is_reference (var) && is_simd)
> handle_simd_reference (clause_loc, new_vard, ilist);
> - if (rvarp)
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> + && OMP_CLAUSE_REDUCTION_INSCAN (c))
> break;
> gimplify_assign (new_var, x, ilist);
> if (is_simd)
> @@ -5815,7 +5764,10 @@ lower_rec_input_clauses (tree clauses, g
> lastprivate clauses we need to ensure the lastprivate copying
> happens after firstprivate copying in all threads. And similarly
> for UDRs if initializer expression refers to omp_orig. */
> - if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
> + if (copyin_by_ref || lastprivate_firstprivate
> + || (reduction_omp_orig_ref
> + && !ctx->scan_inclusive
> + && !ctx->scan_exclusive))
> {
> /* Don't add any barrier for #pragma omp simd or
> #pragma omp distribute. */
> @@ -6464,6 +6416,10 @@ lower_reduction_clauses (tree clauses, g
> && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
> return;
>
> + /* inscan reductions are handled elsewhere. */
> + if (ctx->scan_inclusive || ctx->scan_exclusive)
> + return;
> +
> /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
> update in that case, otherwise use a lock. */
> for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
> @@ -8650,7 +8606,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> gimple_seq before = NULL;
> omp_context *octx = ctx->outer;
> gcc_assert (octx);
> - if (!octx->scan_inclusive && !has_clauses)
> + if (octx->scan_exclusive && !has_clauses)
> {
> gimple_stmt_iterator gsi2 = *gsi_p;
> gsi_next (&gsi2);
> @@ -8672,23 +8628,29 @@ lower_omp_scan (gimple_stmt_iterator *gs
> }
>
> bool input_phase = has_clauses ^ octx->scan_inclusive;
> - if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
> - && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
> - && !gimple_omp_for_combined_into_p (octx->stmt))
> - {
> - if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
> - OMP_CLAUSE__SIMDUID_))
> - {
> - tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
> - lane = create_tmp_var (unsigned_type_node);
> - tree t = build_int_cst (integer_type_node,
> - input_phase ? 1
> - : octx->scan_inclusive ? 2 : 3);
> - gimple *g
> - = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
> - gimple_call_set_lhs (g, lane);
> - gimple_seq_add_stmt (&before, g);
> - }
> + bool is_simd = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
> + && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
> + && !gimple_omp_for_combined_into_p (octx->stmt));
> + bool is_for = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
> + && gimple_omp_for_kind (octx->stmt) == GF_OMP_FOR_KIND_FOR
> + && !gimple_omp_for_combined_p (octx->stmt));
> + if (is_simd)
> + if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
> + OMP_CLAUSE__SIMDUID_))
> + {
> + tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
> + lane = create_tmp_var (unsigned_type_node);
> + tree t = build_int_cst (integer_type_node,
> + input_phase ? 1
> + : octx->scan_inclusive ? 2 : 3);
> + gimple *g
> + = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
> + gimple_call_set_lhs (g, lane);
> + gimple_seq_add_stmt (&before, g);
> + }
> +
> + if (is_simd || is_for)
> + {
> for (tree c = gimple_omp_for_clauses (octx->stmt);
> c; c = OMP_CLAUSE_CHAIN (c))
> if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> @@ -8711,7 +8673,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> if (DECL_HAS_VALUE_EXPR_P (new_vard))
> {
> val = DECL_VALUE_EXPR (new_vard);
> - if (omp_is_reference (var))
> + if (new_vard != new_var)
> {
> gcc_assert (TREE_CODE (val) == ADDR_EXPR);
> val = TREE_OPERAND (val, 0);
> @@ -8727,7 +8689,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> lane0 = TREE_OPERAND (val, 1);
> TREE_OPERAND (val, 1) = lane;
> var2 = lookup_decl (v, octx);
> - if (!octx->scan_inclusive)
> + if (octx->scan_exclusive)
> var4 = lookup_decl (var2, octx);
> if (input_phase
> && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> @@ -8737,7 +8699,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> var2 = build4 (ARRAY_REF, TREE_TYPE (val),
> var2, lane, NULL_TREE, NULL_TREE);
> TREE_THIS_NOTRAP (var2) = 1;
> - if (!octx->scan_inclusive)
> + if (octx->scan_exclusive)
> {
> var4 = build4 (ARRAY_REF, TREE_TYPE (val),
> var4, lane, NULL_TREE,
> @@ -8759,7 +8721,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> var3 = maybe_lookup_decl (new_vard, octx);
> if (var3 == new_vard || var3 == NULL_TREE)
> var3 = NULL_TREE;
> - else if (!octx->scan_inclusive && !input_phase)
> + else if (is_simd && octx->scan_exclusive && !input_phase)
> {
> var4 = maybe_lookup_decl (var3, octx);
> if (var4 == var3 || var4 == NULL_TREE)
> @@ -8774,7 +8736,10 @@ lower_omp_scan (gimple_stmt_iterator *gs
> }
> }
> }
> - if (!octx->scan_inclusive && !input_phase && var4 ==
> NULL_TREE)
> + if (is_simd
> + && octx->scan_exclusive
> + && !input_phase
> + && var4 == NULL_TREE)
> var4 = create_tmp_var (TREE_TYPE (val));
> }
> if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> @@ -8794,12 +8759,14 @@ lower_omp_scan (gimple_stmt_iterator *gs
> {
> /* Otherwise, assign to it the identity element. */
> gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT
> (c);
> + if (is_for)
> + tseq = copy_gimple_seq_and_replace_locals (tseq);
> tree ref = build_outer_var_ref (var, octx);
> tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
> ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
> if (x)
> {
> - if (omp_is_reference (var))
> + if (new_vard != new_var)
> val = build_fold_addr_expr_loc (clause_loc,
> val);
> SET_DECL_VALUE_EXPR (new_vard, val);
> }
> @@ -8811,13 +8778,14 @@ lower_omp_scan (gimple_stmt_iterator *gs
> SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
> DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> gimple_seq_add_seq (&before, tseq);
> - OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> + if (is_simd)
> + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> }
> }
> - else
> + else if (is_simd)
> {
> tree x;
> - if (!octx->scan_inclusive)
> + if (octx->scan_exclusive)
> {
> tree v4 = unshare_expr (var4);
> tree v2 = unshare_expr (var2);
> @@ -8828,7 +8796,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> x = (DECL_HAS_VALUE_EXPR_P (new_vard)
> ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
> tree vexpr = val;
> - if (x && omp_is_reference (var))
> + if (x && new_vard != new_var)
> vexpr = build_fold_addr_expr_loc (clause_loc, val);
> if (x)
> SET_DECL_VALUE_EXPR (new_vard, vexpr);
> @@ -8864,7 +8832,7 @@ lower_omp_scan (gimple_stmt_iterator *gs
> tree x = omp_reduction_init (c, TREE_TYPE (new_var));
> gimplify_assign (val, x, &before);
> }
> - else
> + else if (is_simd)
> {
> /* scan phase. */
> enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
> @@ -8888,11 +8856,11 @@ lower_omp_scan (gimple_stmt_iterator *gs
> }
> }
> }
> - if (!octx->scan_inclusive && !input_phase && lane0)
> + if (octx->scan_exclusive && !input_phase && lane0)
> {
> tree vexpr = unshare_expr (var4);
> TREE_OPERAND (vexpr, 1) = lane0;
> - if (omp_is_reference (var))
> + if (new_vard != new_var)
> vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
> SET_DECL_VALUE_EXPR (new_vard, vexpr);
> }
> @@ -8901,9 +8869,17 @@ lower_omp_scan (gimple_stmt_iterator *gs
> else if (has_clauses)
> sorry_at (gimple_location (stmt),
> "%<#pragma omp scan%> not supported yet");
> - gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
> - gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
> - gsi_replace (gsi_p, gimple_build_nop (), true);
> + if (!is_for)
> + {
> + gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
> + gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
> + gsi_replace (gsi_p, gimple_build_nop (), true);
> + }
> + else if (before)
> + {
> + gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt));
> + gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT);
> + }
> }
>
>
> @@ -9124,6 +9100,712 @@ lower_omp_for_lastprivate (struct omp_fo
> }
> }
>
> +/* Callback for walk_gimple_seq. Find #pragma omp scan statement. */
> +
> +tree
> +omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
> + struct walk_stmt_info *wi)
> +{
> + gimple *stmt = gsi_stmt (*gsi_p);
> +
> + *handled_ops_p = true;
> + switch (gimple_code (stmt))
> + {
> + WALK_SUBSTMTS;
> +
> + case GIMPLE_OMP_SCAN:
> + *(gimple_stmt_iterator *) (wi->info) = *gsi_p;
> + return integer_zero_node;
> + default:
> + break;
> + }
> + return NULL;
> +}
> +
> +/* Helper function for lower_omp_for, add transformations for a worksharing
> + loop with scan directives inside of it.
> + For worksharing loop not combined with simd, transform:
> + #pragma omp for reduction(inscan,+:r) private(i)
> + for (i = 0; i < n; i = i + 1)
> + {
> + {
> + update (r);
> + }
> + #pragma omp scan inclusive(r)
> + {
> + use (r);
> + }
> + }
> +
> + into two worksharing loops + code to merge results:
> +
> + num_threads = omp_get_num_threads ();
> + thread_num = omp_get_thread_num ();
> + if (thread_num == 0) goto <D.2099>; else goto <D.2100>;
> + <D.2099>:
> + var2 = r;
> + goto <D.2101>;
> + <D.2100>:
> + // For UDRs this is UDR init, or if ctors are needed, copy from
> + // var3 that has been constructed to contain the neutral element.
> + var2 = 0;
> + <D.2101>:
> + ivar = 0;
> + // The _scantemp_ clauses will arrange for rpriva to be initialized to
> + // a shared array with num_threads elements and rprivb to a local array
> + // number of elements equal to the number of (contiguous) iterations the
> + // current thread will perform. controlb and controlp variables are
> + // temporaries to handle deallocation of rprivb at the end of second
> + // GOMP_FOR.
> + #pragma omp for _scantemp_(rpriva) _scantemp_(rprivb)
> _scantemp_(controlb) \
> + _scantemp_(controlp) reduction(inscan,+:r) private(i) nowait
> + for (i = 0; i < n; i = i + 1)
> + {
> + {
> + // For UDRs this is UDR init or copy from var3.
> + r = 0;
> + // This is the input phase from user code.
> + update (r);
> + }
> + {
> + // For UDRs this is UDR merge.
> + var2 = var2 + r;
> + // Rather than handing it over to the user, save to local thread's
> + // array.
> + rprivb[ivar] = var2;
> + // For exclusive scan, the above two statements are swapped.
> + ivar = ivar + 1;
> + }
> + }
> + // And remember the final value from this thread's into the shared
> + // rpriva array.
> + rpriva[(sizetype) thread_num] = var2;
> + // If more than one thread, compute using Work-Efficient prefix sum
> + // the inclusive parallel scan of the rpriva array.
> + if (num_threads > 1) goto <D.2102>; else goto <D.2103>;
> + <D.2102>:
> + GOMP_barrier ();
> + down = 0;
> + k = 1;
> + num_threadsu = (unsigned int) num_threads;
> + thread_numup1 = (unsigned int) thread_num + 1;
> + <D.2108>:
> + twok = k << 1;
> + if (twok > num_threadsu) goto <D.2110>; else goto <D.2111>;
> + <D.2110>:
> + down = 4294967295;
> + k = k >> 1;
> + if (k == num_threadsu) goto <D.2112>; else goto <D.2111>;
> + <D.2112>:
> + k = k >> 1;
> + <D.2111>:
> + twok = k << 1;
> + cplx = .MUL_OVERFLOW (thread_nump1, twok);
> + mul = REALPART_EXPR <cplx>;
> + ovf = IMAGPART_EXPR <cplx>;
> + if (ovf == 0) goto <D.2116>; else goto <D.2117>;
> + <D.2116>:
> + andv = k & down;
> + andvm1 = andv + 4294967295;
> + l = mul + andvm1;
> + if (l < num_threadsu) goto <D.2120>; else goto <D.2117>;
> + <D.2120>:
> + // For UDRs this is UDR merge, performed using var2 variable as temporary,
> + // i.e. var2 = rpriva[l - k]; UDR merge (var2, rpriva[l]); rpriva[l] =
> var2;
> + rpriva[l] = rpriva[l - k] + rpriva[l];
> + <D.2117>:
> + if (down == 0) goto <D.2121>; else goto <D.2122>;
> + <D.2121>:
> + k = k << 1;
> + goto <D.2123>;
> + <D.2122>:
> + k = k >> 1;
> + <D.2123>:
> + GOMP_barrier ();
> + if (k != 0) goto <D.2108>; else goto <D.2103>;
> + <D.2103>:
> + if (thread_num == 0) goto <D.2124>; else goto <D.2125>;
> + <D.2124>:
> + // For UDRs this is UDR init or copy from var3.
> + var2 = 0;
> + goto <D.2126>;
> + <D.2125>:
> + var2 = rpriva[thread_num - 1];
> + <D.2126>:
> + ivar = 0;
> + #pragma omp for _scantemp_(controlb) _scantemp_(controlp) \
> + reduction(inscan,+:r) private(i)
> + for (i = 0; i < n; i = i + 1)
> + {
> + {
> + // For UDRs, this is UDR merge (rprivb[ivar], var2); r =
> rprivb[ivar];
> + r = rprivb[ivar] + var2;
> + }
> + {
> + // This is the scan phase from user code.
> + use (r);
> + // Plus a bump of the iterator.
> + ivar = ivar + 1;
> + }
> + } */
> +
> +static void
> +lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
> + struct omp_for_data *fd, omp_context *ctx)
> +{
> + gcc_assert (ctx->scan_inclusive || ctx->scan_exclusive);
> +
> + gimple_seq body = gimple_omp_body (stmt);
> + gimple_stmt_iterator input1_gsi = gsi_none ();
> + struct walk_stmt_info wi;
> + memset (&wi, 0, sizeof (wi));
> + wi.val_only = true;
> + wi.info = (void *) &input1_gsi;
> + walk_gimple_seq_mod (&body, omp_find_scan, NULL, &wi);
> + gcc_assert (!gsi_end_p (input1_gsi));
> +
> + gimple *input_stmt1 = gsi_stmt (input1_gsi);
> + gimple_stmt_iterator gsi = input1_gsi;
> + gsi_next (&gsi);
> + gimple_stmt_iterator scan1_gsi = gsi;
> + gimple *scan_stmt1 = gsi_stmt (gsi);
> + gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN);
> +
> + gimple_seq input_body = gimple_omp_body (input_stmt1);
> + gimple_seq scan_body = gimple_omp_body (scan_stmt1);
> + gimple_omp_set_body (input_stmt1, NULL);
> + gimple_omp_set_body (scan_stmt1, NULL);
> + gimple_omp_set_body (stmt, NULL);
> +
> + gomp_for *new_stmt = as_a <gomp_for *> (gimple_copy (stmt));
> + gimple_seq new_body = copy_gimple_seq_and_replace_locals (body);
> + gimple_omp_set_body (stmt, body);
> + gimple_omp_set_body (input_stmt1, input_body);
> +
> + gimple_stmt_iterator input2_gsi = gsi_none ();
> + memset (&wi, 0, sizeof (wi));
> + wi.val_only = true;
> + wi.info = (void *) &input2_gsi;
> + walk_gimple_seq_mod (&new_body, omp_find_scan, NULL, &wi);
> + gcc_assert (!gsi_end_p (input2_gsi));
> +
> + gimple *input_stmt2 = gsi_stmt (input2_gsi);
> + gsi = input2_gsi;
> + gsi_next (&gsi);
> + gimple_stmt_iterator scan2_gsi = gsi;
> + gimple *scan_stmt2 = gsi_stmt (gsi);
> + gcc_assert (scan_stmt2 && gimple_code (scan_stmt2) == GIMPLE_OMP_SCAN);
> + gimple_omp_set_body (scan_stmt2, scan_body);
> +
> + tree num_threads = create_tmp_var (integer_type_node);
> + tree thread_num = create_tmp_var (integer_type_node);
> + tree nthreads_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
> + tree threadnum_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
> + gimple *g = gimple_build_call (nthreads_decl, 0);
> + gimple_call_set_lhs (g, num_threads);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_call (threadnum_decl, 0);
> + gimple_call_set_lhs (g, thread_num);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree ivar = create_tmp_var (sizetype);
> + tree new_clauses1 = NULL_TREE, new_clauses2 = NULL_TREE;
> + tree *cp1 = &new_clauses1, *cp2 = &new_clauses2;
> + tree k = create_tmp_var (unsigned_type_node);
> + tree l = create_tmp_var (unsigned_type_node);
> +
> + gimple_seq clist = NULL, mdlist = NULL;
> + gimple_seq thr01_list = NULL, thrn1_list = NULL;
> + gimple_seq thr02_list = NULL, thrn2_list = NULL;
> + gimple_seq scan1_list = NULL, input2_list = NULL;
> + gimple_seq last_list = NULL, reduc_list = NULL;
> + for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
> + && OMP_CLAUSE_REDUCTION_INSCAN (c))
> + {
> + location_t clause_loc = OMP_CLAUSE_LOCATION (c);
> + tree var = OMP_CLAUSE_DECL (c);
> + tree new_var = lookup_decl (var, ctx);
> + tree var3 = NULL_TREE;
> + tree new_vard = new_var;
> + if (omp_is_reference (var))
> + new_var = build_simple_mem_ref_loc (clause_loc, new_var);
> + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> + {
> + var3 = maybe_lookup_decl (new_vard, ctx);
> + if (var3 == new_vard)
> + var3 = NULL_TREE;
> + }
> +
> + tree ptype = build_pointer_type (TREE_TYPE (new_var));
> + tree rpriva = create_tmp_var (ptype);
> + tree nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
> + OMP_CLAUSE_DECL (nc) = rpriva;
> + *cp1 = nc;
> + cp1 = &OMP_CLAUSE_CHAIN (nc);
> +
> + tree rprivb = create_tmp_var (ptype);
> + nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
> + OMP_CLAUSE_DECL (nc) = rprivb;
> + OMP_CLAUSE__SCANTEMP__ALLOC (nc) = 1;
> + *cp1 = nc;
> + cp1 = &OMP_CLAUSE_CHAIN (nc);
> +
> + tree var2 = create_tmp_var_raw (TREE_TYPE (new_var));
> + if (new_vard != new_var)
> + TREE_ADDRESSABLE (var2) = 1;
> + gimple_add_tmp_var (var2);
> +
> + tree x = fold_convert_loc (clause_loc, sizetype, thread_num);
> + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> + TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> + tree rpriva_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> + x = fold_build2_loc (clause_loc, PLUS_EXPR, integer_type_node,
> + thread_num, integer_minus_one_node);
> + x = fold_convert_loc (clause_loc, sizetype, x);
> + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> + TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> + tree rprivam1_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> + x = fold_convert_loc (clause_loc, sizetype, l);
> + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> + TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> + tree rprival_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> + x = fold_build2_loc (clause_loc, MINUS_EXPR, unsigned_type_node, l,
> k);
> + x = fold_convert_loc (clause_loc, sizetype, x);
> + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
> + TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
> + tree rprivalmk_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, ivar,
> + TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
> + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rprivb), rprivb, x);
> + tree rprivb_ref = build_simple_mem_ref_loc (clause_loc, x);
> +
> + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> + {
> + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
> + tree val = var2;
> + if (new_vard != new_var)
> + val = build_fold_addr_expr_loc (clause_loc, val);
> +
> + x = lang_hooks.decls.omp_clause_default_ctor
> + (c, var2, build_outer_var_ref (var, ctx));
> + if (x)
> + gimplify_and_add (x, &clist);
> +
> + x = build_outer_var_ref (var, ctx);
> + x = lang_hooks.decls.omp_clause_assign_op (c, var2, x);
> + gimplify_and_add (x, &thr01_list);
> +
> + tree y = (DECL_HAS_VALUE_EXPR_P (new_vard)
> + ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
> + if (var3)
> + {
> + x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
> + gimplify_and_add (x, &thrn1_list);
> + x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
> + gimplify_and_add (x, &thr02_list);
> + }
> + else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
> + {
> + /* Otherwise, assign to it the identity element. */
> + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
> + tseq = copy_gimple_seq_and_replace_locals (tseq);
> + SET_DECL_VALUE_EXPR (new_vard, val);
> + DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
> + SET_DECL_VALUE_EXPR (placeholder, error_mark_node);
> + DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
> + lower_omp (&tseq, ctx);
> + gimple_seq_add_seq (&thrn1_list, tseq);
> + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
> + lower_omp (&tseq, ctx);
> + gimple_seq_add_seq (&thr02_list, tseq);
> + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
> + DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
> + if (y)
> + SET_DECL_VALUE_EXPR (new_vard, y);
> + else
> + {
> + DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
> + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
> + }
> + }
> +
> + x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivam1_ref);
> + gimplify_and_add (x, &thrn2_list);
> +
> + if (ctx->scan_exclusive)
> + {
> + x = unshare_expr (rprivb_ref);
> + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
> + gimplify_and_add (x, &scan1_list);
> + }
> +
> + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> + tseq = copy_gimple_seq_and_replace_locals (tseq);
> + SET_DECL_VALUE_EXPR (placeholder, var2);
> + DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
> + lower_omp (&tseq, ctx);
> + gimple_seq_add_seq (&scan1_list, tseq);
> +
> + if (ctx->scan_inclusive)
> + {
> + x = unshare_expr (rprivb_ref);
> + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
> + gimplify_and_add (x, &scan1_list);
> + }
> +
> + x = unshare_expr (rpriva_ref);
> + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
> + gimplify_and_add (x, &mdlist);
> +
> + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> + tseq = copy_gimple_seq_and_replace_locals (tseq);
> + SET_DECL_VALUE_EXPR (new_vard, val);
> + DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
> + SET_DECL_VALUE_EXPR (placeholder, rprivb_ref);
> + lower_omp (&tseq, ctx);
> + if (y)
> + SET_DECL_VALUE_EXPR (new_vard, y);
> + else
> + {
> + DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
> + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
> + }
> + gimple_seq_add_seq (&input2_list, tseq);
> +
> + x = unshare_expr (new_var);
> + x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref);
> + gimplify_and_add (x, &input2_list);
> +
> + x = build_outer_var_ref (var, ctx);
> + x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref);
> + gimplify_and_add (x, &last_list);
> +
> + x = lang_hooks.decls.omp_clause_assign_op (c, var2,
> rprivalmk_ref);
> + gimplify_and_add (x, &reduc_list);
> + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
> + tseq = copy_gimple_seq_and_replace_locals (tseq);
> + val = rprival_ref;
> + if (new_vard != new_var)
> + val = build_fold_addr_expr_loc (clause_loc, val);
> + SET_DECL_VALUE_EXPR (new_vard, val);
> + DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
> + SET_DECL_VALUE_EXPR (placeholder, var2);
> + lower_omp (&tseq, ctx);
> + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
> + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
> + DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
> + if (y)
> + SET_DECL_VALUE_EXPR (new_vard, y);
> + else
> + {
> + DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
> + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
> + }
> + gimple_seq_add_seq (&reduc_list, tseq);
> + x = lang_hooks.decls.omp_clause_assign_op (c, rprival_ref, var2);
> + gimplify_and_add (x, &reduc_list);
> +
> + x = lang_hooks.decls.omp_clause_dtor (c, var2);
> + if (x)
> + gimplify_and_add (x, dlist);
> + }
> + else
> + {
> + x = build_outer_var_ref (var, ctx);
> + gimplify_assign (var2, x, &thr01_list);
> +
> + x = omp_reduction_init (c, TREE_TYPE (new_var));
> + gimplify_assign (var2, unshare_expr (x), &thrn1_list);
> + gimplify_assign (var2, x, &thr02_list);
> +
> + gimplify_assign (var2, rprivam1_ref, &thrn2_list);
> +
> + enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
> + if (code == MINUS_EXPR)
> + code = PLUS_EXPR;
> +
> + if (ctx->scan_exclusive)
> + gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
> + x = build2 (code, TREE_TYPE (new_var), var2, new_var);
> + gimplify_assign (var2, x, &scan1_list);
> + if (ctx->scan_inclusive)
> + gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
> +
> + gimplify_assign (unshare_expr (rpriva_ref), var2, &mdlist);
> +
> + x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2);
> + gimplify_assign (new_var, x, &input2_list);
> +
> + gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref,
> + &last_list);
> +
> + x = build2 (code, TREE_TYPE (new_var), rprivalmk_ref,
> + unshare_expr (rprival_ref));
> + gimplify_assign (rprival_ref, x, &reduc_list);
> + }
> + }
> +
> + g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
> + gimple_seq_add_stmt (&scan1_list, g);
> + g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
> + gimple_seq_add_stmt (gimple_omp_body_ptr (scan_stmt2), g);
> +
> + tree controlb = create_tmp_var (boolean_type_node);
> + tree controlp = create_tmp_var (ptr_type_node);
> + tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> + OMP_CLAUSE_DECL (nc) = controlb;
> + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> + *cp1 = nc;
> + cp1 = &OMP_CLAUSE_CHAIN (nc);
> + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> + OMP_CLAUSE_DECL (nc) = controlp;
> + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> + *cp1 = nc;
> + cp1 = &OMP_CLAUSE_CHAIN (nc);
> + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> + OMP_CLAUSE_DECL (nc) = controlb;
> + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> + *cp2 = nc;
> + cp2 = &OMP_CLAUSE_CHAIN (nc);
> + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
> + OMP_CLAUSE_DECL (nc) = controlp;
> + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
> + *cp2 = nc;
> + cp2 = &OMP_CLAUSE_CHAIN (nc);
> +
> + *cp1 = gimple_omp_for_clauses (stmt);
> + gimple_omp_for_set_clauses (stmt, new_clauses1);
> + *cp2 = gimple_omp_for_clauses (new_stmt);
> + gimple_omp_for_set_clauses (new_stmt, new_clauses2);
> +
> + gimple_omp_set_body (scan_stmt1, scan1_list);
> + gimple_omp_set_body (input_stmt2, input2_list);
> +
> + gsi_insert_seq_after (&input1_gsi, gimple_omp_body (input_stmt1),
> + GSI_SAME_STMT);
> + gsi_remove (&input1_gsi, true);
> + gsi_insert_seq_after (&scan1_gsi, gimple_omp_body (scan_stmt1),
> + GSI_SAME_STMT);
> + gsi_remove (&scan1_gsi, true);
> + gsi_insert_seq_after (&input2_gsi, gimple_omp_body (input_stmt2),
> + GSI_SAME_STMT);
> + gsi_remove (&input2_gsi, true);
> + gsi_insert_seq_after (&scan2_gsi, gimple_omp_body (scan_stmt2),
> + GSI_SAME_STMT);
> + gsi_remove (&scan2_gsi, true);
> +
> + gimple_seq_add_seq (body_p, clist);
> +
> + tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab3 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab1);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_seq (body_p, thr01_list);
> + g = gimple_build_goto (lab3);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab2);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_seq (body_p, thrn1_list);
> + g = gimple_build_label (lab3);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_assign (ivar, size_zero_node);
> + gimple_seq_add_stmt (body_p, g);
> +
> + gimple_seq_add_stmt (body_p, stmt);
> + gimple_seq_add_seq (body_p, body);
> + gimple_seq_add_stmt (body_p, gimple_build_omp_continue (fd->loop.v,
> + fd->loop.v));
> +
> + g = gimple_build_omp_return (true);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_seq (body_p, mdlist);
> +
> + lab1 = create_artificial_label (UNKNOWN_LOCATION);
> + lab2 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (GT_EXPR, num_threads, integer_one_node, lab1, lab2);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab1);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = omp_build_barrier (NULL);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree down = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (down, build_zero_cst (unsigned_type_node));
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_assign (k, build_one_cst (unsigned_type_node));
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree num_threadsu = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (num_threadsu, NOP_EXPR, num_threads);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree thread_numu = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (thread_numu, NOP_EXPR, thread_num);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree thread_nump1 = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (thread_nump1, PLUS_EXPR, thread_numu,
> + build_int_cst (unsigned_type_node, 1));
> + gimple_seq_add_stmt (body_p, g);
> +
> + lab3 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_label (lab3);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree twok = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree lab4 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab6 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (GT_EXPR, twok, num_threadsu, lab4, lab5);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab4);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_assign (down, build_all_ones_cst (unsigned_type_node));
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_cond (EQ_EXPR, k, num_threadsu, lab6, lab5);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab6);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_label (lab5);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree cplx = create_tmp_var (build_complex_type (unsigned_type_node,
> false));
> + g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok);
> + gimple_call_set_lhs (g, cplx);
> + gimple_seq_add_stmt (body_p, g);
> + tree mul = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (mul, REALPART_EXPR,
> + build1 (REALPART_EXPR, unsigned_type_node, cplx));
> + gimple_seq_add_stmt (body_p, g);
> + tree ovf = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (ovf, IMAGPART_EXPR,
> + build1 (IMAGPART_EXPR, unsigned_type_node, cplx));
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree lab7 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab8 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (EQ_EXPR, ovf, build_zero_cst (unsigned_type_node),
> + lab7, lab8);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab7);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree andv = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (andv, BIT_AND_EXPR, k, down);
> + gimple_seq_add_stmt (body_p, g);
> + tree andvm1 = create_tmp_var (unsigned_type_node);
> + g = gimple_build_assign (andvm1, PLUS_EXPR, andv,
> + build_minus_one_cst (unsigned_type_node));
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_assign (l, PLUS_EXPR, mul, andvm1);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree lab9 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (LT_EXPR, l, num_threadsu, lab9, lab8);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab9);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_seq (body_p, reduc_list);
> + g = gimple_build_label (lab8);
> + gimple_seq_add_stmt (body_p, g);
> +
> + tree lab10 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab11 = create_artificial_label (UNKNOWN_LOCATION);
> + tree lab12 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (EQ_EXPR, down, build_zero_cst (unsigned_type_node),
> + lab10, lab11);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab10);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_assign (k, LSHIFT_EXPR, k, integer_one_node);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_goto (lab12);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab11);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab12);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = omp_build_barrier (NULL);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_cond (NE_EXPR, k, build_zero_cst (unsigned_type_node),
> + lab3, lab2);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_label (lab2);
> + gimple_seq_add_stmt (body_p, g);
> +
> + lab1 = create_artificial_label (UNKNOWN_LOCATION);
> + lab2 = create_artificial_label (UNKNOWN_LOCATION);
> + lab3 = create_artificial_label (UNKNOWN_LOCATION);
> + g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab1);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_seq (body_p, thr02_list);
> + g = gimple_build_goto (lab3);
> + gimple_seq_add_stmt (body_p, g);
> + g = gimple_build_label (lab2);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_seq (body_p, thrn2_list);
> + g = gimple_build_label (lab3);
> + gimple_seq_add_stmt (body_p, g);
> +
> + g = gimple_build_assign (ivar, size_zero_node);
> + gimple_seq_add_stmt (body_p, g);
> + gimple_seq_add_stmt (body_p, new_stmt);
> + gimple_seq_add_seq (body_p, new_body);
> +
> + gimple_seq new_dlist = NULL;
> + lab1 = create_artificial_label (UNKNOWN_LOCATION);
> + lab2 = create_artificial_label (UNKNOWN_LOCATION);
> + tree num_threadsm1 = create_tmp_var (integer_type_node);
> + g = gimple_build_assign (num_threadsm1, PLUS_EXPR, num_threads,
> + integer_minus_one_node);
> + gimple_seq_add_stmt (&new_dlist, g);
> + g = gimple_build_cond (EQ_EXPR, thread_num, num_threadsm1, lab1, lab2);
> + gimple_seq_add_stmt (&new_dlist, g);
> + g = gimple_build_label (lab1);
> + gimple_seq_add_stmt (&new_dlist, g);
> + gimple_seq_add_seq (&new_dlist, last_list);
> + g = gimple_build_label (lab2);
> + gimple_seq_add_stmt (&new_dlist, g);
> + gimple_seq_add_seq (&new_dlist, *dlist);
> + *dlist = new_dlist;
> +}
>
> /* Lower code for an OMP loop directive. */
>
> @@ -9317,9 +9999,18 @@ lower_omp_for (gimple_stmt_iterator *gsi
>
> bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
> && gimple_omp_for_grid_phony (stmt));
> - if (!phony_loop)
> - gimple_seq_add_stmt (&body, stmt);
> - gimple_seq_add_seq (&body, gimple_omp_body (stmt));
> + if ((ctx->scan_inclusive || ctx->scan_exclusive)
> + && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
> + {
> + gcc_assert (!phony_loop);
> + lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx);
> + }
> + else
> + {
> + if (!phony_loop)
> + gimple_seq_add_stmt (&body, stmt);
> + gimple_seq_add_seq (&body, gimple_omp_body (stmt));
> + }
>
> if (!phony_loop)
> gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
> --- gcc/omp-expand.c.jj 2019-07-02 12:58:01.459063560 +0200
> +++ gcc/omp-expand.c 2019-07-02 13:08:01.388672109 +0200
> @@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_regio
> }
> }
>
> +/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
> + compute needed allocation size. If !ALLOC of team allocations,
> + if ALLOC of thread allocation. SZ is the initial needed size for
> + other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
> + CNT number of elements of each array, for !ALLOC this is
> + omp_get_num_threads (), for ALLOC number of iterations handled by the
> + current thread. If PTR is non-NULL, it is the start of the allocation
> + and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
> + clauses pointers to the corresponding arrays. */
> +
> +static tree
> +expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
> + unsigned HOST_WIDE_INT alloc_align, tree cnt,
> + gimple_stmt_iterator *gsi, bool alloc)
> +{
> + tree eltsz = NULL_TREE;
> + unsigned HOST_WIDE_INT preval = 0;
> + if (ptr && sz)
> + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
> + ptr, size_int (sz));
> + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> + && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
> + && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
> + {
> + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
> + unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
> + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
> + {
> + unsigned HOST_WIDE_INT szl
> + = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
> + szl = least_bit_hwi (szl);
> + if (szl)
> + al = MIN (al, szl);
> + }
> + if (ptr == NULL_TREE)
> + {
> + if (eltsz == NULL_TREE)
> + eltsz = TYPE_SIZE_UNIT (pointee_type);
> + else
> + eltsz = size_binop (PLUS_EXPR, eltsz,
> + TYPE_SIZE_UNIT (pointee_type));
> + }
> + if (preval == 0 && al <= alloc_align)
> + {
> + unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
> + sz += diff;
> + if (diff && ptr)
> + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
> + ptr, size_int (diff));
> + }
> + else if (al > preval)
> + {
> + if (ptr)
> + {
> + ptr = fold_convert (pointer_sized_int_node, ptr);
> + ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
> + build_int_cst (pointer_sized_int_node,
> + al - 1));
> + ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
> + build_int_cst (pointer_sized_int_node,
> + -(HOST_WIDE_INT) al));
> + ptr = fold_convert (ptr_type_node, ptr);
> + }
> + else
> + sz += al - 1;
> + }
> + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
> + preval = al;
> + else
> + preval = 1;
> + if (ptr)
> + {
> + expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
> + ptr = OMP_CLAUSE_DECL (c);
> + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
> + size_binop (MULT_EXPR, cnt,
> + TYPE_SIZE_UNIT (pointee_type)));
> + }
> + }
> +
> + if (ptr == NULL_TREE)
> + {
> + eltsz = size_binop (MULT_EXPR, eltsz, cnt);
> + if (sz)
> + eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
> + return eltsz;
> + }
> + else
> + return ptr;
> +}
> +
> /* A subroutine of expand_omp_for. Generate code for a parallel
> loop with static schedule and no specified chunk size. Given
> parameters:
> @@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct om
> struct omp_for_data *fd,
> gimple *inner_stmt)
> {
> - tree n, q, s0, e0, e, t, tt, nthreads, threadid;
> + tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
> tree type, itype, vmain, vback;
> basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
> basic_block body_bb, cont_bb, collapse_bb = NULL;
> - basic_block fin_bb;
> + basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
> + basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
> gimple_stmt_iterator gsi, gsip;
> edge ep;
> bool broken_loop = region->cont == NULL;
> @@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct om
> c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
> cond_var = OMP_CLAUSE_DECL (c);
> }
> - if (fd->have_reductemp || fd->have_pointer_condtemp)
> + if (fd->have_reductemp
> + || fd->have_pointer_condtemp
> + || fd->have_nonctrl_scantemp)
> {
> tree t1 = build_int_cst (long_integer_type_node, 0);
> tree t2 = build_int_cst (long_integer_type_node, 1);
> @@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct om
> gimple_stmt_iterator gsi2 = gsi_none ();
> gimple *g = NULL;
> tree mem = null_pointer_node, memv = NULL_TREE;
> + unsigned HOST_WIDE_INT condtemp_sz = 0;
> + unsigned HOST_WIDE_INT alloc_align = 0;
> if (fd->have_reductemp)
> {
> + gcc_assert (!fd->have_nonctrl_scantemp);
> tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
> reductions = OMP_CLAUSE_DECL (c);
> gcc_assert (TREE_CODE (reductions) == SSA_NAME);
> @@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct om
> gsi2 = gsip;
> reductions = null_pointer_node;
> }
> - if (fd->have_pointer_condtemp)
> + if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
> {
> - tree type = TREE_TYPE (condtemp);
> + tree type;
> + if (fd->have_pointer_condtemp)
> + type = TREE_TYPE (condtemp);
> + else
> + type = ptr_type_node;
> memv = create_tmp_var (type);
> TREE_ADDRESSABLE (memv) = 1;
> - unsigned HOST_WIDE_INT sz
> - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
> - sz *= fd->lastprivate_conditional;
> - expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
> - false);
> + unsigned HOST_WIDE_INT sz = 0;
> + tree size = NULL_TREE;
> + if (fd->have_pointer_condtemp)
> + {
> + sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
> + sz *= fd->lastprivate_conditional;
> + condtemp_sz = sz;
> + }
> + if (fd->have_nonctrl_scantemp)
> + {
> + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
> + gimple *g = gimple_build_call (nthreads, 0);
> + nthreads = create_tmp_var (integer_type_node);
> + gimple_call_set_lhs (g, nthreads);
> + gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
> + nthreads = fold_convert (sizetype, nthreads);
> + alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
> + size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
> + alloc_align, nthreads, NULL,
> + false);
> + size = fold_convert (type, size);
> + }
> + else
> + size = build_int_cst (type, sz);
> + expand_omp_build_assign (&gsi2, memv, size, false);
> mem = build_fold_addr_expr (memv);
> }
> tree t
> @@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct om
> true, GSI_SAME_STMT);
> if (fd->have_pointer_condtemp)
> expand_omp_build_assign (&gsi2, condtemp, memv, false);
> + if (fd->have_nonctrl_scantemp)
> + {
> + tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
> + expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
> + alloc_align, nthreads, &gsi2, false);
> + }
> if (fd->have_reductemp)
> {
> gsi_remove (&gsi2, true);
> @@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct om
> gsi = gsi_last_nondebug_bb (third_bb);
> gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
>
> + if (fd->have_nonctrl_scantemp)
> + {
> + tree clauses = gimple_omp_for_clauses (fd->for_stmt);
> + tree controlp = NULL_TREE, controlb = NULL_TREE;
> + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> + && OMP_CLAUSE__SCANTEMP__CONTROL (c))
> + {
> + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
> + controlb = OMP_CLAUSE_DECL (c);
> + else
> + controlp = OMP_CLAUSE_DECL (c);
> + if (controlb && controlp)
> + break;
> + }
> + gcc_assert (controlp && controlb);
> + tree cnt = create_tmp_var (sizetype);
> + gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
> + tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
> + alloc_align, cnt, NULL, true);
> + tree size = create_tmp_var (sizetype);
> + expand_omp_build_assign (&gsi, size, sz, false);
> + tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
> + size, size_int (16384));
> + expand_omp_build_assign (&gsi, controlb, cmp);
> + g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
> + NULL_TREE, NULL_TREE);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + fourth_bb = split_block (third_bb, g)->dest;
> + gsi = gsi_last_nondebug_bb (fourth_bb);
> + /* FIXME: Once we have allocators, this should use allocator. */
> + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1,
> size);
> + gimple_call_set_lhs (g, controlp);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
> + &gsi, true);
> + gsi_prev (&gsi);
> + g = gsi_stmt (gsi);
> + fifth_bb = split_block (fourth_bb, g)->dest;
> + gsi = gsi_last_nondebug_bb (fifth_bb);
> +
> + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
> + gimple_call_set_lhs (g, controlp);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
> + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> + && OMP_CLAUSE__SCANTEMP__ALLOC (c))
> + {
> + tree tmp = create_tmp_var (sizetype);
> + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
> + g = gimple_build_assign (tmp, MULT_EXPR, cnt,
> + TYPE_SIZE_UNIT (pointee_type));
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + g = gimple_build_call (alloca_decl, 2, tmp,
> + size_int (TYPE_ALIGN (pointee_type)));
> + gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + }
> +
> + sixth_bb = split_block (fifth_bb, g)->dest;
> + gsi = gsi_last_nondebug_bb (sixth_bb);
> + }
> +
> t = build2 (MULT_EXPR, itype, q, threadid);
> t = build2 (PLUS_EXPR, itype, t, tt);
> s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
> GSI_SAME_STMT);
> @@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct om
> if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
> {
> t = gimple_omp_return_lhs (gsi_stmt (gsi));
> - if (fd->have_reductemp || fd->have_pointer_condtemp)
> + if (fd->have_reductemp
> + || ((fd->have_pointer_condtemp || fd->have_scantemp)
> + && !fd->have_nonctrl_scantemp))
> {
> tree fn;
> if (t)
> @@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct om
> gcall *g = gimple_build_call (fn, 0);
> gsi_insert_after (&gsi, g, GSI_SAME_STMT);
> }
> + if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
> + {
> + tree clauses = gimple_omp_for_clauses (fd->for_stmt);
> + tree controlp = NULL_TREE, controlb = NULL_TREE;
> + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
> + && OMP_CLAUSE__SCANTEMP__CONTROL (c))
> + {
> + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
> + controlb = OMP_CLAUSE_DECL (c);
> + else
> + controlp = OMP_CLAUSE_DECL (c);
> + if (controlb && controlp)
> + break;
> + }
> + gcc_assert (controlp && controlb);
> + gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
> + NULL_TREE, NULL_TREE);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + exit1_bb = split_block (exit_bb, g)->dest;
> + gsi = gsi_after_labels (exit1_bb);
> + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
> + controlp);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + exit2_bb = split_block (exit1_bb, g)->dest;
> + gsi = gsi_after_labels (exit2_bb);
> + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE),
> 1,
> + controlp);
> + gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> + exit3_bb = split_block (exit2_bb, g)->dest;
> + gsi = gsi_after_labels (exit3_bb);
> + }
> gsi_remove (&gsi, true);
>
> /* Connect all the blocks. */
> @@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct om
> ep = find_edge (entry_bb, second_bb);
> ep->flags = EDGE_TRUE_VALUE;
> ep->probability = profile_probability::guessed_always ().apply_scale (1,
> 4);
> - find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
> - find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
> + if (fourth_bb)
> + {
> + ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
> + ep->probability
> + = profile_probability::guessed_always ().apply_scale (1, 2);
> + ep = find_edge (third_bb, fourth_bb);
> + ep->flags = EDGE_TRUE_VALUE;
> + ep->probability
> + = profile_probability::guessed_always ().apply_scale (1, 2);
> + ep = find_edge (fourth_bb, fifth_bb);
> + redirect_edge_and_branch (ep, sixth_bb);
> + }
> + else
> + sixth_bb = third_bb;
> + find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
> + find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
> + if (exit1_bb)
> + {
> + ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
> + ep->probability
> + = profile_probability::guessed_always ().apply_scale (1, 2);
> + ep = find_edge (exit_bb, exit1_bb);
> + ep->flags = EDGE_TRUE_VALUE;
> + ep->probability
> + = profile_probability::guessed_always ().apply_scale (1, 2);
> + ep = find_edge (exit1_bb, exit2_bb);
> + redirect_edge_and_branch (ep, exit3_bb);
> + }
>
> if (!broken_loop)
> {
> @@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct om
>
> set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
> set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
> - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
> + if (fourth_bb)
> + {
> + set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
> + set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
> + }
> + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
>
> set_immediate_dominator (CDI_DOMINATORS, body_bb,
> recompute_dominator (CDI_DOMINATORS, body_bb));
> set_immediate_dominator (CDI_DOMINATORS, fin_bb,
> recompute_dominator (CDI_DOMINATORS, fin_bb));
> + if (exit1_bb)
> + {
> + set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
> + set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
> + }
>
> struct loop *loop = body_bb->loop_father;
> if (loop != entry_bb->loop_father)
> --- gcc/testsuite/c-c++-common/gomp/scan-3.c.jj 2019-07-02 12:57:30.366553813
> +0200
> +++ gcc/testsuite/c-c++-common/gomp/scan-3.c 2019-07-02 13:08:01.383672187
> +0200
> @@ -8,7 +8,7 @@ f1 (int *c, int *d)
> for (i = 0; i < 64; i++)
> {
> d[i] = a;
> - #pragma omp scan inclusive (a) /* { dg-message "sorry,
> unimplemented: '#pragma omp scan' not supported yet" } */
> + #pragma omp scan inclusive (a)
> a += c[i];
> }
> }
> --- gcc/testsuite/c-c++-common/gomp/scan-5.c.jj 2019-07-02 13:01:48.959476445
> +0200
> +++ gcc/testsuite/c-c++-common/gomp/scan-5.c 2019-07-02 13:08:24.532311809
> +0200
> @@ -6,7 +6,7 @@ foo (int *a, int *b)
> for (int i = 0; i < 64; i++)
> {
> r += a[i];
> - #pragma omp scan inclusive (r) /* { dg-message "sorry,
> unimplemented: '#pragma omp scan' not supported yet" } */
> + #pragma omp scan inclusive (r)
> b[i] = r;
> }
> return r;
> --- libgomp/testsuite/libgomp.c++/scan-1.C.jj 2019-07-02 13:16:12.144014405
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-1.C 2019-07-02 13:17:41.705604254
> +0200
> @@ -0,0 +1,151 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +struct S {
> + inline S ();
> + inline ~S ();
> + inline S (const S &);
> + inline S & operator= (const S &);
> + int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> + s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> + s = x.s;
> + return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> + x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer
> (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r.s += a[i].s;
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) S
> +bar (void)
> +{
> + S s;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s.s += 2 * a[i].s;
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return S (s);
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r.s += a[i].s;
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) S
> +qux (void)
> +{
> + S s;
> + #pragma omp parallel for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s.s += 2 * a[i].s;
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return S (s);
> +}
> +
> +int
> +main ()
> +{
> + S s;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i].s = i;
> + b[i].s = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += i;
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + }
> + if (bar ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += 2 * i;
> + if (b[i].s != s.s)
> + abort ();
> + }
> + r.s = 0;
> + baz (a, b);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += i;
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + }
> + if (qux ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += 2 * i;
> + if (b[i].s != s.s)
> + abort ();
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c++/scan-2.C.jj 2019-07-02 13:16:12.151014294
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-2.C 2019-07-02 13:29:26.250606680
> +0200
> @@ -0,0 +1,116 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> + #pragma omp for reduction (inscan, +:r) nowait
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int &s = q;
> + q = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, +:s) nowait
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int &s = q;
> + q = 0;
> + #pragma omp parallel for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + }
> + r = 0;
> + baz (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-3.C.jj 2019-07-02 13:16:12.157014200
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-3.C 2019-07-02 13:29:33.038500894
> +0200
> @@ -0,0 +1,119 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> + #pragma omp for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int &s = q;
> + q = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> + #pragma omp parallel for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int &s = q;
> + q = 0;
> + #pragma omp parallel for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + }
> + r = 0;
> + baz (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c++/scan-4.C.jj 2019-07-02 13:16:12.559007871
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-4.C 2019-07-02 13:52:26.973993019
> +0200
> @@ -0,0 +1,150 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +struct S {
> + inline S ();
> + inline ~S ();
> + inline S (const S &);
> + inline S & operator= (const S &);
> + int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> + s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> + s = x.s;
> + return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> + x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer
> (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b, S &r)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r.s += a[i].s;
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) S
> +bar ()
> +{
> + S s;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s.s += 2 * a[i].s;
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b, S &r)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r.s += a[i].s;
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) S
> +qux ()
> +{
> + S s;
> + #pragma omp parallel for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s.s += 2 * a[i].s;
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + S s;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i].s = i;
> + b[i].s = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, r);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += i;
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + }
> + if (bar ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += 2 * i;
> + if (b[i].s != s.s)
> + abort ();
> + }
> + r.s = 0;
> + baz (a, b, r);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += i;
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + }
> + if (qux ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s.s += 2 * i;
> + if (b[i].s != s.s)
> + abort ();
> + }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-5.C.jj 2019-07-02 13:16:12.567007745
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-5.C 2019-07-02 13:29:08.895877145
> +0200
> @@ -0,0 +1,158 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +template <typename T>
> +struct S {
> + inline S ();
> + inline ~S ();
> + inline S (const S &);
> + inline S & operator= (const S &);
> + T s;
> +};
> +
> +template <typename T>
> +S<T>::S () : s (0)
> +{
> +}
> +
> +template <typename T>
> +S<T>::~S ()
> +{
> +}
> +
> +template <typename T>
> +S<T>::S (const S &x)
> +{
> + s = x.s;
> +}
> +
> +template <typename T>
> +S<T> &
> +S<T>::operator= (const S &x)
> +{
> + s = x.s;
> + return *this;
> +}
> +
> +template <typename T>
> +static inline void
> +ini (S<T> &x)
> +{
> + x.s = 0;
> +}
> +
> +S<int> r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s)
> initializer (ini (omp_priv))
> +
> +template <typename T>
> +__attribute__((noipa)) void
> +foo (S<T> *a, S<T> *b)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r.s += a[i].s;
> + }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) S<T>
> +bar (void)
> +{
> + S<T> s;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s.s += 2 * a[i].s;
> + }
> + return S<T> (s);
> +}
> +
> +__attribute__((noipa)) void
> +baz (S<int> *a, S<int> *b)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r.s += a[i].s;
> + }
> +}
> +
> +__attribute__((noipa)) S<int>
> +qux (void)
> +{
> + S<int> s;
> + #pragma omp parallel for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s.s += 2 * a[i].s;
> + }
> + return S<int> (s);
> +}
> +
> +int
> +main ()
> +{
> + S<int> s;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i].s = i;
> + b[i].s = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + s.s += i;
> + }
> + if (bar<int> ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + s.s += 2 * i;
> + }
> + r.s = 0;
> + baz (a, b);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + s.s += i;
> + }
> + if (qux ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + s.s += 2 * i;
> + }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-6.C.jj 2019-07-02 13:16:12.574007634
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-6.C 2019-07-02 13:31:59.909212015
> +0200
> @@ -0,0 +1,120 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +template <typename T, typename U>
> +__attribute__((noipa)) void
> +foo (T a, T b, U r)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) T
> +bar ()
> +{
> + T &s = q;
> + q = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) void
> +baz (T *a, T *b, T &r)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (T i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +template <typename T>
> +__attribute__((noipa)) int
> +qux ()
> +{
> + T s = q;
> + q = 0;
> + #pragma omp parallel for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo<int *, int &> (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + s += i;
> + }
> + if (bar<int> () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + s += 2 * i;
> + }
> + r = 0;
> + baz<int> (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + s += i;
> + }
> + if (qux<int &> () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + s += 2 * i;
> + }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-7.C.jj 2019-07-02 13:16:12.581007523
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-7.C 2019-07-02 13:33:29.036823011
> +0200
> @@ -0,0 +1,118 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +int r, a[1024], b[1024], q;
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, int &r)
> +{
> + #pragma omp for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int &s = q;
> + q = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, foo:s) nowait
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, int &r)
> +{
> + #pragma omp parallel for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int &s = q;
> + q = 0;
> + #pragma omp parallel for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + s += i;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + s += 2 * i;
> + }
> + r = 0;
> + baz (a, b, r);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + s += i;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + s += 2 * i;
> + }
> +}
> --- libgomp/testsuite/libgomp.c++/scan-8.C.jj 2019-07-02 13:16:12.587007429
> +0200
> +++ libgomp/testsuite/libgomp.c++/scan-8.C 2019-07-02 13:43:27.518448406
> +0200
> @@ -0,0 +1,150 @@
> +// { dg-require-effective-target size32plus }
> +
> +extern "C" void abort ();
> +
> +struct S {
> + inline S ();
> + inline ~S ();
> + inline S (const S &);
> + inline S & operator= (const S &);
> + int s;
> +};
> +
> +S::S () : s (0)
> +{
> +}
> +
> +S::~S ()
> +{
> +}
> +
> +S::S (const S &x)
> +{
> + s = x.s;
> +}
> +
> +S &
> +S::operator= (const S &x)
> +{
> + s = x.s;
> + return *this;
> +}
> +
> +static inline void
> +ini (S &x)
> +{
> + x.s = 0;
> +}
> +
> +S r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
> +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer
> (ini (omp_priv))
> +
> +__attribute__((noipa)) void
> +foo (S *a, S *b, S &r)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r.s += a[i].s;
> + }
> +}
> +
> +__attribute__((noipa)) S
> +bar (void)
> +{
> + S s;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s.s += 2 * a[i].s;
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (S *a, S *b, S &r)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r.s += a[i].s;
> + }
> +}
> +
> +__attribute__((noipa)) S
> +qux (void)
> +{
> + S s;
> + #pragma omp parallel for reduction (inscan, plus:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s.s += 2 * a[i].s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + S s;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i].s = i;
> + b[i].s = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, r);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + s.s += i;
> + }
> + if (bar ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + s.s += 2 * i;
> + }
> + r.s = 0;
> + baz (a, b, r);
> + if (r.s != 1024 * 1023 / 2)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + else
> + b[i].s = 25;
> + s.s += i;
> + }
> + if (qux ().s != 1024 * 1023)
> + abort ();
> + s.s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i].s != s.s)
> + abort ();
> + s.s += 2 * i;
> + }
> +}
> --- libgomp/testsuite/libgomp.c/scan-1.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-1.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,115 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int s = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int s = 0;
> + #pragma omp parallel for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + }
> + r = 0;
> + baz (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-2.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-2.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,117 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> + #pragma omp for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int s = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> + #pragma omp parallel for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r += a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int s = 0;
> + #pragma omp parallel for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s += 2 * a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + }
> + r = 0;
> + baz (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s)
> + abort ();
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-3.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-3.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,88 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +float r = 1.0f, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (float *a, float *b)
> +{
> + #pragma omp for reduction (inscan, *:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + r *= a[i];
> + #pragma omp scan inclusive(r)
> + b[i] = r;
> + }
> +}
> +
> +__attribute__((noipa)) float
> +bar (void)
> +{
> + float s = -__builtin_inff ();
> + #pragma omp parallel for reduction (inscan, max:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + s = s > a[i] ? s : a[i];
> + #pragma omp scan inclusive(s)
> + b[i] = s;
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + float s = 1.0f;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (i < 80)
> + a[i] = (i & 1) ? 0.25f : 0.5f;
> + else if (i < 200)
> + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> + else if (i < 280)
> + a[i] = (i & 1) ? 0.25f : 0.5f;
> + else if (i < 380)
> + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> + else
> + switch (i % 6)
> + {
> + case 0: a[i] = 0.25f; break;
> + case 1: a[i] = 2.0f; break;
> + case 2: a[i] = -1.0f; break;
> + case 3: a[i] = -4.0f; break;
> + case 4: a[i] = 0.5f; break;
> + case 5: a[i] = 1.0f; break;
> + default: a[i] = 0.0f; break;
> + }
> + b[i] = -19.0f;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r * 16384.0f != 0.125f)
> + abort ();
> + float m = -175.25f;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s *= a[i];
> + if (b[i] != s)
> + abort ();
> + else
> + {
> + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
> + b[i] = -231.75f;
> + m += 0.75f;
> + }
> + }
> + if (bar () != 592.0f)
> + abort ();
> + s = -__builtin_inff ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (s < a[i])
> + s = a[i];
> + if (b[i] != s)
> + abort ();
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-4.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-4.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,179 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +unsigned short r2, b2[1024];
> +unsigned char r3, b3[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> + #pragma omp for reduction (inscan, +:r, r2, r3)
> + for (int i = 0; i < 1024; i++)
> + {
> + { r += a[i]; r2 += a[i]; r3 += a[i]; }
> + #pragma omp scan inclusive(r, r2, r3)
> + {
> + b[i] = r;
> + b2[i] = r2;
> + b3[i] = r3;
> + }
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (unsigned short *s2p, unsigned char *s3p)
> +{
> + int s = 0;
> + unsigned short s2 = 0;
> + unsigned char s3 = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, +:s, s2, s3)
> + for (int i = 0; i < 1024; i++)
> + {
> + {
> + s += 2 * a[i];
> + s2 += 2 * a[i];
> + s3 += 2 * a[i];
> + }
> + #pragma omp scan inclusive(s, s2, s3)
> + { b[i] = s; b2[i] = s2; b3[i] = s3; }
> + }
> + *s2p = s2;
> + *s3p = s3;
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> + #pragma omp parallel for reduction (inscan, +:r, r2, r3)
> + for (int i = 0; i < 1024; i++)
> + {
> + {
> + r += a[i];
> + r2 += a[i];
> + r3 += a[i];
> + }
> + #pragma omp scan inclusive(r, r2, r3)
> + {
> + b[i] = r;
> + b2[i] = r2;
> + b3[i] = r3;
> + }
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (unsigned short *s2p, unsigned char *s3p)
> +{
> + int s = 0;
> + unsigned short s2 = 0;
> + unsigned char s3 = 0;
> + #pragma omp parallel for reduction (inscan, +:s, s2, s3)
> + for (int i = 0; i < 1024; i++)
> + {
> + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
> + #pragma omp scan inclusive(s, s2, s3)
> + { b[i] = s; b2[i] = s2; b3[i] = s3; }
> + }
> + *s2p = s2;
> + *s3p = s3;
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + unsigned short s2;
> + unsigned char s3;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + b2[i] = -1;
> + b3[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, b2, b3);
> + if (r != 1024 * 1023 / 2
> + || r2 != (unsigned short) r
> + || r3 != (unsigned char) r)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + else
> + {
> + b[i] = 25;
> + b2[i] = 24;
> + b3[i] = 26;
> + }
> + }
> + if (bar (&s2, &s3) != 1024 * 1023)
> + abort ();
> + if (s2 != (unsigned short) (1024 * 1023)
> + || s3 != (unsigned char) (1024 * 1023))
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + else
> + {
> + b[i] = -1;
> + b2[i] = -1;
> + b3[i] = -1;
> + }
> + }
> + r = 0;
> + r2 = 0;
> + r3 = 0;
> + baz (a, b, b2, b3);
> + if (r != 1024 * 1023 / 2
> + || r2 != (unsigned short) r
> + || r3 != (unsigned char) r)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += i;
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + else
> + {
> + b[i] = 25;
> + b2[i] = 24;
> + b3[i] = 26;
> + }
> + }
> + s2 = 0;
> + s3 = 0;
> + if (qux (&s2, &s3) != 1024 * 1023)
> + abort ();
> + if (s2 != (unsigned short) (1024 * 1023)
> + || s3 != (unsigned char) (1024 * 1023))
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + s += 2 * i;
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-5.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-5.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,115 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> + #pragma omp for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int s = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> + #pragma omp parallel for reduction (inscan, +:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int s = 0;
> + #pragma omp parallel for reduction (inscan, +:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + s += i;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + s += 2 * i;
> + }
> + r = 0;
> + baz (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + s += i;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + s += 2 * i;
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-6.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-6.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,117 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +
> +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer
> (omp_priv = 0)
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b)
> +{
> + #pragma omp for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> + int s = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b)
> +{
> + #pragma omp parallel for reduction (inscan, foo:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r += a[i];
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (void)
> +{
> + int s = 0;
> + #pragma omp parallel for reduction (inscan, foo:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s += 2 * a[i];
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = 25;
> + s += i;
> + }
> + if (bar () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -1;
> + s += 2 * i;
> + }
> + r = 0;
> + baz (a, b);
> + if (r != 1024 * 1023 / 2)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -25;
> + s += i;
> + }
> + if (qux () != 1024 * 1023)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + s += 2 * i;
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-7.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-7.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,86 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +float r = 1.0f, a[1024], b[1024];
> +
> +__attribute__((noipa)) void
> +foo (float *a, float *b)
> +{
> + #pragma omp for reduction (inscan, *:r)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = r;
> + #pragma omp scan exclusive(r)
> + r *= a[i];
> + }
> +}
> +
> +__attribute__((noipa)) float
> +bar (void)
> +{
> + float s = -__builtin_inff ();
> + #pragma omp parallel for reduction (inscan, max:s)
> + for (int i = 0; i < 1024; i++)
> + {
> + b[i] = s;
> + #pragma omp scan exclusive(s)
> + s = s > a[i] ? s : a[i];
> + }
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + float s = 1.0f;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (i < 80)
> + a[i] = (i & 1) ? 0.25f : 0.5f;
> + else if (i < 200)
> + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> + else if (i < 280)
> + a[i] = (i & 1) ? 0.25f : 0.5f;
> + else if (i < 380)
> + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
> + else
> + switch (i % 6)
> + {
> + case 0: a[i] = 0.25f; break;
> + case 1: a[i] = 2.0f; break;
> + case 2: a[i] = -1.0f; break;
> + case 3: a[i] = -4.0f; break;
> + case 4: a[i] = 0.5f; break;
> + case 5: a[i] = 1.0f; break;
> + default: a[i] = 0.0f; break;
> + }
> + b[i] = -19.0f;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b);
> + if (r * 16384.0f != 0.125f)
> + abort ();
> + float m = -175.25f;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + else
> + b[i] = -231.75f;
> + s *= a[i];
> + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
> + m += 0.75f;
> + }
> + if (bar () != 592.0f)
> + abort ();
> + s = -__builtin_inff ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s)
> + abort ();
> + if (s < a[i])
> + s = a[i];
> + }
> + return 0;
> +}
> --- libgomp/testsuite/libgomp.c/scan-8.c.jj 2019-07-02 13:08:01.390672078
> +0200
> +++ libgomp/testsuite/libgomp.c/scan-8.c 2019-07-02 13:08:01.390672078
> +0200
> @@ -0,0 +1,179 @@
> +/* { dg-require-effective-target size32plus } */
> +
> +extern void abort (void);
> +int r, a[1024], b[1024];
> +unsigned short r2, b2[1024];
> +unsigned char r3, b3[1024];
> +
> +__attribute__((noipa)) void
> +foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> + #pragma omp for reduction (inscan, +:r, r2, r3)
> + for (int i = 0; i < 1024; i++)
> + {
> + {
> + b[i] = r;
> + b2[i] = r2;
> + b3[i] = r3;
> + }
> + #pragma omp scan exclusive(r, r2, r3)
> + { r += a[i]; r2 += a[i]; r3 += a[i]; }
> + }
> +}
> +
> +__attribute__((noipa)) int
> +bar (unsigned short *s2p, unsigned char *s3p)
> +{
> + int s = 0;
> + unsigned short s2 = 0;
> + unsigned char s3 = 0;
> + #pragma omp parallel
> + #pragma omp for reduction (inscan, +:s, s2, s3)
> + for (int i = 0; i < 1024; i++)
> + {
> + { b[i] = s; b2[i] = s2; b3[i] = s3; }
> + #pragma omp scan exclusive(s, s2, s3)
> + {
> + s += 2 * a[i];
> + s2 += 2 * a[i];
> + s3 += 2 * a[i];
> + }
> + }
> + *s2p = s2;
> + *s3p = s3;
> + return s;
> +}
> +
> +__attribute__((noipa)) void
> +baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
> +{
> + #pragma omp parallel for reduction (inscan, +:r, r2, r3)
> + for (int i = 0; i < 1024; i++)
> + {
> + {
> + b[i] = r;
> + b2[i] = r2;
> + b3[i] = r3;
> + }
> + #pragma omp scan exclusive(r, r2, r3)
> + {
> + r += a[i];
> + r2 += a[i];
> + r3 += a[i];
> + }
> + }
> +}
> +
> +__attribute__((noipa)) int
> +qux (unsigned short *s2p, unsigned char *s3p)
> +{
> + int s = 0;
> + unsigned short s2 = 0;
> + unsigned char s3 = 0;
> + #pragma omp parallel for reduction (inscan, +:s, s2, s3)
> + for (int i = 0; i < 1024; i++)
> + {
> + { b[i] = s; b2[i] = s2; b3[i] = s3; }
> + #pragma omp scan exclusive(s, s2, s3)
> + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
> + }
> + *s2p = s2;
> + *s3p = s3;
> + return s;
> +}
> +
> +int
> +main ()
> +{
> + int s = 0;
> + unsigned short s2;
> + unsigned char s3;
> + for (int i = 0; i < 1024; ++i)
> + {
> + a[i] = i;
> + b[i] = -1;
> + b2[i] = -1;
> + b3[i] = -1;
> + asm ("" : "+g" (i));
> + }
> + #pragma omp parallel
> + foo (a, b, b2, b3);
> + if (r != 1024 * 1023 / 2
> + || r2 != (unsigned short) r
> + || r3 != (unsigned char) r)
> + abort ();
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + else
> + {
> + b[i] = 25;
> + b2[i] = 24;
> + b3[i] = 26;
> + }
> + s += i;
> + }
> + if (bar (&s2, &s3) != 1024 * 1023)
> + abort ();
> + if (s2 != (unsigned short) (1024 * 1023)
> + || s3 != (unsigned char) (1024 * 1023))
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + else
> + {
> + b[i] = -1;
> + b2[i] = -1;
> + b3[i] = -1;
> + }
> + s += 2 * i;
> + }
> + r = 0;
> + r2 = 0;
> + r3 = 0;
> + baz (a, b, b2, b3);
> + if (r != 1024 * 1023 / 2
> + || r2 != (unsigned short) r
> + || r3 != (unsigned char) r)
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + else
> + {
> + b[i] = 25;
> + b2[i] = 24;
> + b3[i] = 26;
> + }
> + s += i;
> + }
> + s2 = 0;
> + s3 = 0;
> + if (qux (&s2, &s3) != 1024 * 1023)
> + abort ();
> + if (s2 != (unsigned short) (1024 * 1023)
> + || s3 != (unsigned char) (1024 * 1023))
> + abort ();
> + s = 0;
> + for (int i = 0; i < 1024; ++i)
> + {
> + if (b[i] != s
> + || b2[i] != (unsigned short) s
> + || b3[i] != (unsigned char) s)
> + abort ();
> + s += 2 * i;
> + }
> + return 0;
> +}
>
> Jakub