On Wed, 3 Jul 2019 at 07:11, Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The following patch implements roughly the > https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01330.html > design for worksharing loops (so far not for composite for simd, that will > be the next larger task). > > Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. > > 2019-07-03 Jakub Jelinek <ja...@redhat.com> > > * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ > clause. > * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of > OMP_CLAUSE__CONDTEMP_ as range's upper bound. > (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define. > * tree.c (omp_clause_num_ops, omp_clause_code_name): Add > OMP_CLAUSE__SCANTEMP_ entry. > (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_. > * tree-pretty-print.c (dump_omp_clause): Likewise. > * tree-nested.c (convert_nonlocal_omp_clauses, > convert_local_omp_clauses): Likewise. > * omp-general.h (struct omp_for_data): Add have_scantemp and > have_nonctrl_scantemp members. > * omp-general.c (omp_extract_for_data): Initialize them. > * omp-low.c (struct omp_context): Add scan_exclusive member. > (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind > result again with GF_OMP_FOR_KIND_MASK. Initialize also > ctx->scan_exclusive. > (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead > of !ctx->scan_inclusive. > (lower_rec_input_clauses): Simplify gimplification of dtors using > gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN > rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing > loops. Don't add barrier for reduction_omp_orig_ref if > ctx->scan_??xclusive. > (lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive. > (lower_omp_scan): Use ctx->scan_exclusive instead > of !ctx->scan_inclusive. Handle worksharing loops with inscan > reductions. Use new_vard != new_var instead of repeated > omp_is_reference calls. 
> (omp_find_scan, lower_omp_for_scan): New functions. > (lower_omp_for): Call lower_omp_for_scan for worksharing loops with > inscan reductions. > * omp-expand.c (expand_omp_scantemp_alloc): New function. > (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp > and fd->have_scantemp. > > * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message. > * c-c++-common/gomp/scan-5.c (foo): Likewise. >
Hi Jakub, This patch leads to new failures on arm: FAIL: c-c++-common/gomp/scan-3.c (internal compiler error) FAIL: c-c++-common/gomp/scan-5.c (internal compiler error) The logs say: spawn -ignore SIGHUP /aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/xgcc -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/ /gcc/testsuite/c-c++-common/gomp/scan-3.c -fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fopenmp -Wno-hsa -S -o scan-3.s during RTL pass: expand /gcc/testsuite/c-c++-common/gomp/scan-3.c: In function 'f1': /gcc/testsuite/c-c++-common/gomp/scan-3.c:4:1: internal compiler error: tree check: expected ssa_name, have var_decl in single_imm_use, at ssa-iterators.h:421 0x5b43ad tree_check_failed(tree_node const*, char const*, int, char const*, ...) /gcc/tree.c:9902 0x9c18c5 tree_check(tree_node const*, char const*, int, char const*, tree_code) /gcc/tree.h:3473 0x9c18c5 single_imm_use /gcc/ssa-iterators.h:421 0x9c18c5 expand_mul_overflow /gcc/internal-fn.c:1590 0x9c209b expand_arith_overflow /gcc/internal-fn.c:2318 0x75f807 expand_call_stmt /gcc/cfgexpand.c:2638 0x75f807 expand_gimple_stmt_1 /gcc/cfgexpand.c:3708 0x75f807 expand_gimple_stmt /gcc/cfgexpand.c:3867 0x765c73 expand_gimple_basic_block /gcc/cfgexpand.c:5907 0x7682b6 execute /gcc/cfgexpand.c:6530 Please submit a full bug report, Christophe > * testsuite/libgomp.c++/scan-1.C: New test. > * testsuite/libgomp.c++/scan-2.C: New test. > * testsuite/libgomp.c++/scan-3.C: New test. > * testsuite/libgomp.c++/scan-4.C: New test. > * testsuite/libgomp.c++/scan-5.C: New test. > * testsuite/libgomp.c++/scan-6.C: New test. > * testsuite/libgomp.c++/scan-7.C: New test. > * testsuite/libgomp.c++/scan-8.C: New test. > * testsuite/libgomp.c/scan-1.c: New test. > * testsuite/libgomp.c/scan-2.c: New test. > * testsuite/libgomp.c/scan-3.c: New test. > * testsuite/libgomp.c/scan-4.c: New test. > * testsuite/libgomp.c/scan-5.c: New test. 
> * testsuite/libgomp.c/scan-6.c: New test. > * testsuite/libgomp.c/scan-7.c: New test. > * testsuite/libgomp.c/scan-8.c: New test. > > --- gcc/tree-core.h.jj 2019-07-02 12:57:30.283555122 +0200 > +++ gcc/tree-core.h 2019-07-02 13:08:01.387672125 +0200 > @@ -352,6 +352,9 @@ enum omp_clause_code { > /* Internal clause: temporary for lastprivate(conditional:). */ > OMP_CLAUSE__CONDTEMP_, > > + /* Internal clause: temporary for inscan reductions. */ > + OMP_CLAUSE__SCANTEMP_, > + > /* OpenACC/OpenMP clause: if (scalar-expression). */ > OMP_CLAUSE_IF, > > --- gcc/tree.h.jj 2019-07-02 12:57:30.284555106 +0200 > +++ gcc/tree.h 2019-07-02 13:08:01.385672156 +0200 > @@ -1449,7 +1449,7 @@ class auto_suppress_location_wrappers > #define OMP_CLAUSE_DECL(NODE) \ > OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ > OMP_CLAUSE_PRIVATE, \ > - OMP_CLAUSE__CONDTEMP_), 0) > + OMP_CLAUSE__SCANTEMP_), 0) > #define OMP_CLAUSE_HAS_LOCATION(NODE) \ > (LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus) > \ > != UNKNOWN_LOCATION) > @@ -1761,6 +1761,17 @@ class auto_suppress_location_wrappers > #define OMP_CLAUSE__CONDTEMP__ITER(NODE) \ > (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag) > > +/* _SCANTEMP_ holding temporary with pointer to thread's local array; > + allocation. */ > +#define OMP_CLAUSE__SCANTEMP__ALLOC(NODE) \ > + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)->base.public_flag) > + > +/* _SCANTEMP_ holding temporary with a control variable for deallocation; > + one boolean_type_node for test whether alloca was used, another one > + to pass to __builtin_stack_restore or free. */ > +#define OMP_CLAUSE__SCANTEMP__CONTROL(NODE) \ > + TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)) > + > /* SSA_NAME accessors. */ > > /* Whether SSA_NAME NODE is a virtual operand. 
This simply caches the > --- gcc/tree.c.jj 2019-07-02 12:57:30.284555106 +0200 > +++ gcc/tree.c 2019-07-02 13:08:01.382672202 +0200 > @@ -311,6 +311,7 @@ unsigned const char omp_clause_num_ops[] > 1, /* OMP_CLAUSE__LOOPTEMP_ */ > 1, /* OMP_CLAUSE__REDUCTEMP_ */ > 1, /* OMP_CLAUSE__CONDTEMP_ */ > + 1, /* OMP_CLAUSE__SCANTEMP_ */ > 1, /* OMP_CLAUSE_IF */ > 1, /* OMP_CLAUSE_NUM_THREADS */ > 1, /* OMP_CLAUSE_SCHEDULE */ > @@ -391,6 +392,7 @@ const char * const omp_clause_code_name[ > "_looptemp_", > "_reductemp_", > "_condtemp_", > + "_scantemp_", > "if", > "num_threads", > "schedule", > @@ -12316,6 +12318,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func > case OMP_CLAUSE__LOOPTEMP_: > case OMP_CLAUSE__REDUCTEMP_: > case OMP_CLAUSE__CONDTEMP_: > + case OMP_CLAUSE__SCANTEMP_: > case OMP_CLAUSE__SIMDUID_: > WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); > /* FALLTHRU */ > --- gcc/tree-pretty-print.c.jj 2019-07-02 12:57:30.409553135 +0200 > +++ gcc/tree-pretty-print.c 2019-07-02 13:08:01.383672187 +0200 > @@ -483,6 +483,9 @@ dump_omp_clause (pretty_printer *pp, tre > case OMP_CLAUSE__CONDTEMP_: > name = "_condtemp_"; > goto print_remap; > + case OMP_CLAUSE__SCANTEMP_: > + name = "_scantemp_"; > + goto print_remap; > case OMP_CLAUSE_TO_DECLARE: > name = "to"; > goto print_remap; > --- gcc/tree-nested.c.jj 2019-07-02 12:57:30.282555137 +0200 > +++ gcc/tree-nested.c 2019-07-02 13:08:01.386672141 +0200 > @@ -1349,6 +1349,7 @@ convert_nonlocal_omp_clauses (tree *pcla > case OMP_CLAUSE_IF_PRESENT: > case OMP_CLAUSE_FINALIZE: > case OMP_CLAUSE__CONDTEMP_: > + case OMP_CLAUSE__SCANTEMP_: > break; > > /* The following clause belongs to the OpenACC cache directive, > which > @@ -2078,6 +2079,7 @@ convert_local_omp_clauses (tree *pclause > case OMP_CLAUSE_IF_PRESENT: > case OMP_CLAUSE_FINALIZE: > case OMP_CLAUSE__CONDTEMP_: > + case OMP_CLAUSE__SCANTEMP_: > break; > > /* The following clause belongs to the OpenACC cache directive, > which > --- gcc/omp-general.h.jj 2019-07-02 
12:57:30.282555137 +0200 > +++ gcc/omp-general.h 2019-07-02 13:08:01.387672125 +0200 > @@ -63,7 +63,7 @@ struct omp_for_data > int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */ > int ordered; > bool have_nowait, have_ordered, simd_schedule, have_reductemp; > - bool have_pointer_condtemp; > + bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp; > int lastprivate_conditional; > unsigned char sched_modifiers; > enum omp_clause_schedule_kind sched_kind; > --- gcc/omp-general.c.jj 2019-07-02 12:57:30.282555137 +0200 > +++ gcc/omp-general.c 2019-07-02 13:08:01.385672156 +0200 > @@ -169,6 +169,8 @@ omp_extract_for_data (gomp_for *for_stmt > fd->have_ordered = false; > fd->have_reductemp = false; > fd->have_pointer_condtemp = false; > + fd->have_scantemp = false; > + fd->have_nonctrl_scantemp = false; > fd->lastprivate_conditional = 0; > fd->tiling = NULL_TREE; > fd->collapse = 1; > @@ -231,6 +233,12 @@ omp_extract_for_data (gomp_for *for_stmt > if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) > fd->have_pointer_condtemp = true; > break; > + case OMP_CLAUSE__SCANTEMP_: > + fd->have_scantemp = true; > + if (!OMP_CLAUSE__SCANTEMP__ALLOC (t) > + && !OMP_CLAUSE__SCANTEMP__CONTROL (t)) > + fd->have_nonctrl_scantemp = true; > + break; > default: > break; > } > --- gcc/omp-low.c.jj 2019-07-02 12:57:30.285555090 +0200 > +++ gcc/omp-low.c 2019-07-02 16:37:26.229374596 +0200 > @@ -144,6 +144,9 @@ struct omp_context > > /* True if there is nested scan context with inclusive clause. */ > bool scan_inclusive; > + > + /* True if there is nested scan context with exclusive clause. 
*/ > + bool scan_exclusive; > }; > > static splay_tree all_contexts; > @@ -3316,8 +3319,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *g > break; > > case GIMPLE_OMP_FOR: > - if (((gimple_omp_for_kind (as_a <gomp_for *> (stmt)) > - & GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD) > + if ((gimple_omp_for_kind (as_a <gomp_for *> (stmt)) > + == GF_OMP_FOR_KIND_SIMD) > && omp_maybe_offloaded_ctx (ctx) > && omp_max_simt_vf ()) > scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx); > @@ -3335,8 +3338,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *g > > case GIMPLE_OMP_SCAN: > if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt))) > - if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE) > - ctx->scan_inclusive = true; > + { > + if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE) > + ctx->scan_inclusive = true; > + else if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_EXCLUSIVE) > + ctx->scan_exclusive = true; > + } > /* FALLTHRU */ > case GIMPLE_OMP_SECTION: > case GIMPLE_OMP_MASTER: > @@ -3769,7 +3776,7 @@ lower_rec_simd_input_clauses (tree new_v > sctx->lastlane, NULL_TREE, NULL_TREE); > TREE_THIS_NOTRAP (*rvar) = 1; > > - if (!ctx->scan_inclusive) > + if (ctx->scan_exclusive) > { > /* And for exclusive scan yet another one, which will > hold the value during the scan phase. 
*/ > @@ -3854,7 +3861,7 @@ static void > lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, > omp_context *ctx, struct omp_for_data *fd) > { > - tree c, dtor, copyin_seq, x, ptr; > + tree c, copyin_seq, x, ptr; > bool copyin_by_ref = false; > bool lastprivate_firstprivate = false; > bool reduction_omp_orig_ref = false; > @@ -4541,12 +4548,7 @@ lower_rec_input_clauses (tree clauses, g > x = lang_hooks.decls.omp_clause_dtor > (c, build_simple_mem_ref > (y2)); > if (x) > - { > - gimple_seq tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (dlist, tseq); > - } > + gimplify_and_add (x, dlist); > } > } > else > @@ -4913,13 +4915,7 @@ lower_rec_input_clauses (tree clauses, g > { > y = lang_hooks.decls.omp_clause_dtor (c, ivar); > if (y) > - { > - gimple_seq tseq = NULL; > - > - dtor = y; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (&llist[1], tseq); > - } > + gimplify_and_add (y, &llist[1]); > } > break; > } > @@ -4949,13 +4945,7 @@ lower_rec_input_clauses (tree clauses, g > do_dtor: > x = lang_hooks.decls.omp_clause_dtor (c, new_var); > if (x) > - { > - gimple_seq tseq = NULL; > - > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (dlist, tseq); > - } > + gimplify_and_add (x, dlist); > break; > > case OMP_CLAUSE_LINEAR: > @@ -5103,13 +5093,7 @@ lower_rec_input_clauses (tree clauses, g > gimplify_and_add (x, &llist[0]); > x = lang_hooks.decls.omp_clause_dtor (c, ivar); > if (x) > - { > - gimple_seq tseq = NULL; > - > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (&llist[1], tseq); > - } > + gimplify_and_add (x, &llist[1]); > break; > } > if (omp_is_reference (var)) > @@ -5282,12 +5266,7 @@ lower_rec_input_clauses (tree clauses, g > > x = lang_hooks.decls.omp_clause_dtor (c, nv); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (dlist, tseq); > - } > + gimplify_and_add (x, dlist); > } > > tree ref = 
build_outer_var_ref (var, ctx); > @@ -5310,34 +5289,19 @@ lower_rec_input_clauses (tree clauses, g > > x = lang_hooks.decls.omp_clause_dtor (c, ivar); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (&llist[1], tseq); > - } > + gimplify_and_add (x, &llist[1]); > > tree ivar2 = unshare_expr (lvar); > TREE_OPERAND (ivar2, 1) = sctx.idx; > x = lang_hooks.decls.omp_clause_dtor (c, ivar2); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (&llist[1], tseq); > - } > + gimplify_and_add (x, &llist[1]); > > if (rvar2) > { > x = lang_hooks.decls.omp_clause_dtor (c, rvar2); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (&llist[1], tseq); > - } > + gimplify_and_add (x, &llist[1]); > } > break; > } > @@ -5362,12 +5326,7 @@ lower_rec_input_clauses (tree clauses, g > build_fold_addr_expr (lvar)); > x = lang_hooks.decls.omp_clause_dtor (c, ivar); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (&llist[1], tseq); > - } > + gimplify_and_add (x, &llist[1]); > break; > } > /* If this is a reference to constant size reduction var > @@ -5409,16 +5368,19 @@ lower_rec_input_clauses (tree clauses, g > if (x) > gimplify_and_add (x, ilist); > > - if (rvarp) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION > + && OMP_CLAUSE_REDUCTION_INSCAN (c)) > { > - if (x) > + if (x || (!is_simd > + && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))) > { > tree nv = create_tmp_var_raw (TREE_TYPE (new_var)); > gimple_add_tmp_var (nv); > ctx->cb.decl_map->put (new_vard, nv); > x = lang_hooks.decls.omp_clause_default_ctor > (c, nv, build_outer_var_ref (var, ctx)); > - gimplify_and_add (x, ilist); > + if (x) > + gimplify_and_add (x, ilist); > if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) > { > tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); > @@ -5433,7 +5395,7 @@ lower_rec_input_clauses (tree clauses, g > 
gimple_seq_add_seq (ilist, tseq); > } > OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; > - if (!ctx->scan_inclusive) > + if (is_simd && ctx->scan_exclusive) > { > tree nv2 > = create_tmp_var_raw (TREE_TYPE (new_var)); > @@ -5444,23 +5406,14 @@ lower_rec_input_clauses (tree clauses, g > gimplify_and_add (x, ilist); > x = lang_hooks.decls.omp_clause_dtor (c, nv2); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (dlist, tseq); > - } > + gimplify_and_add (x, dlist); > } > x = lang_hooks.decls.omp_clause_dtor (c, nv); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (dlist, tseq); > - } > + gimplify_and_add (x, dlist); > } > - else if (!ctx->scan_inclusive > + else if (is_simd > + && ctx->scan_exclusive > && TREE_ADDRESSABLE (TREE_TYPE (new_var))) > { > tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); > @@ -5468,12 +5421,7 @@ lower_rec_input_clauses (tree clauses, g > ctx->cb.decl_map->put (new_vard, nv2); > x = lang_hooks.decls.omp_clause_dtor (c, nv2); > if (x) > - { > - tseq = NULL; > - dtor = x; > - gimplify_stmt (&dtor, &tseq); > - gimple_seq_add_seq (dlist, tseq); > - } > + gimplify_and_add (x, dlist); > } > DECL_HAS_VALUE_EXPR_P (placeholder) = 0; > goto do_dtor; > @@ -5611,7 +5559,8 @@ lower_rec_input_clauses (tree clauses, g > { > if (omp_is_reference (var) && is_simd) > handle_simd_reference (clause_loc, new_vard, ilist); > - if (rvarp) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION > + && OMP_CLAUSE_REDUCTION_INSCAN (c)) > break; > gimplify_assign (new_var, x, ilist); > if (is_simd) > @@ -5815,7 +5764,10 @@ lower_rec_input_clauses (tree clauses, g > lastprivate clauses we need to ensure the lastprivate copying > happens after firstprivate copying in all threads. And similarly > for UDRs if initializer expression refers to omp_orig. 
*/ > - if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref) > + if (copyin_by_ref || lastprivate_firstprivate > + || (reduction_omp_orig_ref > + && !ctx->scan_inclusive > + && !ctx->scan_exclusive)) > { > /* Don't add any barrier for #pragma omp simd or > #pragma omp distribute. */ > @@ -6464,6 +6416,10 @@ lower_reduction_clauses (tree clauses, g > && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) > return; > > + /* inscan reductions are handled elsewhere. */ > + if (ctx->scan_inclusive || ctx->scan_exclusive) > + return; > + > /* First see if there is exactly one reduction clause. Use OMP_ATOMIC > update in that case, otherwise use a lock. */ > for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c)) > @@ -8650,7 +8606,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > gimple_seq before = NULL; > omp_context *octx = ctx->outer; > gcc_assert (octx); > - if (!octx->scan_inclusive && !has_clauses) > + if (octx->scan_exclusive && !has_clauses) > { > gimple_stmt_iterator gsi2 = *gsi_p; > gsi_next (&gsi2); > @@ -8672,23 +8628,29 @@ lower_omp_scan (gimple_stmt_iterator *gs > } > > bool input_phase = has_clauses ^ octx->scan_inclusive; > - if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR > - && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) > - && !gimple_omp_for_combined_into_p (octx->stmt)) > - { > - if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), > - OMP_CLAUSE__SIMDUID_)) > - { > - tree uid = OMP_CLAUSE__SIMDUID__DECL (c); > - lane = create_tmp_var (unsigned_type_node); > - tree t = build_int_cst (integer_type_node, > - input_phase ? 1 > - : octx->scan_inclusive ? 
2 : 3); > - gimple *g > - = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); > - gimple_call_set_lhs (g, lane); > - gimple_seq_add_stmt (&before, g); > - } > + bool is_simd = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR > + && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) > + && !gimple_omp_for_combined_into_p (octx->stmt)); > + bool is_for = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR > + && gimple_omp_for_kind (octx->stmt) == GF_OMP_FOR_KIND_FOR > + && !gimple_omp_for_combined_p (octx->stmt)); > + if (is_simd) > + if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), > + OMP_CLAUSE__SIMDUID_)) > + { > + tree uid = OMP_CLAUSE__SIMDUID__DECL (c); > + lane = create_tmp_var (unsigned_type_node); > + tree t = build_int_cst (integer_type_node, > + input_phase ? 1 > + : octx->scan_inclusive ? 2 : 3); > + gimple *g > + = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); > + gimple_call_set_lhs (g, lane); > + gimple_seq_add_stmt (&before, g); > + } > + > + if (is_simd || is_for) > + { > for (tree c = gimple_omp_for_clauses (octx->stmt); > c; c = OMP_CLAUSE_CHAIN (c)) > if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION > @@ -8711,7 +8673,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > if (DECL_HAS_VALUE_EXPR_P (new_vard)) > { > val = DECL_VALUE_EXPR (new_vard); > - if (omp_is_reference (var)) > + if (new_vard != new_var) > { > gcc_assert (TREE_CODE (val) == ADDR_EXPR); > val = TREE_OPERAND (val, 0); > @@ -8727,7 +8689,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > lane0 = TREE_OPERAND (val, 1); > TREE_OPERAND (val, 1) = lane; > var2 = lookup_decl (v, octx); > - if (!octx->scan_inclusive) > + if (octx->scan_exclusive) > var4 = lookup_decl (var2, octx); > if (input_phase > && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > @@ -8737,7 +8699,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > var2 = build4 (ARRAY_REF, TREE_TYPE (val), > var2, lane, NULL_TREE, NULL_TREE); > TREE_THIS_NOTRAP (var2) = 1; > - if (!octx->scan_inclusive) > + if 
(octx->scan_exclusive) > { > var4 = build4 (ARRAY_REF, TREE_TYPE (val), > var4, lane, NULL_TREE, > @@ -8759,7 +8721,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > var3 = maybe_lookup_decl (new_vard, octx); > if (var3 == new_vard || var3 == NULL_TREE) > var3 = NULL_TREE; > - else if (!octx->scan_inclusive && !input_phase) > + else if (is_simd && octx->scan_exclusive && !input_phase) > { > var4 = maybe_lookup_decl (var3, octx); > if (var4 == var3 || var4 == NULL_TREE) > @@ -8774,7 +8736,10 @@ lower_omp_scan (gimple_stmt_iterator *gs > } > } > } > - if (!octx->scan_inclusive && !input_phase && var4 == > NULL_TREE) > + if (is_simd > + && octx->scan_exclusive > + && !input_phase > + && var4 == NULL_TREE) > var4 = create_tmp_var (TREE_TYPE (val)); > } > if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > @@ -8794,12 +8759,14 @@ lower_omp_scan (gimple_stmt_iterator *gs > { > /* Otherwise, assign to it the identity element. */ > gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT > (c); > + if (is_for) > + tseq = copy_gimple_seq_and_replace_locals (tseq); > tree ref = build_outer_var_ref (var, octx); > tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) > ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); > if (x) > { > - if (omp_is_reference (var)) > + if (new_vard != new_var) > val = build_fold_addr_expr_loc (clause_loc, > val); > SET_DECL_VALUE_EXPR (new_vard, val); > } > @@ -8811,13 +8778,14 @@ lower_omp_scan (gimple_stmt_iterator *gs > SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); > DECL_HAS_VALUE_EXPR_P (placeholder) = 0; > gimple_seq_add_seq (&before, tseq); > - OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; > + if (is_simd) > + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; > } > } > - else > + else if (is_simd) > { > tree x; > - if (!octx->scan_inclusive) > + if (octx->scan_exclusive) > { > tree v4 = unshare_expr (var4); > tree v2 = unshare_expr (var2); > @@ -8828,7 +8796,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > x = (DECL_HAS_VALUE_EXPR_P (new_vard) > ? 
DECL_VALUE_EXPR (new_vard) : NULL_TREE); > tree vexpr = val; > - if (x && omp_is_reference (var)) > + if (x && new_vard != new_var) > vexpr = build_fold_addr_expr_loc (clause_loc, val); > if (x) > SET_DECL_VALUE_EXPR (new_vard, vexpr); > @@ -8864,7 +8832,7 @@ lower_omp_scan (gimple_stmt_iterator *gs > tree x = omp_reduction_init (c, TREE_TYPE (new_var)); > gimplify_assign (val, x, &before); > } > - else > + else if (is_simd) > { > /* scan phase. */ > enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); > @@ -8888,11 +8856,11 @@ lower_omp_scan (gimple_stmt_iterator *gs > } > } > } > - if (!octx->scan_inclusive && !input_phase && lane0) > + if (octx->scan_exclusive && !input_phase && lane0) > { > tree vexpr = unshare_expr (var4); > TREE_OPERAND (vexpr, 1) = lane0; > - if (omp_is_reference (var)) > + if (new_vard != new_var) > vexpr = build_fold_addr_expr_loc (clause_loc, vexpr); > SET_DECL_VALUE_EXPR (new_vard, vexpr); > } > @@ -8901,9 +8869,17 @@ lower_omp_scan (gimple_stmt_iterator *gs > else if (has_clauses) > sorry_at (gimple_location (stmt), > "%<#pragma omp scan%> not supported yet"); > - gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); > - gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT); > - gsi_replace (gsi_p, gimple_build_nop (), true); > + if (!is_for) > + { > + gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); > + gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT); > + gsi_replace (gsi_p, gimple_build_nop (), true); > + } > + else if (before) > + { > + gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt)); > + gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT); > + } > } > > > @@ -9124,6 +9100,712 @@ lower_omp_for_lastprivate (struct omp_fo > } > } > > +/* Callback for walk_gimple_seq. Find #pragma omp scan statement. 
*/ > + > +tree > +omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, > + struct walk_stmt_info *wi) > +{ > + gimple *stmt = gsi_stmt (*gsi_p); > + > + *handled_ops_p = true; > + switch (gimple_code (stmt)) > + { > + WALK_SUBSTMTS; > + > + case GIMPLE_OMP_SCAN: > + *(gimple_stmt_iterator *) (wi->info) = *gsi_p; > + return integer_zero_node; > + default: > + break; > + } > + return NULL; > +} > + > +/* Helper function for lower_omp_for, add transformations for a worksharing > + loop with scan directives inside of it. > + For worksharing loop not combined with simd, transform: > + #pragma omp for reduction(inscan,+:r) private(i) > + for (i = 0; i < n; i = i + 1) > + { > + { > + update (r); > + } > + #pragma omp scan inclusive(r) > + { > + use (r); > + } > + } > + > + into two worksharing loops + code to merge results: > + > + num_threads = omp_get_num_threads (); > + thread_num = omp_get_thread_num (); > + if (thread_num == 0) goto <D.2099>; else goto <D.2100>; > + <D.2099>: > + var2 = r; > + goto <D.2101>; > + <D.2100>: > + // For UDRs this is UDR init, or if ctors are needed, copy from > + // var3 that has been constructed to contain the neutral element. > + var2 = 0; > + <D.2101>: > + ivar = 0; > + // The _scantemp_ clauses will arrange for rpriva to be initialized to > + // a shared array with num_threads elements and rprivb to a local array > + // number of elements equal to the number of (contiguous) iterations the > + // current thread will perform. controlb and controlp variables are > + // temporaries to handle deallocation of rprivb at the end of second > + // GOMP_FOR. > + #pragma omp for _scantemp_(rpriva) _scantemp_(rprivb) > _scantemp_(controlb) \ > + _scantemp_(controlp) reduction(inscan,+:r) private(i) nowait > + for (i = 0; i < n; i = i + 1) > + { > + { > + // For UDRs this is UDR init or copy from var3. > + r = 0; > + // This is the input phase from user code. > + update (r); > + } > + { > + // For UDRs this is UDR merge. 
> + var2 = var2 + r; > + // Rather than handing it over to the user, save to local thread's > + // array. > + rprivb[ivar] = var2; > + // For exclusive scan, the above two statements are swapped. > + ivar = ivar + 1; > + } > + } > + // And remember the final value from this thread into the shared > + // rpriva array. > + rpriva[(sizetype) thread_num] = var2; > + // If more than one thread, compute using Work-Efficient prefix sum > + // the inclusive parallel scan of the rpriva array. > + if (num_threads > 1) goto <D.2102>; else goto <D.2103>; > + <D.2102>: > + GOMP_barrier (); > + down = 0; > + k = 1; > + num_threadsu = (unsigned int) num_threads; > + thread_nump1 = (unsigned int) thread_num + 1; > + <D.2108>: > + twok = k << 1; > + if (twok > num_threadsu) goto <D.2110>; else goto <D.2111>; > + <D.2110>: > + down = 4294967295; > + k = k >> 1; > + if (k == num_threadsu) goto <D.2112>; else goto <D.2111>; > + <D.2112>: > + k = k >> 1; > + <D.2111>: > + twok = k << 1; > + cplx = .MUL_OVERFLOW (thread_nump1, twok); > + mul = REALPART_EXPR <cplx>; > + ovf = IMAGPART_EXPR <cplx>; > + if (ovf == 0) goto <D.2116>; else goto <D.2117>; > + <D.2116>: > + andv = k & down; > + andvm1 = andv + 4294967295; > + l = mul + andvm1; > + if (l < num_threadsu) goto <D.2120>; else goto <D.2117>; > + <D.2120>: > + // For UDRs this is UDR merge, performed using var2 variable as temporary, > + // i.e. var2 = rpriva[l - k]; UDR merge (var2, rpriva[l]); rpriva[l] = var2; > + rpriva[l] = rpriva[l - k] + rpriva[l]; > + <D.2117>: > + if (down == 0) goto <D.2121>; else goto <D.2122>; > + <D.2121>: > + k = k << 1; > + goto <D.2123>; > + <D.2122>: > + k = k >> 1; > + <D.2123>: > + GOMP_barrier (); > + if (k != 0) goto <D.2108>; else goto <D.2103>; > + <D.2103>: > + if (thread_num == 0) goto <D.2124>; else goto <D.2125>; > + <D.2124>: > + // For UDRs this is UDR init or copy from var3.
> + var2 = 0; > + goto <D.2126>; > + <D.2125>: > + var2 = rpriva[thread_num - 1]; > + <D.2126>: > + ivar = 0; > + #pragma omp for _scantemp_(controlb) _scantemp_(controlp) \ > + reduction(inscan,+:r) private(i) > + for (i = 0; i < n; i = i + 1) > + { > + { > + // For UDRs, this is UDR merge (rprivb[ivar], var2); r = > rprivb[ivar]; > + r = rprivb[ivar] + var2; > + } > + { > + // This is the scan phase from user code. > + use (r); > + // Plus a bump of the iterator. > + ivar = ivar + 1; > + } > + } */ > + > +static void > +lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt, > + struct omp_for_data *fd, omp_context *ctx) > +{ > + gcc_assert (ctx->scan_inclusive || ctx->scan_exclusive); > + > + gimple_seq body = gimple_omp_body (stmt); > + gimple_stmt_iterator input1_gsi = gsi_none (); > + struct walk_stmt_info wi; > + memset (&wi, 0, sizeof (wi)); > + wi.val_only = true; > + wi.info = (void *) &input1_gsi; > + walk_gimple_seq_mod (&body, omp_find_scan, NULL, &wi); > + gcc_assert (!gsi_end_p (input1_gsi)); > + > + gimple *input_stmt1 = gsi_stmt (input1_gsi); > + gimple_stmt_iterator gsi = input1_gsi; > + gsi_next (&gsi); > + gimple_stmt_iterator scan1_gsi = gsi; > + gimple *scan_stmt1 = gsi_stmt (gsi); > + gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN); > + > + gimple_seq input_body = gimple_omp_body (input_stmt1); > + gimple_seq scan_body = gimple_omp_body (scan_stmt1); > + gimple_omp_set_body (input_stmt1, NULL); > + gimple_omp_set_body (scan_stmt1, NULL); > + gimple_omp_set_body (stmt, NULL); > + > + gomp_for *new_stmt = as_a <gomp_for *> (gimple_copy (stmt)); > + gimple_seq new_body = copy_gimple_seq_and_replace_locals (body); > + gimple_omp_set_body (stmt, body); > + gimple_omp_set_body (input_stmt1, input_body); > + > + gimple_stmt_iterator input2_gsi = gsi_none (); > + memset (&wi, 0, sizeof (wi)); > + wi.val_only = true; > + wi.info = (void *) &input2_gsi; > + walk_gimple_seq_mod (&new_body, omp_find_scan, NULL, 
&wi); > + gcc_assert (!gsi_end_p (input2_gsi)); > + > + gimple *input_stmt2 = gsi_stmt (input2_gsi); > + gsi = input2_gsi; > + gsi_next (&gsi); > + gimple_stmt_iterator scan2_gsi = gsi; > + gimple *scan_stmt2 = gsi_stmt (gsi); > + gcc_assert (scan_stmt2 && gimple_code (scan_stmt2) == GIMPLE_OMP_SCAN); > + gimple_omp_set_body (scan_stmt2, scan_body); > + > + tree num_threads = create_tmp_var (integer_type_node); > + tree thread_num = create_tmp_var (integer_type_node); > + tree nthreads_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); > + tree threadnum_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); > + gimple *g = gimple_build_call (nthreads_decl, 0); > + gimple_call_set_lhs (g, num_threads); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_call (threadnum_decl, 0); > + gimple_call_set_lhs (g, thread_num); > + gimple_seq_add_stmt (body_p, g); > + > + tree ivar = create_tmp_var (sizetype); > + tree new_clauses1 = NULL_TREE, new_clauses2 = NULL_TREE; > + tree *cp1 = &new_clauses1, *cp2 = &new_clauses2; > + tree k = create_tmp_var (unsigned_type_node); > + tree l = create_tmp_var (unsigned_type_node); > + > + gimple_seq clist = NULL, mdlist = NULL; > + gimple_seq thr01_list = NULL, thrn1_list = NULL; > + gimple_seq thr02_list = NULL, thrn2_list = NULL; > + gimple_seq scan1_list = NULL, input2_list = NULL; > + gimple_seq last_list = NULL, reduc_list = NULL; > + for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c)) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION > + && OMP_CLAUSE_REDUCTION_INSCAN (c)) > + { > + location_t clause_loc = OMP_CLAUSE_LOCATION (c); > + tree var = OMP_CLAUSE_DECL (c); > + tree new_var = lookup_decl (var, ctx); > + tree var3 = NULL_TREE; > + tree new_vard = new_var; > + if (omp_is_reference (var)) > + new_var = build_simple_mem_ref_loc (clause_loc, new_var); > + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > + { > + var3 = maybe_lookup_decl (new_vard, ctx); > + if (var3 == new_vard) > + 
var3 = NULL_TREE; > + } > + > + tree ptype = build_pointer_type (TREE_TYPE (new_var)); > + tree rpriva = create_tmp_var (ptype); > + tree nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_); > + OMP_CLAUSE_DECL (nc) = rpriva; > + *cp1 = nc; > + cp1 = &OMP_CLAUSE_CHAIN (nc); > + > + tree rprivb = create_tmp_var (ptype); > + nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_); > + OMP_CLAUSE_DECL (nc) = rprivb; > + OMP_CLAUSE__SCANTEMP__ALLOC (nc) = 1; > + *cp1 = nc; > + cp1 = &OMP_CLAUSE_CHAIN (nc); > + > + tree var2 = create_tmp_var_raw (TREE_TYPE (new_var)); > + if (new_vard != new_var) > + TREE_ADDRESSABLE (var2) = 1; > + gimple_add_tmp_var (var2); > + > + tree x = fold_convert_loc (clause_loc, sizetype, thread_num); > + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, > + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); > + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); > + tree rpriva_ref = build_simple_mem_ref_loc (clause_loc, x); > + > + x = fold_build2_loc (clause_loc, PLUS_EXPR, integer_type_node, > + thread_num, integer_minus_one_node); > + x = fold_convert_loc (clause_loc, sizetype, x); > + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, > + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); > + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); > + tree rprivam1_ref = build_simple_mem_ref_loc (clause_loc, x); > + > + x = fold_convert_loc (clause_loc, sizetype, l); > + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, > + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); > + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); > + tree rprival_ref = build_simple_mem_ref_loc (clause_loc, x); > + > + x = fold_build2_loc (clause_loc, MINUS_EXPR, unsigned_type_node, l, > k); > + x = fold_convert_loc (clause_loc, sizetype, x); > + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, > + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); > + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); > + tree 
rprivalmk_ref = build_simple_mem_ref_loc (clause_loc, x); > + > + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, ivar, > + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); > + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rprivb), rprivb, x); > + tree rprivb_ref = build_simple_mem_ref_loc (clause_loc, x); > + > + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > + { > + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); > + tree val = var2; > + if (new_vard != new_var) > + val = build_fold_addr_expr_loc (clause_loc, val); > + > + x = lang_hooks.decls.omp_clause_default_ctor > + (c, var2, build_outer_var_ref (var, ctx)); > + if (x) > + gimplify_and_add (x, &clist); > + > + x = build_outer_var_ref (var, ctx); > + x = lang_hooks.decls.omp_clause_assign_op (c, var2, x); > + gimplify_and_add (x, &thr01_list); > + > + tree y = (DECL_HAS_VALUE_EXPR_P (new_vard) > + ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); > + if (var3) > + { > + x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3); > + gimplify_and_add (x, &thrn1_list); > + x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3); > + gimplify_and_add (x, &thr02_list); > + } > + else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) > + { > + /* Otherwise, assign to it the identity element. 
*/ > + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); > + tseq = copy_gimple_seq_and_replace_locals (tseq); > + SET_DECL_VALUE_EXPR (new_vard, val); > + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; > + SET_DECL_VALUE_EXPR (placeholder, error_mark_node); > + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; > + lower_omp (&tseq, ctx); > + gimple_seq_add_seq (&thrn1_list, tseq); > + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); > + lower_omp (&tseq, ctx); > + gimple_seq_add_seq (&thr02_list, tseq); > + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); > + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; > + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; > + if (y) > + SET_DECL_VALUE_EXPR (new_vard, y); > + else > + { > + DECL_HAS_VALUE_EXPR_P (new_vard) = 0; > + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); > + } > + } > + > + x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivam1_ref); > + gimplify_and_add (x, &thrn2_list); > + > + if (ctx->scan_exclusive) > + { > + x = unshare_expr (rprivb_ref); > + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); > + gimplify_and_add (x, &scan1_list); > + } > + > + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); > + tseq = copy_gimple_seq_and_replace_locals (tseq); > + SET_DECL_VALUE_EXPR (placeholder, var2); > + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; > + lower_omp (&tseq, ctx); > + gimple_seq_add_seq (&scan1_list, tseq); > + > + if (ctx->scan_inclusive) > + { > + x = unshare_expr (rprivb_ref); > + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); > + gimplify_and_add (x, &scan1_list); > + } > + > + x = unshare_expr (rpriva_ref); > + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); > + gimplify_and_add (x, &mdlist); > + > + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); > + tseq = copy_gimple_seq_and_replace_locals (tseq); > + SET_DECL_VALUE_EXPR (new_vard, val); > + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; > + SET_DECL_VALUE_EXPR (placeholder, rprivb_ref); > + lower_omp (&tseq, ctx); > + if (y) > + SET_DECL_VALUE_EXPR 
(new_vard, y); > + else > + { > + DECL_HAS_VALUE_EXPR_P (new_vard) = 0; > + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); > + } > + gimple_seq_add_seq (&input2_list, tseq); > + > + x = unshare_expr (new_var); > + x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref); > + gimplify_and_add (x, &input2_list); > + > + x = build_outer_var_ref (var, ctx); > + x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref); > + gimplify_and_add (x, &last_list); > + > + x = lang_hooks.decls.omp_clause_assign_op (c, var2, > rprivalmk_ref); > + gimplify_and_add (x, &reduc_list); > + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); > + tseq = copy_gimple_seq_and_replace_locals (tseq); > + val = rprival_ref; > + if (new_vard != new_var) > + val = build_fold_addr_expr_loc (clause_loc, val); > + SET_DECL_VALUE_EXPR (new_vard, val); > + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; > + SET_DECL_VALUE_EXPR (placeholder, var2); > + lower_omp (&tseq, ctx); > + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; > + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); > + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; > + if (y) > + SET_DECL_VALUE_EXPR (new_vard, y); > + else > + { > + DECL_HAS_VALUE_EXPR_P (new_vard) = 0; > + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); > + } > + gimple_seq_add_seq (&reduc_list, tseq); > + x = lang_hooks.decls.omp_clause_assign_op (c, rprival_ref, var2); > + gimplify_and_add (x, &reduc_list); > + > + x = lang_hooks.decls.omp_clause_dtor (c, var2); > + if (x) > + gimplify_and_add (x, dlist); > + } > + else > + { > + x = build_outer_var_ref (var, ctx); > + gimplify_assign (var2, x, &thr01_list); > + > + x = omp_reduction_init (c, TREE_TYPE (new_var)); > + gimplify_assign (var2, unshare_expr (x), &thrn1_list); > + gimplify_assign (var2, x, &thr02_list); > + > + gimplify_assign (var2, rprivam1_ref, &thrn2_list); > + > + enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); > + if (code == MINUS_EXPR) > + code = PLUS_EXPR; > + > + if (ctx->scan_exclusive) > + gimplify_assign 
(unshare_expr (rprivb_ref), var2, &scan1_list); > + x = build2 (code, TREE_TYPE (new_var), var2, new_var); > + gimplify_assign (var2, x, &scan1_list); > + if (ctx->scan_inclusive) > + gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list); > + > + gimplify_assign (unshare_expr (rpriva_ref), var2, &mdlist); > + > + x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2); > + gimplify_assign (new_var, x, &input2_list); > + > + gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref, > + &last_list); > + > + x = build2 (code, TREE_TYPE (new_var), rprivalmk_ref, > + unshare_expr (rprival_ref)); > + gimplify_assign (rprival_ref, x, &reduc_list); > + } > + } > + > + g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node); > + gimple_seq_add_stmt (&scan1_list, g); > + g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node); > + gimple_seq_add_stmt (gimple_omp_body_ptr (scan_stmt2), g); > + > + tree controlb = create_tmp_var (boolean_type_node); > + tree controlp = create_tmp_var (ptr_type_node); > + tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); > + OMP_CLAUSE_DECL (nc) = controlb; > + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; > + *cp1 = nc; > + cp1 = &OMP_CLAUSE_CHAIN (nc); > + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); > + OMP_CLAUSE_DECL (nc) = controlp; > + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; > + *cp1 = nc; > + cp1 = &OMP_CLAUSE_CHAIN (nc); > + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); > + OMP_CLAUSE_DECL (nc) = controlb; > + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; > + *cp2 = nc; > + cp2 = &OMP_CLAUSE_CHAIN (nc); > + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); > + OMP_CLAUSE_DECL (nc) = controlp; > + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; > + *cp2 = nc; > + cp2 = &OMP_CLAUSE_CHAIN (nc); > + > + *cp1 = gimple_omp_for_clauses (stmt); > + gimple_omp_for_set_clauses (stmt, new_clauses1); > + *cp2 = gimple_omp_for_clauses (new_stmt); > + 
gimple_omp_for_set_clauses (new_stmt, new_clauses2); > + > + gimple_omp_set_body (scan_stmt1, scan1_list); > + gimple_omp_set_body (input_stmt2, input2_list); > + > + gsi_insert_seq_after (&input1_gsi, gimple_omp_body (input_stmt1), > + GSI_SAME_STMT); > + gsi_remove (&input1_gsi, true); > + gsi_insert_seq_after (&scan1_gsi, gimple_omp_body (scan_stmt1), > + GSI_SAME_STMT); > + gsi_remove (&scan1_gsi, true); > + gsi_insert_seq_after (&input2_gsi, gimple_omp_body (input_stmt2), > + GSI_SAME_STMT); > + gsi_remove (&input2_gsi, true); > + gsi_insert_seq_after (&scan2_gsi, gimple_omp_body (scan_stmt2), > + GSI_SAME_STMT); > + gsi_remove (&scan2_gsi, true); > + > + gimple_seq_add_seq (body_p, clist); > + > + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab2 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab3 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab1); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_seq (body_p, thr01_list); > + g = gimple_build_goto (lab3); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab2); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_seq (body_p, thrn1_list); > + g = gimple_build_label (lab3); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_assign (ivar, size_zero_node); > + gimple_seq_add_stmt (body_p, g); > + > + gimple_seq_add_stmt (body_p, stmt); > + gimple_seq_add_seq (body_p, body); > + gimple_seq_add_stmt (body_p, gimple_build_omp_continue (fd->loop.v, > + fd->loop.v)); > + > + g = gimple_build_omp_return (true); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_seq (body_p, mdlist); > + > + lab1 = create_artificial_label (UNKNOWN_LOCATION); > + lab2 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (GT_EXPR, num_threads, integer_one_node, lab1, lab2); > + gimple_seq_add_stmt (body_p, 
g); > + g = gimple_build_label (lab1); > + gimple_seq_add_stmt (body_p, g); > + > + g = omp_build_barrier (NULL); > + gimple_seq_add_stmt (body_p, g); > + > + tree down = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (down, build_zero_cst (unsigned_type_node)); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_assign (k, build_one_cst (unsigned_type_node)); > + gimple_seq_add_stmt (body_p, g); > + > + tree num_threadsu = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (num_threadsu, NOP_EXPR, num_threads); > + gimple_seq_add_stmt (body_p, g); > + > + tree thread_numu = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (thread_numu, NOP_EXPR, thread_num); > + gimple_seq_add_stmt (body_p, g); > + > + tree thread_nump1 = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (thread_nump1, PLUS_EXPR, thread_numu, > + build_int_cst (unsigned_type_node, 1)); > + gimple_seq_add_stmt (body_p, g); > + > + lab3 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_label (lab3); > + gimple_seq_add_stmt (body_p, g); > + > + tree twok = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node); > + gimple_seq_add_stmt (body_p, g); > + > + tree lab4 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab5 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab6 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (GT_EXPR, twok, num_threadsu, lab4, lab5); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab4); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_assign (down, build_all_ones_cst (unsigned_type_node)); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_cond (EQ_EXPR, k, num_threadsu, lab6, lab5); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab6); > + 
gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_label (lab5); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node); > + gimple_seq_add_stmt (body_p, g); > + > + tree cplx = create_tmp_var (build_complex_type (unsigned_type_node, > false)); > + g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok); > + gimple_call_set_lhs (g, cplx); > + gimple_seq_add_stmt (body_p, g); > + tree mul = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (mul, REALPART_EXPR, > + build1 (REALPART_EXPR, unsigned_type_node, cplx)); > + gimple_seq_add_stmt (body_p, g); > + tree ovf = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (ovf, IMAGPART_EXPR, > + build1 (IMAGPART_EXPR, unsigned_type_node, cplx)); > + gimple_seq_add_stmt (body_p, g); > + > + tree lab7 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab8 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (EQ_EXPR, ovf, build_zero_cst (unsigned_type_node), > + lab7, lab8); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab7); > + gimple_seq_add_stmt (body_p, g); > + > + tree andv = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (andv, BIT_AND_EXPR, k, down); > + gimple_seq_add_stmt (body_p, g); > + tree andvm1 = create_tmp_var (unsigned_type_node); > + g = gimple_build_assign (andvm1, PLUS_EXPR, andv, > + build_minus_one_cst (unsigned_type_node)); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_assign (l, PLUS_EXPR, mul, andvm1); > + gimple_seq_add_stmt (body_p, g); > + > + tree lab9 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (LT_EXPR, l, num_threadsu, lab9, lab8); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab9); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_seq (body_p, 
reduc_list); > + g = gimple_build_label (lab8); > + gimple_seq_add_stmt (body_p, g); > + > + tree lab10 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab11 = create_artificial_label (UNKNOWN_LOCATION); > + tree lab12 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (EQ_EXPR, down, build_zero_cst (unsigned_type_node), > + lab10, lab11); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab10); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_assign (k, LSHIFT_EXPR, k, integer_one_node); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_goto (lab12); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab11); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab12); > + gimple_seq_add_stmt (body_p, g); > + > + g = omp_build_barrier (NULL); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_cond (NE_EXPR, k, build_zero_cst (unsigned_type_node), > + lab3, lab2); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_label (lab2); > + gimple_seq_add_stmt (body_p, g); > + > + lab1 = create_artificial_label (UNKNOWN_LOCATION); > + lab2 = create_artificial_label (UNKNOWN_LOCATION); > + lab3 = create_artificial_label (UNKNOWN_LOCATION); > + g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab1); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_seq (body_p, thr02_list); > + g = gimple_build_goto (lab3); > + gimple_seq_add_stmt (body_p, g); > + g = gimple_build_label (lab2); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_seq (body_p, thrn2_list); > + g = gimple_build_label (lab3); > + gimple_seq_add_stmt (body_p, g); > + > + g = gimple_build_assign (ivar, size_zero_node); > + gimple_seq_add_stmt (body_p, g); > + gimple_seq_add_stmt (body_p, new_stmt); > + 
gimple_seq_add_seq (body_p, new_body); > + > + gimple_seq new_dlist = NULL; > + lab1 = create_artificial_label (UNKNOWN_LOCATION); > + lab2 = create_artificial_label (UNKNOWN_LOCATION); > + tree num_threadsm1 = create_tmp_var (integer_type_node); > + g = gimple_build_assign (num_threadsm1, PLUS_EXPR, num_threads, > + integer_minus_one_node); > + gimple_seq_add_stmt (&new_dlist, g); > + g = gimple_build_cond (EQ_EXPR, thread_num, num_threadsm1, lab1, lab2); > + gimple_seq_add_stmt (&new_dlist, g); > + g = gimple_build_label (lab1); > + gimple_seq_add_stmt (&new_dlist, g); > + gimple_seq_add_seq (&new_dlist, last_list); > + g = gimple_build_label (lab2); > + gimple_seq_add_stmt (&new_dlist, g); > + gimple_seq_add_seq (&new_dlist, *dlist); > + *dlist = new_dlist; > +} > > /* Lower code for an OMP loop directive. */ > > @@ -9317,9 +9999,18 @@ lower_omp_for (gimple_stmt_iterator *gsi > > bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP > && gimple_omp_for_grid_phony (stmt)); > - if (!phony_loop) > - gimple_seq_add_stmt (&body, stmt); > - gimple_seq_add_seq (&body, gimple_omp_body (stmt)); > + if ((ctx->scan_inclusive || ctx->scan_exclusive) > + && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR) > + { > + gcc_assert (!phony_loop); > + lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx); > + } > + else > + { > + if (!phony_loop) > + gimple_seq_add_stmt (&body, stmt); > + gimple_seq_add_seq (&body, gimple_omp_body (stmt)); > + } > > if (!phony_loop) > gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, > --- gcc/omp-expand.c.jj 2019-07-02 12:58:01.459063560 +0200 > +++ gcc/omp-expand.c 2019-07-02 13:08:01.388672109 +0200 > @@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_regio > } > } > > +/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, > + compute needed allocation size. If !ALLOC of team allocations, > + if ALLOC of thread allocation. 
SZ is the initial needed size for > + other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, > + CNT number of elements of each array, for !ALLOC this is > + omp_get_num_threads (), for ALLOC number of iterations handled by the > + current thread. If PTR is non-NULL, it is the start of the allocation > + and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ > + clauses pointers to the corresponding arrays. */ > + > +static tree > +expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz, > + unsigned HOST_WIDE_INT alloc_align, tree cnt, > + gimple_stmt_iterator *gsi, bool alloc) > +{ > + tree eltsz = NULL_TREE; > + unsigned HOST_WIDE_INT preval = 0; > + if (ptr && sz) > + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), > + ptr, size_int (sz)); > + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ > + && !OMP_CLAUSE__SCANTEMP__CONTROL (c) > + && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc) > + { > + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); > + unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type); > + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) > + { > + unsigned HOST_WIDE_INT szl > + = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type)); > + szl = least_bit_hwi (szl); > + if (szl) > + al = MIN (al, szl); > + } > + if (ptr == NULL_TREE) > + { > + if (eltsz == NULL_TREE) > + eltsz = TYPE_SIZE_UNIT (pointee_type); > + else > + eltsz = size_binop (PLUS_EXPR, eltsz, > + TYPE_SIZE_UNIT (pointee_type)); > + } > + if (preval == 0 && al <= alloc_align) > + { > + unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz; > + sz += diff; > + if (diff && ptr) > + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), > + ptr, size_int (diff)); > + } > + else if (al > preval) > + { > + if (ptr) > + { > + ptr = fold_convert (pointer_sized_int_node, ptr); > + ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr, > + build_int_cst 
(pointer_sized_int_node, > + al - 1)); > + ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr, > + build_int_cst (pointer_sized_int_node, > + -(HOST_WIDE_INT) al)); > + ptr = fold_convert (ptr_type_node, ptr); > + } > + else > + sz += al - 1; > + } > + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) > + preval = al; > + else > + preval = 1; > + if (ptr) > + { > + expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false); > + ptr = OMP_CLAUSE_DECL (c); > + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, > + size_binop (MULT_EXPR, cnt, > + TYPE_SIZE_UNIT (pointee_type))); > + } > + } > + > + if (ptr == NULL_TREE) > + { > + eltsz = size_binop (MULT_EXPR, eltsz, cnt); > + if (sz) > + eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz)); > + return eltsz; > + } > + else > + return ptr; > +} > + > /* A subroutine of expand_omp_for. Generate code for a parallel > loop with static schedule and no specified chunk size. Given > parameters: > @@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct om > struct omp_for_data *fd, > gimple *inner_stmt) > { > - tree n, q, s0, e0, e, t, tt, nthreads, threadid; > + tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid; > tree type, itype, vmain, vback; > basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; > basic_block body_bb, cont_bb, collapse_bb = NULL; > - basic_block fin_bb; > + basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL; > + basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL; > gimple_stmt_iterator gsi, gsip; > edge ep; > bool broken_loop = region->cont == NULL; > @@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct om > c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); > cond_var = OMP_CLAUSE_DECL (c); > } > - if (fd->have_reductemp || fd->have_pointer_condtemp) > + if (fd->have_reductemp > + || fd->have_pointer_condtemp > + || fd->have_nonctrl_scantemp) > { > tree t1 = build_int_cst 
(long_integer_type_node, 0); > tree t2 = build_int_cst (long_integer_type_node, 1); > @@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct om > gimple_stmt_iterator gsi2 = gsi_none (); > gimple *g = NULL; > tree mem = null_pointer_node, memv = NULL_TREE; > + unsigned HOST_WIDE_INT condtemp_sz = 0; > + unsigned HOST_WIDE_INT alloc_align = 0; > if (fd->have_reductemp) > { > + gcc_assert (!fd->have_nonctrl_scantemp); > tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); > reductions = OMP_CLAUSE_DECL (c); > gcc_assert (TREE_CODE (reductions) == SSA_NAME); > @@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct om > gsi2 = gsip; > reductions = null_pointer_node; > } > - if (fd->have_pointer_condtemp) > + if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp) > { > - tree type = TREE_TYPE (condtemp); > + tree type; > + if (fd->have_pointer_condtemp) > + type = TREE_TYPE (condtemp); > + else > + type = ptr_type_node; > memv = create_tmp_var (type); > TREE_ADDRESSABLE (memv) = 1; > - unsigned HOST_WIDE_INT sz > - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); > - sz *= fd->lastprivate_conditional; > - expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), > - false); > + unsigned HOST_WIDE_INT sz = 0; > + tree size = NULL_TREE; > + if (fd->have_pointer_condtemp) > + { > + sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); > + sz *= fd->lastprivate_conditional; > + condtemp_sz = sz; > + } > + if (fd->have_nonctrl_scantemp) > + { > + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); > + gimple *g = gimple_build_call (nthreads, 0); > + nthreads = create_tmp_var (integer_type_node); > + gimple_call_set_lhs (g, nthreads); > + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); > + nthreads = fold_convert (sizetype, nthreads); > + alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node); > + size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, > + alloc_align, nthreads, NULL, > + false); > + size = fold_convert 
(type, size); > + } > + else > + size = build_int_cst (type, sz); > + expand_omp_build_assign (&gsi2, memv, size, false); > mem = build_fold_addr_expr (memv); > } > tree t > @@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct om > true, GSI_SAME_STMT); > if (fd->have_pointer_condtemp) > expand_omp_build_assign (&gsi2, condtemp, memv, false); > + if (fd->have_nonctrl_scantemp) > + { > + tree ptr = fd->have_pointer_condtemp ? condtemp : memv; > + expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz, > + alloc_align, nthreads, &gsi2, false); > + } > if (fd->have_reductemp) > { > gsi_remove (&gsi2, true); > @@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct om > gsi = gsi_last_nondebug_bb (third_bb); > gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); > > + if (fd->have_nonctrl_scantemp) > + { > + tree clauses = gimple_omp_for_clauses (fd->for_stmt); > + tree controlp = NULL_TREE, controlb = NULL_TREE; > + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ > + && OMP_CLAUSE__SCANTEMP__CONTROL (c)) > + { > + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) > + controlb = OMP_CLAUSE_DECL (c); > + else > + controlp = OMP_CLAUSE_DECL (c); > + if (controlb && controlp) > + break; > + } > + gcc_assert (controlp && controlb); > + tree cnt = create_tmp_var (sizetype); > + gimple *g = gimple_build_assign (cnt, NOP_EXPR, q); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node); > + tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, > + alloc_align, cnt, NULL, true); > + tree size = create_tmp_var (sizetype); > + expand_omp_build_assign (&gsi, size, sz, false); > + tree cmp = fold_build2 (GT_EXPR, boolean_type_node, > + size, size_int (16384)); > + expand_omp_build_assign (&gsi, controlb, cmp); > + g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, > + NULL_TREE, NULL_TREE); > + gsi_insert_before 
(&gsi, g, GSI_SAME_STMT); > + fourth_bb = split_block (third_bb, g)->dest; > + gsi = gsi_last_nondebug_bb (fourth_bb); > + /* FIXME: Once we have allocators, this should use allocator. */ > + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, > size); > + gimple_call_set_lhs (g, controlp); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt, > + &gsi, true); > + gsi_prev (&gsi); > + g = gsi_stmt (gsi); > + fifth_bb = split_block (fourth_bb, g)->dest; > + gsi = gsi_last_nondebug_bb (fifth_bb); > + > + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0); > + gimple_call_set_lhs (g, controlp); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); > + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ > + && OMP_CLAUSE__SCANTEMP__ALLOC (c)) > + { > + tree tmp = create_tmp_var (sizetype); > + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); > + g = gimple_build_assign (tmp, MULT_EXPR, cnt, > + TYPE_SIZE_UNIT (pointee_type)); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + g = gimple_build_call (alloca_decl, 2, tmp, > + size_int (TYPE_ALIGN (pointee_type))); > + gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c)); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + } > + > + sixth_bb = split_block (fifth_bb, g)->dest; > + gsi = gsi_last_nondebug_bb (sixth_bb); > + } > + > t = build2 (MULT_EXPR, itype, q, threadid); > t = build2 (PLUS_EXPR, itype, t, tt); > s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, > GSI_SAME_STMT); > @@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct om > if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) > { > t = gimple_omp_return_lhs (gsi_stmt (gsi)); > - if (fd->have_reductemp || fd->have_pointer_condtemp) > + if (fd->have_reductemp > + || ((fd->have_pointer_condtemp || fd->have_scantemp) 
> + && !fd->have_nonctrl_scantemp)) > { > tree fn; > if (t) > @@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct om > gcall *g = gimple_build_call (fn, 0); > gsi_insert_after (&gsi, g, GSI_SAME_STMT); > } > + if (fd->have_scantemp && !fd->have_nonctrl_scantemp) > + { > + tree clauses = gimple_omp_for_clauses (fd->for_stmt); > + tree controlp = NULL_TREE, controlb = NULL_TREE; > + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ > + && OMP_CLAUSE__SCANTEMP__CONTROL (c)) > + { > + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) > + controlb = OMP_CLAUSE_DECL (c); > + else > + controlp = OMP_CLAUSE_DECL (c); > + if (controlb && controlp) > + break; > + } > + gcc_assert (controlp && controlb); > + gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, > + NULL_TREE, NULL_TREE); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + exit1_bb = split_block (exit_bb, g)->dest; > + gsi = gsi_after_labels (exit1_bb); > + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1, > + controlp); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + exit2_bb = split_block (exit1_bb, g)->dest; > + gsi = gsi_after_labels (exit2_bb); > + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), > 1, > + controlp); > + gsi_insert_before (&gsi, g, GSI_SAME_STMT); > + exit3_bb = split_block (exit2_bb, g)->dest; > + gsi = gsi_after_labels (exit3_bb); > + } > gsi_remove (&gsi, true); > > /* Connect all the blocks. 
*/ > @@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct om > ep = find_edge (entry_bb, second_bb); > ep->flags = EDGE_TRUE_VALUE; > ep->probability = profile_probability::guessed_always ().apply_scale (1, > 4); > - find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; > - find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; > + if (fourth_bb) > + { > + ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE); > + ep->probability > + = profile_probability::guessed_always ().apply_scale (1, 2); > + ep = find_edge (third_bb, fourth_bb); > + ep->flags = EDGE_TRUE_VALUE; > + ep->probability > + = profile_probability::guessed_always ().apply_scale (1, 2); > + ep = find_edge (fourth_bb, fifth_bb); > + redirect_edge_and_branch (ep, sixth_bb); > + } > + else > + sixth_bb = third_bb; > + find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; > + find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE; > + if (exit1_bb) > + { > + ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE); > + ep->probability > + = profile_probability::guessed_always ().apply_scale (1, 2); > + ep = find_edge (exit_bb, exit1_bb); > + ep->flags = EDGE_TRUE_VALUE; > + ep->probability > + = profile_probability::guessed_always ().apply_scale (1, 2); > + ep = find_edge (exit1_bb, exit2_bb); > + redirect_edge_and_branch (ep, exit3_bb); > + } > > if (!broken_loop) > { > @@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct om > > set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); > set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); > - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); > + if (fourth_bb) > + { > + set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb); > + set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb); > + } > + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb); > > set_immediate_dominator (CDI_DOMINATORS, body_bb, > recompute_dominator (CDI_DOMINATORS, body_bb)); > 
set_immediate_dominator (CDI_DOMINATORS, fin_bb, > recompute_dominator (CDI_DOMINATORS, fin_bb)); > + if (exit1_bb) > + { > + set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb); > + set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb); > + } > > struct loop *loop = body_bb->loop_father; > if (loop != entry_bb->loop_father) > --- gcc/testsuite/c-c++-common/gomp/scan-3.c.jj 2019-07-02 12:57:30.366553813 > +0200 > +++ gcc/testsuite/c-c++-common/gomp/scan-3.c 2019-07-02 13:08:01.383672187 > +0200 > @@ -8,7 +8,7 @@ f1 (int *c, int *d) > for (i = 0; i < 64; i++) > { > d[i] = a; > - #pragma omp scan inclusive (a) /* { dg-message "sorry, > unimplemented: '#pragma omp scan' not supported yet" } */ > + #pragma omp scan inclusive (a) > a += c[i]; > } > } > --- gcc/testsuite/c-c++-common/gomp/scan-5.c.jj 2019-07-02 13:01:48.959476445 > +0200 > +++ gcc/testsuite/c-c++-common/gomp/scan-5.c 2019-07-02 13:08:24.532311809 > +0200 > @@ -6,7 +6,7 @@ foo (int *a, int *b) > for (int i = 0; i < 64; i++) > { > r += a[i]; > - #pragma omp scan inclusive (r) /* { dg-message "sorry, > unimplemented: '#pragma omp scan' not supported yet" } */ > + #pragma omp scan inclusive (r) > b[i] = r; > } > return r; > --- libgomp/testsuite/libgomp.c++/scan-1.C.jj 2019-07-02 13:16:12.144014405 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-1.C 2019-07-02 13:17:41.705604254 > +0200 > @@ -0,0 +1,151 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > + > +struct S { > + inline S (); > + inline ~S (); > + inline S (const S &); > + inline S & operator= (const S &); > + int s; > +}; > + > +S::S () : s (0) > +{ > +} > + > +S::~S () > +{ > +} > + > +S::S (const S &x) > +{ > + s = x.s; > +} > + > +S & > +S::operator= (const S &x) > +{ > + s = x.s; > + return *this; > +} > + > +static inline void > +ini (S &x) > +{ > + x.s = 0; > +} > + > +S r, a[1024], b[1024]; > + > +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) > +#pragma omp declare reduction 
(plus: S: omp_out.s += omp_in.s) initializer > (ini (omp_priv)) > + > +__attribute__((noipa)) void > +foo (S *a, S *b) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + r.s += a[i].s; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) S > +bar (void) > +{ > + S s; > + #pragma omp parallel > + #pragma omp for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + s.s += 2 * a[i].s; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return S (s); > +} > + > +__attribute__((noipa)) void > +baz (S *a, S *b) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + r.s += a[i].s; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) S > +qux (void) > +{ > + S s; > + #pragma omp parallel for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + s.s += 2 * a[i].s; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return S (s); > +} > + > +int > +main () > +{ > + S s; > + for (int i = 0; i < 1024; ++i) > + { > + a[i].s = i; > + b[i].s = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s.s += i; > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + } > + if (bar ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s.s += 2 * i; > + if (b[i].s != s.s) > + abort (); > + } > + r.s = 0; > + baz (a, b); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s.s += i; > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + } > + if (qux ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s.s += 2 * i; > + if (b[i].s != s.s) > + abort (); > + } > + return 0; > +} > --- 
libgomp/testsuite/libgomp.c++/scan-2.C.jj 2019-07-02 13:16:12.151014294 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-2.C 2019-07-02 13:29:26.250606680 > +0200 > @@ -0,0 +1,116 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > +int r, a[1024], b[1024], q; > + > +__attribute__((noipa)) void > +foo (int *a, int *b, int &r) > +{ > + #pragma omp for reduction (inscan, +:r) nowait > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, +:s) nowait > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, int &r) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp parallel for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + } > + r = 0; > + baz (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if 
(b[i] != s) > + abort (); > + else > + b[i] = -25; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + } > +} > --- libgomp/testsuite/libgomp.c++/scan-3.C.jj 2019-07-02 13:16:12.157014200 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-3.C 2019-07-02 13:29:33.038500894 > +0200 > @@ -0,0 +1,119 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > +int r, a[1024], b[1024], q; > + > +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer > (omp_priv = 0) > + > +__attribute__((noipa)) void > +foo (int *a, int *b, int &r) > +{ > + #pragma omp for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, int &r) > +{ > + #pragma omp parallel for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp parallel for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + } > + if (bar 
() != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + } > + r = 0; > + baz (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c++/scan-4.C.jj 2019-07-02 13:16:12.559007871 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-4.C 2019-07-02 13:52:26.973993019 > +0200 > @@ -0,0 +1,150 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > + > +struct S { > + inline S (); > + inline ~S (); > + inline S (const S &); > + inline S & operator= (const S &); > + int s; > +}; > + > +S::S () : s (0) > +{ > +} > + > +S::~S () > +{ > +} > + > +S::S (const S &x) > +{ > + s = x.s; > +} > + > +S & > +S::operator= (const S &x) > +{ > + s = x.s; > + return *this; > +} > + > +static inline void > +ini (S &x) > +{ > + x.s = 0; > +} > + > +S r, a[1024], b[1024]; > + > +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) > +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer > (ini (omp_priv)) > + > +__attribute__((noipa)) void > +foo (S *a, S *b, S &r) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + r.s += a[i].s; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) S > +bar () > +{ > + S s; > + #pragma omp parallel > + #pragma omp for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + s.s += 2 * a[i].s; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (S *a, S *b, S &r) > +{ > + #pragma omp parallel for reduction (inscan, +:r) 
> + for (int i = 0; i < 1024; i++) > + { > + r.s += a[i].s; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) S > +qux () > +{ > + S s; > + #pragma omp parallel for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + s.s += 2 * a[i].s; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +int > +main () > +{ > + S s; > + for (int i = 0; i < 1024; ++i) > + { > + a[i].s = i; > + b[i].s = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, r); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s.s += i; > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + } > + if (bar ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s.s += 2 * i; > + if (b[i].s != s.s) > + abort (); > + } > + r.s = 0; > + baz (a, b, r); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s.s += i; > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + } > + if (qux ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s.s += 2 * i; > + if (b[i].s != s.s) > + abort (); > + } > +} > --- libgomp/testsuite/libgomp.c++/scan-5.C.jj 2019-07-02 13:16:12.567007745 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-5.C 2019-07-02 13:29:08.895877145 > +0200 > @@ -0,0 +1,158 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > + > +template <typename T> > +struct S { > + inline S (); > + inline ~S (); > + inline S (const S &); > + inline S & operator= (const S &); > + T s; > +}; > + > +template <typename T> > +S<T>::S () : s (0) > +{ > +} > + > +template <typename T> > +S<T>::~S () > +{ > +} > + > +template <typename T> > +S<T>::S (const S &x) > +{ > + s = x.s; > +} > + > +template <typename T> > +S<T> & > +S<T>::operator= (const S &x) > +{ > + s = x.s; > + return *this; > +} > 
+ > +template <typename T> > +static inline void > +ini (S<T> &x) > +{ > + x.s = 0; > +} > + > +S<int> r, a[1024], b[1024]; > + > +#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s) > +#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) > initializer (ini (omp_priv)) > + > +template <typename T> > +__attribute__((noipa)) void > +foo (S<T> *a, S<T> *b) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +template <typename T> > +__attribute__((noipa)) S<T> > +bar (void) > +{ > + S<T> s; > + #pragma omp parallel > + #pragma omp for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return S<T> (s); > +} > + > +__attribute__((noipa)) void > +baz (S<int> *a, S<int> *b) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +__attribute__((noipa)) S<int> > +qux (void) > +{ > + S<int> s; > + #pragma omp parallel for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return S<int> (s); > +} > + > +int > +main () > +{ > + S<int> s; > + for (int i = 0; i < 1024; ++i) > + { > + a[i].s = i; > + b[i].s = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (bar<int> ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > + r.s = 0; > + baz (a, b); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + s.s = 0; > + for (int i 
= 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (qux ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > +} > --- libgomp/testsuite/libgomp.c++/scan-6.C.jj 2019-07-02 13:16:12.574007634 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-6.C 2019-07-02 13:31:59.909212015 > +0200 > @@ -0,0 +1,120 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > +int r, a[1024], b[1024], q; > + > +template <typename T, typename U> > +__attribute__((noipa)) void > +foo (T a, T b, U r) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +template <typename T> > +__attribute__((noipa)) T > +bar () > +{ > + T &s = q; > + q = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +template <typename T> > +__attribute__((noipa)) void > +baz (T *a, T *b, T &r) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (T i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +template <typename T> > +__attribute__((noipa)) int > +qux () > +{ > + T s = q; > + q = 0; > + #pragma omp parallel for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo<int *, int &> (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + 
else > + b[i] = 25; > + s += i; > + } > + if (bar<int> () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz<int> (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux<int &> () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > +} > --- libgomp/testsuite/libgomp.c++/scan-7.C.jj 2019-07-02 13:16:12.581007523 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-7.C 2019-07-02 13:33:29.036823011 > +0200 > @@ -0,0 +1,118 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > +int r, a[1024], b[1024], q; > + > +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer > (omp_priv = 0) > + > +__attribute__((noipa)) void > +foo (int *a, int *b, int &r) > +{ > + #pragma omp for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, foo:s) nowait > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, int &r) > +{ > + #pragma omp parallel for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp parallel for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > 
+ } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > +} > --- libgomp/testsuite/libgomp.c++/scan-8.C.jj 2019-07-02 13:16:12.587007429 > +0200 > +++ libgomp/testsuite/libgomp.c++/scan-8.C 2019-07-02 13:43:27.518448406 > +0200 > @@ -0,0 +1,150 @@ > +// { dg-require-effective-target size32plus } > + > +extern "C" void abort (); > + > +struct S { > + inline S (); > + inline ~S (); > + inline S (const S &); > + inline S & operator= (const S &); > + int s; > +}; > + > +S::S () : s (0) > +{ > +} > + > +S::~S () > +{ > +} > + > +S::S (const S &x) > +{ > + s = x.s; > +} > + > +S & > +S::operator= (const S &x) > +{ > + s = x.s; > + return *this; > +} > + > +static inline void > +ini (S &x) > +{ > + x.s = 0; > +} > + > +S r, a[1024], b[1024]; > + > +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) > +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer > (ini (omp_priv)) > + > +__attribute__((noipa)) void > +foo (S *a, S *b, S &r) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > 
+__attribute__((noipa)) S > +bar (void) > +{ > + S s; > + #pragma omp parallel > + #pragma omp for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (S *a, S *b, S &r) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +__attribute__((noipa)) S > +qux (void) > +{ > + S s; > + #pragma omp parallel for reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return s; > +} > + > +int > +main () > +{ > + S s; > + for (int i = 0; i < 1024; ++i) > + { > + a[i].s = i; > + b[i].s = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, r); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (bar ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > + r.s = 0; > + baz (a, b, r); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (qux ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > +} > --- libgomp/testsuite/libgomp.c/scan-1.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-1.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,115 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +int r, a[1024], b[1024]; > + > +__attribute__((noipa)) void > +foo (int 
*a, int *b) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int s = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int s = 0; > + #pragma omp parallel for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + } > + r = 0; > + baz (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-2.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-2.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 
+1,117 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +int r, a[1024], b[1024]; > + > +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer > (omp_priv = 0) > + > +__attribute__((noipa)) void > +foo (int *a, int *b) > +{ > + #pragma omp for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int s = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b) > +{ > + #pragma omp parallel for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + r += a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int s = 0; > + #pragma omp parallel for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + s += 2 * a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + } > + r = 0; > + baz (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 
0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s) > + abort (); > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-3.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-3.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,88 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +float r = 1.0f, a[1024], b[1024]; > + > +__attribute__((noipa)) void > +foo (float *a, float *b) > +{ > + #pragma omp for reduction (inscan, *:r) > + for (int i = 0; i < 1024; i++) > + { > + r *= a[i]; > + #pragma omp scan inclusive(r) > + b[i] = r; > + } > +} > + > +__attribute__((noipa)) float > +bar (void) > +{ > + float s = -__builtin_inff (); > + #pragma omp parallel for reduction (inscan, max:s) > + for (int i = 0; i < 1024; i++) > + { > + s = s > a[i] ? s : a[i]; > + #pragma omp scan inclusive(s) > + b[i] = s; > + } > + return s; > +} > + > +int > +main () > +{ > + float s = 1.0f; > + for (int i = 0; i < 1024; ++i) > + { > + if (i < 80) > + a[i] = (i & 1) ? 0.25f : 0.5f; > + else if (i < 200) > + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; > + else if (i < 280) > + a[i] = (i & 1) ? 0.25f : 0.5f; > + else if (i < 380) > + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; > + else > + switch (i % 6) > + { > + case 0: a[i] = 0.25f; break; > + case 1: a[i] = 2.0f; break; > + case 2: a[i] = -1.0f; break; > + case 3: a[i] = -4.0f; break; > + case 4: a[i] = 0.5f; break; > + case 5: a[i] = 1.0f; break; > + default: a[i] = 0.0f; break; > + } > + b[i] = -19.0f; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r * 16384.0f != 0.125f) > + abort (); > + float m = -175.25f; > + for (int i = 0; i < 1024; ++i) > + { > + s *= a[i]; > + if (b[i] != s) > + abort (); > + else > + { > + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 
4.0f : 0.0f); > + b[i] = -231.75f; > + m += 0.75f; > + } > + } > + if (bar () != 592.0f) > + abort (); > + s = -__builtin_inff (); > + for (int i = 0; i < 1024; ++i) > + { > + if (s < a[i]) > + s = a[i]; > + if (b[i] != s) > + abort (); > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-4.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-4.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,179 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +int r, a[1024], b[1024]; > +unsigned short r2, b2[1024]; > +unsigned char r3, b3[1024]; > + > +__attribute__((noipa)) void > +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) > +{ > + #pragma omp for reduction (inscan, +:r, r2, r3) > + for (int i = 0; i < 1024; i++) > + { > + { r += a[i]; r2 += a[i]; r3 += a[i]; } > + #pragma omp scan inclusive(r, r2, r3) > + { > + b[i] = r; > + b2[i] = r2; > + b3[i] = r3; > + } > + } > +} > + > +__attribute__((noipa)) int > +bar (unsigned short *s2p, unsigned char *s3p) > +{ > + int s = 0; > + unsigned short s2 = 0; > + unsigned char s3 = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, +:s, s2, s3) > + for (int i = 0; i < 1024; i++) > + { > + { > + s += 2 * a[i]; > + s2 += 2 * a[i]; > + s3 += 2 * a[i]; > + } > + #pragma omp scan inclusive(s, s2, s3) > + { b[i] = s; b2[i] = s2; b3[i] = s3; } > + } > + *s2p = s2; > + *s3p = s3; > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) > +{ > + #pragma omp parallel for reduction (inscan, +:r, r2, r3) > + for (int i = 0; i < 1024; i++) > + { > + { > + r += a[i]; > + r2 += a[i]; > + r3 += a[i]; > + } > + #pragma omp scan inclusive(r, r2, r3) > + { > + b[i] = r; > + b2[i] = r2; > + b3[i] = r3; > + } > + } > +} > + > +__attribute__((noipa)) int > +qux (unsigned short *s2p, unsigned char *s3p) > +{ > + int s = 0; > + unsigned short s2 = 0; > + unsigned char s3 = 0; > + #pragma 
omp parallel for reduction (inscan, +:s, s2, s3) > + for (int i = 0; i < 1024; i++) > + { > + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } > + #pragma omp scan inclusive(s, s2, s3) > + { b[i] = s; b2[i] = s2; b3[i] = s3; } > + } > + *s2p = s2; > + *s3p = s3; > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + unsigned short s2; > + unsigned char s3; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + b2[i] = -1; > + b3[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, b2, b3); > + if (r != 1024 * 1023 / 2 > + || r2 != (unsigned short) r > + || r3 != (unsigned char) r) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = 25; > + b2[i] = 24; > + b3[i] = 26; > + } > + } > + if (bar (&s2, &s3) != 1024 * 1023) > + abort (); > + if (s2 != (unsigned short) (1024 * 1023) > + || s3 != (unsigned char) (1024 * 1023)) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = -1; > + b2[i] = -1; > + b3[i] = -1; > + } > + } > + r = 0; > + r2 = 0; > + r3 = 0; > + baz (a, b, b2, b3); > + if (r != 1024 * 1023 / 2 > + || r2 != (unsigned short) r > + || r3 != (unsigned char) r) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += i; > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = 25; > + b2[i] = 24; > + b3[i] = 26; > + } > + } > + s2 = 0; > + s3 = 0; > + if (qux (&s2, &s3) != 1024 * 1023) > + abort (); > + if (s2 != (unsigned short) (1024 * 1023) > + || s3 != (unsigned char) (1024 * 1023)) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + s += 2 * i; > + if (b[i] != s > + || b2[i] != (unsigned short) s > + 
|| b3[i] != (unsigned char) s) > + abort (); > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-5.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-5.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,115 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +int r, a[1024], b[1024]; > + > +__attribute__((noipa)) void > +foo (int *a, int *b) > +{ > + #pragma omp for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int s = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b) > +{ > + #pragma omp parallel for reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int s = 0; > + #pragma omp parallel for reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if 
(b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-6.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-6.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,117 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +int r, a[1024], b[1024]; > + > +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer > (omp_priv = 0) > + > +__attribute__((noipa)) void > +foo (int *a, int *b) > +{ > + #pragma omp for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int s = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b) > +{ > + #pragma omp parallel for reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int s = 0; > + #pragma omp parallel for reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar () != 1024 * 1023) > + 
abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-7.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-7.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,86 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +float r = 1.0f, a[1024], b[1024]; > + > +__attribute__((noipa)) void > +foo (float *a, float *b) > +{ > + #pragma omp for reduction (inscan, *:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r *= a[i]; > + } > +} > + > +__attribute__((noipa)) float > +bar (void) > +{ > + float s = -__builtin_inff (); > + #pragma omp parallel for reduction (inscan, max:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s = s > a[i] ? s : a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + float s = 1.0f; > + for (int i = 0; i < 1024; ++i) > + { > + if (i < 80) > + a[i] = (i & 1) ? 0.25f : 0.5f; > + else if (i < 200) > + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; > + else if (i < 280) > + a[i] = (i & 1) ? 0.25f : 0.5f; > + else if (i < 380) > + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 
4.0f : 1.0f; > + else > + switch (i % 6) > + { > + case 0: a[i] = 0.25f; break; > + case 1: a[i] = 2.0f; break; > + case 2: a[i] = -1.0f; break; > + case 3: a[i] = -4.0f; break; > + case 4: a[i] = 0.5f; break; > + case 5: a[i] = 1.0f; break; > + default: a[i] = 0.0f; break; > + } > + b[i] = -19.0f; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b); > + if (r * 16384.0f != 0.125f) > + abort (); > + float m = -175.25f; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -231.75f; > + s *= a[i]; > + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f); > + m += 0.75f; > + } > + if (bar () != 592.0f) > + abort (); > + s = -__builtin_inff (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + if (s < a[i]) > + s = a[i]; > + } > + return 0; > +} > --- libgomp/testsuite/libgomp.c/scan-8.c.jj 2019-07-02 13:08:01.390672078 > +0200 > +++ libgomp/testsuite/libgomp.c/scan-8.c 2019-07-02 13:08:01.390672078 > +0200 > @@ -0,0 +1,179 @@ > +/* { dg-require-effective-target size32plus } */ > + > +extern void abort (void); > +int r, a[1024], b[1024]; > +unsigned short r2, b2[1024]; > +unsigned char r3, b3[1024]; > + > +__attribute__((noipa)) void > +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) > +{ > + #pragma omp for reduction (inscan, +:r, r2, r3) > + for (int i = 0; i < 1024; i++) > + { > + { > + b[i] = r; > + b2[i] = r2; > + b3[i] = r3; > + } > + #pragma omp scan exclusive(r, r2, r3) > + { r += a[i]; r2 += a[i]; r3 += a[i]; } > + } > +} > + > +__attribute__((noipa)) int > +bar (unsigned short *s2p, unsigned char *s3p) > +{ > + int s = 0; > + unsigned short s2 = 0; > + unsigned char s3 = 0; > + #pragma omp parallel > + #pragma omp for reduction (inscan, +:s, s2, s3) > + for (int i = 0; i < 1024; i++) > + { > + { b[i] = s; b2[i] = s2; b3[i] = s3; } > + #pragma omp scan exclusive(s, s2, s3) > + { > + s += 2 * a[i]; > + s2 += 2 * a[i]; > + s3 += 2 * a[i]; > + } > + } > + 
*s2p = s2; > + *s3p = s3; > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) > +{ > + #pragma omp parallel for reduction (inscan, +:r, r2, r3) > + for (int i = 0; i < 1024; i++) > + { > + { > + b[i] = r; > + b2[i] = r2; > + b3[i] = r3; > + } > + #pragma omp scan exclusive(r, r2, r3) > + { > + r += a[i]; > + r2 += a[i]; > + r3 += a[i]; > + } > + } > +} > + > +__attribute__((noipa)) int > +qux (unsigned short *s2p, unsigned char *s3p) > +{ > + int s = 0; > + unsigned short s2 = 0; > + unsigned char s3 = 0; > + #pragma omp parallel for reduction (inscan, +:s, s2, s3) > + for (int i = 0; i < 1024; i++) > + { > + { b[i] = s; b2[i] = s2; b3[i] = s3; } > + #pragma omp scan exclusive(s, s2, s3) > + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } > + } > + *s2p = s2; > + *s3p = s3; > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + unsigned short s2; > + unsigned char s3; > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + b2[i] = -1; > + b3[i] = -1; > + asm ("" : "+g" (i)); > + } > + #pragma omp parallel > + foo (a, b, b2, b3); > + if (r != 1024 * 1023 / 2 > + || r2 != (unsigned short) r > + || r3 != (unsigned char) r) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = 25; > + b2[i] = 24; > + b3[i] = 26; > + } > + s += i; > + } > + if (bar (&s2, &s3) != 1024 * 1023) > + abort (); > + if (s2 != (unsigned short) (1024 * 1023) > + || s3 != (unsigned char) (1024 * 1023)) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = -1; > + b2[i] = -1; > + b3[i] = -1; > + } > + s += 2 * i; > + } > + r = 0; > + r2 = 0; > + r3 = 0; > + baz (a, b, b2, b3); > + if (r != 1024 * 1023 / 2 > + || r2 != (unsigned short) r > 
+ || r3 != (unsigned char) r) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = 25; > + b2[i] = 24; > + b3[i] = 26; > + } > + s += i; > + } > + s2 = 0; > + s3 = 0; > + if (qux (&s2, &s3) != 1024 * 1023) > + abort (); > + if (s2 != (unsigned short) (1024 * 1023) > + || s3 != (unsigned char) (1024 * 1023)) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > > Jakub