On Fri, 21 Jun 2019 at 08:57, Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The following patch adds exclusive scan support for simd, it is similar to > the inclusive scan, just we need to swap the input and scan phases and > use slightly different pattern at the start of the scan phase, so that it > computes what we need. > > Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. > > 2019-06-21 Jakub Jelinek <ja...@redhat.com> > > * omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument, > create another "omp simd inscan exclusive" array if > !ctx->scan_inclusive. > (lower_rec_input_clauses): Handle exclusive scan inscan reductions. > (lower_omp_scan): Likewise. > * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of > 2-bit bitfield for simd_lane_access_p member. > * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle > aux == (void *)-4 as simd lane access. > * tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update > comment with permutations to show the canonical permutation order. > (vectorizable_scan_store): Handle exclusive scan. > (vectorizable_store): Call vectorizable_scan_store even for > STMT_VINFO_SIMD_LANE_ACCESS_P > 3. > > * gcc.dg/vect/vect-simd-12.c: New test. > * gcc.dg/vect/vect-simd-13.c: New test. > * gcc.dg/vect/vect-simd-14.c: New test. > * gcc.dg/vect/vect-simd-15.c: New test. > * gcc.target/i386/sse2-vect-simd-12.c: New test. > * gcc.target/i386/sse2-vect-simd-13.c: New test. > * gcc.target/i386/sse2-vect-simd-14.c: New test. > * gcc.target/i386/sse2-vect-simd-15.c: New test. > * gcc.target/i386/avx2-vect-simd-12.c: New test. > * gcc.target/i386/avx2-vect-simd-13.c: New test. > * gcc.target/i386/avx2-vect-simd-14.c: New test. > * gcc.target/i386/avx2-vect-simd-15.c: New test. > * gcc.target/i386/avx512f-vect-simd-12.c: New test. > * gcc.target/i386/avx512f-vect-simd-13.c: New test. > * gcc.target/i386/avx512f-vect-simd-14.c: New test. 
> * gcc.target/i386/avx512bw-vect-simd-15.c: New test. > * g++.dg/vect/simd-6.cc: New test. > * g++.dg/vect/simd-7.cc: New test. > * g++.dg/vect/simd-8.cc: New test. > * g++.dg/vect/simd-9.cc: New test. > * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics. > > --- gcc/omp-low.c.jj 2019-06-20 13:26:29.085150770 +0200 > +++ gcc/omp-low.c 2019-06-20 15:46:25.964253058 +0200 > @@ -3692,7 +3692,8 @@ struct omplow_simd_context { > static bool > lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, > omplow_simd_context *sctx, tree &ivar, > - tree &lvar, tree *rvar = NULL) > + tree &lvar, tree *rvar = NULL, > + tree *rvar2 = NULL) > { > if (known_eq (sctx->max_vf, 0U)) > { > @@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_v > *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, > sctx->lastlane, NULL_TREE, NULL_TREE); > TREE_THIS_NOTRAP (*rvar) = 1; > + > + if (!ctx->scan_inclusive) > + { > + /* And for exclusive scan yet another one, which will > + hold the value during the scan phase. 
*/ > + tree savar = create_tmp_var_raw (atype); > + if (TREE_ADDRESSABLE (new_var)) > + TREE_ADDRESSABLE (savar) = 1; > + DECL_ATTRIBUTES (savar) > + = tree_cons (get_identifier ("omp simd array"), NULL, > + tree_cons (get_identifier ("omp simd inscan " > + "exclusive"), NULL, > + DECL_ATTRIBUTES (savar))); > + gimple_add_tmp_var (savar); > + ctx->cb.decl_map->put (iavar, savar); > + *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar, > + sctx->idx, NULL_TREE, NULL_TREE); > + TREE_THIS_NOTRAP (*rvar2) = 1; > + } > } > ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx, > NULL_TREE, NULL_TREE); > @@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, g > new_vard = TREE_OPERAND (new_var, 0); > gcc_assert (DECL_P (new_vard)); > } > - tree rvar = NULL_TREE, *rvarp = NULL; > + tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE; > if (is_simd > && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION > && OMP_CLAUSE_REDUCTION_INSCAN (c)) > rvarp = &rvar; > if (is_simd > && lower_rec_simd_input_clauses (new_var, ctx, &sctx, > - ivar, lvar, rvarp)) > + ivar, lvar, rvarp, > + &rvar2)) > { > if (new_vard == new_var) > { > @@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, g > (c, ivar2, build_outer_var_ref (var, > ctx)); > gimplify_and_add (x, &llist[0]); > > + if (rvar2) > + { > + x = lang_hooks.decls.omp_clause_default_ctor > + (c, unshare_expr (rvar2), > + build_outer_var_ref (var, ctx)); > + gimplify_and_add (x, &llist[0]); > + } > + > /* For types that need construction, add another > private var which will be default constructed > and optionally initialized with > @@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, g > iteration. */ > tree nv = create_tmp_var_raw (TREE_TYPE (ivar)); > gimple_add_tmp_var (nv); > - ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0), > + ctx->cb.decl_map->put (TREE_OPERAND (rvar2 > + ? 
rvar2 > + : ivar, 0), > nv); > x = lang_hooks.decls.omp_clause_default_ctor > (c, nv, build_outer_var_ref (var, ctx)); > @@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, g > gimplify_stmt (&dtor, &tseq); > gimple_seq_add_seq (&llist[1], tseq); > } > + > + if (rvar2) > + { > + x = lang_hooks.decls.omp_clause_dtor (c, rvar2); > + if (x) > + { > + tseq = NULL; > + dtor = x; > + gimplify_stmt (&dtor, &tseq); > + gimple_seq_add_seq (&llist[1], tseq); > + } > + } > break; > } > if (x) > @@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, g > gimple_seq_add_seq (ilist, tseq); > } > OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; > + if (!ctx->scan_inclusive) > + { > + tree nv2 > + = create_tmp_var_raw (TREE_TYPE (new_var)); > + gimple_add_tmp_var (nv2); > + ctx->cb.decl_map->put (nv, nv2); > + x = lang_hooks.decls.omp_clause_default_ctor > + (c, nv2, build_outer_var_ref (var, ctx)); > + gimplify_and_add (x, ilist); > + x = lang_hooks.decls.omp_clause_dtor (c, nv2); > + if (x) > + { > + tseq = NULL; > + dtor = x; > + gimplify_stmt (&dtor, &tseq); > + gimple_seq_add_seq (dlist, tseq); > + } > + } > x = lang_hooks.decls.omp_clause_dtor (c, nv); > if (x) > { > @@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, g > gimple_seq_add_seq (dlist, tseq); > } > } > + else if (!ctx->scan_inclusive > + && TREE_ADDRESSABLE (TREE_TYPE (new_var))) > + { > + tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); > + gimple_add_tmp_var (nv2); > + ctx->cb.decl_map->put (new_vard, nv2); > + x = lang_hooks.decls.omp_clause_dtor (c, nv2); > + if (x) > + { > + tseq = NULL; > + dtor = x; > + gimplify_stmt (&dtor, &tseq); > + gimple_seq_add_seq (dlist, tseq); > + } > + } > DECL_HAS_VALUE_EXPR_P (placeholder) = 0; > goto do_dtor; > } > @@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, g > new_vard = TREE_OPERAND (new_var, 0); > gcc_assert (DECL_P (new_vard)); > } > - tree rvar = NULL_TREE, *rvarp = NULL; > + tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = 
NULL_TREE; > if (is_simd > && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION > && OMP_CLAUSE_REDUCTION_INSCAN (c)) > rvarp = &rvar; > if (is_simd > && lower_rec_simd_input_clauses (new_var, ctx, &sctx, > - ivar, lvar, rvarp)) > + ivar, lvar, rvarp, > + &rvar2)) > { > if (new_vard != new_var) > { > @@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gs > gimple_seq before = NULL; > omp_context *octx = ctx->outer; > gcc_assert (octx); > + if (!octx->scan_inclusive && !has_clauses) > + { > + gimple_stmt_iterator gsi2 = *gsi_p; > + gsi_next (&gsi2); > + gimple *stmt2 = gsi_stmt (gsi2); > + /* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses > + with following GIMPLE_OMP_SCAN with clauses, so that input_phase, > + the one with exclusive clause(s), comes first. */ > + if (stmt2 > + && gimple_code (stmt2) == GIMPLE_OMP_SCAN > + && gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL) > + { > + gsi_remove (gsi_p, false); > + gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT); > + ctx = maybe_lookup_ctx (stmt2); > + gcc_assert (ctx); > + lower_omp_scan (gsi_p, ctx); > + return; > + } > + } > + > bool input_phase = has_clauses ^ octx->scan_inclusive; > if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR > && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) > - && !gimple_omp_for_combined_into_p (octx->stmt) > - && octx->scan_inclusive) > + && !gimple_omp_for_combined_into_p (octx->stmt)) > { > if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), > OMP_CLAUSE__SIMDUID_)) > { > tree uid = OMP_CLAUSE__SIMDUID__DECL (c); > lane = create_tmp_var (unsigned_type_node); > - tree t = build_int_cst (integer_type_node, 1 + !input_phase); > + tree t = build_int_cst (integer_type_node, > + input_phase ? 1 > + : octx->scan_inclusive ? 
2 : 3); > gimple *g > = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); > gimple_call_set_lhs (g, lane); > @@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gs > tree val = new_var; > tree var2 = NULL_TREE; > tree var3 = NULL_TREE; > + tree var4 = NULL_TREE; > + tree lane0 = NULL_TREE; > tree new_vard = new_var; > if (omp_is_reference (var)) > { > @@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gs > DECL_ATTRIBUTES (v))) > { > val = unshare_expr (val); > + lane0 = TREE_OPERAND (val, 1); > TREE_OPERAND (val, 1) = lane; > var2 = lookup_decl (v, octx); > + if (!octx->scan_inclusive) > + var4 = lookup_decl (var2, octx); > if (input_phase > && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > - var3 = maybe_lookup_decl (var2, octx); > + var3 = maybe_lookup_decl (var4 ? var4 : var2, octx); > if (!input_phase) > { > var2 = build4 (ARRAY_REF, TREE_TYPE (val), > var2, lane, NULL_TREE, NULL_TREE); > TREE_THIS_NOTRAP (var2) = 1; > + if (!octx->scan_inclusive) > + { > + var4 = build4 (ARRAY_REF, TREE_TYPE (val), > + var4, lane, NULL_TREE, > + NULL_TREE); > + TREE_THIS_NOTRAP (var4) = 1; > + } > } > else > var2 = val; > @@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gs > else > { > var2 = build_outer_var_ref (var, octx); > - if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > { > var3 = maybe_lookup_decl (new_vard, octx); > - if (var3 == new_vard) > + if (var3 == new_vard || var3 == NULL_TREE) > var3 = NULL_TREE; > + else if (!octx->scan_inclusive && !input_phase) > + { > + var4 = maybe_lookup_decl (var3, octx); > + if (var4 == var3 || var4 == NULL_TREE) > + { > + if (TREE_ADDRESSABLE (TREE_TYPE (new_var))) > + { > + var4 = var3; > + var3 = NULL_TREE; > + } > + else > + var4 = NULL_TREE; > + } > + } > } > + if (!octx->scan_inclusive && !input_phase && var4 == > NULL_TREE) > + var4 = create_tmp_var (TREE_TYPE (val)); > } > if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) > { > @@ -8689,9 
+8816,17 @@ lower_omp_scan (gimple_stmt_iterator *gs > } > else > { > + tree x; > + if (!octx->scan_inclusive) > + { > + tree v4 = unshare_expr (var4); > + tree v2 = unshare_expr (var2); > + x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2); > + gimplify_and_add (x, &before); > + } > gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); > - tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) > - ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); > + x = (DECL_HAS_VALUE_EXPR_P (new_vard) > + ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); > tree vexpr = val; > if (x && omp_is_reference (var)) > vexpr = build_fold_addr_expr_loc (clause_loc, val); > @@ -8706,8 +8841,18 @@ lower_omp_scan (gimple_stmt_iterator *gs > SET_DECL_VALUE_EXPR (new_vard, x); > SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); > DECL_HAS_VALUE_EXPR_P (placeholder) = 0; > - x = lang_hooks.decls.omp_clause_assign_op (c, val, var2); > - gimplify_and_add (x, &before); > + if (octx->scan_inclusive) > + { > + x = lang_hooks.decls.omp_clause_assign_op (c, val, > + var2); > + gimplify_and_add (x, &before); > + } > + else if (lane0 == NULL_TREE) > + { > + x = lang_hooks.decls.omp_clause_assign_op (c, val, > + var4); > + gimplify_and_add (x, &before); > + } > } > } > else > @@ -8728,10 +8873,29 @@ lower_omp_scan (gimple_stmt_iterator *gs > > tree x = build2 (code, TREE_TYPE (var2), > unshare_expr (var2), unshare_expr (val)); > - gimplify_assign (unshare_expr (var2), x, &before); > - gimplify_assign (val, var2, &before); > + if (octx->scan_inclusive) > + { > + gimplify_assign (unshare_expr (var2), x, &before); > + gimplify_assign (val, var2, &before); > + } > + else > + { > + gimplify_assign (unshare_expr (var4), > + unshare_expr (var2), &before); > + gimplify_assign (var2, x, &before); > + if (lane0 == NULL_TREE) > + gimplify_assign (val, var4, &before); > + } > } > } > + if (!octx->scan_inclusive && !input_phase && lane0) > + { > + tree vexpr = unshare_expr (var4); > + TREE_OPERAND (vexpr, 1) = lane0; > + if 
(omp_is_reference (var)) > + vexpr = build_fold_addr_expr_loc (clause_loc, vexpr); > + SET_DECL_VALUE_EXPR (new_vard, vexpr); > + } > } > } > else if (has_clauses) > --- gcc/tree-vectorizer.h.jj 2019-06-20 13:26:29.078150879 +0200 > +++ gcc/tree-vectorizer.h 2019-06-20 14:18:04.241075200 +0200 > @@ -917,7 +917,7 @@ struct _stmt_vec_info { > bool strided_p; > > /* For both loads and stores. */ > - unsigned simd_lane_access_p : 2; > + unsigned simd_lane_access_p : 3; > > /* Classifies how the load or store is going to be implemented > for loop vectorization. */ > --- gcc/tree-vect-data-refs.c.jj 2019-06-20 13:55:35.421150589 +0200 > +++ gcc/tree-vect-data-refs.c 2019-06-20 14:18:04.240075216 +0200 > @@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, > /* See if this was detected as SIMD lane access. */ > if (dr->aux == (void *)-1 > || dr->aux == (void *)-2 > - || dr->aux == (void *)-3) > + || dr->aux == (void *)-3 > + || dr->aux == (void *)-4) > { > if (nested_in_vect_loop_p (loop, stmt_info)) > return opt_result::failure_at (stmt_info->stmt, > --- gcc/tree-vect-stmts.c.jj 2019-06-20 13:26:29.084150785 +0200 > +++ gcc/tree-vect-stmts.c 2019-06-20 14:18:04.239075231 +0200 > @@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_inf > kinds are there in order to allow optimizing the initializer store > and combiner sequence, e.g. if it is originally some C++ish user > defined reduction, but allow the vectorizer to pattern recognize it > - and turn into the appropriate vectorized scan. */ > + and turn into the appropriate vectorized scan. > + > + For exclusive scan, this is slightly different: > + #pragma omp simd reduction(inscan,+:r) > + for (...) 
> + { > + use (r); > + #pragma omp scan exclusive (r) > + r += something (); > + } > + shall have body with: > + // Initialization for input phase, store the reduction initializer: > + _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); > + _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); > + D.2042[_21] = 0; > + // Actual input phase: > + ... > + r.0_5 = D.2042[_20]; > + _6 = _4 + r.0_5; > + D.2042[_20] = _6; > + // Initialization for scan phase: > + _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); > + _26 = D.2043[_25]; > + D.2044[_25] = _26; > + _27 = D.2042[_25]; > + _28 = _26 + _27; > + D.2043[_25] = _28; > + // Actual scan phase: > + ... > + r.1_8 = D.2044[_20]; > + ... */ > > if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) > { > @@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_inf > if (TREE_CODE (rhs) != SSA_NAME) > goto fail; > > - use_operand_p use_p; > - imm_use_iterator iter; > gimple *other_store_stmt = NULL; > - FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) > + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); > + bool inscan_var_store > + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; > + > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) > { > - gimple *use_stmt = USE_STMT (use_p); > - if (use_stmt == stmt || is_gimple_debug (use_stmt)) > - continue; > - if (gimple_bb (use_stmt) != gimple_bb (stmt) > - || !gimple_store_p (use_stmt) > - || other_store_stmt) > - goto fail; > - other_store_stmt = use_stmt; > + if (!inscan_var_store) > + { > + use_operand_p use_p; > + imm_use_iterator iter; > + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) > + { > + gimple *use_stmt = USE_STMT (use_p); > + if (use_stmt == stmt || is_gimple_debug (use_stmt)) > + continue; > + if (gimple_bb (use_stmt) != gimple_bb (stmt) > + || !is_gimple_assign (use_stmt) > + || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS > + || other_store_stmt > + || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) > + goto fail; > + other_store_stmt = 
use_stmt; > + } > + if (other_store_stmt == NULL) > + goto fail; > + rhs = gimple_assign_lhs (other_store_stmt); > + if (!single_imm_use (rhs, &use_p, &other_store_stmt)) > + goto fail; > + } > } > - if (other_store_stmt == NULL) > - goto fail; > - stmt_vec_info other_store_stmt_info > - = loop_vinfo->lookup_stmt (other_store_stmt); > - if (other_store_stmt_info == NULL > - || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3) > + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) > + { > + use_operand_p use_p; > + imm_use_iterator iter; > + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) > + { > + gimple *use_stmt = USE_STMT (use_p); > + if (use_stmt == stmt || is_gimple_debug (use_stmt)) > + continue; > + if (other_store_stmt) > + goto fail; > + other_store_stmt = use_stmt; > + } > + } > + else > goto fail; > > gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); > @@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_inf > > tree rhs1 = gimple_assign_rhs1 (def_stmt); > tree rhs2 = gimple_assign_rhs2 (def_stmt); > - if (TREE_CODE (rhs1) != SSA_NAME > - || TREE_CODE (rhs2) != SSA_NAME) > + if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) > goto fail; > > gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); > @@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_inf > stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); > if (load1_stmt_info == NULL > || load2_stmt_info == NULL > - || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3 > - || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3) > + || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) > + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) > + || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) > + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) > goto fail; > > - if (scan_operand_equal_p (gimple_assign_lhs (stmt), > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) > + { > + dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); > 
+ if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR > + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) > + goto fail; > + tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); > + tree lrhs; > + if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) > + lrhs = rhs1; > + else > + lrhs = rhs2; > + use_operand_p use_p; > + imm_use_iterator iter; > + FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) > + { > + gimple *use_stmt = USE_STMT (use_p); > + if (use_stmt == def_stmt || is_gimple_debug (use_stmt)) > + continue; > + if (other_store_stmt) > + goto fail; > + other_store_stmt = use_stmt; > + } > + } > + > + if (other_store_stmt == NULL) > + goto fail; > + if (gimple_bb (other_store_stmt) != gimple_bb (stmt) > + || !gimple_store_p (other_store_stmt)) > + goto fail; > + > + stmt_vec_info other_store_stmt_info > + = loop_vinfo->lookup_stmt (other_store_stmt); > + if (other_store_stmt_info == NULL > + || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) > + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) > + goto fail; > + > + gimple *stmt1 = stmt; > + gimple *stmt2 = other_store_stmt; > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) > + std::swap (stmt1, stmt2); > + if (scan_operand_equal_p (gimple_assign_lhs (stmt1), > gimple_assign_rhs1 (load2_stmt))) > { > std::swap (rhs1, rhs2); > std::swap (load1_stmt, load2_stmt); > std::swap (load1_stmt_info, load2_stmt_info); > } > - if (!scan_operand_equal_p (gimple_assign_lhs (stmt), > - gimple_assign_rhs1 (load1_stmt)) > - || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt), > + if (!scan_operand_equal_p (gimple_assign_lhs (stmt1), > + gimple_assign_rhs1 (load1_stmt))) > + goto fail; > + > + tree var3 = NULL_TREE; > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 > + && !scan_operand_equal_p (gimple_assign_lhs (stmt2), > gimple_assign_rhs1 (load2_stmt))) > goto fail; > + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 
> + { > + dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); > + if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR > + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) > + goto fail; > + var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); > + if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3)) > + || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3)) > + || lookup_attribute ("omp simd inscan exclusive", > + DECL_ATTRIBUTES (var3))) > + goto fail; > + } > > dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); > if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR > @@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_inf > if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) > std::swap (var1, var2); > > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) > + { > + if (!lookup_attribute ("omp simd inscan exclusive", > + DECL_ATTRIBUTES (var1))) > + goto fail; > + var1 = var3; > + } > + > if (loop_vinfo->scan_map == NULL) > goto fail; > tree *init = loop_vinfo->scan_map->get (var1); > @@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_inf > goto fail; > > /* The IL is as expected, now check if we can actually vectorize it. 
> + Inclusive scan: > _26 = D.2043[_25]; > _27 = D.2042[_25]; > _28 = _26 + _27; > @@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_inf > from the D.2042[_21] = 0; store): > _30 = MEM <vector(8) int> [(int *)&D.2043]; > _31 = MEM <vector(8) int> [(int *)&D.2042]; > - _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>; > + _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; > _33 = _31 + _32; > // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; > - _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>; > + _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; > _35 = _33 + _34; > // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], > // _31[1]+.._31[4], ... _31[4]+.._31[7] }; > - _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>; > + _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>; > _37 = _35 + _36; > // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], > // _31[0]+.._31[4], ... _31[0]+.._31[7] }; > _38 = _30 + _37; > _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; > MEM <vector(8) int> [(int *)&D.2043] = _39; > - MEM <vector(8) int> [(int *)&D.2042] = _38; */ > + MEM <vector(8) int> [(int *)&D.2042] = _38; > + Exclusive scan: > + _26 = D.2043[_25]; > + D.2044[_25] = _26; > + _27 = D.2042[_25]; > + _28 = _26 + _27; > + D.2043[_25] = _28; > + should be vectorized as (where _40 is the vectorized rhs > + from the D.2042[_21] = 0; store): > + _30 = MEM <vector(8) int> [(int *)&D.2043]; > + _31 = MEM <vector(8) int> [(int *)&D.2042]; > + _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; > + _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>; > + _34 = _32 + _33; > + // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3], > + // _31[3]+_31[4], ... 
_31[5]+.._31[6] }; > + _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>; > + _36 = _34 + _35; > + // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], > + // _31[1]+.._31[4], ... _31[3]+.._31[6] }; > + _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>; > + _38 = _36 + _37; > + // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], > + // _31[0]+.._31[4], ... _31[0]+.._31[6] }; > + _39 = _30 + _38; > + _50 = _31 + _39; > + _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>; > + MEM <vector(8) int> [(int *)&D.2044] = _39; > + MEM <vector(8) int> [(int *)&D.2042] = _51; */ > enum machine_mode vec_mode = TYPE_MODE (vectype); > optab optab = optab_for_tree_code (code, vectype, optab_default); > if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing) > @@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info s > tree rhs = gimple_assign_rhs1 (stmt); > gcc_assert (TREE_CODE (rhs) == SSA_NAME); > > + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); > + bool inscan_var_store > + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; > + > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) > + { > + use_operand_p use_p; > + imm_use_iterator iter; > + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) > + { > + gimple *use_stmt = USE_STMT (use_p); > + if (use_stmt == stmt || is_gimple_debug (use_stmt)) > + continue; > + rhs = gimple_assign_lhs (use_stmt); > + break; > + } > + } > + > gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); > enum tree_code code = gimple_assign_rhs_code (def_stmt); > if (code == POINTER_PLUS_EXPR) > @@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info s > { > std::swap (rhs1, rhs2); > std::swap (var1, var2); > + std::swap (load1_dr_info, load2_dr_info); > } > > tree *init = loop_vinfo->scan_map->get (var1); > gcc_assert (init); > > - tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); > - bool inscan_var_store > - = 
lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; > - > unsigned HOST_WIDE_INT nunits; > if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) > gcc_unreachable (); > @@ -6789,29 +6957,50 @@ vectorizable_scan_store (stmt_vec_info s > tree vec_oprnd1 = NULL_TREE; > tree vec_oprnd2 = NULL_TREE; > tree vec_oprnd3 = NULL_TREE; > - tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr)); > + tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); > tree dataref_offset = build_int_cst (ref_type, 0); > tree bump = vect_get_data_ptr_increment (dr_info, vectype, > VMAT_CONTIGUOUS); > + tree ldataref_ptr = NULL_TREE; > tree orig = NULL_TREE; > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) > + ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); > for (int j = 0; j < ncopies; j++) > { > stmt_vec_info new_stmt_info; > if (j == 0) > { > vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info); > - vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); > + if (ldataref_ptr == NULL) > + vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); > vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info); > orig = vec_oprnd3; > } > else > { > vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); > - vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); > + if (ldataref_ptr == NULL) > + vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); > vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3); > if (!inscan_var_store) > dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, > bump); > } > > + if (ldataref_ptr) > + { > + vec_oprnd2 = make_ssa_name (vectype); > + tree data_ref = fold_build2 (MEM_REF, vectype, > + unshare_expr (ldataref_ptr), > + dataref_offset); > + vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); > + gimple *g = gimple_build_assign (vec_oprnd2, data_ref); > + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); > + if 
(prev_stmt_info == NULL) > + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; > + else > + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; > + prev_stmt_info = new_stmt_info; > + } > + > tree v = vec_oprnd2; > for (int i = 0; i < units_log2; ++i) > { > @@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info s > new_temp = new_temp2; > } > > + /* For exclusive scan, perform the perms[i] permutation once > + more. */ > + if (i == 0 > + && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 > + && v == vec_oprnd2) > + { > + v = new_temp; > + --i; > + continue; > + } > + > tree new_temp2 = make_ssa_name (vectype); > g = gimple_build_assign (new_temp2, code, v, new_temp); > new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); > @@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info s > STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; > prev_stmt_info = new_stmt_info; > > + tree last_perm_arg = new_temp; > + /* For exclusive scan, new_temp computed above is the exclusive scan > + prefix sum. Turn it into inclusive prefix sum for the broadcast > + of the last element into orig. 
*/ > + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) > + { > + last_perm_arg = make_ssa_name (vectype); > + g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2); > + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); > + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; > + prev_stmt_info = new_stmt_info; > + } > + > orig = make_ssa_name (vectype); > - g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp, > - perms[units_log2]); > + g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg, > + last_perm_arg, perms[units_log2]); > new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); > STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; > prev_stmt_info = new_stmt_info; > > if (!inscan_var_store) > { > - tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, > + tree data_ref = fold_build2 (MEM_REF, vectype, > + unshare_expr (dataref_ptr), > dataref_offset); > vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); > g = gimple_build_assign (data_ref, new_temp); > @@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info s > if (j != 0) > dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); > > - tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, > + tree data_ref = fold_build2 (MEM_REF, vectype, > + unshare_expr (dataref_ptr), > dataref_offset); > vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); > gimple *g = gimple_build_assign (data_ref, orig); > @@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_i > } > return true; > } > - else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) > + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) > return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies); > > if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) > --- gcc/testsuite/gcc.dg/vect/vect-simd-12.c.jj 2019-06-20 15:08:50.260400440 > +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-12.c 2019-06-20 15:08:24.332805239 > +0200 > @@ -0,0 +1,122 @@ > 
+/* { dg-require-effective-target size32plus } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > target i?86-*-* x86_64-*-* } } } */ > + > +#ifndef main > +#include "tree-vect.h" > +#endif > + > +int r, a[1024], b[1024]; > + > +__attribute__((noipa)) void > +foo (int *a, int *b) > +{ > + #pragma omp simd reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int s = 0; > + #pragma omp simd reduction (inscan, +:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b) > +{ > + #pragma omp simd reduction (inscan, +:r) if (simd: 0) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int s = 0; > + #pragma omp simd reduction (inscan, +:s) simdlen (1) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > +#ifndef main > + check_vect (); > +#endif > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + foo (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > 
+ if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/gcc.dg/vect/vect-simd-13.c.jj 2019-06-20 15:47:23.580359715 > +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-13.c 2019-06-20 15:13:23.500134387 > +0200 > @@ -0,0 +1,124 @@ > +/* { dg-require-effective-target size32plus } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > target i?86-*-* x86_64-*-* } } } */ > + > +#ifndef main > +#include "tree-vect.h" > +#endif > + > +int r, a[1024], b[1024]; > + > +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer > (omp_priv = 0) > + > +__attribute__((noipa)) void > +foo (int *a, int *b) > +{ > + #pragma omp simd reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int s = 0; > + #pragma omp simd reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b) > +{ > + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int s = 0; > + #pragma omp simd reduction (inscan, foo:s) simdlen (1) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > +#ifndef main > + check_vect (); > +#endif > + for (int i = 0; i < 1024; 
++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + foo (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz (a, b); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/gcc.dg/vect/vect-simd-14.c.jj 2019-06-20 15:48:30.536321539 > +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-14.c 2019-06-20 15:54:39.291617792 > +0200 > @@ -0,0 +1,94 @@ > +/* { dg-require-effective-target size32plus } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > target i?86-*-* x86_64-*-* } } } */ > + > +#ifndef main > +#include "tree-vect.h" > +#endif > + > +float r = 1.0f, a[1024], b[1024]; > + > +__attribute__((noipa)) void > +foo (float *a, float *b) > +{ > + #pragma omp simd reduction (inscan, *:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r *= a[i]; > + } > +} > + > +__attribute__((noipa)) float > +bar (void) > +{ > + float s = -__builtin_inff (); > + #pragma omp simd reduction (inscan, max:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s = s > a[i] ? 
s : a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + float s = 1.0f; > +#ifndef main > + check_vect (); > +#endif > + for (int i = 0; i < 1024; ++i) > + { > + if (i < 80) > + a[i] = (i & 1) ? 0.25f : 0.5f; > + else if (i < 200) > + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; > + else if (i < 280) > + a[i] = (i & 1) ? 0.25f : 0.5f; > + else if (i < 380) > + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; > + else > + switch (i % 6) > + { > + case 0: a[i] = 0.25f; break; > + case 1: a[i] = 2.0f; break; > + case 2: a[i] = -1.0f; break; > + case 3: a[i] = -4.0f; break; > + case 4: a[i] = 0.5f; break; > + case 5: a[i] = 1.0f; break; > + default: a[i] = 0.0f; break; > + } > + b[i] = -19.0f; > + asm ("" : "+g" (i)); > + } > + foo (a, b); > + if (r * 16384.0f != 0.125f) > + abort (); > + float m = -175.25f; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -231.75f; > + s *= a[i]; > + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f); > + m += 0.75f; > + } > + if (bar () != 592.0f) > + abort (); > + s = -__builtin_inff (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + if (s < a[i]) > + s = a[i]; > + } > + return 0; > +}
Hi, I've noticed that this new test (gcc.dg/vect/vect-simd-14.c) fails at execution time on arm targets. It does pass on aarch64. Christophe > --- gcc/testsuite/gcc.dg/vect/vect-simd-15.c.jj 2019-06-20 15:50:34.483399705 > +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-15.c 2019-06-20 15:52:09.976919050 > +0200 > @@ -0,0 +1,186 @@ > +/* { dg-require-effective-target size32plus } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > target i?86-*-* x86_64-*-* } } } */ > + > +#ifndef main > +#include "tree-vect.h" > +#endif > + > +int r, a[1024], b[1024]; > +unsigned short r2, b2[1024]; > +unsigned char r3, b3[1024]; > + > +__attribute__((noipa)) void > +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) > +{ > + #pragma omp simd reduction (inscan, +:r, r2, r3) > + for (int i = 0; i < 1024; i++) > + { > + { > + b[i] = r; > + b2[i] = r2; > + b3[i] = r3; > + } > + #pragma omp scan exclusive(r, r2, r3) > + { r += a[i]; r2 += a[i]; r3 += a[i]; } > + } > +} > + > +__attribute__((noipa)) int > +bar (unsigned short *s2p, unsigned char *s3p) > +{ > + int s = 0; > + unsigned short s2 = 0; > + unsigned char s3 = 0; > + #pragma omp simd reduction (inscan, +:s, s2, s3) > + for (int i = 0; i < 1024; i++) > + { > + { b[i] = s; b2[i] = s2; b3[i] = s3; } > + #pragma omp scan exclusive(s, s2, s3) > + { > + s += 2 * a[i]; > + s2 += 2 * a[i]; > + s3 += 2 * a[i]; > + } > + } > + *s2p = s2; > + *s3p = s3; > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) > +{ > + #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0) > + for (int i = 0; i < 1024; i++) > + { > + { > + b[i] = r; > + b2[i] = r2; > + b3[i] = r3; > + } > + #pragma omp scan exclusive(r, r2, r3) > + { > + r += a[i]; > + r2 += a[i]; > + r3 += a[i]; > + } > + } > +} > + > +__attribute__((noipa)) int > 
+qux (unsigned short *s2p, unsigned char *s3p) > +{ > + int s = 0; > + unsigned short s2 = 0; > + unsigned char s3 = 0; > + #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1) > + for (int i = 0; i < 1024; i++) > + { > + { b[i] = s; b2[i] = s2; b3[i] = s3; } > + #pragma omp scan exclusive(s, s2, s3) > + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } > + } > + *s2p = s2; > + *s3p = s3; > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + unsigned short s2; > + unsigned char s3; > +#ifndef main > + check_vect (); > +#endif > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + b2[i] = -1; > + b3[i] = -1; > + asm ("" : "+g" (i)); > + } > + foo (a, b, b2, b3); > + if (r != 1024 * 1023 / 2 > + || r2 != (unsigned short) r > + || r3 != (unsigned char) r) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = 25; > + b2[i] = 24; > + b3[i] = 26; > + } > + s += i; > + } > + if (bar (&s2, &s3) != 1024 * 1023) > + abort (); > + if (s2 != (unsigned short) (1024 * 1023) > + || s3 != (unsigned char) (1024 * 1023)) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = -1; > + b2[i] = -1; > + b3[i] = -1; > + } > + s += 2 * i; > + } > + r = 0; > + r2 = 0; > + r3 = 0; > + baz (a, b, b2, b3); > + if (r != 1024 * 1023 / 2 > + || r2 != (unsigned short) r > + || r3 != (unsigned char) r) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + else > + { > + b[i] = 25; > + b2[i] = 24; > + b3[i] = 26; > + } > + s += i; > + } > + s2 = 0; > + s3 = 0; > + if (qux (&s2, &s3) != 1024 * 1023) > + abort (); > + if (s2 != (unsigned short) (1024 * 1023) > + || s3 != (unsigned 
char) (1024 * 1023)) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s > + || b2[i] != (unsigned short) s > + || b3[i] != (unsigned char) s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c.jj 2019-06-20 > 15:58:35.276983324 +0200 > +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c 2019-06-20 > 15:58:35.274983355 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target sse2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "sse2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-12.c" > + > +static void > +sse2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c.jj 2019-06-20 > 15:58:35.283983216 +0200 > +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c 2019-06-20 > 15:58:35.281983247 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target sse2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "sse2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-13.c" > + > +static void > +sse2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c.jj 2019-06-20 > 15:58:35.288983139 +0200 > +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c 2019-06-20 > 15:58:35.287983154 +0200 > @@ -0,0 +1,15 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target sse2 } */ > + > +#include "sse2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-14.c" > + > +static void > 
+sse2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c.jj 2019-06-20 > 15:58:35.293983061 +0200 > +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c 2019-06-20 > 15:58:35.292983077 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target sse2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "sse2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-15.c" > + > +static void > +sse2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c.jj 2019-06-20 > 15:58:35.299982969 +0200 > +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c 2019-06-20 > 15:58:35.297982999 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-12.c" > + > +static void > +avx2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c.jj 2019-06-20 > 15:58:35.305982876 +0200 > +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c 2019-06-20 > 15:58:35.303982907 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-13.c" > + > +static void > +avx2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c.jj 2019-06-20 > 
15:58:35.310982799 +0200 > +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c 2019-06-20 > 15:58:35.309982815 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-14.c" > + > +static void > +avx2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c.jj 2019-06-20 > 15:58:35.316982707 +0200 > +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c 2019-06-20 > 15:58:35.314982738 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx2 } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-15.c" > + > +static void > +avx2_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c.jj 2019-06-20 > 15:58:35.323982599 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c 2019-06-20 > 15:58:35.321982630 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx512f } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx512f-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-12.c" > + > +static void > +avx512f_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c.jj 2019-06-20 > 15:58:35.328982522 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c 
2019-06-20 > 15:58:35.326982553 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx512f } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx512f-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-13.c" > + > +static void > +avx512f_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c.jj 2019-06-20 > 15:58:35.333982445 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c 2019-06-20 > 15:58:35.332982461 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx512f } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx512f-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-14.c" > + > +static void > +avx512f_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c.jj 2019-06-20 > 15:58:35.347982230 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c 2019-06-20 > 15:58:35.346982245 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 > -fdump-tree-vect-details" } */ > +/* { dg-require-effective-target avx512bw } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } > */ > + > +#include "avx512bw-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/vect/vect-simd-15.c" > + > +static void > +avx512bw_test (void) > +{ > + do_main (); > +} > --- gcc/testsuite/g++.dg/vect/simd-6.cc.jj 2019-06-20 16:00:34.800142524 > +0200 > +++ gcc/testsuite/g++.dg/vect/simd-6.cc 2019-06-20 16:07:41.722559826 
+0200 > @@ -0,0 +1,161 @@ > +// { dg-require-effective-target size32plus } > +// { dg-additional-options "-fopenmp-simd" } > +// { dg-additional-options "-mavx" { target avx_runtime } } > +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > xfail *-*-* } } } > + > +#include "../../gcc.dg/vect/tree-vect.h" > + > +template <typename T> > +struct S { > + inline S (); > + inline ~S (); > + inline S (const S &); > + inline S & operator= (const S &); > + T s; > +}; > + > +template <typename T> > +S<T>::S () : s (0) > +{ > +} > + > +template <typename T> > +S<T>::~S () > +{ > +} > + > +template <typename T> > +S<T>::S (const S &x) > +{ > + s = x.s; > +} > + > +template <typename T> > +S<T> & > +S<T>::operator= (const S &x) > +{ > + s = x.s; > + return *this; > +} > + > +template <typename T> > +static inline void > +ini (S<T> &x) > +{ > + x.s = 0; > +} > + > +S<int> r, a[1024], b[1024]; > + > +#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s) > +#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) > initializer (ini (omp_priv)) > + > +template <typename T> > +__attribute__((noipa)) void > +foo (S<T> *a, S<T> *b) > +{ > + #pragma omp simd reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +template <typename T> > +__attribute__((noipa)) S<T> > +bar (void) > +{ > + S<T> s; > + #pragma omp simd reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return S<T> (s); > +} > + > +__attribute__((noipa)) void > +baz (S<int> *a, S<int> *b) > +{ > + #pragma omp simd reduction (inscan, +:r) simdlen(1) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +__attribute__((noipa)) S<int> > +qux (void) > +{ > + S<int> s; > + #pragma omp simd if (0) reduction (inscan, plus:s) 
> + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return S<int> (s); > +} > + > +int > +main () > +{ > + S<int> s; > + check_vect (); > + for (int i = 0; i < 1024; ++i) > + { > + a[i].s = i; > + b[i].s = -1; > + asm ("" : "+g" (i)); > + } > + foo (a, b); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (bar<int> ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > + r.s = 0; > + baz (a, b); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (qux ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/g++.dg/vect/simd-7.cc.jj 2019-06-20 16:00:51.095891542 > +0200 > +++ gcc/testsuite/g++.dg/vect/simd-7.cc 2019-06-20 16:12:50.222747875 +0200 > @@ -0,0 +1,124 @@ > +// { dg-require-effective-target size32plus } > +// { dg-additional-options "-fopenmp-simd" } > +// { dg-additional-options "-mavx" { target avx_runtime } } > +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > target i?86-*-* x86_64-*-* } } } */ > + > +#include "../../gcc.dg/vect/tree-vect.h" > + > +int r, a[1024], b[1024], q; > + > +template <typename T, typename U> > +__attribute__((noipa)) void > +foo (T a, T b, U r) > +{ > + #pragma omp simd reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +template <typename T> > +__attribute__((noipa)) T > +bar (void) > +{ > + T &s = q; > + q = 0; > + #pragma omp simd reduction (inscan, 
+:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +template <typename T> > +__attribute__((noipa)) void > +baz (T *a, T *b, T &r) > +{ > + #pragma omp simd reduction (inscan, +:r) if (simd: 0) > + for (T i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +template <typename T> > +__attribute__((noipa)) int > +qux (void) > +{ > + T s = q; > + q = 0; > + #pragma omp simd reduction (inscan, +:s) simdlen (1) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + check_vect (); > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + foo<int *, int &> (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar<int> () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz<int> (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux<int &> () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/g++.dg/vect/simd-8.cc.jj 2019-06-20 16:00:54.154844430 > +0200 > +++ gcc/testsuite/g++.dg/vect/simd-8.cc 2019-06-20 16:15:37.994133891 +0200 > @@ -0,0 +1,122 @@ > +// { dg-require-effective-target size32plus } > +// { dg-additional-options "-fopenmp-simd" } > +// { dg-additional-options "-mavx" { target avx_runtime } } > +// { dg-final { scan-tree-dump-times 
"vectorized \[1-3] loops" 2 "vect" { > target i?86-*-* x86_64-*-* } } } > + > +#include "../../gcc.dg/vect/tree-vect.h" > + > +int r, a[1024], b[1024], q; > + > +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer > (omp_priv = 0) > + > +__attribute__((noipa)) void > +foo (int *a, int *b, int &r) > +{ > + #pragma omp simd reduction (inscan, foo:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +bar (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp simd reduction (inscan, foo:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (int *a, int *b, int &r) > +{ > + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r += a[i]; > + } > +} > + > +__attribute__((noipa)) int > +qux (void) > +{ > + int &s = q; > + q = 0; > + #pragma omp simd reduction (inscan, foo:s) simdlen (1) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s += 2 * a[i]; > + } > + return s; > +} > + > +int > +main () > +{ > + int s = 0; > + check_vect (); > + for (int i = 0; i < 1024; ++i) > + { > + a[i] = i; > + b[i] = -1; > + asm ("" : "+g" (i)); > + } > + foo (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = 25; > + s += i; > + } > + if (bar () != 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -1; > + s += 2 * i; > + } > + r = 0; > + baz (a, b, r); > + if (r != 1024 * 1023 / 2) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + else > + b[i] = -25; > + s += i; > + } > + if (qux () 
!= 1024 * 1023) > + abort (); > + s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i] != s) > + abort (); > + s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/g++.dg/vect/simd-9.cc.jj 2019-06-20 16:00:57.197797566 > +0200 > +++ gcc/testsuite/g++.dg/vect/simd-9.cc 2019-06-20 16:17:27.484427949 +0200 > @@ -0,0 +1,153 @@ > +// { dg-require-effective-target size32plus } > +// { dg-additional-options "-fopenmp-simd" } > +// { dg-additional-options "-mavx" { target avx_runtime } } > +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { > xfail *-*-* } } } > + > +#include "../../gcc.dg/vect/tree-vect.h" > + > +struct S { > + inline S (); > + inline ~S (); > + inline S (const S &); > + inline S & operator= (const S &); > + int s; > +}; > + > +S::S () : s (0) > +{ > +} > + > +S::~S () > +{ > +} > + > +S::S (const S &x) > +{ > + s = x.s; > +} > + > +S & > +S::operator= (const S &x) > +{ > + s = x.s; > + return *this; > +} > + > +static inline void > +ini (S &x) > +{ > + x.s = 0; > +} > + > +S r, a[1024], b[1024]; > + > +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) > +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer > (ini (omp_priv)) > + > +__attribute__((noipa)) void > +foo (S *a, S *b, S &r) > +{ > + #pragma omp simd reduction (inscan, +:r) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +__attribute__((noipa)) S > +bar (void) > +{ > + S s; > + #pragma omp simd reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return s; > +} > + > +__attribute__((noipa)) void > +baz (S *a, S *b, S &r) > +{ > + #pragma omp simd reduction (inscan, +:r) simdlen(1) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = r; > + #pragma omp scan exclusive(r) > + r.s += a[i].s; > + } > +} > + > +__attribute__((noipa)) S > +qux (void) > +{ > + S s; > 
+ #pragma omp simd if (0) reduction (inscan, plus:s) > + for (int i = 0; i < 1024; i++) > + { > + b[i] = s; > + #pragma omp scan exclusive(s) > + s.s += 2 * a[i].s; > + } > + return s; > +} > + > +int > +main () > +{ > + S s; > + check_vect (); > + for (int i = 0; i < 1024; ++i) > + { > + a[i].s = i; > + b[i].s = -1; > + asm ("" : "+g" (i)); > + } > + foo (a, b, r); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (bar ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > + r.s = 0; > + baz (a, b, r); > + if (r.s != 1024 * 1023 / 2) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + else > + b[i].s = 25; > + s.s += i; > + } > + if (qux ().s != 1024 * 1023) > + abort (); > + s.s = 0; > + for (int i = 0; i < 1024; ++i) > + { > + if (b[i].s != s.s) > + abort (); > + s.s += 2 * i; > + } > + return 0; > +} > --- gcc/testsuite/c-c++-common/gomp/scan-2.c.jj 2019-06-10 14:18:17.461525669 > +0200 > +++ gcc/testsuite/c-c++-common/gomp/scan-2.c 2019-06-20 23:54:03.615422149 > +0200 > @@ -8,7 +8,7 @@ f1 (int *c, int *d) > for (i = 0; i < 64; i++) > { > d[i] = a; > - #pragma omp scan exclusive (a) /* { dg-message "sorry, > unimplemented: '#pragma omp scan' not supported yet" } */ > + #pragma omp scan exclusive (a) > a += c[i]; > } > } > > Jakub