On Tue, 7 Nov 2023, Robin Dapp wrote:

> Hi,
>
> this restricts tree-ifcvt to only create COND_OPs when we versioned the
> loop for vectorization.  Apart from that it re-creates a VEC_COND_EXPR
> in vect_expand_fold_left if we emitted a COND_OP.
>
> I'm still missing the "bail out" part for vect_expand_fold_left, though?
>
> Bootstrap, testsuites are unchanged but so they were for the original
> patch, so... :)
Looks good, maybe ...

> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         PR tree-optimization/112361
>         PR target/112359
>         PR middle-end/112406
>
>         * tree-if-conv.cc (convert_scalar_cond_reduction): Remember if
>         loop was versioned and only then create COND_OPs.
>         (predicate_scalar_phi): Do not create COND_OP when not
>         vectorizing.
>         * tree-vect-loop.cc (vect_expand_fold_left): Re-create
>         VEC_COND_EXPR.
>         (vectorize_fold_left_reduction): Pass mask and whether a cond_op
>         was used to vect_expand_fold_left.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/pr112359.c: New test.
> ---
>  gcc/testsuite/gcc.dg/pr112359.c | 14 +++++++++++
>  gcc/tree-if-conv.cc             | 41 ++++++++++++++++++++++-----------
>  gcc/tree-vect-loop.cc           | 22 ++++++++++++++++--
>  3 files changed, 62 insertions(+), 15 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr112359.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr112359.c b/gcc/testsuite/gcc.dg/pr112359.c
> new file mode 100644
> index 00000000000..26c77457399
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr112359.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -mavx512fp16 -ftree-loop-if-convert" } */
> +
> +int i, c;
> +unsigned long long u;
> +
> +void
> +foo (void)
> +{
> +  for (; i; i++)
> +    if (c)
> +      u |= i;
> +}
> +
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index a7a751b668c..0190cf2369e 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -1845,12 +1845,15 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
>       res_2 = res_13 + _ifc__1;
>    Argument SWAP tells that arguments of conditional expression should be
>    swapped.
> +  LOOP_VERSIONED is true if we assume that we versioned the loop for
> +  vectorization.  In that case we can create a COND_OP.
>    Returns rhs of resulting PHI assignment.  */
>
>  static tree
>  convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
>                                tree cond, tree op0, tree op1, bool swap,
> -                              bool has_nop, gimple* nop_reduc)
> +                              bool has_nop, gimple* nop_reduc,
> +                              bool loop_versioned)
>  {
>    gimple_stmt_iterator stmt_it;
>    gimple *new_assign;
> @@ -1874,7 +1877,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
>       The COND_OP will have a neutral_op else value.  */
>    internal_fn ifn;
>    ifn = get_conditional_internal_fn (reduction_op);
> -  if (ifn != IFN_LAST
> +  if (loop_versioned && ifn != IFN_LAST
>        && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
>        && !swap)
>      {
> @@ -2129,11 +2132,13 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data.  */)
>     The generated code is inserted at GSI that points to the top of
>     basic block's statement list.
>     If PHI node has more than two arguments a chain of conditional
> -   expression is produced.  */
> +   expression is produced.
> +   LOOP_VERSIONED should be true if we know that the loop was versioned for
> +   vectorization.  */
>
>
>  static void
> -predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
> +predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
>  {
>    gimple *new_stmt = NULL, *reduc, *nop_reduc;
>    tree rhs, res, arg0, arg1, op0, op1, scev;
> @@ -2213,7 +2218,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
>           /* Convert reduction stmt into vectorizable form.  */
>           rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
>                                                true_bb != gimple_bb (reduc),
> -                                              has_nop, nop_reduc);
> +                                              has_nop, nop_reduc,
> +                                              loop_versioned);
>           redundant_ssa_names.safe_push (std::make_pair (res, rhs));
>         }
>        else
> @@ -2311,7 +2317,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
>         {
>           /* Convert reduction stmt into vectorizable form.  */
>           rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
> -                                              swap, has_nop, nop_reduc);
> +                                              swap, has_nop, nop_reduc,
> +                                              loop_versioned);
>           redundant_ssa_names.safe_push (std::make_pair (res, rhs));
>         }
>        new_stmt = gimple_build_assign (res, rhs);
> @@ -2334,10 +2341,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
>  }
>
>  /* Replaces in LOOP all the scalar phi nodes other than those in the
> -   LOOP->header block with conditional modify expressions.  */
> +   LOOP->header block with conditional modify expressions.
> +   LOOP_VERSIONED should be true if we know that the loop was versioned for
> +   vectorization.  */
>
>  static void
> -predicate_all_scalar_phis (class loop *loop)
> +predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
>  {
>    basic_block bb;
>    unsigned int orig_loop_num_nodes = loop->num_nodes;
> @@ -2365,7 +2374,7 @@ predicate_all_scalar_phis (class loop *loop)
>             gsi_next (&phi_gsi);
>           else
>             {
> -             predicate_scalar_phi (phi, &gsi);
> +             predicate_scalar_phi (phi, &gsi, loop_versioned);
>               remove_phi_node (&phi_gsi, false);
>             }
>         }
> @@ -2887,10 +2896,12 @@ remove_conditions_and_labels (loop_p loop)
>  }
>
>  /* Combine all the basic blocks from LOOP into one or two super basic
> -   blocks.  Replace PHI nodes with conditional modify expressions.  */
> +   blocks.  Replace PHI nodes with conditional modify expressions.
> +   LOOP_VERSIONED should be true if we know that the loop was versioned for
> +   vectorization.  */
>
>  static void
> -combine_blocks (class loop *loop)
> +combine_blocks (class loop *loop, bool loop_versioned)
>  {
>    basic_block bb, exit_bb, merge_target_bb;
>    unsigned int orig_loop_num_nodes = loop->num_nodes;
> @@ -2900,7 +2911,7 @@ combine_blocks (class loop *loop)
>
>    remove_conditions_and_labels (loop);
>    insert_gimplified_predicates (loop);
> -  predicate_all_scalar_phis (loop);
> +  predicate_all_scalar_phis (loop, loop_versioned);
>
>    if (need_to_predicate || need_to_rewrite_undefined)
>      predicate_statements (loop);
> @@ -3727,6 +3738,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
>    bitmap exit_bbs;
>    edge pe;
>    auto_vec<data_reference_p, 10> refs;
> +  bool loop_versioned;
>
>  again:
>    rloop = NULL;
> @@ -3736,6 +3748,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
>    need_to_predicate = false;
>    need_to_rewrite_undefined = false;
>    any_complicated_phi = false;
> +  loop_versioned = false;
>
>    /* Apply more aggressive if-conversion when loop or its outer loop were
>       marked with simd pragma.  When that's the case, we try to if-convert
> @@ -3843,6 +3856,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
>          will re-use that for things like runtime alias versioning
>          whose condition can end up using those invariants.  */
>        pe = single_pred_edge (gimple_bb (preds->last ()));
> +
> +      loop_versioned = true;
>      }
>
>    if (need_to_lower_bitfields)
> @@ -3875,7 +3890,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
>        /* Now all statements are if-convertible.  Combine all the basic
>          blocks into one huge basic block doing the if-conversion
>          on-the-fly.  */
> -      combine_blocks (loop);
> +      combine_blocks (loop, loop_versioned);
>      }
>
>    /* Perform local CSE, this esp. helps the vectorizer analysis if loads
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 362856a6507..d48685ae308 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -6907,7 +6907,8 @@ merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
>
>  static tree
>  vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
> -                       tree_code code, tree lhs, tree vector_rhs)
> +                       tree_code code, tree lhs, tree vector_rhs,
> +                       bool is_cond_op, tree mask)

Isn't is_cond_op implied by mask != NULL?  That said, if we ever end up
here with a non-cond op but a loop mask we effectively want the same
behavior, so I think eliding is_cond_op and instead checking
mask != NULL_TREE below is more future proof.

OK with that change.
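Concretely, something like this (an untested sketch only, just to show
the direction; it reuses the calls from your hunk quoted below):

  static tree
  vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
                         tree_code code, tree lhs, tree vector_rhs,
                         tree mask)
  {
    ...
    /* Select the neutral element for the masked-off lanes whenever we
       have a mask, no matter whether it came from a COND_OP created in
       ifcvt or from loop masking, so that the element-wise reduction
       below can be done unconditionally.  */
    if (mask)
      {
        tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
                                                     "masked_vector_rhs");
        tree neutral_op = neutral_op_for_reduction (scalar_type, code,
                                                    NULL_TREE, false);
        tree vector_identity = build_vector_from_val (vectype, neutral_op);
        gassign *select = gimple_build_assign (masked_vector_rhs,
                                               VEC_COND_EXPR, mask,
                                               vector_rhs, vector_identity);
        gsi_insert_before (gsi, select, GSI_SAME_STMT);
        vector_rhs = masked_vector_rhs;
      }
    ...
  }

with the caller passing the mask (or NULL_TREE) and no extra flag.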
I think we already have the "bail out" part for the case of applying
loop masking here:

          else if (reduction_type == FOLD_LEFT_REDUCTION
                   && internal_fn_mask_index (reduc_fn) == -1
                   && FLOAT_TYPE_P (vectype_in)
                   && HONOR_SIGN_DEPENDENT_ROUNDING (vectype_in))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "can't operate on partial vectors because"
                                 " signed zeros cannot be preserved.\n");
              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;

we'd have to do a hard-fail for the case the reduction op is conditional
with a similar condition (the above is too late for a hard fail I think).
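Something along these lines, say (the exact placement and the is_cond_op
flag are hypothetical; it would have to sit early enough in
vectorizable_reduction that we can still return false):

          if (reduction_type == FOLD_LEFT_REDUCTION
              && is_cond_op
              && FLOAT_TYPE_P (vectype_in)
              && HONOR_SIGN_DEPENDENT_ROUNDING (vectype_in))
            {
              /* A conditional fold-left reduction needs masking of the
                 input, which we cannot do while honoring sign-dependent
                 rounding, so fail the analysis outright.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "cannot vectorize conditional fold-left"
                                 " reduction because signed zeros cannot"
                                 " be preserved.\n");
              return false;
            }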
Thanks,
Richard.

>  {
>    tree vectype = TREE_TYPE (vector_rhs);
>    tree scalar_type = TREE_TYPE (vectype);
> @@ -6915,6 +6916,22 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
>    unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
>    unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
>
> +  /* If we created a COND_OP in ifcvt re-create a VEC_COND_EXPR to mask the
> +     input here in order to be able to perform an unconditional element-wise
> +     reduction of it.  */
> +  if (is_cond_op)
> +    {
> +      tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
> +                                                   "masked_vector_rhs");
> +      tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE,
> +                                                  false);
> +      tree vector_identity = build_vector_from_val (vectype, neutral_op);
> +      gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR,
> +                                             mask, vector_rhs, vector_identity);
> +      gsi_insert_before (gsi, select, GSI_SAME_STMT);
> +      vector_rhs = masked_vector_rhs;
> +    }
> +
>    for (unsigned HOST_WIDE_INT bit_offset = 0;
>         bit_offset < vec_size_in_bits;
>         bit_offset += element_bitsize)
> @@ -7151,7 +7168,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
>         else
>           {
>             reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
> -                                              tree_code (code), reduc_var, def0);
> +                                              tree_code (code), reduc_var, def0,
> +                                              is_cond_op, mask);
>             new_stmt = SSA_NAME_DEF_STMT (reduc_var);
>             /* Remove the statement, so that we can use the same code paths
>                as for statements that we've just created.  */

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)