On Mon, 27 Nov 2023, Tamar Christina wrote: > Ping > > > -----Original Message----- > > From: Tamar Christina <tamar.christ...@arm.com> > > Sent: Monday, November 6, 2023 7:40 AM > > To: gcc-patches@gcc.gnu.org > > Cc: nd <n...@arm.com>; rguent...@suse.de; j...@ventanamicro.com > > Subject: [PATCH 9/21]middle-end: implement vectorizable_early_exit for > > codegen of exit code > > > > Hi All, > > > > This implements vectorable_early_exit which is used as the codegen part of > > vectorizing a gcond. > > > > For the most part it shares the majority of the code with > > vectorizable_comparison with addition that it needs to be able to reduce > > multiple resulting statements into a single one for use in the gcond, and > > also > > needs to be able to perform masking on the comparisons. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > Ok for master? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts > > without > > lhs. > > (vectorizable_early_exit): New. > > (vect_analyze_stmt, vect_transform_stmt): Use it. > > (vect_is_simple_use, vect_get_vector_types_for_stmt): Support > > gcond. > > > > --- inline copy of patch -- > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index > > 36aeca60a22cfaea8d3b43348000d75de1d525c7..4809b822632279493a84 > > 3d402a833c9267bb315e 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -12475,7 +12475,7 @@ vectorizable_comparison_1 (vec_info *vinfo, > > tree vectype, > > vec<tree> vec_oprnds0 = vNULL; > > vec<tree> vec_oprnds1 = vNULL; > > tree mask_type; > > - tree mask; > > + tree mask = NULL_TREE; > > > > if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) > > return false; > > @@ -12615,8 +12615,9 @@ vectorizable_comparison_1 (vec_info *vinfo, > > tree vectype, > > /* Transform. */ > > > > /* Handle def. */ > > - lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info)); > > - mask = vect_create_destination_var (lhs, mask_type); > > + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); if (lhs) > > + mask = vect_create_destination_var (lhs, mask_type);
wrecked line-break / white-space > > > > vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, > > rhs1, &vec_oprnds0, vectype, > > @@ -12630,7 +12631,10 @@ vectorizable_comparison_1 (vec_info *vinfo, > > tree vectype, > > gimple *new_stmt; > > vec_rhs2 = vec_oprnds1[i]; > > > > - new_temp = make_ssa_name (mask); > > + if (lhs) > > + new_temp = make_ssa_name (mask); > > + else > > + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp"); > > if (bitop1 == NOP_EXPR) > > { > > new_stmt = gimple_build_assign (new_temp, code, @@ -12709,6 > > +12713,196 @@ vectorizable_comparison (vec_info *vinfo, > > return true; > > } > > > > +/* Check to see if the current early break given in STMT_INFO is valid for > > + vectorization. */ > > + > > +static bool > > +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, > > + gimple_stmt_iterator *gsi, gimple **vec_stmt, > > + slp_tree slp_node, stmt_vector_for_cost *cost_vec) { { goes to the next line > > + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > > + if (!loop_vinfo > > + || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) > > + return false; > > + > > + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_early_exit_def) > > + return false; > > + > > + if (!STMT_VINFO_RELEVANT_P (stmt_info)) > > + return false; > > + > > + gimple_match_op op; > > + if (!gimple_extract_op (stmt_info->stmt, &op)) > > + gcc_unreachable (); > > + gcc_assert (op.code.is_tree_code ()); auto code = tree_code > > + (op.code); missed line break > > + > > + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); gcc_assert > > + (vectype_out); likewise. > > + tree var_op = op.ops[0]; > > + > > + /* When vectorizing things like pointer comparisons we will assume that > > + the VF of both operands are the same. e.g. a pointer must be compared > > + to a pointer. We'll leave this up to vectorizable_comparison_1 to > > + check further. */ > > + tree vectype_op = vectype_out; > > + if (SSA_VAR_P (var_op)) TREE_CODE (var_op) == SSA_NAME > > + { > > + stmt_vec_info operand0_info > > + = loop_vinfo->lookup_stmt (SSA_NAME_DEF_STMT (var_op)); lookup_def (var_op) > > + if (!operand0_info) > > + return false; > > + > > + /* If we're in a pattern get the type of the original statement. */ > > + if (STMT_VINFO_IN_PATTERN_P (operand0_info)) > > + operand0_info = STMT_VINFO_RELATED_STMT (operand0_info); > > + vectype_op = STMT_VINFO_VECTYPE (operand0_info); > > + } I think you want to use vect_is_simple_use on var_op instead, that's the canonical way for querying operands. > > + > > + tree truth_type = truth_type_for (vectype_op); machine_mode mode = > > + TYPE_MODE (truth_type); int ncopies; > > + more line break issues ... (also below, check yourself) shouldn't STMT_VINFO_VECTYPE already match truth_type here? If not it looks to be set wrongly (or shouldn't be set at all) > > + if (slp_node) > > + ncopies = 1; > > + else > > + ncopies = vect_get_num_copies (loop_vinfo, truth_type); > > + > > + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); bool > > + masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > > + what about with_len? > > + /* Analyze only. */ > > + if (!vec_stmt) > > + { > > + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing) > > + { > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "can't vectorize early exit because the " > > + "target doesn't support flag setting vector " > > + "comparisons.\n"); > > + return false; > > + } > > + > > + if (!expand_vec_cmp_expr_p (vectype_op, truth_type, NE_EXPR)) Why NE_EXPR? This looks wrong. Or vectype_op is wrong if you're emitting mask = op0 CMP op1; if (mask != 0) I think you need to check for CMP, not NE_EXPR. > > + { > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "can't vectorize early exit because the " > > + "target does not support boolean vector " > > + "comparisons for type %T.\n", truth_type); > > + return false; > > + } > > + > > + if (ncopies > 1 > > + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing) > > + { > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "can't vectorize early exit because the " > > + "target does not support boolean vector OR for " > > + "type %T.\n", truth_type); > > + return false; > > + } > > + > > + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, > > gsi, > > + vec_stmt, slp_node, cost_vec)) > > + return false; I suppose vectorizable_comparison_1 will check this again, so the above is redundant? > > + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) > > + vect_record_loop_mask (loop_vinfo, masks, ncopies, truth_type, > > NULL); LENs missing (or disabling partial vectors). > > + return true; > > + } > > + > > + /* Tranform. */ > > + > > + tree new_temp = NULL_TREE; > > + gimple *new_stmt = NULL; > > + > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_NOTE, vect_location, "transform > > + early-exit.\n"); > > + > > + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi, > > + vec_stmt, slp_node, cost_vec)) > > + gcc_unreachable (); > > + > > + gimple *stmt = STMT_VINFO_STMT (stmt_info); basic_block cond_bb = > > + gimple_bb (stmt); gimple_stmt_iterator cond_gsi = gsi_last_bb > > + (cond_bb); > > + > > + vec<tree> stmts; > > + > > + if (slp_node) > > + stmts = SLP_TREE_VEC_DEFS (slp_node); > > + else > > + { > > + auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info); > > + stmts.create (vec_stmts.length ()); > > + for (auto stmt : vec_stmts) > > + stmts.quick_push (gimple_assign_lhs (stmt)); > > + } > > > + /* Determine if we need to reduce the final value. */ > > + if (stmts.length () > 1) > > + { > > + /* We build the reductions in a way to maintain as much parallelism > > as > > + possible. */ > > + auto_vec<tree> workset (stmts.length ()); > > + workset.splice (stmts); > > + while (workset.length () > 1) > > + { > > + new_temp = make_temp_ssa_name (truth_type, NULL, > > "vexit_reduc"); > > + tree arg0 = workset.pop (); > > + tree arg1 = workset.pop (); > > + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, > > arg1); > > + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, > > + &cond_gsi); > > + if (slp_node) > > + slp_node->push_vec_def (new_stmt); > > + else > > + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); > > + workset.quick_insert (0, new_temp); Reduction epilogue handling has similar code to reduce a set of vectors to a single one with an operation. I think we want to share that code. > > + } > > + } > > + else > > + new_temp = stmts[0]; > > + > > + gcc_assert (new_temp); > > + > > + tree cond = new_temp; > > + if (masked_loop_p) > > + { > > + tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, > > truth_type, 0); > > + cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond, > > + &cond_gsi); I don't think this is correct when 'stmts' had more than one vector? > > + } > > + > > + /* Now build the new conditional. Pattern gimple_conds get dropped > > during > > + codegen so we must replace the original insn. */ if > > + (is_pattern_stmt_p (stmt_info)) > > + stmt = STMT_VINFO_STMT (STMT_VINFO_RELATED_STMT (stmt_info)); vect_original_stmt? > > + > > + tree t = fold_build2 (NE_EXPR, boolean_type_node, cond, > > + build_zero_cst (truth_type)); > > + t = canonicalize_cond_expr_cond (t); > > + gimple_cond_set_condition_from_tree ((gcond*)stmt, t); Please use gimple_cond_set_{lhs,rhs,code} instead of going through GENERIC. > > + update_stmt (stmt); > > + > > + if (slp_node) > > + slp_node->push_vec_def (stmt); > > + else > > + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (stmt); I don't think we need those, in fact we still have the original defs from vectorizable_comparison_1 here? I'd just truncate both vectors. > > + > > + > > + if (!slp_node) > > + *vec_stmt = stmt; I think you leak 'stmts' for !slp Otherwise looks good. Richard. > > + return true; > > +} > > + > > /* If SLP_NODE is nonnull, return true if vectorizable_live_operation > > can handle all live statements in the node. Otherwise return true > > if STMT_INFO is not live or if vectorizable_live_operation can handle > > it. > > @@ -12928,7 +13122,9 @@ vect_analyze_stmt (vec_info *vinfo, > > || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), > > stmt_info, NULL, node) > > || vectorizable_recurr (as_a <loop_vec_info> (vinfo), > > - stmt_info, NULL, node, cost_vec)); > > + stmt_info, NULL, node, cost_vec) > > + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, > > + cost_vec)); > > else > > { > > if (bb_vinfo) > > @@ -12951,7 +13147,10 @@ vect_analyze_stmt (vec_info *vinfo, > > NULL, NULL, node, cost_vec) > > || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node, > > cost_vec) > > - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)); > > + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec) > > + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, > > + cost_vec)); > > + > > } > > > > if (node) > > @@ -13110,6 +13309,12 @@ vect_transform_stmt (vec_info *vinfo, > > gcc_assert (done); > > break; > > > > + case loop_exit_ctrl_vec_info_type: > > + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt, > > + slp_node, NULL); > > + gcc_assert (done); > > + break; > > + > > default: > > if (!STMT_VINFO_LIVE_P (stmt_info)) > > { > > @@ -13511,7 +13716,7 @@ vect_is_simple_use (tree operand, vec_info > > *vinfo, enum vect_def_type *dt, > > case vect_first_order_recurrence: > > dump_printf (MSG_NOTE, "first order recurrence\n"); > > break; > > - case vect_early_exit_def: > > + case vect_early_exit_def: > > dump_printf (MSG_NOTE, "early exit\n"); > > break; > > case vect_unknown_def_type: > > > > > > > > > > -- > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)