On Mon, 27 Nov 2023, Tamar Christina wrote:

> Ping
> 
> > -----Original Message-----
> > From: Tamar Christina <tamar.christ...@arm.com>
> > Sent: Monday, November 6, 2023 7:40 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: nd <n...@arm.com>; rguent...@suse.de; j...@ventanamicro.com
> > Subject: [PATCH 9/21]middle-end: implement vectorizable_early_exit for
> > codegen of exit code
> > 
> > Hi All,
> > 
> > This implements vectorable_early_exit which is used as the codegen part of
> > vectorizing a gcond.
> > 
> > For the most part it shares the majority of the code with
> > vectorizable_comparison with addition that it needs to be able to reduce
> > multiple resulting statements into a single one for use in the gcond, and 
> > also
> > needs to be able to perform masking on the comparisons.
> > 
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > 
> > Ok for master?
> > 
> > Thanks,
> > Tamar
> > 
> > gcc/ChangeLog:
> > 
> >     * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts
> > without
> >     lhs.
> >     (vectorizable_early_exit): New.
> >     (vect_analyze_stmt, vect_transform_stmt): Use it.
> >     (vect_is_simple_use, vect_get_vector_types_for_stmt): Support
> > gcond.
> > 
> > --- inline copy of patch --
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > 36aeca60a22cfaea8d3b43348000d75de1d525c7..4809b822632279493a84
> > 3d402a833c9267bb315e 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -12475,7 +12475,7 @@ vectorizable_comparison_1 (vec_info *vinfo,
> > tree vectype,
> >    vec<tree> vec_oprnds0 = vNULL;
> >    vec<tree> vec_oprnds1 = vNULL;
> >    tree mask_type;
> > -  tree mask;
> > +  tree mask = NULL_TREE;
> > 
> >    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> >      return false;
> > @@ -12615,8 +12615,9 @@ vectorizable_comparison_1 (vec_info *vinfo,
> > tree vectype,
> >    /* Transform.  */
> > 
> >    /* Handle def.  */
> > -  lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
> > -  mask = vect_create_destination_var (lhs, mask_type);
> > +  lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));  if (lhs)
> > +    mask = vect_create_destination_var (lhs, mask_type);

wrecked line-break / white-space

> > 
> >    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
> >                  rhs1, &vec_oprnds0, vectype,
> > @@ -12630,7 +12631,10 @@ vectorizable_comparison_1 (vec_info *vinfo,
> > tree vectype,
> >        gimple *new_stmt;
> >        vec_rhs2 = vec_oprnds1[i];
> > 
> > -      new_temp = make_ssa_name (mask);
> > +      if (lhs)
> > +   new_temp = make_ssa_name (mask);
> > +      else
> > +   new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
> >        if (bitop1 == NOP_EXPR)
> >     {
> >       new_stmt = gimple_build_assign (new_temp, code, @@ -12709,6
> > +12713,196 @@ vectorizable_comparison (vec_info *vinfo,
> >    return true;
> >  }
> > 
> > +/* Check to see if the current early break given in STMT_INFO is valid for
> > +   vectorization.  */
> > +
> > +static bool
> > +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
> > +                    gimple_stmt_iterator *gsi, gimple **vec_stmt,
> > +                    slp_tree slp_node, stmt_vector_for_cost *cost_vec) {

{ goes to the next line

> > +  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> > +  if (!loop_vinfo
> > +      || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
> > +    return false;
> > +
> > +  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_early_exit_def)
> > +    return false;
> > +
> > +  if (!STMT_VINFO_RELEVANT_P (stmt_info))
> > +    return false;
> > +
> > +  gimple_match_op op;
> > +  if (!gimple_extract_op (stmt_info->stmt, &op))
> > +    gcc_unreachable ();
> > +  gcc_assert (op.code.is_tree_code ());  auto code = tree_code
> > + (op.code);

missed line break

> > +
> > +  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);  gcc_assert
> > + (vectype_out);

likewise.

> > +  tree var_op = op.ops[0];
> > +
> > +  /* When vectorizing things like pointer comparisons we will assume that
> > +     the VF of both operands are the same. e.g. a pointer must be compared
> > +     to a pointer.  We'll leave this up to vectorizable_comparison_1 to
> > +     check further.  */
> > +  tree vectype_op = vectype_out;
> > +  if (SSA_VAR_P (var_op))

TREE_CODE (var_op) == SSA_NAME

> > +    {
> > +      stmt_vec_info operand0_info
> > +   = loop_vinfo->lookup_stmt (SSA_NAME_DEF_STMT (var_op));

lookup_def (var_op)

> > +      if (!operand0_info)
> > +   return false;
> > +
> > +      /* If we're in a pattern get the type of the original statement.  */
> > +      if (STMT_VINFO_IN_PATTERN_P (operand0_info))
> > +   operand0_info = STMT_VINFO_RELATED_STMT (operand0_info);
> > +      vectype_op = STMT_VINFO_VECTYPE (operand0_info);
> > +    }

I think you want to use vect_is_simple_use on var_op instead, that's
the canonical way for querying operands.

> > +
> > +  tree truth_type = truth_type_for (vectype_op);  machine_mode mode =
> > + TYPE_MODE (truth_type);  int ncopies;
> > +

more line break issues ... (also below, check yourself)

shouldn't STMT_VINFO_VECTYPE already match truth_type here?  If not
it looks to be set wrongly (or shouldn't be set at all)

> > +  if (slp_node)
> > +    ncopies = 1;
> > +  else
> > +    ncopies = vect_get_num_copies (loop_vinfo, truth_type);
> > +
> > +  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);  bool
> > + masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> > +

what about with_len?

> > +  /* Analyze only.  */
> > +  if (!vec_stmt)
> > +    {
> > +      if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
> > +   {
> > +     if (dump_enabled_p ())
> > +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                          "can't vectorize early exit because the "
> > +                          "target doesn't support flag setting vector "
> > +                          "comparisons.\n");
> > +     return false;
> > +   }
> > +
> > +      if (!expand_vec_cmp_expr_p (vectype_op, truth_type, NE_EXPR))

Why NE_EXPR?  This looks wrong.  Or vectype_op is wrong if you're
emitting

 mask = op0 CMP op1;
 if (mask != 0)

I think you need to check for CMP, not NE_EXPR.

> > +   {
> > +     if (dump_enabled_p ())
> > +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                          "can't vectorize early exit because the "
> > +                          "target does not support boolean vector "
> > +                          "comparisons for type %T.\n", truth_type);
> > +     return false;
> > +   }
> > +
> > +      if (ncopies > 1
> > +     && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing)
> > +   {
> > +     if (dump_enabled_p ())
> > +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                          "can't vectorize early exit because the "
> > +                          "target does not support boolean vector OR for "
> > +                          "type %T.\n", truth_type);
> > +     return false;
> > +   }
> > +
> > +      if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, 
> > gsi,
> > +                                 vec_stmt, slp_node, cost_vec))
> > +   return false;

I suppose vectorizable_comparison_1 will check this again, so the above
is redundant?

> > +      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> > +   vect_record_loop_mask (loop_vinfo, masks, ncopies, truth_type,
> > NULL);

LENs missing (or disabling partial vectors).

> > +      return true;
> > +    }
> > +
> > +  /* Tranform.  */
> > +
> > +  tree new_temp = NULL_TREE;
> > +  gimple *new_stmt = NULL;
> > +
> > +  if (dump_enabled_p ())
> > +    dump_printf_loc (MSG_NOTE, vect_location, "transform
> > + early-exit.\n");
> > +
> > +  if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi,
> > +                             vec_stmt, slp_node, cost_vec))
> > +    gcc_unreachable ();
> > +
> > +  gimple *stmt = STMT_VINFO_STMT (stmt_info);  basic_block cond_bb =
> > + gimple_bb (stmt);  gimple_stmt_iterator  cond_gsi = gsi_last_bb
> > + (cond_bb);
> > +
> > +  vec<tree> stmts;
> > +
> > +  if (slp_node)
> > +    stmts = SLP_TREE_VEC_DEFS (slp_node);
> > +  else
> > +    {
> > +      auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
> > +      stmts.create (vec_stmts.length ());
> > +      for (auto stmt : vec_stmts)
> > +   stmts.quick_push (gimple_assign_lhs (stmt));
> > +    }
>
> > +  /* Determine if we need to reduce the final value.  */
> > +  if (stmts.length () > 1)
> > +    {
> > +      /* We build the reductions in a way to maintain as much parallelism 
> > as
> > +    possible.  */
> > +      auto_vec<tree> workset (stmts.length ());
> > +      workset.splice (stmts);
> > +      while (workset.length () > 1)
> > +   {
> > +     new_temp = make_temp_ssa_name (truth_type, NULL,
> > "vexit_reduc");
> > +     tree arg0 = workset.pop ();
> > +     tree arg1 = workset.pop ();
> > +     new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0,
> > arg1);
> > +     vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
> > +                                  &cond_gsi);
> > +     if (slp_node)
> > +       slp_node->push_vec_def (new_stmt);
> > +     else
> > +       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
> > +     workset.quick_insert (0, new_temp);

Reduction epilogue handling has similar code to reduce a set of vectors
to a single one with an operation.  I think we want to share that code.

> > +   }
> > +    }
> > +  else
> > +    new_temp = stmts[0];
> > +
> > +  gcc_assert (new_temp);
> > +
> > +  tree cond = new_temp;
> > +  if (masked_loop_p)
> > +    {
> > +      tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
> > truth_type, 0);
> > +      cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond,
> > +                          &cond_gsi);

I don't think this is correct when 'stmts' had more than one vector?

> > +    }
> > +
> > +  /* Now build the new conditional.  Pattern gimple_conds get dropped
> > during
> > +     codegen so we must replace the original insn.  */  if
> > + (is_pattern_stmt_p (stmt_info))
> > +    stmt = STMT_VINFO_STMT (STMT_VINFO_RELATED_STMT (stmt_info));

vect_original_stmt?

> > +
> > +  tree t = fold_build2 (NE_EXPR, boolean_type_node, cond,
> > +                   build_zero_cst (truth_type));
> > +  t = canonicalize_cond_expr_cond (t);
> > +  gimple_cond_set_condition_from_tree ((gcond*)stmt, t);

Please use gimple_cond_set_{lhs,rhs,code} instead of going through
GENERIC.

> > +  update_stmt (stmt);
> > +
> > +  if (slp_node)
> > +    slp_node->push_vec_def (stmt);
> > +   else
> > +    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (stmt);

I don't think we need those, in fact we still have the original defs
from vectorizable_comparison_1 here?  I'd just truncate both vectors.

> > +
> > +
> > +  if (!slp_node)
> > +    *vec_stmt = stmt;

I think you leak 'stmts' for !slp

Otherwise looks good.

Richard.


> > +  return true;
> > +}
> > +
> >  /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
> >     can handle all live statements in the node.  Otherwise return true
> >     if STMT_INFO is not live or if vectorizable_live_operation can handle 
> > it.
> > @@ -12928,7 +13122,9 @@ vect_analyze_stmt (vec_info *vinfo,
> >       || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
> >                               stmt_info, NULL, node)
> >       || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
> > -                              stmt_info, NULL, node, cost_vec));
> > +                              stmt_info, NULL, node, cost_vec)
> > +     || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> > +                                 cost_vec));
> >    else
> >      {
> >        if (bb_vinfo)
> > @@ -12951,7 +13147,10 @@ vect_analyze_stmt (vec_info *vinfo,
> >                                      NULL, NULL, node, cost_vec)
> >           || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
> >                                       cost_vec)
> > -         || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
> > +         || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
> > +         || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> > +                                     cost_vec));
> > +
> >      }
> > 
> >    if (node)
> > @@ -13110,6 +13309,12 @@ vect_transform_stmt (vec_info *vinfo,
> >        gcc_assert (done);
> >        break;
> > 
> > +    case loop_exit_ctrl_vec_info_type:
> > +      done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
> > +                                 slp_node, NULL);
> > +      gcc_assert (done);
> > +      break;
> > +
> >      default:
> >        if (!STMT_VINFO_LIVE_P (stmt_info))
> >     {
> > @@ -13511,7 +13716,7 @@ vect_is_simple_use (tree operand, vec_info
> > *vinfo, enum vect_def_type *dt,
> >     case vect_first_order_recurrence:
> >       dump_printf (MSG_NOTE, "first order recurrence\n");
> >       break;
> > -       case vect_early_exit_def:
> > +   case vect_early_exit_def:
> >       dump_printf (MSG_NOTE, "early exit\n");
> >       break;
> >     case vect_unknown_def_type:
> > 
> > 
> > 
> > 
> > --
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to