> +using namespace rtl_ssa;
> +using namespace riscv_vector;
> +
> +/* The AVL propagation instructions and the corresponding preferred AVL.
> +   This map is updated during the analysis.  */
> +static hash_map<insn_info *, rtx> *avlprops;

Maybe make this member data of pass_avlprop?
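
Something like this (untested sketch; the member name is just an
example):

  class pass_avlprop : public rtl_opt_pass
  {
  public:
    ...
  private:
    /* The AVL propagation instructions and the corresponding
       preferred AVL, updated during the analysis.  */
    hash_map<insn_info *, rtx> *m_avl_propagations;
  };

That gets rid of the file-scope state and ties the map's ownership to
the pass object.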

> +
> +const pass_data pass_data_avlprop = {
> +  RTL_PASS,     /* type */
> +  "avlprop",    /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE,      /* tv_id */
> +  0,            /* properties_required */
> +  0,            /* properties_provided */
> +  0,            /* properties_destroyed */
> +  0,            /* todo_flags_start */
> +  0,            /* todo_flags_finish */
> +};
> +
> +class pass_avlprop : public rtl_opt_pass
> +{
> +public:
> +  pass_avlprop (gcc::context *ctxt) : rtl_opt_pass (pass_data_avlprop, ctxt) {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *) final override
> +  {
> +    return TARGET_VECTOR && optimize > 0;
> +  }
> +  virtual unsigned int execute (function *) final override;
> +}; // class pass_avlprop
> +
> +static void
> +avlprop_init (void)

Maybe make this a member function of pass_avlprop?

> +{
> +  calculate_dominance_info (CDI_DOMINATORS);
> +  df_analyze ();
> +  crtl->ssa = new function_info (cfun);

And take the function * from the incoming parameter of execute.
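
Together with the member data suggested above, that could look like
(untested):

  void
  pass_avlprop::avlprop_init (function *fn)
  {
    calculate_dominance_info (CDI_DOMINATORS);
    df_analyze ();
    crtl->ssa = new function_info (fn);
    m_avl_propagations = new hash_map<insn_info *, rtx>;
  }

with execute (function *fn) calling avlprop_init (fn) instead of
reaching for cfun.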

> +  avlprops = new hash_map<insn_info *, rtx>;
> +}
> +
> +static void
> +avlprop_done (void)
> +{
> +  free_dominance_info (CDI_DOMINATORS);
> +  if (crtl->ssa->perform_pending_updates ())
> +    cleanup_cfg (0);
> +  delete crtl->ssa;
> +  crtl->ssa = nullptr;
> +  delete avlprops;
> +  avlprops = NULL;
> +}
> +
> +/* Helper function to get AVL operand.  */
> +static rtx
> +get_avl (insn_info *insn, bool avlprop_p)
> +{
> +  if (get_attr_avl_type (insn->rtl ()) == INVALID_ATTRIBUTE
> +      || get_attr_avl_type (insn->rtl ()) == VLS)
> +    return NULL_RTX;
> +  if (avlprop_p)
> +    {
> +      if (avlprops->get (insn))
> +       return (*avlprops->get (insn));
> +      else if (vlmax_avl_type_p (insn->rtl ()))
> +       return RVV_VLMAX;

I guess I don't get why we need to handle vlmax_avl_type_p here?

> +    }
> +  extract_insn_cached (insn->rtl ());
> +  return recog_data.operand[get_attr_vl_op_idx (insn->rtl ())];
> +}
> +
> +/* This is a straightforward pattern ALWAYS in partial auto-vectorization:
> +
> +     VL = SELECT_AVL (AVL, ...)
> +     V0 = MASK_LEN_LOAD (..., VL)
> +     V1 = MASK_LEN_LOAD (..., VL)
> +     V2 = V0 + V1 --- Missed LEN information.
> +     MASK_LEN_STORE (..., V2, VL)
> +
> +   We prefer PLUS_EXPR (V0 + V1) instead of COND_LEN_ADD (V0, V1, dummy LEN)
> +   because:
> +
> +     - Few code changes in Loop Vectorizer.
> +     - Reuse the current clean flow of partial vectorization.  That is, apply
> +       the predicate LEN or MASK to LOAD/STORE operations and other special
> +       arithmetic operations (e.g. DIV), then do the whole vector register
> +       operation if it DOESN'T affect the correctness.
> +       Such a flow is used by all other targets like x86, SVE, s390, etc.
> +     - PLUS_EXPR has better gimple optimizations than COND_LEN_ADD.
> +
> +   We propagate AVL from NON-VLMAX to VLMAX for gimple IR like PLUS_EXPR
> +   which generates the VLMAX instruction due to missed LEN information.  The
> +   later VSETVL PASS will eliminate the redundant vsetvls.
> +*/
> +
> +static rtx
> +get_autovectorize_preferred_avl (insn_info *insn)
> +{
> +  if (!vlmax_avl_p (get_avl (insn, true)) || !tail_agnostic_p (insn->rtl ()))
> +    return NULL_RTX;

I would prefer adding a new attribute to make this simpler.
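
For example (attribute name invented here purely to illustrate the
idea):

  ;; In vector.md: mark patterns that are candidates for AVL
  ;; propagation (VLMAX AVL type, tail agnostic).
  (define_attr "avlprop_candidate" "false,true" (const_string "false"))

so that the static part of the check becomes one attribute test:

  if (get_attr_avlprop_candidate (insn->rtl ()) != AVLPROP_CANDIDATE_TRUE)
    return NULL_RTX;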

> +
> +  rtx use_avl = NULL_RTX;
> +  insn_info *avl_use_insn = nullptr;
> +  unsigned int ratio
> +    = calculate_ratio (get_sew (insn->rtl ()), get_vlmul (insn->rtl ()));
> +  for (def_info *def : insn->defs ())
> +    {
> +      auto set = safe_dyn_cast<set_info *> (def);
> +      if (!set || !set->is_reg ())
> +       return NULL_RTX;
> +      for (use_info *use : set->all_uses ())
> +       {
> +         if (!use->is_in_nondebug_insn ())
> +           return NULL_RTX;
> +         insn_info *use_insn = use->insn ();
> +         /* FIXME: Stop AVL propagation if any USE is not a RVV real
> +            instruction.  It should be totally enough for vectorized code
> +            since it is always located in extended basic blocks.
> +
> +            TODO: We can extend PHI checking for intrinsic code if
> +            necessary in the future.  */
> +         if (use_insn->is_artificial () || !has_vtype_op (use_insn->rtl ()))
> +           return NULL_RTX;
> +         if (!has_vl_op (use_insn->rtl ()))
> +           continue;
> +
> +         rtx new_use_avl = get_avl (use_insn, true);
> +         if (!new_use_avl)
> +           return NULL_RTX;
> +         if (!use_avl)
> +           use_avl = new_use_avl;
> +         if (!rtx_equal_p (use_avl, new_use_avl)
> +             || calculate_ratio (get_sew (use_insn->rtl ()),
> +                                 get_vlmul (use_insn->rtl ()))
> +                  != ratio
> +             || vlmax_avl_p (new_use_avl)
> +             || !tail_agnostic_p (use_insn->rtl ()))
> +           return NULL_RTX;
> +         if (!avl_use_insn)
> +           avl_use_insn = use_insn;
> +       }
> +    }
> +
> +  if (use_avl && register_operand (use_avl, Pmode))
> +    {
> +      gcc_assert (avl_use_insn);
> +      // Find a definition at or neighboring INSN.
> +      resource_info resource = full_register (REGNO (use_avl));
> +      def_lookup dl1 = crtl->ssa->find_def (resource, insn);
> +      def_lookup dl2 = crtl->ssa->find_def (resource, avl_use_insn);
> +      if (dl1.matching_set () || dl2.matching_set ())
> +       return NULL_RTX;
> +      def_info *def1 = dl1.last_def_of_prev_group ();
> +      def_info *def2 = dl2.last_def_of_prev_group ();
> +      if (def1 != def2)
> +       return NULL_RTX;
> +      /* FIXME: We only allow AVL propagation within a block, which should
> +        be totally enough for vectorized code.
> +
> +        TODO: We can enhance it here for intrinsic codes in the future
> +        if it is necessary.  */
> +      if (def1->insn ()->bb () != insn->bb ()
> +         || def1->insn ()->compare_with (insn) >= 0)
> +       return NULL_RTX;
> +    }
> +  return use_avl;
> +}
> +
> +/* If we have a preferred AVL to propagate, return the AVL.
> +   Otherwise, return NULL_RTX as we don't have any preferred
> +   AVL.  */
> +
> +static rtx
> +get_preferred_avl (insn_info *insn)
> +{
> +  /* TODO: We only do AVL propagation for missed-LEN partial
> +     autovectorization for now.  We could add more AVL
> +     propagation for intrinsic codes in the future.  */
> +  return get_autovectorize_preferred_avl (insn);
> +}
> +
> +/* Return the AVL TYPE operand index.  */
> +static int
> +get_avl_type_index (insn_info *insn)
> +{
> +  extract_insn_cached (insn->rtl ());
> +  /* Except for rounding mode patterns, the AVL TYPE operand
> +     is always the last operand.  */
> +  if (find_access (insn->uses (), VXRM_REGNUM)
> +      || find_access (insn->uses (), FRM_REGNUM))
> +    return recog_data.n_operands - 2;
> +  return recog_data.n_operands - 1;

Could we add some attribute like `vl_op_idx`?  Maintaining this magic
here is not a good idea IMO.
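
Something like (again, the attribute name is made up for
illustration):

  ;; In vector.md: record the operand index of the AVL TYPE operand
  ;; on each pattern, analogous to vl_op_idx.
  (define_attr "avl_type_idx" "" (const_int INVALID_ATTRIBUTE))

with per-pattern overrides, so the function body reduces to:

  return get_attr_avl_type_idx (insn->rtl ());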

> +}
> +
> +/* Main entry point for this pass.  */
> +unsigned int
> +pass_avlprop::execute (function *)
> +{
> +  avlprop_init ();
> +
> +  /* Go through all the instructions looking for AVL that we could propagate.  */
> +
> +  insn_info *next;
> +  bool change_p = true;
> +
> +  while (change_p)
> +    {
> +      /* Iterate over each instruction until no more changes are needed.  */
> +      change_p = false;
> +      for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)

Iterating backward should converge faster.  Also, I suggest adding a
pre-scan pass to collect all candidates, and then iterating over those
candidates only.

Maybe something like this:

for each insn in reverse order:
   if insn is candidate:
      put insn to candidate list

while (change_p)
{
  for each insn in candidate:
    ...
}
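
In rtl-ssa terms that could look like this (untested sketch; the
candidate predicate is only illustrative):

  /* Phase 1: collect candidates in reverse order.  */
  auto_vec<insn_info *> candidates;
  for (insn_info *insn = crtl->ssa->last_insn (); insn;
       insn = insn->prev_any_insn ())
    if (insn->is_real () && !insn->is_debug_insn ()
        && has_vl_op (insn->rtl ()))
      candidates.safe_push (insn);

  /* Phase 2: iterate on the candidates only, until a fixed point.  */
  bool change_p = true;
  while (change_p)
    {
      change_p = false;
      for (insn_info *insn : candidates)
        if (rtx avl = get_preferred_avl (insn))
          {
            bool existed;
            rtx &slot = avlprops->get_or_insert (insn, &existed);
            /* Only flag a change when the preferred AVL really
               changed, otherwise the loop never terminates.  */
            if (!existed || !rtx_equal_p (slot, avl))
              {
                slot = avl;
                change_p = true;
              }
          }
    }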
