> +using namespace rtl_ssa;
> +using namespace riscv_vector;
> +
> +/* The AVL propagation instructions and corresponding preferred AVL.
> +   It will be updated during the analysis.  */
> +static hash_map<insn_info *, rtx> *avlprops;

Maybe put into member data of pass_avlprop?
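Something like this (just a sketch; m_avlprops is only an illustrative
name):

  class pass_avlprop : public rtl_opt_pass
  {
  public:
    pass_avlprop (gcc::context *ctxt)
      : rtl_opt_pass (pass_data_avlprop, ctxt) {}
    virtual unsigned int execute (function *) final override;

  private:
    /* Owned by the pass object instead of being a file-scope static.  */
    hash_map<insn_info *, rtx> *m_avlprops = nullptr;
  };

Then avlprop_init/avlprop_done naturally become the member functions that
allocate and free it.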
> +
> +const pass_data pass_data_avlprop = {
> +  RTL_PASS, /* type */
> +  "avlprop", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_avlprop : public rtl_opt_pass
> +{
> +public:
> +  pass_avlprop (gcc::context *ctxt) : rtl_opt_pass (pass_data_avlprop, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *) final override
> +  {
> +    return TARGET_VECTOR && optimize > 0;
> +  }
> +  virtual unsigned int execute (function *) final override;
> +}; // class pass_avlprop
> +
> +static void
> +avlprop_init (void)

Maybe put into a member function of pass_avlprop?

> +{
> +  calculate_dominance_info (CDI_DOMINATORS);
> +  df_analyze ();
> +  crtl->ssa = new function_info (cfun);

And take the function * from the incoming parameter of execute.

> +  avlprops = new hash_map<insn_info *, rtx>;
> +}
> +
> +static void
> +avlprop_done (void)
> +{
> +  free_dominance_info (CDI_DOMINATORS);
> +  if (crtl->ssa->perform_pending_updates ())
> +    cleanup_cfg (0);
> +  delete crtl->ssa;
> +  crtl->ssa = nullptr;
> +  delete avlprops;
> +  avlprops = NULL;
> +}
> +
> +/* Helper function to get the AVL operand.  */
> +static rtx
> +get_avl (insn_info *insn, bool avlprop_p)
> +{
> +  if (get_attr_avl_type (insn->rtl ()) == INVALID_ATTRIBUTE
> +      || get_attr_avl_type (insn->rtl ()) == VLS)
> +    return NULL_RTX;
> +  if (avlprop_p)
> +    {
> +      if (avlprops->get (insn))
> +        return (*avlprops->get (insn));
> +      else if (vlmax_avl_type_p (insn->rtl ()))
> +        return RVV_VLMAX;

I guess I didn't get why we need to handle vlmax_avl_type_p here?

> +    }
> +  extract_insn_cached (insn->rtl ());
> +  return recog_data.operand[get_attr_vl_op_idx (insn->rtl ())];
> +}
> +
> +/* This is a straightforward pattern ALWAYS seen in partial
> +   auto-vectorization:
> +
> +     VL = SELECT_AVL (AVL, ...)
> +     V0 = MASK_LEN_LOAD (..., VL)
> +     V1 = MASK_LEN_LOAD (..., VL)
> +     V2 = V0 + V1 --- Missed LEN information.
> +     MASK_LEN_STORE (..., V2, VL)
> +
> +   We prefer PLUS_EXPR (V0 + V1) instead of COND_LEN_ADD (V0, V1, dummy LEN)
> +   because:
> +
> +   - Few code changes in the loop vectorizer.
> +   - Reuse the current clean flow of partial vectorization, that is, apply
> +     the predicate LEN or MASK to LOAD/STORE operations and other special
> +     arithmetic operations (e.g. DIV), then do the whole-vector-register
> +     operation if it DOESN'T affect the correctness.
> +     Such a flow is used by all other targets like x86, sve, s390, ... etc.
> +   - PLUS_EXPR has better gimple optimizations than COND_LEN_ADD.
> +
> +   We propagate AVL from NON-VLMAX to VLMAX for gimple IR like PLUS_EXPR
> +   which generates the VLMAX instruction due to missed LEN information.
> +   The later VSETVL PASS will elide the redundant vsetvls.  */
> +
> +static rtx
> +get_autovectorize_preferred_avl (insn_info *insn)
> +{
> +  if (!vlmax_avl_p (get_avl (insn, true)) || !tail_agnostic_p (insn->rtl ()))
> +    return NULL_RTX;

I would prefer adding a new attribute to make this simpler.
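For example (sketch only; "avlprop_vlmax_ta" is a made-up attribute name,
not something that exists in vector.md today):

  /* Assuming a new boolean vector.md attribute that is "true" exactly on
     tail-agnostic instructions whose AVL type is VLMAX, the check above
     collapses to a single generated accessor call.  */
  if (get_attr_avlprop_vlmax_ta (insn->rtl ()) != AVLPROP_VLMAX_TA_TRUE)
    return NULL_RTX;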
> +
> +  rtx use_avl = NULL_RTX;
> +  insn_info *avl_use_insn = nullptr;
> +  unsigned int ratio
> +    = calculate_ratio (get_sew (insn->rtl ()), get_vlmul (insn->rtl ()));
> +  for (def_info *def : insn->defs ())
> +    {
> +      auto set = safe_dyn_cast<set_info *> (def);
> +      if (!set || !set->is_reg ())
> +        return NULL_RTX;
> +      for (use_info *use : set->all_uses ())
> +        {
> +          if (!use->is_in_nondebug_insn ())
> +            return NULL_RTX;
> +          insn_info *use_insn = use->insn ();
> +          /* FIXME: Stop AVL propagation if any USE is not an RVV real
> +             instruction.  It should be totally enough for vectorized code
> +             since it is always located in extended basic blocks.
> +
> +             TODO: We can extend PHI checking for intrinsic code if it is
> +             necessary in the future.  */
> +          if (use_insn->is_artificial () || !has_vtype_op (use_insn->rtl ()))
> +            return NULL_RTX;
> +          if (!has_vl_op (use_insn->rtl ()))
> +            continue;
> +
> +          rtx new_use_avl = get_avl (use_insn, true);
> +          if (!new_use_avl)
> +            return NULL_RTX;
> +          if (!use_avl)
> +            use_avl = new_use_avl;
> +          if (!rtx_equal_p (use_avl, new_use_avl)
> +              || calculate_ratio (get_sew (use_insn->rtl ()),
> +                                  get_vlmul (use_insn->rtl ()))
> +                   != ratio
> +              || vlmax_avl_p (new_use_avl)
> +              || !tail_agnostic_p (use_insn->rtl ()))
> +            return NULL_RTX;
> +          if (!avl_use_insn)
> +            avl_use_insn = use_insn;
> +        }
> +    }
> +
> +  if (use_avl && register_operand (use_avl, Pmode))
> +    {
> +      gcc_assert (avl_use_insn);
> +      /* Find a definition at or neighboring INSN.  */
> +      resource_info resource = full_register (REGNO (use_avl));
> +      def_lookup dl1 = crtl->ssa->find_def (resource, insn);
> +      def_lookup dl2 = crtl->ssa->find_def (resource, avl_use_insn);
> +      if (dl1.matching_set () || dl2.matching_set ())
> +        return NULL_RTX;
> +      def_info *def1 = dl1.last_def_of_prev_group ();
> +      def_info *def2 = dl2.last_def_of_prev_group ();
> +      if (def1 != def2)
> +        return NULL_RTX;
> +      /* FIXME: We only allow AVL propagation within a block, which should
> +         be totally enough for vectorized code.
> +
> +         TODO: We can enhance it here for intrinsic code in the future
> +         if it is necessary.  */
> +      if (def1->insn ()->bb () != insn->bb ()
> +          || def1->insn ()->compare_with (insn) >= 0)
> +        return NULL_RTX;
> +    }
> +  return use_avl;
> +}
> +
> +/* If we have a preferred AVL to propagate, return the AVL.
> +   Otherwise, return NULL_RTX as we don't have any preferred AVL.  */
> +
> +static rtx
> +get_preferred_avl (insn_info *insn)
> +{
> +  /* TODO: We only do AVL propagation for missed-LEN partial
> +     autovectorization for now.  We could add more AVL propagation
> +     for intrinsic code in the future.  */
> +  return get_autovectorize_preferred_avl (insn);
> +}
> +
> +/* Return the AVL TYPE operand index.  */
> +static int
> +get_avl_type_index (insn_info *insn)
> +{
> +  extract_insn_cached (insn->rtl ());
> +  /* Except for rounding-mode patterns, the AVL TYPE operand
> +     is always the last operand.  */
> +  if (find_access (insn->uses (), VXRM_REGNUM)
> +      || find_access (insn->uses (), FRM_REGNUM))
> +    return recog_data.n_operands - 2;
> +  return recog_data.n_operands - 1;
> +}

Could we add some attribute like `vl_op_idx`?  Maintaining this magic here
is not a good idea IMO.
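For example (sketch; "avl_type_op_idx" is a made-up attribute name here,
mirroring the existing "vl_op_idx" attribute the patch already uses):

  /* With the operand index recorded per pattern in vector.md, the index
     no longer needs to be derived from n_operands and the VXRM/FRM
     uses.  */
  static int
  get_avl_type_index (insn_info *insn)
  {
    return get_attr_avl_type_op_idx (insn->rtl ());
  }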
> +
> +/* Main entry point for this pass.  */
> +unsigned int
> +pass_avlprop::execute (function *)
> +{
> +  avlprop_init ();
> +
> +  /* Go through all the instructions looking for AVL that we could
> +     propagate.  */
> +
> +  insn_info *next;
> +  bool change_p = true;
> +
> +  while (change_p)
> +    {
> +      /* Iterate on each instruction until no more changes are needed.  */
> +      change_p = false;
> +      for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)

Iterating backward should converge faster.  Also, I suggest adding a
pre-scan pass to collect all candidates, and then iterating over those
candidates only.  Maybe something like this:

  for each insn in reverse order:
    if insn is candidate:
      put insn into candidate list

  while (change_p)
    {
      for each insn in candidates:
        ...
    }
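In C++ terms, roughly (untested sketch; the candidate test below is just a
placeholder for whatever get_preferred_avl actually requires):

  /* Pre-scan backward once: only real RVV instructions with a VL
     operand can ever be candidates.  */
  auto_vec<insn_info *> candidates;
  for (insn_info *insn = crtl->ssa->last_insn (); insn;
       insn = insn->prev_any_insn ())
    if (insn->is_real ()
        && has_vtype_op (insn->rtl ())
        && has_vl_op (insn->rtl ()))
      candidates.safe_push (insn);

  /* Then iterate only over the candidates until a fixpoint.  */
  bool change_p = true;
  while (change_p)
    {
      change_p = false;
      for (insn_info *insn : candidates)
        {
          rtx preferred_avl = get_preferred_avl (insn);
          /* Record a preferred AVL only once; re-recording the same AVL
             must not count as a change or the loop never terminates.  */
          if (preferred_avl && !avlprops->get (insn))
            {
              avlprops->put (insn, preferred_avl);
              change_p = true;
            }
        }
    }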