Re: [PATCH] RISC-V: -mrvv-max-lmul=conv-dynamic [PR122846].

Kito Cheng Tue, 16 Dec 2025 05:07:59 -0800

Ack, I am interested in this patch and will review it in the next few days.


On Fri, Dec 12, 2025 at 10:22 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> As discussed in the patchwork sync, this patch adds a dynamic LMUL mode
> that sets the LMUL to the ratio of the largest to the smallest type size
> in a loop, with the maximum being LMUL8.
>
> This is supposed to imitate what other architectures implicitly do via
> vec_unpack_hi/lo.  I have only done cursory testing, so more coverage
> would obviously be preferred.
>
> Regtested on rv64gcv_zvl512b.
>
> Regards
>  Robin
>
>         PR target/122846
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-opts.h (enum rvv_max_lmul_enum): Add
>         RVV_CONV_DYNAMIC.
>         (TARGET_MAX_LMUL): Treat RVV_CONV_DYNAMIC like RVV_DYNAMIC.
>         * config/riscv/riscv-string.cc (use_vector_stringop_p): Use
>         LMUL1 for RVV_CONV_DYNAMIC.
>         (expand_rawmemchr): Ditto.
>         (expand_strcmp): Ditto.
>         (check_vectorise_memory_operation): Ditto.
>         * config/riscv/riscv-vector-costs.cc (get_smallest_mode):
>         New function.
>         (compute_lmul_from_conversion_ratio): Calculate LMUL from
>         largest/smallest type.
>         (costs::has_unexpected_spills_p): Split.
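>         (costs::compute_local_live_ranges): Also compute the smallest
>         mode and return it via a new optional parameter.
>         (costs::compute_live_ranges_and_lmul): Compute smallest type and
>         call new function.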
>         (costs::cleanup_live_range_data): New function.
>         (costs::compute_conversion_dynamic_lmul): New function.
>         (costs::record_potential_unexpected_spills): Use new function.
>         (costs::better_main_loop_than_p): Allow appropriate LMUL.
>         * config/riscv/riscv-vector-costs.h: Declare new functions and
>         members.
>         * config/riscv/riscv.opt: New option
>         -mrvv-max-lmul=conv-dynamic.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/pr122846.c: New test.
> ---
>  gcc/config/riscv/riscv-opts.h                 |   7 +-
>  gcc/config/riscv/riscv-string.cc              |  26 +-
>  gcc/config/riscv/riscv-vector-costs.cc        | 226 ++++++++++++++----
>  gcc/config/riscv/riscv-vector-costs.h         |  17 +-
>  gcc/config/riscv/riscv.opt                    |   3 +
>  .../riscv/rvv/autovec/dyn-lmul-conv-1.c       |  42 ++++
>  .../riscv/rvv/autovec/dyn-lmul-conv-2.c       |  43 ++++
>  .../gcc.target/riscv/rvv/autovec/pr122846.c   |  14 ++
>  8 files changed, 320 insertions(+), 58 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
>
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index 9b92a965e27..c6a09d59620 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -86,7 +86,9 @@ enum rvv_max_lmul_enum {
>    RVV_M4 = 4,
>    RVV_M8 = 8,
>    /* For dynamic LMUL, we compare COST start with LMUL8.  */
> -  RVV_DYNAMIC = 9
> +  RVV_DYNAMIC = 9,
> +  /* For dynamic LMUL based on conversions, set LMUL based on type size ratio.  */
> +  RVV_CONV_DYNAMIC = 10
>  };
>
>  enum riscv_multilib_select_kind {
> @@ -155,7 +157,8 @@ enum rvv_vector_bits_enum {
>
>  /* The maximum LMUL according to user configuration.  */
>  #define TARGET_MAX_LMUL                                                      
>   \
> -  (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
> +  (int) ((rvv_max_lmul == RVV_DYNAMIC || rvv_max_lmul == RVV_CONV_DYNAMIC) \
> +        ? RVV_M8 : rvv_max_lmul)
>
>  /* TLS types.  */
>  enum riscv_tls_type {
> diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
> index c5710e4c896..ac9b19213a0 100644
> --- a/gcc/config/riscv/riscv-string.cc
> +++ b/gcc/config/riscv/riscv-string.cc
> @@ -1089,13 +1089,17 @@ use_vector_stringop_p (struct stringop_info &info, 
> HOST_WIDE_INT max_ew,
>    if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR))
>      return false;
>
> +  int max_lmul = TARGET_MAX_LMUL;
> +  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +    max_lmul = RVV_M1;
> +
>    if (CONST_INT_P (length_in))
>      {
>        HOST_WIDE_INT length = INTVAL (length_in);
>
>        /* If the VLEN and preferred LMUL allow the entire block to be copied 
> in
>          one go then no loop is needed.  */
> -      if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
> +      if (known_le (length, BYTES_PER_RISCV_VECTOR * max_lmul))
>         {
>           need_loop = false;
>
> @@ -1130,10 +1134,10 @@ use_vector_stringop_p (struct stringop_info &info, 
> HOST_WIDE_INT max_ew,
>           poly_int64 nunits;
>
>           if (need_loop)
> -           per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
> +           per_iter = BYTES_PER_RISCV_VECTOR * max_lmul;
>           else
>             per_iter = length;
> -         /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by
> +         /* BYTES_PER_RISCV_VECTOR * MAX_LMUL may not be divisible by
>              this potential_ew.  */
>           if (!multiple_p (per_iter, potential_ew, &nunits))
>             continue;
> @@ -1164,7 +1168,7 @@ use_vector_stringop_p (struct stringop_info &info, 
> HOST_WIDE_INT max_ew,
>                  pointless.
>                  Still, by choosing a lower LMUL factor that still allows
>                  an entire transfer, we can reduce register pressure.  */
> -             for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
> +             for (int lmul = 1; lmul < max_lmul; lmul <<= 1)
>                 if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
>                     && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, 
> potential_ew,
>                                    &mode_units)
> @@ -1177,9 +1181,9 @@ use_vector_stringop_p (struct stringop_info &info, 
> HOST_WIDE_INT max_ew,
>           if (vmode != VOIDmode)
>             break;
>
> -         /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be 
> divisible
> +         /* BYTES_PER_RISCV_VECTOR * MAX_LMUL will at least be divisible
>              by potential_ew 1, so this should succeed eventually.  */
> -         if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
> +         if (multiple_p (BYTES_PER_RISCV_VECTOR * max_lmul,
>                           potential_ew, &mode_units)
>               && riscv_vector::get_vector_mode (elem_mode,
>                                                 mode_units).exists (&vmode))
> @@ -1195,7 +1199,7 @@ use_vector_stringop_p (struct stringop_info &info, 
> HOST_WIDE_INT max_ew,
>      }
>    else
>      {
> -      gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode));
> +      gcc_assert (get_lmul_mode (QImode, max_lmul).exists (&vmode));
>      }
>
>    /* A memcpy libcall in the worst case takes 3 instructions to prepare the
> @@ -1356,6 +1360,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx 
> haystack, rtx needle,
>
>    unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
>    int lmul = TARGET_MAX_LMUL;
> +  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +    lmul = RVV_M1;
>    poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
>
>    machine_mode vmode;
> @@ -1455,6 +1461,8 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx 
> nbytes,
>    machine_mode mode = E_QImode;
>    unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
>    int lmul = TARGET_MAX_LMUL;
> +  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +    lmul = RVV_M1;
>    poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
>
>    machine_mode vmode;
> @@ -1606,7 +1614,9 @@ check_vectorise_memory_operation (rtx length_in, 
> HOST_WIDE_INT &lmul_out)
>    if (rvv_max_lmul != RVV_DYNAMIC)
>      {
>        lmul_out = TARGET_MAX_LMUL;
> -      return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8));
> +      if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +       lmul_out = RVV_M1;
> +      return (length <= ((lmul_out * TARGET_MIN_VLEN) / 8));
>      }
>
>    /* Find smallest lmul large enough for entire op.  */
> diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
> index 27ced61e815..41b4e4860b0 100644
> --- a/gcc/config/riscv/riscv-vector-costs.cc
> +++ b/gcc/config/riscv/riscv-vector-costs.cc
> @@ -258,6 +258,14 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2)
>    return mode1_size >= mode2_size ? mode1 : mode2;
>  }
>
> +static machine_mode
> +get_smallest_mode (machine_mode mode1, machine_mode mode2)
> +{
> +  unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant ();
> +  unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant ();
> +  return mode1_size <= mode2_size ? mode1 : mode2;
> +}
> +
>  /* Return true if OP is invariant.  */
>
>  static bool
> @@ -361,9 +369,11 @@ machine_mode
>  costs::compute_local_live_ranges (
>    loop_vec_info loop_vinfo,
>    const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
> -  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
> +  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
> +  machine_mode *smallest_mode_out)
>  {
>    machine_mode biggest_mode = QImode;
> +  machine_mode smallest_mode = TImode;
>    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>    if (!program_points_per_bb.is_empty ())
>      {
> @@ -396,6 +406,8 @@ costs::compute_local_live_ranges (
>                 {
>                   biggest_mode = get_biggest_mode (biggest_mode,
>                                                    TYPE_MODE (TREE_TYPE 
> (lhs)));
> +                 smallest_mode = get_smallest_mode (smallest_mode,
> +                                                    TYPE_MODE (TREE_TYPE (lhs)));
>                   bool existed_p = false;
>                   pair &live_range
>                     = live_ranges->get_or_insert (lhs, &existed_p);
> @@ -415,6 +427,9 @@ costs::compute_local_live_ranges (
>                       biggest_mode
>                         = get_biggest_mode (biggest_mode,
>                                             TYPE_MODE (TREE_TYPE (var)));
> +                     smallest_mode
> +                       = get_smallest_mode (smallest_mode,
> +                                            TYPE_MODE (TREE_TYPE (var)));
>                       bool existed_p = false;
>                       pair &live_range
>                         = live_ranges->get_or_insert (var, &existed_p);
> @@ -445,6 +460,8 @@ costs::compute_local_live_ranges (
>                                   (*r).second = MAX (point, (*r).second);
>                                   biggest_mode = get_biggest_mode (
>                                     biggest_mode, TYPE_MODE (TREE_TYPE 
> (arg)));
> +                                 smallest_mode = get_smallest_mode (
> +                                   smallest_mode, TYPE_MODE (TREE_TYPE (arg)));
>                                 }
>                             }
>                           else
> @@ -464,8 +481,14 @@ costs::compute_local_live_ranges (
>         }
>      }
>    if (dump_enabled_p ())
> -    dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
> -                    GET_MODE_NAME (biggest_mode));
> +    {
> +      dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
> +                      GET_MODE_NAME (biggest_mode));
> +      dump_printf_loc (MSG_NOTE, vect_location, "Smallest mode = %s\n",
> +                      GET_MODE_NAME (smallest_mode));
> +    }
> +  if (smallest_mode_out)
> +    *smallest_mode_out = smallest_mode;
>    return biggest_mode;
>  }
>
> @@ -639,6 +662,25 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, 
> machine_mode mode)
>    return 0;
>  }
>
> +/* Compute LMUL based on the ratio of biggest to smallest type size.
> +   This is used for RVV_CONV_DYNAMIC.  */
> +static int
> +compute_lmul_from_conversion_ratio (machine_mode biggest_mode,
> +                                   machine_mode smallest_mode)
> +{
> +  gcc_assert (GET_MODE_BITSIZE (biggest_mode).is_constant ());
> +  gcc_assert (GET_MODE_BITSIZE (smallest_mode).is_constant ());
> +
> +  unsigned int biggest_size = GET_MODE_BITSIZE (biggest_mode).to_constant ();
> +  unsigned int smallest_size = GET_MODE_BITSIZE (smallest_mode).to_constant ();
> +
> +  int lmul = biggest_size / smallest_size;
> +  lmul = std::min (lmul, (int) RVV_M8);
> +  lmul = std::max (lmul, (int) RVV_M1);
> +
> +  return lmul;
> +}
> +
>  /* Update the live ranges according PHI.
>
>     Loop:
> @@ -825,56 +867,37 @@ costs::update_local_live_ranges (
>      }
>  }
>
> -/* Compute the maximum live V_REGS.  */
> -bool
> -costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
> +/* Helper to compute live ranges, modes, and LMUL.  */
> +void
> +costs::compute_live_ranges_and_lmul (loop_vec_info loop_vinfo,
> +  hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
> +  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
> +  machine_mode &biggest_mode, machine_mode &smallest_mode, int &lmul)
>  {
> -  /* Compute local program points.
> -     It's a fast and effective computation.  */
> -  hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
>    compute_local_program_points (loop_vinfo, program_points_per_bb);
>
> -  /* Compute local live ranges.  */
> -  hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
> -  machine_mode biggest_mode
> -    = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
> -                                live_ranges_per_bb);
> +  smallest_mode = TImode;
> +  biggest_mode = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
> +                                           live_ranges_per_bb, &smallest_mode);
>
> -  /* Update live ranges according to PHI.  */
>    update_local_live_ranges (loop_vinfo, program_points_per_bb,
>                             live_ranges_per_bb, &biggest_mode);
>
> -  int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
> +  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +    lmul = compute_lmul_from_conversion_ratio (biggest_mode, smallest_mode);
> +  else
> +    lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
> +
>    gcc_assert (lmul <= RVV_M8);
> -  /* TODO: We calculate the maximum live vars base on current STMTS
> -     sequence.  We can support live range shrink if it can give us
> -     big improvement in the future.  */
> -  if (lmul > RVV_M1)
> -    {
> -      if (!live_ranges_per_bb.is_empty ())
> -       {
> -         unsigned int max_nregs = 0;
> -         for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
> -              = live_ranges_per_bb.begin ();
> -              iter != live_ranges_per_bb.end (); ++iter)
> -           {
> -             basic_block bb = (*iter).first;
> -             unsigned int max_point
> -               = (*program_points_per_bb.get (bb)).length () + 1;
> -             if ((*iter).second.is_empty ())
> -               continue;
> -             /* We prefer larger LMUL unless it causes register spillings. */
> -             unsigned int nregs
> -               = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
> -                                          max_point, biggest_mode, lmul);
> -             if (nregs > max_nregs)
> -               max_nregs = nregs;
> -           }
> -         live_ranges_per_bb.empty ();
> -         if (max_nregs > V_REG_NUM)
> -           return true;
> -       }
> -    }
> +}
> +
> +/* Helper to clean up live range data structures.  */
> +void
> +costs::cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>>
> +                               &program_points_per_bb,
> +                               hash_map<basic_block, hash_map<tree, pair>>
> +                               &live_ranges_per_bb)
> +{
>    if (!program_points_per_bb.is_empty ())
>      {
>        for (hash_map<basic_block, vec<stmt_point>>::iterator iter
> @@ -887,7 +910,72 @@ costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
>         }
>        program_points_per_bb.empty ();
>      }
> -  return false;
> +  live_ranges_per_bb.empty ();
> +}
> +
> +/* Compute LMUL for RVV_CONV_DYNAMIC mode based on conversion ratio.  */
> +void
> +costs::compute_conversion_dynamic_lmul (loop_vec_info loop_vinfo)
> +{
> +  hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
> +  hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
> +  machine_mode biggest_mode, smallest_mode;
> +  int lmul;
> +
> +  compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
> +                               live_ranges_per_bb, biggest_mode,
> +                               smallest_mode, lmul);
> +
> +  /* Store the computed LMUL and biggest mode for later comparison
> +     in cost model.  */
> +  m_computed_lmul_from_conv = lmul;
> +  m_biggest_mode_for_conv = biggest_mode;
> +
> +  cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
> +}
> +
> +/* Compute the maximum live V_REGS and check for unexpected spills.  */
> +bool
> +costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
> +{
> +  hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
> +  hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
> +  machine_mode biggest_mode, smallest_mode;
> +  int lmul;
> +
> +  compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
> +                               live_ranges_per_bb, biggest_mode,
> +                               smallest_mode, lmul);
> +
> +  /* TODO: We calculate the maximum live vars based on the current STMTS
> +     sequence.  We can support live range shrinking if it can give us
> +     a big improvement in the future.  */
> +  bool has_spills = false;
> +  if (lmul > RVV_M1 && !live_ranges_per_bb.is_empty ())
> +    {
> +      unsigned int max_nregs = 0;
> +      for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
> +          = live_ranges_per_bb.begin ();
> +          iter != live_ranges_per_bb.end (); ++iter)
> +       {
> +         basic_block bb = (*iter).first;
> +         unsigned int max_point
> +           = (*program_points_per_bb.get (bb)).length () + 1;
> +         if ((*iter).second.is_empty ())
> +           continue;
> +         /* We prefer larger LMUL unless it causes register spillings.  */
> +         unsigned int nregs
> +           = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
> +                                      max_point, biggest_mode, lmul);
> +         if (nregs > max_nregs)
> +           max_nregs = nregs;
> +       }
> +      if (max_nregs > V_REG_NUM)
> +       has_spills = true;
> +    }
> +
> +  cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
> +  return has_spills;
>  }
>
>  costs::costs (vec_info *vinfo, bool costing_for_scalar)
> @@ -937,6 +1025,8 @@ costs::record_potential_unexpected_spills (loop_vec_info 
> loop_vinfo)
>        if (!post_dom_available_p)
>         free_dominance_info (CDI_POST_DOMINATORS);
>      }
> +  else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +    compute_conversion_dynamic_lmul (loop_vinfo);
>  }
>
>  /* Decide whether to use the unrolling heuristic described above
> @@ -1033,6 +1123,50 @@ costs::better_main_loop_than_p (const vector_costs 
> *uncast_other) const
>           return other_prefer_unrolled;
>         }
>      }
> +  else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
> +    {
> +      if (this->m_computed_lmul_from_conv > 0
> +         && other->m_computed_lmul_from_conv > 0
> +         && this->m_biggest_mode_for_conv != VOIDmode)
> +       {
> +         int this_vf = vect_vf_for_cost (this_loop_vinfo);
> +         int other_vf = vect_vf_for_cost (other_loop_vinfo);
> +
> +         /* Get element size from the biggest mode.  */
> +         unsigned int element_bits
> +           = GET_MODE_BITSIZE (this->m_biggest_mode_for_conv).to_constant ();
> +
> +         /* Estimate LMUL from VF * element_size / MIN_VLEN.  */
> +         int this_lmul = (this_vf * element_bits) / TARGET_MIN_VLEN;
> +         int other_lmul = (other_vf * element_bits) / TARGET_MIN_VLEN;
> +
> +         /* Clamp to valid LMUL range.  */
> +         this_lmul = MAX (1, MIN (this_lmul, 8));
> +         other_lmul = MAX (1, MIN (other_lmul, 8));
> +
> +         int target_lmul = this->m_computed_lmul_from_conv;
> +
> +         /* Prefer the LMUL that exactly matches our computed ratio.  */
> +         if (this_lmul == target_lmul && other_lmul != target_lmul)
> +           {
> +             if (dump_enabled_p ())
> +               dump_printf_loc (MSG_NOTE, vect_location,
> +                                "Preferring LMUL=%d loop because it matches"
> +                                " conversion ratio (other LMUL=%d)\n",
> +                                this_lmul, other_lmul);
> +             return true;
> +           }
> +         else if (this_lmul != target_lmul && other_lmul == target_lmul)
> +           {
> +             if (dump_enabled_p ())
> +               dump_printf_loc (MSG_NOTE, vect_location,
> +                                "Preferring other LMUL=%d loop because it matches"
> +                                " conversion ratio (this LMUL=%d)\n",
> +                                other_lmul, this_lmul);
> +             return false;
> +           }
> +       }
> +    }
>    else if (rvv_max_lmul == RVV_DYNAMIC)
>      {
>        if (other->m_has_unexpected_spills_p)
> diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h
> index b84ceb1d3cf..89f813c3d98 100644
> --- a/gcc/config/riscv/riscv-vector-costs.h
> +++ b/gcc/config/riscv/riscv-vector-costs.h
> @@ -106,6 +106,11 @@ private:
>    bool m_has_unexpected_spills_p = false;
>    void record_potential_unexpected_spills (loop_vec_info);
>
> +  /* For RVV_CONV_DYNAMIC mode, store the LMUL computed from conversion ratio
> +     and the biggest mode used in the computation.  */
> +  int m_computed_lmul_from_conv = 0;
> +  machine_mode m_biggest_mode_for_conv = VOIDmode;
> +
>    void compute_local_program_points (vec_info *,
>                                      hash_map<basic_block, vec<stmt_point>> 
> &);
>    void update_local_live_ranges (vec_info *,
> @@ -114,9 +119,17 @@ private:
>                                  machine_mode *);
>    machine_mode compute_local_live_ranges
>      (loop_vec_info, const hash_map<basic_block, vec<stmt_point>> &,
> -     hash_map<basic_block, hash_map<tree, pair>> &);
> -
> +     hash_map<basic_block, hash_map<tree, pair>> &,
> +     machine_mode * = nullptr);
> +
> +  void compute_live_ranges_and_lmul (loop_vec_info,
> +                                    hash_map<basic_block, vec<stmt_point>> &,
> +                                    hash_map<basic_block, hash_map<tree, pair>> &,
> +                                    machine_mode &, machine_mode &, int &);
> +  void cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> &,
> +                               hash_map<basic_block, hash_map<tree, pair>> &);
>    bool has_unexpected_spills_p (loop_vec_info);
> +  void compute_conversion_dynamic_lmul (loop_vec_info);
>    bool need_additional_vector_vars_p (stmt_vec_info, slp_tree);
>
>    void adjust_vect_cost_per_loop (loop_vec_info);
> diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
> index 452062c6500..de7730a8961 100644
> --- a/gcc/config/riscv/riscv.opt
> +++ b/gcc/config/riscv/riscv.opt
> @@ -313,6 +313,9 @@ Enum(rvv_max_lmul) String(m8) Value(RVV_M8)
>  EnumValue
>  Enum(rvv_max_lmul) String(dynamic) Value(RVV_DYNAMIC)
>
> +EnumValue
> +Enum(rvv_max_lmul) String(conv-dynamic) Value(RVV_CONV_DYNAMIC)
> +
>  mrvv-max-lmul=
>  Target RejectNegative Joined Enum(rvv_max_lmul) Var(rvv_max_lmul) 
> Init(RVV_M1)
>  -mrvv-max-lmul=<string>        Set the RVV LMUL of auto-vectorization.
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
> new file mode 100644
> index 00000000000..b07bd86f76e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
> +
> +void foo2x1 (short *restrict a, char *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo2x2 (int *restrict a, short *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo2x3 (long *restrict a, int *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo4x1 (int *restrict a, char *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo4x2 (long *restrict a, short *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo8x (long *restrict a, char *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +/* { dg-final { scan-assembler-times ",m2," 3 } } */
> +/* { dg-final { scan-assembler-times ",m4," 2 } } */
> +/* { dg-final { scan-assembler-times ",m8," 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
> new file mode 100644
> index 00000000000..c37e4dd63f2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
> +
> +void foo2x1 (unsigned char *restrict a, unsigned short *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo2x2 (unsigned short *restrict a, unsigned int *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo2x3 (unsigned int *restrict a, unsigned long *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo4x1 (unsigned char *restrict a, unsigned int *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo4x2 (unsigned short *restrict a, unsigned long *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +void foo8x (unsigned char *restrict a, unsigned long *restrict b, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    a[i] = b[i];
> +}
> +
> +/* { dg-final { scan-assembler-times ",m1," 6 } } */
> +/* { dg-final { scan-assembler-times ",m2," 3 } } */
> +/* { dg-final { scan-assembler-times ",m4," 1 } } */
> +/* { dg-final { scan-assembler-not ",mf2," } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
> new file mode 100644
> index 00000000000..7753a66cd96
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
> +
> +int
> +foo (const char *x, const char *y)
> +{
> +  int sum = 0;
> +  for (int i = 0; i < 1024; i++)
> +    sum += x[i] * y[i];
> +  return sum;
> +}
> +
> +/* One for the initial value, one for the reduction.  */
> +/* { dg-final { scan-assembler-times ",m4," 2 } } */
> --
> 2.51.1
>

Re: [PATCH] RISC-V: -mrvv-max-lmul=conv-dynamic [PR122846].

Reply via email to