On Tue, Dec 2, 2025 at 10:32 PM Robin Dapp <[email protected]> wrote:
>
> Similar to vec_extract this implements vec_sets that are present as
> subreg inserts.  Similar to a single element we can just slide up
> a vector with the TU policy.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-v.cc (expand_vector_subreg_insert):
>         New function.
>         (legitimize_move): Use new function.
> ---
>  gcc/config/riscv/riscv-v.cc | 87 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 87 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 1b22f9e948e..835947892e9 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -1944,6 +1944,80 @@ expand_vector_subreg_extract (rtx dest, rtx src)
>    return true;
>  }
>
> +/* Expand vector insertion into a SUBREG destination using slideup.
> +   Handles patterns like (subreg:V4DI (reg:V8DI) 32) by emitting
> +   a slideup instruction when inserting into non-low parts.
> +   Return true if the move was handled and emitted.  */
> +static bool
> +expand_vector_subreg_insert (rtx dest, rtx src)
> +{
> +  gcc_assert (SUBREG_P (dest) && REG_P (SUBREG_REG (dest)) && REG_P (src));
> +
> +  machine_mode mode = GET_MODE (src);
> +  machine_mode inner_mode = GET_MODE (SUBREG_REG (dest));
> +
> +  gcc_assert (VECTOR_MODE_P (mode));
> +  gcc_assert (VECTOR_MODE_P (inner_mode));
> +
> +  poly_uint16 outer_size = GET_MODE_BITSIZE (mode);
> +  poly_uint16 inner_size = GET_MODE_BITSIZE (inner_mode);
> +
> +  poly_uint16 factor;
> +  if (riscv_tuple_mode_p (inner_mode)
> +      || !multiple_p (inner_size, outer_size, &factor)
> +      || !factor.is_constant ()
> +      || !pow2p_hwi (factor.to_constant ())
> +      || factor.to_constant () <= 1)
> +    return false;
> +
> +  enum vlmul_type lmul = get_vlmul (mode);
> +  enum vlmul_type inner_lmul = get_vlmul (inner_mode);
> +
> +  /* These are just "renames".  */
> +  if ((inner_lmul == LMUL_2 || inner_lmul == LMUL_4 || inner_lmul == LMUL_8)
> +      && (lmul == LMUL_1 || lmul == LMUL_2 || lmul == LMUL_4))

Should we also check SUBREG_BYTE (dest) == 0 here?

> +    {
> +      /* Inserting into a non-zero part means we need to slide up.  */
> +      poly_uint64 slide_count = part * outer_nunits;
> +
> +      /* First, broadcast the source value into a temporary vector.  */
> +      rtx tmp = gen_reg_rtx (inner_mode);
> +
> +      /* Create a vector with src in the low part.  */
> +      rtx low_tmp = gen_lowpart (mode, tmp);
> +      emit_insn (gen_rtx_SET (low_tmp, src));
> +
> +      /* Slide it up to the correct position in inner_reg.
> +        Use TUMA (tail-undisturbed, mask-undisturbed) to preserve

Typo here: s/mask-undisturbed/mask-agnostic/ (the "MA" in TUMA stands for mask-agnostic).

Reply via email to