Similar to vec_extract, this implements vec_sets that appear as subreg
inserts.  As with a single-element insert, we can just slide the source
vector up into place using the TU policy.
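
For example,

  (set (subreg:V4DI (reg:V8DI) 32) (reg:V4DI))

inserts into the upper half of the V8DI register.  We expand this into
a V8DI vslideup of a temporary whose low part holds the V4DI source,
sliding up by four elements with VL = 8 and the TU policy; the lower
half of the destination is left undisturbed.
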
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_vector_subreg_insert):
New function.
(legitimize_move): Use new function.
---
gcc/config/riscv/riscv-v.cc | 87 +++++++++++++++++++++++++++++++++++++
1 file changed, 87 insertions(+)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 38853000c38..7bd361c8210 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1943,6 +1943,80 @@ expand_vector_subreg_extract (rtx dest, rtx src)
return true;
}
+/* Expand vector insertion into a SUBREG destination using slideup.
+ Handles patterns like (subreg:V4DI (reg:V8DI) 32) by emitting
+ a slideup instruction when inserting into non-low parts.
+ Return true if the move was handled and emitted. */
+static bool
+expand_vector_subreg_insert (rtx dest, rtx src)
+{
+ gcc_assert (SUBREG_P (dest) && REG_P (SUBREG_REG (dest)) && REG_P (src));
+
+ machine_mode mode = GET_MODE (src);
+ machine_mode inner_mode = GET_MODE (SUBREG_REG (dest));
+
+ gcc_assert (VECTOR_MODE_P (mode));
+ gcc_assert (VECTOR_MODE_P (inner_mode));
+
+ poly_uint16 outer_size = GET_MODE_BITSIZE (mode);
+ poly_uint16 inner_size = GET_MODE_BITSIZE (inner_mode);
+
+ poly_uint16 factor;
+ if (riscv_tuple_mode_p (inner_mode)
+ || !multiple_p (inner_size, outer_size, &factor)
+ || !factor.is_constant ()
+ || !pow2p_hwi (factor.to_constant ())
+ || factor.to_constant () <= 1)
+ return false;
+
+ enum vlmul_type lmul = get_vlmul (mode);
+ enum vlmul_type inner_lmul = get_vlmul (inner_mode);
+
+  /* These are just whole-register "renames"; no slide is needed.  */
+ if ((inner_lmul == LMUL_2 || inner_lmul == LMUL_4 || inner_lmul == LMUL_8)
+ && (lmul == LMUL_1 || lmul == LMUL_2 || lmul == LMUL_4))
+ return false;
+
+ poly_uint64 outer_nunits = GET_MODE_NUNITS (mode);
+ poly_uint64 subreg_byte = SUBREG_BYTE (dest);
+
+  /* Calculate which part we're inserting into (0 for the low part,
+     1 for the next part, and so on).  */
+ uint64_t part;
+ if (!exact_div (subreg_byte * BITS_PER_UNIT, outer_size).is_constant (&part))
+ return false;
+
+ rtx inner_reg = SUBREG_REG (dest);
+
+ if (part == 0)
+ emit_insn (gen_rtx_SET (gen_lowpart (mode, inner_reg), src));
+ else
+ {
+ /* Inserting into a non-zero part means we need to slide up. */
+ poly_uint64 slide_count = part * outer_nunits;
+
+      /* Set up a temporary of the wider inner mode to slide from.  */
+ rtx tmp = gen_reg_rtx (inner_mode);
+
+ /* Create a vector with src in the low part. */
+ rtx low_tmp = gen_lowpart (mode, tmp);
+ emit_insn (gen_rtx_SET (low_tmp, src));
+
+      /* Slide it up to the correct position in inner_reg.  Use the
+	 TUMA policy (tail undisturbed, mask agnostic); elements past
+	 VL are kept and vslideup leaves those below the offset alone.  */
+      rtx ops[] = {inner_reg, inner_reg, tmp, gen_int_mode (slide_count, Pmode)};
+ insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, inner_mode);
+
+ /* For vslideup, we need VL = slide_count + outer_nunits
+ to copy exactly outer_nunits elements at the offset. */
+ rtx vl = gen_int_mode (slide_count + outer_nunits, Pmode);
+ emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops, vl);
+ }
+
+ return true;
+}
+
/* Expand a pre-RA RVV data move from SRC to DEST.
It expands move for RVV fractional vector modes.
Return true if the move as already been emitted. */
@@ -1975,6 +2049,19 @@ legitimize_move (rtx dest, rtx *srcp)
return true;
}
+  /* Similarly for insertions, handle patterns like
+	 (set (subreg:V4DI (reg:V8DI) idx)
+	      (reg:V4DI))
+     and transform them into vector slideups.  */
+ if (SUBREG_P (dest) && REG_P (SUBREG_REG (dest)) && REG_P (*srcp)
+ && VECTOR_MODE_P (GET_MODE (SUBREG_REG (dest)))
+ && VECTOR_MODE_P (mode)
+ && !lra_in_progress)
+ {
+ if (expand_vector_subreg_insert (dest, src))
+ return true;
+ }
+
if (riscv_vls_mode_p (mode))
{
if (GET_MODE_NUNITS (mode).to_constant () <= 31)
--
2.51.0