Similar to vec_extract, this implements vec_sets that are present as
subreg inserts.  As with a single element, we can just slide up
a vector with the TU policy.

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (expand_vector_subreg_insert):
        New function.
        (legitimize_move): Use new function.
---
 gcc/config/riscv/riscv-v.cc | 87 +++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 38853000c38..7bd361c8210 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1943,6 +1943,80 @@ expand_vector_subreg_extract (rtx dest, rtx src)
   return true;
 }
 
+/* Expand vector insertion into a SUBREG destination using slideup.
+   Handles patterns like (subreg:V4DI (reg:V8DI) 32) by emitting
+   a slideup instruction when inserting into non-low parts.
+   Return true if the move was handled and emitted.  */
+static bool
+expand_vector_subreg_insert (rtx dest, rtx src)
+{
+  gcc_assert (SUBREG_P (dest) && REG_P (SUBREG_REG (dest)) && REG_P (src));
+
+  machine_mode mode = GET_MODE (src);
+  machine_mode inner_mode = GET_MODE (SUBREG_REG (dest));
+
+  gcc_assert (VECTOR_MODE_P (mode));
+  gcc_assert (VECTOR_MODE_P (inner_mode));
+
+  poly_uint16 outer_size = GET_MODE_BITSIZE (mode);
+  poly_uint16 inner_size = GET_MODE_BITSIZE (inner_mode);
+
+  /* Only handle a non-tuple inner mode that is a constant power-of-two
+     multiple (> 1) of the outer mode.  */
+  poly_uint16 factor;
+  if (riscv_tuple_mode_p (inner_mode)
+      || !multiple_p (inner_size, outer_size, &factor)
+      || !factor.is_constant ()
+      || !pow2p_hwi (factor.to_constant ())
+      || factor.to_constant () <= 1)
+    return false;
+
+  enum vlmul_type lmul = get_vlmul (mode);
+  enum vlmul_type inner_lmul = get_vlmul (inner_mode);
+
+  /* These are just "renames".  */
+  if ((inner_lmul == LMUL_2 || inner_lmul == LMUL_4 || inner_lmul == LMUL_8)
+      && (lmul == LMUL_1 || lmul == LMUL_2 || lmul == LMUL_4))
+    return false;
+
+  poly_uint64 outer_nunits = GET_MODE_NUNITS (mode);
+  poly_uint64 subreg_byte = SUBREG_BYTE (dest);
+
+  /* Calculate which part we're inserting into (0 for low half, 1 for
+     higher half/quarter, etc.)  */
+  uint64_t part;
+  if (!exact_div (subreg_byte * BITS_PER_UNIT, outer_size).is_constant (&part))
+    return false;
+
+  rtx inner_reg = SUBREG_REG (dest);
+
+  if (part == 0)
+    emit_insn (gen_rtx_SET (gen_lowpart (mode, inner_reg), src));
+  else
+    {
+      /* Inserting into a non-zero part means we need to slide up.  */
+      poly_uint64 slide_count = part * outer_nunits;
+
+      /* Place SRC into the low part of a temporary of the inner mode;
+        the slideup below moves it to its final position.  */
+      rtx tmp = gen_reg_rtx (inner_mode);
+      rtx low_tmp = gen_lowpart (mode, tmp);
+      emit_insn (gen_rtx_SET (low_tmp, src));
+
+      /* Slide it up to the correct position in inner_reg.
+        Use TUMA (tail-undisturbed, mask-agnostic): the tail-undisturbed
+        policy preserves the destination elements at and beyond VL.  */
+      rtx ops[] = {inner_reg, inner_reg, tmp, gen_int_mode (slide_count, Pmode)};
+      insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, inner_mode);
+
+      /* For vslideup, we need VL = slide_count + outer_nunits
+        to copy exactly outer_nunits elements at the offset.  */
+      rtx vl = gen_int_mode (slide_count + outer_nunits, Pmode);
+      emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops, vl);
+    }
+
+  return true;
+}
+
 /* Expand a pre-RA RVV data move from SRC to DEST.
    It expands move for RVV fractional vector modes.
    Return true if the move as already been emitted.  */
@@ -1975,6 +2049,19 @@ legitimize_move (rtx dest, rtx *srcp)
        return true;
     }
 
+  /* Similarly for insertions, handle patterns like
+       (set (subreg:V4DI (reg:V8DI) idx)
+         reg:V4DI)
+     and transform them into vector slideups.  */
+  if (SUBREG_P (dest) && REG_P (SUBREG_REG (dest)) && REG_P (*srcp)
+      && VECTOR_MODE_P (GET_MODE (SUBREG_REG (dest)))
+      && VECTOR_MODE_P (mode)
+      && !lra_in_progress)
+    {
+      if (expand_vector_subreg_insert (dest, src))
+       return true;
+    }
+
   if (riscv_vls_mode_p (mode))
     {
       if (GET_MODE_NUNITS (mode).to_constant () <= 31)
-- 
2.51.0

Reply via email to