https://gcc.gnu.org/g:ecb0d1b860e15371248a055ae4b2d8058bb8dd1a

commit ecb0d1b860e15371248a055ae4b2d8058bb8dd1a
Author: Pan Li <pan2...@intel.com>
Date:   Wed Oct 23 16:46:53 2024 +0800

    RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}
    
    This patch implements MASK_LEN_STRIDED_LOAD{STORE} in the RISC-V
    backend by leveraging the vector strided load/store insns.
    
    For example:
    void foo (int * __restrict a, int * __restrict b, int stride, int n)
    {
        for (int i = 0; i < n; i++)
          a[i*stride] = b[i*stride] + 100;
    }
    
    Before this patch:
      38   │     vsetvli a5,a3,e32,m1,ta,ma
      39   │     vluxei64.v  v1,(a1),v4
      40   │     mul a4,a2,a5
      41   │     sub a3,a3,a5
      42   │     vadd.vv v1,v1,v2
      43   │     vsuxei64.v  v1,(a0),v4
      44   │     add a1,a1,a4
      45   │     add a0,a0,a4
    
    After this patch:
      33   │     vsetvli a5,a3,e32,m1,ta,ma
      34   │     vlse32.v    v1,0(a1),a2
      35   │     mul a4,a2,a5
      36   │     sub a3,a3,a5
      37   │     vadd.vv v1,v1,v2
      38   │     vsse32.v    v1,0(a0),a2
      39   │     add a1,a1,a4
      40   │     add a0,a0,a4
    
    The following test suite passes with this patch:
    * The full RISC-V regression test.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
            new pattern for MASK_LEN_STRIDED_LOAD.
            (mask_len_strided_store_<mode>): Ditto but for store.
            * config/riscv/riscv-protos.h (expand_strided_load): Add new
            func decl to expand strided load.
            (expand_strided_store): Ditto but for store.
            * config/riscv/riscv-v.cc (expand_strided_load): Add new
            func impl to expand strided load.
            (expand_strided_store): Ditto but for store.
    
    Signed-off-by: Pan Li <pan2...@intel.com>
    Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai>
    (cherry picked from commit 30435cc261071d389d9a210f598170ecdd5ea13c)

Diff:
---
 gcc/config/riscv/autovec.md     | 29 +++++++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv-v.cc     | 52 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 774a3d337231..1f1849d52372 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2889,3 +2889,32 @@
     DONE;
   }
 )
+
+;; =========================================================================
+;; == Strided Load/Store
+;; =========================================================================
+(define_expand "mask_len_strided_load_<mode>"
+  [(match_operand:V     0 "register_operand")
+   (match_operand       1 "pmode_reg_or_0_operand")
+   (match_operand       2 "pmode_reg_or_0_operand")
+   (match_operand:<VM>  3 "vector_mask_operand")
+   (match_operand       4 "autovec_length_operand")
+   (match_operand       5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_strided_load (<MODE>mode, operands);
+    DONE;
+  })
+
+(define_expand "mask_len_strided_store_<mode>"
+  [(match_operand       0 "pmode_reg_or_0_operand")
+   (match_operand       1 "pmode_reg_or_0_operand")
+   (match_operand:V     2 "register_operand")
+   (match_operand:<VM>  3 "vector_mask_operand")
+   (match_operand       4 "autovec_length_operand")
+   (match_operand       5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_strided_store (<MODE>mode, operands);
+    DONE;
+  })
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 54f472afd8d0..0a6b43f0c767 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -700,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
 void emit_vec_extract (rtx, rtx, rtx);
 bool expand_vec_setmem (rtx, rtx, rtx);
 bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c48b87278a31..209b7ee88f18 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
     }
 }
 
+/* Expand MASK_LEN_STRIDED_LOAD.  */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[0];
+  rtx base = ops[1];
+  rtx stride = ops[2];
+  rtx mask = ops[3];
+  rtx len = ops[4];
+  poly_int64 len_val;
+
+  insn_code icode = code_for_pred_strided_load (mode);
+  rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+  if (poly_int_rtx_p (len, &len_val)
+      && known_eq (len_val, GET_MODE_NUNITS (mode)))
+    emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+  else
+    {
+      len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+      emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len);
+    }
+}
+
+/* Expand MASK_LEN_STRIDED_STORE.  */
+void
+expand_strided_store (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[2];
+  rtx base = ops[0];
+  rtx stride = ops[1];
+  rtx mask = ops[3];
+  rtx len = ops[4];
+  poly_int64 len_val;
+  rtx vl_type;
+
+  if (poly_int_rtx_p (len, &len_val)
+      && known_eq (len_val, GET_MODE_NUNITS (mode)))
+    {
+      len = gen_reg_rtx (Pmode);
+      emit_vlmax_vsetvl (mode, len);
+      vl_type = get_avl_type_rtx (VLMAX);
+    }
+  else
+    {
+      len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+      vl_type = get_avl_type_rtx (NONVLMAX);
+    }
+
+  emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base),
+                                    mask, stride, v_reg, len, vl_type));
+}
 
 /* Return true if the operation is the floating-point operation need FRM.  */
 static bool

Reply via email to