https://gcc.gnu.org/g:ecb0d1b860e15371248a055ae4b2d8058bb8dd1a
commit ecb0d1b860e15371248a055ae4b2d8058bb8dd1a Author: Pan Li <pan2...@intel.com> Date: Wed Oct 23 16:46:53 2024 +0800 RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE} This patch would like to implement the MASK_LEN_STRIDED_LOAD{STORE} in the RISC-V backend by leveraging the vector strided load/store insn. For example: void foo (int * __restrict a, int * __restrict b, int stride, int n) { for (int i = 0; i < n; i++) a[i*stride] = b[i*stride] + 100; } Before this patch: 38 │ vsetvli a5,a3,e32,m1,ta,ma 39 │ vluxei64.v v1,(a1),v4 40 │ mul a4,a2,a5 41 │ sub a3,a3,a5 42 │ vadd.vv v1,v1,v2 43 │ vsuxei64.v v1,(a0),v4 44 │ add a1,a1,a4 45 │ add a0,a0,a4 After this patch: 33 │ vsetvli a5,a3,e32,m1,ta,ma 34 │ vlse32.v v1,0(a1),a2 35 │ mul a4,a2,a5 36 │ sub a3,a3,a5 37 │ vadd.vv v1,v1,v2 38 │ vsse32.v v1,0(a0),a2 39 │ add a1,a1,a4 40 │ add a0,a0,a4 The test suites below passed for this patch: * The riscv full regression test. gcc/ChangeLog: * config/riscv/autovec.md (mask_len_strided_load_<mode>): Add new pattern for MASK_LEN_STRIDED_LOAD. (mask_len_strided_store_<mode>): Ditto but for store. * config/riscv/riscv-protos.h (expand_strided_load): Add new func decl to expand strided load. (expand_strided_store): Ditto but for store. * config/riscv/riscv-v.cc (expand_strided_load): Add new func impl to expand strided load. (expand_strided_store): Ditto but for store. 
Signed-off-by: Pan Li <pan2...@intel.com> Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai> (cherry picked from commit 30435cc261071d389d9a210f598170ecdd5ea13c) Diff: --- gcc/config/riscv/autovec.md | 29 +++++++++++++++++++++++ gcc/config/riscv/riscv-protos.h | 2 ++ gcc/config/riscv/riscv-v.cc | 52 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 774a3d337231..1f1849d52372 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2889,3 +2889,32 @@ DONE; } ) + +;; ========================================================================= +;; == Strided Load/Store +;; ========================================================================= +(define_expand "mask_len_strided_load_<mode>" + [(match_operand:V 0 "register_operand") + (match_operand 1 "pmode_reg_or_0_operand") + (match_operand 2 "pmode_reg_or_0_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_strided_load (<MODE>mode, operands); + DONE; + }) + +(define_expand "mask_len_strided_store_<mode>" + [(match_operand 0 "pmode_reg_or_0_operand") + (match_operand 1 "pmode_reg_or_0_operand") + (match_operand:V 2 "register_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_strided_store (<MODE>mode, operands); + DONE; + }) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 54f472afd8d0..0a6b43f0c767 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -700,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool); void emit_vec_extract (rtx, rtx, rtx); bool expand_vec_setmem (rtx, rtx, rtx); bool expand_vec_cmpmem (rtx, rtx, rtx, rtx); 
+void expand_strided_load (machine_mode, rtx *); +void expand_strided_store (machine_mode, rtx *); /* Rounding mode bitfield for fixed point VXRM. */ enum fixed_point_rounding_mode diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index c48b87278a31..209b7ee88f18 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load) } } +/* Expand MASK_LEN_STRIDED_LOAD. */ +void +expand_strided_load (machine_mode mode, rtx *ops) +{ + rtx v_reg = ops[0]; + rtx base = ops[1]; + rtx stride = ops[2]; + rtx mask = ops[3]; + rtx len = ops[4]; + poly_int64 len_val; + + insn_code icode = code_for_pred_strided_load (mode); + rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride}; + + if (poly_int_rtx_p (len, &len_val) + && known_eq (len_val, GET_MODE_NUNITS (mode))) + emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops); + else + { + len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len); + emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len); + } +} + +/* Expand MASK_LEN_STRIDED_STORE. */ +void +expand_strided_store (machine_mode mode, rtx *ops) +{ + rtx v_reg = ops[2]; + rtx base = ops[0]; + rtx stride = ops[1]; + rtx mask = ops[3]; + rtx len = ops[4]; + poly_int64 len_val; + rtx vl_type; + + if (poly_int_rtx_p (len, &len_val) + && known_eq (len_val, GET_MODE_NUNITS (mode))) + { + len = gen_reg_rtx (Pmode); + emit_vlmax_vsetvl (mode, len); + vl_type = get_avl_type_rtx (VLMAX); + } + else + { + len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len); + vl_type = get_avl_type_rtx (NONVLMAX); + } + + emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base), + mask, stride, v_reg, len, vl_type)); +} /* Return true if the operation is the floating-point operation need FRM. */ static bool