https://gcc.gnu.org/g:b30c6a5eabaf476663f1a1e41165967e782eccd3
commit r15-4937-gb30c6a5eabaf476663f1a1e41165967e782eccd3 Author: Craig Blackmore <craig.blackm...@embecosm.com> Date: Mon Nov 4 13:55:19 2024 -0700 [PATCH v2 1/2] RISC-V: Make vectorized memset handle more cases `expand_vec_setmem` only generated vectorized memset if it fitted into a single vector store of at least (TARGET_MIN_VLEN / 8) bytes. Also, without dynamic LMUL the operation was always TARGET_MAX_LMUL even if it would have fitted a smaller LMUL. Allow vectorized memset to be generated for smaller lengths and smaller LMUL by switching to using use_vector_stringop_p. Smaller LMUL can be seen in setmem-3.c:f2. Smaller lengths will be seen after the second patch in this series which selectively disables by pieces. gcc/ChangeLog: * config/riscv/riscv-string.cc (use_vector_stringop_p): Add comment. (expand_vec_setmem): Use use_vector_stringop_p instead of check_vectorise_memory_operation. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/setmem-3.c: Expect smaller lmul. Diff: --- gcc/config/riscv/riscv-string.cc | 37 +++++++++++----------- gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c | 6 ++-- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 118c02a40212..20395e19c604 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -1062,6 +1062,9 @@ struct stringop_info { MAX_EW is the maximum element width that the caller wants to use and LENGTH_IN is the length of the stringop in bytes. + + This is currently used for cpymem and setmem. If expand_vec_cmpmem switches + to using it too then check_vectorise_memory_operation can be removed. */ static bool @@ -1600,41 +1603,39 @@ check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out) bool expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in) { - HOST_WIDE_INT lmul; + stringop_info info; + /* Check we are able and allowed to vectorise this operation; bail if not. 
*/ - if (!check_vectorise_memory_operation (length_in, lmul)) + if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop) return false; - machine_mode vmode - = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul) - .require (); rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0)); - rtx dst = change_address (dst_in, vmode, dst_addr); + rtx dst = change_address (dst_in, info.vmode, dst_addr); - rtx fill_value = gen_reg_rtx (vmode); + rtx fill_value = gen_reg_rtx (info.vmode); rtx broadcast_ops[] = { fill_value, fill_value_in }; /* If the length is exactly vlmax for the selected mode, do that. Otherwise, use a predicated store. */ - if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in))) + if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl))) { - emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP, - broadcast_ops); + emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP, + broadcast_ops); emit_move_insn (dst, fill_value); } else { - if (!satisfies_constraint_K (length_in)) - length_in = force_reg (Pmode, length_in); - emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP, - broadcast_ops, length_in); + if (!satisfies_constraint_K (info.avl)) + info.avl = force_reg (Pmode, info.avl); + emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode), + riscv_vector::UNARY_OP, broadcast_ops, info.avl); machine_mode mask_mode - = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode)) - .require (); + = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode)) + .require (); rtx mask = CONSTM1_RTX (mask_mode); - emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in, - get_avl_type_rtx (riscv_vector::NONVLMAX))); + emit_insn (gen_pred_store (info.vmode, dst, mask, fill_value, info.avl, + get_avl_type_rtx (riscv_vector::NONVLMAX))); } return true; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c index 
25be694d248a..52766fece76a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c @@ -21,13 +21,13 @@ f1 (void *a, int const b) return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1); } -/* Vectorise+inline minimum vector register width using requested lmul. +/* Vectorised code should use smallest lmul known to fit length. ** f2: ** ( -** vsetivli\s+zero,\d+,e8,m8,ta,ma +** vsetivli\s+zero,\d+,e8,m1,ta,ma ** | ** li\s+a\d+,\d+ -** vsetvli\s+zero,a\d+,e8,m8,ta,ma +** vsetvli\s+zero,a\d+,e8,m1,ta,ma ** ) ** vmv\.v\.x\s+v\d+,a1 ** vse8\.v\s+v\d+,0\(a0\)