https://gcc.gnu.org/g:fd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24
commit r14-9607-gfd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24 Author: Christoph Müllner <christoph.muell...@vrull.eu> Date: Thu Mar 21 15:40:49 2024 +0100 RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector The expansion of `memset` (via expand_builtin_memset_args()) uses clear_by_pieces() and store_by_pieces() to avoid calls to the C runtime. To check if a type can be used for that purpose the function by_pieces_mode_supported_p() tests if a `mov` and a `vec_duplicate` INSN can be expanded by the backend. The `vec_duplicate` expansion takes arguments of type `V_VLS`. The `mov` expansions take arguments of type `V`, `VB`, `VT`, `VLS_AVL_IMM`, and `VLS_AVL_REG`. Some of these types (in fact not types but type iterators) include fractional LMUL types. E.g. `V_VLS` includes `V`, which includes `VI`, which includes `RVVMF2QI`. This results in an attempt to use fractional LMUL-types for the `memset` expansion, resulting in an ICE for XTheadVector, because that extension cannot handle fractional LMULs. This patch addresses this issue by splitting the definition of the `VI` mode iterator into `VI_NOFRAC` (without fractional LMUL types) and `VI_FRAC` (only fractional LMUL types). Further, it defines `V_VLS` such that `VI_FRAC` types are only included if XTheadVector is not enabled. The effect is demonstrated by a new test case that shows that the by-pieces framework now emits `sb` instructions instead of triggering an ICE. Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> PR target/114194 gcc/ChangeLog: * config/riscv/vector-iterators.md: Split VI into VI_FRAC and VI_NOFRAC. Only include VI_FRAC in V_VLS without TARGET_XTHEADVECTOR. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/pr114194.c: New test. 
Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> Diff: --- gcc/config/riscv/vector-iterators.md | 19 +++++--- .../gcc.target/riscv/rvv/xtheadvector/pr114194.c | 56 ++++++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index c2ea7e8b10a..a24e1bf078f 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -108,17 +108,24 @@ UNSPECV_FRM_RESTORE_EXIT ]) -(define_mode_iterator VI [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") - - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") - - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") +;; Subset of VI with fractional LMUL types +(define_mode_iterator VI_FRAC [ + RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + (RVVMF2SI "TARGET_MIN_VLEN > 32") +]) +;; Subset of VI with non-fractional LMUL types +(define_mode_iterator VI_NOFRAC [ + RVVM8QI RVVM4QI RVVM2QI RVVM1QI + RVVM8HI RVVM4HI RVVM2HI RVVM1HI + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") ]) +(define_mode_iterator VI [ VI_NOFRAC (VI_FRAC "!TARGET_XTHEADVECTOR") ]) + ;; This iterator is the same as above but with TARGET_VECTOR_ELEN_FP_16 ;; changed to TARGET_ZVFH. 
TARGET_VECTOR_ELEN_FP_16 is also true for ;; TARGET_ZVFHMIN while we actually want to disable all instructions apart diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c new file mode 100644 index 00000000000..fc2d1349425 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_xtheadvector" { target { rv32 } } } */ +/* { dg-options "-march=rv64gc_xtheadvector" { target { rv64 } } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** foo0_1: +** sb\tzero,0([a-x0-9]+) +** ret +*/ +void foo0_1 (void *p) +{ + __builtin_memset (p, 0, 1); +} + +/* +** foo0_7: +** sb\tzero,0([a-x0-9]+) +** sb\tzero,1([a-x0-9]+) +** sb\tzero,2([a-x0-9]+) +** sb\tzero,3([a-x0-9]+) +** sb\tzero,4([a-x0-9]+) +** sb\tzero,5([a-x0-9]+) +** sb\tzero,6([a-x0-9]+) +** ret +*/ +void foo0_7 (void *p) +{ + __builtin_memset (p, 0, 7); +} + +/* +** foo1_1: +** li\t[a-x0-9]+,1 +** sb\t[a-x0-9]+,0([a-x0-9]+) +** ret +*/ +void foo1_1 (void *p) +{ + __builtin_memset (p, 1, 1); +} + +/* +** foo1_5: +** li\t[a-x0-9]+,1 +** sb\t[a-x0-9]+,0([a-x0-9]+) +** sb\t[a-x0-9]+,1([a-x0-9]+) +** sb\t[a-x0-9]+,2([a-x0-9]+) +** sb\t[a-x0-9]+,3([a-x0-9]+) +** sb\t[a-x0-9]+,4([a-x0-9]+) +** ret +*/ +void foo1_5 (void *p) +{ + __builtin_memset (p, 1, 5); +}