An ICE was discovered in recent rounding autovec support: config/riscv/riscv-v.cc:4314 65 | } | ^ 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**, rtx_def*, bool) /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314 0x1fb1aa2 pre_vsetvl::remove_avl_operand() /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342 0x1fb18c1 pre_vsetvl::cleaup() /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308 0x1fb216d pass_vsetvl::lazy_vsetvl() /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480 0x1fb2214 pass_vsetvl::execute(function*) /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
The root cause is that the RA reload into (set (reg) vec_duplicate:DI). However, it is not valid in RV32 system since we don't have a single broadcast instruction DI scalar in RV32 system. We should expand it early for RV32 system. gcc/ChangeLog: * config/riscv/predicates.md: Adapt predicate. * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function. * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto. * config/riscv/vector.md (vec_duplicate<mode>): New pattern. (*vec_duplicate<mode>): Adapt vec_duplicate insn pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c: New test. --- gcc/config/riscv/predicates.md | 9 +----- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 20 +++++++++++++ gcc/config/riscv/vector.md | 20 ++++++++++++- .../riscv/rvv/autovec/unop/sew64-rv32.c | 29 +++++++++++++++++++ 5 files changed, 70 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index db18054607f..df1c66f3a76 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -553,14 +553,7 @@ ;; The scalar operand can be directly broadcast by RVV instructions. (define_predicate "direct_broadcast_operand" - (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op)) - && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op) - || rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))) - && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))") - (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))") - (ior (match_code "const_int,const_poly_int") - (ior (match_operand 0 "register_operand") - (match_test "satisfies_constraint_Wdm (op)")))))) + (match_test "riscv_vector::can_be_broadcasted_p (op)")) ;; A CONST_INT operand that has exactly two bits cleared. (define_predicate "const_nottwobits_operand" diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 9b254a4b278..135df1a1d94 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -598,6 +598,7 @@ uint8_t get_sew (rtx_insn *); enum vlmul_type get_vlmul (rtx_insn *); int count_regno_occurrences (rtx_insn *, unsigned int); bool imm_avl_p (machine_mode); +bool can_be_broadcasted_p (rtx); } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index ff9539fff02..6b8b4da7477 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4418,4 +4418,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) return count; } +/* Return true if the OP can be directly broadcasted. */ +bool +can_be_broadcasted_p (rtx op) +{ + machine_mode mode = GET_MODE (op); + /* We don't allow RA (register allocation) reload generate + (vec_duplicate:DI reg) in RV32 system wheras we allow + (vec_duplicate:DI mem) in RV32 system. */ + if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode) + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)) + && !satisfies_constraint_Wdm (op)) + return false; + + if (satisfies_constraint_K (op) || register_operand (op, mode) + || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) + return true; + + return can_create_pseudo_p () && nonmemory_operand (op, mode); +} + } // namespace riscv_vector diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8509c4fe5f2..e23f64938b7 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1370,11 +1370,29 @@ ;; ---- Duplicate Operations ;; ----------------------------------------------------------------- +(define_expand "vec_duplicate<mode>" + [(set (match_operand:V_VLS 0 "register_operand") + (vec_duplicate:V_VLS + (match_operand:<VEL> 1 "direct_broadcast_operand")))] + "TARGET_VECTOR" + { + /* Early expand DImode broadcast in RV32 system to avoid RA reload + generate (set (reg) (vec_duplicate:DI)). */ + if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode))) + { + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), + riscv_vector::UNARY_OP, operands); + DONE; + } + /* Otherwise, allow it fall into general vec_duplicate pattern + which allow us to have vv->vx combine optimization in later pass. */ + }) + ;; According to GCC internal: ;; This pattern only handles duplicates of non-constant inputs. ;; Constant vectors go through the movm pattern instead. ;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT. -(define_insn_and_split "vec_duplicate<mode>" +(define_insn_and_split "*vec_duplicate<mode>" [(set (match_operand:V_VLS 0 "register_operand") (vec_duplicate:V_VLS (match_operand:<VEL> 1 "direct_broadcast_operand")))] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c new file mode 100644 index 00000000000..8f81369030e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */ + +#include <stdint-gcc.h> +#include "test-math.h" + +#define ARRAY_SIZE 128 + +float in[ARRAY_SIZE]; +int64_t out[ARRAY_SIZE]; +int64_t ref[ARRAY_SIZE]; + +TEST_UNARY_CALL_CVT (float, int64_t, __builtin_llrintf) +TEST_ASSERT (int64_t) + + +TEST_INIT_CVT (float, __builtin_inf (), int64_t, __builtin_llrintf (__builtin_inff ()), 19) +TEST_INIT_CVT (float, -__builtin_inf (), int64_t, __builtin_llrintf (-__builtin_inff ()), 20) +TEST_INIT_CVT (float, __builtin_nanf (""), int64_t, 0x7fffffffffffffff, 21) + +int +main () +{ + RUN_TEST_CVT (float, int64_t, 19, __builtin_llrintf, in, out, ref, ARRAY_SIZE); + RUN_TEST_CVT (float, int64_t, 20, __builtin_llrintf, in, out, ref, ARRAY_SIZE); + RUN_TEST_CVT (float, int64_t, 21, __builtin_llrintf, in, out, ref, ARRAY_SIZE); + + return 0; +} -- 2.36.3