https://gcc.gnu.org/g:0e9f08c785156884842749b3e42a4c550719c407
commit r16-2467-g0e9f08c785156884842749b3e42a4c550719c407 Author: Spencer Abson <spencer.ab...@arm.com> Date: Tue Jul 8 11:35:44 2025 +0000 aarch64: Add support for unpacked SVE FDIV This patch extends the unpredicated FP division expander to support partial FP modes. It extends the existing patterns used to implement UNSPEC_COND_FDIV and its approximation as needed. gcc/ChangeLog: * config/aarch64/aarch64-sve.md: (@aarch64_sve_<optab><mode>): Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. (@aarch64_frecpe<mode>): Extend from SVE_FULL_F to SVE_F. (@aarch64_frecps<mode>): Likewise. (div<mode>3): Likewise, use aarch64_sve_fp_pred. * config/aarch64/iterators.md: Add warnings above SVE_FP_UNARY and SVE_FP_BINARY. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_fdiv_1.c: New test. * gcc.target/aarch64/sve/unpacked_fdiv_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fdiv_3.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-sve.md | 50 +++++++++++----------- gcc/config/aarch64/iterators.md | 8 ++++ .../gcc.target/aarch64/sve/unpacked_fdiv_1.c | 38 ++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fdiv_2.c | 15 +++++++ .../gcc.target/aarch64/sve/unpacked_fdiv_3.c | 13 ++++++ 5 files changed, 99 insertions(+), 25 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a52ef61098bb..fc0f51e73a3e 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3752,9 +3752,9 @@ ;; Unpredicated floating-point unary operations. (define_insn "@aarch64_sve_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand" "w")] + [(set (match_operand:SVE_F 0 "register_operand" "=w") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand" "w")] SVE_FP_UNARY))] "TARGET_SVE" "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>" @@ -5561,10 +5561,10 @@ ;; Unpredicated floating-point binary operations. 
(define_insn "@aarch64_sve_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand" "w") - (match_operand:SVE_FULL_F 2 "register_operand" "w")] + [(set (match_operand:SVE_F 0 "register_operand" "=w") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand" "w") + (match_operand:SVE_F 2 "register_operand" "w")] SVE_FP_BINARY))] "TARGET_SVE" "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" @@ -5588,12 +5588,12 @@ ;; Predicated floating-point binary operations that have no immediate forms. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand")] SVE_COND_FP_BINARY_REG))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] @@ -6685,12 +6685,12 @@ ;; ------------------------------------------------------------------------- (define_expand "div<mode>3" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_dup 3) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 1 "nonmemory_operand") - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_dup 4) + (match_operand:SVE_F 1 "nonmemory_operand") + (match_operand:SVE_F 2 "register_operand")] UNSPEC_COND_FDIV))] "TARGET_SVE" { @@ -6698,23 +6698,23 @@ DONE; operands[1] = force_reg (<MODE>mode, operands[1]); - operands[3] = aarch64_ptrue_reg (<VPRED>mode); + operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]); } ) (define_expand 
"@aarch64_frecpe<mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand")] + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand")] UNSPEC_FRECPE))] "TARGET_SVE" ) (define_expand "@aarch64_frecps<mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand") - (match_operand:SVE_FULL_F 2 "register_operand")] + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand") + (match_operand:SVE_F 2 "register_operand")] UNSPEC_FRECPS))] "TARGET_SVE" ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 795c4ac7a579..8533912e5937 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3367,6 +3367,10 @@ (define_int_iterator SVE_INT_UNARY [UNSPEC_REVB UNSPEC_REVH UNSPEC_REVW]) +;; This iterator is currently only used for estimation instructions, +;; which are never generated automatically when -ftrapping-math is true. +;; The iterator is therefore applied unconditionally to partial FP modes. +;; This might need to be revisited if new operations are added in future. (define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE]) (define_int_iterator SVE_FP_UNARY_INT [(UNSPEC_FEXPA "TARGET_NON_STREAMING")]) @@ -3379,6 +3383,10 @@ (define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH UNSPEC_SRSHL UNSPEC_URSHL]) +;; This iterator is currently only used for estimation instructions, +;; which are never generated automatically when -ftrapping-math is true. +;; The iterator is therefore applied unconditionally to partial FP modes. +;; This might need to be revisited if new operations are added in future. 
(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS]) (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c new file mode 100644 index 000000000000..78d0d9ce160f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define b_i b[i] +#define DIV(A, B) A / B + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], (TYPE0)RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (DIV, _Float16, uint64_t, 32) + +TEST_ALL (DIV, _Float16, uint32_t, 64) + +TEST_ALL (DIV, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c new file mode 100644 index 000000000000..a8f70e1443df --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */ 
+ +#include "unpacked_fdiv_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c new file mode 100644 index 000000000000..ecd088f9e3b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -moverride=sve_width=2048 -mlow-precision-div" } */ + +#include "unpacked_fdiv_1.c" + +/* { dg-final { scan-assembler-not {\tfrecpe\tz[0-9]+\.h} } } */ +/* { dg-final { scan-assembler-not {\tfrecps\tz[0-9]+\.h} } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.s} 1 } } */