https://gcc.gnu.org/g:0e9f08c785156884842749b3e42a4c550719c407

commit r16-2467-g0e9f08c785156884842749b3e42a4c550719c407
Author: Spencer Abson <spencer.ab...@arm.com>
Date:   Tue Jul 8 11:35:44 2025 +0000

    aarch64: Add support for unpacked SVE FDIV
    
    This patch extends the unpredicated FP division expander to support
    partial FP modes.  It extends the existing patterns used to implement
    UNSPEC_COND_FDIV and it's approximation as needed.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-sve.md: (@aarch64_sve_<optab><mode>):
            Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand.
            (@aarch64_frecpe<mode>): Extend from SVE_FULL_F to SVE_F.
            (@aarch64_frecps<mode>): Likewise.
            (div<mode>3): Likewise, use aarch64_sve_fp_pred.
            * config/aarch64/iterators.md: Add warnings above SVE_FP_UNARY
            and SVE_FP_BINARY.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/unpacked_fdiv_1.c: New test.
            * gcc.target/aarch64/sve/unpacked_fdiv_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fdiv_3.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md                  | 50 +++++++++++-----------
 gcc/config/aarch64/iterators.md                    |  8 ++++
 .../gcc.target/aarch64/sve/unpacked_fdiv_1.c       | 38 ++++++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fdiv_2.c       | 15 +++++++
 .../gcc.target/aarch64/sve/unpacked_fdiv_3.c       | 13 ++++++
 5 files changed, 99 insertions(+), 25 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index a52ef61098bb..fc0f51e73a3e 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3752,9 +3752,9 @@
 
 ;; Unpredicated floating-point unary operations.
 (define_insn "@aarch64_sve_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
-       (unspec:SVE_FULL_F
-         [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+       (unspec:SVE_F
+         [(match_operand:SVE_F 1 "register_operand" "w")]
          SVE_FP_UNARY))]
   "TARGET_SVE"
   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
@@ -5561,10 +5561,10 @@
 
 ;; Unpredicated floating-point binary operations.
 (define_insn "@aarch64_sve_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
-       (unspec:SVE_FULL_F
-         [(match_operand:SVE_FULL_F 1 "register_operand" "w")
-          (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+       (unspec:SVE_F
+         [(match_operand:SVE_F 1 "register_operand" "w")
+          (match_operand:SVE_F 2 "register_operand" "w")]
          SVE_FP_BINARY))]
   "TARGET_SVE"
   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
@@ -5588,12 +5588,12 @@
 
 ;; Predicated floating-point binary operations that have no immediate forms.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 "register_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "register_operand")]
          SVE_COND_FP_BINARY_REG))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
@@ -6685,12 +6685,12 @@
 ;; -------------------------------------------------------------------------
 
 (define_expand "div<mode>3"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_dup 3)
-          (const_int SVE_RELAXED_GP)
-          (match_operand:SVE_FULL_F 1 "nonmemory_operand")
-          (match_operand:SVE_FULL_F 2 "register_operand")]
+          (match_dup 4)
+          (match_operand:SVE_F 1 "nonmemory_operand")
+          (match_operand:SVE_F 2 "register_operand")]
          UNSPEC_COND_FDIV))]
   "TARGET_SVE"
   {
@@ -6698,23 +6698,23 @@
       DONE;
 
     operands[1] = force_reg (<MODE>mode, operands[1]);
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
   }
 )
 
 (define_expand "@aarch64_frecpe<mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:SVE_FULL_F 1 "register_operand")]
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:SVE_F 1 "register_operand")]
          UNSPEC_FRECPE))]
   "TARGET_SVE"
 )
 
 (define_expand "@aarch64_frecps<mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:SVE_FULL_F 1 "register_operand")
-          (match_operand:SVE_FULL_F 2 "register_operand")]
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:SVE_F 1 "register_operand")
+          (match_operand:SVE_F 2 "register_operand")]
          UNSPEC_FRECPS))]
   "TARGET_SVE"
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 795c4ac7a579..8533912e5937 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3367,6 +3367,10 @@
 (define_int_iterator SVE_INT_UNARY [UNSPEC_REVB
                                    UNSPEC_REVH UNSPEC_REVW])
 
+;; This iterator is currently only used for estimation instructions,
+;; which are never generated automatically when -ftrapping-math is true.
+;; The iterator is therefore applied unconditionally to partial FP modes.
+;; This might need to be revisited if new operations are added in future.
 (define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE])
 
 (define_int_iterator SVE_FP_UNARY_INT [(UNSPEC_FEXPA "TARGET_NON_STREAMING")])
@@ -3379,6 +3383,10 @@
 (define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH
                                           UNSPEC_SRSHL UNSPEC_URSHL])
 
+;; This iterator is currently only used for estimation instructions,
+;; which are never generated automatically when -ftrapping-math is true.
+;; The iterator is therefore applied unconditionally to partial FP modes.
+;; This might need to be revisited if new operations are added in future.
 (define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS])
 
 (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c
new file mode 100644
index 000000000000..78d0d9ce160f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define DIV(A, B) A / B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (DIV, _Float16, uint64_t, 32)
+
+TEST_ALL (DIV, _Float16, uint32_t, 64)
+
+TEST_ALL (DIV, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c
new file mode 100644
index 000000000000..a8f70e1443df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fdiv_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr?\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c
new file mode 100644
index 000000000000..ecd088f9e3b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -moverride=sve_width=2048 -mlow-precision-div" } */
+
+#include "unpacked_fdiv_1.c"
+
+/* { dg-final { scan-assembler-not {\tfrecpe\tz[0-9]+\.h} } } */
+/* { dg-final { scan-assembler-not {\tfrecps\tz[0-9]+\.h} } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.s} 1 } } */

Reply via email to