https://gcc.gnu.org/g:ed9612bad7866ad66df63e123175d8b3aaba08de

commit r16-3591-ged9612bad7866ad66df63e123175d8b3aaba08de
Author: Kyrylo Tkachov <[email protected]>
Date:   Thu Sep 4 02:46:39 2025 -0700

    aarch64: Use SVE for V2DImode integer min/max operations
    
    Unlike Advanced SIMD, SVE has instructions to perform smin, smax, umin, umax
    on 64-bit elements.  Thus, we can use them with the fixed-width V2DImode
    expander.  Most of the machinery is already there on the define_insn side,
    supporting V2DImode operands of the SVE pattern.  We just need to wire up
    the RTL emission to the v2di standard names for the TARGET_SVE case.
    
    So for the smin case we now generate:
    min_di:
            ldr     q30, [x0]
            ptrue   p7.b, all
            ldr     q31, [x1]
            smin    z30.d, p7/m, z30.d, z31.d
            str     q30, [x2]
            ret
    
    min_imm_di:
            ldr     q31, [x0]
            smin    z31.d, z31.d, #5
            str     q31, [x2]
            ret
    
    instead of the previous:
    min_di:
            ldr     q30, [x0]
            ldr     q31, [x1]
            cmgt    v29.2d, v30.2d, v31.2d
            bsl     v29.16b, v31.16b, v30.16b
            str     q29, [x2]
            ret
    
    min_imm_di:
            ldr     q31, [x0]
            mov     z30.d, #5
            cmgt    v29.2d, v30.2d, v31.2d
            bsl     v29.16b, v31.16b, v30.16b
            str     q29, [x2]
            ret
    
    The register operand case is the same length, though the new ptrue can now be
    shared and moved away.  But the immediate operand case is obviously better
    as the SVE immediate form doesn't require a predicate operand.
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    Signed-off-by: Kyrylo Tkachov <[email protected]>
    
    gcc/
    
            * config/aarch64/iterators.md (sve_di_suf): New mode attribute.
            * config/aarch64/aarch64-sve.md (<optab><mode>3 SVE_INT_BINARY_MULTI):
            Rename to...
            (<optab><mode>3<sve_di_suf>): ... This.  Use SVE_I_SIMD_DI mode
            iterator.
            * config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Use the above
            for TARGET_SVE.
    
    gcc/testsuite/
    
            * gcc.target/aarch64/sve/usminmax_di.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 |  8 ++++
 gcc/config/aarch64/aarch64-sve.md                  | 12 +++---
 gcc/config/aarch64/iterators.md                    |  5 +++
 gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c | 44 ++++++++++++++++++++++
 4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c111dc2c7f7c..14b9d5c78e3f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1788,6 +1788,14 @@
   enum rtx_code cmp_operator;
   rtx cmp_fmt;
 
+  /* SVE has native D-forms of the MIN/MAX instructions.  */
+  if (TARGET_SVE)
+    {
+      emit_insn (gen_<su><maxmin>v2di3_as_sve (operands[0], operands[1],
+                                              operands[2]));
+      DONE;
+    }
+
   switch (<CODE>)
     {
     case UMIN:
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 51e2d7d7e875..1ebcffe88a32 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4052,13 +4052,13 @@
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated integer binary operations that have an immediate form.
-(define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-       (unspec:SVE_I
+(define_expand "<optab><mode>3<sve_di_suf>"
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+       (unspec:SVE_I_SIMD_DI
          [(match_dup 3)
-          (SVE_INT_BINARY_MULTI:SVE_I
-            (match_operand:SVE_I 1 "register_operand")
-            (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
+          (SVE_INT_BINARY_MULTI:SVE_I_SIMD_DI
+            (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+            (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_operand"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7a6ea0d8d067..451b00f55af5 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1931,6 +1931,11 @@
                           (VNx8SI "VNx8HI") (VNx16SI "VNx16QI")
                           (VNx8DI "VNx8HI")])
 
+;; Suffix mapping Advanced SIMD modes to be expanded as SVE instructions.
+(define_mode_attr sve_di_suf [(VNx16QI "") (VNx8HI "") (VNx4SI "") (VNx2DI "")
+                             (VNx8QI "") (VNx4QI "") (VNx2QI "") (VNx4HI "")
+                             (VNx2HI "") (VNx2SI "") (V2DI "_as_sve")])
+
 ;; Register suffix narrowed modes for VQN.
 (define_mode_attr Vntype [(V8HI "8b") (V4SI "4h")
                          (V2DI "2s")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c
new file mode 100644
index 000000000000..5405308ceec8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --param aarch64-autovec-preference=asimd-only" } */
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define FUNC(T, N, S)   \
+void min_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c)  \
+{                                                                         \
+  int i;                                                                  \
+  for (i = 0; i < N; i++)                                                 \
+    c[i] = MIN (a[i], b[i]);                                              \
+}                                                                         \
+void max_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c)  \
+{                                                                         \
+  int i;                                                                  \
+  for (i = 0; i < N; i++)                                                 \
+    c[i] = MAX (a[i], b[i]);                                              \
+}                                                                         \
+void min_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \
+{                                                                            \
+  int i;                                                                     \
+  for (i = 0; i < N; i++)                                                    \
+    c[i] = MIN (a[i], 5);                                                    \
+}                                                                            \
+void max_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \
+{                                                                            \
+  int i;                                                                     \
+  for (i = 0; i < N; i++)                                                    \
+    c[i] = MAX (a[i], 8);                                                    \
+}
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+FUNC (long long, 2, di)
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+FUNC (unsigned long long, 2, udi)
+

Reply via email to