External email: Use caution opening links or attachments
From: Dhruv Chawla <[email protected]>
This patch folds the following patterns:
- umax (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? sum : a
- umin (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? a : sum
- umax (a, sub (a, b)) -> [diff, ovf] = subs (a, b); ovf ? diff : a
- umin (a, sub (a, b)) -> [diff, ovf] = subs (a, b); ovf ? a : diff
Where ovf is the overflow flag. adds and subs are generated by
generating a parallel compare+plus/minus which maps to the pattern
add<mode>3_compareC. sub<mode>3_compareC is also created to have an
equivalent pattern for the subs instruction. In the case of subs, the
overflow flag represents underflow.
This patch is a respin of the patch posted at
https://gcc.gnu.org/pipermail/gcc-patches/2025-May/685021.html as per
the suggestion to turn it into a target-specific transform by Richard
Biener.
Bootstrapped and regtested on aarch64-unknown-linux-gnu.
Signed-off-by: Dhruv Chawla <[email protected]>
PR middle-end/116815
gcc/ChangeLog:
* config/aarch64/aarch64.md (sub<mode>3_compareC): New pattern.
(*aarch64_plus_within_<optab><mode>3_<ovf_commutate>): Likewise.
(*aarch64_minus_within_<optab><mode>3): Likewise.
* config/aarch64/iterators.md (ovf_add_cmp): New code attribute.
(ovf_sub_cmp): Likewise.
(ovf_commutate): New iterator.
(ovf_comm_opp): New int attribute.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/pr116815-1.c: New test.
* gcc.target/aarch64/pr116815-2.c: Likewise.
* gcc.target/aarch64/pr116815-3.c: Likewise.
* gcc.target/aarch64/pr116815-4.c: Likewise.
* gcc.target/aarch64/pr116815-5.c: Likewise.
* gcc.target/aarch64/pr116815-6.c: Likewise.
---
gcc/config/aarch64/aarch64.md | 76 +++++++++++
gcc/config/aarch64/iterators.md | 9 ++
gcc/testsuite/gcc.target/aarch64/pr116815-1.c | 120 ++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/pr116815-2.c | 93 ++++++++++++++
gcc/testsuite/gcc.target/aarch64/pr116815-3.c | 62 +++++++++
gcc/testsuite/gcc.target/aarch64/pr116815-4.c | 48 +++++++
gcc/testsuite/gcc.target/aarch64/pr116815-5.c | 44 +++++++
gcc/testsuite/gcc.target/aarch64/pr116815-6.c | 60 +++++++++
8 files changed, 512 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-3.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-4.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-5.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-6.c
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8e431a10cb1..a55fe5c85d8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3715,6 +3715,20 @@
[(set_attr "type" "alus_sreg")]
)
+;; An equivalent to add<mode>3_compareC
+(define_insn "sub<mode>3_compareC"
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C
+ (minus:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "register_operand" "r"))
+ (match_dup 1)))
+ (set (match_operand:GPI 0 "register_operand" "=r")
+ (minus:GPI (match_dup 1) (match_dup 2)))]
+ ""
+ "subs\t%<w>0, %<w>1, %<w>2"
+)
+
(define_peephole2
[(set (match_operand:GPI 0 "aarch64_general_reg")
(minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero")
@@ -4455,6 +4469,68 @@
[(set_attr "type" "<su>div")]
)
+;; umax (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? sum : a
+;; umin (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? a : sum
+;; ... along with the commutative version of add (a, b) i.e. add (b, a).
+(define_insn_and_split "*aarch64_plus_within_<optab><mode>3_<ovf_commutate>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (UMAXMIN:GPI
+ (plus:GPI (match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "register_operand" "r"))
+ (match_dup ovf_commutate)))
+ (clobber (match_scratch:GPI 3))]
+ "!TARGET_CSSC"
+ "#"
+ "&& !reload_completed"
+ [(parallel
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C (plus:GPI (match_dup ovf_commutate)
+ (match_dup <ovf_comm_opp>))
+ (match_dup ovf_commutate)))
+ (set (match_dup 3) (plus:GPI (match_dup ovf_commutate)
+ (match_dup <ovf_comm_opp>)))])
+ (set (match_dup 0)
+ (if_then_else:GPI (<ovf_add_cmp> (reg:CC_C CC_REGNUM)
+ (const_int 0))
+ (match_dup 3)
+ (match_dup ovf_commutate)))]
+ {
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ }
+)
+
+;; In this case the ovf represents an underflow.
+;; umax (a, sub (a, b)) => [diff, ovf] = subs (a, b); ovf ? diff : a
+;; umin (a, sub (a, b)) => [diff, ovf] = subs (a, b); ovf ? a : diff
+(define_insn_and_split "*aarch64_minus_within_<optab><mode>3"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (UMAXMIN:GPI
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "register_operand" "r"))
+ (match_dup 1)))
+ (clobber (match_scratch:GPI 3))]
+ "!TARGET_CSSC"
+ "#"
+ "&& !reload_completed"
+ [(parallel
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C (minus:GPI (match_dup 1) (match_dup 2))
+ (match_dup 1)))
+ (set (match_dup 3) (minus:GPI (match_dup 1) (match_dup 2)))])
+ ;; There is an inverse mapping here from CC_C -> CC as this requires the
+ ;; inverse of the comparison from the above pattern.
+ (set (match_dup 0)
+ (if_then_else:GPI (<ovf_sub_cmp> (reg:CC CC_REGNUM)
+ (const_int 0))
+ (match_dup 3)
+ (match_dup 1)))]
+ {
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ }
+)
+
;; -------------------------------------------------------------------
;; Comparison insns
;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c3771d9402b..a29243247bd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2804,6 +2804,8 @@
(define_code_iterator FMAXMIN [smax smin])
+(define_code_iterator UMAXMIN [umax umin])
+
;; Signed and unsigned max operations.
(define_code_iterator USMAX [smax umax])
@@ -3092,6 +3094,9 @@
(define_code_attr maxminand [(smax "bic") (smin "and")])
+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+(define_code_attr ovf_sub_cmp [(umax "ltu") (umin "geu")])
+
;; MLA/MLS attributes.
(define_code_attr as [(ss_plus "a") (ss_minus "s")])
@@ -5007,3 +5012,7 @@
(UNSPEC_F2CVT "f2cvt")
(UNSPEC_F1CVTLT "f1cvtlt")
(UNSPEC_F2CVTLT "f2cvtlt")])
+
+;; Operand numbers for commutative operations
+(define_int_iterator ovf_commutate [1 2])
+(define_int_attr ovf_comm_opp [(1 "2") (2 "1")])
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
new file mode 100644
index 00000000000..f3bdb794318
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
@@ -0,0 +1,120 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests. */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+/*
+** umaxadd1:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cc
+** ret
+*/
+OPERATION (max, add, 1, a, a + b)
+
+/*
+** umaxadd2:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cc
+** ret
+*/
+OPERATION (max, add, 2, a, b + a)
+
+/*
+** umaxadd3:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cc
+** ret
+*/
+OPERATION (max, add, 3, a + b, a)
+
+/*
+** umaxadd4:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cc
+** ret
+*/
+OPERATION (max, add, 4, b + a, a)
+
+/*
+** uminadd1:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cs
+** ret
+*/
+OPERATION (min, add, 1, a, a + b)
+
+/*
+** uminadd2:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cs
+** ret
+*/
+OPERATION (min, add, 2, a, b + a)
+
+/*
+** uminadd3:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cs
+** ret
+*/
+OPERATION (min, add, 3, a + b, a)
+
+/*
+** uminadd4:
+** adds (w[0-9]+), w0, w1
+** csel w0, \1, w0, cs
+** ret
+*/
+OPERATION (min, add, 4, b + a, a)
+
+/* sub requires the inverse of the comparison from add. */
+
+/*
+** umaxsub1:
+** subs (w[0-9]+), w0, w1
+** csel w0, \1, w0, cc
+** ret
+*/
+OPERATION (max, sub, 1, a, a - b)
+
+/*
+** umaxsub2:
+** subs (w[0-9]+), w0, w1
+** csel w0, \1, w0, cc
+** ret
+*/
+OPERATION (max, sub, 2, a - b, a)
+
+/*
+** uminsub1:
+** subs (w[0-9]+), w0, w1
+** csel w0, \1, w0, cs
+** ret
+*/
+OPERATION (min, sub, 1, a, a - b)
+
+/*
+** uminsub2:
+** subs (w[0-9]+), w0, w1
+** csel w0, \1, w0, cs
+** ret
+*/
+OPERATION (min, sub, 2, a - b, a)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
new file mode 100644
index 00000000000..f2dd7b0bbb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests. */
+
+static inline int __attribute__ ((always_inline))
+smax (int a, int b)
+{
+ return a > b ? a : b;
+}
+
+static inline int __attribute__ ((always_inline))
+smin (int a, int b)
+{
+ return a < b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+umax (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+umin (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types. */
+
+int
+smax_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a - b);
+}
+
+/* Invalid patterns. */
+
+/* This can potentially be matched, but the RHS gets factored to
+ (a + b) * b. */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+ return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+ return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+ return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+ return umin (a, b - a);
+}
+
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
new file mode 100644
index 00000000000..f8050377116
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests. */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+/* FIXME: This should only generate one adds. */
+
+/*
+** umax_add_umin_add:
+** adds (w[0-9]+), w0, w1
+** csel \1, \1, w0, cc
+** adds (w[0-9]+), w1, w0
+** csel w0, \2, w1, cs
+** add w0, \1, \2
+** ret
+*/
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+ return max (a, a + b) + min (a + b, b);
+}
+
+/*
+** umin_add_umax_add:
+** adds (w[0-9]+), w0, w1
+** csel \1, \1, w0, cs
+** adds (w[0-9]+), w1, w0
+** csel \2, \2, w1, cc
+** add w0, \1, \2
+** ret
+*/
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+ return min (a, b + a) + max (b + a, b);
+}
+
+/* FIXME: This pattern does not get optimized. */
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+ if (a > 5)
+ return max (a, a + b);
+ else
+ return min (a, a + b);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
new file mode 100644
index 00000000000..c1be921334b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests with a use of the min-max in an if-condition. */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b, unsigned c, unsigned d,
\
+ unsigned e) \
+ {
\
+ unsigned result = op (exp1, exp2);
\
+ if (result == c || result == c * 2)
\
+ return d;
\
+ else
\
+ return e;
\
+ }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "adds\\t" 8 } } */
+/* { dg-final { scan-assembler-times "subs\\t" 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
new file mode 100644
index 00000000000..015c868aec2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+cssc"
+
+/* PR middle-end/116815 */
+
+/* Make sure that umax/umin instructions are generated with CSSC. */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "umax\\t" 6 } } */
+/* { dg-final { scan-assembler-times "umin\\t" 6 } } */
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-6.c
b/gcc/testsuite/gcc.target/aarch64/pr116815-6.c
new file mode 100644
index 00000000000..2b5407bcfc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-6.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Verify that the transformation gives correct results */
+
+[[gnu::always_inline]]
+inline unsigned min (unsigned a, unsigned b)
+{
+ return (a < b) ? a : b;
+}
+
+[[gnu::always_inline]]
+inline unsigned max (unsigned a, unsigned b)
+{
+ return (a > b) ? a : b;
+}
+
+[[gnu::noipa]] unsigned
+umaxadd (unsigned a, unsigned b)
+{
+ return max (a + b, a);
+}
+
+[[gnu::noipa]] unsigned
+umaxsub (unsigned a, unsigned b)
+{
+ return max (a - b, a);
+}
+
+[[gnu::noipa]] unsigned
+uminadd (unsigned a, unsigned b)
+{
+ return min (a + b, a);
+}
+
+[[gnu::noipa]] unsigned
+uminsub (unsigned a, unsigned b)
+{
+ return min (a - b, a);
+}
+
+int
+main ()
+{
+ /* Overflows to 0x30000000. */
+ if (umaxadd (0x90000000, 0xa0000000) != 0x90000000)
+ __builtin_abort ();
+
+ if (uminadd (0x90000000, 0xa0000000) != 0x30000000)
+ __builtin_abort ();
+
+ /* Underflows to 0x60000000. */
+ if (umaxsub (0x00000000, 0xa0000000) != 0x60000000)
+ __builtin_abort ();
+
+ if (uminsub (0x00000000, 0xa0000000) != 0x00000000)
+ __builtin_abort ();
+}
--
2.44.0