From: Soumya AR <[email protected]>

This patch adds support for atomic min/max instructions offered by aarch64 under
LSE.

The implementation provides three execution paths:

1. When LSE is available at compile time (-march=armv8.1-a or later):
   Emits inline LSE atomic min/max instructions (ldsmin, ldsmax, ldumin,
   ldumax).

2. When LSE availability is unknown at compile time (default):
   Uses outline atomics - calls to libgcc functions that perform runtime
   detection for LSE and dispatch to either LSE instructions or LL/SC sequences.

3. When outline atomics are explicitly disabled (-mno-outline-atomics) on
   non-LSE targets: Emits inline LL/SC (LDXR, STXR, etc.) sequences
   using conditional select instructions for min/max.

----

For op_fetch variants, we first generate the appropriate fetch_op variant, then
use aarch64_split_atomic_op to generate the same operation (non-atomically) to
return the updated value. This function is extended to handle the min/max
operations. We have to be careful about QI/HI modes: ldxr and its variants
do a zero-extended load, so for signed min/max it's important to explicitly
sign-extend the values before comparing them.

----

lse.S is responsible for emitting the appropriate LSE or non-LSE sequence. For
min/max on non-LSE systems, this is done using a conditional select.

There is, however, one special case: systems with the CSSC extension also have
native min/max instructions. In that case, it would be preferable to emit the
LL/SC sequence using the native min/max instructions. However, this would only
occur on targets with CSSC but without LSE, which is quite improbable, so I
haven't added special handling for the CSSC feature.

----

Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu.
Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and
aarch64-linux-gnu without LSE.

Signed-off-by: Soumya AR <[email protected]>

gcc/ChangeLog:

        * config/aarch64/aarch64-protos.h: Add declarations for new
        outline atomic min/max name structures.
        * config/aarch64/aarch64.cc (DEF4): Define names for outline
        atomic min/max functions.
        (aarch64_ool_ldsmin_names, aarch64_ool_ldsmax_names,
        aarch64_ool_ldumin_names, aarch64_ool_ldumax_names): New.
        (aarch64_split_atomic_op): Add support for SMIN, SMAX, UMIN,
        UMAX operations with sign extension for QI/HI modes.
        * config/aarch64/atomics.md: Add LSE and outline atomics
        support for atomic fetch min/max operations.
        * config/aarch64/iterators.md: Add min/max iterators.

libgcc/ChangeLog:

        * config/aarch64/lse.S: Implement outline atomic min/max
        functions.
        * config/aarch64/t-lse: Add min/max function entries.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/atomic-minmax-lse.c: New test.
        * gcc.target/aarch64/atomic-minmax-nolse.c: New test.
        * gcc.target/aarch64/atomic-minmax.c: New test.
        * gcc.target/aarch64/atomic-minmax.x: New test.

---
 gcc/config/aarch64/aarch64-protos.h           |   4 +
 gcc/config/aarch64/aarch64.cc                 |  51 +++++
 gcc/config/aarch64/atomics.md                 |  54 ++++-
 gcc/config/aarch64/iterators.md               |  30 ++-
 .../gcc.target/aarch64/atomic-minmax-lse.c    | 122 +++++++++++
 .../gcc.target/aarch64/atomic-minmax-nolse.c  | 196 ++++++++++++++++++
 .../gcc.target/aarch64/atomic-minmax.c        | 128 ++++++++++++
 .../gcc.target/aarch64/atomic-minmax.x        | 185 +++++++++++++++++
 libgcc/config/aarch64/lse.S                   |  62 +++++-
 libgcc/config/aarch64/t-lse                   |   3 +-
 10 files changed, 825 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax.x

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 48d3a3de235..4df2d37e253 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1271,6 +1271,10 @@ extern const atomic_ool_names aarch64_ool_ldadd_names;
 extern const atomic_ool_names aarch64_ool_ldset_names;
 extern const atomic_ool_names aarch64_ool_ldclr_names;
 extern const atomic_ool_names aarch64_ool_ldeor_names;
+extern const atomic_ool_names aarch64_ool_ldsmin_names;
+extern const atomic_ool_names aarch64_ool_ldsmax_names;
+extern const atomic_ool_names aarch64_ool_ldumin_names;
+extern const atomic_ool_names aarch64_ool_ldumax_names;
 
 tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 293afa52b3b..5d2f96b7f20 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -26518,6 +26518,10 @@ const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
 const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
 const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
 const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
+const atomic_ool_names aarch64_ool_ldsmin_names = { { DEF4(ldsmin) } };
+const atomic_ool_names aarch64_ool_ldsmax_names = { { DEF4(ldsmax) } };
+const atomic_ool_names aarch64_ool_ldumin_names = { { DEF4(ldumin) } };
+const atomic_ool_names aarch64_ool_ldumax_names = { { DEF4(ldumax) } };
 
 #undef DEF0
 #undef DEF4
@@ -26770,6 +26774,53 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
       emit_insn (gen_rtx_SET (new_out, x));
       break;
 
+    case SMIN:
+    case SMAX:
+    case UMIN:
+    case UMAX:
+    {
+      rtx_code cmp_code;
+      switch (code)
+       {
+       case SMIN:
+         cmp_code = LT;
+         break;
+       case SMAX:
+         cmp_code = GT;
+         break;
+       case UMIN:
+         cmp_code = LTU;
+         break;
+       case UMAX:
+         cmp_code = GTU;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+
+      if ((code == SMIN || code == SMAX) && (mode == QImode || mode == HImode))
+       {
+         rtx old_extended = gen_rtx_REG (wmode, REGNO (old_out));
+         emit_insn (
+           gen_rtx_SET (old_extended,
+                        gen_rtx_SIGN_EXTEND (wmode,
+                                             gen_lowpart (mode, old_out))));
+         old_out = old_extended;
+
+         rtx value_extended = gen_rtx_REG (wmode, REGNO (value));
+         emit_insn (
+           gen_rtx_SET (value_extended,
+                        gen_rtx_SIGN_EXTEND (wmode,
+                                             gen_lowpart (mode, value))));
+         value = value_extended;
+       }
+      rtx cc_reg = aarch64_gen_compare_reg (cmp_code, old_out, value);
+      rtx cond = gen_rtx_fmt_ee (cmp_code, VOIDmode, cc_reg, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (wmode, cond, old_out, value);
+      emit_insn (gen_rtx_SET (new_out, x));
+      break;
+    }
+
     case MINUS:
       if (CONST_INT_P (value))
        {
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index c9534d43c0f..8dc0e3ffdac 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -284,6 +284,18 @@
          case XOR:
            gen = gen_aarch64_atomic_xor<mode>_lse;
            break;
+         case SMAX:
+           gen = gen_aarch64_atomic_smax<mode>_lse;
+           break;
+         case SMIN:
+           gen = gen_aarch64_atomic_smin<mode>_lse;
+           break;
+         case UMAX:
+           gen = gen_aarch64_atomic_umax<mode>_lse;
+           break;
+         case UMIN:
+           gen = gen_aarch64_atomic_umin<mode>_lse;
+           break;
          case AND:
            operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
                                              NULL, 1);
@@ -317,6 +329,18 @@
                                              NULL, 1);
            names = &aarch64_ool_ldclr_names;
            break;
+         case SMIN:
+           names = &aarch64_ool_ldsmin_names;
+           break;
+         case SMAX:
+           names = &aarch64_ool_ldsmax_names;
+           break;
+         case UMIN:
+           names = &aarch64_ool_ldumin_names;
+           break;
+         case UMAX:
+           names = &aarch64_ool_ldumax_names;
+           break;
          default:
            gcc_unreachable ();
          }
@@ -442,6 +466,18 @@
        case XOR:
          gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
          break;
+       case SMAX:
+         gen = gen_aarch64_atomic_fetch_smax<mode>_lse;
+         break;
+       case SMIN:
+         gen = gen_aarch64_atomic_fetch_smin<mode>_lse;
+         break;
+       case UMAX:
+         gen = gen_aarch64_atomic_fetch_umax<mode>_lse;
+         break;
+       case UMIN:
+         gen = gen_aarch64_atomic_fetch_umin<mode>_lse;
+         break;
        case AND:
          operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
                                            NULL, 1);
@@ -475,6 +511,18 @@
                                            NULL, 1);
          names = &aarch64_ool_ldclr_names;
          break;
+       case SMIN:
+         names = &aarch64_ool_ldsmin_names;
+         break;
+       case SMAX:
+         names = &aarch64_ool_ldsmax_names;
+         break;
+       case UMIN:
+         names = &aarch64_ool_ldumin_names;
+         break;
+       case UMAX:
+         names = &aarch64_ool_ldumax_names;
+         break;
        default:
          gcc_unreachable ();
        }
@@ -581,7 +629,11 @@
       operands[2] = force_reg (<MODE>mode, operands[2]);
       emit_insn (gen_atomic_fetch_<atomic_optab><mode>
                  (tmp, operands[1], operands[2], operands[3]));
-      tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+      if (<CODE> == SMIN || <CODE> == SMAX)
+       tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+                                operands[0], 0, OPTAB_WIDEN);
+      else
+       tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
                                 operands[0], 1, OPTAB_WIDEN);
       emit_move_insn (operands[0], tmp);
     }
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b425b0ed2ca..e4c1c8844bb 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1354,6 +1354,10 @@
     UNSPECV_ATOMIC_LDOP_BIC    ; Represent an atomic load-bic
     UNSPECV_ATOMIC_LDOP_XOR    ; Represent an atomic load-xor
     UNSPECV_ATOMIC_LDOP_PLUS   ; Represent an atomic load-add
+    UNSPECV_ATOMIC_LDOP_SMAX   ; Represent an atomic load-smax
+    UNSPECV_ATOMIC_LDOP_SMIN   ; Represent an atomic load-smin
+    UNSPECV_ATOMIC_LDOP_UMAX   ; Represent an atomic load-umax
+    UNSPECV_ATOMIC_LDOP_UMIN   ; Represent an atomic load-umin
 ])
 
 ;; -------------------------------------------------------------------
@@ -2898,7 +2902,7 @@
 ;; Iterator for __sync_<op> operations that where the operation can be
 ;; represented directly RTL.  This is all of the sync operations bar
 ;; nand.
-(define_code_iterator atomic_op [plus minus ior xor and])
+(define_code_iterator atomic_op [plus minus ior xor and smin smax umin umax])
 
 ;; Iterator for integer conversions
 (define_code_iterator FIXUORS [fix unsigned_fix])
@@ -3215,21 +3219,27 @@
 
 ;; Atomic operations
 (define_code_attr atomic_optab
-  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")])
+  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")
+  (smin "smin") (smax "smax") (umin "umin") (umax "umax")])
 
 (define_code_attr atomic_op_operand
   [(ior "aarch64_logical_operand")
    (xor "aarch64_logical_operand")
    (and "aarch64_logical_operand")
    (plus "aarch64_plus_operand")
-   (minus "aarch64_plus_operand")])
+   (minus "aarch64_plus_operand")
+   (smin "aarch64_sminmax_operand")
+   (smax "aarch64_sminmax_operand")
+   (umin "aarch64_uminmax_operand")
+   (umax "aarch64_uminmax_operand")])
 
 ;; Constants acceptable for atomic operations.
 ;; This definition must appear in this file before the iterators it refers to.
 (define_code_attr const_atomic
  [(plus "IJ") (minus "IJ")
   (xor "<lconst_atomic>") (ior "<lconst_atomic>")
-  (and "<lconst_atomic>")])
+  (and "<lconst_atomic>")
+  (smin "") (smax "") (umin "") (umax "")])
 
 ;; Attribute to describe constants acceptable in atomic logical operations
 (define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")])
@@ -4096,7 +4106,9 @@
 
 (define_int_iterator ATOMIC_LDOP
  [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC
-  UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS])
+  UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS
+  UNSPECV_ATOMIC_LDOP_SMAX UNSPECV_ATOMIC_LDOP_SMIN
+  UNSPECV_ATOMIC_LDOP_UMAX UNSPECV_ATOMIC_LDOP_UMIN])
 
 (define_int_iterator SUBDI_BITS [8 16 32])
 
@@ -5255,11 +5267,15 @@
 
 (define_int_attr atomic_ldop
  [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
-  (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+  (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")
+  (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin")
+  (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")])
 
 (define_int_attr atomic_ldoptab
  [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
-  (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+  (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")
+  (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin")
+  (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")])
 
 (define_int_attr fp8_cvt_uns_op
   [(UNSPEC_F1CVT "f1cvt")
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
new file mode 100644
index 00000000000..6d579f8360a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+lse" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "atomic-minmax.x"
+
+/* { dg-final { scan-assembler-not "\tldxr" } } */
+/* { dg-final { scan-assembler-not "\tldaxr" } } */
+/* { dg-final { scan-assembler-not "\tstxr" } } */
+/* { dg-final { scan-assembler-not "\tstlxr" } } */
+
+/*
+** test_smin_s8:
+**     ...
+**     ldsminb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smax_s8:
+**     ...
+**     ldsmaxlb        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smin_s16:
+**     ...
+**     ldsminah        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smax_s16:
+**     ...
+**     ldsmaxalh       w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smin_s32:
+**     ...
+**     ldsmin  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smax_s32:
+**     ...
+**     ldsmaxal        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smin_s64:
+**     ...
+**     ldsmina x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_smax_s64:
+**     ...
+**     ldsmax  x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umin_u8:
+**     ...
+**     lduminb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umax_u8:
+**     ...
+**     ldumaxab        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umin_u16:
+**     ...
+**     lduminah        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umax_u16:
+**     ...
+**     ldumaxlh        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umin_u32:
+**     ...
+**     lduminal        w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umax_u32:
+**     ...
+**     ldumax  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umin_u64:
+**     ...
+**     ldumin  x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ...
+*/
+
+/*
+** test_umax_u64:
+**     ...
+**     ldumaxal        x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ...
+*/
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
new file mode 100644
index 00000000000..e4962974ea3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
@@ -0,0 +1,196 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+nolse -mno-outline-atomics" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "atomic-minmax.x"
+
+/* { dg-final { scan-assembler-not "\tldsmin" } } */
+/* { dg-final { scan-assembler-not "\tldsmax" } } */
+/* { dg-final { scan-assembler-not "\tldumin" } } */
+/* { dg-final { scan-assembler-not "\tldumax" } } */
+
+/* { dg-final { scan-assembler-not "__aarch64_" } } */
+
+/*
+** test_smin_s8:
+**     ...
+**     ldxrb   w[0-9]+, \[x[0-9]+\]
+**     sxtb    w[0-9]+, w[0-9]+
+**     sxtb    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, lt
+**     stxrb   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smax_s8:
+**     ...
+**     ldxrb   w[0-9]+, \[x[0-9]+\]
+**     sxtb    w[0-9]+, w[0-9]+
+**     sxtb    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, gt
+**     stlxrb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smin_s16:
+**     ...
+**     ldaxrh  w[0-9]+, \[x[0-9]+\]
+**     sxth    w[0-9]+, w[0-9]+
+**     sxth    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, lt
+**     stxrh   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smax_s16:
+**     ...
+**     ldaxrh  w[0-9]+, \[x[0-9]+\]
+**     sxth    w[0-9]+, w[0-9]+
+**     sxth    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, gt
+**     stlxrh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smin_s32:
+**     ...
+**     ldxr    w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, lt
+**     stxr    w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smax_s32:
+**     ...
+**     ldaxr   w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, gt
+**     stlxr   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smin_s64:
+**     ...
+**     ldaxr   x[0-9]+, \[x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, lt
+**     stxr    w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_smax_s64:
+**     ...
+**     ldxr    x[0-9]+, \[x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, gt
+**     stxr    w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umin_u8:
+**     ...
+**     ldxrb   w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, cc
+**     stxrb   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umax_u8:
+**     ...
+**     ldaxrb  w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, hi
+**     stxrb   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umin_u16:
+**     ...
+**     ldaxrh  w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, cc
+**     stxrh   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umax_u16:
+**     ...
+**     ldxrh   w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, hi
+**     stlxrh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umin_u32:
+**     ...
+**     ldaxr   w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, cc
+**     stlxr   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umax_u32:
+**     ...
+**     ldxr    w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, hi
+**     stxr    w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umin_u64:
+**     ...
+**     ldxr    x[0-9]+, \[x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, cc
+**     stxr    w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
+
+/*
+** test_umax_u64:
+**     ...
+**     ldaxr   x[0-9]+, \[x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, hi
+**     stlxr   w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     cbnz    w[0-9]+, .*
+**     ...
+*/
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
new file mode 100644
index 00000000000..225816b63bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
@@ -0,0 +1,128 @@
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "atomic-minmax.x"
+
+int main ()
+{
+  run_tests();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "\tldsmin" } } */
+/* { dg-final { scan-assembler-not "\tldsmax" } } */
+/* { dg-final { scan-assembler-not "\tldumin" } } */
+/* { dg-final { scan-assembler-not "\tldumax" } } */
+
+/*
+** test_smin_s8:
+**     ...
+**     bl      __aarch64_ldsmin1_relax
+**     ...
+*/
+
+/*
+** test_smax_s8:
+**     ...
+**     bl      __aarch64_ldsmax1_rel
+**     ...
+*/
+
+/*
+** test_smin_s16:
+**     ...
+**     bl      __aarch64_ldsmin2_acq
+**     ...
+*/
+
+/*
+** test_smax_s16:
+**     ...
+**     bl      __aarch64_ldsmax2_acq_rel
+**     ...
+*/
+
+/*
+** test_smin_s32:
+**     ...
+**     bl      __aarch64_ldsmin4_relax
+**     ...
+*/
+
+/*
+** test_smax_s32:
+**     ...
+**     bl      __aarch64_ldsmax4_acq_rel
+**     ...
+*/
+
+/*
+** test_smin_s64:
+**     ...
+**     bl      __aarch64_ldsmin8_acq
+**     ...
+*/
+
+/*
+** test_smax_s64:
+**     ...
+**     bl      __aarch64_ldsmax8_relax
+**     ...
+*/
+
+/*
+** test_umin_u8:
+**     ...
+**     bl      __aarch64_ldumin1_relax
+**     ...
+*/
+
+/*
+** test_umax_u8:
+**     ...
+**     bl      __aarch64_ldumax1_acq
+**     ...
+*/
+
+/*
+** test_umin_u16:
+**     ...
+**     bl      __aarch64_ldumin2_acq
+**     ...
+*/
+
+/*
+** test_umax_u16:
+**     ...
+**     bl      __aarch64_ldumax2_rel
+**     ...
+*/
+
+/*
+** test_umin_u32:
+**     ...
+**     bl      __aarch64_ldumin4_acq_rel
+**     ...
+*/
+
+/*
+** test_umax_u32:
+**     ...
+**     bl      __aarch64_ldumax4_relax
+**     ...
+*/
+
+/*
+** test_umin_u64:
+**     ...
+**     bl      __aarch64_ldumin8_relax
+**     ...
+*/
+
+/*
+** test_umax_u64:
+**     ...
+**     bl      __aarch64_ldumax8_acq_rel
+**     ...
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x
new file mode 100644
index 00000000000..e9e21d3f2db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x
@@ -0,0 +1,185 @@
+#include <stdint.h>
+
+extern void abort(void);
+
+#define TEST_FETCH_OP(TYPE, VAR, OP, INIT, ARG, EXPECTED_OLD, EXPECTED_NEW, MEM_ORDER) \
+  do { \
+    VAR = INIT; \
+    TYPE old_val = __atomic_fetch_##OP(&VAR, ARG, MEM_ORDER); \
+    TYPE new_val = VAR; \
+    if (old_val != EXPECTED_OLD) { \
+      abort(); \
+    } \
+    if (new_val != EXPECTED_NEW) { \
+      abort(); \
+    } \
+  } while (0)
+
+#define TEST_OP_FETCH(TYPE, VAR, OP, INIT, ARG, EXPECTED_OLD, EXPECTED_NEW, MEM_ORDER) \
+  do { \
+    VAR = INIT; \
+    TYPE result = __atomic_##OP##_fetch(&VAR, ARG, MEM_ORDER); \
+    TYPE new_val = VAR; \
+    if (result != EXPECTED_NEW) { \
+      abort(); \
+    } \
+    if (new_val != EXPECTED_NEW) { \
+      abort(); \
+    } \
+  } while (0)
+
+#define GEN_TYPE_TESTS(TYPE, VAR, SUFFIX, IS_SIGNED) \
+  TYPE VAR; \
+  void test_##SUFFIX() { \
+    TEST_FETCH_OP(TYPE, VAR, min, 10, 5, 10, 5, __ATOMIC_RELAXED); \
+    TEST_FETCH_OP(TYPE, VAR, min, 10, 20, 10, 10, __ATOMIC_SEQ_CST); \
+    TEST_FETCH_OP(TYPE, VAR, max, 10, 20, 10, 20, __ATOMIC_ACQUIRE); \
+    TEST_FETCH_OP(TYPE, VAR, max, 10, 5, 10, 10, __ATOMIC_RELEASE); \
+    \
+    TEST_OP_FETCH(TYPE, VAR, min, 10, 5, 10, 5, __ATOMIC_CONSUME); \
+    TEST_OP_FETCH(TYPE, VAR, min, 10, 20, 10, 10, __ATOMIC_ACQ_REL); \
+    TEST_OP_FETCH(TYPE, VAR, max, 10, 20, 10, 20, __ATOMIC_RELAXED); \
+    TEST_OP_FETCH(TYPE, VAR, max, 10, 5, 10, 10, __ATOMIC_SEQ_CST); \
+    \
+    if (IS_SIGNED) { \
+      TEST_FETCH_OP(TYPE, VAR, min, -10, -20, -10, -20, __ATOMIC_ACQUIRE); \
+      TEST_FETCH_OP(TYPE, VAR, max, -10, 5, -10, 5, __ATOMIC_RELEASE); \
+      TEST_FETCH_OP(TYPE, VAR, min, -5, -3, -5, -5, __ATOMIC_RELAXED); \
+      TEST_FETCH_OP(TYPE, VAR, max, -20, -10, -20, -10, __ATOMIC_SEQ_CST); \
+      TEST_OP_FETCH(TYPE, VAR, min, -100, 50, -100, -100, __ATOMIC_ACQ_REL); \
+      TEST_OP_FETCH(TYPE, VAR, max, -50, -60, -50, -50, __ATOMIC_CONSUME); \
+    } \
+  }
+
+GEN_TYPE_TESTS(int8_t, s8_var_test, s8, 1)
+GEN_TYPE_TESTS(int16_t, s16_var_test, s16, 1)
+GEN_TYPE_TESTS(int32_t, s32_var_test, s32, 1)
+GEN_TYPE_TESTS(int64_t, s64_var_test, s64, 1)
+
+GEN_TYPE_TESTS(uint8_t, u8_var_test, u8, 0)
+GEN_TYPE_TESTS(uint16_t, u16_var_test, u16, 0)
+GEN_TYPE_TESTS(uint32_t, u32_var_test, u32, 0)
+GEN_TYPE_TESTS(uint64_t, u64_var_test, u64, 0)
+
+void run_tests() {
+  test_s8();
+  test_s16();
+  test_s32();
+  test_s64();
+  test_u8();
+  test_u16();
+  test_u32();
+  test_u64();
+}
+
+int8_t s8_var = 0;
+
+int8_t
+test_smin_s8 (int8_t a)
+{
+  return __atomic_fetch_min (&s8_var, a, __ATOMIC_RELAXED);
+}
+
+int8_t
+test_smax_s8 (int8_t a)
+{
+  return __atomic_fetch_max (&s8_var, a, __ATOMIC_RELEASE);
+}
+
+int16_t s16_var = 0;
+
+int16_t
+test_smin_s16 (int16_t a)
+{
+  return __atomic_fetch_min (&s16_var, a, __ATOMIC_ACQUIRE);
+}
+
+int16_t
+test_smax_s16 (int16_t a)
+{
+  return __atomic_fetch_max (&s16_var, a, __ATOMIC_ACQ_REL);
+}
+
+int32_t s32_var = 0;
+
+int32_t
+test_smin_s32 (int32_t a)
+{
+  return __atomic_fetch_min (&s32_var, a, __ATOMIC_RELAXED);
+}
+
+int32_t
+test_smax_s32 (int32_t a)
+{
+  return __atomic_fetch_max (&s32_var, a, __ATOMIC_SEQ_CST);
+}
+
+int64_t s64_var = 0;
+
+int64_t
+test_smin_s64 (int64_t a)
+{
+  return __atomic_fetch_min (&s64_var, a, __ATOMIC_ACQUIRE);
+}
+
+int64_t
+test_smax_s64 (int64_t a)
+{
+  return __atomic_fetch_max (&s64_var, a, __ATOMIC_RELAXED);
+}
+
+uint8_t u8_var = 0;
+
+uint8_t
+test_umin_u8 (uint8_t a)
+{
+  return __atomic_fetch_min (&u8_var, a, __ATOMIC_RELAXED);
+}
+
+uint8_t
+test_umax_u8 (uint8_t a)
+{
+  return __atomic_fetch_max (&u8_var, a, __ATOMIC_CONSUME);
+}
+
+uint16_t u16_var = 0;
+
+uint16_t
+test_umin_u16 (uint16_t a)
+{
+  return __atomic_fetch_min (&u16_var, a, __ATOMIC_ACQUIRE);
+}
+
+uint16_t
+test_umax_u16 (uint16_t a)
+{
+  return __atomic_fetch_max (&u16_var, a, __ATOMIC_RELEASE);
+}
+
+uint32_t u32_var = 0;
+
+uint32_t
+test_umin_u32 (uint32_t a)
+{
+  return __atomic_fetch_min (&u32_var, a, __ATOMIC_ACQ_REL);
+}
+
+uint32_t
+test_umax_u32 (uint32_t a)
+{
+  return __atomic_fetch_max (&u32_var, a, __ATOMIC_RELAXED);
+}
+
+uint64_t u64_var = 0;
+
+uint64_t
+test_umin_u64 (uint64_t a)
+{
+  return __atomic_fetch_min (&u64_var, a, __ATOMIC_RELAXED);
+}
+
+uint64_t
+test_umax_u64 (uint64_t a)
+{
+  return __atomic_fetch_max (&u64_var, a, __ATOMIC_ACQ_REL);
+}
\ No newline at end of file
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index e31ffa41514..f098cb278ec 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -276,7 +276,9 @@ ENDFN       NAME(swp)
 #endif
 
 #if defined(L_ldadd) || defined(L_ldclr) \
-    || defined(L_ldeor) || defined(L_ldset)
+    || defined(L_ldeor) || defined(L_ldset) \
+    || defined(L_ldsmin) || defined(L_ldsmax) \
+    || defined(L_ldumin) || defined(L_ldumax)
 
 #ifdef L_ldadd
 #define LDNM   ldadd
@@ -294,6 +296,26 @@ ENDFN      NAME(swp)
 #define LDNM   ldset
 #define OP     orr
 #define OPN    0x3000
+#elif defined(L_ldsmin)
+#define LDNM   ldsmin
+#define OP     smin
+#define OPN    0x5000
+#define IS_MINMAX 1
+#elif defined(L_ldsmax)
+#define LDNM   ldsmax
+#define OP     smax
+#define OPN    0x4000
+#define IS_MINMAX 1
+#elif defined(L_ldumin)
+#define LDNM   ldumin
+#define OP     umin
+#define OPN    0x7000
+#define IS_MINMAX 1
+#elif defined(L_ldumax)
+#define LDNM   ldumax
+#define OP     umax
+#define OPN    0x6000
+#define IS_MINMAX 1
 #else
 #error
 #endif
@@ -311,7 +333,45 @@ STARTFN    NAME(LDNM)
 
 8:     mov             s(tmp0), s(0)
 0:     LDXR            s(0), [x1]
+#ifdef IS_MINMAX
+       /* For min/max, extend if needed, compare, and select.  */
+#if SIZE < 4
+  #if defined(L_ldsmin) || defined(L_ldsmax)
+    /* Sign extend for signed comparisons.  */
+    #if SIZE == 1
+       sxtb            w(tmp1), w(0)
+       sxtb            w(tmp3), w(tmp0)
+    #else /* SIZE == 2 */
+       sxth            w(tmp1), w(0)
+       sxth            w(tmp3), w(tmp0)
+    #endif
+  #else /* L_ldumin || L_ldumax */
+    /* Zero extend for unsigned comparisons.  */
+    #if SIZE == 1
+       uxtb            w(tmp1), w(0)
+       uxtb            w(tmp3), w(tmp0)
+    #else /* SIZE == 2 */
+       uxth            w(tmp1), w(0)
+       uxth            w(tmp3), w(tmp0)
+    #endif
+  #endif
+       cmp             w(tmp3), w(tmp1)
+#else /* SIZE >= 4 */
+       cmp             s(tmp0), s(0)
+#endif
+  /* Select based on condition.  */
+  #if defined(L_ldsmin)
+       csel            s(tmp1), s(tmp0), s(0), lt
+  #elif defined(L_ldsmax)
+       csel            s(tmp1), s(tmp0), s(0), gt
+  #elif defined(L_ldumin)
+       csel            s(tmp1), s(tmp0), s(0), lo
+  #elif defined(L_ldumax)
+       csel            s(tmp1), s(tmp0), s(0), hi
+  #endif
+#else /* Not IS_MINMAX */
        OP              s(tmp1), s(0), s(tmp0)
+#endif /* IS_MINMAX */
        STXR            w(tmp2), s(tmp1), [x1]
        cbnz            w(tmp2), 0b
        BARRIER
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
index c58f003114f..955243341b3 100644
--- a/libgcc/config/aarch64/t-lse
+++ b/libgcc/config/aarch64/t-lse
@@ -23,7 +23,8 @@ S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
 O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0)))
 
 # Swap, Load-and-operate have 4 sizes and 5 memory models
-S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset \
+                             ldsmin ldsmax ldumin ldumax))
 O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1)))
 
 LSE_OBJS := $(O0) $(O1)
-- 
2.43.0


Reply via email to