From: Soumya AR <[email protected]> This patch adds support for atomic min/max instructions offered by aarch64 under LSE.
The implementation provides three execution paths: 1. When LSE is available at compile time (-march=armv8.1-a or later): Emits inline LSE atomic min/max instructions (ldsmin, ldsmax, ldumin, ldumax). 2. When LSE availability is unknown at compile time (default): Uses outline atomics - calls to libgcc functions that perform runtime detection for LSE and dispatch to either LSE instructions or LL/SC sequences. 3. When outline atomics are explicitly disabled (-mno-outline-atomics) on non-LSE targets: Emits inline LL/SC (LDXR, STXR etc) sequences using conditional select instructions for min/max. ---- For op_fetch variants, we first generate the appropriate fetch_op variant, then use aarch64_split_atomic_op to generate the same operation (non-atomically) to return the updated value. This function is extended to handle the min/max operations. We have to be careful about QI/HI modes, as ldxr and its variants do a zero extended load, so it's important to explicitly sign extend the values before comparing them. ---- lse.S is responsible for emitting the appropriate LSE or non-LSE sequence. For min/max on non-LSE systems, this is done using a conditional select. There is, however, a unique case where systems with the CSSC extension have native min/max instructions as well. In that case, it would be preferable to emit the LL/SC sequence using the native min/max instructions. But, this would only occur on targets with CSSC but without LSE, which is quite improbable, and thus, I haven't added special handling for the CSSC feature. ---- Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu. Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and aarch64-linux-gnu without LSE. Signed-off-by: Soumya AR <[email protected]> gcc/ChangeLog: * config/aarch64/aarch64-protos.h: Add declarations for new outline atomic min/max name structures. * config/aarch64/aarch64.cc (DEF4): Define names for outline atomic min/max functions. 
(aarch64_ool_ldsmin_names, aarch64_ool_ldsmax_names, aarch64_ool_ldumin_names, aarch64_ool_ldumax_names): New. (aarch64_split_atomic_op): Add support for SMIN, SMAX, UMIN, UMAX operations with sign extension for QI/HI modes. * config/aarch64/atomics.md: Add LSE and outline atomics support for atomic fetch min/max operations. * config/aarch64/iterators.md: Add min/max iterators. libgcc/ChangeLog: * config/aarch64/lse.S: Implement outline atomic min/max functions. * config/aarch64/t-lse: Add min/max function entries. gcc/testsuite/ChangeLog: * gcc.target/aarch64/atomic-minmax-lse.c: New test. * gcc.target/aarch64/atomic-minmax-nolse.c: New test. * gcc.target/aarch64/atomic-minmax.c: New test. * gcc.target/aarch64/atomic-minmax.x: New test. --- gcc/config/aarch64/aarch64-protos.h | 4 + gcc/config/aarch64/aarch64.cc | 51 +++++ gcc/config/aarch64/atomics.md | 54 ++++- gcc/config/aarch64/iterators.md | 30 ++- .../gcc.target/aarch64/atomic-minmax-lse.c | 122 +++++++++++ .../gcc.target/aarch64/atomic-minmax-nolse.c | 196 ++++++++++++++++++ .../gcc.target/aarch64/atomic-minmax.c | 128 ++++++++++++ .../gcc.target/aarch64/atomic-minmax.x | 185 +++++++++++++++++ libgcc/config/aarch64/lse.S | 62 +++++- libgcc/config/aarch64/t-lse | 3 +- 10 files changed, 825 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax.c create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax.x diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 48d3a3de235..4df2d37e253 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1271,6 +1271,10 @@ extern const atomic_ool_names aarch64_ool_ldadd_names; extern const atomic_ool_names aarch64_ool_ldset_names; extern const atomic_ool_names aarch64_ool_ldclr_names; extern const 
atomic_ool_names aarch64_ool_ldeor_names; +extern const atomic_ool_names aarch64_ool_ldsmin_names; +extern const atomic_ool_names aarch64_ool_ldsmax_names; +extern const atomic_ool_names aarch64_ool_ldumin_names; +extern const atomic_ool_names aarch64_ool_ldumax_names; tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 293afa52b3b..5d2f96b7f20 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -26518,6 +26518,10 @@ const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; +const atomic_ool_names aarch64_ool_ldsmin_names = { { DEF4(ldsmin) } }; +const atomic_ool_names aarch64_ool_ldsmax_names = { { DEF4(ldsmax) } }; +const atomic_ool_names aarch64_ool_ldumin_names = { { DEF4(ldumin) } }; +const atomic_ool_names aarch64_ool_ldumax_names = { { DEF4(ldumax) } }; #undef DEF0 #undef DEF4 @@ -26770,6 +26774,53 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, emit_insn (gen_rtx_SET (new_out, x)); break; + case SMIN: + case SMAX: + case UMIN: + case UMAX: + { + rtx_code cmp_code; + switch (code) + { + case SMIN: + cmp_code = LT; + break; + case SMAX: + cmp_code = GT; + break; + case UMIN: + cmp_code = LTU; + break; + case UMAX: + cmp_code = GTU; + break; + default: + gcc_unreachable (); + } + + if ((code == SMIN || code == SMAX) && (mode == QImode || mode == HImode)) + { + rtx old_extended = gen_rtx_REG (wmode, REGNO (old_out)); + emit_insn ( + gen_rtx_SET (old_extended, + gen_rtx_SIGN_EXTEND (wmode, + gen_lowpart (mode, old_out)))); + old_out = old_extended; + + rtx value_extended = gen_rtx_REG (wmode, REGNO (value)); + emit_insn ( + gen_rtx_SET (value_extended, + gen_rtx_SIGN_EXTEND 
(wmode, + gen_lowpart (mode, value)))); + value = value_extended; + } + rtx cc_reg = aarch64_gen_compare_reg (cmp_code, old_out, value); + rtx cond = gen_rtx_fmt_ee (cmp_code, VOIDmode, cc_reg, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (wmode, cond, old_out, value); + emit_insn (gen_rtx_SET (new_out, x)); + break; + } + case MINUS: if (CONST_INT_P (value)) { diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index c9534d43c0f..8dc0e3ffdac 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -284,6 +284,18 @@ case XOR: gen = gen_aarch64_atomic_xor<mode>_lse; break; + case SMAX: + gen = gen_aarch64_atomic_smax<mode>_lse; + break; + case SMIN: + gen = gen_aarch64_atomic_smin<mode>_lse; + break; + case UMAX: + gen = gen_aarch64_atomic_umax<mode>_lse; + break; + case UMIN: + gen = gen_aarch64_atomic_umin<mode>_lse; + break; case AND: operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1], NULL, 1); @@ -317,6 +329,18 @@ NULL, 1); names = &aarch64_ool_ldclr_names; break; + case SMIN: + names = &aarch64_ool_ldsmin_names; + break; + case SMAX: + names = &aarch64_ool_ldsmax_names; + break; + case UMIN: + names = &aarch64_ool_ldumin_names; + break; + case UMAX: + names = &aarch64_ool_ldumax_names; + break; default: gcc_unreachable (); } @@ -442,6 +466,18 @@ case XOR: gen = gen_aarch64_atomic_fetch_xor<mode>_lse; break; + case SMAX: + gen = gen_aarch64_atomic_fetch_smax<mode>_lse; + break; + case SMIN: + gen = gen_aarch64_atomic_fetch_smin<mode>_lse; + break; + case UMAX: + gen = gen_aarch64_atomic_fetch_umax<mode>_lse; + break; + case UMIN: + gen = gen_aarch64_atomic_fetch_umin<mode>_lse; + break; case AND: operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2], NULL, 1); @@ -475,6 +511,18 @@ NULL, 1); names = &aarch64_ool_ldclr_names; break; + case SMIN: + names = &aarch64_ool_ldsmin_names; + break; + case SMAX: + names = &aarch64_ool_ldsmax_names; + break; + case UMIN: + names = &aarch64_ool_ldumin_names; + 
break; + case UMAX: + names = &aarch64_ool_ldumax_names; + break; default: gcc_unreachable (); } @@ -581,7 +629,11 @@ operands[2] = force_reg (<MODE>mode, operands[2]); emit_insn (gen_atomic_fetch_<atomic_optab><mode> (tmp, operands[1], operands[2], operands[3])); - tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2], + if (<CODE> == SMIN || <CODE> == SMAX) + tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2], + operands[0], 0, OPTAB_WIDEN); + else + tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2], operands[0], 1, OPTAB_WIDEN); emit_move_insn (operands[0], tmp); } diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b425b0ed2ca..e4c1c8844bb 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1354,6 +1354,10 @@ UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor UNSPECV_ATOMIC_LDOP_PLUS ; Represent an atomic load-add + UNSPECV_ATOMIC_LDOP_SMAX ; Represent an atomic load-smax + UNSPECV_ATOMIC_LDOP_SMIN ; Represent an atomic load-smin + UNSPECV_ATOMIC_LDOP_UMAX ; Represent an atomic load-umax + UNSPECV_ATOMIC_LDOP_UMIN ; Represent an atomic load-umin ]) ;; ------------------------------------------------------------------- @@ -2898,7 +2902,7 @@ ;; Iterator for __sync_<op> operations that where the operation can be ;; represented directly RTL. This is all of the sync operations bar ;; nand. 
-(define_code_iterator atomic_op [plus minus ior xor and]) +(define_code_iterator atomic_op [plus minus ior xor and smin smax umin umax]) ;; Iterator for integer conversions (define_code_iterator FIXUORS [fix unsigned_fix]) @@ -3215,21 +3219,27 @@ ;; Atomic operations (define_code_attr atomic_optab - [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")]) + [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub") + (smin "smin") (smax "smax") (umin "umin") (umax "umax")]) (define_code_attr atomic_op_operand [(ior "aarch64_logical_operand") (xor "aarch64_logical_operand") (and "aarch64_logical_operand") (plus "aarch64_plus_operand") - (minus "aarch64_plus_operand")]) + (minus "aarch64_plus_operand") + (smin "aarch64_sminmax_operand") + (smax "aarch64_sminmax_operand") + (umin "aarch64_uminmax_operand") + (umax "aarch64_uminmax_operand")]) ;; Constants acceptable for atomic operations. ;; This definition must appear in this file before the iterators it refers to. (define_code_attr const_atomic [(plus "IJ") (minus "IJ") (xor "<lconst_atomic>") (ior "<lconst_atomic>") - (and "<lconst_atomic>")]) + (and "<lconst_atomic>") + (smin "") (smax "") (umin "") (umax "")]) ;; Attribute to describe constants acceptable in atomic logical operations (define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")]) @@ -4096,7 +4106,9 @@ (define_int_iterator ATOMIC_LDOP [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC - UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS]) + UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS + UNSPECV_ATOMIC_LDOP_SMAX UNSPECV_ATOMIC_LDOP_SMIN + UNSPECV_ATOMIC_LDOP_UMAX UNSPECV_ATOMIC_LDOP_UMIN]) (define_int_iterator SUBDI_BITS [8 16 32]) @@ -5255,11 +5267,15 @@ (define_int_attr atomic_ldop [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr") - (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) + (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add") + (UNSPECV_ATOMIC_LDOP_SMAX "smax") 
(UNSPECV_ATOMIC_LDOP_SMIN "smin") + (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")]) (define_int_attr atomic_ldoptab [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic") - (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) + (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add") + (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin") + (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")]) (define_int_attr fp8_cvt_uns_op [(UNSPEC_F1CVT "f1cvt") diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c new file mode 100644 index 00000000000..6d579f8360a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a+lse" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "atomic-minmax.x" + +/* { dg-final { scan-assembler-not "\tldxr" } } */ +/* { dg-final { scan-assembler-not "\tldaxr" } } */ +/* { dg-final { scan-assembler-not "\tstxr" } } */ +/* { dg-final { scan-assembler-not "\tstlxr" } } */ + +/* +** test_smin_s8: +** ... +** ldsminb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smax_s8: +** ... +** ldsmaxlb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smin_s16: +** ... +** ldsminah w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smax_s16: +** ... +** ldsmaxalh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smin_s32: +** ... +** ldsmin w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smax_s32: +** ... +** ldsmaxal w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smin_s64: +** ... +** ldsmina x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_smax_s64: +** ... +** ldsmax x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umin_u8: +** ... +** lduminb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umax_u8: +** ... 
+** ldumaxab w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umin_u16: +** ... +** lduminah w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umax_u16: +** ... +** ldumaxlh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umin_u32: +** ... +** lduminal w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umax_u32: +** ... +** ldumax w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umin_u64: +** ... +** ldumin x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ... +*/ + +/* +** test_umax_u64: +** ... +** ldumaxal x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ... +*/ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c new file mode 100644 index 00000000000..e4962974ea3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c @@ -0,0 +1,196 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a+nolse -mno-outline-atomics" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "atomic-minmax.x" + +/* { dg-final { scan-assembler-not "\tldsmin" } } */ +/* { dg-final { scan-assembler-not "\tldsmax" } } */ +/* { dg-final { scan-assembler-not "\tldumin" } } */ +/* { dg-final { scan-assembler-not "\tldumax" } } */ + +/* { dg-final { scan-assembler-not "__aarch64_" } } */ + +/* +** test_smin_s8: +** ... +** ldxrb w[0-9]+, \[x[0-9]+\] +** sxtb w[0-9]+, w[0-9]+ +** sxtb w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, lt +** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smax_s8: +** ... +** ldxrb w[0-9]+, \[x[0-9]+\] +** sxtb w[0-9]+, w[0-9]+ +** sxtb w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, gt +** stlxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smin_s16: +** ... 
+** ldaxrh w[0-9]+, \[x[0-9]+\] +** sxth w[0-9]+, w[0-9]+ +** sxth w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, lt +** stxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smax_s16: +** ... +** ldaxrh w[0-9]+, \[x[0-9]+\] +** sxth w[0-9]+, w[0-9]+ +** sxth w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, gt +** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smin_s32: +** ... +** ldxr w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, lt +** stxr w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smax_s32: +** ... +** ldaxr w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, gt +** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smin_s64: +** ... +** ldaxr x[0-9]+, \[x[0-9]+\] +** cmp x[0-9]+, x[0-9]+ +** csel x[0-9]+, x[0-9]+, x[0-9]+, lt +** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_smax_s64: +** ... +** ldxr x[0-9]+, \[x[0-9]+\] +** cmp x[0-9]+, x[0-9]+ +** csel x[0-9]+, x[0-9]+, x[0-9]+, gt +** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umin_u8: +** ... +** ldxrb w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, cc +** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umax_u8: +** ... +** ldaxrb w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, hi +** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umin_u16: +** ... +** ldaxrh w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, cc +** stxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umax_u16: +** ... 
+** ldxrh w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, hi +** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umin_u32: +** ... +** ldaxr w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, cc +** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umax_u32: +** ... +** ldxr w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, hi +** stxr w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umin_u64: +** ... +** ldxr x[0-9]+, \[x[0-9]+\] +** cmp x[0-9]+, x[0-9]+ +** csel x[0-9]+, x[0-9]+, x[0-9]+, cc +** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ + +/* +** test_umax_u64: +** ... +** ldaxr x[0-9]+, \[x[0-9]+\] +** cmp x[0-9]+, x[0-9]+ +** csel x[0-9]+, x[0-9]+, x[0-9]+, hi +** stlxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** cbnz w[0-9]+, .* +** ... +*/ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c new file mode 100644 index 00000000000..225816b63bd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c @@ -0,0 +1,128 @@ +/* { dg-do run } */ +/* { dg-options "--save-temps" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "atomic-minmax.x" + +int main () +{ + run_tests(); + return 0; +} + +/* { dg-final { scan-assembler-not "\tldsmin" } } */ +/* { dg-final { scan-assembler-not "\tldsmax" } } */ +/* { dg-final { scan-assembler-not "\tldumin" } } */ +/* { dg-final { scan-assembler-not "\tldumax" } } */ + +/* +** test_smin_s8: +** ... +** bl __aarch64_ldsmin1_relax +** ... +*/ + +/* +** test_smax_s8: +** ... +** bl __aarch64_ldsmax1_rel +** ... +*/ + +/* +** test_smin_s16: +** ... +** bl __aarch64_ldsmin2_acq +** ... +*/ + +/* +** test_smax_s16: +** ... +** bl __aarch64_ldsmax2_acq_rel +** ... +*/ + +/* +** test_smin_s32: +** ... 
+** bl __aarch64_ldsmin4_relax +** ... +*/ + +/* +** test_smax_s32: +** ... +** bl __aarch64_ldsmax4_acq_rel +** ... +*/ + +/* +** test_smin_s64: +** ... +** bl __aarch64_ldsmin8_acq +** ... +*/ + +/* +** test_smax_s64: +** ... +** bl __aarch64_ldsmax8_relax +** ... +*/ + +/* +** test_umin_u8: +** ... +** bl __aarch64_ldumin1_relax +** ... +*/ + +/* +** test_umax_u8: +** ... +** bl __aarch64_ldumax1_acq +** ... +*/ + +/* +** test_umin_u16: +** ... +** bl __aarch64_ldumin2_acq +** ... +*/ + +/* +** test_umax_u16: +** ... +** bl __aarch64_ldumax2_rel +** ... +*/ + +/* +** test_umin_u32: +** ... +** bl __aarch64_ldumin4_acq_rel +** ... +*/ + +/* +** test_umax_u32: +** ... +** bl __aarch64_ldumax4_relax +** ... +*/ + +/* +** test_umin_u64: +** ... +** bl __aarch64_ldumin8_relax +** ... +*/ + +/* +** test_umax_u64: +** ... +** bl __aarch64_ldumax8_acq_rel +** ... +*/ diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x new file mode 100644 index 00000000000..e9e21d3f2db --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x @@ -0,0 +1,185 @@ +#include <stdint.h> + +extern void abort(void); + +#define TEST_FETCH_OP(TYPE, VAR, OP, INIT, ARG, EXPECTED_OLD, EXPECTED_NEW, MEM_ORDER) \ + do { \ + VAR = INIT; \ + TYPE old_val = __atomic_fetch_##OP(&VAR, ARG, MEM_ORDER); \ + TYPE new_val = VAR; \ + if (old_val != EXPECTED_OLD) { \ + abort(); \ + } \ + if (new_val != EXPECTED_NEW) { \ + abort(); \ + } \ + } while (0) + +#define TEST_OP_FETCH(TYPE, VAR, OP, INIT, ARG, EXPECTED_OLD, EXPECTED_NEW, MEM_ORDER) \ + do { \ + VAR = INIT; \ + TYPE result = __atomic_##OP##_fetch(&VAR, ARG, MEM_ORDER); \ + TYPE new_val = VAR; \ + if (result != EXPECTED_NEW) { \ + abort(); \ + } \ + if (new_val != EXPECTED_NEW) { \ + abort(); \ + } \ + } while (0) + +#define GEN_TYPE_TESTS(TYPE, VAR, SUFFIX, IS_SIGNED) \ + TYPE VAR; \ + void test_##SUFFIX() { \ + TEST_FETCH_OP(TYPE, VAR, min, 10, 5, 10, 5, __ATOMIC_RELAXED); \ + 
TEST_FETCH_OP(TYPE, VAR, min, 10, 20, 10, 10, __ATOMIC_SEQ_CST); \ + TEST_FETCH_OP(TYPE, VAR, max, 10, 20, 10, 20, __ATOMIC_ACQUIRE); \ + TEST_FETCH_OP(TYPE, VAR, max, 10, 5, 10, 10, __ATOMIC_RELEASE); \ + \ + TEST_OP_FETCH(TYPE, VAR, min, 10, 5, 10, 5, __ATOMIC_CONSUME); \ + TEST_OP_FETCH(TYPE, VAR, min, 10, 20, 10, 10, __ATOMIC_ACQ_REL); \ + TEST_OP_FETCH(TYPE, VAR, max, 10, 20, 10, 20, __ATOMIC_RELAXED); \ + TEST_OP_FETCH(TYPE, VAR, max, 10, 5, 10, 10, __ATOMIC_SEQ_CST); \ + \ + if (IS_SIGNED) { \ + TEST_FETCH_OP(TYPE, VAR, min, -10, -20, -10, -20, __ATOMIC_ACQUIRE); \ + TEST_FETCH_OP(TYPE, VAR, max, -10, 5, -10, 5, __ATOMIC_RELEASE); \ + TEST_FETCH_OP(TYPE, VAR, min, -5, -3, -5, -5, __ATOMIC_RELAXED); \ + TEST_FETCH_OP(TYPE, VAR, max, -20, -10, -20, -10, __ATOMIC_SEQ_CST); \ + TEST_OP_FETCH(TYPE, VAR, min, -100, 50, -100, -100, __ATOMIC_ACQ_REL); \ + TEST_OP_FETCH(TYPE, VAR, max, -50, -60, -50, -50, __ATOMIC_CONSUME); \ + } \ + } + +GEN_TYPE_TESTS(int8_t, s8_var_test, s8, 1) +GEN_TYPE_TESTS(int16_t, s16_var_test, s16, 1) +GEN_TYPE_TESTS(int32_t, s32_var_test, s32, 1) +GEN_TYPE_TESTS(int64_t, s64_var_test, s64, 1) + +GEN_TYPE_TESTS(uint8_t, u8_var_test, u8, 0) +GEN_TYPE_TESTS(uint16_t, u16_var_test, u16, 0) +GEN_TYPE_TESTS(uint32_t, u32_var_test, u32, 0) +GEN_TYPE_TESTS(uint64_t, u64_var_test, u64, 0) + +void run_tests() { + test_s8(); + test_s16(); + test_s32(); + test_s64(); + test_u8(); + test_u16(); + test_u32(); + test_u64(); +} + +int8_t s8_var = 0; + +int8_t +test_smin_s8 (int8_t a) +{ + return __atomic_fetch_min (&s8_var, a, __ATOMIC_RELAXED); +} + +int8_t +test_smax_s8 (int8_t a) +{ + return __atomic_fetch_max (&s8_var, a, __ATOMIC_RELEASE); +} + +int16_t s16_var = 0; + +int16_t +test_smin_s16 (int16_t a) +{ + return __atomic_fetch_min (&s16_var, a, __ATOMIC_ACQUIRE); +} + +int16_t +test_smax_s16 (int16_t a) +{ + return __atomic_fetch_max (&s16_var, a, __ATOMIC_ACQ_REL); +} + +int32_t s32_var = 0; + +int32_t +test_smin_s32 (int32_t a) +{ + return 
__atomic_fetch_min (&s32_var, a, __ATOMIC_RELAXED); +} + +int32_t +test_smax_s32 (int32_t a) +{ + return __atomic_fetch_max (&s32_var, a, __ATOMIC_SEQ_CST); +} + +int64_t s64_var = 0; + +int64_t +test_smin_s64 (int64_t a) +{ + return __atomic_fetch_min (&s64_var, a, __ATOMIC_ACQUIRE); +} + +int64_t +test_smax_s64 (int64_t a) +{ + return __atomic_fetch_max (&s64_var, a, __ATOMIC_RELAXED); +} + +uint8_t u8_var = 0; + +uint8_t +test_umin_u8 (uint8_t a) +{ + return __atomic_fetch_min (&u8_var, a, __ATOMIC_RELAXED); +} + +uint8_t +test_umax_u8 (uint8_t a) +{ + return __atomic_fetch_max (&u8_var, a, __ATOMIC_CONSUME); +} + +uint16_t u16_var = 0; + +uint16_t +test_umin_u16 (uint16_t a) +{ + return __atomic_fetch_min (&u16_var, a, __ATOMIC_ACQUIRE); +} + +uint16_t +test_umax_u16 (uint16_t a) +{ + return __atomic_fetch_max (&u16_var, a, __ATOMIC_RELEASE); +} + +uint32_t u32_var = 0; + +uint32_t +test_umin_u32 (uint32_t a) +{ + return __atomic_fetch_min (&u32_var, a, __ATOMIC_ACQ_REL); +} + +uint32_t +test_umax_u32 (uint32_t a) +{ + return __atomic_fetch_max (&u32_var, a, __ATOMIC_RELAXED); +} + +uint64_t u64_var = 0; + +uint64_t +test_umin_u64 (uint64_t a) +{ + return __atomic_fetch_min (&u64_var, a, __ATOMIC_RELAXED); +} + +uint64_t +test_umax_u64 (uint64_t a) +{ + return __atomic_fetch_max (&u64_var, a, __ATOMIC_ACQ_REL); +} \ No newline at end of file diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S index e31ffa41514..f098cb278ec 100644 --- a/libgcc/config/aarch64/lse.S +++ b/libgcc/config/aarch64/lse.S @@ -276,7 +276,9 @@ ENDFN NAME(swp) #endif #if defined(L_ldadd) || defined(L_ldclr) \ - || defined(L_ldeor) || defined(L_ldset) + || defined(L_ldeor) || defined(L_ldset) \ + || defined(L_ldsmin) || defined(L_ldsmax) \ + || defined(L_ldumin) || defined(L_ldumax) #ifdef L_ldadd #define LDNM ldadd @@ -294,6 +296,26 @@ ENDFN NAME(swp) #define LDNM ldset #define OP orr #define OPN 0x3000 +#elif defined(L_ldsmin) +#define LDNM ldsmin +#define OP smin 
+#define OPN 0x5000 +#define IS_MINMAX 1 +#elif defined(L_ldsmax) +#define LDNM ldsmax +#define OP smax +#define OPN 0x4000 +#define IS_MINMAX 1 +#elif defined(L_ldumin) +#define LDNM ldumin +#define OP umin +#define OPN 0x7000 +#define IS_MINMAX 1 +#elif defined(L_ldumax) +#define LDNM ldumax +#define OP umax +#define OPN 0x6000 +#define IS_MINMAX 1 #else #error #endif @@ -311,7 +333,45 @@ STARTFN NAME(LDNM) 8: mov s(tmp0), s(0) 0: LDXR s(0), [x1] +#ifdef IS_MINMAX + /* For min/max, extend if needed, compare, and select. */ +#if SIZE < 4 + #if defined(L_ldsmin) || defined(L_ldsmax) + /* Sign extend for signed comparisons. */ + #if SIZE == 1 + sxtb w(tmp1), w(0) + sxtb w(tmp3), w(tmp0) + #else /* SIZE == 2 */ + sxth w(tmp1), w(0) + sxth w(tmp3), w(tmp0) + #endif + #else /* L_ldumin || L_ldumax */ + /* Zero extend for unsigned comparisons. */ + #if SIZE == 1 + uxtb w(tmp1), w(0) + uxtb w(tmp3), w(tmp0) + #else /* SIZE == 2 */ + uxth w(tmp1), w(0) + uxth w(tmp3), w(tmp0) + #endif + #endif + cmp w(tmp3), w(tmp1) +#else /* SIZE >= 4 */ + cmp s(tmp0), s(0) +#endif + /* Select based on condition. 
*/ + #if defined(L_ldsmin) + csel s(tmp1), s(tmp0), s(0), lt + #elif defined(L_ldsmax) + csel s(tmp1), s(tmp0), s(0), gt + #elif defined(L_ldumin) + csel s(tmp1), s(tmp0), s(0), lo + #elif defined(L_ldumax) + csel s(tmp1), s(tmp0), s(0), hi + #endif +#else /* Not IS_MINMAX */ OP s(tmp1), s(0), s(tmp0) +#endif /* IS_MINMAX */ STXR w(tmp2), s(tmp1), [x1] cbnz w(tmp2), 0b BARRIER diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse index c58f003114f..955243341b3 100644 --- a/libgcc/config/aarch64/t-lse +++ b/libgcc/config/aarch64/t-lse @@ -23,7 +23,8 @@ S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0))) # Swap, Load-and-operate have 4 sizes and 5 memory models -S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset)) +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset \ + ldsmin ldsmax ldumin ldumax)) O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1))) LSE_OBJS := $(O0) $(O1) -- 2.43.0
