Hello,
This patch enables the use of LDAPR for load-acquire semantics. After
some internal investigation based on the work published by Podkopaev et
al. (https://dl.acm.org/doi/10.1145/3290382) we can confirm that using
LDAPR for the C++ load-acquire semantics is a correct relaxation.
Bootstrapped and regression tested on aarch64-none-linux-gnu.
OK for trunk?
2022-11-09 Andre Vieira <andre.simoesdiasvie...@arm.com>
Kyrylo Tkachov <kyrylo.tkac...@arm.com>
gcc/ChangeLog:
* config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New Macro.
(TARGET_RCPC): New Macro.
* config/aarch64/atomics.md (atomic_load<mode>): Change into
an expand.
(aarch64_atomic_load<mode>_rcpc): New define_insn for ldapr.
(aarch64_atomic_load<mode>): Rename of old define_insn for ldar.
* config/aarch64/iterators.md (UNSPECV_LDAP): New unspec enum value.
*
doc/gcc/gcc-command-options/machine-dependent-options/aarch64-options.rst
(rcpc): Amend documentation to mention the effects on code
generation.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/ldapr.c: New test.
* lib/target-supports.exp (add_options_for_aarch64_rcpc): New
options procedure.
(check_effective_target_aarch64_rcpc_ok_nocache): New
check-effective-target.
(check_effective_target_aarch64_rcpc_ok): Likewise.
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index
e60f9bce023b2cd5e7233ee9b8c61fc93c1494c2..51a8aa02a5850d5c79255dbf7e0764ffdec73ccd
100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -221,6 +221,7 @@ enum class aarch64_feature : unsigned char {
#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3A)
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
+#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC)
/* Crypto is an optional extension to AdvSIMD. */
#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
@@ -328,6 +329,9 @@ enum class aarch64_feature : unsigned char {
/* SB instruction is enabled through +sb. */
#define TARGET_SB (AARCH64_ISA_SB)
+/* RCPC loads from Armv8.3-a. */
+#define TARGET_RCPC (AARCH64_ISA_RCPC)
+
/* Apply the workaround for Cortex-A53 erratum 835769. */
#define TARGET_FIX_ERR_A53_835769 \
((aarch64_fix_a53_err835769 == 2) \
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index
bc95f6d9d15f190a3e33704b4def2860d5f339bd..801a62bf2ba432f35ae1931beb8c4405b77b36c3
100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -657,7 +657,42 @@
}
)
-(define_insn "atomic_load<mode>"
+(define_expand "atomic_load<mode>"
+ [(match_operand:ALLI 0 "register_operand" "=r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+ (match_operand:SI 2 "const_int_operand")]
+ ""
+ {
+ /* If TARGET_RCPC and this is an ACQUIRE load, then expand to a pattern
+ using UNSPECV_LDAP. */
+ enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+ if (TARGET_RCPC
+ && (is_mm_acquire (model)
+ || is_mm_acq_rel (model)))
+ {
+ emit_insn (gen_aarch64_atomic_load<mode>_rcpc (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ {
+ emit_insn (gen_aarch64_atomic_load<mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+ }
+)
+
+(define_insn "aarch64_atomic_load<mode>_rcpc"
+ [(set (match_operand:ALLI 0 "register_operand" "=r")
+ (unspec_volatile:ALLI
+ [(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_LDAP))]
+ "TARGET_RCPC"
+ "ldapr<atomic_sfx>\t%<w>0, %1"
+)
+
+(define_insn "aarch64_atomic_load<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=r")
(unspec_volatile:ALLI
[(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index
a8ad4e5ff215ade06c3ca13a24ef18d259afcb6c..d8c2f9d6c32d6f188d584c2e9d8fb36511624de6
100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -988,6 +988,7 @@
UNSPECV_LX ; Represent a load-exclusive.
UNSPECV_SX ; Represent a store-exclusive.
UNSPECV_LDA ; Represent an atomic load or
load-acquire.
+ UNSPECV_LDAP ; Represent an atomic acquire load with RCpc
semantics.
UNSPECV_STL ; Represent an atomic store or
store-release.
UNSPECV_ATOMIC_CMPSW ; Represent an atomic compare swap.
UNSPECV_ATOMIC_EXCHG ; Represent an atomic exchange.
diff --git
a/gcc/doc/gcc/gcc-command-options/machine-dependent-options/aarch64-options.rst
b/gcc/doc/gcc/gcc-command-options/machine-dependent-options/aarch64-options.rst
index
c2b23a6ee97ef2b7c74119f22c1d3e3d85385f4d..25d609238db7d45845dbc446ac21d12dddcf8eac
100644
---
a/gcc/doc/gcc/gcc-command-options/machine-dependent-options/aarch64-options.rst
+++
b/gcc/doc/gcc/gcc-command-options/machine-dependent-options/aarch64-options.rst
@@ -437,9 +437,9 @@ the following and their inverses no :samp:`{feature}` :
floating-point instructions. This option is enabled by default for
:option:`-march=armv8.4-a`. Use of this option with architectures prior to
Armv8.2-A is not supported.
:samp:`rcpc`
- Enable the RcPc extension. This does not change code generation from GCC,
- but is passed on to the assembler, enabling inline asm statements to use
- instructions from the RcPc extension.
+ Enable the RcPc extension. This enables the use of the LDAPR instructions
for
+ load-acquire atomic semantics, and passes it on to the assembler, enabling
+ inline asm statements to use instructions from the RcPc extension.
:samp:`dotprod`
Enable the Dot Product extension. This also enables Advanced SIMD
instructions.
diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr.c
b/gcc/testsuite/gcc.target/aarch64/ldapr.c
new file mode 100644
index
0000000000000000000000000000000000000000..c36edfcd79a9ee41434ab09ac47d257a692a8606
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldapr.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -std=c99" } */
+/* { dg-require-effective-target aarch64_rcpc_ok } */
+/* { dg-add-options aarch64_rcpc } */
+#include <stdatomic.h>
+
+atomic_ullong u64;
+atomic_llong s64;
+atomic_uint u32;
+atomic_int s32;
+atomic_ushort u16;
+atomic_short s16;
+atomic_uchar u8;
+atomic_schar s8;
+
+#define TEST(size, rettype) \
+rettype \
+test_##size (void) \
+{ \
+ return atomic_load_explicit (&size, memory_order_acquire); \
+} \
+
+TEST(u64, unsigned long long)
+TEST(s64, long long)
+TEST(u32, unsigned int)
+TEST(s32, int)
+TEST(u16, unsigned short)
+TEST(s16, short)
+TEST(u8, unsigned char)
+TEST(s8, signed char)
+
+/* { dg-final { scan-assembler-times "ldapr\tx" 2 } } */
+/* { dg-final { scan-assembler-times "ldapr\tw" 2 } } */
+/* { dg-final { scan-assembler-times "ldaprh\tw" 2 } } */
+/* { dg-final { scan-assembler-times "ldaprb\tw" 2 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp
b/gcc/testsuite/lib/target-supports.exp
index
c7f583d6d1498401a7c106ed3f539dcd04f95451..262665a78dfb58f1e63b629829c5112789b7abd9
100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11819,6 +11819,42 @@ proc check_effective_target_glibc { } {
}]
}
+proc add_options_for_aarch64_rcpc { flags } {
+ if { ! [check_effective_target_aarch64_rcpc_ok] } {
+ return "$flags"
+ }
+ global et_aarch64_rcpc_flags
+ return "$flags $et_aarch64_rcpc_flags"
+}
+
+# Return 1 if the toolchain supports the RCPC extension.
+proc check_effective_target_aarch64_rcpc_ok_nocache { } {
+ global et_aarch64_rcpc_flags
+ set et_aarch64_rcpc_flags ""
+ if { ![istarget aarch64*-*-*] } {
+ return 0
+ }
+
+ foreach flags {"" "-march=armv8.2-a+rcpc"} {
+ if { [check_no_compiler_messages_nocache aarch64_rcpc_ok object {
+ int main (void) {
+ asm volatile ("ldapr x0, [x0]":::"memory");
+ return 0;
+ }
+ } $flags ] } {
+ set et_aarch64_rcpc_flags $flags
+ return 1
+ }
+ }
+ return 0
+}
+
+proc check_effective_target_aarch64_rcpc_ok { } {
+ return [check_cached_effective_target aarch64_rcpc_ok \
+ check_effective_target_aarch64_rcpc_ok_nocache]
+}
+
+
# Return 1 if the target plus current options supports a vector
# complex addition with rotate of half and single float modes, 0 otherwise.
#