At present, `atomic_16.S' groups the different implementations of each
function together in the file.  For example, the LSE128 implementation
of `exchange_16' immediately follows its core implementation, as does
the LSE128 implementation of `fetch_or_16'.

Such architectural extension-dependent implementations depend on both
ifunc and assembler support.  They may therefore need to be guarded by
two preprocessor conditionals, e.g. `#if HAVE_IFUNC' and `#if
HAVE_FEAT_LSE128'.

Having to apply these guards on a per-function basis adds unnecessary
clutter to the file and makes its maintenance more error-prone.
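
For illustration, the current layout interleaves guarded and unguarded
code on a per-function basis, roughly as follows (a schematic sketch,
not the verbatim file contents):

	ENTRY (exchange_16)
		...
	END (exchange_16)

	#if HAVE_FEAT_LSE128
	ENTRY_FEAT (exchange_16, LSE128)
		...
	END_FEAT (exchange_16, LSE128)
	#endif

	ENTRY (fetch_or_16)
		...
	END (fetch_or_16)

	#if HAVE_FEAT_LSE128
	ENTRY_FEAT (fetch_or_16, LSE128)
		...
	END_FEAT (fetch_or_16, LSE128)
	#endif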

We therefore reorganize the layout of the file so that all core
implementations needing no `#ifdef's are placed first, followed by all
ifunc-dependent implementations, which can then be guarded by a single
`#if HAVE_IFUNC'.  Within that guard, the implementations are further
grouped by architectural extension requirement, so that, for example,
all LSE128-specific functions can be guarded by a single `#if
HAVE_FEAT_LSE128'.  This greatly reduces the overall number of
preprocessor guards required.
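
After the change, the file is instead structured roughly as (again a
schematic sketch of the layout, not the verbatim contents):

	/* Core implementations, needing no guards.  */
	ENTRY (exchange_16)
		...
	END (exchange_16)

	ENTRY (fetch_or_16)
		...
	END (fetch_or_16)

	#if HAVE_IFUNC
	/* ifunc-dependent implementations, e.g. LSE2/LSE.  */
	ENTRY_FEAT (load_16, LSE2)
		...
	END_FEAT (load_16, LSE2)

	# if HAVE_FEAT_LSE128
	/* LSE128-dependent implementations.  */
	ENTRY_FEAT (exchange_16, LSE128)
		...
	END_FEAT (exchange_16, LSE128)

	ENTRY_FEAT (fetch_or_16, LSE128)
		...
	END_FEAT (fetch_or_16, LSE128)
	# endif /* HAVE_FEAT_LSE128 */
	#endif /* HAVE_IFUNC */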

libatomic/ChangeLog:

        * config/linux/aarch64/atomic_16.S: Reshuffle functions.
---
 libatomic/config/linux/aarch64/atomic_16.S | 583 ++++++++++-----------
 1 file changed, 288 insertions(+), 295 deletions(-)

diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 16ff03057ab..27363f82b75 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -40,15 +40,12 @@
 
 #include "auto-config.h"
 
-#if !HAVE_IFUNC
-# undef HAVE_FEAT_LSE128
-# define HAVE_FEAT_LSE128 0
-#endif
-
-#define HAVE_FEAT_LSE2 HAVE_IFUNC
-
-#if HAVE_FEAT_LSE128
+#if HAVE_IFUNC
+# if HAVE_FEAT_LSE128
        .arch   armv9-a+lse128
+# else
+       .arch   armv8-a+lse
+# endif
 #else
        .arch   armv8-a+lse
 #endif
@@ -124,6 +121,8 @@ NAME:                               \
 #define ACQ_REL 4
 #define SEQ_CST 5
 
+/* Core atomic operation implementations.  These are available irrespective of
+   ifunc support or the presence of additional architectural extensions.  */
 
 ENTRY (load_16)
        mov     x5, x0
@@ -143,31 +142,6 @@ ENTRY (load_16)
 END (load_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (load_16, LSE2)
-       cbnz    w1, 1f
-
-       /* RELAXED.  */
-       ldp     res0, res1, [x0]
-       ret
-1:
-       cmp     w1, SEQ_CST
-       b.eq    2f
-
-       /* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
-       ldp     res0, res1, [x0]
-       dmb     ishld
-       ret
-
-       /* SEQ_CST.  */
-2:     ldar    tmp0, [x0]      /* Block reordering with Store-Release instr.  */
-       ldp     res0, res1, [x0]
-       dmb     ishld
-       ret
-END_FEAT (load_16, LSE2)
-#endif
-
-
 ENTRY (store_16)
        cbnz    w4, 2f
 
@@ -185,23 +159,6 @@ ENTRY (store_16)
 END (store_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (store_16, LSE2)
-       cbnz    w4, 1f
-
-       /* RELAXED.  */
-       stp     in0, in1, [x0]
-       ret
-
-       /* RELEASE/SEQ_CST.  */
-1:     ldxp    xzr, tmp0, [x0]
-       stlxp   w4, in0, in1, [x0]
-       cbnz    w4, 1b
-       ret
-END_FEAT (store_16, LSE2)
-#endif
-
-
 ENTRY (exchange_16)
        mov     x5, x0
        cbnz    w4, 2f
@@ -229,31 +186,6 @@ ENTRY (exchange_16)
 END (exchange_16)
 
 
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (exchange_16, LSE128)
-       mov     tmp0, x0
-       mov     res0, in0
-       mov     res1, in1
-       cbnz    w4, 1f
-
-       /* RELAXED.  */
-       swpp    res0, res1, [tmp0]
-       ret
-1:
-       cmp     w4, ACQUIRE
-       b.hi    2f
-
-       /* ACQUIRE/CONSUME.  */
-       swppa   res0, res1, [tmp0]
-       ret
-
-       /* RELEASE/ACQ_REL/SEQ_CST.  */
-2:     swppal  res0, res1, [tmp0]
-       ret
-END_FEAT (exchange_16, LSE128)
-#endif
-
-
 ENTRY (compare_exchange_16)
        ldp     exp0, exp1, [x1]
        cbz     w4, 3f
@@ -301,43 +233,97 @@ ENTRY (compare_exchange_16)
 END (compare_exchange_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (compare_exchange_16, LSE)
-       ldp     exp0, exp1, [x1]
-       mov     tmp0, exp0
-       mov     tmp1, exp1
-       cbz     w4, 2f
-       cmp     w4, RELEASE
-       b.hs    3f
+ENTRY (fetch_or_16)
+       mov     x5, x0
+       cbnz    w4, 2f
 
-       /* ACQUIRE/CONSUME.  */
-       caspa   exp0, exp1, in0, in1, [x0]
-0:
-       cmp     exp0, tmp0
-       ccmp    exp1, tmp1, 0, eq
-       bne     1f
-       mov     x0, 1
+       /* RELAXED.  */
+1:     ldxp    res0, res1, [x5]
+       orr     tmp0, res0, in0
+       orr     tmp1, res1, in1
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
        ret
-1:
-       stp     exp0, exp1, [x1]
-       mov     x0, 0
+
+       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldaxp   res0, res1, [x5]
+       orr     tmp0, res0, in0
+       orr     tmp1, res1, in1
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
        ret
+END (fetch_or_16)
+
+
+ENTRY (or_fetch_16)
+       mov     x5, x0
+       cbnz    w4, 2f
 
        /* RELAXED.  */
-2:     casp    exp0, exp1, in0, in1, [x0]
-       b       0b
+1:     ldxp    res0, res1, [x5]
+       orr     res0, res0, in0
+       orr     res1, res1, in1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
 
-       /* RELEASE.  */
-3:     b.hi    4f
-       caspl   exp0, exp1, in0, in1, [x0]
-       b       0b
+       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldaxp   res0, res1, [x5]
+       orr     res0, res0, in0
+       orr     res1, res1, in1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (or_fetch_16)
+
+
+ENTRY (fetch_and_16)
+       mov     x5, x0
+       cbnz    w4, 2f
+
+       /* RELAXED.  */
+1:     ldxp    res0, res1, [x5]
+       and     tmp0, res0, in0
+       and     tmp1, res1, in1
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+
+       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldaxp   res0, res1, [x5]
+       and     tmp0, res0, in0
+       and     tmp1, res1, in1
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (fetch_and_16)
+
+
+ENTRY (and_fetch_16)
+       mov     x5, x0
+       cbnz    w4, 2f
+
+       /* RELAXED.  */
+1:     ldxp    res0, res1, [x5]
+       and     res0, res0, in0
+       and     res1, res1, in1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+
+       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldaxp   res0, res1, [x5]
+       and     res0, res0, in0
+       and     res1, res1, in1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (and_fetch_16)
 
-       /* ACQ_REL/SEQ_CST.  */
-4:     caspal  exp0, exp1, in0, in1, [x0]
-       b       0b
-END_FEAT (compare_exchange_16, LSE)
-#endif
 
+/* The following functions are currently single-implementation operations,
+   so they are never assigned an ifunc selector.  As such, they must be
+   reachable from __atomic_* entrypoints.  */
 
 ENTRY_ALIASED (fetch_add_16)
        mov     x5, x0
@@ -427,309 +413,316 @@ ENTRY_ALIASED (sub_fetch_16)
 END (sub_fetch_16)
 
 
-ENTRY (fetch_or_16)
+ENTRY_ALIASED (fetch_xor_16)
        mov     x5, x0
        cbnz    w4, 2f
 
        /* RELAXED.  */
 1:     ldxp    res0, res1, [x5]
-       orr     tmp0, res0, in0
-       orr     tmp1, res1, in1
+       eor     tmp0, res0, in0
+       eor     tmp1, res1, in1
        stxp    w4, tmp0, tmp1, [x5]
        cbnz    w4, 1b
        ret
 
        /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
 2:     ldaxp   res0, res1, [x5]
-       orr     tmp0, res0, in0
-       orr     tmp1, res1, in1
+       eor     tmp0, res0, in0
+       eor     tmp1, res1, in1
        stlxp   w4, tmp0, tmp1, [x5]
        cbnz    w4, 2b
        ret
-END (fetch_or_16)
+END (fetch_xor_16)
 
 
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (fetch_or_16, LSE128)
-       mov     tmp0, x0
-       mov     res0, in0
-       mov     res1, in1
-       cbnz    w4, 1f
+ENTRY_ALIASED (xor_fetch_16)
+       mov     x5, x0
+       cbnz    w4, 2f
 
        /* RELAXED.  */
-       ldsetp  res0, res1, [tmp0]
-       ret
-1:
-       cmp     w4, ACQUIRE
-       b.hi    2f
-
-       /* ACQUIRE/CONSUME.  */
-       ldsetpa res0, res1, [tmp0]
+1:     ldxp    res0, res1, [x5]
+       eor     res0, res0, in0
+       eor     res1, res1, in1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
        ret
 
-       /* RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldsetpal        res0, res1, [tmp0]
+       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldaxp   res0, res1, [x5]
+       eor     res0, res0, in0
+       eor     res1, res1, in1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
        ret
-END_FEAT (fetch_or_16, LSE128)
-#endif
+END (xor_fetch_16)
 
 
-ENTRY (or_fetch_16)
+ENTRY_ALIASED (fetch_nand_16)
        mov     x5, x0
+       mvn     in0, in0
+       mvn     in1, in1
        cbnz    w4, 2f
 
        /* RELAXED.  */
 1:     ldxp    res0, res1, [x5]
-       orr     res0, res0, in0
-       orr     res1, res1, in1
-       stxp    w4, res0, res1, [x5]
+       orn     tmp0, in0, res0
+       orn     tmp1, in1, res1
+       stxp    w4, tmp0, tmp1, [x5]
        cbnz    w4, 1b
        ret
 
        /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
 2:     ldaxp   res0, res1, [x5]
-       orr     res0, res0, in0
-       orr     res1, res1, in1
-       stlxp   w4, res0, res1, [x5]
+       orn     tmp0, in0, res0
+       orn     tmp1, in1, res1
+       stlxp   w4, tmp0, tmp1, [x5]
        cbnz    w4, 2b
        ret
-END (or_fetch_16)
+END (fetch_nand_16)
 
 
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (or_fetch_16, LSE128)
-       cbnz    w4, 1f
-       mov     tmp0, in0
-       mov     tmp1, in1
+ENTRY_ALIASED (nand_fetch_16)
+       mov     x5, x0
+       mvn     in0, in0
+       mvn     in1, in1
+       cbnz    w4, 2f
 
        /* RELAXED.  */
-       ldsetp  in0, in1, [x0]
-       orr     res0, in0, tmp0
-       orr     res1, in1, tmp1
+1:     ldxp    res0, res1, [x5]
+       orn     res0, in0, res0
+       orn     res1, in1, res1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
        ret
-1:
-       cmp     w4, ACQUIRE
-       b.hi    2f
 
-       /* ACQUIRE/CONSUME.  */
-       ldsetpa in0, in1, [x0]
-       orr     res0, in0, tmp0
-       orr     res1, in1, tmp1
+       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldaxp   res0, res1, [x5]
+       orn     res0, in0, res0
+       orn     res1, in1, res1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
        ret
+END (nand_fetch_16)
 
-       /* RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldsetpal        in0, in1, [x0]
-       orr     res0, in0, tmp0
-       orr     res1, in1, tmp1
-       ret
-END_FEAT (or_fetch_16, LSE128)
-#endif
 
+/* __atomic_test_and_set is always inlined, so this entry is unused and
+   only required for completeness.  */
+ENTRY_ALIASED (test_and_set_16)
 
-ENTRY (fetch_and_16)
+       /* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
        mov     x5, x0
-       cbnz    w4, 2f
-
-       /* RELAXED.  */
-1:     ldxp    res0, res1, [x5]
-       and     tmp0, res0, in0
-       and     tmp1, res1, in1
-       stxp    w4, tmp0, tmp1, [x5]
+1:     ldaxrb  w0, [x5]
+       stlxrb  w4, w2, [x5]
        cbnz    w4, 1b
        ret
+END (test_and_set_16)
 
-       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldaxp   res0, res1, [x5]
-       and     tmp0, res0, in0
-       and     tmp1, res1, in1
-       stlxp   w4, tmp0, tmp1, [x5]
-       cbnz    w4, 2b
-       ret
-END (fetch_and_16)
-
+/* Ensure extension-specific implementations are not included unless ifunc
+   support is present, along with necessary assembler support.  */
 
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (fetch_and_16, LSE128)
-       mov     tmp0, x0
-       mvn     res0, in0
-       mvn     res1, in1
-       cbnz    w4, 1f
+#if HAVE_IFUNC
+ENTRY_FEAT (load_16, LSE2)
+       cbnz    w1, 1f
 
        /* RELAXED.  */
-       ldclrp  res0, res1, [tmp0]
+       ldp     res0, res1, [x0]
        ret
-
 1:
-       cmp     w4, ACQUIRE
-       b.hi    2f
+       cmp     w1, SEQ_CST
+       b.eq    2f
 
-       /* ACQUIRE/CONSUME.  */
-       ldclrpa res0, res1, [tmp0]
+       /* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
+       ldp     res0, res1, [x0]
+       dmb     ishld
        ret
 
-       /* RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldclrpal        res0, res1, [tmp0]
+       /* SEQ_CST.  */
+2:     ldar    tmp0, [x0]      /* Block reordering with Store-Release instr.  */
+       ldp     res0, res1, [x0]
+       dmb     ishld
        ret
-END_FEAT (fetch_and_16, LSE128)
-#endif
+END_FEAT (load_16, LSE2)
 
 
-ENTRY (and_fetch_16)
-       mov     x5, x0
-       cbnz    w4, 2f
+ENTRY_FEAT (store_16, LSE2)
+       cbnz    w4, 1f
 
        /* RELAXED.  */
-1:     ldxp    res0, res1, [x5]
-       and     res0, res0, in0
-       and     res1, res1, in1
-       stxp    w4, res0, res1, [x5]
+       stp     in0, in1, [x0]
+       ret
+
+       /* RELEASE/SEQ_CST.  */
+1:     ldxp    xzr, tmp0, [x0]
+       stlxp   w4, in0, in1, [x0]
        cbnz    w4, 1b
        ret
+END_FEAT (store_16, LSE2)
 
-       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldaxp   res0, res1, [x5]
-       and     res0, res0, in0
-       and     res1, res1, in1
-       stlxp   w4, res0, res1, [x5]
-       cbnz    w4, 2b
+
+ENTRY_FEAT (compare_exchange_16, LSE)
+       ldp     exp0, exp1, [x1]
+       mov     tmp0, exp0
+       mov     tmp1, exp1
+       cbz     w4, 2f
+       cmp     w4, RELEASE
+       b.hs    3f
+
+       /* ACQUIRE/CONSUME.  */
+       caspa   exp0, exp1, in0, in1, [x0]
+0:
+       cmp     exp0, tmp0
+       ccmp    exp1, tmp1, 0, eq
+       bne     1f
+       mov     x0, 1
        ret
-END (and_fetch_16)
+1:
+       stp     exp0, exp1, [x1]
+       mov     x0, 0
+       ret
+
+       /* RELAXED.  */
+2:     casp    exp0, exp1, in0, in1, [x0]
+       b       0b
+
+       /* RELEASE.  */
+3:     b.hi    4f
+       caspl   exp0, exp1, in0, in1, [x0]
+       b       0b
+
+       /* ACQ_REL/SEQ_CST.  */
+4:     caspal  exp0, exp1, in0, in1, [x0]
+       b       0b
+END_FEAT (compare_exchange_16, LSE)
 
 
 #if HAVE_FEAT_LSE128
-ENTRY_FEAT (and_fetch_16, LSE128)
-       mvn     tmp0, in0
-       mvn     tmp0, in1
+ENTRY_FEAT (exchange_16, LSE128)
+       mov     tmp0, x0
+       mov     res0, in0
+       mov     res1, in1
        cbnz    w4, 1f
 
        /* RELAXED.  */
-       ldclrp  tmp0, tmp1, [x0]
-       and     res0, tmp0, in0
-       and     res1, tmp1, in1
+       swpp    res0, res1, [tmp0]
        ret
-
 1:
        cmp     w4, ACQUIRE
        b.hi    2f
 
        /* ACQUIRE/CONSUME.  */
-       ldclrpa tmp0, tmp1, [x0]
-       and     res0, tmp0, in0
-       and     res1, tmp1, in1
+       swppa   res0, res1, [tmp0]
        ret
 
        /* RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldclrpal        tmp0, tmp1, [x5]
-       and     res0, tmp0, in0
-       and     res1, tmp1, in1
+2:     swppal  res0, res1, [tmp0]
        ret
-END_FEAT (and_fetch_16, LSE128)
-#endif
+END_FEAT (exchange_16, LSE128)
 
 
-ENTRY_ALIASED (fetch_xor_16)
-       mov     x5, x0
-       cbnz    w4, 2f
+ENTRY_FEAT (fetch_or_16, LSE128)
+       mov     tmp0, x0
+       mov     res0, in0
+       mov     res1, in1
+       cbnz    w4, 1f
 
        /* RELAXED.  */
-1:     ldxp    res0, res1, [x5]
-       eor     tmp0, res0, in0
-       eor     tmp1, res1, in1
-       stxp    w4, tmp0, tmp1, [x5]
-       cbnz    w4, 1b
+       ldsetp  res0, res1, [tmp0]
        ret
+1:
+       cmp     w4, ACQUIRE
+       b.hi    2f
 
-       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldaxp   res0, res1, [x5]
-       eor     tmp0, res0, in0
-       eor     tmp1, res1, in1
-       stlxp   w4, tmp0, tmp1, [x5]
-       cbnz    w4, 2b
+       /* ACQUIRE/CONSUME.  */
+       ldsetpa res0, res1, [tmp0]
        ret
-END (fetch_xor_16)
 
+       /* RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldsetpal        res0, res1, [tmp0]
+       ret
+END_FEAT (fetch_or_16, LSE128)
 
-ENTRY_ALIASED (xor_fetch_16)
-       mov     x5, x0
-       cbnz    w4, 2f
+
+ENTRY_FEAT (or_fetch_16, LSE128)
+       cbnz    w4, 1f
+       mov     tmp0, in0
+       mov     tmp1, in1
 
        /* RELAXED.  */
-1:     ldxp    res0, res1, [x5]
-       eor     res0, res0, in0
-       eor     res1, res1, in1
-       stxp    w4, res0, res1, [x5]
-       cbnz    w4, 1b
+       ldsetp  in0, in1, [x0]
+       orr     res0, in0, tmp0
+       orr     res1, in1, tmp1
        ret
+1:
+       cmp     w4, ACQUIRE
+       b.hi    2f
 
-       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldaxp   res0, res1, [x5]
-       eor     res0, res0, in0
-       eor     res1, res1, in1
-       stlxp   w4, res0, res1, [x5]
-       cbnz    w4, 2b
+       /* ACQUIRE/CONSUME.  */
+       ldsetpa in0, in1, [x0]
+       orr     res0, in0, tmp0
+       orr     res1, in1, tmp1
        ret
-END (xor_fetch_16)
 
+       /* RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldsetpal        in0, in1, [x0]
+       orr     res0, in0, tmp0
+       orr     res1, in1, tmp1
+       ret
+END_FEAT (or_fetch_16, LSE128)
 
-ENTRY_ALIASED (fetch_nand_16)
-       mov     x5, x0
-       mvn     in0, in0
-       mvn     in1, in1
-       cbnz    w4, 2f
+
+ENTRY_FEAT (fetch_and_16, LSE128)
+       mov     tmp0, x0
+       mvn     res0, in0
+       mvn     res1, in1
+       cbnz    w4, 1f
 
        /* RELAXED.  */
-1:     ldxp    res0, res1, [x5]
-       orn     tmp0, in0, res0
-       orn     tmp1, in1, res1
-       stxp    w4, tmp0, tmp1, [x5]
-       cbnz    w4, 1b
+       ldclrp  res0, res1, [tmp0]
        ret
 
-       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldaxp   res0, res1, [x5]
-       orn     tmp0, in0, res0
-       orn     tmp1, in1, res1
-       stlxp   w4, tmp0, tmp1, [x5]
-       cbnz    w4, 2b
+1:
+       cmp     w4, ACQUIRE
+       b.hi    2f
+
+       /* ACQUIRE/CONSUME.  */
+       ldclrpa res0, res1, [tmp0]
        ret
-END (fetch_nand_16)
 
+       /* RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldclrpal        res0, res1, [tmp0]
+       ret
+END_FEAT (fetch_and_16, LSE128)
 
-ENTRY_ALIASED (nand_fetch_16)
-       mov     x5, x0
-       mvn     in0, in0
-       mvn     in1, in1
-       cbnz    w4, 2f
 
-       /* RELAXED.  */
-1:     ldxp    res0, res1, [x5]
-       orn     res0, in0, res0
-       orn     res1, in1, res1
-       stxp    w4, res0, res1, [x5]
-       cbnz    w4, 1b
-       ret
+ENTRY_FEAT (and_fetch_16, LSE128)
+       mvn     tmp0, in0
+       mvn     tmp0, in1
+       cbnz    w4, 1f
 
-       /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-2:     ldaxp   res0, res1, [x5]
-       orn     res0, in0, res0
-       orn     res1, in1, res1
-       stlxp   w4, res0, res1, [x5]
-       cbnz    w4, 2b
+       /* RELAXED.  */
+       ldclrp  tmp0, tmp1, [x0]
+       and     res0, tmp0, in0
+       and     res1, tmp1, in1
        ret
-END (nand_fetch_16)
 
+1:
+       cmp     w4, ACQUIRE
+       b.hi    2f
 
-/* __atomic_test_and_set is always inlined, so this entry is unused and
-   only required for completeness.  */
-ENTRY_ALIASED (test_and_set_16)
+       /* ACQUIRE/CONSUME.  */
+       ldclrpa tmp0, tmp1, [x0]
+       and     res0, tmp0, in0
+       and     res1, tmp1, in1
+       ret
 
-       /* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
-       mov     x5, x0
-1:     ldaxrb  w0, [x5]
-       stlxrb  w4, w2, [x5]
-       cbnz    w4, 1b
+       /* RELEASE/ACQ_REL/SEQ_CST.  */
+2:     ldclrpal        tmp0, tmp1, [x5]
+       and     res0, tmp0, in0
+       and     res1, tmp1, in1
        ret
-END (test_and_set_16)
+END_FEAT (and_fetch_16, LSE128)
+#endif /* HAVE_FEAT_LSE128 */
+#endif /* HAVE_IFUNC */
 
 
 /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
-- 
2.34.1
