On 03/08/12 10:02, Joel Sherrill wrote:
> If so, it is just a matter of ifdef's to get the right code.

Ok.

> Does arm-eabi have this support?  We probably could
> just use the same code.

There's scant bare metal support at the moment.  For sufficiently new
ARM core revisions, gcc will inline the operations; to get all of them
you have to target ARMv7.

I threw this together today.  It likely needs quite a bit of spit and
polish:

* There's quite a bit of boiler plate that could be shared with
  lib1funcs.S.

* I know Richard E has cautioned me time and again about SWP, but I
  have to assume that it's useful for at least some CPU revisions.
  Knowing exactly when and where is the trick.  E.g. it might be
  correct to use it on arm-rtems, but not generically for arm-eabi.

* It might be cleaner to split this into multiple files that can be
  re-used in interesting ways.  For instance, when targeting ARMv6 it
  might be a win to use some of these routines instead of
  linux-atomic.c, particularly when LDREX[BH] are lacking.

* The __atomic_fetch_op and __atomic_op_fetch routines are missing.
  (A rough sketch of one such routine appears below, ahead of the
  patch.)

* Given that this is bare metal, and pre-ARMv6 is certainly non-SMP,
  you might well want to provide fallback versions of these routines
  that disable interrupts.  (Also sketched below.)

* This has received only compile testing with the arm-rtemseabi
  multilibs.  There may well be bugs.

* It's quite likely that this actually belongs in the not-yet-extant
  libatomic and not in libgcc at all.

r~
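As a starting point for the missing __atomic_fetch_op routines, here is
a rough, untested sketch of __atomic_fetch_add_4.  It is written as if
it lived in native-atomic.S below, reusing the FUNC/ENDF/DMB/RET macros
and the HAVE_STREX guard defined there, and it ignores the memory model
argument just like the rest of the file:

#ifdef HAVE_STREX
FUNC __atomic_fetch_add_4

	DMB
0:	ldrex	r3, [r0]		@ load the current value
	add	r2, r3, r1		@ add the increment (clobbers the
					@ ignored memory model argument)
	strex	ip, r2, [r0]		@ try to store the sum
	cmp	ip, #0
	bne	0b			@ reservation lost, retry
	mov	r0, r3			@ return the previous value
	DMB
	RET

ENDF __atomic_fetch_add_4
#endif /* HAVE_STREX */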
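And a similarly rough sketch of what an interrupt-disabling fallback
could look like for parts without LDREX/STREX, using
__atomic_compare_exchange_4 as the example since SWP cannot implement
it at all.  It assumes privileged execution (writes to CPSR have no
effect in user mode) and follows the interface documented in the file,
with the memory model arguments ignored:

#ifndef HAVE_STREX
FUNC __atomic_compare_exchange_4

	push	{ r4 }
	mrs	ip, CPSR		@ save the current interrupt state
	orr	r3, ip, #0x80		@ set the I bit to mask IRQs
	msr	CPSR_c, r3
	ldr	r3, [r0]		@ read the current value
	ldr	r4, [r1]		@ read the expected old value
	cmp	r3, r4
	streq	r2, [r0]		@ equal: store the new value
	str	r3, [r1]		@ hand the read value back
	msr	CPSR_c, ip		@ restore the interrupt state
					@ (control field only, flags kept)
	moveq	r0, #1			@ return success or failure
	movne	r0, #0
	pop	{ r4 }
	RET

ENDF __atomic_compare_exchange_4
#endif /* !HAVE_STREX */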
diff --git a/libgcc/config.host b/libgcc/config.host
index 257622a..251ecb6 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -359,7 +359,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtemseabi*)
 	tm_file="$tm_file arm/bpabi-lib.h"
 	case ${host} in
 	arm*-*-eabi* | arm*-*-rtemseabi*)
-	  tmake_file="${tmake_file} arm/t-bpabi"
+	  tmake_file="${tmake_file} arm/t-bpabi arm/t-atomic"
 	  extra_parts="crtbegin.o crtend.o crti.o crtn.o"
 	  ;;
 	arm*-*-symbianelf*)
diff --git a/libgcc/config/arm/native-atomic.S b/libgcc/config/arm/native-atomic.S
new file mode 100644
index 0000000..d646706
--- /dev/null
+++ b/libgcc/config/arm/native-atomic.S
@@ -0,0 +1,509 @@
+/* Atomic operations for ARM, assuming only native cpu support.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#if defined(__ARM_ARCH_2__)
+# define __ARM_ARCH__ 2
+#endif
+
+#if defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__)
+# define __ARM_ARCH__ 3
+#endif
+
+#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+# define __ARM_ARCH__ 4
+#endif
+
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+    || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+    || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+    || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+    || defined(__ARM_ARCH_6M__)
+# define __ARM_ARCH__ 6
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+    || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH__ 7
+#endif
+
+#ifndef __ARM_ARCH__
+#error Unable to determine architecture.
+#endif
+
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__)
+# define HAVE_STREX
+# define HAVE_STREXB
+# define HAVE_STREXH
+# define HAVE_STREXD
+#elif __ARM_ARCH__ == 6
+# define HAVE_STREX
+#elif __ARM_ARCH__ == 4 || __ARM_ARCH__ == 5
+/* This assumes that we are *not* running on ARM6+ SMP.  */
+# define HAVE_SWP
+#endif
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file.  */
+	/* Tag_ABI_align_needed: This code does not require 8-byte
+	   alignment from the caller.  */
+	/* .eabi_attribute 24, 0  -- default setting.  */
+	/* Tag_ABI_align_preserved: This code preserves 8-byte
+	   alignment in any callee.  */
+	.eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#if __ARM_ARCH__ == 4 && defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+# error "Need interworking stubs"
+#endif
+
+.macro FUNC name
+	.globl	\name
+#ifdef __ELF__
+	.hidden	\name
+	.type	\name, %function
+#endif
+#ifdef __thumb2__
+	.thumb
+	.thumb_func
+#else
+	.arm
+#endif
+	.align	2
+\name:
+.endm
+
+.macro ENDF name
+#ifdef __ELF__
+	.size	\name, . - \name
+#endif
+.endm
+
+.macro ALIAS new, old
+#ifdef __thumb__
+	.thumb_set \new, \old
+#else
+	.set	\new, \old
+#endif
+	.globl	\new
+.endm
+
+/* Concatenation macros */
+#define C(X,Y)	C2(X,Y)
+#define C2(X,Y)	X ## Y
+
+/* Sometimes we prefer an insn form that sets flags, for size.  */
+#ifdef __thumb2__
+# define s(X)	X ## s
+#else
+# define s(X)	X
+#endif
+
+#if __ARM_ARCH__ > 4 || defined(__ARM_ARCH_4T__)
+# define RET	bx	lr
+#else
+# define RET	mov	pc, lr
+#endif
+
+#if __ARM_ARCH__ >= 7
+# define DMB	dmb	sy
+# define HAVE_DMB
+#elif __ARM_ARCH__ == 6
+# define DMB	mcr	p15, 0, r0, c7, c10, 5
+# define HAVE_DMB
+#else
+# define DMB
+#endif
+
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
+# define HAVE_MOVW
+#endif
+
+	.text
+	.syntax unified
+
+#if __ARM_ARCH__ >= 7
+	.arch	armv7-a
+#elif defined(__thumb2__)
+	.arch	armv6t2
+#elif __ARM_ARCH__ >= 6
+	.arch	armv6
+#elif __ARM_ARCH__ > 4
+	.arch	armv5
+#elif defined(__ARM_ARCH_4T__)
+	.arch	armv4t
+#else
+	.arch	armv4
+#endif
+
+/* ----------------------------------------------------------------------
+   SYNC_SYNCHRONIZE
+   ----------------------------------------------------------------------*/
+
+/* Note that GCC assumes that all AAPCS compliant systems have this symbol;
+   invocations of __atomic_thread_fence and __atomic_signal_fence are always
+   revectored here.  */
+
+FUNC __sync_synchronize
+
+	DMB
+	RET
+
+ENDF __sync_synchronize
+
+
+/* ----------------------------------------------------------------------
+   ATOMIC_EXCHANGE
+   ----------------------------------------------------------------------*/
+
+/* These functions have the interface
+	T __atomic_exchange_N(T *mem, T new, int model);
+
+   The straight-forward versions of the LDREX alternatives, that is
+   those that don't need to force alignment, are inlined by the compiler
+   for the appropriate ISAs.  They are present here for completeness.
+
+   The compiler *never* emits SWP/SWPB, as it cannot know that the
+   runtime environment supports it.  Since we're assuming bare metal,
+   non-SMP, fully multilibed compilation, we go ahead and use it.
+
+   We ignore the memory model parameter to these routines and always
+   implement a full barrier across the routine.  */
+
+#if defined(HAVE_STREX) || defined(HAVE_SWP)
+FUNC __atomic_exchange_1
+
+	DMB
+# ifdef HAVE_STREXB
+	/* Use the byte version of STREX if available.  */
+0:	ldrexb	r2, [r0]
+	strexb	r3, r1, [r0]
+	cmp	r3, #0
+	bne	0b
+	mov	r0, r2
+	DMB
+	RET
+# elif defined(HAVE_STREX)
+	/* Use the word version of STREX and perform alignment.  */
+	push	{ r4, lr }
+	and	r4, r0, #3		@ find byte misalignment
+	bic	ip, r0, #3		@ align r0 into ip
+	s(lsl)	r4, r4, #3		@ find bit misalignment
+# ifdef __ARMEB__
+	rsb	r4, r4, #24
+# endif
+	s(mov)	r3, #0xff		@ compute byte mask
+	s(lsl)	r3, r3, r4		@ shift byte mask into place
+	s(lsl)	r1, r1, r4		@ shift input into place
+	uadd8	r3, r3, r3		@ move byte mask to APSR.GE bits
+0:
+	ldrex	r2, [ip]
+	lsr	r0, r2, r4		@ return old value
+	sel	r2, r1, r2		@ merge word with new val
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+	uxtb	r0, r0			@ return byte
+	DMB
+	pop	{ r4, pc }
+# else
+	/* Use SWPB only as a last resort.  */
+	mov	r2, r0
+	swpb	r0, r1, [r2]
+	RET
+# endif /* HAVE_STREXB */
+
+ENDF __atomic_exchange_1
+ALIAS __sync_lock_test_and_set_1, __atomic_exchange_1
+#endif /* HAVE_STREX || HAVE_SWPB */
+
+#ifdef HAVE_STREX
+FUNC __atomic_exchange_2
+
+	DMB
+# ifdef HAVE_STREXH
+	/* Use the half-word version of STREX if available.  */
+0:	ldrexh	r2, [r0]
+	strexh	r3, r1, [r0]
+	cmp	r3, #0
+	bne	0b
+	mov	r0, r2
+	DMB
+	RET
+# else
+	/* Use the word version of STREX and perform alignment.  */
+# ifdef HAVE_MOVW
+	movw	r3, #0xffff		@ create byte mask
+# else
+	ldr	r3, =#0xffff
+# endif
+	lsrs	r2, r0, #2		@ carry = (r0 & 2)
+# ifdef __ARMEB__
+# define HI	cc			@ iff value is in the high half
+# define LO	cs			@ iff the value is in the low half
+# else
+# define HI	cs
+# define LO	cc
+# endif
+	bic	ip, r0, #3		@ align r0 into ip
+	itt	HI
+	C(lsl,HI) r3, r3, #16		@ shift byte mask into place
+	C(lsl,HI) r1, r1, #16		@ shift new value into place
+	uadd16	r3, r3, r3		@ move byte mask into APSR.GE bits
+0:
+	ldrex	r2, [ip]
+	ite	HI
+	C(uxth,HI) r0, r2, ror #16	@ return old value
+	C(uxth,LO) r0, r2
+	sel	r2, r1, r2		@ merge word with new value
+	strex	r3, r2, [ip]
+	tst	r3, r3			@ do not clobber carry
+	bne	0b
+	DMB
+	RET
+
+# endif /* HAVE_STREXH */
+ENDF __atomic_exchange_2
+ALIAS __sync_lock_test_and_set_2, __atomic_exchange_2
+#endif /* HAVE_STREX */
+
+#if defined(HAVE_STREX) || defined(HAVE_SWP)
+FUNC __atomic_exchange_4
+
+	DMB
+# ifdef HAVE_STREX
+0:	ldrex	r2, [r0]
+	strex	r3, r1, [r0]
+	cmp	r3, #0
+	bne	0b
+	mov	r0, r2
+# else
+	mov	r2, r0
+	swp	r0, r1, [r2]
+# endif
+	DMB
+	RET
+
+ENDF __atomic_exchange_4
+ALIAS __sync_lock_test_and_set_4, __atomic_exchange_4
+#endif /* HAVE_STREX || HAVE_SWP */
+
+#ifdef HAVE_STREXD
+FUNC __atomic_exchange_8
+
+	DMB
+	push	{ r4, r5 }
+0:	ldrexd	r4, r5, [r0]
+	strexd	r1, r2, r3, [r0]
+	cmp	r1, #0
+	bne	0b
+	mov	r0, r4
+	mov	r1, r5
+	DMB
+	pop	{ r4, r5 }
+	RET
+
+ENDF __atomic_exchange_8
+ALIAS __sync_lock_test_and_set_8, __atomic_exchange_8
+#endif /* HAVE_STREXD */
+
+/* ----------------------------------------------------------------------
+   ATOMIC_COMPARE_EXCHANGE
+   ----------------------------------------------------------------------*/
+
+/* These functions have the interface
+	bool __atomic_compare_exchange_N(T *mem, T *old, T new, int ms, int mf);
+
+   The versions that do not require forced alignment are inlined
+   by the compiler.  They are present here for completeness only.
+
+   We ignore the memory model parameters to these routines and always
+   implement a full barrier across the routine.  */
+
+#ifdef HAVE_STREX
+FUNC __atomic_compare_exchange_1
+#ifdef HAVE_STREXB
+
+	DMB
+	push	{ r4, lr }
+	ldrb	r4, [r1]		@ load old value
+	mov	ip, r0
+0:	ldrexb	r0, [ip]
+	cmp	r0, r4
+	bne	1f
+	strexb	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	strb	r0, [r1]		@ return read value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, pc }
+
+#else
+
+	DMB
+	push	{ r4, r5, r6, lr }
+	ldrb	r4, [r1]		@ load old value
+	and	r6, r0, #3		@ find byte misalignment
+	bic	ip, r0, #3		@ align r0 into ip
+	s(lsl)	r6, r6, #3		@ find bit misalignment
+# ifdef __ARMEB__
+	rsb	r6, r6, #24
+# endif
+	s(mov)	r5, #0xff
+	s(lsl)	r4, r4, r6		@ shift old value in place
+	s(lsl)	r5, r5, r6		@ shift byte mask in place
+0:	ldrex	r2, [ip]
+	and	r0, r2, r5		@ mask read value to compare
+	cmp	r0, r4
+	bne	1f
+	s(bic)	r2, r2, r5		@ merge new value into word
+	s(orr)	r2, r2, r4
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	lsr	r0, r0, r6		@ return read value
+	strb	r0, [r1]
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, r5, r6, pc }
+
+#endif /* HAVE_STREXB */
+ENDF __atomic_compare_exchange_1
+
+FUNC __atomic_compare_exchange_2
+#ifdef HAVE_STREXB
+
+	DMB
+	push	{ r4, lr }
+	ldrh	r4, [r1]		@ load old value
+	mov	ip, r0
+0:	ldrexh	r0, [ip]
+	cmp	r0, r4
+	bne	1f
+	strexh	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	strh	r0, [r1]		@ return read value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, pc }
+
+#else
+
+	DMB
+	push	{ r4, r5, r6, lr }
+	ldrh	r4, [r1]		@ load old value
+# ifdef HAVE_MOVW
+	movw	r5, #0xffff		@ create byte mask
+# else
+	ldr	r5, =#0xffff
+# endif
+	and	r6, r0, #2		@ find byte misalignment
+	bic	ip, r0, #3		@ align r0 into ip
+	s(lsl)	r6, r6, #3		@ find bit misalignment
+# ifdef __ARMEB__
+	eor	r6, r6, #16
+# endif
+	s(lsl)	r4, r4, r6		@ shift old value in place
+	s(lsl)	r5, r5, r6		@ shift byte mask in place
+0:	ldrex	r2, [ip]
+	and	r0, r2, r5		@ mask read value to compare
+	cmp	r0, r4
+	bne	1f
+	s(bic)	r2, r2, r5		@ merge new value into word
+	s(orr)	r2, r2, r4
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	lsr	r0, r0, r6		@ return read value
+	strh	r0, [r1]
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, r5, r6, pc }
+
+#endif /* HAVE_STREXB */
+ENDF __atomic_compare_exchange_2
+
+FUNC __atomic_compare_exchange_4
+
+	DMB
+	push	{ r4, lr }
+	mov	ip, r0
+	ldr	r4, [r1]		@ load old value
+0:	ldrex	r0, [ip]
+	cmp	r0, r4
+	bne	1f
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	str	r0, [r1]		@ return old value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, pc }
+
+ENDF __atomic_compare_exchange_4
+#endif /* HAVE_STREX */
+
+#ifdef HAVE_STREXD
+FUNC __atomic_compare_exchange_8
+
+	DMB
+	push	{ r4, r5, r6, lr }
+	ldrd	r4, r5, [r1]		@ load old value
+	mov	ip, r0			@ save pointers
+	mov	lr, r1
+0:	ldrexd	r0, r1, [ip]
+	cmp	r0, r4
+	it	eq
+	cmpeq	r1, r2
+	bne	1f
+	strexd	r6, r2, r3, [r0]
+	cmp	r6, #0
+	bne	0b
+1:	strd	r0, r1, [lr]		@ save read value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, r5, r6, pc }
+
+ENDF __atomic_compare_exchange_8
+#endif /* HAVE_STREXD */
diff --git a/libgcc/config/arm/t-atomic b/libgcc/config/arm/t-atomic
new file mode 100644
index 0000000..977f09d
--- /dev/null
+++ b/libgcc/config/arm/t-atomic
@@ -0,0 +1 @@
+LIB2ADD_ST += $(srcdir)/config/arm/native-atomic.S