On 03/08/12 10:02, Joel Sherrill wrote:
> If so, it is just a matter of ifdef's to get the right code.

Ok.

> Does arm-eabi have this support?  We probably could
> just use the same code.

There's scant bare metal support at the moment.  For sufficiently new
ARM core revisions, gcc will inline the operations; to get all of them
you have to target ARMv7.

I threw this together today.  It likely needs quite a bit of spit and
polish:

* There's quite a bit of boiler plate that could be shared with
  lib1funcs.S.

* I know Richard E has cautioned me time and again about SWP, but I
  have to assume that it's useful for at least some CPU revisions.
  Knowing exactly when and where is the trick.  E.g. it might be
  correct to use it on arm-rtems, but not generically for arm-eabi.

* It might be cleaner to split this into multiple files that can be
  re-used in interesting ways.  For instance, when targeting ARMv6 it
  might be a win to use some of these routines instead of
  linux-atomic.c, particularly when LDREX[BH] are lacking.

* The __atomic_fetch_op and __atomic_op_fetch routines are missing.
  (A rough sketch of one such routine appears below, ahead of the
  patch.)

* Given that this is bare metal, and pre-ARMv6 is certainly non-SMP,
  you might well want to provide fallback versions of these routines
  that disable interrupts.  (Also sketched below.)

* This has received only compile testing with the arm-rtemseabi
  multilibs.  There may well be bugs.

* It's quite likely that this actually belongs in the not-yet-extant
  libatomic and not in libgcc at all.

r~
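As a starting point for the missing __atomic_fetch_op routines, here is
a rough, untested sketch of __atomic_fetch_add_4.  It is written as if
it lived in native-atomic.S below, reusing the FUNC/ENDF/DMB/RET macros
and the HAVE_STREX guard defined there, and it ignores the memory model
argument just like the rest of the file:

#ifdef HAVE_STREX
FUNC __atomic_fetch_add_4

	DMB
0:	ldrex	r3, [r0]		@ load the current value
	add	r2, r3, r1		@ add the increment (clobbers the
					@ ignored memory model argument)
	strex	ip, r2, [r0]		@ try to store the sum
	cmp	ip, #0
	bne	0b			@ reservation lost, retry
	mov	r0, r3			@ return the previous value
	DMB
	RET

ENDF __atomic_fetch_add_4
#endif /* HAVE_STREX */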
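And a similarly rough sketch of what an interrupt-disabling fallback
could look like for parts without LDREX/STREX, using
__atomic_compare_exchange_4 as the example since SWP cannot implement
it at all.  It assumes privileged execution (writes to CPSR have no
effect in user mode) and follows the interface documented in the file,
with the memory model arguments ignored:

#ifndef HAVE_STREX
FUNC __atomic_compare_exchange_4

	push	{ r4 }
	mrs	ip, CPSR		@ save the current interrupt state
	orr	r3, ip, #0x80		@ set the I bit to mask IRQs
	msr	CPSR_c, r3
	ldr	r3, [r0]		@ read the current value
	ldr	r4, [r1]		@ read the expected old value
	cmp	r3, r4
	streq	r2, [r0]		@ equal: store the new value
	str	r3, [r1]		@ hand the read value back
	msr	CPSR_c, ip		@ restore the interrupt state
					@ (control field only, flags kept)
	moveq	r0, #1			@ return success or failure
	movne	r0, #0
	pop	{ r4 }
	RET

ENDF __atomic_compare_exchange_4
#endif /* !HAVE_STREX */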
diff --git a/libgcc/config.host b/libgcc/config.host
index 257622a..251ecb6 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -359,7 +359,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtemseabi*)
 	tm_file="$tm_file arm/bpabi-lib.h"
 	case ${host} in
 	arm*-*-eabi* | arm*-*-rtemseabi*)
-	  tmake_file="${tmake_file} arm/t-bpabi"
+	  tmake_file="${tmake_file} arm/t-bpabi arm/t-atomic"
 	  extra_parts="crtbegin.o crtend.o crti.o crtn.o"
 	  ;;
 	arm*-*-symbianelf*)
diff --git a/libgcc/config/arm/native-atomic.S b/libgcc/config/arm/native-atomic.S
new file mode 100644
index 0000000..d646706
--- /dev/null
+++ b/libgcc/config/arm/native-atomic.S
@@ -0,0 +1,509 @@
+/* Atomic operations for ARM, assuming only native cpu support.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#if defined(__ARM_ARCH_2__)
+# define __ARM_ARCH__ 2
+#endif
+
+#if defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__)
+# define __ARM_ARCH__ 3
+#endif
+
+#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+# define __ARM_ARCH__ 4
+#endif
+
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+    || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+    || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+    || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+    || defined(__ARM_ARCH_6M__)
+# define __ARM_ARCH__ 6
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+    || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH__ 7
+#endif
+
+#ifndef __ARM_ARCH__
+#error Unable to determine architecture.
+#endif
+
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__)
+# define HAVE_STREX
+# define HAVE_STREXB
+# define HAVE_STREXH
+# define HAVE_STREXD
+#elif __ARM_ARCH__ == 6
+# define HAVE_STREX
+#elif __ARM_ARCH__ == 4 || __ARM_ARCH__ == 5
+/* This assumes that we are *not* running on ARM6+ SMP.  */
+# define HAVE_SWP
+#endif
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file.  */
+	/* Tag_ABI_align_needed: This code does not require 8-byte
+	   alignment from the caller.  */
+	/* .eabi_attribute 24, 0  -- default setting.  */
+	/* Tag_ABI_align_preserved: This code preserves 8-byte
+	   alignment in any callee.  */
+	.eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#if __ARM_ARCH__ == 4 && defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+# error "Need interworking stubs"
+#endif
+
+.macro FUNC name
+	.globl	\name
+#ifdef __ELF__
+	.hidden	\name
+	.type	\name, %function
+#endif
+#ifdef __thumb2__
+	.thumb
+	.thumb_func
+#else
+	.arm
+#endif
+	.align	2
+\name:
+.endm
+
+.macro ENDF name
+#ifdef __ELF__
+	.size	\name, . - \name
+#endif
+.endm
+
+.macro ALIAS new, old
+#ifdef __thumb__
+	.thumb_set \new, \old
+#else
+	.set	\new, \old
+#endif
+	.globl	\new
+.endm
+
+/* Concatenation macros */
+#define C(X,Y)	C2(X,Y)
+#define C2(X,Y)	X ## Y
+
+/* Sometimes we prefer an insn form that sets flags, for size.  */
+#ifdef __thumb2__
+# define s(X)	X ## s
+#else
+# define s(X)	X
+#endif
+
+#if __ARM_ARCH__ > 4 || defined(__ARM_ARCH_4T__)
+# define RET	bx	lr
+#else
+# define RET	mov	pc, lr
+#endif
+
+#if __ARM_ARCH__ >= 7
+# define DMB	dmb	sy
+# define HAVE_DMB
+#elif __ARM_ARCH__ == 6
+# define DMB	mcr	p15, 0, r0, c7, c10, 5
+# define HAVE_DMB
+#else
+# define DMB
+#endif
+
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
+# define HAVE_MOVW
+#endif
+
+	.text
+	.syntax unified
+
+#if __ARM_ARCH__ >= 7
+	.arch	armv7-a
+#elif defined(__thumb2__)
+	.arch	armv6t2
+#elif __ARM_ARCH__ >= 6
+	.arch	armv6
+#elif __ARM_ARCH__ > 4
+	.arch	armv5
+#elif defined(__ARM_ARCH_4T__)
+	.arch	armv4t
+#else
+	.arch	armv4
+#endif
+
+/* ----------------------------------------------------------------------
+   SYNC_SYNCHRONIZE
+   ----------------------------------------------------------------------*/
+
+/* Note that GCC assumes that all AAPCS compliant systems have this symbol;
+   invocations of __atomic_thread_fence and __atomic_signal_fence are always
+   revectored here.  */
+
+FUNC __sync_synchronize
+
+	DMB
+	RET
+
+ENDF __sync_synchronize
+
+
+/* ----------------------------------------------------------------------
+   ATOMIC_EXCHANGE
+   ----------------------------------------------------------------------*/
+
+/* These functions have the interface
+	T __atomic_exchange_N(T *mem, T new, int model);
+
+   The straight-forward versions of the LDREX alternatives, that is
+   those that don't need to force alignment, are inlined by the compiler
+   for the appropriate ISAs.  They are present here for completeness.
+
+   The compiler *never* emits SWP/SWPB, as it cannot know that the
+   runtime environment supports it.  Since we're assuming bare metal,
+   non-SMP, fully multilibed compilation, we go ahead and use it.
+
+   We ignore the memory model parameter to these routines and always
+   implement a full barrier across the routine.  */
+
+#if defined(HAVE_STREX) || defined(HAVE_SWP)
+FUNC __atomic_exchange_1
+
+	DMB
+# ifdef HAVE_STREXB
+	/* Use the byte version of STREX if available.  */
+0:	ldrexb	r2, [r0]
+	strexb	r3, r1, [r0]
+	cmp	r3, #0
+	bne	0b
+	mov	r0, r2
+	DMB
+	RET
+# elif defined(HAVE_STREX)
+	/* Use the word version of STREX and perform alignment.  */
+	push	{ r4, lr }
+	and	r4, r0, #3		@ find byte misalignment
+	bic	ip, r0, #3		@ align r0 into ip
+	s(lsl)	r4, r4, #3		@ find bit misalignment
+# ifdef __ARMEB__
+	rsb	r4, r4, #24
+# endif
+	s(mov)	r3, #0xff		@ compute byte mask
+	s(lsl)	r3, r3, r4		@ shift byte mask into place
+	s(lsl)	r1, r1, r4		@ shift input into place
+	uadd8	r3, r3, r3		@ move byte mask to APSR.GE bits
+0:
+	ldrex	r2, [ip]
+	lsr	r0, r2, r4		@ return old value
+	sel	r2, r1, r2		@ merge word with new val
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+	uxtb	r0, r0			@ return byte
+	DMB
+	pop	{ r4, pc }
+# else
+	/* Use SWPB only as a last resort.  */
+	mov	r2, r0
+	swpb	r0, r1, [r2]
+	RET
+# endif /* HAVE_STREXB */
+
+ENDF __atomic_exchange_1
+ALIAS __sync_lock_test_and_set_1, __atomic_exchange_1
+#endif /* HAVE_STREX || HAVE_SWPB */
+
+#ifdef HAVE_STREX
+FUNC __atomic_exchange_2
+
+	DMB
+# ifdef HAVE_STREXH
+	/* Use the half-word version of STREX if available.  */
+0:	ldrexh	r2, [r0]
+	strexh	r3, r1, [r0]
+	cmp	r3, #0
+	bne	0b
+	mov	r0, r2
+	DMB
+	RET
+# else
+	/* Use the word version of STREX and perform alignment.  */
+# ifdef HAVE_MOVW
+	movw	r3, #0xffff		@ create byte mask
+# else
+	ldr	r3, =#0xffff
+# endif
+	lsrs	r2, r0, #2		@ carry = (r0 & 2)
+# ifdef __ARMEB__
+# define HI	cc			@ iff value is in the high half
+# define LO	cs			@ iff the value is in the low half
+# else
+# define HI	cs
+# define LO	cc
+# endif
+	bic	ip, r0, #3		@ align r0 into ip
+	itt	HI
+	C(lsl,HI) r3, r3, #16		@ shift byte mask into place
+	C(lsl,HI) r1, r1, #16		@ shift new value into place
+	uadd16	r3, r3, r3		@ move byte mask into APSR.GE bits
+0:
+	ldrex	r2, [ip]
+	ite	HI
+	C(uxth,HI) r0, r2, ror #16	@ return old value
+	C(uxth,LO) r0, r2
+	sel	r2, r1, r2		@ merge word with new value
+	strex	r3, r2, [ip]
+	tst	r3, r3			@ do not clobber carry
+	bne	0b
+	DMB
+	RET
+
+# endif /* HAVE_STREXH */
+ENDF __atomic_exchange_2
+ALIAS __sync_lock_test_and_set_2, __atomic_exchange_2
+#endif /* HAVE_STREX */
+
+#if defined(HAVE_STREX) || defined(HAVE_SWP)
+FUNC __atomic_exchange_4
+
+	DMB
+# ifdef HAVE_STREX
+0:	ldrex	r2, [r0]
+	strex	r3, r1, [r0]
+	cmp	r3, #0
+	bne	0b
+	mov	r0, r2
+# else
+	mov	r2, r0
+	swp	r0, r1, [r2]
+# endif
+	DMB
+	RET
+
+ENDF __atomic_exchange_4
+ALIAS __sync_lock_test_and_set_4, __atomic_exchange_4
+#endif /* HAVE_STREX || HAVE_SWP */
+
+#ifdef HAVE_STREXD
+FUNC __atomic_exchange_8
+
+	DMB
+	push	{ r4, r5 }
+0:	ldrexd	r4, r5, [r0]
+	strexd	r1, r2, r3, [r0]
+	cmp	r1, #0
+	bne	0b
+	mov	r0, r4
+	mov	r1, r5
+	DMB
+	pop	{ r4, r5 }
+	RET
+
+ENDF __atomic_exchange_8
+ALIAS __sync_lock_test_and_set_8, __atomic_exchange_8
+#endif /* HAVE_STREXD */
+
+/* ----------------------------------------------------------------------
+   ATOMIC_COMPARE_EXCHANGE
+   ----------------------------------------------------------------------*/
+
+/* These functions have the interface
+	bool __atomic_compare_exchange_N(T *mem, T *old, T new, int ms, int mf);
+
+   The versions that do not require forced alignment are inlined
+   by the compiler.  They are present here for completeness only.
+
+   We ignore the memory model parameters to these routines and always
+   implement a full barrier across the routine.  */
+
+#ifdef HAVE_STREX
+FUNC __atomic_compare_exchange_1
+#ifdef HAVE_STREXB
+
+	DMB
+	push	{ r4, lr }
+	ldrb	r4, [r1]		@ load old value
+	mov	ip, r0
+0:	ldrexb	r0, [ip]
+	cmp	r0, r4
+	bne	1f
+	strexb	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	strb	r0, [r1]		@ return read value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, pc }
+
+#else
+
+	DMB
+	push	{ r4, r5, r6, lr }
+	ldrb	r4, [r1]		@ load old value
+	and	r6, r0, #3		@ find byte misalignment
+	bic	ip, r0, #3		@ align r0 into ip
+	s(lsl)	r6, r6, #3		@ find bit misalignment
+# ifdef __ARMEB__
+	rsb	r6, r6, #24
+# endif
+	s(mov)	r5, #0xff
+	s(lsl)	r4, r4, r6		@ shift old value in place
+	s(lsl)	r5, r5, r6		@ shift byte mask in place
+0:	ldrex	r2, [ip]
+	and	r0, r2, r5		@ mask read value to compare
+	cmp	r0, r4
+	bne	1f
+	s(bic)	r2, r2, r5		@ merge new value into word
+	s(orr)	r2, r2, r4
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	lsr	r0, r0, r6		@ return read value
+	strb	r0, [r1]
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, r5, r6, pc }
+
+#endif /* HAVE_STREXB */
+ENDF __atomic_compare_exchange_1
+
+FUNC __atomic_compare_exchange_2
+#ifdef HAVE_STREXB
+
+	DMB
+	push	{ r4, lr }
+	ldrh	r4, [r1]		@ load old value
+	mov	ip, r0
+0:	ldrexh	r0, [ip]
+	cmp	r0, r4
+	bne	1f
+	strexh	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	strh	r0, [r1]		@ return read value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, pc }
+
+#else
+
+	DMB
+	push	{ r4, r5, r6, lr }
+	ldrh	r4, [r1]		@ load old value
+# ifdef HAVE_MOVW
+	movw	r5, #0xffff		@ create byte mask
+# else
+	ldr	r5, =#0xffff
+# endif
+	and	r6, r0, #2		@ find byte misalignment
+	bic	ip, r0, #3		@ align r0 into ip
+	s(lsl)	r6, r6, #3		@ find bit misalignment
+# ifdef __ARMEB__
+	eor	r6, r6, #16
+# endif
+	s(lsl)	r4, r4, r6		@ shift old value in place
+	s(lsl)	r5, r5, r6		@ shift byte mask in place
+0:	ldrex	r2, [ip]
+	and	r0, r2, r5		@ mask read value to compare
+	cmp	r0, r4
+	bne	1f
+	s(bic)	r2, r2, r5		@ merge new value into word
+	s(orr)	r2, r2, r4
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	lsr	r0, r0, r6		@ return read value
+	strh	r0, [r1]
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, r5, r6, pc }
+
+#endif /* HAVE_STREXB */
+ENDF __atomic_compare_exchange_2
+
+FUNC __atomic_compare_exchange_4
+
+	DMB
+	push	{ r4, lr }
+	mov	ip, r0
+	ldr	r4, [r1]		@ load old value
+0:	ldrex	r0, [ip]
+	cmp	r0, r4
+	bne	1f
+	strex	r3, r2, [ip]
+	cmp	r3, #0
+	bne	0b
+1:	str	r0, [r1]		@ return old value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, pc }
+
+ENDF __atomic_compare_exchange_4
+#endif /* HAVE_STREX */
+
+#ifdef HAVE_STREXD
+FUNC __atomic_compare_exchange_8
+
+	DMB
+	push	{ r4, r5, r6, lr }
+	ldrd	r4, r5, [r1]		@ load old value
+	mov	ip, r0			@ save pointers
+	mov	lr, r1
+0:	ldrexd	r0, r1, [ip]
+	cmp	r0, r4
+	it	eq
+	cmpeq	r1, r2
+	bne	1f
+	strexd	r6, r2, r3, [r0]
+	cmp	r6, #0
+	bne	0b
+1:	strd	r0, r1, [lr]		@ save read value
+	ite	ne
+	movne	r0, #0
+	moveq	r0, #1
+	DMB
+	pop	{ r4, r5, r6, pc }
+
+ENDF __atomic_compare_exchange_8
+#endif /* HAVE_STREXD */
diff --git a/libgcc/config/arm/t-atomic b/libgcc/config/arm/t-atomic
new file mode 100644
index 0000000..977f09d
--- /dev/null
+++ b/libgcc/config/arm/t-atomic
@@ -0,0 +1 @@
+LIB2ADD_ST += $(srcdir)/config/arm/native-atomic.S