Hi Richard, Comments inline below..
On Sat, Dec 10, 2011 at 03:21:23PM -0800, Richard Henderson wrote: > --- > libitm/Makefile.am | 3 + > libitm/Makefile.in | 20 +++-- > libitm/config/arm/hwcap.cc | 67 +++++++++++++++++ > libitm/config/arm/hwcap.h | 41 ++++++++++ > libitm/config/arm/sjlj.S | 135 > ++++++++++++++++++++++++++++++++++ > libitm/config/arm/target.h | 62 ++++++++++++++++ > libitm/config/generic/asmcfi.h | 13 ++- > libitm/config/linux/arm/futex_bits.h | 48 ++++++++++++ > libitm/configure | 18 ++++- > libitm/configure.ac | 1 + > libitm/configure.tgt | 2 + > 11 files changed, 395 insertions(+), 15 deletions(-) > create mode 100644 libitm/config/arm/hwcap.cc > create mode 100644 libitm/config/arm/hwcap.h > create mode 100644 libitm/config/arm/sjlj.S > create mode 100644 libitm/config/arm/target.h > create mode 100644 libitm/config/linux/arm/futex_bits.h > > diff --git a/libitm/Makefile.am b/libitm/Makefile.am > index 26e1ebc..d417026 100644 > --- a/libitm/Makefile.am > +++ b/libitm/Makefile.am > @@ -62,6 +62,9 @@ libitm_la_SOURCES = \ > query.cc retry.cc rwlock.cc useraction.cc util.cc \ > sjlj.S tls.cc method-serial.cc method-gl.cc > > +if ARCH_ARM > +libitm_la_SOURCES += hwcap.cc > +endif > if ARCH_X86 > libitm_la_SOURCES += x86_sse.cc x86_avx.cc > x86_sse.lo : XCFLAGS += -msse > diff --git a/libitm/Makefile.in b/libitm/Makefile.in > index dc77382..5305f4c 100644 > --- a/libitm/Makefile.in > +++ b/libitm/Makefile.in > @@ -36,8 +36,9 @@ POST_UNINSTALL = : > build_triplet = @build@ > host_triplet = @host@ > target_triplet = @target@ > -@ARCH_X86_TRUE@am__append_1 = x86_sse.cc x86_avx.cc > -@ARCH_FUTEX_TRUE@am__append_2 = futex.cc > +@ARCH_ARM_TRUE@am__append_1 = hwcap.cc > +@ARCH_X86_TRUE@am__append_2 = x86_sse.cc x86_avx.cc > +@ARCH_FUTEX_TRUE@am__append_3 = futex.cc > subdir = . > DIST_COMMON = $(am__configure_deps) $(srcdir)/../config.guess \ > $(srcdir)/../config.sub $(srcdir)/../depcomp \ > @@ -99,15 +100,16 @@ libitm_la_LIBADD = > am__libitm_la_SOURCES_DIST = aatree.cc alloc.cc alloc_c.cc \ > alloc_cpp.cc barrier.cc beginend.cc clone.cc eh_cpp.cc \ > local.cc query.cc retry.cc rwlock.cc useraction.cc util.cc \ > - sjlj.S tls.cc method-serial.cc method-gl.cc x86_sse.cc \ > - x86_avx.cc futex.cc > -@ARCH_X86_TRUE@am__objects_1 = x86_sse.lo x86_avx.lo > -@ARCH_FUTEX_TRUE@am__objects_2 = futex.lo > + sjlj.S tls.cc method-serial.cc method-gl.cc hwcap.cc \ > + x86_sse.cc x86_avx.cc futex.cc > +@ARCH_ARM_TRUE@am__objects_1 = hwcap.lo > +@ARCH_X86_TRUE@am__objects_2 = x86_sse.lo x86_avx.lo > +@ARCH_FUTEX_TRUE@am__objects_3 = futex.lo > am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \ > barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \ > retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \ > method-serial.lo method-gl.lo $(am__objects_1) \ > - $(am__objects_2) > + $(am__objects_2) $(am__objects_3) > libitm_la_OBJECTS = $(am_libitm_la_OBJECTS) > DEFAULT_INCLUDES = -I.@am__isrc@ > depcomp = $(SHELL) $(top_srcdir)/../depcomp > @@ -376,7 +378,8 @@ libitm_la_LDFLAGS = $(libitm_version_info) > $(libitm_version_script) > libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc \ > barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \ > retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \ > - method-serial.cc method-gl.cc $(am__append_1) $(am__append_2) > + method-serial.cc method-gl.cc $(am__append_1) $(am__append_2) \ > + $(am__append_3) > > # Automake Documentation: > # If your package has Texinfo files in many directories, you can use the > @@ -505,6 +508,7 @@ distclean-compile: > @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clone.Plo@am__quote@ > @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eh_cpp.Plo@am__quote@ > @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/futex.Plo@am__quote@ > +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hwcap.Plo@am__quote@ > @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/local.Plo@am__quote@ > @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-gl.Plo@am__quote@ > @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/method-serial.Plo@am__quote@ > diff --git a/libitm/config/arm/hwcap.cc b/libitm/config/arm/hwcap.cc > new file mode 100644 > index 0000000..007c10e > --- /dev/null > +++ b/libitm/config/arm/hwcap.cc > @@ -0,0 +1,67 @@ > +/* Copyright (C) 2011 Free Software Foundation, Inc. > + Contributed by Richard Henderson <r...@redhat.com>. > + > + This file is part of the GNU Transactional Memory Library (libitm). > + > + Libitm is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY > + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS > + FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +/* This file initializes GTM_hwcap in some os-specific way to indicate > + what ISA extensions are present for ARM. */ > + > +#include "libitm_i.h" > +#include "hwcap.h" > + > +/* Begin by defaulting to whatever options were given to the compiler. */ > +int GTM_hwcap HIDDEN = 0 > +#ifdef __VFP_FP__ > + | HWCAP_ARM_VFP > +#endif > +#ifdef __IWMMXT__ > + | HWCAP_ARM_IWMMXT > +#endif > + ; > + > +#ifdef __linux__ > +#include <unistd.h> > +#include <sys/fcntl.h> > +#include <elf.h> > + > +static void __attribute__((constructor)) > +init_gtm_hwcap(void) > +{ > + int fd = open ("/proc/self/auxv", O_RDONLY); > + if (fd < 0) > + return; > + > + Elf32_auxv_t pairs[512]; > + ssize_t rlen = read (fd, pairs, sizeof(pairs)); > + close (fd); > + if (rlen < 0) > + return; > + > + size_t n = (size_t)rlen / sizeof(pairs[0]); > + for (size_t i = 0; i < n; ++i) > + if (pairs[i].a_type == AT_HWCAP) > + { > + GTM_hwcap = pairs[i].a_un.a_val; > + return; > + } > +} > +#endif > diff --git a/libitm/config/arm/hwcap.h b/libitm/config/arm/hwcap.h > new file mode 100644 > index 0000000..16e4034 > --- /dev/null > +++ b/libitm/config/arm/hwcap.h > @@ -0,0 +1,41 @@ > +/* Copyright (C) 2011 Free Software Foundation, Inc. > + Contributed by Richard Henderson <r...@redhat.com>. > + > + This file is part of the GNU Transactional Memory Library (libitm). > + > + Libitm is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY > + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS > + FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +/* The following must match the kernel's <asm/procinfo.h>. */ > +#define HWCAP_ARM_SWP 1 > +#define HWCAP_ARM_HALF 2 > +#define HWCAP_ARM_THUMB 4 > +#define HWCAP_ARM_26BIT 8 > +#define HWCAP_ARM_FAST_MULT 16 > +#define HWCAP_ARM_FPA 32 > +#define HWCAP_ARM_VFP 64 > +#define HWCAP_ARM_EDSP 128 > +#define HWCAP_ARM_JAVA 256 > +#define HWCAP_ARM_IWMMXT 512 > +#define HWCAP_ARM_CRUNCH 1024 > +#define HWCAP_ARM_THUMBEE 2048 > +#define HWCAP_ARM_NEON 4096 > +#define HWCAP_ARM_VFPv3 8192 > +#define HWCAP_ARM_VFPv3D16 16384 > + > diff --git a/libitm/config/arm/sjlj.S b/libitm/config/arm/sjlj.S > new file mode 100644 > index 0000000..cf2ae4b > --- /dev/null > +++ b/libitm/config/arm/sjlj.S > @@ -0,0 +1,135 @@ > +/* Copyright (C) 2011 Free Software Foundation, Inc. > + Contributed by Richard Henderson <r...@redhat.com>. > + > + This file is part of the GNU Transactional Memory Library (libitm). > + > + Libitm is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY > + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS > + FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include "hwcap.h" > +#include "asmcfi.h" > + > + .syntax unified > + > +#if defined(__thumb__) > + .thumb > + .thumb_func > +#endif This ain't gonna cut it for Thumb1 - Thus replace that with if defined(__thumb2) .syntax unified and Thumb1 don't cut it together unfortunately . Also the ldc instructions wouldn't work at Thumb1. > + > +/* ??? Use movw/movt when possible. */ You can use movw/movt with this complex condition I think #if (defined (thumb) && defined(__ARM_ARCH_6T2__)) || defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) || defined (__ARM_ARCH_7M__) > +#if defined(PIC) > +.macro ldaddr reg, addr > + ldr \reg, 99f > + ldr \reg, [\reg] > +98: add \reg, \reg, pc > +.subsection 1 > +99: .word \addr - (98b + 8) This can't be right for Thumb2 state . PC is PC + 4 for Thumb2 unless I'm missing something right now. > +.subsection 0 > +.endm > +#else > +.macro ldaddr reg, addr > + ldr \reg, =\addr > +.endm > +#endif > + > + .text > + .align 2 > + .global _ITM_beginTransaction > + .type _ITM_beginTransaction, %function > + > +_ITM_beginTransaction: > + .fnstart > + cfi_startproc > +#ifdef __thumb__ > + mov ip, sp > + push { r4-r11, ip, lr } > +#else > + push { r4-r11, sp, lr } The assembler should have warned this case as well since this really is an UNPREDICTABLE instruction. SP can't really be in the register list for a push. Thus you shouldn't need a __thumb__ case. > +#endif > + .save { lr } > + .pad #(9*4) > + cfi_adjust_cfa_offset(40) > + cfi_rel_offset(lr, 36) > + sub sp, sp, #(14*8) > + .pad #(14*8) > + cfi_adjust_cfa_offset(14*8) > + > + ldaddr r2, GTM_hwcap > + > + /* Store the VFP registers. Don't use VFP instructions directly > + because this code is used in non-VFP multilibs. */ > + tst r2, #HWCAP_ARM_VFP > + it ne > + stcne p11, cr8, [sp], {16} /* vstmne sp, {d8-d15} */ > + > + /* Save the call-preserved iWMMXt registers. */ > + tst r2, #HWCAP_ARM_IWMMXT > + beq 1f > + stcl p1, cr10, [sp, #64] /* wstrd wr10, [sp, #64] */ > + stcl p1, cr11, [sp, #72] > + stcl p1, cr12, [sp, #80] > + stcl p1, cr13, [sp, #88] > + stcl p1, cr14, [sp, #96] > + stcl p1, cr15, [sp, #104] > +1: > + /* Invoke GTM_begin_transaction with the struct we just built. */ > + mov r1, sp > + bl GTM_begin_transaction > + > + /* Return; we don't need to restore any of the call-saved regs. */ > + add sp, sp, #(14*8 + 9*4) > + cfi_adjust_cfa_offset(-(14*8 + 9*4)) > + pop { pc } > + .fnend > + cfi_endproc > + .size _ITM_beginTransaction, . - _ITM_beginTransaction > + > + .global GTM_longjmp > + .hidden GTM_longjmp > + .type GTM_longjmp, %function > + > +GTM_longjmp: > + ldaddr r2, GTM_hwcap > + > + tst r2, #HWCAP_ARM_VFP > + it ne > + ldcne p11, cr8, [r1], {16} /* vldmiane r1, {d8-d15} */ > + > + tst r2, #HWCAP_ARM_IWMMXT > + beq 1f > + ldcl p1, cr10, [r1, #64] /* wldrd wr10, [r1, #64] */ > + ldcl p1, cr11, [r1, #72] > + ldcl p1, cr12, [r1, #80] > + ldcl p1, cr13, [r1, #88] > + ldcl p1, cr14, [r1, #96] > + ldcl p1, cr15, [r1, #104] > +1: > + add r1, r1, #(14*8) /* Skip both VFP and iWMMXt blocks */ > +#ifdef __thumb__ > + ldm r1, { r4-r11, ip, lr } > + mov sp, ip > + bx lr > +#else > + ldm r1, { r4-r11, sp, pc } > +#endif > + .size GTM_longjmp, . - GTM_longjmp > + > +#ifdef __linux__ > +.section .note.GNU-stack, "", %progbits > +#endif > diff --git a/libitm/config/arm/target.h b/libitm/config/arm/target.h > new file mode 100644 > index 0000000..99dd99a > --- /dev/null > +++ b/libitm/config/arm/target.h > @@ -0,0 +1,62 @@ > +/* Copyright (C) 2011 Free Software Foundation, Inc. > + Contributed by Richard Henderson <r...@redhat.com>. > + > + This file is part of the GNU Transactional Memory Library (libitm). > + > + Libitm is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY > + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS > + FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +namespace GTM HIDDEN { > + > +typedef struct gtm_jmpbuf > +{ > + unsigned long long vfp[8]; /* d8-d15 */ > + unsigned long long iwmmxt[6]; /* cr10-cr15 */ > + unsigned long gr[8]; /* r4-r11 */ > + void *cfa; > + unsigned long pc; > +} gtm_jmpbuf; > + > +/* ARM generally uses a fixed page size of 4K. */< > +#define PAGE_SIZE 4096 > +#define FIXED_PAGE_SIZE 1 > + > +/* ??? The size of one line in hardware caches (in bytes). */ > +#define HW_CACHELINE_SIZE 64 > + > +static inline void > +cpu_relax (void) > +{ > + /* ??? Maybe use WFE. */ > + __asm volatile ("" : : : "memory"); > +} Should this be similar to cpu_relax in the kernel ? On some cores I thought we'd end up having to do have to use __sync_synchronize. > + > +static inline void > +atomic_read_barrier (void) > +{ > + __sync_synchronize (); > +} > + > +static inline void > +atomic_write_barrier (void) > +{ > + __sync_synchronize (); > +} > + > +} // namespace GTM > diff --git a/libitm/config/generic/asmcfi.h b/libitm/config/generic/asmcfi.h > index 4344d6f..bb7f98f 100644 > --- a/libitm/config/generic/asmcfi.h > +++ b/libitm/config/generic/asmcfi.h > @@ -27,16 +27,19 @@ > > #ifdef HAVE_AS_CFI_PSEUDO_OP > > -#define cfi_startproc .cfi_startproc > -#define cfi_endproc .cfi_endproc > -#define cfi_def_cfa_offset(n) .cfi_def_cfa_offset n > -#define cfi_def_cfa(r,n) .cfi_def_cfa r, n > -#define cfi_register(o,n) .cfi_register o, n > +#define cfi_startproc .cfi_startproc > +#define cfi_endproc .cfi_endproc > +#define cfi_adjust_cfa_offset(n) .cfi_adjust_cfa_offset n > +#define cfi_def_cfa_offset(n) .cfi_def_cfa_offset n > +#define cfi_def_cfa(r,n) .cfi_def_cfa r, n > +#define cfi_rel_offset(r,o) .cfi_rel_offset r, o > +#define cfi_register(o,n) .cfi_register o, n > > #else > > #define cfi_startproc > #define cfi_endproc > +#define cfi_adjust_cfa_offset(n) > #define cfi_def_cfa_offset(n) > #define cfi_def_cfa(r,n) > #define cfi_register(o,n) > diff --git a/libitm/config/linux/arm/futex_bits.h > b/libitm/config/linux/arm/futex_bits.h > new file mode 100644 > index 0000000..7e1b52f > --- /dev/null > +++ b/libitm/config/linux/arm/futex_bits.h > @@ -0,0 +1,48 @@ > +/* Copyright (C) 2011 Free Software Foundation, Inc. > + Contributed by Richard Henderson <r...@redhat.com>. > + > + This file is part of the GNU Transactional Memory Library (libitm). > + > + Libitm is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY > + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS > + FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; ou> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +/* Provide target-specific access to the futex system call. */ > + > +#include <sys/syscall.h> > + > +static inline long > +sys_futex0 (int *addr, long op, long val) > +{ > + register long sc_0 __asm__("r0"); > + register long sc_1 __asm__("r1"); > + register long sc_2 __asm__("r2"); > + register long sc_3 __asm__("r3"); > + > + sc_0 = (long) addr; > + sc_1 = op; > + sc_2 = val; > + sc_3 = 0; > + > + __asm volatile ("swi %1" > + : "+r"(sc_0) > + : "i"(SYS_futex), "r"(sc_1), "r"(sc_2), "r"(sc_3) > + : "memory"); > + > + return sc_0; > +} Hmmm why is this still using the old syscall interface ? I see that Joseph commented on this in a previous review. Thus I don't think this is right as it stands today. cheers Ramana