On 10/12/11 23:21, Richard Henderson wrote: > diff --git a/libitm/config/arm/hwcap.h b/libitm/config/arm/hwcap.h > new file mode 100644 > index 0000000..16e4034 > --- /dev/null > +++ b/libitm/config/arm/hwcap.h > + > +/* The following must match the kernel's <asm/procinfo.h>. */ > +#define HWCAP_ARM_SWP 1
All of these defines are linux-specific. Shouldn't they be wrapped as such? > diff --git a/libitm/config/arm/sjlj.S b/libitm/config/arm/sjlj.S > new file mode 100644 > index 0000000..cf2ae4b > --- /dev/null > +++ b/libitm/config/arm/sjlj.S > +/* ??? Use movw/movt when possible. */ > +#if defined(PIC) > +.macro ldaddr reg, addr > + ldr \reg, 99f > + ldr \reg, [\reg] > +98: add \reg, \reg, pc > +.subsection 1 > +99: .word \addr - (98b + 8) > +.subsection 0 > +.endm That can't be right. The first instruction loads from your literal block an address difference, then your second instruction tries to use that as an address. Why don't you just write .macro ldaddr reg, addr ldr \reg, =(\addr - 99b + PC_OFFS) 99: add \reg, \reg, pc .endm > + /* Store the VFP registers. Don't use VFP instructions directly > + because this code is used in non-VFP multilibs. */ > + tst r2, #HWCAP_ARM_VFP > + it ne > + stcne p11, cr8, [sp], {16} /* vstmne sp, {d8-d15} */ You shouldn't rely on the conditional STC instruction not trapping if the condition test fails. Conditionally NOT executing an instruction is implementation defined if the instruction itself is undefined. Just branch round it as you've done for the other instructions below. > + > +GTM_longjmp: > + ldaddr r2, GTM_hwcap > + > + tst r2, #HWCAP_ARM_VFP > + it ne > + ldcne p11, cr8, [r1], {16} /* vldmiane r1, {d8-d15} */ As above. > + > + tst r2, #HWCAP_ARM_IWMMXT > + beq 1f > + ldcl p1, cr10, [r1, #64] /* wldrd wr10, [r1, #64] */ > + ldcl p1, cr11, [r1, #72] > + ldcl p1, cr12, [r1, #80] > + ldcl p1, cr13, [r1, #88] > + ldcl p1, cr14, [r1, #96] > + ldcl p1, cr15, [r1, #104] > +1: > + add r1, r1, #(14*8) /* Skip both VFP and iWMMXt blocks */ > +#ifdef __thumb__ > + ldm r1, { r4-r11, ip, lr } > + mov sp, ip > + bx lr > +#else > + ldm r1, { r4-r11, sp, pc } Isn't this buffer on the stack? And won't loading SP cause the buffer to become deallocated? If so, then you need to use the same code as Thumb to avoid undefined behaviour if the instruction is interrupted before completion (since SP won't be restored). Otherwise, with the changes for Ramana's comments, this is OK. R.