Hi Dave, could you split this further into a patch that deals with the case for disabling MCR memory barriers for Thumb1, so that it may be backported to the release branches? I have commented inline as well.
Could you also provide a proper changelog entry for this that will also help with review of the patch ? I've not yet managed to fully review all the bits in this patch but here's some initial comments that should be looked at. On 1 July 2011 16:54, Dr. David Alan Gilbert <david.gilb...@linaro.org> wrote: > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 057f9ba..39057d2 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > > /* Emit a strex{b,h,d, } instruction appropriate for the specified > @@ -23374,14 +23388,29 @@ arm_output_strex (emit_f emit, > rtx value, > rtx memory) > { > - const char *suffix = arm_ldrex_suffix (mode); > - rtx operands[3]; > + rtx operands[4]; > > operands[0] = result; > operands[1] = value; > - operands[2] = memory; > - arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", > suffix, > - cc); > + if (mode != DImode) > + { > + const char *suffix = arm_ldrex_suffix (mode); > + operands[2] = memory; > + arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", > + suffix, cc); > + } > + else > + { > + /* The restrictions on target registers in ARM mode are that the two > + registers are consecutive and the first one is even; Thumb is > + actually more flexible, but DI should give us this anyway. > + Note that the 1st register always gets the lowest word in memory. */ > + gcc_assert ((REGNO (value) & 1) == 0); > + operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1); > + operands[3] = memory; > + arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, > %%C3", > + cc); > + } > } > > /* Helper to emit a two operand instruction. */ > @@ -23423,7 +23452,7 @@ arm_output_op3 (emit_f emit, const char *mnemonic, > rtx d, rtx a, rtx b) > > required_value: > > - RTX register or const_int representing the required old_value for > + RTX register representing the required old_value for > the modify to continue, if NULL no comparsion is performed. 
*/ > static void > arm_output_sync_loop (emit_f emit, > @@ -23437,7 +23466,13 @@ arm_output_sync_loop (emit_f emit, > enum attr_sync_op sync_op, > int early_barrier_required) > { > - rtx operands[1]; > + rtx operands[2]; > + /* We'll use the lo for the normal rtx in the none-DI case > + as well as the least-sig word in the DI case. */ > + rtx old_value_lo, required_value_lo, new_value_lo, t1_lo; > + rtx old_value_hi, required_value_hi, new_value_hi, t1_hi; > + > + bool is_di = mode == DImode; > > gcc_assert (t1 != t2); > > @@ -23448,82 +23483,142 @@ arm_output_sync_loop (emit_f emit, > > arm_output_ldrex (emit, mode, old_value, memory); > > + if (is_di) > + { > + old_value_lo = gen_lowpart (SImode, old_value); > + old_value_hi = gen_highpart (SImode, old_value); > + if (required_value) > + { > + required_value_lo = gen_lowpart (SImode, required_value); > + required_value_hi = gen_highpart (SImode, required_value); > + } > + else > + { > + /* Silence false potentially unused warning */ > + required_value_lo = NULL; > + required_value_hi = NULL; > + } > + new_value_lo = gen_lowpart (SImode, new_value); > + new_value_hi = gen_highpart (SImode, new_value); > + t1_lo = gen_lowpart (SImode, t1); > + t1_hi = gen_highpart (SImode, t1); > + } > + else > + { > + old_value_lo = old_value; > + new_value_lo = new_value; > + required_value_lo = required_value; > + t1_lo = t1; > + > + /* Silence false potentially unused warning */ > + t1_hi = NULL; > + new_value_hi = NULL; > + required_value_hi = NULL; > + old_value_hi = NULL; > + } > + > if (required_value) > { > - rtx operands[2]; > + operands[0] = old_value_lo; > + operands[1] = required_value_lo; > > - operands[0] = old_value; > - operands[1] = required_value; > arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); > + if (is_di) > + { > + arm_output_asm_insn (emit, 0, operands, "it\teq"); This should be guarded with a if (TARGET_THUMB2) - there's no point in accounting for the length of this instruction in the compiler 
and then have the assembler fold it away in ARM state. > > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h > index c32ef1a..3fdd22f 100644 > --- a/gcc/config/arm/arm.h > +++ b/gcc/config/arm/arm.h > @@ -282,7 +282,8 @@ extern void > (*arm_lang_output_object_attributes_hook)(void); > -#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB) > +#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB \ > + && ! TARGET_THUMB1) This hunk (TARGET_HAVE_DMB_MCR) should probably be backported to release branches because this is technically fixing an issue and hence should be a separate patch that can be looked at separately. > > /* Nonzero if this chip implements a memory barrier instruction. */ > #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) > @@ -290,8 +291,12 @@ extern void > (*arm_lang_output_object_attributes_hook)(void); sync.md changes - > (define_mode_iterator NARROW [QI HI]) >+(define_mode_iterator QHSD [QI HI SI DI]) >+(define_mode_iterator SIDI [SI DI]) >+ >+(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && >TARGET_HAVE_MEMORY_BARRIER") >+ (QI "TARGET_HAVE_LDREXBH && >TARGET_HAVE_MEMORY_BARRIER") >+ (HI "TARGET_HAVE_LDREXBH && >TARGET_HAVE_MEMORY_BARRIER") >+ (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN >&& TARGET_HAVE_MEMORY_BARRIER")]) >+ Can we move all the iterators to iterators.md and then arrange includes to work automatically ? Minor nit - could you align the entries for QI, HI and DI with the start of the SI ? >+(define_mode_attr sync_atleastsi [(SI "SI") >+ (DI "DI") >+ (HI "SI") >+ (QI "SI")]) > I couldn't spot where this was being used. Can this be removed if not necessary ? 
> >-(define_insn "arm_sync_new_nandsi" >+(define_insn "arm_sync_new_<sync_optab><mode>" > [(set (match_operand:SI 0 "s_register_operand" "=&r") >- (unspec_volatile:SI [(not:SI (and:SI >- (match_operand:SI 1 "arm_sync_memory_operand" >"+Q") >- (match_operand:SI 2 "s_register_operand" "r"))) >- ] >- VUNSPEC_SYNC_NEW_OP)) >+ (unspec_volatile:SI [(syncop:SI >+ (zero_extend:SI >+ (match_operand:NARROW 1 >"arm_sync_memory_operand" "+Q")) >+ (match_operand:SI 2 "s_register_operand" "r")) >+ ] >+ VUNSPEC_SYNC_NEW_OP)) > (set (match_dup 1) >- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] >- VUNSPEC_SYNC_NEW_OP)) >+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] >+ VUNSPEC_SYNC_NEW_OP)) > (clobber (reg:CC CC_REGNUM)) > (clobber (match_scratch:SI 3 "=&r"))] >- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" >+ "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER" Can't this just use <sync_predtab> instead since the condition is identical for QImode and HImode from that mode attribute and in quite a few places below. ? >@@ -461,19 +359,19 @@ > (unspec_volatile:SI > [(not:SI > (and:SI >- (zero_extend:SI >- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) >- (match_operand:SI 2 "s_register_operand" "r"))) >+ (zero_extend:SI >+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) >+ (match_operand:SI 2 "s_register_operand" "r"))) > ] VUNSPEC_SYNC_NEW_OP)) > (set (match_dup 1) > (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] >- VUNSPEC_SYNC_NEW_OP)) >+ VUNSPEC_SYNC_NEW_OP)) > (clobber (reg:CC CC_REGNUM)) > (clobber (match_scratch:SI 3 "=&r"))] >- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" >+ "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER" Again here . Not sure which pattern this is though just looking at the patch. 
> { > return arm_output_sync_insn (insn, operands); >- } >+ } > [(set_attr "sync_result" "0") > (set_attr "sync_memory" "1") > (set_attr "sync_new_value" "2") >@@ -483,20 +381,20 @@ > (set_attr "conds" "clob") > (set_attr "predicable" "no")]) > >-(define_insn "arm_sync_old_<sync_optab>si" >- [(set (match_operand:SI 0 "s_register_operand" "=&r") >- (unspec_volatile:SI [(syncop:SI >- (match_operand:SI 1 "arm_sync_memory_operand" >"+Q") >- (match_operand:SI 2 "s_register_operand" "r")) >- ] >- VUNSPEC_SYNC_OLD_OP)) >+(define_insn "arm_sync_old_<sync_optab><mode>" >+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") >+ (unspec_volatile:SIDI [(syncop:SIDI >+ (match_operand:SIDI 1 "arm_sync_memory_operand" >"+Q") >+ (match_operand:SIDI 2 "s_register_operand" "r")) >+ ] >+ VUNSPEC_SYNC_OLD_OP)) > (set (match_dup 1) >- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] >- VUNSPEC_SYNC_OLD_OP)) >+ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] >+ VUNSPEC_SYNC_OLD_OP)) > (clobber (reg:CC CC_REGNUM)) >- (clobber (match_scratch:SI 3 "=&r")) >+ (clobber (match_scratch:SIDI 3 "=&r")) > (clobber (match_scratch:SI 4 "<sync_clobber>"))] >- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" >+ "<sync_predtab>" > { > return arm_output_sync_insn (insn, operands); > } >@@ -509,20 +407,21 @@ > (set_attr "conds" "clob") > (set_attr "predicable" "no")]) > >-(define_insn "arm_sync_old_nandsi" >+(define_insn "arm_sync_old_<sync_optab><mode>" > [(set (match_operand:SI 0 "s_register_operand" "=&r") >- (unspec_volatile:SI [(not:SI (and:SI >- (match_operand:SI 1 "arm_sync_memory_operand" >"+Q") >- (match_operand:SI 2 "s_register_operand" "r"))) >- ] >- VUNSPEC_SYNC_OLD_OP)) >+ (unspec_volatile:SI [(syncop:SI >+ (zero_extend:SI >+ (match_operand:NARROW 1 >"arm_sync_memory_operand" "+Q")) >+ (match_operand:SI 2 "s_register_operand" "r")) >+ ] >+ VUNSPEC_SYNC_OLD_OP)) > (set (match_dup 1) >- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] >- VUNSPEC_SYNC_OLD_OP)) >+ 
(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] >+ VUNSPEC_SYNC_OLD_OP)) > (clobber (reg:CC CC_REGNUM)) > (clobber (match_scratch:SI 3 "=&r")) >- (clobber (match_scratch:SI 4 "=&r"))] >- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" >+ (clobber (match_scratch:SI 4 "<sync_clobber>"))] >+ "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER" Likewise for sync_predtab. >-(define_insn "arm_sync_old_<sync_optab><mode>" >- [(set (match_operand:SI 0 "s_register_operand" "=&r") >- (unspec_volatile:SI [(syncop:SI >- (zero_extend:SI >- (match_operand:NARROW 1 >"arm_sync_memory_operand" "+Q")) >- (match_operand:SI 2 "s_register_operand" "r")) >- ] >- VUNSPEC_SYNC_OLD_OP)) >+(define_insn "arm_sync_old_nand<mode>" >+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") >+ (unspec_volatile:SIDI [(not:SIDI (and:SIDI >+ (match_operand:SIDI 1 "arm_sync_memory_operand" >"+Q") >+ (match_operand:SIDI 2 "s_register_operand" "r"))) >+ ] >+ VUNSPEC_SYNC_OLD_OP)) > (set (match_dup 1) >- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] >+ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] > VUNSPEC_SYNC_OLD_OP)) > (clobber (reg:CC CC_REGNUM)) >- (clobber (match_scratch:SI 3 "=&r")) >- (clobber (match_scratch:SI 4 "<sync_clobber>"))] >- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" >+ (clobber (match_scratch:SIDI 3 "=&r")) >+ (clobber (match_scratch:SI 4 "=&r"))] >+ "<sync_predtab>" > { > return arm_output_sync_insn (insn, operands); > } >@@ -557,26 +455,26 @@ > (set_attr "sync_memory" "1") > (set_attr "sync_new_value" "2") > (set_attr "sync_t1" "3") >- (set_attr "sync_t2" "<sync_t2_reqd>") >- (set_attr "sync_op" "<sync_optab>") >+ (set_attr "sync_t2" "4") >+ (set_attr "sync_op" "nand") > (set_attr "conds" "clob") > (set_attr "predicable" "no")]) > > (define_insn "arm_sync_old_nand<mode>" > [(set (match_operand:SI 0 "s_register_operand" "=&r") >- (unspec_volatile:SI [(not:SI (and:SI >- (zero_extend:SI >- (match_operand:NARROW 1 >"arm_sync_memory_operand" "+Q")) >- 
(match_operand:SI 2 "s_register_operand" "r"))) >- ] >- VUNSPEC_SYNC_OLD_OP)) >+ (unspec_volatile:SI [(not:SI (and:SI >+ (zero_extend:SI >+ (match_operand:NARROW 1 >"arm_sync_memory_operand" "+Q")) >+ (match_operand:SI 2 "s_register_operand" "r"))) <>+ ] >+ VUNSPEC_SYNC_OLD_OP)) > (set (match_dup 1) >- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] >- VUNSPEC_SYNC_OLD_OP)) >+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] >+ VUNSPEC_SYNC_OLD_OP)) > (clobber (reg:CC CC_REGNUM)) > (clobber (match_scratch:SI 3 "=&r")) > (clobber (match_scratch:SI 4 "=&r"))] >- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" >+ "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER" > { > return arm_output_sync_insn (insn, operands); > } Cheers Ramana