Hi Dave,

Could you split this further into a patch that deals with the
case for disabling MCR memory barriers for Thumb1 so that it
may be backported to the release branches? I have commented inline
as well.

Could you also provide a proper changelog entry for this; that will
also help with review of the patch.

I've not yet managed to fully review all the bits in this patch, but
here are some initial comments that should be looked at.

On 1 July 2011 16:54, Dr. David Alan Gilbert <david.gilb...@linaro.org> wrote:
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 057f9ba..39057d2 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c

>
>  /* Emit a strex{b,h,d, } instruction appropriate for the specified
> @@ -23374,14 +23388,29 @@ arm_output_strex (emit_f emit,
>                  rtx value,
>                  rtx memory)
>  {
> -  const char *suffix = arm_ldrex_suffix (mode);
> -  rtx operands[3];
> +  rtx operands[4];
>
>   operands[0] = result;
>   operands[1] = value;
> -  operands[2] = memory;
> -  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", 
> suffix,
> -                      cc);
> +  if (mode != DImode)
> +    {
> +      const char *suffix = arm_ldrex_suffix (mode);
> +      operands[2] = memory;
> +      arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
> +                         suffix, cc);
> +    }
> +  else
> +    {
> +      /* The restrictions on target registers in ARM mode are that the two
> +        registers are consecutive and the first one is even; Thumb is
> +        actually more flexible, but DI should give us this anyway.
> +        Note that the 1st register always gets the lowest word in memory.  */
> +      gcc_assert ((REGNO (value) & 1) == 0);
> +      operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
> +      operands[3] = memory;
> +      arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, 
> %%C3",
> +                          cc);
> +    }
>  }
>
>  /* Helper to emit a two operand instruction.  */
> @@ -23423,7 +23452,7 @@ arm_output_op3 (emit_f emit, const char *mnemonic, 
> rtx d, rtx a, rtx b)
>
>    required_value:
>
> -   RTX register or const_int representing the required old_value for
> +   RTX register representing the required old_value for
>    the modify to continue, if NULL no comparsion is performed.  */
>  static void
>  arm_output_sync_loop (emit_f emit,
> @@ -23437,7 +23466,13 @@ arm_output_sync_loop (emit_f emit,
>                      enum attr_sync_op sync_op,
>                      int early_barrier_required)
>  {
> -  rtx operands[1];
> +  rtx operands[2];
> +  /* We'll use the lo for the normal rtx in the none-DI case
> +     as well as the least-sig word in the DI case.  */
> +  rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
> +  rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
> +
> +  bool is_di = mode == DImode;
>
>   gcc_assert (t1 != t2);
>
> @@ -23448,82 +23483,142 @@ arm_output_sync_loop (emit_f emit,
>
>   arm_output_ldrex (emit, mode, old_value, memory);
>
> +  if (is_di)
> +    {
> +      old_value_lo = gen_lowpart (SImode, old_value);
> +      old_value_hi = gen_highpart (SImode, old_value);
> +      if (required_value)
> +       {
> +         required_value_lo = gen_lowpart (SImode, required_value);
> +         required_value_hi = gen_highpart (SImode, required_value);
> +       }
> +      else
> +       {
> +         /* Silence false potentially unused warning */
> +         required_value_lo = NULL;
> +         required_value_hi = NULL;
> +       }
> +      new_value_lo = gen_lowpart (SImode, new_value);
> +      new_value_hi = gen_highpart (SImode, new_value);
> +      t1_lo = gen_lowpart (SImode, t1);
> +      t1_hi = gen_highpart (SImode, t1);
> +    }
> +  else
> +    {
> +      old_value_lo = old_value;
> +      new_value_lo = new_value;
> +      required_value_lo = required_value;
> +      t1_lo = t1;
> +
> +      /* Silence false potentially unused warning */
> +      t1_hi = NULL;
> +      new_value_hi = NULL;
> +      required_value_hi = NULL;
> +      old_value_hi = NULL;
> +    }
> +
>   if (required_value)
>     {
> -      rtx operands[2];
> +      operands[0] = old_value_lo;
> +      operands[1] = required_value_lo;
>
> -      operands[0] = old_value;
> -      operands[1] = required_value;
>       arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
> +      if (is_di)
> +        {
> +          arm_output_asm_insn (emit, 0, operands, "it\teq");

This should be guarded with an if (TARGET_THUMB2) - there's no point in
accounting for the length of this instruction in the compiler and then
having the assembler fold it away in ARM state.

>
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index c32ef1a..3fdd22f 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -282,7 +282,8 @@ extern void 
> (*arm_lang_output_object_attributes_hook)(void);
> -#define TARGET_HAVE_DMB_MCR    (arm_arch6k && ! TARGET_HAVE_DMB)
> +#define TARGET_HAVE_DMB_MCR    (arm_arch6k && ! TARGET_HAVE_DMB \
> +                                && ! TARGET_THUMB1)

This hunk (TARGET_HAVE_DMB_MCR) should probably be backported to
release branches because this is technically fixing an issue and
hence should be a separate patch that can be looked at separately.

>
>  /* Nonzero if this chip implements a memory barrier instruction.  */
>  #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR)
> @@ -290,8 +291,12 @@ extern void 
> (*arm_lang_output_object_attributes_hook)(void);

sync.md changes -

> (define_mode_iterator NARROW [QI HI])
>+(define_mode_iterator QHSD [QI HI SI DI])
>+(define_mode_iterator SIDI [SI DI])
>+
>+(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && 
>TARGET_HAVE_MEMORY_BARRIER")
>+                              (QI "TARGET_HAVE_LDREXBH && 
>TARGET_HAVE_MEMORY_BARRIER")
>+                              (HI "TARGET_HAVE_LDREXBH && 
>TARGET_HAVE_MEMORY_BARRIER")
>+                              (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN 
>&& TARGET_HAVE_MEMORY_BARRIER")])
>+

Can we move all the iterators to iterators.md and then arrange the
includes to work automatically? Minor nit - could you align the entries
for QI, HI and DI with the start of the SI entry?

>+(define_mode_attr sync_atleastsi [(SI "SI")
>+                                (DI "DI")
>+                                (HI "SI")
>+                                (QI "SI")])
>

I couldn't spot where this was being used. Can this be removed if it is not
necessary?

>
>-(define_insn "arm_sync_new_nandsi"
>+(define_insn "arm_sync_new_<sync_optab><mode>"
>   [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(not:SI (and:SI
>-                               (match_operand:SI 1 "arm_sync_memory_operand" 
>"+Q")
>-                               (match_operand:SI 2 "s_register_operand" "r")))
>-                          ]
>-                          VUNSPEC_SYNC_NEW_OP))
>+        (unspec_volatile:SI [(syncop:SI
>+                             (zero_extend:SI
>+                               (match_operand:NARROW 1 
>"arm_sync_memory_operand" "+Q"))
>+                             (match_operand:SI 2 "s_register_operand" "r"))
>+                          ]
>+                          VUNSPEC_SYNC_NEW_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
>-                          VUNSPEC_SYNC_NEW_OP))
>+      (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+                              VUNSPEC_SYNC_NEW_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"

Can't this just use <sync_predtab> instead, since the condition is identical
for QImode and HImode from that mode attribute, and likewise in quite a few
places below?

>@@ -461,19 +359,19 @@
>         (unspec_volatile:SI
>         [(not:SI
>            (and:SI
>-               (zero_extend:SI        
>-               (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>-               (match_operand:SI 2 "s_register_operand" "r")))
>+             (zero_extend:SI  
>+               (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>+             (match_operand:SI 2 "s_register_operand" "r")))
>         ] VUNSPEC_SYNC_NEW_OP))
>    (set (match_dup 1)
>         (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>-                              VUNSPEC_SYNC_NEW_OP))
>+                              VUNSPEC_SYNC_NEW_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"

Again here. Not sure which pattern this is though, just looking at the patch.

>   {
>     return arm_output_sync_insn (insn, operands);
>-  }
>+  }
>   [(set_attr "sync_result"          "0")
>    (set_attr "sync_memory"          "1")
>    (set_attr "sync_new_value"       "2")
>@@ -483,20 +381,20 @@
>    (set_attr "conds" "clob")
>    (set_attr "predicable" "no")])
>
>-(define_insn "arm_sync_old_<sync_optab>si"
>-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(syncop:SI
>-                               (match_operand:SI 1 "arm_sync_memory_operand" 
>"+Q")
>-                               (match_operand:SI 2 "s_register_operand" "r"))
>-                          ]
>-                          VUNSPEC_SYNC_OLD_OP))
>+(define_insn "arm_sync_old_<sync_optab><mode>"
>+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
>+      (unspec_volatile:SIDI [(syncop:SIDI
>+                             (match_operand:SIDI 1 "arm_sync_memory_operand" 
>"+Q")
>+                             (match_operand:SIDI 2 "s_register_operand" "r"))
>+                          ]
>+                          VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
>-                          VUNSPEC_SYNC_OLD_OP))
>+        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
>+                            VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>-   (clobber (match_scratch:SI 3 "=&r"))
>+   (clobber (match_scratch:SIDI 3 "=&r"))
>    (clobber (match_scratch:SI 4 "<sync_clobber>"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+  "<sync_predtab>"
>   {
>     return arm_output_sync_insn (insn, operands);
>   }
>@@ -509,20 +407,21 @@
>    (set_attr "conds" "clob")
>    (set_attr "predicable" "no")])
>
>-(define_insn "arm_sync_old_nandsi"
>+(define_insn "arm_sync_old_<sync_optab><mode>"
>   [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(not:SI (and:SI
>-                               (match_operand:SI 1 "arm_sync_memory_operand" 
>"+Q")
>-                               (match_operand:SI 2 "s_register_operand" "r")))
>-                          ]
>-                          VUNSPEC_SYNC_OLD_OP))
>+        (unspec_volatile:SI [(syncop:SI
>+                             (zero_extend:SI
>+                               (match_operand:NARROW 1 
>"arm_sync_memory_operand" "+Q"))
>+                             (match_operand:SI 2 "s_register_operand" "r"))
>+                          ]
>+                          VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
>-                          VUNSPEC_SYNC_OLD_OP))
>+      (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+                          VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))
>-   (clobber (match_scratch:SI 4 "=&r"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"

Likewise for sync_predtab.

>-(define_insn "arm_sync_old_<sync_optab><mode>"
>-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(syncop:SI
>-                               (zero_extend:SI
>-                               (match_operand:NARROW 1 
>"arm_sync_memory_operand" "+Q"))
>-                               (match_operand:SI 2 "s_register_operand" "r"))
>-                          ]
>-                          VUNSPEC_SYNC_OLD_OP))
>+(define_insn "arm_sync_old_nand<mode>"
>+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
>+      (unspec_volatile:SIDI [(not:SIDI (and:SIDI
>+                             (match_operand:SIDI 1 "arm_sync_memory_operand" 
>"+Q")
>+                             (match_operand:SIDI 2 "s_register_operand" "r")))
>+                          ]
>+                          VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
>                           VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>-   (clobber (match_scratch:SI 3 "=&r"))
>-   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
>-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
>+   (clobber (match_scratch:SIDI 3 "=&r"))
>+   (clobber (match_scratch:SI 4 "=&r"))]
>+  "<sync_predtab>"
>   {
>     return arm_output_sync_insn (insn, operands);
>   }
>@@ -557,26 +455,26 @@
>    (set_attr "sync_memory"          "1")
>    (set_attr "sync_new_value"       "2")
>    (set_attr "sync_t1"              "3")
>-   (set_attr "sync_t2"              "<sync_t2_reqd>")
>-   (set_attr "sync_op"              "<sync_optab>")
>+   (set_attr "sync_t2"              "4")
>+   (set_attr "sync_op"              "nand")
>    (set_attr "conds"              "clob")
>    (set_attr "predicable" "no")])
>
> (define_insn "arm_sync_old_nand<mode>"
>   [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(not:SI (and:SI
>-                               (zero_extend:SI
>-                               (match_operand:NARROW 1 
>"arm_sync_memory_operand" "+Q"))
>-                               (match_operand:SI 2 "s_register_operand" "r")))
>-                          ]
>-                          VUNSPEC_SYNC_OLD_OP))
>+      (unspec_volatile:SI [(not:SI (and:SI
>+                             (zero_extend:SI
>+                               (match_operand:NARROW 1 
>"arm_sync_memory_operand" "+Q"))
>+                             (match_operand:SI 2 "s_register_operand" "r")))
>+                         ]
>+                          VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>-                          VUNSPEC_SYNC_OLD_OP))
>+      (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+                          VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))
>    (clobber (match_scratch:SI 4 "=&r"))]
>-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"
>   {
>     return arm_output_sync_insn (insn, operands);
>   }


Cheers
Ramana

Reply via email to