Hi Sandra, Chung-Lin,
A couple of comments from me,
On 26/05/15 20:10, Sandra Loosemore wrote:
Chung-Lin posted this patch last year but it seems never to have been
reviewed:
https://gcc.gnu.org/ml/gcc-patches/2014-06/msg00714.html
I've just re-applied and re-tested it and it still seems to be good.
Can somebody please take a look at it?
-Sandra
+mfix-cortex-a9-volatile-hazards
+Target Report Var(fix_a9_volatile_hazards) Init(0)
+Avoid errata causing read-after-read hazards for concurrent volatile
+accesses on Cortex-A9 MPCore processors.
s/errata/erratum/
+;; Thumb-2 version allows conditional execution
+(define_insn "*memory_barrier_t2"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "TARGET_HAVE_MEMORY_BARRIER && TARGET_THUMB2"
+ {
+ if (TARGET_HAVE_DMB)
+ {
+ /* Note we issue a system level barrier. We should consider issuing
+ a inner shareabilty zone barrier here instead, ie. "DMB ISH". */
+ /* ??? Differentiate based on SEQ_CST vs less strict? */
+ return "dmb%?\tsy";
+ }
+
+ if (TARGET_HAVE_DMB_MCR)
+ return "mcr%?\tp15, 0, r0, c7, c10, 5";
+
+ gcc_unreachable ();
+ }
+ [(set_attr "length" "4")
+ (set_attr "conds" "nocond")
+ (set_attr "predicable" "yes")])
+
This should also set the 'predicable_short_it' attribute to "no",
since we don't want it to be predicated when compiling for ARMv8-A Thumb-2.
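In other words, the attribute list at the end of the pattern would presumably
end up looking something like:

  [(set_attr "length" "4")
   (set_attr "conds" "nocond")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])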
Consequently:
Index: testsuite/gcc.target/arm/a9-volatile-ordering-erratum-2.c
===================================================================
--- testsuite/gcc.target/arm/a9-volatile-ordering-erratum-2.c (revision 0)
+++ testsuite/gcc.target/arm/a9-volatile-ordering-erratum-2.c (revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile { target arm_dmb } } */
+/* { dg-options "-O2 -mthumb -mfix-cortex-a9-volatile-hazards" } */
Please add -mno-restrict-it to the options here so that we are still allowed
to conditionalise the dmb when armv8-a is the default architecture.
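That is, the dg-options line would presumably become something like:

/* { dg-options "-O2 -mthumb -mfix-cortex-a9-volatile-hazards -mno-restrict-it" } */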
+static bool
+any_volatile_loads_p (const_rtx body)
+{
+ int i, j;
+ rtx lhs, rhs;
+ enum rtx_code code;
+ const char *fmt;
+
+ if (body == NULL_RTX)
+ return false;
+
+ code = GET_CODE (body);
+
+ if (code == SET)
+ {
+ lhs = SET_DEST (body);
+ rhs = SET_SRC (body);
+
+ if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG)
+ return false;
+
+ if ((MEM_P (rhs) || GET_CODE (rhs) == SYMBOL_REF)
+ && MEM_VOLATILE_P (rhs))
+ return true;
+ }
+ else
+ {
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ if (any_volatile_loads_p (XEXP (body, i)))
+ return true;
+ }
+ else if (fmt[i] == 'E')
+ for (j = 0; j < XVECLEN (body, i); j++)
+ if (any_volatile_loads_p (XVECEXP (body, i, j)))
+ return true;
+ }
+ }
+
+ return false;
+}
Would it be simpler to write this using the FOR_EACH_SUBRTX infrastructure? I
think it would make this function much shorter.
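For the record, here's a rough, untested sketch of the kind of thing I had in
mind (assuming rtl-iter.h is included; it keeps the register/subreg destination
filter, only tests MEM_VOLATILE_P on MEMs rather than also on SYMBOL_REFs, and
lets the iterator do the recursion):

/* Sketch only: walk all sub-rtxes of BODY looking for a SET of a
   register (or subreg) from a volatile MEM.  Requires rtl-iter.h.  */
static bool
any_volatile_loads_p (const_rtx body)
{
  if (body == NULL_RTX)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, body, NONCONST)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SET
          && (REG_P (SET_DEST (x)) || GET_CODE (SET_DEST (x)) == SUBREG)
          && MEM_P (SET_SRC (x))
          && MEM_VOLATILE_P (SET_SRC (x)))
        return true;
    }
  return false;
}

It isn't a drop-in equivalent of the original (it will also look inside nested
SETs), but it shows roughly how much of the boilerplate goes away.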
@@ -17248,6 +17334,9 @@ arm_reorg (void)
{
rtx table;
+ if (fix_a9_volatile_hazards)
+ arm_cortex_a9_errata_reorg (insn);
+
note_invalid_constants (insn, address, true);
address += get_attr_length (insn);
Does the logic for adding the insn length to address need to be updated in any
way since we're inserting a new instruction
in the stream? The calculations here always confuse me...
Thanks,
Kyrill