https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62173

            Bug ID: 62173
           Summary: [AArch64] Performance regression due to r213488
           Product: gcc
           Version: 5.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: spop at gcc dot gnu.org

/*
 * Reproducer for the addressing-mode regression described in this report.
 * Stores i, i-1, ..., 1 (converted to char) into A[0], A[1], ..., then
 * calls foo() on elements of A while walking the index d back down to zero.
 * foo is not declared in this snippet.
 */
void bar(int i) {
  char A[10];
  int d = 0;
  /* First loop: one store per iteration; d ends as the number of stores. */
  while (i > 0)
    A[d++] = i--;

  /* Second loop: the A[d] load here is the access whose generated code
     changed between the two revisions.
     NOTE(review): as written it reads A[d] down to A[1], i.e. it starts one
     element past the last one stored above and never reads A[0]; neither
     loop bounds d against the array size of 10. Kept exactly as reported so
     the assembly diff stays reproducible. */
  while (d > 0)
    foo(A[d--]);
}

Compiling this function at -O3 with the last good revision (r213487) and with
the first bad revision (r213488, "[AArch64] Improve TARGET_LEGITIMIZE_ADDRESS_P
hook"), then diffing the generated assembly, gives:

--- good.s    2014-08-18 10:56:42.542867000 -0500
+++ bad.s    2014-08-18 10:56:42.504090000 -0500
@@ -8,34 +8,33 @@
     stp    x29, x30, [sp, -48]!
     cmp    w0, wzr
     add    x29, sp, 0
-    stp    x19, x20, [sp, 16]
+    str    x19, [sp, 16]
     ble    .L1
     strb    w0, [x29, 32]
     subs    w19, w0, #1
-    add    x20, x29, 32
     beq    .L4
     strb    w19, [x29, 33]
     subs    w1, w0, #2
     beq    .L4
-    strb    w1, [x20, 2]
+    strb    w1, [x29, 34]
     subs    w1, w0, #3
     beq    .L4
-    strb    w1, [x20, 3]
+    strb    w1, [x29, 35]
     subs    w1, w0, #4
     beq    .L4
-    strb    w1, [x20, 4]
+    strb    w1, [x29, 36]
     subs    w1, w0, #5
     beq    .L4
-    strb    w1, [x20, 5]
+    strb    w1, [x29, 37]
     subs    w1, w0, #6
     beq    .L4
-    strb    w1, [x20, 6]
+    strb    w1, [x29, 38]
     subs    w1, w0, #7
     beq    .L4
-    strb    w1, [x20, 7]
+    strb    w1, [x29, 39]
     subs    w1, w0, #8
     beq    .L4
-    strb    w1, [x20, 8]
+    strb    w1, [x29, 40]
     subs    w1, w0, #9
     beq    .L4
     strb    w1, [x29, 41]
@@ -43,14 +42,16 @@
 .L35:
     sub    w19, w19, #1
 .L4:
-    ldrb    w0, [x20, w0, sxtw]
+    add    x1, x29, 48
+    add    x0, x1, x0, sxtw
+    ldrb    w0, [x0, -16]
     bl    foo
     mov    w0, w19
     cbnz    w19, .L35
 .L1:
-    ldp    x19, x20, [sp, 16]
+    ldr    x19, [sp, 16]
     ldp    x29, x30, [sp], 48
     ret
     .size    bar, .-bar


The problem is that gcc now generates an addressing mode that requires two more
add instructions in the second loop:

-    ldrb    w0, [x20, w0, sxtw]
+    add    x1, x29, 48
+    add    x0, x1, x0, sxtw
+    ldrb    w0, [x0, -16]

Reply via email to