This time with the patch... Apologies.

Yufeng
On 06/18/14 17:31, Yufeng Zhang wrote:
Hi,

This patch improves the code-gen of -marm in the case of two-dimensional
array accesses.  Given the following code:

  typedef struct { int x, y, a, b; } X;

  int
  f7a(X p[][4], int x, int y)
  {
    return p[x][y].a;
  }

the code-gen with -O2 -marm -mcpu=cortex-a15 is currently:

        mov     r2, r2, asl #4
        add     r1, r2, r1, asl #6
        add     r0, r0, r1
        ldr     r0, [r0, #8]
        bx      lr

With the patch, we'll get:

        add     r1, r0, r1, lsl #6
        add     r2, r1, r2, lsl #4
        ldr     r0, [r2, #8]
        bx      lr

The -mthumb code-gen is already OK.

The patch has passed bootstrapping on cortex-a15 and the arm-none-eabi
regression test, with no code-gen difference in spec2k (unfortunately).

OK for the trunk?

Thanks,
Yufeng

gcc/
        * config/arm/arm.c (arm_reassoc_shifts_in_address): New declaration
        and new function.
        (arm_legitimize_address): Call the new function.
        (thumb_legitimize_address): Prefix the definition with static.

gcc/testsuite/
        * gcc.target/arm/shifted-add-1.c: New test.
        * gcc.target/arm/shifted-add-2.c: Ditto.
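(Not part of the patch -- just a quick standalone check, written here for
illustration and assuming 4-byte int and no padding in X, of the address
arithmetic behind the listings above: sizeof (X) is 16 and a row of X[4]
is 64 bytes, so &p[x][y].a is (char *) p + (x << 6) + (y << 4) + 8, which
is exactly what the two shifted adds plus the #8 load offset compute.)

  #include <assert.h>
  #include <stddef.h>

  typedef struct { int x, y, a, b; } X;

  int
  main (void)
  {
    X p[3][4];
    int x = 2, y = 3;

    /* 16-byte struct, 64-byte row: the .a member of p[x][y] sits at byte
       offset (x << 6) + (y << 4) + 8 from the start of the array.  */
    assert (sizeof (X) == 16);
    assert ((char *) &p[x][y].a
            == (char *) p + ((size_t) x << 6) + ((size_t) y << 4) + 8);
    return 0;
  }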
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 16fc7ed..281c96a 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -88,6 +88,7 @@ static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
+static void arm_reassoc_shifts_in_address (rtx);
 inline static int thumb1_index_register_rtx_p (rtx, int);
 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
 static int thumb_far_jump_used_p (void);
@@ -7501,7 +7502,8 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
     {
       /* TODO: legitimize_address for Thumb2.  */
       if (TARGET_THUMB2)
-	return x;
+	return x;
+
       return thumb_legitimize_address (x, orig_x, mode);
     }
 
@@ -7551,6 +7553,9 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
 	}
       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
 	x = gen_rtx_PLUS (SImode, xop0, xop1);
+
+      if (GET_CODE (xop0) == PLUS)
+	arm_reassoc_shifts_in_address (xop0);
     }
 
   /* XXX We don't allow MINUS any more -- see comment in
@@ -7614,7 +7619,8 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
 
 /* Try machine-dependent ways of modifying an illegitimate Thumb address
    to be legitimate.  If we find one, return the new, valid address.  */
-rtx
+
+static rtx
 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
 {
   if (GET_CODE (x) == PLUS
@@ -7679,6 +7685,47 @@ thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
   return x;
 }
 
+/* Transform
+     PLUS (PLUS (MULT1, MULT2), REG)
+   to
+     PLUS (PLUS (MULT1, REG), MULT2)
+   so that we can use two add (shifted register) instructions
+   to compute the expression.  Note that SHIFTs have already
+   been replaced with MULTs as a result of canonicalization.
+
+   This routine is to help undo the undesired canonicalization
+   that is done by simplify_gen_binary on addresses with
+   multiple shifts.  For example, it will help transform
+     (x << 6) + (y << 4) + p + 8
+   back to:
+     (x << 6) + p + (y << 4) + 8
+   where p is the start address of a two-dimensional array and
+   x and y are the indexes.  */
+
+static void
+arm_reassoc_shifts_in_address (rtx x)
+{
+  if (GET_CODE (x) == PLUS)
+    {
+      rtx op0 = XEXP (x, 0);
+      rtx op1 = XEXP (x, 1);
+
+      if (GET_CODE (op0) == PLUS && REG_P (op1))
+	{
+	  rtx xop0 = XEXP (op0, 0);
+	  rtx xop1 = XEXP (op0, 1);
+
+	  if (GET_CODE (xop0) == MULT && GET_CODE (xop1) == MULT
+	      && power_of_two_operand (XEXP (xop0, 1), GET_MODE (xop0))
+	      && power_of_two_operand (XEXP (xop1, 1), GET_MODE (xop1)))
+	    {
+	      XEXP (op0, 1) = op1;
+	      XEXP (x, 1) = xop1;
+	    }
+	}
+    }
+}
+
 bool
 arm_legitimize_reload_address (rtx *p,
			       enum machine_mode mode,
diff --git a/gcc/testsuite/gcc.target/arm/shifted-add-1.c b/gcc/testsuite/gcc.target/arm/shifted-add-1.c
new file mode 100644
index 0000000..8777fe4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/shifted-add-1.c
@@ -0,0 +1,47 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2" } */
+
+typedef struct { int x,y,a,b; } x;
+
+int
+f7a(x p[][4], int x, int y)
+{
+  return p[x][y].a;
+}
+
+/* { dg-final { object-size text <= 16 { target { { ! arm_thumb1 } && { ! arm_thumb2 } } } } } */
+/* { dg-final { object-size text <= 12 { target arm_thumb2 } } } */
+
+
+/* For arm code-gen, expect four instructions like:
+
+   0:   e0801301        add     r1, r0, r1, lsl #6
+   4:   e0812202        add     r2, r1, r2, lsl #4
+   8:   e5920008        ldr     r0, [r2, #8]
+   c:   e12fff1e        bx      lr
+
+   instead of
+
+   0:   e1a02202        lsl     r2, r2, #4
+   4:   e0821301        add     r1, r2, r1, lsl #6
+   8:   e0800001        add     r0, r0, r1
+   c:   e5900008        ldr     r0, [r0, #8]
+  10:   e12fff1e        bx      lr
+
+
+   for thumb2 code-gen, expect four instructions like:
+
+   0:   eb00 1181       add.w   r1, r0, r1, lsl #6
+   4:   eb01 1202       add.w   r2, r1, r2, lsl #4
+   8:   6890            ldr     r0, [r2, #8]
+   a:   4770            bx      lr
+
+   instead of
+
+   0:   ea4f 1202       mov.w   r2, r2, lsl #4
+   4:   eb02 1181       add.w   r1, r2, r1, lsl #6
+   8:   4408            add     r0, r1
+   a:   6880            ldr     r0, [r0, #8]
+   c:   4770            bx      lr
+   e:   bf00            nop
+*/
diff --git a/gcc/testsuite/gcc.target/arm/shifted-add-2.c b/gcc/testsuite/gcc.target/arm/shifted-add-2.c
new file mode 100644
index 0000000..f540465
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/shifted-add-2.c
@@ -0,0 +1,50 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2" } */
+
+typedef struct { int x,y,a,b; } x;
+
+int
+f8a(x p[][4][4], int x, int y, int z)
+{
+  return p[x][y][z].a;
+}
+
+/* { dg-final { object-size text <= 20 { target { { ! arm_thumb1 } && { ! arm_thumb2 } } } } } */
+/* { dg-final { object-size text <= 16 { target arm_thumb2 } } } */
+
+
+/* For arm code-gen, expect five instructions like:
+
+   0:   e0833102        add     r3, r3, r2, lsl #2
+   4:   e0800401        add     r0, r0, r1, lsl #8
+   8:   e0803203        add     r3, r0, r3, lsl #4
+   c:   e5930008        ldr     r0, [r3, #8]
+  10:   e12fff1e        bx      lr
+
+   instead of
+
+   0:   e0833102        add     r3, r3, r2, lsl #2
+   4:   e1a03203        lsl     r3, r3, #4
+   8:   e0831401        add     r1, r3, r1, lsl #8
+   c:   e0800001        add     r0, r0, r1
+  10:   e5900008        ldr     r0, [r0, #8]
+  14:   e12fff1e        bx      lr
+
+   for thumb2 code-gen, expect five instructions like:
+
+   0:   eb03 0382       add.w   r3, r3, r2, lsl #2
+   4:   eb00 2001       add.w   r0, r0, r1, lsl #8
+   8:   eb00 1303       add.w   r3, r0, r3, lsl #4
+   c:   6898            ldr     r0, [r3, #8]
+   e:   4770            bx      lr
+
+   instead of
+
+   0:   eb03 0382       add.w   r3, r3, r2, lsl #2
+   4:   ea4f 1303       mov.w   r3, r3, lsl #4
+   8:   eb03 2101       add.w   r1, r3, r1, lsl #8
+   c:   4408            add     r0, r1
+   e:   6880            ldr     r0, [r0, #8]
+  10:   4770            bx      lr
+  12:   bf00            nop
+*/
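For readers less familiar with the RTL forms, below is a rough standalone
sketch of the reassociation that the new arm_reassoc_shifts_in_address
routine performs.  It is plain C over toy expression nodes of my own, not
GCC's rtx API; the node layout, reassoc_shifts and power_of_two_p here are
made up for illustration only.

  #include <stdio.h>

  enum code { REG, CONST, MULT, PLUS };

  struct expr
  {
    enum code code;
    const char *name;        /* used by REG */
    long value;              /* used by CONST */
    struct expr *op0, *op1;  /* used by MULT and PLUS */
  };

  static int
  power_of_two_p (const struct expr *e)
  {
    return e->code == CONST && e->value > 0
           && (e->value & (e->value - 1)) == 0;
  }

  /* If X is PLUS (PLUS (MULT1, MULT2), REG) with power-of-two multipliers,
     swap the inner MULT2 with the outer REG, giving
     PLUS (PLUS (MULT1, REG), MULT2).  */
  static void
  reassoc_shifts (struct expr *x)
  {
    struct expr *op0, *op1, *xop0, *xop1;

    if (x->code != PLUS)
      return;
    op0 = x->op0;
    op1 = x->op1;
    if (op0->code != PLUS || op1->code != REG)
      return;
    xop0 = op0->op0;
    xop1 = op0->op1;
    if (xop0->code == MULT && xop1->code == MULT
        && power_of_two_p (xop0->op1) && power_of_two_p (xop1->op1))
      {
        op0->op1 = op1;   /* inner PLUS becomes MULT1 + REG */
        x->op1 = xop1;    /* outer PLUS becomes ... + MULT2 */
      }
  }

  static void
  print_expr (const struct expr *e)
  {
    switch (e->code)
      {
      case REG:
        printf ("%s", e->name);
        break;
      case CONST:
        printf ("%ld", e->value);
        break;
      case MULT:
      case PLUS:
        printf ("(");
        print_expr (e->op0);
        printf (e->code == MULT ? " * " : " + ");
        print_expr (e->op1);
        printf (")");
        break;
      }
  }

  int
  main (void)
  {
    struct expr x_reg = { .code = REG, .name = "x" };
    struct expr y_reg = { .code = REG, .name = "y" };
    struct expr p_reg = { .code = REG, .name = "p" };
    struct expr c64 = { .code = CONST, .value = 64 };  /* x << 6 */
    struct expr c16 = { .code = CONST, .value = 16 };  /* y << 4 */
    struct expr m1 = { .code = MULT, .op0 = &x_reg, .op1 = &c64 };
    struct expr m2 = { .code = MULT, .op0 = &y_reg, .op1 = &c16 };
    struct expr sum = { .code = PLUS, .op0 = &m1, .op1 = &m2 };
    struct expr addr = { .code = PLUS, .op0 = &sum, .op1 = &p_reg };

    print_expr (&addr);   /* before the rewrite */
    printf ("\n");
    reassoc_shifts (&addr);
    print_expr (&addr);   /* after the rewrite */
    printf ("\n");
    return 0;
  }

Running it prints (((x * 64) + (y * 16)) + p) before and
(((x * 64) + p) + (y * 16)) after, matching the comment in the patch.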