This time with the patch... Apologies.

Yufeng
On 06/18/14 17:31, Yufeng Zhang wrote:
Hi,

This patch improves the code-gen of -marm in the case of two-dimensional
array accesses.  Given the following code:

  typedef struct { int x, y, a, b; } X;

  int
  f7a(X p[][4], int x, int y)
  {
    return p[x][y].a;
  }

the code-gen with -O2 -marm -mcpu=cortex-a15 is currently:

        mov     r2, r2, asl #4
        add     r1, r2, r1, asl #6
        add     r0, r0, r1
        ldr     r0, [r0, #8]
        bx      lr

With the patch, we'll get:

        add     r1, r0, r1, lsl #6
        add     r2, r1, r2, lsl #4
        ldr     r0, [r2, #8]
        bx      lr

The -mthumb code-gen is already OK.

The patch has passed bootstrapping on cortex-a15 and the arm-none-eabi
regression test, with no code-gen difference in spec2k (unfortunately).

OK for the trunk?

Thanks,
Yufeng

gcc/
        * config/arm/arm.c (arm_reassoc_shifts_in_address): New declaration
        and new function.
        (arm_legitimize_address): Call the new function.
        (thumb_legitimize_address): Prefix the definition with static.

gcc/testsuite/
        * gcc.target/arm/shifted-add-1.c: New test.
        * gcc.target/arm/shifted-add-2.c: Ditto.
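(Not part of the patch -- just a quick standalone check, written here for
illustration and assuming 4-byte int and no padding in X, of the address
arithmetic behind the listings above: sizeof (X) is 16 and a row of X[4]
is 64 bytes, so &p[x][y].a is (char *) p + (x << 6) + (y << 4) + 8, which
is exactly what the two shifted adds plus the #8 load offset compute.)

  #include <assert.h>
  #include <stddef.h>

  typedef struct { int x, y, a, b; } X;

  int
  main (void)
  {
    X p[3][4];
    int x = 2, y = 3;

    /* 16-byte struct, 64-byte row: the .a member of p[x][y] sits at byte
       offset (x << 6) + (y << 4) + 8 from the start of the array.  */
    assert (sizeof (X) == 16);
    assert ((char *) &p[x][y].a
            == (char *) p + ((size_t) x << 6) + ((size_t) y << 4) + 8);
    return 0;
  }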
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 16fc7ed..281c96a 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -88,6 +88,7 @@ static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
+static void arm_reassoc_shifts_in_address (rtx);
 inline static int thumb1_index_register_rtx_p (rtx, int);
 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
 static int thumb_far_jump_used_p (void);
@@ -7501,7 +7502,8 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
     {
       /* TODO: legitimize_address for Thumb2.  */
       if (TARGET_THUMB2)
-	return x;
+	return x;
+
       return thumb_legitimize_address (x, orig_x, mode);
     }
 
@@ -7551,6 +7553,9 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
 	}
       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
 	x = gen_rtx_PLUS (SImode, xop0, xop1);
+
+      if (GET_CODE (xop0) == PLUS)
+	arm_reassoc_shifts_in_address (xop0);
     }
 
   /* XXX We don't allow MINUS any more -- see comment in
@@ -7614,7 +7619,8 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
 
 /* Try machine-dependent ways of modifying an illegitimate Thumb address
    to be legitimate.  If we find one, return the new, valid address.  */
-rtx
+
+static rtx
 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
 {
   if (GET_CODE (x) == PLUS
@@ -7679,6 +7685,47 @@ thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
   return x;
 }
 
+/* Transform
+     PLUS (PLUS (MULT1, MULT2), REG)
+   to
+     PLUS (PLUS (MULT1, REG), MULT2)
+   so that we can use two add (shifted register) instructions
+   to compute the expression.  Note that SHIFTs have already
+   been replaced with MULTs as a result of canonicalization.
+
+   This routine is to help undo the undesired canonicalization
+   that is done by simplify_gen_binary on addresses with
+   multiple shifts.  For example, it will help transform
+     (x << 6) + (y << 4) + p + 8
+   back to:
+     (x << 6) + p + (y << 4) + 8
+   where p is the start address of a two-dimensional array and
+   x and y are the indexes.  */
+
+static void
+arm_reassoc_shifts_in_address (rtx x)
+{
+  if (GET_CODE (x) == PLUS)
+    {
+      rtx op0 = XEXP (x, 0);
+      rtx op1 = XEXP (x, 1);
+
+      if (GET_CODE (op0) == PLUS && REG_P (op1))
+	{
+	  rtx xop0 = XEXP (op0, 0);
+	  rtx xop1 = XEXP (op0, 1);
+
+	  if (GET_CODE (xop0) == MULT && GET_CODE (xop1) == MULT
+	      && power_of_two_operand (XEXP (xop0, 1), GET_MODE (xop0))
+	      && power_of_two_operand (XEXP (xop1, 1), GET_MODE (xop1)))
+	    {
+	      XEXP (op0, 1) = op1;
+	      XEXP (x, 1) = xop1;
+	    }
+	}
+    }
+}
+
 bool
 arm_legitimize_reload_address (rtx *p,
			       enum machine_mode mode,
diff --git a/gcc/testsuite/gcc.target/arm/shifted-add-1.c b/gcc/testsuite/gcc.target/arm/shifted-add-1.c
new file mode 100644
index 0000000..8777fe4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/shifted-add-1.c
@@ -0,0 +1,47 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2" } */
+
+typedef struct { int x,y,a,b; } x;
+
+int
+f7a(x p[][4], int x, int y)
+{
+  return p[x][y].a;
+}
+
+/* { dg-final { object-size text <= 16 { target { { ! arm_thumb1 } && { ! arm_thumb2 } } } } } */
+/* { dg-final { object-size text <= 12 { target arm_thumb2 } } } */
+
+
+/* For arm code-gen, expect four instructions like:
+
+   0:   e0801301        add     r1, r0, r1, lsl #6
+   4:   e0812202        add     r2, r1, r2, lsl #4
+   8:   e5920008        ldr     r0, [r2, #8]
+   c:   e12fff1e        bx      lr
+
+   instead of
+
+   0:   e1a02202        lsl     r2, r2, #4
+   4:   e0821301        add     r1, r2, r1, lsl #6
+   8:   e0800001        add     r0, r0, r1
+   c:   e5900008        ldr     r0, [r0, #8]
+  10:   e12fff1e        bx      lr
+
+
+   for thumb2 code-gen, expect four instructions like:
+
+   0:   eb00 1181       add.w   r1, r0, r1, lsl #6
+   4:   eb01 1202       add.w   r2, r1, r2, lsl #4
+   8:   6890            ldr     r0, [r2, #8]
+   a:   4770            bx      lr
+
+   instead of
+
+   0:   ea4f 1202       mov.w   r2, r2, lsl #4
+   4:   eb02 1181       add.w   r1, r2, r1, lsl #6
+   8:   4408            add     r0, r1
+   a:   6880            ldr     r0, [r0, #8]
+   c:   4770            bx      lr
+   e:   bf00            nop
+*/
diff --git a/gcc/testsuite/gcc.target/arm/shifted-add-2.c b/gcc/testsuite/gcc.target/arm/shifted-add-2.c
new file mode 100644
index 0000000..f540465
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/shifted-add-2.c
@@ -0,0 +1,50 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2" } */
+
+typedef struct { int x,y,a,b; } x;
+
+int
+f8a(x p[][4][4], int x, int y, int z)
+{
+  return p[x][y][z].a;
+}
+
+/* { dg-final { object-size text <= 20 { target { { ! arm_thumb1 } && { ! arm_thumb2 } } } } } */
+/* { dg-final { object-size text <= 16 { target arm_thumb2 } } } */
+
+
+/* For arm code-gen, expect five instructions like:
+
+   0:   e0833102        add     r3, r3, r2, lsl #2
+   4:   e0800401        add     r0, r0, r1, lsl #8
+   8:   e0803203        add     r3, r0, r3, lsl #4
+   c:   e5930008        ldr     r0, [r3, #8]
+  10:   e12fff1e        bx      lr
+
+   instead of
+
+   0:   e0833102        add     r3, r3, r2, lsl #2
+   4:   e1a03203        lsl     r3, r3, #4
+   8:   e0831401        add     r1, r3, r1, lsl #8
+   c:   e0800001        add     r0, r0, r1
+  10:   e5900008        ldr     r0, [r0, #8]
+  14:   e12fff1e        bx      lr
+
+   for thumb2 code-gen, expect five instructions like:
+
+   0:   eb03 0382       add.w   r3, r3, r2, lsl #2
+   4:   eb00 2001       add.w   r0, r0, r1, lsl #8
+   8:   eb00 1303       add.w   r3, r0, r3, lsl #4
+   c:   6898            ldr     r0, [r3, #8]
+   e:   4770            bx      lr
+
+   instead of
+
+   0:   eb03 0382       add.w   r3, r3, r2, lsl #2
+   4:   ea4f 1303       mov.w   r3, r3, lsl #4
+   8:   eb03 2101       add.w   r1, r3, r1, lsl #8
+   c:   4408            add     r0, r1
+   e:   6880            ldr     r0, [r0, #8]
+  10:   4770            bx      lr
+  12:   bf00            nop
+*/
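For readers less familiar with the RTL forms, below is a rough standalone
sketch of the reassociation that the new arm_reassoc_shifts_in_address
routine performs.  It is plain C over toy expression nodes of my own, not
GCC's rtx API; the node layout, reassoc_shifts and power_of_two_p here are
made up for illustration only.

  #include <stdio.h>

  enum code { REG, CONST, MULT, PLUS };

  struct expr
  {
    enum code code;
    const char *name;        /* used by REG */
    long value;              /* used by CONST */
    struct expr *op0, *op1;  /* used by MULT and PLUS */
  };

  static int
  power_of_two_p (const struct expr *e)
  {
    return e->code == CONST && e->value > 0
           && (e->value & (e->value - 1)) == 0;
  }

  /* If X is PLUS (PLUS (MULT1, MULT2), REG) with power-of-two multipliers,
     swap the inner MULT2 with the outer REG, giving
     PLUS (PLUS (MULT1, REG), MULT2).  */
  static void
  reassoc_shifts (struct expr *x)
  {
    struct expr *op0, *op1, *xop0, *xop1;

    if (x->code != PLUS)
      return;
    op0 = x->op0;
    op1 = x->op1;
    if (op0->code != PLUS || op1->code != REG)
      return;
    xop0 = op0->op0;
    xop1 = op0->op1;
    if (xop0->code == MULT && xop1->code == MULT
        && power_of_two_p (xop0->op1) && power_of_two_p (xop1->op1))
      {
        op0->op1 = op1;   /* inner PLUS becomes MULT1 + REG */
        x->op1 = xop1;    /* outer PLUS becomes ... + MULT2 */
      }
  }

  static void
  print_expr (const struct expr *e)
  {
    switch (e->code)
      {
      case REG:
        printf ("%s", e->name);
        break;
      case CONST:
        printf ("%ld", e->value);
        break;
      case MULT:
      case PLUS:
        printf ("(");
        print_expr (e->op0);
        printf (e->code == MULT ? " * " : " + ");
        print_expr (e->op1);
        printf (")");
        break;
      }
  }

  int
  main (void)
  {
    struct expr x_reg = { .code = REG, .name = "x" };
    struct expr y_reg = { .code = REG, .name = "y" };
    struct expr p_reg = { .code = REG, .name = "p" };
    struct expr c64 = { .code = CONST, .value = 64 };  /* x << 6 */
    struct expr c16 = { .code = CONST, .value = 16 };  /* y << 4 */
    struct expr m1 = { .code = MULT, .op0 = &x_reg, .op1 = &c64 };
    struct expr m2 = { .code = MULT, .op0 = &y_reg, .op1 = &c16 };
    struct expr sum = { .code = PLUS, .op0 = &m1, .op1 = &m2 };
    struct expr addr = { .code = PLUS, .op0 = &sum, .op1 = &p_reg };

    print_expr (&addr);   /* before the rewrite */
    printf ("\n");
    reassoc_shifts (&addr);
    print_expr (&addr);   /* after the rewrite */
    printf ("\n");
    return 0;
  }

Running it prints (((x * 64) + (y * 16)) + p) before and
(((x * 64) + p) + (y * 16)) after, matching the comment in the patch.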