Improve TImode address offsets: a TImode access may be emitted either as an
LDP of two 64-bit registers or as an LDR of a single 128-bit register, so
the valid offsets must be the intersection of the offset ranges supported by
both instruction forms.  When splitting a large offset into a base and a
remainder, use the signed 9-bit unscaled range for the remainder.
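
To make the ranges concrete, here is a minimal standalone sketch (plain C;
the helper names are hypothetical and merely mirror the predicates used in
aarch64.c, this is not the GCC code itself):

#include <stdbool.h>

/* LDP/STP of two X registers: signed 7-bit immediate, scaled by 8.  */
static bool offset_7bit_signed_scaled_8 (long off)
{
  return off >= -64 * 8 && off <= 63 * 8 && (off % 8) == 0;
}

/* LDUR/STUR-style access: signed 9-bit immediate, unscaled.  */
static bool offset_9bit_signed_unscaled (long off)
{
  return off >= -256 && off <= 255;
}

/* LDR/STR of a Q register: unsigned 12-bit immediate, scaled by 16.  */
static bool offset_12bit_unsigned_scaled_16 (long off)
{
  return off >= 0 && off <= 4095 * 16 && (off % 16) == 0;
}

/* A TImode/TFmode offset must suit the LDP form and at least one of the
   two LDR/STR forms - the intersection the patch implements.  */
static bool timode_offset_ok (long off)
{
  return offset_7bit_signed_scaled_8 (off)
         && (offset_9bit_signed_unscaled (off)
             || offset_12bit_unsigned_scaled_16 (off));
}
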
Remove the Ump constraint on the movti and movtf instructions, as it blocks
the reload optimizer from merging address CSEs (is this supposed to work
only on 'm' constraints?).  The result is improved code size, especially in
wrf and gamess in SPEC2006.  For example:
int f (int x)
{
  __int128_t arr[100];
  arr[31] = 0;
  arr[48] = 0;
  arr[79] = 0;
  arr[65] = 0;
  arr[70] = 0;
  return arr[x];
}
Before patch (note the multiple redundant add x1, sp, 1024 instructions):
sub sp, sp, #1600
sbfiz x0, x0, 4, 32
add x1, sp, 256
stp xzr, xzr, [x1, 240]
add x1, sp, 768
stp xzr, xzr, [x1]
add x1, sp, 1024
stp xzr, xzr, [x1, 240]
add x1, sp, 1024
stp xzr, xzr, [x1, 16]
add x1, sp, 1024
stp xzr, xzr, [x1, 96]
ldr w0, [sp, x0]
add sp, sp, 1600
ret
After patch:
sub sp, sp, #1600
sbfiz x0, x0, 4, 32
add x1, sp, 1024
stp xzr, xzr, [sp, 496]
stp xzr, xzr, [x1, -256]
stp xzr, xzr, [x1, 240]
stp xzr, xzr, [x1, 16]
stp xzr, xzr, [x1, 96]
ldr w0, [sp, x0]
add sp, sp, 1600
ret
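
The shared base sp+1024 falls out of the new split: base = (offset + 0x100)
& ~0x1ff rounds an out-of-range offset to the nearest multiple of 512, so
the remaining displacement always fits the signed 9-bit unscaled range
-256..255.  A small standalone demonstration (plain C, illustration only,
not part of the patch), using the store offsets from the example:

#include <stdio.h>

int main (void)
{
  /* Offsets of arr[48], arr[79], arr[65], arr[70] in the example above
     (arr[31] at offset 496 already fits the LDP range, so no split).  */
  long offsets[] = { 768, 1264, 1040, 1120 };

  for (int i = 0; i < 4; i++)
    {
      long off = offsets[i];
      long base = (off + 0x100) & ~0x1ffL;  /* Round to nearest 512.  */
      printf ("offset %4ld = base %4ld + disp %4ld\n", off, base, off - base);
    }
  return 0;
}

All four offsets round to the same base 1024 with displacements -256, 240,
16 and 96, which is exactly why a single add x1, sp, 1024 now suffices.
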
Bootstrap & regress OK.
ChangeLog:
2015-11-10  Wilco Dijkstra  <[email protected]>

gcc/
        * config/aarch64/aarch64.md (movti_aarch64): Change Ump to m.
        (movtf_aarch64): Likewise.
        * config/aarch64/aarch64.c (aarch64_classify_address):
        Use correct intersection of offsets.
        (aarch64_legitimize_address_displacement): Use 9-bit signed offsets.
        (aarch64_legitimize_address): Use 9-bit signed offsets for TI/TF mode.
        Use 7-bit signed scaled offsets for modes > 16 bytes.
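
One non-obvious detail in the aarch64_legitimize_address change below: for
modes larger than 16 bytes the new base is computed as (offset + 0x400) &
~0x7f0, which keeps the offset's low 4 bits in the base and leaves a
displacement that is a multiple of 16 within the LDP Q-register range
-1024..1008.  A brute-force check of that claim (plain C, illustration
only, not part of the patch):

#include <stdio.h>

int main (void)
{
  for (long off = -100000; off <= 100000; off++)
    {
      long base = (off + 0x400) & ~0x7f0L;
      long disp = off - base;
      /* disp must be a multiple of 16 in the signed 7-bit scaled range.  */
      if (disp < -1024 || disp > 1008 || (disp & 15) != 0)
        printf ("FAIL: offset %ld -> base %ld, disp %ld\n", off, base, disp);
    }
  printf ("done\n");
  return 0;
}
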
---
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	     instruction memory accesses.  */
 	  if (mode == TImode || mode == TFmode)
 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		    && offset_9bit_signed_unscaled_p (mode, offset));
+		    && (offset_9bit_signed_unscaled_p (mode, offset)
+			|| offset_12bit_unsigned_scaled_p (mode, offset)));
 
 	  /* A 7bit offset check because OImode will emit a ldp/stp
 	     instruction (only big endian will get here).
@@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
 /* Split an out-of-range address displacement into a base and offset.
    Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
    to increase opportunities for sharing the base address of different sizes.
-   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
 
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+  HOST_WIDE_INT offset = INTVAL (*disp);
+  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
 
-  if (mode == TImode || mode == TFmode ||
-      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
-    mask = 0xff;
+  if (mode == TImode || mode == TFmode
+      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
+    base = (offset + 0x100) & ~0x1ff;
 
-  *off = GEN_INT (INTVAL (*disp) & ~mask);
-  *disp = GEN_INT (INTVAL (*disp) & mask);
+  *off = GEN_INT (base);
+  *disp = GEN_INT (offset - base);
   return true;
 }
@@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a load/store-pair operation?  */
+      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
       HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16
-	  || mode == TImode)
-	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      if (GET_MODE_SIZE (mode) > 16)
+	base_offset = (offset + 0x400) & ~0x7f0;
       /* For offsets aren't a multiple of the access size, the limit is
 	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
@@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
       /* Small negative offsets are supported.  */
       else if (IN_RANGE (offset, -256, 0))
 	base_offset = 0;
+      else if (mode == TImode || mode == TFmode)
+	base_offset = (offset + 0x100) & ~0x1ff;
       /* Use 12-bit offset by access size.  */
       else
 	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1094,9 +1094,9 @@
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand" "=r, *w,r ,*w,r ,Ump,Ump,*w,m")
+	 "nonimmediate_operand" "=r, *w,r ,*w,r,m,m,*w,m")
	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r ,Z , m,*w"))]
+	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
   "(register_operand (operands[0], TImode)
    || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1211,9 +1211,9 @@
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump