On 10/08/16 17:31, Wilco Dijkstra wrote:
> Richard Earnshaw wrote:
>> OK. But please enhance the comment with some explanation as to WHY
>> you've chosen to use just two base pairings rather than separate bases
>> for each access size.
>
> OK, here is the updated patch, which also handles unaligned accesses and
> further improves the benefit:
>
> This patch adds the legitimize_address_displacement hook so that stack accesses
> with large offsets are split into a more efficient sequence.  Unaligned accesses
> and TI/TFmode use a 256-byte range, byte and halfword accesses use a 4KB range,
> and wider accesses use a 16KB range, to maximise the available addressing range
> and increase opportunities to share the base address.
>
> int f(int x)
> {
>   int arr[8192];
>   arr[4096] = 0;
>   arr[6000] = 0;
>   arr[7000] = 0;
>   arr[8191] = 0;
>   return arr[x];
> }
>
> Now generates:
>
> sub sp, sp, #32768
> add x1, sp, 16384
> str wzr, [x1]
> str wzr, [x1, 7616]
> str wzr, [x1, 11616]
> str wzr, [x1, 16380]
> ldr w0, [sp, w0, sxtw 2]
> add sp, sp, 32768
> ret
>
> instead of:
>
> sub sp, sp, #32768
> mov x2, 28000
> add x1, sp, 16384
> mov x3, 32764
> str wzr, [x1]
> mov x1, 24000
> add x1, sp, x1
> str wzr, [x1]
> add x1, sp, x2
> str wzr, [x1]
> add x1, sp, x3
> str wzr, [x1]
> ldr w0, [sp, w0, sxtw 2]
> add sp, sp, 32768
> ret
>
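> To see where the displacements in the new sequence come from: these are 4-byte
> accesses, so the hook masks each frame offset with the 16KB range, and all four
> stores end up sharing the same base (illustrative arithmetic only):
>
>   24000 & ~0x3fff = 16384   /* shared base: add x1, sp, 16384 */
>   16384 &  0x3fff =     0   /* arr[4096]: str wzr, [x1] */
>   24000 &  0x3fff =  7616   /* arr[6000]: str wzr, [x1, 7616] */
>   28000 &  0x3fff = 11616   /* arr[7000]: str wzr, [x1, 11616] */
>   32764 &  0x3fff = 16380   /* arr[8191]: str wzr, [x1, 16380] */
>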
> Bootstrap, GCC regression OK.
>
> ChangeLog:
> 2016-08-10 Wilco Dijkstra <[email protected]>
>
> gcc/
> * config/aarch64/aarch64.c (aarch64_legitimize_address_displacement):
> New function.
> (TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT): Define.
OK.
R.
> --
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 9a5fc199128b1326d0fb2afe0833aa6a5ce62ddf..b8536175a84b76f8c2939e61f1379ae279b20d43 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4173,6 +4173,24 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
>    return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
>  }
> 
> +/* Split an out-of-range address displacement into a base and offset.
> +   Use a 4KB range for 1- and 2-byte accesses and a 16KB range otherwise,
> +   to increase opportunities for sharing the base address between accesses
> +   of different sizes.  TI/TFmode and unaligned accesses use a 256-byte range.  */
> +static bool
> +aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
> +{
> +  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
> +
> +  if (mode == TImode || mode == TFmode
> +      || (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
> +    mask = 0xff;
> +
> +  *off = GEN_INT (INTVAL (*disp) & ~mask);
> +  *disp = GEN_INT (INTVAL (*disp) & mask);
> +  return true;
> +}
> +
>  /* Return TRUE if rtx X is immediate constant 0.0 */
>  bool
>  aarch64_float_const_zero_rtx_p (rtx x)
> @@ -14137,6 +14155,10 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
>  #undef TARGET_LEGITIMATE_CONSTANT_P
>  #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
> 
> +#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
> +#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
> +  aarch64_legitimize_address_displacement
> +
>  #undef TARGET_LIBGCC_CMP_RETURN_MODE
>  #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
> 
> 
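> For completeness, here is a small standalone sketch of the same splitting
> logic, including the 256-byte range used for TI/TFmode and unaligned offsets.
> This is host-side illustration only, not part of the patch, and the offsets
> and sizes are made up:
>
> #include <stdio.h>
>
> /* Mirror the hook: pick the mask from the access size and alignment of
>    OFFSET, then split OFFSET into a base and an in-range displacement.  */
> static void
> split (long offset, int size, int is_ti_tf)
> {
>   long mask = size < 4 ? 0xfff : 0x3fff;        /* 4KB vs 16KB range.  */
>   if (is_ti_tf || (offset & (size - 1)) != 0)   /* TI/TFmode or unaligned.  */
>     mask = 0xff;                                /* 256-byte range.  */
>   printf ("offset %ld size %d -> base %ld + disp %ld\n",
>           offset, size, offset & ~mask, offset & mask);
> }
>
> int
> main (void)
> {
>   split (24000, 4, 0);   /* arr[6000]: base 16384 + disp 7616.  */
>   split (32764, 4, 0);   /* arr[8191]: base 16384 + disp 16380.  */
>   split (28001, 4, 0);   /* unaligned word: base 27904 + disp 97.  */
>   split (28000, 16, 1);  /* TFmode: base 27904 + disp 96.  */
>   return 0;
> }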