On 07/08/14 12:32, Kyrill Tkachov wrote:
>
> On 16/05/14 13:35, Richard Earnshaw wrote:
>> On 08/05/14 18:36, Ian Bolton wrote:
>>> Hi,
>>>
>>> It currently takes 4 instructions to generate certain immediates on
>>> AArch64 (unless we put them in the constant pool).
>>>
>>> For example ...
>>>
>>> long long
>>> ffffbeefcafebabe ()
>>> {
>>> return 0xFFFFBEEFCAFEBABEll;
>>> }
>>>
>>> leads to ...
>>>
>>> mov x0, 0x47806
>>> mov x0, 0xcafe, lsl 16
>>> mov x0, 0xbeef, lsl 32
>>> orr x0, x0, -281474976710656
>>>
>>> The above case is tackled in this patch by employing MOVN
>>> to generate the top 32-bits in a single instruction ...
>>>
>>> mov x0, -71536975282177
>>> movk x0, 0xcafe, lsl 16
>>> movk x0, 0xbabe, lsl 0
>>>
>>> Note that where at least two half-words are 0xffff, existing
>>> code that does the immediate in two instructions is still used.)
>>>
>>> Tested on standard gcc regressions and the attached test case.
>>>
>>> OK for commit?
>> What about:
>>
>> long long a()
>> {
>> return 0x1234ffff56789abcll;
>> }
>>
>> long long b()
>> {
>> return 0x12345678ffff9abcll;
>> }
>>
>> long long c()
>> {
>> return 0x123456789abcffffll;
>> }
>>
>> ?
>>
>> Surely these can also benefit from this sort of optimization, but it
>> looks as though you only handle the top 16 bits being set.
>
> Hi Richard,
>
> How about this rework of the patch?
>
> For code:
>
> long long foo ()
> {
> return 0xFFFFBEEFCAFEBABEll;
> }
>
> long long a()
> {
> return 0x1234ffff56789abcll;
> }
>
> long long b()
> {
> return 0x12345678ffff9abcll;
> }
>
> long long c()
> {
> return 0x123456789abcffffll;
> }
>
> we now generate:
> foo:
> mov x0, -17730
> movk x0, 0xcafe, lsl 16
> movk x0, 0xbeef, lsl 32
> ret
> .size foo, .-foo
> .align 2
> .global a
> .type a, %function
> a:
> mov x0, -25924
> movk x0, 0x5678, lsl 16
> movk x0, 0x1234, lsl 48
> ret
> .size a, .-a
> .align 2
> .global b
> .type b, %function
> b:
> mov x0, -25924
> movk x0, 0x5678, lsl 32
> movk x0, 0x1234, lsl 48
> ret
> .size b, .-b
> .align 2
> .global c
> .type c, %function
> c:
> mov x0, -1698889729
> movk x0, 0x5678, lsl 32
> movk x0, 0x1234, lsl 48
> ret
>
>
> 3 instructions are used in each case.
>
> Thanks,
> Kyrill
>
> 2014-08-07 Ian Bolton <[email protected]>
> Kyrylo Tkachov <[email protected]>
>
> * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
> Use MOVN when one of the half-words is 0xffff.
>
>
> aarch64-movn-pattern-patch-v3.patch
>
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 0a7f441..2db91c7 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
> unsigned HOST_WIDE_INT val;
> bool subtargets;
> rtx subtarget;
> - int one_match, zero_match;
> + int one_match, zero_match, first_not_ffff_match;
>
> gcc_assert (mode == SImode || mode == DImode);
>
> @@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
> one_match = 0;
> zero_match = 0;
> mask = 0xffff;
> + first_not_ffff_match = -1;
>
> for (i = 0; i < 64; i += 16, mask <<= 16)
> {
> - if ((val & mask) == 0)
> - zero_match++;
> - else if ((val & mask) == mask)
> + if ((val & mask) == mask)
> one_match++;
> + else
> + {
> + if (first_not_ffff_match < 0)
> + first_not_ffff_match = i;
> + if ((val & mask) == 0)
> + zero_match++;
> + }
> }
>
> if (one_match == 2)
> {
> - mask = 0xffff;
> - for (i = 0; i < 64; i += 16, mask <<= 16)
> + /* Set one of the quarters and then insert back into result. */
> + mask = 0xffffll << first_not_ffff_match;
> + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
> + emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
> + GEN_INT ((val >> first_not_ffff_match)
> + & 0xffff)));
> + return;
> + }
> +
> + if (one_match == 1)
I think this should be (one_match > zero_match).
Otherwise constants such as
0x00001234ffff0000ll
might end up taking three rather than two insns.
R.
> + {
> + /* Set either first three quarters or all but the third. */
> + mask = 0xffffll << (16 - first_not_ffff_match);
> + emit_insn (gen_rtx_SET (VOIDmode, dest,
> + GEN_INT (val | mask | 0xffffffff00000000ull)));
> +
> + /* Now insert other two quarters. */
> + for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match <<
> 1);
> + i < 64; i += 16, mask <<= 16)
> {
> if ((val & mask) != mask)
> - {
> - emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> - return;
> - }
> + emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> + GEN_INT ((val >> i) & 0xffff)));
> }
> - gcc_unreachable ();
> + return;
> }
>
> if (zero_match == 2)
>