On 07/08/14 12:32, Kyrill Tkachov wrote: > > On 16/05/14 13:35, Richard Earnshaw wrote: >> On 08/05/14 18:36, Ian Bolton wrote: >>> Hi, >>> >>> It currently takes 4 instructions to generate certain immediates on >>> AArch64 (unless we put them in the constant pool). >>> >>> For example ... >>> >>> long long >>> ffffbeefcafebabe () >>> { >>> return 0xFFFFBEEFCAFEBABEll; >>> } >>> >>> leads to ... >>> >>> mov x0, 47806 >>> movk x0, 0xcafe, lsl 16 >>> movk x0, 0xbeef, lsl 32 >>> orr x0, x0, -281474976710656 >>> >>> The above case is tackled in this patch by employing MOVN >>> to generate the top 32-bits in a single instruction ... >>> >>> mov x0, -71536975282177 >>> movk x0, 0xcafe, lsl 16 >>> movk x0, 0xbabe, lsl 0 >>> >>> (Note that where at least two half-words are 0xffff, existing >>> code that does the immediate in two instructions is still used.) >>> >>> Tested on standard gcc regressions and the attached test case. >>> >>> OK for commit? >> What about: >> >> long long a() >> { >> return 0x1234ffff56789abcll; >> } >> >> long long b() >> { >> return 0x12345678ffff9abcll; >> } >> >> long long c() >> { >> return 0x123456789abcffffll; >> } >> >> ? >> >> Surely these can also benefit from this sort of optimization, but it >> looks as though you only handle the top 16 bits being set. > > Hi Richard, > > How about this rework of the patch?
> > For code: > > long long foo () > { > return 0xFFFFBEEFCAFEBABEll; > } > > long long a() > { > return 0x1234ffff56789abcll; > } > > long long b() > { > return 0x12345678ffff9abcll; > } > > long long c() > { > return 0x123456789abcffffll; > } > > we now generate: > foo: > mov x0, -17730 > movk x0, 0xcafe, lsl 16 > movk x0, 0xbeef, lsl 32 > ret > .size foo, .-foo > .align 2 > .global a > .type a, %function > a: > mov x0, -25924 > movk x0, 0x5678, lsl 16 > movk x0, 0x1234, lsl 48 > ret > .size a, .-a > .align 2 > .global b > .type b, %function > b: > mov x0, -25924 > movk x0, 0x5678, lsl 32 > movk x0, 0x1234, lsl 48 > ret > .size b, .-b > .align 2 > .global c > .type c, %function > c: > mov x0, -1698889729 > movk x0, 0x5678, lsl 32 > movk x0, 0x1234, lsl 48 > ret > > > 3 instructions are used in each case. > > Thanks, > Kyrill > > 2014-08-07 Ian Bolton <ian.bol...@arm.com> > Kyrylo Tkachov <kyrylo.tkac...@arm.com> > > * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): > Use MOVN when one of the half-words is 0xffff. 
> > > aarch64-movn-pattern-patch-v3.patch > > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 0a7f441..2db91c7 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) > unsigned HOST_WIDE_INT val; > bool subtargets; > rtx subtarget; > - int one_match, zero_match; > + int one_match, zero_match, first_not_ffff_match; > > gcc_assert (mode == SImode || mode == DImode); > > @@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) > one_match = 0; > zero_match = 0; > mask = 0xffff; > + first_not_ffff_match = -1; > > for (i = 0; i < 64; i += 16, mask <<= 16) > { > - if ((val & mask) == 0) > - zero_match++; > - else if ((val & mask) == mask) > + if ((val & mask) == mask) > one_match++; > + else > + { > + if (first_not_ffff_match < 0) > + first_not_ffff_match = i; > + if ((val & mask) == 0) > + zero_match++; > + } > } > > if (one_match == 2) > { > - mask = 0xffff; > - for (i = 0; i < 64; i += 16, mask <<= 16) > + /* Set one of the quarters and then insert back into result. */ > + mask = 0xffffll << first_not_ffff_match; > + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); > + emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), > + GEN_INT ((val >> first_not_ffff_match) > + & 0xffff))); > + return; > + } > + > + if (one_match == 1)
I think this should be (one_match > zero_match). Otherwise constants such as 0x00001234ffff0000ll might end up taking three rather than two insns. R. > + { > + /* Set either first three quarters or all but the third. */ > + mask = 0xffffll << (16 - first_not_ffff_match); > + emit_insn (gen_rtx_SET (VOIDmode, dest, > + GEN_INT (val | mask | 0xffffffff00000000ull))); > + > + /* Now insert other two quarters. */ > + for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1); > + i < 64; i += 16, mask <<= 16) > { > if ((val & mask) != mask) > - { > - emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); > - emit_insn (gen_insv_immdi (dest, GEN_INT (i), > - GEN_INT ((val >> i) & 0xffff))); > - return; > - } > + emit_insn (gen_insv_immdi (dest, GEN_INT (i), > + GEN_INT ((val >> i) & 0xffff))); > } > - gcc_unreachable (); > + return; > } > > if (zero_match == 2) >