On 08/05/14 18:36, Ian Bolton wrote: > Hi, > > It currently takes 4 instructions to generate certain immediates on > AArch64 (unless we put them in the constant pool). > > For example ... > > long long > ffffbeefcafebabe () > { > return 0xFFFFBEEFCAFEBABEll; > } > > leads to ... > > mov x0, 47806 > movk x0, 0xcafe, lsl 16 > movk x0, 0xbeef, lsl 32 > orr x0, x0, -281474976710656 > > The above case is tackled in this patch by employing MOVN > to generate the top 32 bits in a single instruction ... > > mov x0, -71536975282177 > movk x0, 0xcafe, lsl 16 > movk x0, 0xbabe, lsl 0 > > (Note that where at least two half-words are 0xffff, existing > code that does the immediate in two instructions is still used.) > > Tested on standard gcc regressions and the attached test case. > > OK for commit?
What about: long long a() { return 0x1234ffff56789abcll; } long long b() { return 0x12345678ffff9abcll; } long long c() { return 0x123456789abcffffll; } ? Surely these can also benefit from this sort of optimization, but it looks as though you only handle the top 16 bits being set. R. > > Cheers, > Ian > > > 2014-05-08 Ian Bolton <ian.bol...@arm.com> > > gcc/ > * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): > Use MOVN when top-most half-word (and only that half-word) > is 0xffff. > gcc/testsuite/ > * gcc.target/aarch64/movn_1.c: New test. > > > aarch64-movn-exploitation-patch-v5.txt > > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 43a83566..a8e504e 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -1177,6 +1177,18 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) > } > } > > + /* Look for case where upper 16 bits are set, so we can use MOVN. */ > + if ((val & 0xffff000000000000ll) == 0xffff000000000000ll) > + { > + emit_insn (gen_rtx_SET (VOIDmode, dest, > + GEN_INT (~ (~val & (0xffffll << 32))))); > + emit_insn (gen_insv_immdi (dest, GEN_INT (16), > + GEN_INT ((val >> 16) & 0xffff))); > + emit_insn (gen_insv_immdi (dest, GEN_INT (0), > + GEN_INT (val & 0xffff))); > + return; > + } > + > simple_sequence: > first = true; > mask = 0xffff; > diff --git a/gcc/testsuite/gcc.target/aarch64/movn_1.c > b/gcc/testsuite/gcc.target/aarch64/movn_1.c > new file mode 100644 > index 0000000..cc11ade > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/movn_1.c > @@ -0,0 +1,27 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fno-inline --save-temps" } */ > + > +extern void abort (void); > + > +long long > +foo () > +{ > + /* { dg-final { scan-assembler "mov\tx\[0-9\]+, -71536975282177" } } */ > + return 0xffffbeefcafebabell; > +} > + > +long long > +merge4 (int a, int b, int c, int d) > +{ > + return ((long long) a << 48 | (long long) b << 32 > + | (long long) c << 16 | (long long) d); > 
+} > + > +int main () > +{ > + if (foo () != merge4 (0xffff, 0xbeef, 0xcafe, 0xbabe)) > + abort (); > + return 0; > +} > + > +/* { dg-final { cleanup-saved-temps } } */ >