On 07/08/14 13:46, Richard Earnshaw wrote:
On 07/08/14 12:32, Kyrill Tkachov wrote:
On 16/05/14 13:35, Richard Earnshaw wrote:
On 08/05/14 18:36, Ian Bolton wrote:
Hi,
It currently takes 4 instructions to generate certain immediates on
AArch64 (unless we put them in the constant pool).
For example ...
long long
ffffbeefcafebabe ()
{
return 0xFFFFBEEFCAFEBABEll;
}
leads to ...
mov x0, 47806
movk x0, 0xcafe, lsl 16
movk x0, 0xbeef, lsl 32
orr x0, x0, -281474976710656
The above case is tackled in this patch by employing MOVN
to generate the top 32 bits in a single instruction ...
mov x0, -71536975282177
movk x0, 0xcafe, lsl 16
movk x0, 0xbabe, lsl 0
(Note that where at least two half-words are 0xffff, the existing
code that generates the immediate in two instructions is still used.)
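For clarity, a minimal host-side C sketch (not part of the patch) of what the MOVN/MOVK sequence above computes:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    /* MOVN with lsl 32 materialises ~(0x4110 << 32), i.e. -71536975282177,
       and the two MOVKs patch in the low half-words.  */
    uint64_t x = ~(0x4110ull << 32);          /* mov  x0, -71536975282177 */
    x = (x & ~0xffff0000ull) | 0xcafe0000ull; /* movk x0, 0xcafe, lsl 16  */
    x = (x & ~0xffffull) | 0xbabeull;         /* movk x0, 0xbabe, lsl 0   */
    assert (x == 0xFFFFBEEFCAFEBABEull);
    return 0;
  }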
Tested with the standard gcc regression suite and the attached test case.
OK for commit?
What about:
long long a()
{
return 0x1234ffff56789abcll;
}
long long b()
{
return 0x12345678ffff9abcll;
}
long long c()
{
return 0x123456789abcffffll;
}
?
Surely these can also benefit from this sort of optimization, but it
looks as though you only handle the top 16 bits being set.
Hi Richard,
How about this rework of the patch?
For code:
long long foo ()
{
return 0xFFFFBEEFCAFEBABEll;
}
long long a()
{
return 0x1234ffff56789abcll;
}
long long b()
{
return 0x12345678ffff9abcll;
}
long long c()
{
return 0x123456789abcffffll;
}
we now generate:
foo:
mov x0, -17730
movk x0, 0xcafe, lsl 16
movk x0, 0xbeef, lsl 32
ret
.size foo, .-foo
.align 2
.global a
.type a, %function
a:
mov x0, -25924
movk x0, 0x5678, lsl 16
movk x0, 0x1234, lsl 48
ret
.size a, .-a
.align 2
.global b
.type b, %function
b:
mov x0, -25924
movk x0, 0x5678, lsl 32
movk x0, 0x1234, lsl 48
ret
.size b, .-b
.align 2
.global c
.type c, %function
c:
mov x0, -1698889729
movk x0, 0x5678, lsl 32
movk x0, 0x1234, lsl 48
ret
3 instructions are used in each case.
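As a rough cross-check of the approach (a host-side model only, not the backend code; insn_count is a hypothetical helper), the idea is: start from a MOVN that leaves every half-word as 0xffff except the lowest one that isn't already 0xffff, then MOVK each remaining half-word that still differs:

  #include <assert.h>
  #include <stdint.h>

  /* Model: MOVN the lowest non-0xffff half-word, then MOVK the rest.
     Assumes val has at least one 0xffff half-word and one that isn't.  */
  static int
  insn_count (uint64_t val)
  {
    int first = -1, count = 1;
    for (int i = 0; i < 64; i += 16)
      if (((val >> i) & 0xffff) != 0xffff && first < 0)
        first = i;
    assert (first >= 0);

    /* MOVN base: all ones except the half-word at 'first'.  */
    uint64_t x = ~(0xffffull << first) | (((val >> first) & 0xffff) << first);

    for (int i = 0; i < 64; i += 16)
      if (((x >> i) & 0xffff) != ((val >> i) & 0xffff))
        {
          /* MOVK: replace one half-word.  */
          x = (x & ~(0xffffull << i)) | (((val >> i) & 0xffff) << i);
          count++;
        }
    assert (x == val);
    return count;
  }

  int
  main (void)
  {
    assert (insn_count (0xFFFFBEEFCAFEBABEull) == 3);
    assert (insn_count (0x1234ffff56789abcull) == 3);
    assert (insn_count (0x12345678ffff9abcull) == 3);
    assert (insn_count (0x123456789abcffffull) == 3);
    return 0;
  }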
Thanks,
Kyrill
2014-08-07 Ian Bolton <ian.bol...@arm.com>
Kyrylo Tkachov <kyrylo.tkac...@arm.com>
* config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
Use MOVN when one of the half-words is 0xffff.
aarch64-movn-pattern-patch-v3.patch
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0a7f441..2db91c7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
unsigned HOST_WIDE_INT val;
bool subtargets;
rtx subtarget;
- int one_match, zero_match;
+ int one_match, zero_match, first_not_ffff_match;
gcc_assert (mode == SImode || mode == DImode);
@@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
one_match = 0;
zero_match = 0;
mask = 0xffff;
+ first_not_ffff_match = -1;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
- if ((val & mask) == 0)
- zero_match++;
- else if ((val & mask) == mask)
+ if ((val & mask) == mask)
one_match++;
+ else
+ {
+ if (first_not_ffff_match < 0)
+ first_not_ffff_match = i;
+ if ((val & mask) == 0)
+ zero_match++;
+ }
}
if (one_match == 2)
{
- mask = 0xffff;
- for (i = 0; i < 64; i += 16, mask <<= 16)
+ /* Set one of the quarters and then insert back into result. */
+ mask = 0xffffll << first_not_ffff_match;
+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+ GEN_INT ((val >> first_not_ffff_match)
+ & 0xffff)));
+ return;
+ }
+
+ if (one_match == 1)
I think this should be (one_match > zero_match).
Otherwise constants such as
0x00001234ffff0000ll
might end up taking three rather than two insns.
You're right, we generate:
mov x0, -65536
movk x0, 0x1234, lsl 32
and x0, x0, 281474976710655
with your suggestion we can improve this to:
mov x0, 4294901760
movk x0, 0x1234, lsl 32
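For reference, the half-word tallies for that constant are one_match == 1 and zero_match == 2, so one_match > zero_match is false and the existing MOVZ/MOVK path handles it in the two instructions shown. A tiny host-side check of the tallies (mirroring the counting loop, not the backend code itself):

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint64_t val = 0x00001234ffff0000ull;
    int one_match = 0, zero_match = 0;

    /* Same tally as the loop in aarch64_expand_mov_immediate.  */
    for (int i = 0; i < 64; i += 16)
      {
        unsigned half = (val >> i) & 0xffff;
        if (half == 0xffff)
          one_match++;
        else if (half == 0)
          zero_match++;
      }
    assert (one_match == 1 && zero_match == 2);
    return 0;
  }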
Ok with that change then?
Kyrill
2014-08-07 Ian Bolton <ian.bol...@arm.com>
Kyrylo Tkachov <kyrylo.tkac...@arm.com>
* config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
Use MOVN when one of the half-words is 0xffff.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0a7f441..2db91c7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
unsigned HOST_WIDE_INT val;
bool subtargets;
rtx subtarget;
- int one_match, zero_match;
+ int one_match, zero_match, first_not_ffff_match;
gcc_assert (mode == SImode || mode == DImode);
@@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
one_match = 0;
zero_match = 0;
mask = 0xffff;
+ first_not_ffff_match = -1;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
- if ((val & mask) == 0)
- zero_match++;
- else if ((val & mask) == mask)
+ if ((val & mask) == mask)
one_match++;
+ else
+ {
+ if (first_not_ffff_match < 0)
+ first_not_ffff_match = i;
+ if ((val & mask) == 0)
+ zero_match++;
+ }
}
if (one_match == 2)
{
- mask = 0xffff;
- for (i = 0; i < 64; i += 16, mask <<= 16)
+ /* Set one of the quarters and then insert back into result. */
+ mask = 0xffffll << first_not_ffff_match;
+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+ GEN_INT ((val >> first_not_ffff_match)
+ & 0xffff)));
+ return;
+ }
+
+ if (one_match > zero_match)
+ {
+ /* Set either first three quarters or all but the third. */
+ mask = 0xffffll << (16 - first_not_ffff_match);
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ GEN_INT (val | mask | 0xffffffff00000000ull)));
+
+ /* Now insert other two quarters. */
+ for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
+ i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != mask)
- {
- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- return;
- }
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
}
- gcc_unreachable ();
+ return;
}
if (zero_match == 2)