Improve expansion of immediates that can be created from a bitmask
immediate and 2 MOVKs. This reduces the number of 4-instruction
immediates in SPECINT/FP by 10-15%.
Passes regression testing, OK for commit?
gcc/ChangeLog:
PR target/106583
* config/aarch64/aarch64.cc (aarch64_internal_mov_immediate):
Add support for a bitmask immediate with 2 MOVKs.
gcc/testsuite:
PR target/106583
* gcc.target/aarch64/pr106583.c: Add new test.
---
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 926e81f028c82aac9a5fecc18f921f84399c24ae..1601d11710cb6132c80a77bb4fe2f8429519aa5a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5568,7 +5568,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
- if (zero_match != 2 && one_match != 2)
+ if (zero_match < 2 && one_match < 2)
{
/* Try emitting a bitmask immediate with a movk replacing 16 bits.
For a 64-bit bitmask try whether changing 16 bits to all ones or
@@ -5600,6 +5600,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
}
}
+ /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
+ if (zero_match + one_match == 0)
+ {
+ mask = 0xffffffff;
+
+ for (i = 0; i < 64; i += 16)
+ {
+ val2 = val & ~mask;
+ if (aarch64_bitmask_imm (val2, mode))
+ break;
+ val2 = val | mask;
+ if (aarch64_bitmask_imm (val2, mode))
+ break;
+ val2 = val2 & ~mask;
+ val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
+ if (aarch64_bitmask_imm (val2, mode))
+ break;
+
+ mask = (mask << 16) | (mask >> 48);
+ }
+
+ if (i != 64)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ i = (i + 16) & 63;
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ }
+
+ return 3;
+ }
+ }
+
/* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
are emitted by the initial mov. If one_match > zero_match, skip set bits,
otherwise skip zero bits. */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c
new file mode 100644
index 0000000000000000000000000000000000000000..f0a027a0950e506d4ddaacce5e151f57070948dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps" } */
+
+long f1 (void)
+{
+ return 0x7efefefefefefeff;
+}
+
+long f2 (void)
+{
+ return 0x12345678aaaaaaaa;
+}
+
+long f3 (void)
+{
+ return 0x1234cccccccc5678;
+}
+
+long f4 (void)
+{
+ return 0x7777123456787777;
+}
+
+long f5 (void)
+{
+ return 0x5555555512345678;
+}
+
+/* { dg-final { scan-assembler-times {\tmovk\t} 10 } } */
+/* { dg-final { scan-assembler-times {\tmov\t} 5 } } */