Many constants that currently take three instructions to materialize (a `mov` followed by two `movk`) can instead be generated with a `mov` plus a `sub` instruction. This patch adds that capability as follows: if a constant `val` cannot be generated using one or two instructions, find a `val1` such that `(val + val1)` can be generated with a single `mov`, then emit a `sub` of `val1` to recover the original value. Candidates for `val1` are the 12-bit immediates 1..4095, either unshifted or shifted left by 12 bits.
Consider the function: long f1 (void) { return 0xFFFFFFFF0001FFFF - 0x00123000; } GCC output: f1(): mov x0, -12289 movk x0, 0xffef, lsl 16 movk x0, 0xfffe, lsl 32 ret New output: f1(): mov x0, -4294967297 sub x0, x0, #1060864 ret This patch is for GCC 16. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate): Add implementation to generate an immediate using mov plus sub instruction. gcc/testsuite/ChangeLog: * gcc.target/aarch64/pr114528.c: New test. Signed-off-by: Eikansh Gupta <quic_eikag...@quicinc.com> --- gcc/config/aarch64/aarch64.cc | 44 ++++++++++++++++- gcc/testsuite/gcc.target/aarch64/pr114528.c | 53 +++++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr114528.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fe76730b0a7..9076086d92f 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -4604,13 +4604,53 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, } } + /* Try a mov plus sub to generate the immediate in 2 instructions. */ + /* Check number of instructions required to generate the immediate. */ + num_insns = 1; + mask = 0xffff; + val2 = one_match > zero_match ? ~val : val; + i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32; + + for (i += 16; i < 64; i += 16) + { + if ((val2 & (mask << i)) == 0) + continue; + num_insns ++; + } + + /* Generate mov plus sub if mov plus movk can't be generated. + Find val3 such that aarch64_move_imm (val + val3) == 1. 
*/ + if (num_insns > 2) + { + for (i = 1; i < 4096; i++) + { + if (aarch64_move_imm (val + i, mode)) + { + val3 = i; + break; + } + if (aarch64_move_imm (val + (i << 12), mode)) + { + val3 = (i << 12); + break; + } + } + if (i < 4096) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val + val3))); + emit_insn (gen_adddi3 (dest, dest, GEN_INT (-val3))); + } + return 2; + } + } + /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which are emitted by the initial mov. If one_match > zero_match, skip set bits, otherwise skip zero bits. */ num_insns = 1; - mask = 0xffff; - val2 = one_match > zero_match ? ~val : val; i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32; if (generate) diff --git a/gcc/testsuite/gcc.target/aarch64/pr114528.c b/gcc/testsuite/gcc.target/aarch64/pr114528.c new file mode 100644 index 00000000000..5cbab6a644f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr114528.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** mov x0, -4294967297 +** sub x0, x0, #1060864 +** ret +*/ + +long f1 (void) +{ + return 0xFFFFFFFF0001FFFF - 0x00123000; +} + +/* +** f2: +** mov x0, 82190693199511551 +** sub x0, x0, #4546560 +** ret +*/ + +long f2 (void) +{ + return 0x0123FFFFFFFFFFFF - 0x00456000; +} + +/* +** f3: +** mov x0, 188896956645376 +** sub x0, x0, #13492224 +** ret +*/ + +long f3 (void) +{ + return 0x0000ABCD00000000 - 0x00CDE000; +} + +/* If a constant can be generated with 2 mov instruction, + mov+sub should not get generated. */ +/* +** f4: +** mov x0, -292 +** movk x0, 0x1, lsl 16 +** ret +*/ + +long f4 (void) +{ + return 0xFFFFFFFF0001FFFF - 0x00123; +} -- 2.17.1