Many constants that currently need three `mov`/`movk` instructions can be
generated with a `mov` plus a `sub` instruction instead. This patch adds that
capability as follows: if a constant `val` cannot be generated using 1 or 2
`mov` instructions, search for a `val1` (a 12-bit immediate, optionally shifted
left by 12) such that `(val + val1)` can be generated with a single `mov`.
Then emit a `sub` of `val1` to recover the original value.

Consider the function:

long f1 (void)
{
  return 0xFFFFFFFF0001FFFF - 0x00123000;
}

Current GCC output:
f1():
        mov     x0, -12289
        movk    x0, 0xffef, lsl 16
        movk    x0, 0xfffe, lsl 32
        ret

New output:
f1():
        mov     x0, -4294967297
        sub     x0, x0, #1060864
        ret

This patch is for GCC 16.

gcc/ChangeLog:

        * config/aarch64/aarch64.cc
        (aarch64_internal_mov_immediate): Add implementation to generate
        an immediate using mov plus sub instruction.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/pr114528.c: New test.

Signed-off-by: Eikansh Gupta <quic_eikag...@quicinc.com>
---
 gcc/config/aarch64/aarch64.cc               | 44 ++++++++++++++++-
 gcc/testsuite/gcc.target/aarch64/pr114528.c | 53 +++++++++++++++++++++
 2 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr114528.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index fe76730b0a7..9076086d92f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -4604,13 +4604,53 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
          }
     }
 
+  /* Try a mov plus sub to generate the immediate in 2 instructions. */
+  /* Check number of instructions required to generate the immediate. */
+  num_insns = 1;
+  mask = 0xffff;
+  val2 = one_match > zero_match ? ~val : val;
+  i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
+
+  for (i += 16; i < 64; i += 16)
+    {
+      if ((val2 & (mask << i)) == 0)
+       continue;
+      num_insns ++;
+    }
+
+  /* Generate mov plus sub if mov plus movk can't be generated.
+     Find val3 such that aarch64_move_imm (val + val3) == 1. */
+  if (num_insns > 2)
+    {
+      for (i = 1; i < 4096; i++)
+       {
+         if (aarch64_move_imm (val + i, mode))
+           {
+             val3 = i;
+             break;
+           }
+         if (aarch64_move_imm (val + (i << 12), mode))
+           {
+             val3 = (i << 12);
+             break;
+           }
+       }
+      if (i < 4096)
+       {
+         if (generate)
+           {
+             emit_insn (gen_rtx_SET (dest, GEN_INT (val + val3)));
+             emit_insn (gen_adddi3 (dest, dest, GEN_INT (-val3)));
+           }
+         return 2;
+       }
+    }
+
   /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
      are emitted by the initial mov.  If one_match > zero_match, skip set bits,
      otherwise skip zero bits.  */
 
   num_insns = 1;
-  mask = 0xffff;
-  val2 = one_match > zero_match ? ~val : val;
   i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
 
   if (generate)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr114528.c b/gcc/testsuite/gcc.target/aarch64/pr114528.c
new file mode 100644
index 00000000000..5cbab6a644f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr114528.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+**     mov     x0, -4294967297
+**     sub     x0, x0, #1060864
+**     ret
+*/
+
+long f1 (void)
+{
+  return 0xFFFFFFFF0001FFFF - 0x00123000;
+}
+
+/*
+** f2:
+**     mov     x0, 82190693199511551
+**     sub     x0, x0, #4546560
+**     ret
+*/
+
+long f2 (void)
+{
+  return 0x0123FFFFFFFFFFFF - 0x00456000;
+}
+
+/*
+** f3:
+**     mov     x0, 188896956645376
+**     sub     x0, x0, #13492224
+**     ret
+*/
+
+long f3 (void)
+{
+  return 0x0000ABCD00000000 - 0x00CDE000;
+}
+
+/* If a constant can be generated with 2 mov instructions,
+   mov+sub should not be generated.  */
+/*
+** f4:
+**     mov     x0, -292
+**     movk    x0, 0x1, lsl 16
+**     ret
+*/
+
+long f4 (void)
+{
+  return 0xFFFFFFFF0001FFFF - 0x00123;
+}
-- 
2.17.1

Reply via email to