On 07/08/14 13:46, Richard Earnshaw wrote:
On 07/08/14 12:32, Kyrill Tkachov wrote:
On 16/05/14 13:35, Richard Earnshaw wrote:
On 08/05/14 18:36, Ian Bolton wrote:
Hi,

It currently takes 4 instructions to generate certain immediates on
AArch64 (unless we put them in the constant pool).

For example ...

    long long
    ffffbeefcafebabe ()
    {
      return 0xFFFFBEEFCAFEBABEll;
    }

leads to ...

    mov x0, 47806
    movk x0, 0xcafe, lsl 16
    movk x0, 0xbeef, lsl 32
    orr x0, x0, -281474976710656

The above case is tackled in this patch by employing MOVN
to generate the top 32 bits in a single instruction ...

    mov x0, -71536975282177
    movk x0, 0xcafe, lsl 16
    movk x0, 0xbabe, lsl 0

(Note that where at least two half-words are 0xffff, the existing
code that builds the immediate in two instructions is still used.)

Tested on standard gcc regressions and the attached test case.

OK for commit?
What about:

long long a()
{
    return 0x1234ffff56789abcll;
}

long long b()
{
    return 0x12345678ffff9abcll;
}

long long c()
{
    return 0x123456789abcffffll;
}

?

Surely these can also benefit from this sort of optimization, but it
looks as though you only handle the top 16 bits being set.
Hi Richard,

How about this rework of the patch?

For code:

long long foo ()
{
    return 0xFFFFBEEFCAFEBABEll;
}

long long a()
{
    return 0x1234ffff56789abcll;
}

long long b()
{
    return 0x12345678ffff9abcll;
}

long long c()
{
    return 0x123456789abcffffll;
}

we now generate:
foo:
          mov     x0, -17730
          movk    x0, 0xcafe, lsl 16
          movk    x0, 0xbeef, lsl 32
          ret
          .size   foo, .-foo
          .align  2
          .global a
          .type   a, %function
a:
          mov     x0, -25924
          movk    x0, 0x5678, lsl 16
          movk    x0, 0x1234, lsl 48
          ret
          .size   a, .-a
          .align  2
          .global b
          .type   b, %function
b:
          mov     x0, -25924
          movk    x0, 0x5678, lsl 32
          movk    x0, 0x1234, lsl 48
          ret
          .size   b, .-b
          .align  2
          .global c
          .type   c, %function
c:
          mov     x0, -1698889729
          movk    x0, 0x5678, lsl 32
          movk    x0, 0x1234, lsl 48
          ret


3 instructions are used in each case.

Thanks,
Kyrill

2014-08-07  Ian Bolton  <ian.bol...@arm.com>
                      Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

          * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
          Use MOVN when one of the half-words is 0xffff.


aarch64-movn-pattern-patch-v3.patch


diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0a7f441..2db91c7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
    unsigned HOST_WIDE_INT val;
    bool subtargets;
    rtx subtarget;
-  int one_match, zero_match;
+  int one_match, zero_match, first_not_ffff_match;

    gcc_assert (mode == SImode || mode == DImode);

@@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
    one_match = 0;
    zero_match = 0;
    mask = 0xffff;
+  first_not_ffff_match = -1;
    for (i = 0; i < 64; i += 16, mask <<= 16)
      {
-      if ((val & mask) == 0)
-       zero_match++;
-      else if ((val & mask) == mask)
+      if ((val & mask) == mask)
        one_match++;
+      else
+       {
+         if (first_not_ffff_match < 0)
+           first_not_ffff_match = i;
+         if ((val & mask) == 0)
+           zero_match++;
+       }
      }
    if (one_match == 2)
      {
-      mask = 0xffff;
-      for (i = 0; i < 64; i += 16, mask <<= 16)
+      /* Set one of the quarters and then insert back into result.  */
+      mask = 0xffffll << first_not_ffff_match;
+      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+                                GEN_INT ((val >> first_not_ffff_match)
+                                         & 0xffff)));
+      return;
+    }
+
+  if (one_match == 1)
I think this should be (one_match > zero_match).

Otherwise constants such as


   0x00001234ffff0000ll

might end up taking three rather than two insns.

You're right, we generate:
        mov     x0, -65536
        movk    x0, 0x1234, lsl 32
        and     x0, x0, 281474976710655

With your suggestion we can improve this to:
        mov     x0, 4294901760
        movk    x0, 0x1234, lsl 32

Ok with that change then?

Kyrill

2014-08-07  Ian Bolton  <ian.bol...@arm.com>
            Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

         * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
         Use MOVN when one of the half-words is 0xffff.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0a7f441..2db91c7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   unsigned HOST_WIDE_INT val;
   bool subtargets;
   rtx subtarget;
-  int one_match, zero_match;
+  int one_match, zero_match, first_not_ffff_match;
 
   gcc_assert (mode == SImode || mode == DImode);
 
@@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   one_match = 0;
   zero_match = 0;
   mask = 0xffff;
+  first_not_ffff_match = -1;
 
   for (i = 0; i < 64; i += 16, mask <<= 16)
     {
-      if ((val & mask) == 0)
-	zero_match++;
-      else if ((val & mask) == mask)
+      if ((val & mask) == mask)
 	one_match++;
+      else
+	{
+	  if (first_not_ffff_match < 0)
+	    first_not_ffff_match = i;
+	  if ((val & mask) == 0)
+	    zero_match++;
+	}
     }
 
   if (one_match == 2)
     {
-      mask = 0xffff;
-      for (i = 0; i < 64; i += 16, mask <<= 16)
+      /* Set one of the quarters and then insert back into result.  */
+      mask = 0xffffll << first_not_ffff_match;
+      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+				 GEN_INT ((val >> first_not_ffff_match)
+					  & 0xffff)));
+      return;
+    }
+
+  if (one_match > zero_match)
+    {
+      /* Set either first three quarters or all but the third.	 */
+      mask = 0xffffll << (16 - first_not_ffff_match);
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+			      GEN_INT (val | mask | 0xffffffff00000000ull)));
+
+      /* Now insert other two quarters.	 */
+      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
+	   i < 64; i += 16, mask <<= 16)
 	{
 	  if ((val & mask) != mask)
-	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					 GEN_INT ((val >> i) & 0xffff)));
-	      return;
-	    }
+	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+				       GEN_INT ((val >> i) & 0xffff)));
 	}
-      gcc_unreachable ();
+      return;
     }
 
   if (zero_match == 2)

Reply via email to