On 07/08/14 12:32, Kyrill Tkachov wrote:
> 
> On 16/05/14 13:35, Richard Earnshaw wrote:
>> On 08/05/14 18:36, Ian Bolton wrote:
>>> Hi,
>>>
>>> It currently takes 4 instructions to generate certain immediates on
>>> AArch64 (unless we put them in the constant pool).
>>>
>>> For example ...
>>>
>>>    long long
>>>    ffffbeefcafebabe ()
>>>    {
>>>      return 0xFFFFBEEFCAFEBABEll;
>>>    }
>>>
>>> leads to ...
>>>
>>>    mov x0, 47806
>>>    movk x0, 0xcafe, lsl 16
>>>    movk x0, 0xbeef, lsl 32
>>>    orr x0, x0, -281474976710656
>>>
>>> The above case is tackled in this patch by employing MOVN
>>> to generate the top 32-bits in a single instruction ...
>>>
>>>    mov x0, -71536975282177
>>>    movk x0, 0xcafe, lsl 16
>>>    movk x0, 0xbabe, lsl 0
>>>
>>> (Note that where at least two half-words are 0xffff, existing
>>> code that does the immediate in two instructions is still used.)
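
As a side note (not part of the patch): the effect of both sequences can be checked with a small C model of the wide-move semantics, assuming MOVZ writes imm16 << shift with the other bits zero, MOVN writes the bitwise NOT of that, and MOVK replaces only the targeted 16-bit field. The movz/movn/movk helpers below are just names used for this sketch:

#include <assert.h>
#include <stdint.h>

/* Hypothetical helpers modelling the A64 wide-move instructions.  */
static uint64_t
movz (uint64_t imm16, int shift)
{
  return imm16 << shift;
}

static uint64_t
movn (uint64_t imm16, int shift)
{
  return ~(imm16 << shift);
}

static uint64_t
movk (uint64_t reg, uint64_t imm16, int shift)
{
  return (reg & ~(0xffffull << shift)) | (imm16 << shift);
}

int
main (void)
{
  /* Old four-instruction sequence: mov, movk, movk, orr.  */
  uint64_t x = movz (0xbabe, 0);
  x = movk (x, 0xcafe, 16);
  x = movk (x, 0xbeef, 32);
  x |= 0xffff000000000000ull;   /* orr x0, x0, -281474976710656 */
  assert (x == 0xFFFFBEEFCAFEBABEull);

  /* New three-instruction sequence; the assembler encodes
     "mov x0, -71536975282177" as MOVN x0, 0x4110, lsl 32,
     i.e. ~(0x4110 << 32) == 0xffffbeefffffffff.  */
  uint64_t y = movn (0x4110, 32);
  y = movk (y, 0xcafe, 16);
  y = movk (y, 0xbabe, 0);
  assert (y == 0xFFFFBEEFCAFEBABEull);
  return 0;
}
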
>>>
>>> Tested on standard gcc regressions and the attached test case.
>>>
>>> OK for commit?
>> What about:
>>
>> long long a()
>> {
>>    return 0x1234ffff56789abcll;
>> }
>>
>> long long b()
>> {
>>    return 0x12345678ffff9abcll;
>> }
>>
>> long long c()
>> {
>>    return 0x123456789abcffffll;
>> }
>>
>> ?
>>
>> Surely these can also benefit from this sort of optimization, but it
>> looks as though you only handle the top 16 bits being set.
> 
> Hi Richard,
> 
> How about this rework of the patch?
> 
> For code:
> 
> long long foo ()
> {
>    return 0xFFFFBEEFCAFEBABEll;
> }
> 
> long long a()
> {
>    return 0x1234ffff56789abcll;
> }
> 
> long long b()
> {
>    return 0x12345678ffff9abcll;
> }
> 
> long long c()
> {
>    return 0x123456789abcffffll;
> }
> 
> we now generate:
> foo:
>          mov     x0, -17730
>          movk    x0, 0xcafe, lsl 16
>          movk    x0, 0xbeef, lsl 32
>          ret
>          .size   foo, .-foo
>          .align  2
>          .global a
>          .type   a, %function
> a:
>          mov     x0, -25924
>          movk    x0, 0x5678, lsl 16
>          movk    x0, 0x1234, lsl 48
>          ret
>          .size   a, .-a
>          .align  2
>          .global b
>          .type   b, %function
> b:
>          mov     x0, -25924
>          movk    x0, 0x5678, lsl 32
>          movk    x0, 0x1234, lsl 48
>          ret
>          .size   b, .-b
>          .align  2
>          .global c
>          .type   c, %function
> c:
>          mov     x0, -1698889729
>          movk    x0, 0x5678, lsl 32
>          movk    x0, 0x1234, lsl 48
>          ret
> 
> 
> 3 instructions are used in each case.
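
As a quick cross-check (a sketch, not part of the patch): once the first mov has set one non-0xffff half-word and left every other bit 1, each remaining half-word that is not 0xffff costs exactly one movk, so the instruction count is simply the number of non-0xffff half-words. Counting them for the four constants above gives 3 in each case:

#include <stdint.h>
#include <stdio.h>

/* Count instructions for a MOVN-then-MOVK materialisation: one MOVN for the
   first non-0xffff half-word (everything else starts as all-ones), then one
   MOVK per further half-word that is not 0xffff.  */
static int
movn_movk_count (uint64_t val)
{
  int insns = 0;
  for (int i = 0; i < 64; i += 16)
    if (((val >> i) & 0xffff) != 0xffff)
      insns++;
  return insns;
}

int
main (void)
{
  uint64_t tests[] = { 0xFFFFBEEFCAFEBABEull, 0x1234ffff56789abcull,
                       0x12345678ffff9abcull, 0x123456789abcffffull };
  for (int i = 0; i < 4; i++)
    printf ("%#018llx -> %d insns\n",
            (unsigned long long) tests[i], movn_movk_count (tests[i]));
  /* Each line reports 3, matching the assembly above.  */
  return 0;
}
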
> 
> Thanks,
> Kyrill
> 
> 2014-08-07  Ian Bolton  <ian.bol...@arm.com>
>                      Kyrylo Tkachov  <kyrylo.tkac...@arm.com>
> 
>          * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
>          Use MOVN when one of the half-words is 0xffff.
> 
> 
> aarch64-movn-pattern-patch-v3.patch
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 0a7f441..2db91c7 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>    unsigned HOST_WIDE_INT val;
>    bool subtargets;
>    rtx subtarget;
> -  int one_match, zero_match;
> +  int one_match, zero_match, first_not_ffff_match;
>  
>    gcc_assert (mode == SImode || mode == DImode);
>  
> @@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>    one_match = 0;
>    zero_match = 0;
>    mask = 0xffff;
> +  first_not_ffff_match = -1;
>  
>    for (i = 0; i < 64; i += 16, mask <<= 16)
>      {
> -      if ((val & mask) == 0)
> -     zero_match++;
> -      else if ((val & mask) == mask)
> +      if ((val & mask) == mask)
>       one_match++;
> +      else
> +     {
> +       if (first_not_ffff_match < 0)
> +         first_not_ffff_match = i;
> +       if ((val & mask) == 0)
> +         zero_match++;
> +     }
>      }
>  
>    if (one_match == 2)
>      {
> -      mask = 0xffff;
> -      for (i = 0; i < 64; i += 16, mask <<= 16)
> +      /* Set one of the quarters and then insert back into result.  */
> +      mask = 0xffffll << first_not_ffff_match;
> +      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
> +      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
> +                              GEN_INT ((val >> first_not_ffff_match)
> +                                       & 0xffff)));
> +      return;
> +    }
> +
> +  if (one_match == 1)

I think this should be (one_match > zero_match).

Otherwise constants such as


  0x00001234ffff0000ll

might end up taking three rather than two insns.

R.
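
For concreteness (again a sketch, not part of the patch): 0x00001234ffff0000 has one 0xffff half-word and two zero half-words, so a MOVN-style sequence needs one instruction per non-0xffff half-word (three), while the existing MOVZ-style path needs one per non-zero half-word (two). A small count makes that visible:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t val = 0x00001234ffff0000ull;
  int one_match = 0, zero_match = 0, movn_insns = 0, movz_insns = 0;

  for (int i = 0; i < 64; i += 16)
    {
      uint64_t hw = (val >> i) & 0xffff;
      if (hw == 0xffff)
        one_match++;
      if (hw == 0)
        zero_match++;
      if (hw != 0xffff)
        movn_insns++;   /* MOVN route: one insn per non-0xffff half-word.  */
      if (hw != 0)
        movz_insns++;   /* MOVZ route: one insn per non-zero half-word.  */
    }

  printf ("one_match=%d zero_match=%d movn=%d movz=%d\n",
          one_match, zero_match, movn_insns, movz_insns);
  /* Prints: one_match=1 zero_match=2 movn=3 movz=2, so the MOVN route
     should only be taken when one_match > zero_match.  */
  return 0;
}
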

> +    {
> +      /* Set either first three quarters or all but the third.        */
> +      mask = 0xffffll << (16 - first_not_ffff_match);
> +      emit_insn (gen_rtx_SET (VOIDmode, dest,
> +                           GEN_INT (val | mask | 0xffffffff00000000ull)));
> +
> +      /* Now insert other two quarters.       */
> +      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
> +        i < 64; i += 16, mask <<= 16)
>       {
>         if ((val & mask) != mask)
> -         {
> -           emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
> -           emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> -                                      GEN_INT ((val >> i) & 0xffff)));
> -           return;
> -         }
> +         emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> +                                    GEN_INT ((val >> i) & 0xffff)));
>       }
> -      gcc_unreachable ();
> +      return;
>      }
>  
>    if (zero_match == 2)
> 

