On 07/08/14 13:57, Kyrill Tkachov wrote:
> 
> On 07/08/14 13:46, Richard Earnshaw wrote:
>> On 07/08/14 12:32, Kyrill Tkachov wrote:
>>> On 16/05/14 13:35, Richard Earnshaw wrote:
>>>> On 08/05/14 18:36, Ian Bolton wrote:
>>>>> Hi,
>>>>>
>>>>> It currently takes 4 instructions to generate certain immediates on
>>>>> AArch64 (unless we put them in the constant pool).
>>>>>
>>>>> For example ...
>>>>>
>>>>>     long long
>>>>>     ffffbeefcafebabe ()
>>>>>     {
>>>>>       return 0xFFFFBEEFCAFEBABEll;
>>>>>     }
>>>>>
>>>>> leads to ...
>>>>>
>>>>>     mov x0, 47806
>>>>>     movk x0, 0xcafe, lsl 16
>>>>>     movk x0, 0xbeef, lsl 32
>>>>>     orr x0, x0, -281474976710656
>>>>>
>>>>> The above case is tackled in this patch by employing MOVN
>>>>> to generate the top 32 bits in a single instruction ...
>>>>>
>>>>>     mov x0, -71536975282177
>>>>>     movk x0, 0xcafe, lsl 16
>>>>>     movk x0, 0xbabe, lsl 0
>>>>>
>>>>> (Note that where at least two half-words are 0xffff, the existing
>>>>> code that generates the immediate in two instructions is still used.)
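>>>>>
>>>>> In standalone C, the idea can be sketched roughly like this (my
>>>>> illustration, not the GCC code itself): count the all-zeros and
>>>>> all-ones half-words of the constant, and prefer starting from MOVN
>>>>> (all bits set) when more half-words are 0xffff than 0x0000:
>>>>>
>>>>>     #include <stdio.h>
>>>>>     #include <stdint.h>
>>>>>
>>>>>     int main (void)
>>>>>     {
>>>>>       uint64_t val = 0xFFFFBEEFCAFEBABEull;
>>>>>       int zeros = 0, ones = 0;
>>>>>
>>>>>       /* Scan the four 16-bit chunks of the constant.  */
>>>>>       for (int i = 0; i < 64; i += 16)
>>>>>         {
>>>>>           uint16_t chunk = (val >> i) & 0xffff;
>>>>>           if (chunk == 0)
>>>>>             zeros++;
>>>>>           else if (chunk == 0xffff)
>>>>>             ones++;
>>>>>         }
>>>>>
>>>>>       /* Here zeros == 0 and ones == 1: starting from MOVN leaves
>>>>>          fewer MOVK fix-ups than starting from MOVZ.  */
>>>>>       printf ("start with %s\n", ones > zeros ? "MOVN" : "MOVZ");
>>>>>       return 0;
>>>>>     }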
>>>>>
>>>>> Tested with the standard gcc regression suite and the attached test case.
>>>>>
>>>>> OK for commit?
>>>> What about:
>>>>
>>>> long long a()
>>>> {
>>>>     return 0x1234ffff56789abcll;
>>>> }
>>>>
>>>> long long b()
>>>> {
>>>>     return 0x12345678ffff9abcll;
>>>> }
>>>>
>>>> long long c()
>>>> {
>>>>     return 0x123456789abcffffll;
>>>> }
>>>>
>>>> ?
>>>>
>>>> Surely these can also benefit from this sort of optimization, but it
>>>> looks as though you only handle the top 16 bits being set.
>>> Hi Richard,
>>>
>>> How about this rework of the patch?
>>>
>>> For code:
>>>
>>> long long foo ()
>>> {
>>>     return 0xFFFFBEEFCAFEBABEll;
>>> }
>>>
>>> long long a()
>>> {
>>>     return 0x1234ffff56789abcll;
>>> }
>>>
>>> long long b()
>>> {
>>>     return 0x12345678ffff9abcll;
>>> }
>>>
>>> long long c()
>>> {
>>>     return 0x123456789abcffffll;
>>> }
>>>
>>> we now generate:
>>> foo:
>>>           mov     x0, -17730
>>>           movk    x0, 0xcafe, lsl 16
>>>           movk    x0, 0xbeef, lsl 32
>>>           ret
>>>           .size   foo, .-foo
>>>           .align  2
>>>           .global a
>>>           .type   a, %function
>>> a:
>>>           mov     x0, -25924
>>>           movk    x0, 0x5678, lsl 16
>>>           movk    x0, 0x1234, lsl 48
>>>           ret
>>>           .size   a, .-a
>>>           .align  2
>>>           .global b
>>>           .type   b, %function
>>> b:
>>>           mov     x0, -25924
>>>           movk    x0, 0x5678, lsl 32
>>>           movk    x0, 0x1234, lsl 48
>>>           ret
>>>           .size   b, .-b
>>>           .align  2
>>>           .global c
>>>           .type   c, %function
>>> c:
>>>           mov     x0, -1698889729
>>>           movk    x0, 0x5678, lsl 32
>>>           movk    x0, 0x1234, lsl 48
>>>           ret
>>>
>>>
>>> Three instructions are used in each case.
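>>>
>>> As a quick sanity check (my own snippet, not part of the patch),
>>> replaying the sequence emitted for c() in plain C reproduces the
>>> constant:
>>>
>>>     #include <assert.h>
>>>     #include <stdint.h>
>>>
>>>     int main (void)
>>>     {
>>>       /* mov  x0, -1698889729  (i.e. 0xffffffff9abcffff)  */
>>>       uint64_t x = (uint64_t) -1698889729ll;
>>>       /* movk x0, 0x5678, lsl 32  */
>>>       x = (x & ~(0xffffull << 32)) | (0x5678ull << 32);
>>>       /* movk x0, 0x1234, lsl 48  */
>>>       x = (x & ~(0xffffull << 48)) | (0x1234ull << 48);
>>>       assert (x == 0x123456789abcffffull);
>>>       return 0;
>>>     }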
>>>
>>> Thanks,
>>> Kyrill
>>>
>>> 2014-08-07  Ian Bolton  <ian.bol...@arm.com>
>>>                       Kyrylo Tkachov  <kyrylo.tkac...@arm.com>
>>>
>>>           * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
>>>           Use MOVN when one of the half-words is 0xffff.
>>>
>>>
>>> aarch64-movn-pattern-patch-v3.patch
>>>
>>>
>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>> index 0a7f441..2db91c7 100644
>>> --- a/gcc/config/aarch64/aarch64.c
>>> +++ b/gcc/config/aarch64/aarch64.c
>>> @@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>>>     unsigned HOST_WIDE_INT val;
>>>     bool subtargets;
>>>     rtx subtarget;
>>> -  int one_match, zero_match;
>>> +  int one_match, zero_match, first_not_ffff_match;
>>>   
>>>     gcc_assert (mode == SImode || mode == DImode);
>>>   
>>> @@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>>>     one_match = 0;
>>>     zero_match = 0;
>>>     mask = 0xffff;
>>> +  first_not_ffff_match = -1;
>>>   
>>>     for (i = 0; i < 64; i += 16, mask <<= 16)
>>>       {
>>> -      if ((val & mask) == 0)
>>> -       zero_match++;
>>> -      else if ((val & mask) == mask)
>>> +      if ((val & mask) == mask)
>>>         one_match++;
>>> +      else
>>> +       {
>>> +         if (first_not_ffff_match < 0)
>>> +           first_not_ffff_match = i;
>>> +         if ((val & mask) == 0)
>>> +           zero_match++;
>>> +       }
>>>       }
>>>   
>>>     if (one_match == 2)
>>>       {
>>> -      mask = 0xffff;
>>> -      for (i = 0; i < 64; i += 16, mask <<= 16)
>>> +      /* Set one of the quarters and then insert back into result.  */
>>> +      mask = 0xffffll << first_not_ffff_match;
>>> +      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
>>> +      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
>>> +                                 GEN_INT ((val >> first_not_ffff_match)
>>> +                                           & 0xffff)));
>>> +      return;
>>> +    }
>>> +
>>> +  if (one_match == 1)
>> I think this should be (one_match > zero_match).
>>
>> Otherwise constants such as
>>
>>
>>    0x00001234ffff0000ll
>>
>> might end up taking three rather than two insns.
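>>
>> Counting the half-words of that constant shows why (an illustrative
>> snippet of mine, not code from the patch):
>>
>>     #include <stdio.h>
>>     #include <stdint.h>
>>
>>     int main (void)
>>     {
>>       uint64_t val = 0x00001234ffff0000ull;
>>       int one_match = 0, zero_match = 0;
>>
>>       for (int i = 0; i < 64; i += 16)
>>         {
>>           uint16_t chunk = (val >> i) & 0xffff;
>>           one_match += (chunk == 0xffff);
>>           zero_match += (chunk == 0);
>>         }
>>
>>       /* Prints one_match=1 zero_match=2: "one_match == 1" would take
>>          the MOVN path (3 insns), while "one_match > zero_match" is
>>          false, keeping the 2-insn MOVZ-based sequence.  */
>>       printf ("one_match=%d zero_match=%d\n", one_match, zero_match);
>>       return 0;
>>     }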
> 
> You're right, we generate:
>          mov     x0, -65536
>          movk    x0, 0x1234, lsl 32
>          and     x0, x0, 281474976710655
> 
> with your suggestion we can improve this to:
>          mov     x0, 4294901760
>          movk    x0, 0x1234, lsl 32
> 
> Ok with that change then?
> 
> Kyrill
> 
> 2014-08-07  Ian Bolton<ian.bol...@arm.com>
>              Kyrylo Tkachov<kyrylo.tkac...@arm.com>
> 
>           * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
>           Use MOVN when one of the half-words is 0xffff.
> 
> 

OK.

R.

