On 19/10/2019 17:31, Segher Boessenkool wrote:
> Hi Richard,
>
> On Fri, Oct 18, 2019 at 08:48:31PM +0100, Richard Earnshaw wrote:
>>
>> This series of patches rewrites all the DImode arithmetic patterns for
>> the Arm backend when compiling for Arm or Thumb2 to split the
>> operations during expand (the thumb1 code is unchanged and cannot
>> benefit from early splitting as we are unable to expose the carry
>> flag).
>
> Very nice :-)
>
> I have a bunch of testcases from when I did something similar for PowerPC
> that I wanted to test... But I cannot get your series to apply. Do you
> have a git repo I can pull from?
>
Perhaps because it's already committed to trunk?
> Here is one test case (it's a bit geared towards what our ISA can do):
>
> ===
> typedef unsigned int u32;
> typedef unsigned long long u64;
>
> u64 add(u64 a, u64 b) { return a + b; }
> u64 add1(u64 a) { return a + 1; }
> u64 add42(u64 a) { return a + 42; }
> u64 addm1(u64 a) { return a - 1; }
> u64 addff(u64 a) { return a + 0xffffffffULL; }
> u64 addH(u64 a) { return a + 0x123400005678ULL; }
> u64 addH0(u64 a) { return a + 0x123400000000ULL; }
> u64 addS(u64 a, u32 b) { return a + b; }
> u64 addSH(u64 a, u32 b) { return a + ((u64)b << 32); }
> u64 addB1(u64 a) { return a + 0x100000000ULL; }
> u64 addB8(u64 a) { return a + 0x800000000ULL; }
>
> u64 addSH42(u64 a, u32 b) { return a + ((u64)b << 32) + 42; }
> u64 addSHm1(u64 a, u32 b) { return a + ((u64)b << 32) - 1; }
> u64 addSHff(u64 a, u32 b) { return a + ((u64)b << 32) + 0xffffffffULL; }
> ===
>
> rs6000 -m32 currently has non-optimal code for addm1, addSHm1; trunk arm
> has non-optimal code for addH0, addSH, addB1, addB8, addSH42, addSHm1, and
> addSHff if I understand well enough. So I'd love to see what it does with
> your series applied :-)
>
>
> Segher
>
We do pretty well on this. Only addSHm1 needs three insns (except where
the constant isn't valid for arm), and I think that's the minimum for
this case anyway. Several of the tests only need one insn.
R.
.arch armv8-a
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "lltest.c"
.text
.align 2
.global add
.syntax unified
.arm
.fpu softvfp
.type add, %function
@ u64 add(u64 a, u64 b)
@ AAPCS: a in r0 (lo)/r1 (hi), b in r2 (lo)/r3 (hi); result in r0/r1.
add:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
adds r0, r0, r2 @ lo word: r0 = a.lo + b.lo, S suffix sets carry out
adc r1, r1, r3 @ hi word: r1 = a.hi + b.hi + carry
bx lr
.size add, .-add
.align 2
.global add1
.syntax unified
.arm
.fpu softvfp
.type add1, %function
@ u64 add1(u64 a) -- a + 1; a in r0 (lo)/r1 (hi), result in r0/r1.
add1:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
adds r0, r0, #1 @ lo word +1, carry set on wraparound
adc r1, r1, #0 @ propagate carry into hi word
bx lr
.size add1, .-add1
.align 2
.global add42
.syntax unified
.arm
.fpu softvfp
.type add42, %function
@ u64 add42(u64 a) -- a + 42; a in r0 (lo)/r1 (hi), result in r0/r1.
add42:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
adds r0, r0, #42 @ lo word +42, carry set on wraparound
adc r1, r1, #0 @ propagate carry into hi word
bx lr
.size add42, .-add42
.align 2
.global addm1
.syntax unified
.arm
.fpu softvfp
.type addm1, %function
@ u64 addm1(u64 a) -- a - 1; a in r0 (lo)/r1 (hi), result in r0/r1.
@ Adding -1 is emitted as a 64-bit subtract of 1.
addm1:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
subs r0, r0, #1 @ lo word -1, C cleared only when a.lo == 0 (borrow)
sbc r1, r1, #0 @ hi word - borrow (sbc subtracts 1-C)
bx lr
.size addm1, .-addm1
.align 2
.global addff
.syntax unified
.arm
.fpu softvfp
.type addff, %function
@ u64 addff(u64 a) -- a + 0xffffffff; a in r0 (lo)/r1 (hi), result in r0/r1.
@ Trick: a.lo + 0xffffffff == a.lo - 1 (mod 2^32), and that addition
@ carries out exactly when a.lo != 0 -- the same condition under which
@ "subs r0, #1" leaves C set (no borrow). So subs produces both the low
@ word and the correct carry for the high-word adc, avoiding a 32-bit
@ constant load.
addff:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
subs r0, r0, #1 @ lo = a.lo - 1; C = (a.lo != 0), the add's carry-out
adc r1, r1, #0 @ hi word + carry
bx lr
.size addff, .-addff
.align 2
.global addH
.syntax unified
.arm
.fpu softvfp
.type addH, %function
@ u64 addH(u64 a) -- a + 0x123400005678; a in r0 (lo)/r1 (hi).
@ Constant split: lo word 0x5678 (22136), hi word 0x1234 (4660),
@ each materialized with a single movw.
addH:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
movw r3, #22136 @ r3 = 0x5678 (low half of constant)
adds r0, r0, r3 @ lo word add, sets carry
movw r3, #4660 @ r3 = 0x1234 (high half of constant)
adc r1, r3, r1 @ hi word: a.hi + 0x1234 + carry
bx lr
.size addH, .-addH
.align 2
.global addH0
.syntax unified
.arm
.fpu softvfp
.type addH0, %function
@ u64 addH0(u64 a) -- a + 0x123400000000; a in r0 (lo)/r1 (hi).
@ The constant's low 32 bits are zero, so the low word is untouched and
@ no carry can occur: only the high word needs 0x1234 added. 0x1234 is
@ not a valid ARM modified immediate, so it is split into two that are:
@ 0x1200 (4608) + 0x34 (52).
addH0:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
add r1, r1, #4608 @ hi word += 0x1200
add r1, r1, #52 @ hi word += 0x34 (total 0x1234)
bx lr
.size addH0, .-addH0
.align 2
.global addS
.syntax unified
.arm
.fpu softvfp
.type addS, %function
@ u64 addS(u64 a, u32 b) -- a + (u64)b; a in r0 (lo)/r1 (hi), b in r2.
@ b is zero-extended, so its high word is 0: only a carry can reach r1.
addS:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
adds r0, r2, r0 @ lo word: a.lo + b, sets carry
adc r1, r1, #0 @ hi word + carry only
bx lr
.size addS, .-addS
.align 2
.global addSH
.syntax unified
.arm
.fpu softvfp
.type addSH, %function
@ u64 addSH(u64 a, u32 b) -- a + ((u64)b << 32); a in r0/r1, b in r2.
@ The addend's low word is zero, so the low word and carry are
@ unaffected: a single high-word add suffices (r0 passes through).
addSH:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
add r1, r2, r1 @ hi word: a.hi + b; no flags needed
bx lr
.size addSH, .-addSH
.align 2
.global addB1
.syntax unified
.arm
.fpu softvfp
.type addB1, %function
@ u64 addB1(u64 a) -- a + 0x100000000; a in r0 (lo)/r1 (hi).
@ Low 32 bits of the constant are zero: just hi word + 1, no carry chain.
addB1:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
add r1, r1, #1 @ hi word += 1 (constant >> 32)
bx lr
.size addB1, .-addB1
.align 2
.global addB8
.syntax unified
.arm
.fpu softvfp
.type addB8, %function
@ u64 addB8(u64 a) -- a + 0x800000000; a in r0 (lo)/r1 (hi).
@ Low 32 bits of the constant are zero: just hi word + 8, no carry chain.
addB8:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
add r1, r1, #8 @ hi word += 8 (constant >> 32)
bx lr
.size addB8, .-addB8
.align 2
.global addSH42
.syntax unified
.arm
.fpu softvfp
.type addSH42, %function
@ u64 addSH42(u64 a, u32 b) -- a + ((u64)b << 32) + 42;
@ a in r0 (lo)/r1 (hi), b in r2.
@ The two adds fuse: the +42 affects only the low word, b only the high
@ word, so one adds/adc pair handles both.
addSH42:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
adds r0, r0, #42 @ lo word +42, sets carry
adc r1, r1, r2 @ hi word: a.hi + b + carry
bx lr
.size addSH42, .-addSH42
.align 2
.global addSHm1
.syntax unified
.arm
.fpu softvfp
.type addSHm1, %function
@ u64 addSHm1(u64 a, u32 b) -- a + ((u64)b << 32) - 1;
@ a in r0 (lo)/r1 (hi), b in r2.
@ Three data insns: the -1 needs a borrow chain (subs/sbc), then b is
@ folded into the high word. Per the email above, this is believed
@ minimal for this case because the -1 borrow and the +b cannot share
@ one flag-using instruction here.
addSHm1:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
subs r0, r0, #1 @ lo word -1, C cleared on borrow (a.lo == 0)
sbc r1, r1, #0 @ hi word - borrow
add r1, r2, r1 @ hi word += b
bx lr
.size addSHm1, .-addSHm1
.align 2
.global addSHff
.syntax unified
.arm
.fpu softvfp
.type addSHff, %function
@ u64 addSHff(u64 a, u32 b) -- a + ((u64)b << 32) + 0xffffffff;
@ a in r0 (lo)/r1 (hi), b in r2.
@ Same trick as addff: "subs r0, #1" yields a.lo + 0xffffffff in r0 and
@ leaves C set exactly when that addition would carry (a.lo != 0). The
@ adc then folds both that carry and b into the high word in one insn.
addSHff:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
subs r0, r0, #1 @ lo = a.lo - 1; C = carry-out of a.lo + 0xffffffff
adc r1, r1, r2 @ hi word: a.hi + b + carry
bx lr
.size addSHff, .-addSHff
.ident "GCC: (trunk) 10.0.0 20191018 (experimental) [master revision
54f1e150a38:1ddbabe127b:e55e9d95a5ab8397197a5e358ba0185f9471f043]"