This patch resolves the failure of pr43644-2.c in the testsuite, a code quality test I added back in July, which started failing because the code GCC generates for 128-bit values (and their parameter passing) has been in flux. After a few attempts at tweaking pattern constraints in the hope of convincing reload to produce a more aggressive (but potentially unsafe) register allocation, I think the best solution is to use a peephole2 to catch and clean up this specific case.
Specifically, the function:

unsigned __int128 foo(unsigned __int128 x, unsigned long long y)
{
  return x+y;
}

currently generates:

foo:    movq    %rdx, %rcx
        movq    %rdi, %rax
        movq    %rsi, %rdx
        addq    %rcx, %rax
        adcq    $0, %rdx
        ret

and with this patch/peephole2 now generates:

foo:    movq    %rdx, %rax
        movq    %rsi, %rdx
        addq    %rdi, %rax
        adcq    $0, %rdx
        ret

which I believe is optimal.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32} with no new failures. Ok for mainline?

2023-12-21  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        PR target/43644
        * config/i386/i386.md (define_peephole2): Tweak register allocation of *add<dwi>3_doubleword_concat_zext.

gcc/testsuite/ChangeLog
        PR target/43644
        * gcc.target/i386/pr43644-2.c: Expect 2 movq instructions.

Thanks in advance, and for your patience with this testsuite noise.
Roger
--
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e862368..5967208 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6428,6 +6428,38 @@ (clobber (reg:CC FLAGS_REG))])] "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);") +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "general_reg_operand")) + (set (match_operand:SWI48 2 "general_reg_operand") + (match_operand:SWI48 3 "general_reg_operand")) + (set (match_dup 1) (match_operand:SWI48 4 "general_reg_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI48 (match_dup 2) (match_dup 0)) + (match_dup 2))) + (set (match_dup 2) + (plus:SWI48 (match_dup 2) (match_dup 0)))])] + "REGNO (operands[0]) != REGNO (operands[1]) + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO (operands[0]) != REGNO (operands[3]) + && REGNO (operands[0]) != REGNO (operands[4]) + && REGNO (operands[1]) != REGNO (operands[2]) + && REGNO (operands[1]) != REGNO (operands[3]) + && REGNO (operands[1]) != REGNO (operands[4]) + && REGNO (operands[2]) != REGNO (operands[3]) + && REGNO (operands[2]) != REGNO (operands[4]) + && REGNO (operands[3]) != REGNO (operands[4]) + && peep2_reg_dead_p (4, operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 1) (match_dup 4)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI48 (match_dup 2) (match_dup 3)) + (match_dup 2))) + (set (match_dup 2) + (plus:SWI48 (match_dup 2) (match_dup 3)))])]) + (define_insn "*add<mode>_1" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r,r") (plus:SWI48 diff --git a/gcc/testsuite/gcc.target/i386/pr43644-2.c b/gcc/testsuite/gcc.target/i386/pr43644-2.c index d470b0a..3316ac6 100644 --- a/gcc/testsuite/gcc.target/i386/pr43644-2.c +++ b/gcc/testsuite/gcc.target/i386/pr43644-2.c @@ -6,4 +6,4 @@ unsigned __int128 foo(unsigned __int128 x, unsigned long long y) return x+y; } -/* { dg-final { 
scan-assembler-times "movq" 1 } } */ +/* { dg-final { scan-assembler-times "movq" 2 } } */