https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108787
--- Comment #6 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
--- gcc/config/rs6000/rs6000.md.jj 2023-01-16 11:52:16.036734757 +0100
+++ gcc/config/rs6000/rs6000.md 2023-02-14 19:46:13.915782702 +0100
@@ -3231,20 +3231,38 @@
        (plus:TI
          (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
                   (any_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
-         (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))]
+         (match_operand:TI 3 "gpc_reg_operand")))]
   "TARGET_MADDLD && TARGET_POWERPC64"
 {
   rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0);
   rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8);
+  rtx op3_lo = gen_rtx_SUBREG (DImode, operands[3], BYTES_BIG_ENDIAN ? 8 : 0);
+  rtx op3_hi = gen_rtx_SUBREG (DImode, operands[3], BYTES_BIG_ENDIAN ? 0 : 8);
+  rtx hi_temp = gen_reg_rtx (DImode);
 
-  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], operands[3]));
+  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], op3_lo));
 
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_<u>madddi4_highpart (op0_hi, operands[1], operands[2],
-                                        operands[3]));
+    emit_insn (gen_<u>madddi4_highpart (hi_temp, operands[1], operands[2],
+                                        op3_lo));
   else
-    emit_insn (gen_<u>madddi4_highpart_le (op0_hi, operands[1], operands[2],
-                                           operands[3]));
+    emit_insn (gen_<u>madddi4_highpart_le (hi_temp, operands[1], operands[2],
+                                           op3_lo));
+
+  if (<CODE> == SIGN_EXTEND)
+    {
+      rtx sgn = gen_reg_rtx (DImode);
+      rtx hi_temp2 = gen_reg_rtx (DImode);
+
+      emit_insn (gen_lshrdi3 (sgn, op3_lo, GEN_INT (63)));
+
+      emit_insn (gen_adddi3 (hi_temp2, hi_temp, sgn));
+
+      hi_temp = hi_temp2;
+    }
+
+  emit_insn (gen_adddi3 (op0_hi, hi_temp, op3_hi));
+
   DONE;
 })
 
gets it functionally correct.
But given

    __attribute__((noipa)) unsigned __int128
    foo (unsigned long long x, unsigned long long y, unsigned __int128 z)
    {
      return (unsigned __int128) x * y + z;
    }

    __attribute__((noipa)) __int128
    bar (long long x, long long y, __int128 z)
    {
      return (__int128) x * y + z;
    }

    __attribute__((noipa)) unsigned __int128
    baz (unsigned long long x, unsigned long long y, unsigned long long z)
    {
      return (unsigned __int128) x * y + z;
    }

    __attribute__((noipa)) __int128
    qux (long long x, long long y, long long z)
    {
      return (__int128) x * y + z;
    }

we used to emit in GCC 12 4/4/4/5 instructions (for foo/bar/baz/qux respectively):

    mulld 9,3,4
    mulhdu 4,3,4
    addc 3,9,5
    adde 4,4,6

and

    mulld 9,3,4
    mulhd 4,3,4
    addc 3,9,5
    adde 4,4,6

and

    mulld 9,3,4
    mulhdu 4,3,4
    addc 3,9,5
    addze 4,4

and

    mulld 9,3,4
    mulhd 4,3,4
    sradi 10,5,63
    addc 3,9,5
    adde 4,4,10

Now, with the patch we get 3/5/3/6 instructions:

    maddhdu 9,3,4,5
    maddld 3,3,4,5
    add 4,9,6

and

    maddhd 9,3,4,5
    srdi 10,5,63
    maddld 3,3,4,5
    add 9,9,10
    add 4,9,6

and

    mr 9,3
    maddld 3,3,4,5
    maddhdu 4,9,4,5

and

    maddhd 9,3,4,5
    srdi 8,5,63
    sradi 10,5,63
    maddld 3,3,4,5
    add 9,9,8
    add 4,9,10

So, unless we can somehow check for a sign-extended operands[3], we should either not define maddditi4 at all, or FAIL in it, or expand it to the equivalent of what we used to emit before.