https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108787

--- Comment #6 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
--- gcc/config/rs6000/rs6000.md.jj      2023-01-16 11:52:16.036734757 +0100
+++ gcc/config/rs6000/rs6000.md 2023-02-14 19:46:13.915782702 +0100
@@ -3231,20 +3231,38 @@
        (plus:TI
          (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
                   (any_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
-         (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))]
+         (match_operand:TI 3 "gpc_reg_operand")))]
   "TARGET_MADDLD && TARGET_POWERPC64"
 {
   rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0);
   rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8);
+  rtx op3_lo = gen_rtx_SUBREG (DImode, operands[3], BYTES_BIG_ENDIAN ? 8 : 0);
+  rtx op3_hi = gen_rtx_SUBREG (DImode, operands[3], BYTES_BIG_ENDIAN ? 0 : 8);
+  rtx hi_temp = gen_reg_rtx (DImode);

-  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], operands[3]));
+  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], op3_lo));

   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_<u>madddi4_highpart (op0_hi, operands[1], operands[2],
-                                       operands[3]));
+    emit_insn (gen_<u>madddi4_highpart (hi_temp, operands[1], operands[2],
+                                       op3_lo));
   else
-    emit_insn (gen_<u>madddi4_highpart_le (op0_hi, operands[1], operands[2],
-                                          operands[3]));
+    emit_insn (gen_<u>madddi4_highpart_le (hi_temp, operands[1], operands[2],
+                                          op3_lo));
+
+  if (<CODE> == SIGN_EXTEND)
+    {
+      rtx sgn = gen_reg_rtx (DImode);
+      rtx hi_temp2 = gen_reg_rtx (DImode);
+
+      emit_insn (gen_lshrdi3 (sgn, op3_lo, GEN_INT (63)));
+
+      emit_insn (gen_adddi3 (hi_temp2, hi_temp, sgn));
+
+      hi_temp = hi_temp2;
+    }
+
+  emit_insn (gen_adddi3 (op0_hi, hi_temp, op3_hi));
+
   DONE;
 })

The patch above gets it functionally correct.
But given
/* Unsigned test case with a full 128-bit addend: x is cast to
   unsigned __int128, multiplied by y, and the 128-bit z is added.
   The expression is kept in exactly this form because it is the
   reproducer whose generated code is being compared.  */
__attribute__((noipa)) unsigned __int128
foo (unsigned long long x, unsigned long long y, unsigned __int128 z)
{
  return (unsigned __int128) x * y + z;
}

/* Signed test case with a full 128-bit addend: x is cast to
   __int128, multiplied by y, and the 128-bit z is added.
   The expression is kept in exactly this form because it is the
   reproducer whose generated code is being compared.  */
__attribute__((noipa)) __int128
bar (long long x, long long y, __int128 z)
{
  return (__int128) x * y + z;
}

/* Unsigned test case with a 64-bit addend: x is cast to
   unsigned __int128 and multiplied by y; z is an unsigned long long,
   so it is converted to the 128-bit unsigned type before the addition
   (i.e. its upper half is zero).  */
__attribute__((noipa)) unsigned __int128
baz (unsigned long long x, unsigned long long y, unsigned long long z)
{
  return (unsigned __int128) x * y + z;
}

/* Signed test case with a 64-bit addend: x is cast to __int128 and
   multiplied by y; z is a long long, so it is converted to the 128-bit
   signed type before the addition (i.e. its upper half is the sign
   extension of z).  */
__attribute__((noipa)) __int128
qux (long long x, long long y, long long z)
{
  return (__int128) x * y + z;
}
in GCC 12 we used to emit 4/4/4/5 instructions, respectively:
        mulld 9,3,4
        mulhdu 4,3,4
        addc 3,9,5
        adde 4,4,6
and
        mulld 9,3,4
        mulhd 4,3,4
        addc 3,9,5
        adde 4,4,6
and
        mulld 9,3,4
        mulhdu 4,3,4
        addc 3,9,5
        addze 4,4
and
        mulld 9,3,4
        mulhd 4,3,4
        sradi 10,5,63
        addc 3,9,5
        adde 4,4,10
Now, with the patch we get 3/5/3/6 instructions:
        maddhdu 9,3,4,5
        maddld 3,3,4,5
        add 4,9,6
and
        maddhd 9,3,4,5
        srdi 10,5,63
        maddld 3,3,4,5
        add 9,9,10
        add 4,9,6
and
        mr 9,3
        maddld 3,3,4,5
        maddhdu 4,9,4,5
and
        maddhd 9,3,4,5
        srdi 8,5,63
        sradi 10,5,63
        maddld 3,3,4,5
        add 9,9,8
        add 4,9,10
So, unless we can somehow check for a sign-extended operands[3], we should
either not define maddditi3 at all, or FAIL in it, or expand it to the
equivalent of what we used to emit before.

Reply via email to