From: Pan Li <pan2...@intel.com>

This patch implements the scalar unsigned SAT_MUL for the form that
widens the product to uint128_t, aka:

  NT __attribute__((noinline))
  sat_u_mul_##NT##_fmt_1 (NT a, NT b)
  {
    uint128_t x = (uint128_t)a * (uint128_t)b;
    NT max = -1;
    if (x > (uint128_t)(max))
      return max;
    else
      return (NT)x;
  }

Take uint64_t and uint8_t as examples:

Before this patch for uint8_t:
  10   │ sat_u_mul_uint8_t_from_uint128_t_fmt_1:
  11   │     mulhu   a5,a0,a1
  12   │     mul a0,a0,a1
  13   │     bne a5,zero,.L3
  14   │     li  a5,255
  15   │     bleu    a0,a5,.L4
  16   │ .L3:
  17   │     li  a0,255
  18   │ .L4:
  19   │     andi    a0,a0,0xff
  20   │     ret

After this patch for uint8_t:
  10   │ sat_u_mul_uint8_t_from_uint128_t_fmt_1:
  11   │     mul a0,a0,a1
  12   │     li  a5,255
  13   │     sltu    a5,a5,a0
  14   │     neg a5,a5
  15   │     or  a0,a0,a5
  16   │     andi    a0,a0,0xff
  17   │     ret

Before this patch for uint64_t:
  10   │ sat_u_mul_uint64_t_from_uint128_t_fmt_1:
  11   │     mulhu   a5,a0,a1
  12   │     mul a0,a0,a1
  13   │     beq a5,zero,.L4
  14   │     li  a0,-1
  15   │ .L4:
  16   │     ret

After this patch for uint64_t:
  10   │ sat_u_mul_uint64_t_from_uint128_t_fmt_1:
  11   │     mulhsu  a5,a1,a0
  12   │     mul a0,a0,a1
  13   │     snez    a5,a5
  14   │     neg a5,a5
  15   │     or  a0,a0,a5
  16   │     ret

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (riscv_expand_usmul): Add new func
        decl.
        * config/riscv/riscv.cc (riscv_expand_xmode_usmul): Add new func
        to expand Xmode SAT_MUL.
        (riscv_expand_non_xmode_usmul): Ditto but for non-Xmode.
        (riscv_expand_usmul): Add new func to implement SAT_MUL.
        * config/riscv/riscv.md (usmul<mode>3): Add new pattern to match
        standard name usmul.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc       | 82 +++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.md       | 11 +++++
 3 files changed, 94 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a0331204479..38f63ea8424 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -137,6 +137,7 @@ extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_sssub (rtx, rtx, rtx);
+extern void riscv_expand_usmul (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 extern void riscv_expand_sstrunc (rtx, rtx);
 extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index bbc7547d385..36f66f8b899 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13265,6 +13265,88 @@ riscv_expand_sssub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implement the Xmode usmul.
+
+   dest = SAT_MUL (x, y);
+   =>
+   _1 = x * y;
+   _2 = mulhu (x, y);
+   _overflow_p = _2 != 0;
+   _mask = - _overflow_p;
+   dest = _1 | _mask;  */
+
+static void
+riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode == Xmode);
+
+  rtx mul = gen_reg_rtx (Xmode);
+  rtx mulhu = gen_reg_rtx (Xmode);
+  rtx overflow_p = gen_reg_rtx (Xmode);
+
+  riscv_emit_binary (MULT, mul, x, y);
+  /* Both inputs are unsigned, so take the high part with mulhu
+     (umul*_highpart); usmul*_highpart is the mixed-sign mulhsu.  */
+  if (TARGET_64BIT)
+    emit_insn (gen_umuldi3_highpart (mulhu, x, y));
+  else
+    emit_insn (gen_umulsi3_highpart (mulhu, x, y));
+
+  riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, overflow_p, overflow_p);
+  riscv_emit_binary (IOR, dest, mul, overflow_p);
+}
+
+/* Expand usmul for a mode other than Xmode.
+
+   X and Y are zero extended to Xmode, multiplied there, and the product
+   is turned into all ones when it exceeds the maximum value of MODE:
+
+     _1 = zext (x) * zext (y);
+     _max = (T) -1;
+     _mask = - (_max < _1);
+     dest = (T) (_1 | _mask);  */
+
+static void
+riscv_expand_non_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode != Xmode);
+
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+  HOST_WIDE_INT max = ((uint64_t) 1 << bitsize) - 1;
+
+  rtx wide_x = riscv_extend_to_xmode_reg (x, mode, ZERO_EXTEND);
+  rtx wide_y = riscv_extend_to_xmode_reg (y, mode, ZERO_EXTEND);
+  rtx product = gen_reg_rtx (Xmode);
+  rtx limit = gen_reg_rtx (Xmode);
+  rtx mask = gen_reg_rtx (Xmode);
+
+  emit_move_insn (limit, GEN_INT (max));
+  riscv_emit_binary (MULT, product, wide_x, wide_y);
+
+  riscv_emit_binary (LTU, mask, limit, product);
+  riscv_emit_unary (NEG, mask, mask);
+  riscv_emit_binary (IOR, product, product, mask);
+
+  /* Hand back only the MODE-sized low part of the Xmode result.  */
+  emit_move_insn (dest, gen_lowpart (mode, product));
+}
+
+/* Implement the unsigned saturation mult standard name usmul for int mode.  */
+
+void
+riscv_expand_usmul (rtx dest, rtx x, rtx y)
+{
+  if (GET_MODE (dest) == Xmode)
+    riscv_expand_xmode_usmul (dest, x, y);
+  else
+    riscv_expand_non_xmode_usmul (dest, x, y);
+}
+
 /* Implement the unsigned saturation truncation for int mode.
 
    b = SAT_TRUNC (a);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 893c925b6b9..e1c8cf0c7b1 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4634,6 +4634,17 @@ (define_expand "sssub<mode>3"
   }
 )
 
+(define_expand "usmul<mode>3"
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  "TARGET_ZMMUL || TARGET_MUL" ; The expansion emits MULT rtl.
+  {
+    riscv_expand_usmul (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
 (define_expand "ustrunc<mode><anyi_double_truncated>2"
   [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
    (match_operand:ANYI_DOUBLE_TRUNC       1 "register_operand")]
-- 
2.43.0

Reply via email to