From: Pan Li <pan2...@intel.com> This patch would like to implement the SAT_MUL scalar unsigned from uint128_t, aka:
NT __attribute__((noinline))
sat_u_mul_##NT##_fmt_1 (NT a, NT b)
{
  uint128_t x = (uint128_t)a * (uint128_t)b;
  NT max = -1;
  if (x > (uint128_t)(max))
    return max;
  else
    return (NT)x;
}

Take uint64_t and uint8_t as examples:

Before this patch for uint8_t:
  10 │ sat_u_mul_uint8_t_from_uint128_t_fmt_1:
  11 │     mulhu a5,a0,a1
  12 │     mul a0,a0,a1
  13 │     bne a5,zero,.L3
  14 │     li a5,255
  15 │     bleu a0,a5,.L4
  16 │ .L3:
  17 │     li a0,255
  18 │ .L4:
  19 │     andi a0,a0,0xff
  20 │     ret

After this patch for uint8_t:
  10 │ sat_u_mul_uint8_t_from_uint128_t_fmt_1:
  11 │     mul a0,a0,a1
  12 │     li a5,255
  13 │     sltu a5,a5,a0
  14 │     neg a5,a5
  15 │     or a0,a0,a5
  16 │     andi a0,a0,0xff
  17 │     ret

Before this patch for uint64_t:
  10 │ sat_u_mul_uint64_t_from_uint128_t_fmt_1:
  11 │     mulhu a5,a0,a1
  12 │     mul a0,a0,a1
  13 │     beq a5,zero,.L4
  14 │     li a0,-1
  15 │ .L4:
  16 │     ret

After this patch for uint64_t:
  10 │ sat_u_mul_uint64_t_from_uint128_t_fmt_1:
  11 │     mulhsu a5,a1,a0
  12 │     mul a0,a0,a1
  13 │     snez a5,a5
  14 │     neg a5,a5
  15 │     or a0,a0,a5
  16 │     ret

gcc/ChangeLog:

	* config/riscv/riscv-protos.h (riscv_expand_usmul): Add new func
	decl.
	* config/riscv/riscv.cc (riscv_expand_xmode_usmul): Add new func
	to expand Xmode SAT_MUL.
	(riscv_expand_non_xmode_usmul): Ditto but for non-Xmode.
	(riscv_expand_usmul): Add new func to implement SAT_MUL.
	* config/riscv/riscv.md (usmul<mode>3): Add new pattern to match
	standard name usmul.

Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv.cc | 82 +++++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.md | 11 +++++ 3 files changed, 94 insertions(+) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a0331204479..38f63ea8424 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -137,6 +137,7 @@ extern void riscv_expand_usadd (rtx, rtx, rtx); extern void riscv_expand_ssadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); extern void riscv_expand_sssub (rtx, rtx, rtx); +extern void riscv_expand_usmul (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); extern void riscv_expand_sstrunc (rtx, rtx); extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index bbc7547d385..36f66f8b899 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -13265,6 +13265,88 @@ riscv_expand_sssub (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Implement the Xmode usmul. + + b = SAT_MUL (a, b); + => + _1 = a * b; + _2 = mulhu (a, b); + _overflow_p = _2 == 0; + _mask = - _overflow_p; + b = _1 | _mask; + */ + +static void +riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y) +{ + machine_mode mode = GET_MODE (dest); + + gcc_assert (mode == Xmode); + + rtx mul = gen_reg_rtx (Xmode); + rtx mulhu = gen_reg_rtx (Xmode); + rtx overflow_p = gen_reg_rtx (Xmode); + + riscv_emit_binary (MULT, mul, x, y); + + if (TARGET_64BIT) + emit_insn (gen_usmuldi3_highpart (mulhu, x, y)); + else + emit_insn (gen_usmulsi3_highpart (mulhu, x, y)); + + riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode)); + riscv_emit_unary (NEG, overflow_p, overflow_p); + riscv_emit_binary (IOR, dest, mul, overflow_p); +} + +/* Implement the non-Xmode usmul. 
+
+   b = SAT_MUL (a, b);
+   =>
+   _1 = a * b;
+   _max = (T)-1
+   _overflow_p = _1 > _max;
+   _mask = - _overflow_p;
+   b = _1 | _mask;
+ */
+
+static void
+riscv_expand_non_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+
+  gcc_assert (mode != Xmode);
+  /* Widen both operands to Xmode, requesting ZERO_EXTEND.  */
+  rtx xmode_x = riscv_extend_to_xmode_reg (x, mode, ZERO_EXTEND);
+  rtx xmode_y = riscv_extend_to_xmode_reg (y, mode, ZERO_EXTEND);
+  rtx xmode_mul = gen_reg_rtx (Xmode);
+  rtx mul_max = gen_reg_rtx (Xmode);
+  rtx overflow_p = gen_reg_rtx (Xmode);
+  /* Low BITSIZE bits all set: the largest unsigned value of MODE.  */
+  HOST_WIDE_INT max = ((uint64_t)1 << bitsize) - 1;
+
+  emit_move_insn (mul_max, GEN_INT (max));
+  riscv_emit_binary (MULT, xmode_mul, xmode_x, xmode_y);
+  /* overflow_p = max <u product; its negation is OR-ed into the product.  */
+  riscv_emit_binary (LTU, overflow_p, mul_max, xmode_mul);
+  riscv_emit_unary (NEG, overflow_p, overflow_p);
+  riscv_emit_binary (IOR, xmode_mul, xmode_mul, overflow_p);
+  /* Only the low MODE part of the Xmode result is copied to DEST.  */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_mul));
+}
+
+/* Implement standard name usmul for int mode, dispatching on DEST's mode.  */
+
+void
+riscv_expand_usmul (rtx dest, rtx x, rtx y)
+{
+  if (GET_MODE (dest) == Xmode)
+    return riscv_expand_xmode_usmul (dest, x, y) ;
+  else
+    return riscv_expand_non_xmode_usmul (dest, x, y);
+}
+
 /* Implement the unsigned saturation truncation for int mode.
 
b = SAT_TRUNC (a);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 893c925b6b9..e1c8cf0c7b1 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4634,6 +4634,17 @@ (define_expand "sssub<mode>3"
   }
 )
 
+(define_expand "usmul<mode>3" ; Standard name usmul for every ANYI mode.
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  "" ; NOTE(review): no enabling condition; confirm targets without M/Zmmul.
+  {
+    riscv_expand_usmul (operands[0], operands[1], operands[2]);
+    DONE; /* All insns are emitted by riscv_expand_usmul.  */
+  }
+)
+
 (define_expand "ustrunc<mode><anyi_double_truncated>2"
   [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
    (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]
-- 
2.43.0