On Wed, Mar 04, 2026 at 09:39:56PM +0800, Max Chou wrote: > According to the Zvfofp8min extension, the vfwcvtbf16.f.f.v instruction > supports OFP8 to BF16 conversion when SEW is 8. > And the VTYPE.altfmt field is used to select the OFP8 format. > * altfmt = 0: OFP8.e4m3 to BF16 > * altfmt = 1: OFP8.e5m2 to BF16 > > Reviewed-by: Chao Liu <[email protected]> > Signed-off-by: Max Chou <[email protected]> > --- > target/riscv/helper.h | 12 +++ > target/riscv/insn_trans/trans_rvbf16.c.inc | 16 +++- > target/riscv/vector_helper.c | 99 +++++++++++++++++++++- > 3 files changed, 122 insertions(+), 5 deletions(-) > > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index eb0a488ba8..356c24d9fb 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -1247,6 +1247,18 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, > env, i32) > DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32) > DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32) > > +/* OFP8 functions */ > +DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e4m3, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e5m2, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvt_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32) > +DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32) > + > /* Vector crypto functions */ > DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) > DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) > diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc > 
b/target/riscv/insn_trans/trans_rvbf16.c.inc > index 6cfda03d2e..9aafd4d2ef 100644 > --- a/target/riscv/insn_trans/trans_rvbf16.c.inc > +++ b/target/riscv/insn_trans/trans_rvbf16.c.inc > @@ -92,11 +92,20 @@ static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, > arg_vfncvtbf16_f_f_w *a) > static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v > *a) > { > REQUIRE_FPU; > - REQUIRE_ZVFBFMIN(ctx); > > - if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) { > + if (opfv_widen_check(ctx, a) && > + ((ctx->sew == MO_16 && ctx->cfg_ptr->ext_zvfbfmin) || > + (ctx->sew == MO_8 && ctx->cfg_ptr->ext_zvfofp8min))) { > + gen_helper_gvec_3_ptr *fn; > uint32_t data = 0; > > + if (ctx->sew == MO_16) { > + fn = gen_helper_vfwcvtbf16_f_f_v; > + } else { > + fn = ctx->altfmt ? gen_helper_vfwcvtbf16_f_f_v_ofp8e5m2 : > + gen_helper_vfwcvtbf16_f_f_v_ofp8e4m3; > + } > + > gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN); > > data = FIELD_DP32(data, VDATA, VM, a->vm); > @@ -106,8 +115,7 @@ static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, > arg_vfwcvtbf16_f_f_v *a) > tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0), > vreg_ofs(ctx, a->rs2), tcg_env, > ctx->cfg_ptr->vlenb, > - ctx->cfg_ptr->vlenb, data, > - gen_helper_vfwcvtbf16_f_f_v); > + ctx->cfg_ptr->vlenb, data, fn); > finalize_rvv_inst(ctx); > return true; > } > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 57221ab884..e7b187a57d 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -89,7 +89,7 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, > target_ulong s1, > > switch (vsew) { > case MO_8: > - ill_altfmt &= !(cpu->cfg.ext_zvfbfa); > + ill_altfmt &= !(cpu->cfg.ext_zvfbfa || cpu->cfg.ext_zvfofp8min); > break; > case MO_16: > ill_altfmt &= !(cpu->cfg.ext_zvfbfa); > @@ -5034,6 +5034,103 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) > RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) > GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) > > +/* > + 
* OCP FP8 Narrowing Conversions (BF16/F32 -> FP8) > + * 1. Initialize a local float_status with RISC-V specific NaN handling > + * 2. Call the softfloat conversion function with saturation parameter > + * 3. Merge exception flags back to the original status > + */ > +#define GEN_OCP_FP8_NARROW(NAME, CONVERT_FN, SATURATE, IN_TYPE) \ > +static uint8_t NAME(IN_TYPE a, float_status *s) \ > +{ \ > + float_status local = *s; \ > + local.default_nan_pattern = 0x70; \ > + local.default_nan_mode = true; \ > + uint8_t result = CONVERT_FN(a, SATURATE, &local); \ > + s->float_exception_flags |= local.float_exception_flags; \ > + return result; \ > +} > + > +/* BF16 -> E4M3/E5M2 conversions */ > +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3, bfloat16_to_float8_e4m3, false, > + uint16_t) > +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2, bfloat16_to_float8_e5m2, false, > + uint16_t) > +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3_sat, bfloat16_to_float8_e4m3, true, > + uint16_t) > +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2_sat, bfloat16_to_float8_e5m2, true, > + uint16_t) > + > +/* F32 -> E4M3/E5M2 conversions */ > +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3, float32_to_float8_e4m3, false, > uint32_t) > +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2, float32_to_float8_e5m2, false, > uint32_t) > +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3_sat, float32_to_float8_e4m3, true, > + uint32_t) > +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2_sat, float32_to_float8_e5m2, true, > + uint32_t) > + > +/* > + * OCP FP8 Widening Conversions (FP8 -> BF16) > + * According to Zvfofp8min isa specification: "No rounding occurs, and no > + * floating-point exception flags are set." > + * 1. Initialize a local float_status with no_signaling_nans=true > + * 2. Call the softfloat conversion function > + * 3. 
Intentionally DISCARD exception flags (not merged back) > + */ > +#define GEN_OCP_FP8_WIDEN(NAME, CONVERT_FN) \ > +static uint16_t NAME(uint8_t a, float_status *s) \ > +{ \ > + float_status local = *s; \ > + local.no_signaling_nans = true; \ > + return CONVERT_FN(a, &local); \ > +} The widening conversion correctly discards exception flags per spec ("No rounding occurs, and no floating-point exception flags are set."). Good.
Thanks, Chao > + > +GEN_OCP_FP8_WIDEN(vfwcvt_e4m3_to_bf16, float8_e4m3_to_bfloat16) > +GEN_OCP_FP8_WIDEN(vfwcvt_e5m2_to_bf16, float8_e5m2_to_bfloat16) > + > +/* vfwcvtbf16.f.f.w vd, vs2, vm # Convert OFP8 to BF16. */ > +RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e4m3, WOP_UU_B, H2, H1, > + vfwcvt_e4m3_to_bf16) > +RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e5m2, WOP_UU_B, H2, H1, > + vfwcvt_e5m2_to_bf16) > +GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e4m3, 2) > +GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e5m2, 2) > + > +/* vfncvtbf16.f.f.w vd, vs2, vm # Convert BF16 to OFP8 without saturation. */ > +RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2, > + vfncvt_bf16_to_e4m3) > +RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2, > + vfncvt_bf16_to_e5m2) > +GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e4m3, 1) > +GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e5m2, 1) > + > +/* vfncvtbf16.sat.f.f.w vd, vs2, vm # Convert BF16 to OFP8 with saturation. > */ > +RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2, > + vfncvt_bf16_to_e4m3_sat) > +RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2, > + vfncvt_bf16_to_e5m2_sat) > +GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e4m3, 1) > +GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e5m2, 1) > + > +/* Quad-width narrowing type for FP32 to OFP8 */ > +#define QOP_UU_B uint8_t, uint32_t, uint32_t > + > +/* vfncvt.f.f.q vd, vs2, vm # Convert FP32 to OFP8. */ > +RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4, > + vfncvt_f32_to_e4m3) > +RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4, > + vfncvt_f32_to_e5m2) > +GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e4m3, 1) > +GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e5m2, 1) > + > +/* vfncvt.sat.f.f.q vd, vs2, vm # Convert FP32 to OFP8 with saturation. 
*/ > +RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4, > + vfncvt_f32_to_e4m3_sat) > +RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4, > + vfncvt_f32_to_e5m2_sat) > +GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1) > +GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1) > + > /* > * Vector Reduction Operations > */ > -- > 2.52.0 >
