On Wed, Mar 04, 2026 at 09:39:59PM +0800, Max Chou wrote:
> The vfncvt.f.f.q instruction converts a vector of FP32 elements to a
> vector of OFP8 elements. The vfncvt.sat.f.f.q instruction performs the
> same FP32-to-OFP8 conversion, but with saturation.
> The VTYPE.altfmt field is used to select the OFP8 format.
> * altfmt = 0: FP32 to OFP8.e4m3
> * altfmt = 1: FP32 to OFP8.e5m2
> 
> Reviewed-by: Chao Liu <[email protected]>
> Signed-off-by: Max Chou <[email protected]>
> ---
>  target/riscv/insn32.decode                 |  2 +
>  target/riscv/insn_trans/trans_rvofp8.c.inc | 63 ++++++++++++++++++++++
>  target/riscv/insn_trans/trans_rvv.c.inc    | 39 ++++++++++++++
>  3 files changed, 104 insertions(+)
> 
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 49201c0c20..f2b413c7d4 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -974,6 +974,8 @@ vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 
> @r_vm
>  vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
>  
>  # *** Zvfofp8min Extension ***
> +vfncvt_f_f_q          010010 . ..... 11001 001 ..... 1010111 @r2_vm
> +vfncvt_sat_f_f_q      010010 . ..... 11011 001 ..... 1010111 @r2_vm
>  vfncvtbf16_sat_f_f_w  010010 . ..... 11111 001 ..... 1010111 @r2_vm
>  
>  # *** Zvbc vector crypto extension ***
> diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc 
> b/target/riscv/insn_trans/trans_rvofp8.c.inc
> index d28f92e050..619ee4d773 100644
> --- a/target/riscv/insn_trans/trans_rvofp8.c.inc
> +++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
> @@ -12,6 +12,13 @@
>      }                                       \
>  } while (0)
>  
> +static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
> +{
> +    return require_rvv(s) &&
> +           vext_check_isa_ill(s) &&
> +           vext_check_sq(s, a->rd, a->rs2, a->vm) &&
> +           (s->sew == MO_8);
> +}
>  
>  static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
>  {
> @@ -40,3 +47,59 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, 
> arg_rmr *a)
>      }
>      return false;
>  }
> +
> +static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_ZVFOFP8MIN(ctx);
> +
> +    if (zvfofp8min_narrow_quad_check(ctx, a)) {
> +        gen_helper_gvec_3_ptr *fn;
> +        uint32_t data = 0;
> +
> +        fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
> +                           gen_helper_vfncvt_f_f_q_ofp8e4m3;
> +
> +        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> +                           vreg_ofs(ctx, a->rs2), tcg_env,
> +                           ctx->cfg_ptr->vlenb,
> +                           ctx->cfg_ptr->vlenb, data, fn);
> +        finalize_rvv_inst(ctx);
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_ZVFOFP8MIN(ctx);
> +
> +    if (zvfofp8min_narrow_quad_check(ctx, a)) {
> +        gen_helper_gvec_3_ptr *fn;
> +        uint32_t data = 0;
> +
> +        fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
> +                           gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
> +
> +        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> +                           vreg_ofs(ctx, a->rs2), tcg_env,
> +                           ctx->cfg_ptr->vlenb,
> +                           ctx->cfg_ptr->vlenb, data, fn);
> +        finalize_rvv_inst(ctx);
> +        return true;
> +    }
> +    return false;
> +}
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index 161bf94a07..bbe864dd7c 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int 
> vs1, int vs2, int vm)
>             require_align(vs1, s->lmul);
>  }
>  
> +/*
> + * Common check function for vector narrowing instructions
> + * of single-width result (SEW) and quad-width source (4*SEW).
> + *
> + * Rules to be checked here:
> + *   1. The largest vector register group used by an instruction
> + *      can not be greater than 8 vector registers
> + *      (Section 31.5.2)
> +   2. Quad-width SEW cannot be greater than ELEN.
> +      (Section 31.2)
> + *   3. Source vector register number is multiples of 4 * LMUL.
> + *      (Section 31.3.4.2)
> + *   4. Destination vector register number is multiples of LMUL.
> + *      (Section 31.3.4.2)
> + *   5. Destination vector register group for a masked vector
> + *      instruction cannot overlap the source mask register (v0).
> + *      (Section 31.5.3)
> + * risc-v unprivileged spec
> + */
> +static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
> +                                          int vm)
> +{
> +    return (s->lmul <= 1) &&
> +           (s->sew < MO_32) &&
> +           ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
> +           require_align(vs2, s->lmul + 2) &&
> +           require_align(vd, s->lmul) &&
> +           require_vm(vm, vd);
> +}
The quad-narrow check logic is correct:
- lmul <= 1 (log2-encoded, i.e. LMUL <= 2) caps the source group at
  4 * LMUL = 4 * 2 = 8 regs
- sew < MO_32 caps the quad-width source SEW at 64 bits, ruling out the
  128-bit quad width that SEW = 32 would imply
- elen check verified for both ELEN = 32 and 64

Thanks,
Chao
> +
> +static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
> +{
> +    bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
> +    if (vd != vs) {
> +        ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
> +    }
> +    return ret;
> +}
> +
>  /*
>   * Check function for vector reduction instructions.
>   *
> -- 
> 2.52.0
> 

Reply via email to