The vfncvt.f.f.q instruction converts a vector of FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.f.q instruction converts a vector of FP32 elements to a vector of OFP8 elements with saturation. The VTYPE.altfmt field is used to select the OFP8 format. * altfmt = 0: FP32 to OFP8.e4m3 * altfmt = 1: FP32 to OFP8.e5m2
Signed-off-by: Max Chou <[email protected]> --- target/riscv/insn32.decode | 2 + target/riscv/insn_trans/trans_rvofp8.c.inc | 63 ++++++++++++++++++++++ target/riscv/insn_trans/trans_rvv.c.inc | 39 ++++++++++++++ 3 files changed, 104 insertions(+) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 49201c0c20..f2b413c7d4 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -974,6 +974,8 @@ vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm # *** Zvfofp8min Extension *** +vfncvt_f_f_q 010010 . ..... 11001 001 ..... 1010111 @r2_vm +vfncvt_sat_f_f_q 010010 . ..... 11011 001 ..... 1010111 @r2_vm vfncvtbf16_sat_f_f_w 010010 . ..... 11111 001 ..... 1010111 @r2_vm # *** Zvbc vector crypto extension *** diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc index f1a7b0affd..8851209660 100644 --- a/target/riscv/insn_trans/trans_rvofp8.c.inc +++ b/target/riscv/insn_trans/trans_rvofp8.c.inc @@ -12,6 +12,13 @@ } \ } while (0) +static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a) +{ + return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_sq(s, a->rd, a->rs2, a->vm) && + (s->sew == MO_8); +} static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a) { @@ -41,3 +48,59 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a) return false; } +static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a) +{ + REQUIRE_FPU; + REQUIRE_ZVFOFP8MIN(ctx); + + if (zvfofp8min_narrow_quad_check(ctx, a)) { + gen_helper_gvec_3_ptr *fn; + uint32_t data = 0; + + fn = ctx->altfmt ? 
gen_helper_vfncvt_f_f_q_ofp8e5m2 : + gen_helper_vfncvt_f_f_q_ofp8e4m3; + + gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN); + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul); + data = FIELD_DP32(data, VDATA, VTA, ctx->vta); + data = FIELD_DP32(data, VDATA, VMA, ctx->vma); + tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0), + vreg_ofs(ctx, a->rs2), tcg_env, + ctx->cfg_ptr->vlenb, + ctx->cfg_ptr->vlenb, data, fn); + finalize_rvv_inst(ctx); + return true; + } + return false; +} + +static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a) +{ + REQUIRE_FPU; + REQUIRE_ZVFOFP8MIN(ctx); + + if (zvfofp8min_narrow_quad_check(ctx, a)) { + gen_helper_gvec_3_ptr *fn; + uint32_t data = 0; + + fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 : + gen_helper_vfncvt_sat_f_f_q_ofp8e4m3; + + gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN); + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul); + data = FIELD_DP32(data, VDATA, VTA, ctx->vta); + data = FIELD_DP32(data, VDATA, VMA, ctx->vma); + tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0), + vreg_ofs(ctx, a->rs2), tcg_env, + ctx->cfg_ptr->vlenb, + ctx->cfg_ptr->vlenb, data, fn); + finalize_rvv_inst(ctx); + return true; + } + return false; +} + diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index bcd45b0aa3..9053b9fb57 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm) require_align(vs1, s->lmul); } +/* + * Common check function for vector narrowing instructions + * of single-width result (SEW) and quad-width source (4*SEW). + * + * Rules to be checked here: + * 1. The largest vector register group used by an instruction + * can not be greater than 8 vector registers + * (Section 31.5.2) + * 2. Quad-width SEW cannot be greater than ELEN. 
+ * (Section 31.2) + * 3. Source vector register number is multiples of 4 * LMUL. + * (Section 31.3.4.2) + * 4. Destination vector register number is multiples of LMUL. + * (Section 31.3.4.2) + * 5. Destination vector register group for a masked vector + * instruction cannot overlap the source mask register (v0). + * (Section 31.5.3) + * risc-v unprivileged spec + */ +static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2, + int vm) +{ + return (s->lmul <= 1) && + (s->sew < MO_32) && + ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) && + require_align(vs2, s->lmul + 2) && + require_align(vd, s->lmul) && + require_vm(vm, vd); +} + +static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm) +{ + bool ret = vext_quad_narrow_check_common(s, vd, vs, vm); + if (vd != vs) { + ret &= require_noover(vd, s->lmul, vs, s->lmul + 2); + } + return ret; +} + /* * Check function for vector reduction instructions. * -- 2.52.0
