The vfext.vf2 instruction converts a vector of OCP FP4 E2M1 floating-point numbers to a vector of OFP FP8 E4M3 floating-points numbers.
Signed-off-by: Max Chou <[email protected]> --- target/riscv/helper.h | 3 ++ target/riscv/insn32.decode | 3 ++ target/riscv/insn_trans/trans_rvofp4.c.inc | 44 ++++++++++++++++++++++ target/riscv/translate.c | 1 + target/riscv/vector_helper.c | 33 ++++++++++++++++ 5 files changed, 84 insertions(+) create mode 100644 target/riscv/insn_trans/trans_rvofp4.c.inc diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 356c24d9fb..162303fb6c 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1259,6 +1259,9 @@ DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32) +/* OFP4 function */ +DEF_HELPER_5(vfext_vf2, void, ptr, ptr, ptr, env, i32) + /* Vector crypto functions */ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index f2b413c7d4..c58223ebd8 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -754,6 +754,9 @@ vsext_vf2 010010 . ..... 00111 010 ..... 1010111 @r2_vm vsext_vf4 010010 . ..... 00101 010 ..... 1010111 @r2_vm vsext_vf8 010010 . ..... 00011 010 ..... 1010111 @r2_vm +# Zvfofp4min Extension +vfext_vf2 010010 . ..... 10110 010 ..... 1010111 @r2_vm + vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm11 vsetivli 11 .......... ..... 111 ..... 1010111 @r2_zimm10 vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvofp4.c.inc b/target/riscv/insn_trans/trans_rvofp4.c.inc new file mode 100644 index 0000000000..91bf50834f --- /dev/null +++ b/target/riscv/insn_trans/trans_rvofp4.c.inc @@ -0,0 +1,44 @@ +/* + * RISC-V translation routines for the OFP4 Standard Extensions. + * + * Copyright (C) 2025 SiFive, Inc. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +static bool vext_zvfofp4min_check(DisasContext *s, arg_rmr *a) +{ + return s->cfg_ptr->ext_zvfofp4min && + (s->sew == MO_8) && + vext_check_altfmt(s, -1) && + (s->lmul >= -2) && + require_rvv(s) && + vext_check_isa_ill(s) && + (a->rd != a->rs2) && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul - 1) && + require_vm(a->vm, a->rd) && + require_noover(a->rd, s->lmul, a->rs2, s->lmul - 1); +} + +static bool trans_vfext_vf2(DisasContext *s, arg_rmr *a) +{ + if (vext_zvfofp4min_check(s, a)) { + uint32_t data = 0; + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs2), tcg_env, + s->cfg_ptr->vlenb, s->cfg_ptr->vlenb, data, + gen_helper_vfext_vf2); + tcg_gen_movi_tl(cpu_vstart, 0); + finalize_rvv_inst(s); + + return true; + } + return false; +} + diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 137022d7fb..bf403785b5 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1220,6 +1220,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "insn_trans/trans_svinval.c.inc" #include "insn_trans/trans_rvbf16.c.inc" #include "insn_trans/trans_rvofp8.c.inc" +#include "insn_trans/trans_rvofp4.c.inc" #include "decode-xthead.c.inc" #include "decode-xmips.c.inc" #include "insn_trans/trans_xthead.c.inc" diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 418212973d..a87728f130 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -5121,6 +5121,7 @@ RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4, GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1) GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1) +/* Zvfofp4min: vfext.vf2 - OFP4 E2M1 to OFP8 E4M3 conversion */ /* * Vector Reduction Operations */ @@ -5920,3 +5921,35 @@ GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) + + +void HELPER(vfext_vf2)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + float_status fp_status = env->fp_status; + uint32_t vl = env->vl; + uint32_t vm = vext_vm(desc); + uint32_t esz = sizeof(uint8_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); + uint32_t i; + + VSTART_CHECK_EARLY_EXIT(env, vl); + + for (i = env->vstart; i < vl; ++i) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); + continue; + } + + uint8_t input = *((uint8_t *)vs2 + H1((i % 2 ? i - 1 : i) / 2)); + input = (i % 2) ? ((input >> 4) & 0xf) : (input & 0xf); + *((uint8_t *)vd + H1(i)) = float4_e2m1_to_float8_e4m3(input, + &fp_status); + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); +} -- 2.52.0
