DIVIDE TO INTEGER computes the floating-point remainder and is used by LuaJIT, so add it to QEMU.
Put the main logic into fpu/, because it is way more convenient to operate on FloatParts than to convert floats back-and-forth. Signed-off-by: Ilya Leoshkevich <[email protected]> --- fpu/softfloat.c | 142 +++++++++++++++++++++++++++++++ include/fpu/softfloat.h | 11 +++ target/s390x/helper.h | 1 + target/s390x/tcg/fpu_helper.c | 56 ++++++++++++ target/s390x/tcg/insn-data.h.inc | 5 +- target/s390x/tcg/translate.c | 26 ++++++ 6 files changed, 240 insertions(+), 1 deletion(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 8094358c2e4..87409753483 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -5361,6 +5361,148 @@ floatx80 floatx80_round(floatx80 a, float_status *status) return floatx80_round_pack_canonical(&p, status); } +static void parts_s390_divide_to_integer(FloatParts64 *a, FloatParts64 *b, + int final_quotient_rounding_mode, + bool mask_underflow, bool mask_inexact, + const FloatFmt *fmt, + FloatParts64 *r, FloatParts64 *n, + uint32_t *cc, int *dxc, + float_status *status) +{ + /* POp table "Results: DIVIDE TO INTEGER (Part 1 of 2)" */ + if ((float_cmask(a->cls) | float_cmask(b->cls)) & float_cmask_anynan) { + *r = *parts_pick_nan(a, b, status); + *n = *r; + *cc = 1; + } else if (a->cls == float_class_inf || b->cls == float_class_zero) { + parts_default_nan(r, status); + *n = *r; + *cc = 1; + status->float_exception_flags |= float_flag_invalid; + } else if (b->cls == float_class_inf) { + *r = *a; + n->cls = float_class_zero; + n->sign = a->sign ^ b->sign; + *cc = 0; + } else { + FloatParts64 *q, q_buf, *r_precise, r_precise_buf; + int float_exception_flags = 0; + bool is_q_smallish; + uint32_t r_flags; + + /* Compute precise quotient */ + q_buf = *a; + q = parts_div(&q_buf, b, status); + + /* + * Check whether two closest integers can be precisely represented, + * i.e., all their bits fit into the fractional part. 
+ */ + is_q_smallish = q->exp < (fmt->frac_size + 1); + + /* + * Final quotient is rounded using final-quotient-rounding method, and + * partial quotient is rounded toward zero. + * + * Rounding of partial quotient may be inexact. This is the whole point + * of distinguishing partial quotients, so ignore the exception. + */ + *n = *q; + parts_round_to_int_normal(n, + is_q_smallish ? + final_quotient_rounding_mode : + float_round_to_zero, + 0, fmt->frac_size); + + /* Compute precise remainder */ + r_precise_buf = *b; + r_precise = parts_muladd_scalbn(&r_precise_buf, n, a, 0, + float_muladd_negate_product, status); + + /* Round remainder to the target format */ + *r = *r_precise; + status->float_exception_flags = 0; + parts_uncanon(r, status, fmt); + r_flags = status->float_exception_flags; + r->frac &= (1ULL << fmt->frac_size) - 1; + parts_canonicalize(r, status, fmt); + + /* POp table "Results: DIVIDE TO INTEGER (Part 2 of 2)" */ + if (is_q_smallish) { + if (r->cls != float_class_zero) { + if (r->exp < 2 - (1 << (fmt->exp_size - 1))) { + if (mask_underflow) { + float_exception_flags |= float_flag_underflow; + *dxc = 0x10; + r->exp += fmt->exp_re_bias; + } + } else if (r_flags & float_flag_inexact) { + float_exception_flags |= float_flag_inexact; + if (mask_inexact) { + bool saved_r_sign, saved_r_precise_sign; + + /* + * Check whether remainder was truncated (rounded + * toward zero) or incremented. + */ + saved_r_sign = r->sign; + saved_r_precise_sign = r_precise->sign; + r->sign = false; + r_precise->sign = false; + if (parts_compare(r, r_precise, status, true) < + float_relation_equal) { + *dxc = 0x8; + } else { + *dxc = 0xc; + } + r->sign = saved_r_sign; + r_precise->sign = saved_r_precise_sign; + } + } + } + *cc = 0; + } else if (n->exp > (1 << (fmt->exp_size - 1)) - 1) { + n->exp -= fmt->exp_re_bias; + *cc = r->cls == float_class_zero ? 1 : 3; + } else { + *cc = r->cls == float_class_zero ? 
0 : 2; + } + + /* Adjust signs of zero results */ + if (r->cls == float_class_zero) { + r->sign = a->sign; + } + if (n->cls == float_class_zero) { + n->sign = a->sign ^ b->sign; + } + + status->float_exception_flags = float_exception_flags; + } +} + +#define DEFINE_S390_DIVIDE_TO_INTEGER(floatN) \ +void floatN ## _s390_divide_to_integer(floatN a, floatN b, \ + int final_quotient_rounding_mode, \ + bool mask_underflow, bool mask_inexact, \ + floatN *r, floatN *n, \ + uint32_t *cc, int *dxc, \ + float_status *status) \ +{ \ + FloatParts64 pa, pb, pr, pn; \ + \ + floatN ## _unpack_canonical(&pa, a, status); \ + floatN ## _unpack_canonical(&pb, b, status); \ + parts_s390_divide_to_integer(&pa, &pb, final_quotient_rounding_mode, \ + mask_underflow, mask_inexact, \ + &floatN ## _params, \ + &pr, &pn, cc, dxc, status); \ + *r = floatN ## _round_pack_canonical(&pr, status); \ + *n = floatN ## _round_pack_canonical(&pn, status); \ +} + +DEFINE_S390_DIVIDE_TO_INTEGER(float32) +DEFINE_S390_DIVIDE_TO_INTEGER(float64) + static void __attribute__((constructor)) softfloat_init(void) { union_float64 ua, ub, uc, ur; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index c18ab2cb609..66b0c47b5eb 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -1372,4 +1372,15 @@ static inline bool float128_unordered_quiet(float128 a, float128 b, *----------------------------------------------------------------------------*/ float128 float128_default_nan(float_status *status); +#define DECLARE_S390_DIVIDE_TO_INTEGER(floatN) \ +void floatN ## _s390_divide_to_integer(floatN a, floatN b, \ + int final_quotient_rounding_mode, \ + bool mask_underflow, bool mask_inexact, \ + floatN *r, floatN *n, \ + uint32_t *cc, int *dxc, \ + float_status *status) +DECLARE_S390_DIVIDE_TO_INTEGER(float32); +DECLARE_S390_DIVIDE_TO_INTEGER(float64); + + #endif /* SOFTFLOAT_H */ diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 1a8a76abb98..6a7426fdac7 100644 --- 
a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -46,6 +46,7 @@ DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128) +DEF_HELPER_6(dib, void, env, i32, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c index 7a3ff501a46..122994960a6 100644 --- a/target/s390x/tcg/fpu_helper.c +++ b/target/s390x/tcg/fpu_helper.c @@ -315,6 +315,62 @@ Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b) return RET128(ret); } +void HELPER(dib)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3, + uint32_t m4, uint32_t bits) +{ + int final_quotient_rounding_mode = s390_get_bfp_rounding_mode(env, m4); + bool mask_underflow = (env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW; + bool mask_inexact = (env->fpc >> 24) & S390_IEEE_MASK_INEXACT; + float32 a32, b32, n32, r32; + float64 a64, b64, n64, r64; + int dxc = -1; + uint32_t cc; + + if (bits == 32) { + a32 = env->vregs[r1][0] >> 32; + b32 = env->vregs[r2][0] >> 32; + + float32_s390_divide_to_integer( + a32, b32, + final_quotient_rounding_mode, + mask_underflow, mask_inexact, + &r32, &n32, &cc, &dxc, &env->fpu_status); + } else { + a64 = env->vregs[r1][0]; + b64 = env->vregs[r2][0]; + + float64_s390_divide_to_integer( + a64, b64, + final_quotient_rounding_mode, + mask_underflow, mask_inexact, + &r64, &n64, &cc, &dxc, &env->fpu_status); + } + + /* Flush the results if needed */ + if ((env->fpu_status.float_exception_flags & float_flag_invalid) && + ((env->fpc >> 24) & S390_IEEE_MASK_INVALID)) { + /* The action for invalid operation is "Suppress" */ + } else { + /* The action for other exceptions is "Complete" */ + if 
(bits == 32) { + env->vregs[r1][0] = deposit64(env->vregs[r1][0], 32, 32, r32); + env->vregs[r3][0] = deposit64(env->vregs[r3][0], 32, 32, n32); + } else { + env->vregs[r1][0] = r64; + env->vregs[r3][0] = n64; + } + env->cc_op = cc; + } + + /* Raise an exception if needed */ + if (dxc == -1) { + handle_exceptions(env, false, GETPC()); + } else { + env->fpu_status.float_exception_flags = 0; + tcg_s390_data_exception(env, dxc, GETPC()); + } +} + /* 32-bit FP multiplication */ uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index baaafe922e9..0d5392eac54 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -9,7 +9,7 @@ * OPC = (op << 8) | op2 where op is the major, op2 the minor opcode * NAME = name of the opcode, used internally * FMT = format of the opcode (defined in insn-format.h.inc) - * FAC = facility the opcode is available in (defined in DisasFacility) + * FAC = facility the opcode is available in (defined in translate.c) * I1 = func in1_xx fills o->in1 * I2 = func in2_xx fills o->in2 * P = func prep_xx initializes o->*out* @@ -361,6 +361,9 @@ C(0xb91d, DSGFR, RRE, Z, r1p1, r2_32s, r1_P, 0, divs64, 0) C(0xe30d, DSG, RXY_a, Z, r1p1, m2_64, r1_P, 0, divs64, 0) C(0xe31d, DSGF, RXY_a, Z, r1p1, m2_32s, r1_P, 0, divs64, 0) +/* DIVIDE TO INTEGER */ + D(0xb35b, DIDBR, RRF_b, Z, 0, 0, 0, 0, dib, 0, 64) + D(0xb353, DIEBR, RRF_b, Z, 0, 0, 0, 0, dib, 0, 32) /* EXCLUSIVE OR */ C(0x1700, XR, RR_a, Z, r1, r2, new, r1_32, xor, nz32) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 540c5a569c0..dee0e710f39 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2283,6 +2283,32 @@ static DisasJumpType op_dxb(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_dib(DisasContext *s, DisasOps *o) +{ + const bool fpe = s390_has_feat(S390_FEAT_FLOATING_POINT_EXT); + 
uint8_t m4 = get_field(s, m4); + + if (get_field(s, r1) == get_field(s, r2) || + get_field(s, r1) == get_field(s, r3) || + get_field(s, r2) == get_field(s, r3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (m4 == 2 || (!fpe && m4 == 3) || m4 > 7) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_helper_dib(tcg_env, tcg_constant_i32(get_field(s, r1)), + tcg_constant_i32(get_field(s, r2)), + tcg_constant_i32(get_field(s, r3)), tcg_constant_i32(m4), + tcg_constant_i32(s->insn->data)); + set_cc_static(s); + + return DISAS_NEXT; +} + static DisasJumpType op_ear(DisasContext *s, DisasOps *o) { int r2 = get_field(s, r2); -- 2.52.0
