This commit adds the implementation-defined behavior flags and basic operation support for the OCP float8 data types (E4M3 and E5M2).
According to the definition in OFP8 spec, the conversion from a wider format infinity depends on the saturation mode defined in the spec. Signed-off-by: Max Chou <[email protected]> --- fpu/softfloat-parts.c.inc | 159 +++++++++++++++++++++------ fpu/softfloat-specialize.c.inc | 75 +++++++++++++ fpu/softfloat.c | 191 +++++++++++++++++++++++++++++++-- include/fpu/softfloat-types.h | 12 +++ include/fpu/softfloat.h | 81 ++++++++++++++ 5 files changed, 480 insertions(+), 38 deletions(-) diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc index 5e0438fc0b..eee7daae4d 100644 --- a/fpu/softfloat-parts.c.inc +++ b/fpu/softfloat-parts.c.inc @@ -227,11 +227,28 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, p->exp = fmt->frac_shift - fmt->exp_bias - shift + !has_pseudo_denormals; } - } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { + } else if (likely(p->exp < fmt->exp_max)) { p->cls = float_class_normal; p->exp -= fmt->exp_bias; frac_shl(p, fmt->frac_shift); p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + } else if (fmt->limited_nan) { + /* + * Formats with limited NaN encodings (E4M3, E2M1, ARM Alt HP). + */ + frac_shl(p, fmt->frac_shift); + p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + if (fmt->normal_frac_max == NORMAL_FRAC_MAX_ALL || + p->frac_hi <= fmt->normal_frac_max) { + p->cls = float_class_normal; + p->exp -= fmt->exp_bias; + } else { + if (parts_is_snan_frac(p->frac_hi, status)) { + p->cls = float_class_snan; + } else { + p->cls = float_class_qnan; + } + } } else if (likely(frac_eqz(p))) { p->cls = float_class_inf; } else { @@ -241,14 +258,39 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, } } +/* + * Set FloatPartsN to the maximum normal value for the given format. 
+ * - IEEE formats (!no_infinity): exp = exp_max - 1, frac = all ones + * - Limited NaN formats (E4M3): exp = exp_max, frac = normal_frac_max + * - No NaN/InF formats (E2M1, ARM AHP): exp = exp_max, frac = all ones + */ +static void partsN(set_max_normal)(FloatPartsN *p, const FloatFmt *fmt) +{ + if (!fmt->no_infinity) { + p->exp = fmt->exp_max - 1; + frac_allones(p); + } else if (fmt->normal_frac_max != NORMAL_FRAC_MAX_ALL) { + p->exp = fmt->exp_max; + frac_clear(p); + p->frac_hi = fmt->normal_frac_max; + } else { + p->exp = fmt->exp_max; + frac_allones(p); + } +} + /* * Round and uncanonicalize a floating-point number by parts. There * are FRAC_SHIFT bits that may require rounding at the bottom of the * fraction; these bits will be removed. The exponent will be biased * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. + * + * The saturate parameter controls saturation behavior for formats that + * support it (OCP FP8 E4M3/E5M2). When true, overflow produces max normal + * instead of infinity (E5M2) or NaN (E4M3). */ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, - const FloatFmt *fmt) + const FloatFmt *fmt, bool saturate) { const int exp_max = fmt->exp_max; const int frac_shift = fmt->frac_shift; @@ -256,8 +298,8 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, const uint64_t frac_lsb = round_mask + 1; const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); const uint64_t roundeven_mask = round_mask | frac_lsb; + bool overflow_norm = saturate; uint64_t inc; - bool overflow_norm = false; int exp, flags = 0; switch (s->float_rounding_mode) { @@ -313,30 +355,64 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, } p->frac_lo &= ~round_mask; } + p->exp = exp; - if (fmt->arm_althp) { - /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ - if (unlikely(exp > exp_max)) { - /* Overflow. Return the maximum normal. 
*/ - flags = float_flag_invalid; - exp = exp_max; - frac_allones(p); - p->frac_lo &= ~round_mask; + /* + * Unified overflow handling based on format capabilities. + * 1. Format has infinity -> overflow to infinity (or saturate) + * 2. Format has NaN but no infinity -> overflow to NaN (or saturate) + * 3. Format has neither -> always saturate + */ + if (!fmt->no_infinity) { + if (unlikely(exp >= exp_max)) { + flags |= float_flag_overflow; + if (s->rebias_overflow) { + exp -= fmt->exp_re_bias; + } else if (overflow_norm) { + flags |= float_flag_inexact; + parts_set_max_normal(p, fmt); + exp = p->exp; + p->frac_lo &= ~round_mask; + } else { + flags |= float_flag_inexact; + p->cls = float_class_inf; + exp = exp_max; + frac_clear(p); + } } - } else if (unlikely(exp >= exp_max)) { - flags |= float_flag_overflow; - if (s->rebias_overflow) { - exp -= fmt->exp_re_bias; - } else if (overflow_norm) { + } else if (fmt_has_nan_encoding(fmt)) { + bool is_overflow = (exp > exp_max) || + (exp == exp_max && + p->frac_hi > fmt->normal_frac_max); + + if (unlikely(is_overflow)) { + flags |= float_flag_overflow; flags |= float_flag_inexact; - exp = exp_max - 1; - frac_allones(p); + + if (overflow_norm) { + parts_set_max_normal(p, fmt); + exp = p->exp; + } else { + uint8_t dnan = s->default_nan_pattern; + p->cls = float_class_qnan; + p->sign = dnan >> 7; + exp = exp_max; + frac_allones(p); + } + } + } else { + if (unlikely(exp > exp_max)) { + if (fmt->overflow_raises_invalid) { + /* ARM Alt HP: raise Invalid, not Overflow */ + flags = float_flag_invalid; + } else { + flags |= float_flag_overflow; + flags |= float_flag_inexact; + } + + parts_set_max_normal(p, fmt); + exp = p->exp; p->frac_lo &= ~round_mask; - } else { - flags |= float_flag_inexact; - p->cls = float_class_inf; - exp = exp_max; - frac_clear(p); } } frac_shr(p, frac_shift); @@ -422,11 +498,11 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, float_raise(flags, s); } -static void 
partsN(uncanon)(FloatPartsN *p, float_status *s, - const FloatFmt *fmt) +static void partsN(uncanon_sat)(FloatPartsN *p, float_status *s, + const FloatFmt *fmt, bool saturate) { if (likely(is_anynorm(p->cls))) { - parts_uncanon_normal(p, s, fmt); + parts_uncanon_normal(p, s, fmt, saturate); } else { switch (p->cls) { case float_class_zero: @@ -434,13 +510,30 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s, frac_clear(p); return; case float_class_inf: - g_assert(!fmt->arm_althp); - p->exp = fmt->exp_max; - frac_clear(p); + /* + * Unified infinity handling using format capabilities. + * Formats with no_infinity must convert infinity to something else + */ + if (!fmt->no_infinity) { + p->exp = fmt->exp_max; + frac_clear(p); + } else if (fmt_has_nan_encoding(fmt)) { + if (saturate) { + parts_set_max_normal(p, fmt); + } else { + uint8_t dnan = s->default_nan_pattern; + p->cls = float_class_qnan; + p->sign = dnan >> 7; + p->exp = fmt->exp_max; + frac_allones(p); + } + } else { + parts_set_max_normal(p, fmt); + } return; case float_class_qnan: case float_class_snan: - g_assert(!fmt->arm_althp); + g_assert(fmt_has_nan_encoding(fmt)); p->exp = fmt->exp_max; frac_shr(p, fmt->frac_shift); return; @@ -451,6 +544,12 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s, } } +static void partsN(uncanon)(FloatPartsN *p, float_status *s, + const FloatFmt *fmt) +{ + partsN(uncanon_sat)(p, s, fmt, false); +} + /* * Returns the result of adding or subtracting the values of the * floating-point values `a' and `b'. 
The operation is performed diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index ce7315c996..3648dc7467 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -242,6 +242,63 @@ static bool float16_is_snan_internal(float16 a, float_status *status) return frac_msb == snan_bit_is_one(status); } +/*---------------------------------------------------------------------------- +| Internal helper: Determine if E4M3 NaN is signaling. +| E4M3 has only one NaN encoding, so classification is policy-based. +*----------------------------------------------------------------------------*/ + +static bool float8_e4m3_is_snan_internal(float8_e4m3 a, float_status *status) +{ + if (!float8_e4m3_is_any_nan(a)) { + return false; + } + if (no_signaling_nans(status)) { + return false; + } + return snan_bit_is_one(status); +} + +/*---------------------------------------------------------------------------- +| Internal helper: Determine if E5M2 NaN is signaling. +*----------------------------------------------------------------------------*/ + +static bool float8_e5m2_is_snan_internal(float8_e5m2 a, float_status *status) +{ + if (!float8_e5m2_is_any_nan(a)) { + return false; + } + if (no_signaling_nans(status)) { + return false; + } + uint8_t frac = float8_e5m2_val(a) & 0x3; + bool frac_msb = (frac >> 1) & 1; + return frac_msb == snan_bit_is_one(status); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the OCP FP8 E4M3 value `a' is a quiet NaN; otherwise returns 0. 
+*----------------------------------------------------------------------------*/ + +bool float8_e4m3_is_quiet_nan(float8_e4m3 a_, float_status *status) +{ + if (!float8_e4m3_is_any_nan(a_)) { + return false; + } + return !float8_e4m3_is_snan_internal(a_, status); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the OCP FP8 E5M2 value `a' is a quiet NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +bool float8_e5m2_is_quiet_nan(float8_e5m2 a_, float_status *status) +{ + if (!float8_e5m2_is_any_nan(a_)) { + return false; + } + return !float8_e5m2_is_snan_internal(a_, status); +} + /*---------------------------------------------------------------------------- | Returns 1 if the half-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -285,6 +342,24 @@ bool bfloat16_is_quiet_nan(bfloat16 a_, float_status *status) return !bfloat16_is_snan_internal(a_, status); } +/*---------------------------------------------------------------------------- +| Returns 1 if the OCP FP8 E4M3 value `a' is a signaling NaN; otherwise 0. +*----------------------------------------------------------------------------*/ + +bool float8_e4m3_is_signaling_nan(float8_e4m3 a_, float_status *status) +{ + return float8_e4m3_is_snan_internal(a_, status); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the OCP FP8 E5M2 value `a' is a signaling NaN; otherwise 0. +*----------------------------------------------------------------------------*/ + +bool float8_e5m2_is_signaling_nan(float8_e5m2 a_, float_status *status) +{ + return float8_e5m2_is_snan_internal(a_, status); +} + /*---------------------------------------------------------------------------- | Returns 1 if the half-precision floating-point value `a' is a signaling | NaN; otherwise returns 0. 
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 8094358c2e..533f96dcda 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -522,6 +522,13 @@ typedef struct { #define DECOMPOSED_BINARY_POINT 63 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) +/* + * Sentinel value for normal_frac_max indicating "all fraction values at + * exp_max are normal" (i.e., the format has no NaN encoding at exp_max). + * Used by E2M1 and ARM Alternative Half Precision formats. + */ +#define NORMAL_FRAC_MAX_ALL 0 + /* Structure holding all of the relevant parameters for a format. * exp_size: the size of the exponent field * exp_bias: the offset applied to the exponent field @@ -542,11 +549,39 @@ typedef struct { int exp_max; int frac_size; int frac_shift; - bool arm_althp; bool has_explicit_bit; uint64_t round_mask; + /* + * Format capability flags: + * no_infinity: Format has no infinity encoding. When true, exp=exp_max + * with frac=0 is NOT infinity - it's either NaN or max normal. + * + * limited_nan: Format has limited or no NaN patterns. When combined + * with normal_frac_max, determines NaN encoding capability: + * - limited_nan=false: Standard IEEE NaN (exp=exp_max, frac!=0) + * - limited_nan=true && normal_frac_max!=0: Limited NaN (E4M3) + * - limited_nan=true && normal_frac_max==0: No NaN encoding (AHP, E2M1) + * + * overflow_raises_invalid: Raise Invalid (not Overflow) exception. + * ARM Alt HP uses this to signal overflow as an invalid operation. + * + * normal_frac_max: For formats with limited_nan, the maximum fraction + * value (after normalization shift, including implicit bit) that is + * still considered normal at exp=exp_max. + * Use NORMAL_FRAC_MAX_ALL (0) to indicate all frac values at exp_max + * are normal (E2M1, ARM Alt HP), which also implies no NaN encoding. 
+ */ + bool no_infinity; + bool limited_nan; + bool overflow_raises_invalid; + uint64_t normal_frac_max; } FloatFmt; +static inline bool fmt_has_nan_encoding(const FloatFmt *fmt) +{ + return !fmt->limited_nan || fmt->normal_frac_max != NORMAL_FRAC_MAX_ALL; +} + /* Expand fields based on the size of exponent and fraction */ #define FLOAT_PARAMS_(E) \ .exp_size = E, \ @@ -560,13 +595,27 @@ typedef struct { .frac_shift = (-F - 1) & 63, \ .round_mask = (1ull << ((-F - 1) & 63)) - 1 +static const FloatFmt float8_e4m3_params = { + FLOAT_PARAMS(4, 3), + .no_infinity = true, + .limited_nan = true, + .normal_frac_max = 0xE000000000000000ULL, +}; + +static const FloatFmt float8_e5m2_params = { + FLOAT_PARAMS(5, 2), +}; + static const FloatFmt float16_params = { FLOAT_PARAMS(5, 10) }; static const FloatFmt float16_params_ahp = { FLOAT_PARAMS(5, 10), - .arm_althp = true + .no_infinity = true, + .limited_nan = true, + .overflow_raises_invalid = true, + .normal_frac_max = NORMAL_FRAC_MAX_ALL, }; static const FloatFmt bfloat16_params = { @@ -614,6 +663,16 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw) }; } +static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f) +{ + unpack_raw64(p, &float8_e4m3_params, f); +} + +static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f) +{ + unpack_raw64(p, &float8_e5m2_params, f); +} + static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f) { unpack_raw64(p, &float16_params, f); @@ -671,6 +730,16 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt) return ret; } +static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const FloatParts64 *p) +{ + return make_float8_e4m3(pack_raw64(p, &float8_e4m3_params)); +} + +static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p) +{ + return make_float8_e5m2(pack_raw64(p, &float8_e5m2_params)); +} + static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p) { return 
make_float16(pack_raw64(p, &float16_params)); @@ -758,12 +827,26 @@ static void parts128_canonicalize(FloatParts128 *p, float_status *status, PARTS_GENERIC_64_128(canonicalize, A)(A, S, F) static void parts64_uncanon_normal(FloatParts64 *p, float_status *status, - const FloatFmt *fmt); + const FloatFmt *fmt, bool saturate); static void parts128_uncanon_normal(FloatParts128 *p, float_status *status, - const FloatFmt *fmt); + const FloatFmt *fmt, bool saturate); + +#define parts_uncanon_normal(A, S, F, SAT) \ + PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F, SAT) -#define parts_uncanon_normal(A, S, F) \ - PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F) +static void parts64_uncanon_sat(FloatParts64 *p, float_status *status, + const FloatFmt *fmt, bool saturate); +static void parts128_uncanon_sat(FloatParts128 *p, float_status *status, + const FloatFmt *fmt, bool saturate); + +#define parts_uncanon_sat(A, S, F, SAT) \ + PARTS_GENERIC_64_128(uncanon_sat, A)(A, S, F, SAT) + +static void parts64_set_max_normal(FloatParts64 *p, const FloatFmt *fmt); +static void parts128_set_max_normal(FloatParts128 *p, const FloatFmt *fmt); + +#define parts_set_max_normal(P, F) \ + PARTS_GENERIC_64_128(set_max_normal, P)(P, F) static void parts64_uncanon(FloatParts64 *p, float_status *status, const FloatFmt *fmt); @@ -1662,6 +1745,20 @@ static const uint16_t rsqrt_tab[128] = { * Pack/unpack routines with a specific FloatFmt. 
*/ +static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f, + float_status *s) +{ + float8_e4m3_unpack_raw(p, f); + parts_canonicalize(p, s, &float8_e4m3_params); +} + +static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f, + float_status *s) +{ + float8_e5m2_unpack_raw(p, f); + parts_canonicalize(p, s, &float8_e5m2_params); +} + static void float16a_unpack_canonical(FloatParts64 *p, float16 f, float_status *s, const FloatFmt *params) { @@ -1682,6 +1779,24 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f, parts_canonicalize(p, s, &bfloat16_params); } +static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p, + float_status *status, + const FloatFmt *params, + const bool saturate) +{ + parts_uncanon_sat(p, status, params, saturate); + return float8_e4m3_pack_raw(p); +} + +static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p, + float_status *status, + const FloatFmt *params, + const bool saturate) +{ + parts_uncanon_sat(p, status, params, saturate); + return float8_e5m2_pack_raw(p); +} + static float16 float16a_round_pack_canonical(FloatParts64 *p, float_status *s, const FloatFmt *params) @@ -1838,7 +1953,7 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, case float_class_normal: case float_class_denormal: if (s->floatx80_rounding_precision == floatx80_precision_x) { - parts_uncanon_normal(p, s, fmt); + parts_uncanon_normal(p, s, fmt, false); frac = p->frac_hi; exp = p->exp; } else { @@ -1847,7 +1962,7 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, p64.sign = p->sign; p64.exp = p->exp; frac_truncjam(&p64, p); - parts_uncanon_normal(&p64, s, fmt); + parts_uncanon_normal(&p64, s, fmt, false); frac = p64.frac; exp = p64.exp; } @@ -2823,6 +2938,66 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, } } +bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s) +{ + FloatParts64 p; + + float8_e4m3_unpack_canonical(&p, a, 
s); + parts_float_to_float(&p, s); + + return bfloat16_round_pack_canonical(&p, s); +} + +bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s) +{ + FloatParts64 p; + + float8_e5m2_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + + return bfloat16_round_pack_canonical(&p, s); +} + +float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status *s) +{ + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float8_e4m3_round_pack_canonical(&p, s, &float8_e4m3_params, + saturate); +} + +float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s) +{ + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float8_e5m2_round_pack_canonical(&p, s, &float8_e5m2_params, + saturate); +} + +float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s) +{ + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float8_e4m3_round_pack_canonical(&p, s, &float8_e4m3_params, + saturate); +} + +float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s) +{ + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float8_e5m2_round_pack_canonical(&p, s, &float8_e5m2_params, + saturate); +} + float32 float16_to_float32(float16 a, bool ieee, float_status *s) { const FloatFmt *fmt16 = ieee ? 
&float16_params : &float16_params_ahp; diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 8f82fdfc97..b781bf10b7 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -119,6 +119,18 @@ typedef struct { */ typedef uint16_t bfloat16; +/* + * Software OCP(Open Compute Project) floating point types + */ +typedef uint8_t float8_e4m3; +typedef uint8_t float8_e5m2; +#define float8_e4m3_val(x) (x) +#define float8_e5m2_val(x) (x) +#define make_float8_e4m3(x) (x) +#define make_float8_e5m2(x) (x) +#define const_float8_e4m3(x) (x) +#define const_float8_e5m2(x) (x) + /* * Software IEC/IEEE floating-point underflow tininess-detection mode. */ diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index c18ab2cb60..30aca23057 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -189,6 +189,87 @@ float128 int128_to_float128(Int128, float_status *status); float128 uint64_to_float128(uint64_t, float_status *status); float128 uint128_to_float128(Int128, float_status *status); +/*---------------------------------------------------------------------------- +| Software OCP conversion routines. +*----------------------------------------------------------------------------*/ + +bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status); +bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status); +float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool saturate, float_status *status); +float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool saturate, float_status *status); +float8_e4m3 float32_to_float8_e4m3(float32, bool saturate, float_status *status); +float8_e5m2 float32_to_float8_e5m2(float32, bool saturate, float_status *status); + +/*---------------------------------------------------------------------------- +| Software OCP operations. 
+*----------------------------------------------------------------------------*/ + +bool float8_e4m3_is_quiet_nan(float8_e4m3, float_status *status); +bool float8_e4m3_is_signaling_nan(float8_e4m3, float_status *status); +bool float8_e5m2_is_quiet_nan(float8_e5m2, float_status *status); +bool float8_e5m2_is_signaling_nan(float8_e5m2, float_status *status); + +static inline bool float8_e4m3_is_any_nan(float8_e4m3 a) +{ + return ((float8_e4m3_val(a) & ~0x80) == 0x7f); +} + +static inline bool float8_e5m2_is_any_nan(float8_e5m2 a) +{ + return ((float8_e5m2_val(a) & ~0x80) > 0x7c); +} + +static inline bool float8_e4m3_is_neg(float8_e4m3 a) +{ + return float8_e4m3_val(a) >> 7; +} + +static inline bool float8_e5m2_is_neg(float8_e5m2 a) +{ + return float8_e5m2_val(a) >> 7; +} + +static inline bool float8_e4m3_is_infinity(float8_e4m3 a) +{ + return false; +} + +static inline bool float8_e5m2_is_infinity(float8_e5m2 a) +{ + return (float8_e5m2_val(a) & 0x7f) == 0x7c; +} + +static inline bool float8_e4m3_is_zero(float8_e4m3 a) +{ + return (float8_e4m3_val(a) & 0x7f) == 0; +} + +static inline bool float8_e5m2_is_zero(float8_e5m2 a) +{ + return (float8_e5m2_val(a) & 0x7f) == 0; +} + +static inline bool float8_e4m3_is_zero_or_denormal(float8_e4m3 a) +{ + return (float8_e4m3_val(a) & 0x78) == 0; +} + +static inline bool float8_e5m2_is_zero_or_denormal(float8_e5m2 a) +{ + return (float8_e5m2_val(a) & 0x7c) == 0; +} + +static inline bool float8_e4m3_is_normal(float8_e4m3 a) +{ + uint8_t em = float8_e4m3_val(a) & 0x7f; + return em >= 0x8 && em <= 0x7e; +} + +static inline bool float8_e5m2_is_normal(float8_e5m2 a) +{ + return (((float8_e5m2_val(a) >> 2) + 1) & 0x1f) >= 2; +} + /*---------------------------------------------------------------------------- | Software half-precision conversion routines. *----------------------------------------------------------------------------*/ -- 2.52.0
