https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107591
--- Comment #14 from Jakub Jelinek <jakub at gcc dot gnu.org> --- Incremental patch on top of the https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107569#c18 patch which optimizes the floating point x * x: --- gcc/range-op-float.cc.jj 2022-11-09 19:06:11.075716000 +0100 +++ gcc/range-op-float.cc 2022-11-09 21:11:52.468256045 +0100 @@ -51,7 +51,7 @@ along with GCC; see the file COPYING3. bool range_operator_float::fold_range (frange &r, tree type, const frange &op1, const frange &op2, - relation_trio) const + relation_trio trio) const { if (empty_range_varying (r, type, op1, op2)) return true; @@ -65,7 +65,7 @@ range_operator_float::fold_range (frange bool maybe_nan; rv_fold (lb, ub, maybe_nan, type, op1.lower_bound (), op1.upper_bound (), - op2.lower_bound (), op2.upper_bound ()); + op2.lower_bound (), op2.upper_bound (), trio); // Handle possible NANs by saturating to the appropriate INF if only // one end is a NAN. If both ends are a NAN, just return a NAN. @@ -103,8 +103,8 @@ range_operator_float::rv_fold (REAL_VALU const REAL_VALUE_TYPE &lh_lb ATTRIBUTE_UNUSED, const REAL_VALUE_TYPE &lh_ub ATTRIBUTE_UNUSED, const REAL_VALUE_TYPE &rh_lb ATTRIBUTE_UNUSED, - const REAL_VALUE_TYPE &rh_ub ATTRIBUTE_UNUSED) - const + const REAL_VALUE_TYPE &rh_ub ATTRIBUTE_UNUSED, + relation_trio) const { lb = dconstninf; ub = dconstinf; @@ -1868,7 +1868,8 @@ class foperator_plus : public range_oper const REAL_VALUE_TYPE &lh_lb, const REAL_VALUE_TYPE &lh_ub, const REAL_VALUE_TYPE &rh_lb, - const REAL_VALUE_TYPE &rh_ub) const final override + const REAL_VALUE_TYPE &rh_ub, + relation_trio) const final override { frange_arithmetic (PLUS_EXPR, type, lb, lh_lb, rh_lb, dconstninf); frange_arithmetic (PLUS_EXPR, type, ub, lh_ub, rh_ub, dconstinf); @@ -1892,7 +1893,8 @@ class foperator_minus : public range_ope const REAL_VALUE_TYPE &lh_lb, const REAL_VALUE_TYPE &lh_ub, const REAL_VALUE_TYPE &rh_lb, - const REAL_VALUE_TYPE &rh_ub) const final override + const REAL_VALUE_TYPE &rh_ub, + 
relation_trio) const final override { frange_arithmetic (MINUS_EXPR, type, lb, lh_lb, rh_ub, dconstninf); frange_arithmetic (MINUS_EXPR, type, ub, lh_ub, rh_lb, dconstinf); @@ -1910,7 +1912,7 @@ class foperator_minus : public range_ope /* Wrapper around frange_arithmetics, that computes the result if inexact rounded to both directions. Also, if one of the - operands is +-0.0 and another +-inf, return +-0.0 rather than + operands is +-0.0 and another +-INF, return +-0.0 rather than NAN. */ static void @@ -1945,13 +1947,42 @@ class foperator_mult : public range_oper const REAL_VALUE_TYPE &lh_lb, const REAL_VALUE_TYPE &lh_ub, const REAL_VALUE_TYPE &rh_lb, - const REAL_VALUE_TYPE &rh_ub) const final override + const REAL_VALUE_TYPE &rh_ub, + relation_trio trio) const final override { REAL_VALUE_TYPE cp[8]; + bool is_square + = (trio.op1_op2 () == VREL_EQ + && real_equal (&lh_lb, &rh_lb) + && real_equal (&lh_ub, &rh_ub) + && real_isneg (&lh_lb) == real_isneg (&rh_lb) + && real_isneg (&lh_ub) == real_isneg (&rh_ub)); // Do a cross-product. frange_mult (type, cp[0], cp[4], lh_lb, rh_lb); - frange_mult (type, cp[1], cp[5], lh_lb, rh_ub); - frange_mult (type, cp[2], cp[6], lh_ub, rh_lb); + if (is_square) + { + // For x * x we can just do max (lh_lb * lh_lb, lh_ub * lh_ub) + // as maximum and -0.0 as minimum if 0.0 is in the range, + // otherwise min (lh_lb * lh_lb, lh_ub * lh_ub). + // -0.0 rather than 0.0 because VREL_EQ doesn't prove that + // x and y are bitwise equal, just that they compare equal. 
+ if (real_compare (LE_EXPR, &lh_lb, &dconst0) + && real_compare (GE_EXPR, &lh_ub, &dconst0)) + { + cp[1] = dconst0; + real_value_negate (&cp[1]); + } + else + cp[1] = cp[0]; + cp[2] = cp[0]; + cp[5] = cp[4]; + cp[6] = cp[4]; + } + else + { + frange_mult (type, cp[1], cp[5], lh_lb, rh_ub); + frange_mult (type, cp[2], cp[6], lh_ub, rh_lb); + } frange_mult (type, cp[3], cp[7], lh_ub, rh_ub); for (int i = 1; i < 3; ++i) { @@ -1965,18 +1996,27 @@ class foperator_mult : public range_oper lb = cp[0]; ub = cp[4]; - // [+-0, +-0] * [+INF,+INF] (or [-INF,-INF] or swapped is a known NaN. - if ((real_iszero (&lh_lb) && real_iszero (&lh_ub) - && real_isinf (&rh_lb) && real_isinf (&rh_ub, real_isneg (&rh_lb))) - || (real_iszero (&rh_lb) && real_iszero (&rh_ub) - && real_isinf (&lh_lb) && real_isinf (&lh_ub, real_isneg (&lh_lb)))) + // If both operands are the same, then we know it can be +-0.0, or +-INF, + // but not both at the same time, so it will never be invalid unless + // operand was already NAN. + if (is_square) + maybe_nan = false; + // [+-0, +-0] * [+INF,+INF] (or [-INF,-INF] or swapped is a known NAN. + else if ((real_iszero (&lh_lb) + && real_iszero (&lh_ub) + && real_isinf (&rh_lb) + && real_isinf (&rh_ub, real_isneg (&rh_lb))) + || (real_iszero (&rh_lb) + && real_iszero (&rh_ub) + && real_isinf (&lh_lb) + && real_isinf (&lh_ub, real_isneg (&lh_lb)))) { real_nan (&lb, NULL, 0, TYPE_MODE (type)); ub = lb; maybe_nan = true; } // Otherwise, if one range includes zero and the other ends with +-INF, - // it is a maybe NaN. + // it is a maybe NAN. 
else if (real_compare (LE_EXPR, &lh_lb, &dconst0) && real_compare (GE_EXPR, &lh_ub, &dconst0) && (real_isinf (&rh_lb) || real_isinf (&rh_ub))) --- gcc/range-op.h.jj 2022-11-09 11:22:42.867624633 +0100 +++ gcc/range-op.h 2022-11-09 20:20:02.266964633 +0100 @@ -123,7 +123,8 @@ public: const REAL_VALUE_TYPE &lh_lb, const REAL_VALUE_TYPE &lh_ub, const REAL_VALUE_TYPE &rh_lb, - const REAL_VALUE_TYPE &rh_ub) const; + const REAL_VALUE_TYPE &rh_ub, + relation_trio) const; // Unary operations have the range of the LHS as op2. virtual bool fold_range (irange &r, tree type, const frange &lh, We determine the right range (I think), but then it only helps to optimize away the call to the sqrt function, not the actual comparison (presumably for fear that an sNaN could appear there). If frange also tracked a maybe-sNaN flag (and cleared it, say, on all binops or unops other than the operations that might not trigger an exception/quiet it), perhaps we could optimize that. Or, without frange's help, just by assuming that, say, the result of a binary floating-point operation can't ever be an sNaN.