91734, take 2)

Jakub Jelinek Fri, 20 Sep 2019 23:14:36 -0700

On Mon, Sep 16, 2019 at 08:56:58AM +0200, Richard Biener wrote:
> > As mentioned in the PR, the sqrt (x) < c optimization into x < c*c
> > sometimes breaks the boundary case, if c2=c*c is inexact then in some cases
> > we need to optimize it into x <= c*c rather than x < c*c.  The original
> > bugreport is when c is small and c2 is 0.0, then obviously we need <= 0.0
> > rather than < 0.0, but the testcase includes another example where it makes
> > a difference, plus has a >= testcase too.
> > 
> > Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
> 
> I was hoping Joseph might chime in here...  anyway, does this assume
> round-to-nearest or does it work with round to +-Inf as well?  I
> realize this all is under flag_unsafe_math_optimizations, but
> this flag is notoriously underspecified...  So the question is
> whether we should disable the transform if c*c isn't exact and
> flag_rounding_math?  The transform also doesn't seem to guard
> against isnan (c) (-funsafe-math-optimizations sets
> -fno-trapping-math and -fno-signed-zeros but not -ffinite-math-only
> or disables itself on -frounding-math)


Here is an updated patch, which on top of the previous patch:
1) punts for -frounding-math
2) punts for sqrt comparisons against NaN constant
3) for the c*c inexact also handles the other two comparisons that
apparently need to be handled too
4) for all 4 comparisons also checks nexttoward (c2, 0.0) or nexttoward (c2,
inf) depending on the comparison kind, because as Joseph correctly noted,
with rounding to nearest up to 3 different floating point values can have
the same sqrt result, and if c2 is the middle one from them, we need to use
the 1 ulp smaller or larger one in the comparison
5) had to adjust the testcase, because while it worked fine on powerpc64le,
on x86_64 if the test is linked with -ffast-math/-Ofast etc., crtfastmath.o
is linked in and subnormals are flushed to zero, which is not what we want
for the testcase (at least for a subset of the tests).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

BTW, I've used attached programs to look for the problematic cases on random
float/doubles and the cases the patch handles seem to be the only
problematic ones, there is never need to go further than one nexttoward to 0
or inf.

2019-09-21  Jakub Jelinek  <[email protected]>

        PR tree-optimization/91734
        * generic-match-head.c: Include fold-const-call.h.
        * match.pd (sqrt(x) cmp c): Check the boundary value and
        in case inexact computation of c*c affects comparison of the boundary,
        turn LT_EXPR into LE_EXPR, GE_EXPR into GT_EXPR, LE_EXPR into LT_EXPR
        or GT_EXPR into GE_EXPR.  Punt for sqrt comparisons against NaN and
        for -frounding-math.  For c2, try the next smaller or larger floating
        point constant depending on comparison code and if it has the same
        sqrt as c2, use it instead of c2.

        * gcc.dg/pr91734.c: New test.

--- gcc/generic-match-head.c.jj 2019-09-20 12:24:56.376189996 +0200
+++ gcc/generic-match-head.c    2019-09-20 12:43:08.017273166 +0200
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
 #include "cgraph.h"
 #include "vec-perm-indices.h"
 #include "fold-const.h"
+#include "fold-const-call.h"
 #include "stor-layout.h"
 #include "tree-dfa.h"
 #include "builtins.h"
--- gcc/match.pd.jj     2019-09-20 12:25:27.323710388 +0200
+++ gcc/match.pd        2019-09-20 17:20:22.974316837 +0200
@@ -3711,8 +3711,7 @@ (define_operator_list COND_TERNARY
      (cmp { tem; } @1)))))
 
  /* Fold comparisons against built-in math functions.  */
- (if (flag_unsafe_math_optimizations
-      && ! flag_errno_math)
+ (if (flag_unsafe_math_optimizations && ! flag_errno_math)
   (for sq (SQRT)
    (simplify
     (cmp (sq @0) REAL_CST@1)
@@ -3747,56 +3746,108 @@ (define_operator_list COND_TERNARY
          if x is negative or NaN.  Due to -funsafe-math-optimizations,
          the results for other x follow from natural arithmetic.  */
        (cmp @0 @1)))
-     (if (cmp == GT_EXPR || cmp == GE_EXPR)
+     (if ((cmp == LT_EXPR
+          || cmp == LE_EXPR
+          || cmp == GT_EXPR
+          || cmp == GE_EXPR)
+         && !REAL_VALUE_ISNAN (TREE_REAL_CST (@1))
+         /* Give up for -frounding-math.  */
+         && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (@0)))
       (with
        {
-         REAL_VALUE_TYPE c2;
+        REAL_VALUE_TYPE c2;
+        enum tree_code ncmp = cmp;
+        const real_format *fmt
+          = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (@0)));
         real_arithmetic (&c2, MULT_EXPR,
                          &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
-        real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
+        real_convert (&c2, fmt, &c2);
+        /* See PR91734: if c2 is inexact and sqrt(c2) < c (or sqrt(c2) >= c),
+           then change LT_EXPR into LE_EXPR or GE_EXPR into GT_EXPR.  */
+        if (!REAL_VALUE_ISINF (c2))
+          {
+            tree c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+                                       build_real (TREE_TYPE (@0), c2));
+            if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+              ncmp = ERROR_MARK;
+            else if ((cmp == LT_EXPR || cmp == GE_EXPR)
+                     && real_less (&TREE_REAL_CST (c3), &TREE_REAL_CST (@1)))
+              ncmp = cmp == LT_EXPR ? LE_EXPR : GT_EXPR;
+            else if ((cmp == LE_EXPR || cmp == GT_EXPR)
+                     && real_less (&TREE_REAL_CST (@1), &TREE_REAL_CST (c3)))
+              ncmp = cmp == LE_EXPR ? LT_EXPR : GE_EXPR;
+            else
+              {
+                /* With rounding to even, sqrt of up to 3 different values
+                   gives the same normal result, so in some cases c2 needs
+                   to be adjusted.  */
+                REAL_VALUE_TYPE c2alt, tow;
+                if (cmp == LT_EXPR || cmp == GE_EXPR)
+                  tow = dconst0;
+                else
+                  real_inf (&tow);
+                real_nextafter (&c2alt, fmt, &c2, &tow);
+                real_convert (&c2alt, fmt, &c2alt);
+                if (REAL_VALUE_ISINF (c2alt))
+                  ncmp = ERROR_MARK;
+                else
+                  {
+                    c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+                                          build_real (TREE_TYPE (@0), c2alt));
+                    if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+                      ncmp = ERROR_MARK;
+                    else if (real_equal (&TREE_REAL_CST (c3),
+                                         &TREE_REAL_CST (@1)))
+                      c2 = c2alt;
+                  }
+              }
+          }
        }
-       (if (REAL_VALUE_ISINF (c2))
-       /* sqrt(x) > y is x == +Inf, when y is very large.  */
-       (if (HONOR_INFINITIES (@0))
-        (eq @0 { build_real (TREE_TYPE (@0), c2); })
-        { constant_boolean_node (false, type); })
-       /* sqrt(x) > c is the same as x > c*c.  */
-       (cmp @0 { build_real (TREE_TYPE (@0), c2); }))))
-     (if (cmp == LT_EXPR || cmp == LE_EXPR)
-      (with
-       {
-                REAL_VALUE_TYPE c2;
-        real_arithmetic (&c2, MULT_EXPR,
-                         &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
-        real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
-       }
-       (if (REAL_VALUE_ISINF (c2))
-        (switch
-        /* sqrt(x) < y is always true, when y is a very large
-           value and we don't care about NaNs or Infinities.  */
-        (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
-         { constant_boolean_node (true, type); })
-        /* sqrt(x) < y is x != +Inf when y is very large and we
-           don't care about NaNs.  */
-        (if (! HONOR_NANS (@0))
-         (ne @0 { build_real (TREE_TYPE (@0), c2); }))
-        /* sqrt(x) < y is x >= 0 when y is very large and we
-           don't care about Infinities.  */
-        (if (! HONOR_INFINITIES (@0))
-         (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
-        /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
-        (if (GENERIC)
-         (truth_andif
-          (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
-          (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
-       /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
-       (if (! HONOR_NANS (@0))
-        (cmp @0 { build_real (TREE_TYPE (@0), c2); })
-        /* sqrt(x) < c is the same as x >= 0 && x < c*c.  */
-        (if (GENERIC)
-         (truth_andif
-          (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
-          (cmp @0 { build_real (TREE_TYPE (@0), c2); })))))))))
+       (if (cmp == GT_EXPR || cmp == GE_EXPR)
+       (if (REAL_VALUE_ISINF (c2))
+        /* sqrt(x) > y is x == +Inf, when y is very large.  */
+        (if (HONOR_INFINITIES (@0))
+         (eq @0 { build_real (TREE_TYPE (@0), c2); })
+         { constant_boolean_node (false, type); })
+        /* sqrt(x) > c is the same as x > c*c.  */
+        (if (ncmp != ERROR_MARK)
+         (if (ncmp == GE_EXPR)
+          (ge @0 { build_real (TREE_TYPE (@0), c2); })
+          (gt @0 { build_real (TREE_TYPE (@0), c2); }))))
+       /* else if (cmp == LT_EXPR || cmp == LE_EXPR)  */
+       (if (REAL_VALUE_ISINF (c2))
+        (switch
+         /* sqrt(x) < y is always true, when y is a very large
+            value and we don't care about NaNs or Infinities.  */
+         (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
+          { constant_boolean_node (true, type); })
+         /* sqrt(x) < y is x != +Inf when y is very large and we
+            don't care about NaNs.  */
+         (if (! HONOR_NANS (@0))
+          (ne @0 { build_real (TREE_TYPE (@0), c2); }))
+         /* sqrt(x) < y is x >= 0 when y is very large and we
+            don't care about Infinities.  */
+         (if (! HONOR_INFINITIES (@0))
+          (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
+         /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
+         (if (GENERIC)
+          (truth_andif
+           (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+           (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
+        /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
+        (if (ncmp != ERROR_MARK && ! HONOR_NANS (@0))
+         (if (ncmp == LT_EXPR)
+          (lt @0 { build_real (TREE_TYPE (@0), c2); })
+          (le @0 { build_real (TREE_TYPE (@0), c2); }))
+         /* sqrt(x) < c is the same as x >= 0 && x < c*c.  */
+         (if (ncmp != ERROR_MARK && GENERIC)
+          (if (ncmp == LT_EXPR)
+           (truth_andif
+            (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+            (lt @0 { build_real (TREE_TYPE (@0), c2); }))
+           (truth_andif
+            (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+            (le @0 { build_real (TREE_TYPE (@0), c2); })))))))))))
    /* Transform sqrt(x) cmp sqrt(y) -> x cmp y.  */
    (simplify
     (cmp (sq @0) (sq @1))
--- gcc/testsuite/gcc.dg/pr91734.c.jj   2019-09-20 12:43:08.019273135 +0200
+++ gcc/testsuite/gcc.dg/pr91734.c      2019-09-21 07:57:26.102273700 +0200
@@ -0,0 +1,97 @@
+/* PR tree-optimization/91734 */
+/* { dg-do run } */
+/* { dg-add-options ieee } */
+/* { dg-additional-options "-O2 -std=gnu99" } */
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f1 (float x)
+{
+  return __builtin_sqrtf (x) < __FLT_MIN__;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f2 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.2dd3d0p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f3 (float x)
+{
+  return __builtin_sqrtf (x) >= 0x1.2dd3d0p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f4 (float x)
+{
+  return __builtin_sqrtf (x) >= 0x1.5642e6p+54f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f5 (float x)
+{
+  return __builtin_sqrtf (x) > 0x1.5642e6p+54f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f6 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.4da1cp-19f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f7 (float x)
+{
+  return __builtin_sqrtf (x) <= 0x1.4da1cp-19f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f8 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.50cb62p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f9 (float x)
+{
+  return __builtin_sqrtf (x) <= 0x1.4fc00cp-73f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f10 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.001002p+0f;
+}
+
+int
+main ()
+{
+  if (__FLT_RADIX__ != 2
+      || __FLT_MANT_DIG__ != 24
+      || __FLT_MIN_EXP__ != -125
+      || __FLT_MAX_EXP__ != 128
+      || __FLT_HAS_DENORM__ != 1
+      || __FLT_HAS_INFINITY__ != 1)
+    return 0;
+  if (!f1 (0.0f) || f1 (0x1.0p-149f))
+    __builtin_abort ();
+  if (!f2 (0x1.63dbc0p-130f))
+    __builtin_abort ();
+  if (f3 (0x1.63dbc0p-130f))
+    __builtin_abort ();
+  if (!f4 (0x1.c996d0p+108f) || !f4 (0x1.c996cep+108f) || f4 
(0x1.c996ccp+108f))
+    __builtin_abort ();
+  if (f5 (0x1.c996d0p+108f) || f5 (0x1.c996d2p+108f) || !f5 (0x1.c996d4p+108f))
+    __builtin_abort ();
+  if (!f6 (0x1.b2ce3p-38f) || f6 (0x1.b2ce32p-38f) || f6 (0x1.b2ce34p-38f))
+    __builtin_abort ();
+  if (!f7 (0x1.b2ce3p-38f) || !f7 (0x1.b2ce34p-38f) || !f7 (0x1.b2ce36p-38f) 
|| f7 (0x1.b2ce38p-38f))
+    __builtin_abort ();
+  if (!f8 (0x1.bb166p-130f) || !f8 (0x1.bb168p-130f) || f8 (0x1.bb16ap-130f) 
|| f8 (0x1.bb16cp-130f))
+    __builtin_abort ();
+  if (!f9 (0x1.8p-146f) || !f9 (0x1.ap-146f) || f9 (0x1.cp-146f) || f9 
(0x1.ep-146f))
+    __builtin_abort ();
+  if (f10 (0x1.002004p+0f))
+    __builtin_abort ();
+  return 0;
+}


        Jakub

#include <stdlib.h>
#include <math.h>

int
main ()
{
  union U { float f; unsigned int i; } u;
  for (int i = 0; i < 10000000; i++)
    {
      u.i = ((unsigned) random () << 8) ^ random ();
      float c = u.f;
      if (!isnormal (c) || c < 0)
        continue;
      float c2 = c * c;
      for (int j = -15; j <= 15; j++)
        {
          float x = c2;
          if (j < 0)
            {
              for (int k = j; k != 0; k++)
                x = nexttowardf (x, -1.0f);
            }
          else if (j > 0)
            {
              for (int k = j; k != 0; k--)
                x = nexttowardf (x, __builtin_inff ());
            }
          if (x < 0)
            continue;
          if (isinf (c2))
            continue;
          float c3 = __builtin_sqrtf (c2);
          float c4 = c2, c5 = c2;
#ifdef FIXME
          c4 = nexttowardf (c2, 0.0);
          float c4s = __builtin_sqrtf (c4);
          if (c3 >= c && c4s == c)
            ;
          else
            c4 = c2;
          c5 = nexttowardf (c2, __builtin_inff ());
          float c5s = __builtin_sqrtf (c5);
          if (c3 <= c && !isinf (c5) && c5s == c)
            ;
          else
            c5 = c2;
          if (c3 < c
              && (__builtin_sqrtf (x) < c) == (x <= c2)
              && (__builtin_sqrtf (x) >= c) == (x > c2))
            ;
          else
#endif
          if ((__builtin_sqrtf (x) < c) != (x < c4))
            {
              if ((__builtin_sqrtf (x) >= c) != (x >= c4))
                __builtin_printf ("</>= c %.12a c4 %.12a x %.12a %d\n", c, c4, 
x, j);
              else
                __builtin_printf ("< c %.12a c4 %.12a x %.12a %d\n", c, c4, x, 
j);
            }
          else if ((__builtin_sqrtf (x) >= c) != (x >= c4))
            __builtin_printf (">= c %.12a c4 %.12a x %.12a %d\n", c, c4, x, j);
#ifdef FIXME
          if (c3 > c
              && (__builtin_sqrtf (x) <= c) == (x < c2)
              && (__builtin_sqrtf (x) > c) == (x >= c2))
            ;
          else
#endif
          if ((__builtin_sqrtf (x) <= c) != (x <= c5))
            {
              if ((__builtin_sqrtf (x) > c) != (x > c5))
                __builtin_printf ("<=/> c %.12a c5 %.12a x %.12a %d\n", c, c5, 
x, j);
              else
                __builtin_printf ("<= c %.12a c5 %.12a x %.12a %d\n", c, c5, x, 
j);
            }
          else if ((__builtin_sqrtf (x) > c) != (x > c5))
            __builtin_printf ("> c %.12a c5 %.12a x %.12a %d\n", c, c5, x, j);
        }
    }
  return 0;
}

#include <stdlib.h>
#include <math.h>

int
main ()
{
  union U { double f; unsigned long long int i; } u;
  for (int i = 0; i < 10000000; i++)
    {
      u.i = ((unsigned long long) random () << 40) ^ ((unsigned long long) 
random () << 20) ^ random ();
      double c = u.f;
      if (!isnormal (c) || c < 0)
        continue;
      double c2 = c * c;
      for (int j = -15; j <= 15; j++)
        {
          double x = c2;
          if (j < 0)
            {
              for (int k = j; k != 0; k++)
                x = nexttoward (x, -1.0f);
            }
          else if (j > 0)
            {
              for (int k = j; k != 0; k--)
                x = nexttoward (x, __builtin_inf ());
            }
          if (x < 0)
            continue;
          if (isinf (c2))
            continue;
          double c3 = __builtin_sqrt (c2);
          double c4 = c2, c5 = c2;
#ifdef FIXME
          double c4 = nexttoward (c2, 0.0);
          double c4s = __builtin_sqrt (c4);
          if (c3 >= c && c4s == c)
            ;
          else
            c4 = c2;
          double c5 = nexttoward (c2, __builtin_inf ());
          double c5s = __builtin_sqrt (c5);
          if (c3 <= c && !isinf (c5) && c5s == c)
            ;
          else
            c5 = c2;
          if (c3 < c
              && (__builtin_sqrt (x) < c) == (x <= c2)
              && (__builtin_sqrt (x) >= c) == (x > c2))
            ;
          else
#endif
          if ((__builtin_sqrt (x) < c) != (x < c4))
            {
              if ((__builtin_sqrt (x) >= c) != (x >= c4))
                __builtin_printf ("</>= c %.12a c4 %.12a x %.12a %d\n", c, c4, 
x, j);
              else
                __builtin_printf ("< c %.12a c4 %.12a x %.12a %d\n", c, c4, x, 
j);
            }
          else if ((__builtin_sqrt (x) >= c) != (x >= c4))
            __builtin_printf (">= c %.12a c4 %.12a x %.12a %d\n", c, c4, x, j);
#ifdef FIXME
          if (c3 > c
              && (__builtin_sqrt (x) <= c) == (x < c2)
              && (__builtin_sqrt (x) > c) == (x >= c2))
            ;
          else
#endif
          if ((__builtin_sqrt (x) <= c) != (x <= c5))
            {
              if ((__builtin_sqrt (x) > c) != (x > c5))
                __builtin_printf ("<=/> c %.12a c5 %.12a x %.12a %d\n", c, c5, 
x, j);
              else
                __builtin_printf ("<= c %.12a c5 %.12a x %.12a %d\n", c, c5, x, 
j);
            }
          else if ((__builtin_sqrt (x) > c) != (x > c5))
            __builtin_printf ("> c %.12a c5 %.12a x %.12a %d\n", c, c5, x, j);
        }
    }
  return 0;
}

[PATCH] Fix up sqrt(x) < c and sqrt(x) >= c match.pd folding (PR tree-optimization/91734, take 2)

Reply via email to