Simplify more EXACT_DIV_EXPR comparisons

Marc Glisse Sun, 19 May 2019 09:16:47 -0700

Hello,

2 pieces:

- the first one handles the case where the denominator is negative. Itdoesn't happen often with exact_div, so I don't handle it everywhere, butthis one looked trivial

- handle the case where a pointer difference is cast to an unsigned typebefore being compared to a constant (I hit this in std::vector). With somerange info we could probably handle some non-constant cases as well...


The second piece breaks Walloca-13.c (-Walloca-larger-than=100 -O2)

void f (void*);
void g (int *p, int *q)
{
  __SIZE_TYPE__ n = (__SIZE_TYPE__)(p - q);
  if (n < 100)
    f (__builtin_alloca (n));
}

At the time of walloca2, we have

  _1 = p_5(D) - q_6(D);
  # RANGE [-2305843009213693952, 2305843009213693951]
  _2 = _1 /[ex] 4;
  # RANGE ~[2305843009213693952, 16140901064495857663]
  n_7 = (long unsigned intD.10) _2;
  _11 = (long unsigned intD.10) _1;
  if (_11 <= 396)
[...]
  _3 = allocaD.1059 (n_7);

and warn. However, DOM3 later produces

  _1 = p_5(D) - q_6(D);
  _11 = (long unsigned intD.10) _1;
  if (_11 <= 396)
[...]
  # RANGE [0, 99] NONZERO 127
  _2 = _1 /[ex] 4;
  # RANGE [0, 99] NONZERO 127
  n_7 = (long unsigned intD.10) _2;
  _3 = allocaD.1059 (n_7);

so I am tempted to say that the walloca2 pass is too early, xfail thetestcase and file an issue...

A single_use restriction would also probably fix this testcase, but Idon't think that's a good idea, the new code is better because thedivision is now in the branch.


2019-05-20  Marc Glisse  <[email protected]>

gcc/
        * match.pd (X/[ex]D<Y/[ex]D): Handle negative denominator.
        ((size_t)(A /[ex] B) CMP C): New transformation.

gcc/testsuite/
        * gcc.dg/tree-ssa/cmpexactdiv-3.c: New file.
        * gcc.dg/tree-ssa/cmpexactdiv-4.c: New file.

--
Marc Glisse

Index: gcc/match.pd
===================================================================
--- gcc/match.pd        (revision 271376)
+++ gcc/match.pd        (working copy)
@@ -1490,21 +1490,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && (wi::to_wide (@2)
            == wi::max_value (TYPE_PRECISION (TREE_TYPE (@0)), SIGNED) - 1))
     (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
      (icmp (convert:stype @0) { build_int_cst (stype, 0); })))))
 
 /* X / 4 < Y / 4 iff X < Y when the division is known to be exact.  */
 (for cmp (simple_comparison)
  (simplify
   (cmp (exact_div @0 INTEGER_CST@2) (exact_div @1 @2))
   (if (wi::gt_p (wi::to_wide (@2), 0, TYPE_SIGN (TREE_TYPE (@2))))
-   (cmp @0 @1))))
+   (cmp @0 @1)
+   (if (wi::lt_p (wi::to_wide (@2), 0, TYPE_SIGN (TREE_TYPE (@2))))
+    (cmp @1 @0)))))
 
 /* X / C1 op C2 into a simple range test.  */
 (for cmp (simple_comparison)
  (simplify
   (cmp (trunc_div:s @0 INTEGER_CST@1) INTEGER_CST@2)
   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
        && integer_nonzerop (@1)
        && !TREE_OVERFLOW (@1)
        && !TREE_OVERFLOW (@2))
    (with { tree lo, hi; bool neg_overflow;
@@ -3626,20 +3634,47 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       wi::overflow_type ovf;
       wide_int prod = wi::mul (wi::to_wide (@2), wi::to_wide (@1),
                               TYPE_SIGN (TREE_TYPE (@1)), &ovf);
     }
     (if (ovf)
      { constant_boolean_node (wi::lt_p (wi::to_wide (@2), 0,
                                        TYPE_SIGN (TREE_TYPE (@2)))
                              != (cmp == LT_EXPR || cmp == LE_EXPR), type); }
      (cmp @0 { wide_int_to_tree (TREE_TYPE (@0), prod); }))))))
 
+/* Fold (size_t)(A /[ex] B) CMP C to (size_t)A CMP (size_t)B * C or A CMP' 0.
+
+   For small C (less than max/B), this is (size_t)A CMP (size_t)B * C.
+   For large C (more than min/B+2^size), this is also true, with the
+   multiplication computed modulo 2^size.
+   For intermediate C, this just tests the sign of A.  */
+(for cmp  (lt le gt ge)
+     cmp2 (ge ge lt lt)
+ (simplify
+  (cmp (convert (exact_div @0 INTEGER_CST@1)) INTEGER_CST@2)
+  (if (tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2))
+       && TYPE_UNSIGNED (TREE_TYPE (@2)) && !TYPE_UNSIGNED (TREE_TYPE (@0))
+       && wi::gt_p (wi::to_wide (@1), 0, TYPE_SIGN (TREE_TYPE (@1))))
+   (with
+    {
+      tree utype = TREE_TYPE (@2);
+      wide_int denom = wi::to_wide (@1);
+      wide_int right = wi::to_wide (@2);
+      wide_int smax = wi::sdiv_trunc (wi::max_value (TREE_TYPE (@0)), denom);
+      wide_int smin = wi::sdiv_trunc (wi::min_value (TREE_TYPE (@0)), denom);
+      bool small = wi::leu_p (right, smax);
+      bool large = wi::geu_p (right, smin);
+    }
+    (if (small || large)
+     (cmp (convert:utype @0) (mult @2 (convert @1)))
+     (cmp2 @0 { build_zero_cst (TREE_TYPE (@0)); }))))))
+
 /* Unordered tests if either argument is a NaN.  */
 (simplify
  (bit_ior (unordered @0 @0) (unordered @1 @1))
  (if (types_match (@0, @1))
   (unordered @0 @1)))
 (simplify
  (bit_and (ordered @0 @0) (ordered @1 @1))
  (if (types_match (@0, @1))
   (ordered @0 @1)))
 (simplify
Index: gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-3.c       (nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-3.c       (working copy)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized-raw" } */
+
+int f(int*a,int*b){
+  if(sizeof(__SIZE_TYPE__)!=sizeof(__PTRDIFF_TYPE__)) return -1;
+  __SIZE_TYPE__ d = b - a;
+  return d >= 5;
+}
+
+/* { dg-final { scan-tree-dump-not "exact_div_expr" "optimized" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-4.c       (nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-4.c       (working copy)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized-raw" } */
+
+int f(int*a,int*b,int*c){
+  __PTRDIFF_TYPE__ x = -(b - a);
+  __PTRDIFF_TYPE__ y = -(c - a);
+  return x < y;
+}
+
+/* { dg-final { scan-tree-dump-not "exact_div_expr" "optimized" } } */

Simplify more EXACT_DIV_EXPR comparisons

Reply via email to