On Tue, 12 Jan 2021, Jakub Jelinek wrote: > Hi! > > We already had x != 0 && y != 0 to (x | y) != 0 and > x != -1 && y != -1 to (x & y) != -1 and > x < 32U && y < 32U to (x | y) < 32U, this patch adds signed > x < 0 && y < 0 to (x | y) < 0. In that case, the low/high seem to be > always the same and just in_p indicates whether it is >= 0 or < 0, > also, all types in the same bucket (same precision) should be type > compatible, but we can have some >= 0 and some < 0 comparison mixed, > so the patch handles that by using the right BIT_IOR_EXPR or BIT_AND_EXPR > and doing one set of < 0 or >= 0 first, then BIT_NOT_EXPR and then the other > one. I had to move optimize_range_tests_var_bound before this optimization > because that one deals with signed a >= 0 && a < b, and limited it to the > last reassoc pass as reassoc itself can't virtually undo this optimization > yet (and not sure if vrp would be able to). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Richard. > 2021-01-12 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/95731 > * tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize > x < 0 && y < 0 && z < 0 into (x | y | z) < 0 for signed x, y, z. > (optimize_range_tests): Call optimize_range_tests_cmp_bitwise > only after optimize_range_tests_var_bound. > > * gcc.dg/tree-ssa/pr95731.c: New test. > * gcc.c-torture/execute/pr95731.c: New test. > > --- gcc/tree-ssa-reassoc.c.jj 2021-01-11 10:35:02.204501369 +0100 > +++ gcc/tree-ssa-reassoc.c 2021-01-11 15:20:17.578155827 +0100 > @@ -3320,7 +3320,8 @@ optimize_range_tests_to_bit_test (enum t > /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 > and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. > Also, handle x < C && y < C && z < C where C is power of two as > - (x | y | z) < C. */ > + (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0 > + as (x | y | z) < 0. */ > > static bool > optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int > length, > @@ -3340,13 +3341,13 @@ optimize_range_tests_cmp_bitwise (enum t > > if (ranges[i].exp == NULL_TREE > || TREE_CODE (ranges[i].exp) != SSA_NAME > - || !ranges[i].in_p > || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 > || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE) > continue; > > if (ranges[i].low != NULL_TREE > && ranges[i].high != NULL_TREE > + && ranges[i].in_p > && tree_int_cst_equal (ranges[i].low, ranges[i].high)) > { > idx = !integer_zerop (ranges[i].low); > @@ -3354,7 +3355,8 @@ optimize_range_tests_cmp_bitwise (enum t > continue; > } > else if (ranges[i].high != NULL_TREE > - && TREE_CODE (ranges[i].high) == INTEGER_CST) > + && TREE_CODE (ranges[i].high) == INTEGER_CST > + && ranges[i].in_p) > { > wide_int w = wi::to_wide (ranges[i].high); > int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)); > @@ -3370,10 +3372,20 @@ optimize_range_tests_cmp_bitwise (enum t > && integer_zerop (ranges[i].low)))) > continue; > } 
> + else if (ranges[i].high == NULL_TREE > + && ranges[i].low != NULL_TREE > + /* Perform this optimization only in the last > + reassoc pass, as it interferes with the reassociation > + itself or could also with VRP etc. which might not > + be able to virtually undo the optimization. */ > + && !reassoc_insert_powi_p > + && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) > + && integer_zerop (ranges[i].low)) > + idx = 3; > else > continue; > > - b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx; > + b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx; > if (buckets.length () <= b) > buckets.safe_grow_cleared (b + 1, true); > if (chains.length () <= (unsigned) i) > @@ -3386,7 +3398,7 @@ optimize_range_tests_cmp_bitwise (enum t > if (i && chains[i - 1]) > { > int j, k = i; > - if ((b % 3) == 2) > + if ((b % 4) == 2) > { > /* When ranges[X - 1].high + 1 is a power of two, > we need to process the same bucket up to > @@ -3439,6 +3451,19 @@ optimize_range_tests_cmp_bitwise (enum t > { > tree type = TREE_TYPE (ranges[j - 1].exp); > strict_overflow_p |= ranges[j - 1].strict_overflow_p; > + if ((b % 4) == 3) > + { > + /* For the signed < 0 cases, the types should be > + really compatible (all signed with the same precision, > + instead put ranges that have different in_p from > + k first. 
*/ > + if (!useless_type_conversion_p (type1, type)) > + continue; > + if (ranges[j - 1].in_p != ranges[k - 1].in_p) > + candidates.safe_push (&ranges[j - 1]); > + type2 = type1; > + continue; > + } > if (j == k > || useless_type_conversion_p (type1, type)) > ; > @@ -3456,6 +3481,14 @@ optimize_range_tests_cmp_bitwise (enum t > tree type = TREE_TYPE (ranges[j - 1].exp); > if (j == k) > continue; > + if ((b % 4) == 3) > + { > + if (!useless_type_conversion_p (type1, type)) > + continue; > + if (ranges[j - 1].in_p == ranges[k - 1].in_p) > + candidates.safe_push (&ranges[j - 1]); > + continue; > + } > if (useless_type_conversion_p (type1, type)) > ; > else if (type2 == NULL_TREE > @@ -3471,6 +3504,7 @@ optimize_range_tests_cmp_bitwise (enum t > FOR_EACH_VEC_ELT (candidates, id, r) > { > gimple *g; > + enum tree_code code; > if (id == 0) > { > op = r->exp; > @@ -3478,7 +3512,8 @@ optimize_range_tests_cmp_bitwise (enum t > } > if (id == l) > { > - g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); > + code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; > + g = gimple_build_assign (make_ssa_name (type1), code, op); > gimple_seq_add_stmt_without_update (&seq, g); > op = gimple_assign_lhs (g); > } > @@ -3490,21 +3525,24 @@ optimize_range_tests_cmp_bitwise (enum t > gimple_seq_add_stmt_without_update (&seq, g); > exp = gimple_assign_lhs (g); > } > + if ((b % 4) == 3) > + code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR; > + else > + code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR; > g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), > - (b % 3) == 1 > - ? 
BIT_AND_EXPR : BIT_IOR_EXPR, op, exp); > + code, op, exp); > gimple_seq_add_stmt_without_update (&seq, g); > op = gimple_assign_lhs (g); > } > candidates.pop (); > if (update_range_test (&ranges[k - 1], NULL, candidates.address (), > candidates.length (), opcode, ops, op, > - seq, true, ranges[k - 1].low, > + seq, ranges[k - 1].in_p, ranges[k - 1].low, > ranges[k - 1].high, strict_overflow_p)) > any_changes = true; > else > gimple_seq_discard (seq); > - if ((b % 3) == 2 && buckets[b] != i) > + if ((b % 4) == 2 && buckets[b] != i) > /* There is more work to do for this bucket. */ > b--; > } > @@ -3909,10 +3947,10 @@ optimize_range_tests (enum tree_code opc > if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) > any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, > ops, ranges); > - any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, > - ops, ranges); > any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, > ranges, first_bb); > + any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, > + ops, ranges); > > if (any_changes && opcode != ERROR_MARK) > { > --- gcc/testsuite/gcc.dg/tree-ssa/pr95731.c.jj 2021-01-11 > 15:27:37.587163558 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr95731.c 2021-01-11 15:27:15.003419789 > +0100 > @@ -0,0 +1,22 @@ > +/* PR tree-optimization/95731 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > +/* { dg-final { scan-tree-dump-times " >= 0\| < 0" 6 "optimized" } } */ > + > +int > +foo (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 && y >= 0 && z < 0 && u < 0 && w >= 0 && v < 0; > +} > + > +int > +bar (int x, int y, int z, int w, long long u, long long v) > +{ > + return u >= 0 && x >= 0 && y >= 0 && v < 0 && z >= 0 && w >= 0; > +} > + > +int > +baz (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 || u < 0 || y >= 0 || v < 0 || z >= 0 || w >= 0; > +} > --- 
gcc/testsuite/gcc.c-torture/execute/pr95731.c.jj 2021-01-11 > 15:28:07.108828606 +0100 > +++ gcc/testsuite/gcc.c-torture/execute/pr95731.c 2021-01-11 > 15:38:31.063806122 +0100 > @@ -0,0 +1,40 @@ > +/* PR tree-optimization/95731 */ > + > +__attribute__((noipa)) int > +foo (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 && y >= 0 && z < 0 && u < 0 && w >= 0 && v < 0; > +} > + > +__attribute__((noipa)) int > +bar (int x, int y, int z, int w, long long u, long long v) > +{ > + return u >= 0 && x >= 0 && y >= 0 && v < 0 && z >= 0 && w >= 0; > +} > + > +__attribute__((noipa)) int > +baz (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 || u < 0 || y >= 0 || v < 0 || z >= 0 || w >= 0; > +} > + > +int > +main () > +{ > + int i; > + for (i = 0; i < 64; i++) > + { > + int a = foo ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456, > + (i & 4) ? -123 : 456, (i & 8) ? -123 : 456, > + (i & 16) ? -123 : 456, (i & 32) ? -123 : 456); > + int b = bar ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456, > + (i & 4) ? -123 : 456, (i & 8) ? -123 : 456, > + (i & 16) ? -123 : 456, (i & 32) ? -123 : 456); > + int c = baz ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456, > + (i & 4) ? -123 : 456, (i & 8) ? -123 : 456, > + (i & 16) ? -123 : 456, (i & 32) ? -123 : 456); > + if (a != (i == 52) || b != (i == 32) || c != (i != 15)) > + __builtin_abort (); > + } > + return 0; > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)