On Tue, 12 Jan 2021, Jakub Jelinek wrote: > Hi! > > We already had x != 0 && y != 0 to (x | y) != 0 and > x != -1 && y != -1 to (x & y) != -1 and > x < 32U && y < 32U to (x | y) < 32U, this patch adds signed > x < 0 && y < 0 to (x | y) < 0. In that case, the low/high seem to be > always the same and just in_p indicates whether it is >= 0 or < 0, > also, all types in the same bucket (same precision) should be type > compatible, but we can have some >= 0 and some < 0 comparison mixed, > so the patch handles that by using the right BIT_IOR_EXPR or BIT_AND_EXPR > and doing one set of < 0 or >= 0 first, then BIT_NOT_EXPR and then the other > one. I had to move optimize_range_tests_var_bound before this optimization > because that one deals with signed a >= 0 && a < b, and limited it to the > last reassoc pass as reassoc itself can't virtually undo this optimization > yet (and not sure if vrp would be able to). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Richard. > 2021-01-12 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/95731 > * tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize > x < 0 && y < 0 && z < 0 into (x | y | z) < 0 for signed x, y, z. > (optimize_range_tests): Call optimize_range_tests_cmp_bitwise > only after optimize_range_tests_var_bound. > > * gcc.dg/tree-ssa/pr95731.c: New test. > * gcc.c-torture/execute/pr95731.c: New test. > > --- gcc/tree-ssa-reassoc.c.jj 2021-01-11 10:35:02.204501369 +0100 > +++ gcc/tree-ssa-reassoc.c 2021-01-11 15:20:17.578155827 +0100 > @@ -3320,7 +3320,8 @@ optimize_range_tests_to_bit_test (enum t > /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 > and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. > Also, handle x < C && y < C && z < C where C is power of two as > - (x | y | z) < C. */ > + (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0 > + as (x | y | z) < 0. */ > > static bool > optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int > length, > @@ -3340,13 +3341,13 @@ optimize_range_tests_cmp_bitwise (enum t > > if (ranges[i].exp == NULL_TREE > || TREE_CODE (ranges[i].exp) != SSA_NAME > - || !ranges[i].in_p > || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 > || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE) > continue; > > if (ranges[i].low != NULL_TREE > && ranges[i].high != NULL_TREE > + && ranges[i].in_p > && tree_int_cst_equal (ranges[i].low, ranges[i].high)) > { > idx = !integer_zerop (ranges[i].low); > @@ -3354,7 +3355,8 @@ optimize_range_tests_cmp_bitwise (enum t > continue; > } > else if (ranges[i].high != NULL_TREE > - && TREE_CODE (ranges[i].high) == INTEGER_CST) > + && TREE_CODE (ranges[i].high) == INTEGER_CST > + && ranges[i].in_p) > { > wide_int w = wi::to_wide (ranges[i].high); > int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)); > @@ -3370,10 +3372,20 @@ optimize_range_tests_cmp_bitwise (enum t > && integer_zerop (ranges[i].low)))) > continue; > } 
> + else if (ranges[i].high == NULL_TREE > + && ranges[i].low != NULL_TREE > + /* Perform this optimization only in the last > + reassoc pass, as it interferes with the reassociation > + itself or could also with VRP etc. which might not > + be able to virtually undo the optimization. */ > + && !reassoc_insert_powi_p > + && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) > + && integer_zerop (ranges[i].low)) > + idx = 3; > else > continue; > > - b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx; > + b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx; > if (buckets.length () <= b) > buckets.safe_grow_cleared (b + 1, true); > if (chains.length () <= (unsigned) i) > @@ -3386,7 +3398,7 @@ optimize_range_tests_cmp_bitwise (enum t > if (i && chains[i - 1]) > { > int j, k = i; > - if ((b % 3) == 2) > + if ((b % 4) == 2) > { > /* When ranges[X - 1].high + 1 is a power of two, > we need to process the same bucket up to > @@ -3439,6 +3451,19 @@ optimize_range_tests_cmp_bitwise (enum t > { > tree type = TREE_TYPE (ranges[j - 1].exp); > strict_overflow_p |= ranges[j - 1].strict_overflow_p; > + if ((b % 4) == 3) > + { > + /* For the signed < 0 cases, the types should be > + really compatible (all signed with the same precision, > + instead put ranges that have different in_p from > + k first. 
*/ > + if (!useless_type_conversion_p (type1, type)) > + continue; > + if (ranges[j - 1].in_p != ranges[k - 1].in_p) > + candidates.safe_push (&ranges[j - 1]); > + type2 = type1; > + continue; > + } > if (j == k > || useless_type_conversion_p (type1, type)) > ; > @@ -3456,6 +3481,14 @@ optimize_range_tests_cmp_bitwise (enum t > tree type = TREE_TYPE (ranges[j - 1].exp); > if (j == k) > continue; > + if ((b % 4) == 3) > + { > + if (!useless_type_conversion_p (type1, type)) > + continue; > + if (ranges[j - 1].in_p == ranges[k - 1].in_p) > + candidates.safe_push (&ranges[j - 1]); > + continue; > + } > if (useless_type_conversion_p (type1, type)) > ; > else if (type2 == NULL_TREE > @@ -3471,6 +3504,7 @@ optimize_range_tests_cmp_bitwise (enum t > FOR_EACH_VEC_ELT (candidates, id, r) > { > gimple *g; > + enum tree_code code; > if (id == 0) > { > op = r->exp; > @@ -3478,7 +3512,8 @@ optimize_range_tests_cmp_bitwise (enum t > } > if (id == l) > { > - g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); > + code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; > + g = gimple_build_assign (make_ssa_name (type1), code, op); > gimple_seq_add_stmt_without_update (&seq, g); > op = gimple_assign_lhs (g); > } > @@ -3490,21 +3525,24 @@ optimize_range_tests_cmp_bitwise (enum t > gimple_seq_add_stmt_without_update (&seq, g); > exp = gimple_assign_lhs (g); > } > + if ((b % 4) == 3) > + code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR; > + else > + code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR; > g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), > - (b % 3) == 1 > - ? 
BIT_AND_EXPR : BIT_IOR_EXPR, op, exp); > + code, op, exp); > gimple_seq_add_stmt_without_update (&seq, g); > op = gimple_assign_lhs (g); > } > candidates.pop (); > if (update_range_test (&ranges[k - 1], NULL, candidates.address (), > candidates.length (), opcode, ops, op, > - seq, true, ranges[k - 1].low, > + seq, ranges[k - 1].in_p, ranges[k - 1].low, > ranges[k - 1].high, strict_overflow_p)) > any_changes = true; > else > gimple_seq_discard (seq); > - if ((b % 3) == 2 && buckets[b] != i) > + if ((b % 4) == 2 && buckets[b] != i) > /* There is more work to do for this bucket. */ > b--; > } > @@ -3909,10 +3947,10 @@ optimize_range_tests (enum tree_code opc > if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) > any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, > ops, ranges); > - any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, > - ops, ranges); > any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, > ranges, first_bb); > + any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, > + ops, ranges); > > if (any_changes && opcode != ERROR_MARK) > { > --- gcc/testsuite/gcc.dg/tree-ssa/pr95731.c.jj 2021-01-11 > 15:27:37.587163558 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr95731.c 2021-01-11 15:27:15.003419789 > +0100 > @@ -0,0 +1,22 @@ > +/* PR tree-optimization/95731 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > +/* { dg-final { scan-tree-dump-times " >= 0\| < 0" 6 "optimized" } } */ > + > +int > +foo (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 && y >= 0 && z < 0 && u < 0 && w >= 0 && v < 0; > +} > + > +int > +bar (int x, int y, int z, int w, long long u, long long v) > +{ > + return u >= 0 && x >= 0 && y >= 0 && v < 0 && z >= 0 && w >= 0; > +} > + > +int > +baz (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 || u < 0 || y >= 0 || v < 0 || z >= 0 || w >= 0; > +} > --- 
gcc/testsuite/gcc.c-torture/execute/pr95731.c.jj 2021-01-11 > 15:28:07.108828606 +0100 > +++ gcc/testsuite/gcc.c-torture/execute/pr95731.c 2021-01-11 > 15:38:31.063806122 +0100 > @@ -0,0 +1,40 @@ > +/* PR tree-optimization/95731 */ > + > +__attribute__((noipa)) int > +foo (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 && y >= 0 && z < 0 && u < 0 && w >= 0 && v < 0; > +} > + > +__attribute__((noipa)) int > +bar (int x, int y, int z, int w, long long u, long long v) > +{ > + return u >= 0 && x >= 0 && y >= 0 && v < 0 && z >= 0 && w >= 0; > +} > + > +__attribute__((noipa)) int > +baz (int x, int y, int z, int w, long long u, long long v) > +{ > + return x >= 0 || u < 0 || y >= 0 || v < 0 || z >= 0 || w >= 0; > +} > + > +int > +main () > +{ > + int i; > + for (i = 0; i < 64; i++) > + { > + int a = foo ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456, > + (i & 4) ? -123 : 456, (i & 8) ? -123 : 456, > + (i & 16) ? -123 : 456, (i & 32) ? -123 : 456); > + int b = bar ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456, > + (i & 4) ? -123 : 456, (i & 8) ? -123 : 456, > + (i & 16) ? -123 : 456, (i & 32) ? -123 : 456); > + int c = baz ((i & 1) ? -123 : 456, (i & 2) ? -123 : 456, > + (i & 4) ? -123 : 456, (i & 8) ? -123 : 456, > + (i & 16) ? -123 : 456, (i & 32) ? -123 : 456); > + if (a != (i == 52) || b != (i == 32) || c != (i != 15)) > + __builtin_abort (); > + } > + return 0; > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)