On Tue, 8 Jan 2013, Jan Hubicka wrote:
> Hi,
> here is an even more updated patch, this time really fixing the testcase,
> I hope ;)
> It turns out there is one extra problem in tree-ssa-loop-niter.c triggered by
> that code. Some bounds, such as those based on an inequality test or on
> wrapping IVs, are valid bounds only when the test is executed on every
> iteration.
>
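To make the scenario concrete for the thread: below is a minimal, hypothetical
sketch of the kind of loop being described.  It is only loosely modeled on the
PR 55875 testcases later in the patch and is not part of it.  The exit test
sits under a guard, so it is not executed on every iteration, and the value it
tests wraps, so missing the test once does not mean the exit condition can
never become true again later.

  extern int guard (unsigned int i);

  unsigned int
  f (void)
  {
    unsigned int i;
    for (i = 0; ; i++)
      /* The guard means the exit test below is *not* executed on every
         iteration of the loop.  */
      if (guard (i))
        /* (unsigned char) i wraps every 256 iterations, so it can exceed
           200, wrap back below it, and exceed it again on a later
           iteration.  */
        if ((unsigned char) i > 200)
          return i;
  }

A bound derived from such a test (or from a memory access under the guard) is
therefore only reliable if the test is known to execute on every iteration,
which is what the EVERY_ITERATION / "safe" handling in the patch checks.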
> Bootstrapped/regtested x86_64-linux.
>
> Honza
>
> PR tree-optimization/55875
> * gcc.c-torture/execute/pr55875.c: New testcase.
> * g++.dg/torture/pr55875.C: New testcase.
>
> * tree-ssa-loop-niter.c (number_of_iterations_cond): Add
> EVERY_ITERATION parameter.
> (number_of_iterations_exit): Check if exit is executed every
> iteration.
> (idx_infer_loop_bounds): Similarly here.
> (n_of_executions_at_most): Simplify to only test cases
> where the statement is dominated by the particular bound;
> handle the "postdominance" test correctly.
> (scev_probably_wraps_p): Use max loop iterations info
> as a global bound first.
> Index: tree-ssa-loop-niter.c
> ===================================================================
> *** tree-ssa-loop-niter.c (revision 194918)
> --- tree-ssa-loop-niter.c (working copy)
> *************** dump_affine_iv (FILE *file, affine_iv *i
> *** 1208,1213 ****
> --- 1208,1215 ----
> -- in this case we can use the information whether the control induction
> variables can overflow or not in a more efficient way.
>
> + If EVERY_ITERATION is true, we know the test is executed on every iteration.
> +
> The results (number of iterations and assumptions as described in
> comments at struct tree_niter_desc in tree-flow.h) are stored to NITER.
> Returns false if it fails to determine number of iterations, true if it
> *************** static bool
> *** 1217,1227 ****
> number_of_iterations_cond (struct loop *loop,
> tree type, affine_iv *iv0, enum tree_code code,
> affine_iv *iv1, struct tree_niter_desc *niter,
> ! bool only_exit)
> {
> bool exit_must_be_taken = false, ret;
> bounds bnds;
>
> /* The meaning of these assumptions is this:
> if !assumptions
> then the rest of information does not have to be valid
> --- 1219,1239 ----
> number_of_iterations_cond (struct loop *loop,
> tree type, affine_iv *iv0, enum tree_code code,
> affine_iv *iv1, struct tree_niter_desc *niter,
> ! bool only_exit, bool every_iteration)
> {
> bool exit_must_be_taken = false, ret;
> bounds bnds;
>
> + /* If the test is not executed every iteration, wrapping may make the test
> + pass again.
> + TODO: the overflow case can still be used as an unreliable estimate of the
> + upper bound.  But we have no API to pass it down to the number of iterations
> + code and, at present, it will not use it anyway. */
> + if (!every_iteration
> + && (!iv0->no_overflow || !iv1->no_overflow
> + || code == NE_EXPR || code == EQ_EXPR))
> + return false;
> +
> /* The meaning of these assumptions is this:
> if !assumptions
> then the rest of information does not have to be valid
> *************** number_of_iterations_exit (struct loop *
> *** 1807,1815 ****
> tree op0, op1;
> enum tree_code code;
> affine_iv iv0, iv1;
>
> ! if (every_iteration
> ! && !dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src))
> return false;
>
> niter->assumptions = boolean_false_node;
> --- 1819,1829 ----
> tree op0, op1;
> enum tree_code code;
> affine_iv iv0, iv1;
> + bool safe;
>
> ! safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);
> !
> ! if (every_iteration && !safe)
> return false;
>
> niter->assumptions = boolean_false_node;
> *************** number_of_iterations_exit (struct loop *
> *** 1855,1861 ****
> iv0.base = expand_simple_operations (iv0.base);
> iv1.base = expand_simple_operations (iv1.base);
> if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
> ! loop_only_exit_p (loop, exit)))
> {
> fold_undefer_and_ignore_overflow_warnings ();
> return false;
> --- 1869,1875 ----
> iv0.base = expand_simple_operations (iv0.base);
> iv1.base = expand_simple_operations (iv1.base);
> if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
> ! loop_only_exit_p (loop, exit), safe))
> {
> fold_undefer_and_ignore_overflow_warnings ();
> return false;
> *************** idx_infer_loop_bounds (tree base, tree *
> *** 2657,2662 ****
> --- 2671,2677 ----
> tree low, high, type, next;
> bool sign, upper = true, at_end = false;
> struct loop *loop = data->loop;
> + bool reliable = true;
>
> if (TREE_CODE (base) != ARRAY_REF)
> return true;
> *************** idx_infer_loop_bounds (tree base, tree *
> *** 2728,2734 ****
> && tree_int_cst_compare (next, high) <= 0)
> return true;
>
> ! record_nonwrapping_iv (loop, init, step, data->stmt, low, high, true,
> ! upper);
> return true;
> }
>
> --- 2743,2756 ----
> && tree_int_cst_compare (next, high) <= 0)
> return true;
>
> ! /* If the access is not executed on every iteration, we must ensure that
> ! overflow cannot make the access valid later. */
> ! if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt))
> ! && scev_probably_wraps_p (initial_condition_in_loop_num (ev, loop->num),
> ! step, data->stmt, loop, true))
> ! reliable = false;
> !
> ! record_nonwrapping_iv (loop, init, step, data->stmt, low, high, reliable,
> ! upper);
> return true;
> }
>
> *************** stmt_dominates_stmt_p (gimple s1, gimple
> *** 3549,3556 ****
> /* Returns true when we can prove that the number of executions of
> STMT in the loop is at most NITER, according to the bound on
> the number of executions of the statement NITER_BOUND->stmt recorded in
> ! NITER_BOUND. If STMT is NULL, we must prove this bound for all
> ! statements in the loop. */
>
> static bool
> n_of_executions_at_most (gimple stmt,
> --- 3571,3585 ----
> /* Returns true when we can prove that the number of executions of
> STMT in the loop is at most NITER, according to the bound on
> the number of executions of the statement NITER_BOUND->stmt recorded in
> ! NITER_BOUND and the fact that NITER_BOUND->stmt dominates STMT.
> !
> ! ??? This code can become quite a CPU hog - we can have many bounds,
> ! and a large basic block forces stmt_dominates_stmt_p to be queried
> ! many times, so the whole thing is O(n^2) for each
> ! scev_probably_wraps_p invocation (and that can be done n times).
> !
> ! It would make more sense (and give better answers) to remember BB
> ! bounds computed by discover_iteration_bound_by_body_walk. */
>
> static bool
> n_of_executions_at_most (gimple stmt,
> *************** n_of_executions_at_most (gimple stmt,
> *** 3571,3602 ****
> /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
> times.  This means that:
>
> ! -- if NITER_BOUND->is_exit is true, then everything before
> ! NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
> ! times, and everything after it at most NITER_BOUND->bound times.
>
> -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
> is executed, then NITER_BOUND->stmt is executed as well in the same
> ! iteration (we conclude that if both statements belong to the same
> ! basic block, or if STMT is after NITER_BOUND->stmt), then STMT
> ! is executed at most NITER_BOUND->bound + 1 times. Otherwise STMT is
> ! executed at most NITER_BOUND->bound + 2 times. */
>
> if (niter_bound->is_exit)
> {
> ! if (stmt
> ! && stmt != niter_bound->stmt
> ! && stmt_dominates_stmt_p (niter_bound->stmt, stmt))
> ! cmp = GE_EXPR;
> ! else
> ! cmp = GT_EXPR;
> }
> else
> {
> ! if (!stmt
> ! || (gimple_bb (stmt) != gimple_bb (niter_bound->stmt)
> ! && !stmt_dominates_stmt_p (niter_bound->stmt, stmt)))
> {
> bound += double_int_one;
> if (bound.is_zero ()
> || !double_int_fits_to_tree_p (nit_type, bound))
> --- 3600,3642 ----
> /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
> times.  This means that:
>
> ! -- if NITER_BOUND->is_exit is true, then everything after it is
> ! executed at most NITER_BOUND->bound times.
>
> -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
> is executed, then NITER_BOUND->stmt is executed as well in the same
> ! iteration, then STMT is executed at most NITER_BOUND->bound + 1 times.
> !
> ! If we can determine that NITER_BOUND->stmt is always executed
> ! after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
> ! We conclude this if both statements belong to the same basic block,
> ! STMT is before NITER_BOUND->stmt, and there are no statements with
> ! side effects in between. */
>
> if (niter_bound->is_exit)
> {
> ! if (stmt == niter_bound->stmt
> ! || !stmt_dominates_stmt_p (niter_bound->stmt, stmt))
> ! return false;
> ! cmp = GE_EXPR;
> }
> else
> {
> ! if (!stmt_dominates_stmt_p (niter_bound->stmt, stmt))
> {
> + gimple_stmt_iterator bsi;
> + if (gimple_bb (stmt) != gimple_bb (niter_bound->stmt)
> + || gimple_code (stmt) == GIMPLE_PHI
> + || gimple_code (niter_bound->stmt) == GIMPLE_PHI)
> + return false;
> +
> + /* By stmt_dominates_stmt_p we already know that STMT appears
> + before NITER_BOUND->STMT.  We still need to check that the loop
> + cannot be terminated by a side effect in between. */
> + for (bsi = gsi_for_stmt (stmt); gsi_stmt (bsi) != niter_bound->stmt;
> + gsi_next (&bsi))
> + if (gimple_has_side_effects (gsi_stmt (bsi)))
> + return false;
Ugh.  This applies both to stmt_dominates_stmt_p and to this new check
(both loop over the statements of the basic block).
For stmt_dominates_stmt_p you'd simply use GIMPLE uids as other
passes do (and compute them, of course); for the above you'd store,
alongside the UID, a sequence number that increments at each stmt
with a side effect.  The UID is 32 bits, so you could even store
(stmt-number-in-bb << 8) | side-effect-nr in the UID (with the issue
that if there are a multiple of 256 side-effect stmts in between the
two stmts you'd miss that fact ...).
Well.  At least those loops are a real issue IMHO.  Adding a
second one doesn't make the complexity worse, I understand - still ...
I expected better from you ;)
Patch is ok, but think about this for a minute - eventually
you can come up with something very clever.
Thanks,
Richard.
> bound += double_int_one;
> if (bound.is_zero ()
> || !double_int_fits_to_tree_p (nit_type, bound))
> *************** scev_probably_wraps_p (tree base, tree s
> *** 3640,3649 ****
> gimple at_stmt, struct loop *loop,
> bool use_overflow_semantics)
> {
> - struct nb_iter_bound *bound;
> tree delta, step_abs;
> tree unsigned_type, valid_niter;
> tree type = TREE_TYPE (step);
>
> /* FIXME: We really need something like
> http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
> --- 3680,3691 ----
> gimple at_stmt, struct loop *loop,
> bool use_overflow_semantics)
> {
> tree delta, step_abs;
> tree unsigned_type, valid_niter;
> tree type = TREE_TYPE (step);
> + tree e;
> + double_int niter;
> + struct nb_iter_bound *bound;
>
> /* FIXME: We really need something like
> http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
> *************** scev_probably_wraps_p (tree base, tree s
> *** 3706,3719 ****
> valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta,
> step_abs);
>
> estimate_numbers_of_iterations_loop (loop);
> ! for (bound = loop->bounds; bound; bound = bound->next)
> {
> ! if (n_of_executions_at_most (at_stmt, bound, valid_niter))
> ! {
> ! fold_undefer_and_ignore_overflow_warnings ();
> ! return false;
> ! }
> }
>
> fold_undefer_and_ignore_overflow_warnings ();
>
> --- 3748,3773 ----
> valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta,
> step_abs);
>
> estimate_numbers_of_iterations_loop (loop);
> !
> ! if (max_loop_iterations (loop, &niter)
> ! && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter)
> ! && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
> ! double_int_to_tree (TREE_TYPE (valid_niter),
> ! niter))) != NULL
> ! && integer_nonzerop (e))
> {
> ! fold_undefer_and_ignore_overflow_warnings ();
> ! return false;
> }
> + if (at_stmt)
> + for (bound = loop->bounds; bound; bound = bound->next)
> + {
> + if (n_of_executions_at_most (at_stmt, bound, valid_niter))
> + {
> + fold_undefer_and_ignore_overflow_warnings ();
> + return false;
> + }
> + }
>
> fold_undefer_and_ignore_overflow_warnings ();
>
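A quick worked example of the new early out, with purely illustrative
numbers: for an IV of unsigned char type with base 5 and step 1, valid_niter
comes out as roughly (255 - 5) / 1 = 250 steps before the IV can leave its
type's range; if max_loop_iterations can prove the loop iterates at most,
say, 100 times, then valid_niter > 100 holds and scev_probably_wraps_p can
answer "does not wrap" without looking at AT_STMT or walking the recorded
bounds at all.  The per-bound n_of_executions_at_most loop is now only a
fallback, and is only entered when AT_STMT is non-NULL.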
> Index: testsuite/gcc.c-torture/execute/pr55875.c
> ===================================================================
> *** testsuite/gcc.c-torture/execute/pr55875.c (revision 0)
> --- testsuite/gcc.c-torture/execute/pr55875.c (revision 0)
> ***************
> *** 0 ****
> --- 1,17 ----
> + int a[250];
> + __attribute__ ((noinline))
> + t(int i)
> + {
> + if (i==0)
> + exit(0);
> + if (i>255)
> + abort ();
> + }
> + main()
> + {
> + unsigned int i;
> + for (i=0;;i++)
> + {
> + a[i]=t((unsigned char)(i+5));
> + }
> + }
> Index: testsuite/g++.dg/torture/20070621-1.C
> ===================================================================
> *** testsuite/g++.dg/torture/20070621-1.C (revision 195032)
> --- testsuite/g++.dg/torture/20070621-1.C (working copy)
> ***************
> *** 1,3 ****
> --- 1,4 ----
> + // { dg-do compile }
> /* Reduced from libstdc++-v3/testsuite/25_algorithms/equal/1.cc
>
> 1.2.ii: In function 'void test1()':
> Index: testsuite/g++.dg/torture/pr55875.C
> ===================================================================
> *** testsuite/g++.dg/torture/pr55875.C (revision 0)
> --- testsuite/g++.dg/torture/pr55875.C (revision 0)
> ***************
> *** 0 ****
> --- 1,55 ----
> + // { dg-do run }
> +
> + struct A
> + {
> + short int a1;
> + unsigned char a2;
> + unsigned int a3;
> + };
> +
> + struct B
> + {
> + unsigned short b1;
> + const A *b2;
> + };
> +
> + B b;
> +
> + __attribute__((noinline, noclone))
> + int foo (unsigned x)
> + {
> + __asm volatile ("" : "+r" (x) : : "memory");
> + return x;
> + }
> +
> + inline void
> + bar (const int &)
> + {
> + }
> +
> + __attribute__((noinline)) void
> + baz ()
> + {
> + const A *a = b.b2;
> + unsigned int i;
> + unsigned short n = b.b1;
> + for (i = 0; i < n; ++i)
> + if (a[i].a1 == 11)
> + {
> + if (i > 0 && (a[i - 1].a2 & 1))
> + continue;
> + bar (foo (2));
> + return;
> + }
> + }
> +
> + int
> + main ()
> + {
> + A a[4] = { { 10, 0, 0 }, { 11, 1, 0 }, { 11, 1, 0 }, { 11, 1, 0 } };
> + b.b1 = 4;
> + b.b2 = a;
> + baz ();
> + return 0;
> + }
> +
>
>
--
Richard Biener <[email protected]>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer