Hi,
At the moment, loop niter analyzer depends on simple_iv to understand control
induction variable in order to do further niter analysis. For cases reported
in PR57558 (comment #4), the control variable is not an SCEV because it's
converted from an smaller type and that type could overflow/wrap. As a result,
niter information for such loops won't be analyzed because
number_of_iterations_exit exits immediately once simple_iv fails. As a matter
of fact, niter analyzer can be improved by computing an additional assumption
under which the loop won't be infinite (or the corresponding iv doesn't
overflow). This patch does so by changing both scev and niter analyzer. It
introduces a new interface simple_iv_with_niters which is used in niter
analyzer. The new interface returns an IV as well as a possible niter
expression, the expression indicates the maximum number the IV can iterate
before the returned simple_iv becoming invalid. Given below example:
short unsigned int i;
int _1;
<bb 2>:
goto <bb 4>;
<bb 3>:
arr[_1] = -1;
i_6 = i_2 + 1;
<bb 4>:
# i_2 = PHI <1(2), i_6(3)>
_1 = (int) i_2;
if (_1 != 199)
goto <bb 3>;
else
goto <bb 5>;
<bb 5>:
return;
When analyzing variable "_1", the old interface simple_iv returns false, while
the new interface returns <{1, 1}_loop, 65534>. It means "_1" is a valid SCEV
as long as it doesn't iterate more than 65534 times.
This patch also introduces a new interface in niter analyzer
number_of_iterations_exit_assumptions. The new interface further derives
assumption from the result of simple_iv_with_niters, and the assumption can be
used by other optimizers. As for this loop, it's an unexpected gain because
assumption (198 < 65534) is naturally TRUE.
For loops that could be infinite, the assumption will be an expression. This
improvement is still good, for example, the next patch to will vectorize such
loops with help of this patch.
This patch also changes how assumptions is handled in niter analyzer. At the
moment, (non-true) assumptions are not supported unless
-funsafe-loop-optimizations are specified, after this patch, the new interface
exposes assumptions to niter user and let them make their own decision. In
effect, this patch discards -funsafe-loop-optimizations on GIMPLE level, which
we think is not a very useful option anyway. Next patch will xfails tests for
this option. Well, the option is not totally discarded because it requires RTL
changes too. I will follow up that after gimple part change.
Bootstrap and test on x86_64 and AArch64. Is it OK?
Thanks,
bin
2016-06-27 Bin Cheng <bin.ch...@arm.com>
* cfgloop.h (flags): New field to loop struct.
(LOOP_F_INFINITE, LOOP_F_ASSUMPTIONS, LOOP_F_MUST_ROLL): New macros.
(loop_flag_set, loop_flag_clear, loop_flag_set_p): New functions.
* cfgloop.c (alloc_loop): Handle flags.
* tree-scalar-evolution.c (simple_iv_with_niters): New funcion.
(derive_simple_iv_with_niters): New function.
(simple_iv): Rewrite using simple_iv_with_niters.
* tree-scalar-evolution.h (simple_iv_with_niters): New decl.
* tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions): New
function.
(number_of_iterations_exit): Rewrite using above function.
* tree-ssa-loop-niter.h (number_of_iterations_exit_assumptions): New
Decl.
gcc/testsuite/ChangeLog
2016-06-27 Bin Cheng <bin.ch...@arm.com>
* gcc.dg/tree-ssa/loop-41.c: New test.
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 2087b90..a18d973 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -342,6 +342,7 @@ alloc_loop (void)
loop->nb_iterations_upper_bound = 0;
loop->nb_iterations_likely_upper_bound = 0;
loop->nb_iterations_estimate = 0;
+ loop->flags = 0;
return loop;
}
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index dfc7525..5f3baea 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -188,6 +188,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
of the loop can be safely evaluated concurrently. */
int safelen;
+ /* Flags. */
+ unsigned flags;
+
/* True if this loop should never be vectorized. */
bool dont_vectorize;
@@ -221,6 +224,31 @@ struct GTY ((chain_next ("%h.next"))) loop {
basic_block former_header;
};
+/* Set if this is an infinite loop. */
+#define LOOP_F_INFINITE (1 << 0)
+/* Set if this is a finite loop without any prequirisite assumptions. */
+#define LOOP_F_ASSUMPTIONS (1 << 1)
+/* Set if latch of this loop must roll back. */
+#define LOOP_F_MUST_ROLL (1 << 2)
+
+static inline void
+loop_flag_set (struct loop *loop, unsigned flags)
+{
+ loop->flags |= flags;
+}
+
+static inline void
+loop_flag_clear (struct loop *loop, unsigned flags)
+{
+ loop->flags &= ~flags;
+}
+
+static inline bool
+loop_flag_set_p (struct loop *loop, unsigned flags)
+{
+ return (loop->flags & flags) == flags;
+}
+
/* Flags for state of loop structure. */
enum
{
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index d6f2a2f..d46ca29 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -3309,6 +3309,55 @@ scev_reset (void)
}
}
+/* Given EV with form of "(type) {inner_base, inner_step}_loop", this
+ function tries to derive condition under which it can be simplified
+ into "{(type)inner_base, (type)inner_step}_loop". The condition is
+ the maximum number that inner iv can iterate. */
+
+static tree
+derive_simple_iv_with_niters (tree ev, tree *niters)
+{
+ if (!CONVERT_EXPR_P (ev))
+ return ev;
+
+ tree inner_ev = TREE_OPERAND (ev, 0);
+ if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC)
+ return ev;
+
+ tree init = CHREC_LEFT (inner_ev);
+ tree step = CHREC_RIGHT (inner_ev);
+ if (TREE_CODE (init) != INTEGER_CST
+ || TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
+ return ev;
+
+ tree type = TREE_TYPE (ev);
+ tree inner_type = TREE_TYPE (inner_ev);
+ if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type))
+ return ev;
+
+ /* Type conversion in "(type) {inner_base, inner_step}_loop" can be
+ folded only if inner iv won't overflow. We compute the maximum
+ number the inner iv can iterate before overflowing and return the
+ simplified affine iv. */
+ tree delta;
+ init = fold_convert (type, init);
+ step = fold_convert (type, step);
+ ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), init, step);
+ if (tree_int_cst_sign_bit (step))
+ {
+ tree bound = lower_bound_in_type (inner_type, inner_type);
+ delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound));
+ step = fold_build1 (NEGATE_EXPR, type, step);
+ }
+ else
+ {
+ tree bound = upper_bound_in_type (inner_type, inner_type);
+ delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init);
+ }
+ *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step);
+ return ev;
+}
+
/* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with
respect to WRTO_LOOP and returns its base and step in IV if possible
(see analyze_scalar_evolution_in_loop for more details on USE_LOOP
@@ -3326,13 +3375,25 @@ scev_reset (void)
not wrap by some other argument. Otherwise, this might introduce undefined
behavior, and
- for (i = iv->base; ; i = (type) ((unsigned type) i + (unsigned type)
iv->step))
+ i = iv->base;
+ for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step))
- must be used instead. */
+ must be used instead.
+
+ This function also checks case in which OP is a conversion of an inner
+ simple iv with form (outer_type){inner_base, inner_step}_loop. If type
+ of the inner iv has smaller precision than outer_type, we can't fold it
+ into {(outer_type)inner_base, (outer_type)inner_step}_loop because the
+ inner iv could overflow/wrap. In this case, we can derive a condition
+ under which the inner iv won't overflow/wrap and do the simplification.
+ The condition derived normally is the maximum number inner iv can iterate.
+ This is useful in loop niter analysis, to derive break conditions when a
+ loop must terminate, when is infinite. */
bool
-simple_iv (struct loop *wrto_loop, struct loop *use_loop, tree op,
- affine_iv *iv, bool allow_nonconstant_step)
+simple_iv_with_niters (struct loop *wrto_loop, struct loop *use_loop,
+ tree op, affine_iv *iv, tree *iv_niters,
+ bool allow_nonconstant_step)
{
enum tree_code code;
tree type, ev, base, e, stop;
@@ -3362,8 +3423,14 @@ simple_iv (struct loop *wrto_loop, struct loop
*use_loop, tree op,
return true;
}
- if (TREE_CODE (ev) != POLYNOMIAL_CHREC
- || CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num)
+ /* If we can derive valid scalar evolution with assumptions. */
+ if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
+ ev = derive_simple_iv_with_niters (ev, iv_niters);
+
+ if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
+ return false;
+
+ if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num)
return false;
iv->step = CHREC_RIGHT (ev);
@@ -3457,6 +3524,19 @@ simple_iv (struct loop *wrto_loop, struct loop
*use_loop, tree op,
return true;
}
+/* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple
+ affine iv unconditionally. */
+
+bool
+simple_iv (struct loop *wrto_loop, struct loop *use_loop, tree op,
+ affine_iv *iv, bool allow_nonconstant_step)
+{
+ tree iv_niters = NULL_TREE;
+ bool res = simple_iv_with_niters (wrto_loop, use_loop, op, iv,
+ &iv_niters, allow_nonconstant_step);
+ return (!iv_niters && res);
+}
+
/* Finalize the scalar evolution analysis. */
void
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index 8a87660..72cb8f9 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -38,6 +38,8 @@ extern unsigned int scev_const_prop (void);
extern bool expression_expensive_p (tree);
extern bool simple_iv (struct loop *, struct loop *, tree, struct affine_iv *,
bool);
+extern bool simple_iv_with_niters (struct loop *, struct loop *, tree,
+ struct affine_iv *, tree *, bool);
extern tree compute_overall_effect_of_inner_loop (struct loop *, tree);
/* Returns the basic block preceding LOOP, or the CFG entry block when
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 32fe2f9..4e15bd6 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2186,20 +2186,17 @@ loop_only_exit_p (const struct loop *loop, const_edge
exit)
}
/* Stores description of number of iterations of LOOP derived from
- EXIT (an exit edge of the LOOP) in NITER. Returns true if some
- useful information could be derived (and fields of NITER has
- meaning described in comments at struct tree_niter_desc
- declaration), false otherwise. If WARN is true and
- -Wunsafe-loop-optimizations was given, warn if the optimizer is going to use
- potentially unsafe assumptions.
+ EXIT (an exit edge of the LOOP) in NITER. Returns true if some useful
+ information could be derived (and fields of NITER have meaning described
+ in comments at struct tree_niter_desc declaration), false otherwise.
When EVERY_ITERATION is true, only tests that are known to be executed
- every iteration are considered (i.e. only test that alone bounds the loop).
+ every iteration are considered (i.e. only test that alone bounds the loop).
*/
bool
-number_of_iterations_exit (struct loop *loop, edge exit,
- struct tree_niter_desc *niter,
- bool warn, bool every_iteration)
+number_of_iterations_exit_assumptions (struct loop *loop, edge exit,
+ struct tree_niter_desc *niter,
+ bool every_iteration)
{
gimple *last;
gcond *stmt;
@@ -2209,6 +2206,10 @@ number_of_iterations_exit (struct loop *loop, edge exit,
affine_iv iv0, iv1;
bool safe;
+ /* Nothing to analyze if this is an infinite loop. */
+ if (loop_flag_set_p (loop, LOOP_F_INFINITE))
+ return false;
+
safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);
if (every_iteration && !safe)
@@ -2251,9 +2252,16 @@ number_of_iterations_exit (struct loop *loop, edge exit,
&& !POINTER_TYPE_P (type))
return false;
- if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, false))
+ tree iv0_niters = NULL_TREE;
+ if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
+ op0, &iv0, &iv0_niters, false))
return false;
- if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, false))
+ tree iv1_niters = NULL_TREE;
+ if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
+ op1, &iv1, &iv1_niters, false))
+ return false;
+ /* Give up on complicated case. */
+ if (iv0_niters && iv1_niters)
return false;
/* We don't want to see undefined signed overflow warnings while
@@ -2269,6 +2277,31 @@ number_of_iterations_exit (struct loop *loop, edge exit,
return false;
}
+ /* Incorporate additional assumption implied by control iv. */
+ tree iv_niters = iv0_niters ? iv0_niters : iv1_niters;
+ if (iv_niters)
+ {
+ tree assumption = fold_build2 (LE_EXPR, boolean_type_node, niter->niter,
+ fold_convert (TREE_TYPE (niter->niter),
+ iv_niters));
+
+ if (!integer_nonzerop (assumption))
+ niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ niter->assumptions, assumption);
+
+ /* Refine upper bound if possible. */
+ if (TREE_CODE (iv_niters) == INTEGER_CST
+ && niter->max > wi::to_widest (iv_niters))
+ niter->max = wi::to_widest (iv_niters);
+ }
+
+ /* Assume there is no assumptions if the flag is set. */
+ if (loop_flag_set_p (loop, LOOP_F_ASSUMPTIONS))
+ niter->assumptions = boolean_true_node;
+ /* Assume the latch must roll if the flag is set. */
+ if (loop_flag_set_p (loop, LOOP_F_MUST_ROLL))
+ niter->may_be_zero = boolean_false_node;
+
if (optimize >= 3)
{
niter->assumptions = simplify_using_outer_evolutions (loop,
@@ -2291,44 +2324,22 @@ number_of_iterations_exit (struct loop *loop, edge exit,
if (TREE_CODE (niter->niter) == INTEGER_CST)
niter->max = wi::to_widest (niter->niter);
- if (integer_onep (niter->assumptions))
- return true;
-
- /* With -funsafe-loop-optimizations we assume that nothing bad can happen.
- But if we can prove that there is overflow or some other source of weird
- behavior, ignore the loop even with -funsafe-loop-optimizations. */
- if (integer_zerop (niter->assumptions) || !single_exit (loop))
- return false;
+ return (!integer_zerop (niter->assumptions));
+}
- if (flag_unsafe_loop_optimizations)
- niter->assumptions = boolean_true_node;
+/* Like number_of_iterations_exit, but return TRUE only if the niter
+ information holds unconditionally. */
- if (warn)
- {
- const char *wording;
- location_t loc = gimple_location (stmt);
-
- /* We can provide a more specific warning if one of the operator is
- constant and the other advances by +1 or -1. */
- if (!integer_zerop (iv1.step)
- ? (integer_zerop (iv0.step)
- && (integer_onep (iv1.step) || integer_all_onesp (iv1.step)))
- : (integer_onep (iv0.step) || integer_all_onesp (iv0.step)))
- wording =
- flag_unsafe_loop_optimizations
- ? N_("assuming that the loop is not infinite")
- : N_("cannot optimize possibly infinite loops");
- else
- wording =
- flag_unsafe_loop_optimizations
- ? N_("assuming that the loop counter does not overflow")
- : N_("cannot optimize loop, the loop counter may overflow");
-
- warning_at ((LOCATION_LINE (loc) > 0) ? loc : input_location,
- OPT_Wunsafe_loop_optimizations, "%s", gettext (wording));
- }
+bool
+number_of_iterations_exit (struct loop *loop, edge exit,
+ struct tree_niter_desc *niter,
+ bool, bool every_iteration)
+{
+ if (!number_of_iterations_exit_assumptions (loop, exit, niter,
+ every_iteration))
+ return false;
- return flag_unsafe_loop_optimizations;
+ return (integer_nonzerop (niter->assumptions));
}
/* Try to determine the number of iterations of LOOP. If we succeed,
diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
index 1b5d111..1aea580 100644
--- a/gcc/tree-ssa-loop-niter.h
+++ b/gcc/tree-ssa-loop-niter.h
@@ -27,6 +27,9 @@ extern bool loop_only_exit_p (const struct loop *,
const_edge);
extern bool number_of_iterations_exit (struct loop *, edge,
struct tree_niter_desc *niter, bool,
bool every_iteration = true);
+extern bool number_of_iterations_exit_assumptions (struct loop *, edge,
+ struct tree_niter_desc *,
+ bool = true);
extern tree find_loop_niter (struct loop *, edge *);
extern bool finite_loop_p (struct loop *);
extern tree loop_niter_by_eval (struct loop *, edge);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-41.c
b/gcc/testsuite/gcc.dg/tree-ssa/loop-41.c
new file mode 100644
index 0000000..52ba968
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-41.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -ftree-vrp -fdump-tree-vrp-alias" } */
+
+signed char arr[240];
+void foo (void)
+{
+
+ unsigned short i, length = 200;
+
+ for (i = 1; (int)i < (length - 1); i++)
+ arr[i] = -1;
+}
+
+/* { dg-final { scan-tree-dump-not "RANGE \\\[0, 65535\\\]" "vrp1" } } */