Hi, Scalar evolution needs to prove no-overflow for source variable when handling type conversion. This is important because otherwise we would fail to recognize result of the conversion as SCEV, resulting in missing loop optimizations. Take case added by this patch as an example, the loop can't be distributed as memset call because address of memory reference is not recognized. At the moment, we rely on type overflow semantics and loop niter info for no-overflow checking, unfortunately that's not enough. This patch introduces new method checking no-overflow using value range information. As commented in the patch, value range can only be used when source operand variable evaluates on every loop iteration, rather than guarded by some conditions.
This together with patch improving loop niter analysis (https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00736.html) can help various loop passes like vectorization. Bootstrap and test on x86_64 and AArch64. Is it OK? Thanks, bin 2016-07-15 Bin Cheng <bin.ch...@arm.com> * tree-chrec.c (convert_affine_scev): New parameter. Pass new arg. (chrec_convert_1, chrec_convert): Ditto. * tree-chrec.h (chrec_convert, convert_affine_scev): New parameter. * tree-scalar-evolution.c (interpret_rhs_expr): Pass new arg. * tree-vrp.c (adjust_range_with_scev): Ditto. * tree-ssa-loop-niter.c (idx_infer_loop_bounds): Ditto. (scev_var_range_cant_overflow): New function. (scev_probably_wraps_p): New parameter. Call above function. * tree-ssa-loop-niter.h (scev_probably_wraps_p): New parameter. gcc/testsuite/ChangeLog 2016-07-15 Bin Cheng <bin.ch...@arm.com> * gcc.dg/tree-ssa/scev-15.c: New.
diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c index ee789a2..6add426 100644 --- a/gcc/tree-chrec.c +++ b/gcc/tree-chrec.c @@ -1162,16 +1162,17 @@ nb_vars_in_chrec (tree chrec) /* Converts BASE and STEP of affine scev to TYPE. LOOP is the loop whose iv the scev corresponds to. AT_STMT is the statement at that the scev is - evaluated. USE_OVERFLOW_SEMANTICS is true if this function should assume that - the rules for overflow of the given language apply (e.g., that signed - arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary - tests, but also to enforce that the result follows them. Returns true if the - conversion succeeded, false otherwise. */ + evaluated. USE_OVERFLOW_SEMANTICS is true if this function should assume + that the rules for overflow of the given language apply (e.g., that signed + arithmetics in C does not overflow) -- i.e., to use them to avoid + unnecessary tests, but also to enforce that the result follows them. + FROM is the source variable converted if it's not NULL. Returns true if + the conversion succeeded, false otherwise. */ bool convert_affine_scev (struct loop *loop, tree type, tree *base, tree *step, gimple *at_stmt, - bool use_overflow_semantics) + bool use_overflow_semantics, tree from) { tree ct = TREE_TYPE (*step); bool enforce_overflow_semantics; @@ -1230,7 +1231,7 @@ convert_affine_scev (struct loop *loop, tree type, must_check_rslt_overflow = false; if (must_check_src_overflow - && scev_probably_wraps_p (*base, *step, at_stmt, loop, + && scev_probably_wraps_p (from, *base, *step, at_stmt, loop, use_overflow_semantics)) return false; @@ -1258,7 +1259,8 @@ convert_affine_scev (struct loop *loop, tree type, if (must_check_rslt_overflow /* Note that in this case we cannot use the fact that signed variables do not overflow, as this is what we are verifying for the new iv. */ - && scev_probably_wraps_p (new_base, new_step, at_stmt, loop, false)) + && scev_probably_wraps_p (NULL, new_base, new_step, + at_stmt, loop, false)) return false; *base = new_base; @@ -1288,12 +1290,14 @@ chrec_convert_rhs (tree type, tree chrec, gimple *at_stmt) USE_OVERFLOW_SEMANTICS is true if this function should assume that the rules for overflow of the given language apply (e.g., that signed - arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary - tests, but also to enforce that the result follows them. */ + arithmetics in C does not overflow) -- i.e., to use them to avoid + unnecessary tests, but also to enforce that the result follows them. + + FROM is the source variable converted if it's not NULL. */ static tree chrec_convert_1 (tree type, tree chrec, gimple *at_stmt, - bool use_overflow_semantics) + bool use_overflow_semantics, tree from) { tree ct, res; tree base, step; @@ -1314,7 +1318,7 @@ chrec_convert_1 (tree type, tree chrec, gimple *at_stmt, step = CHREC_RIGHT (chrec); if (convert_affine_scev (loop, type, &base, &step, at_stmt, - use_overflow_semantics)) + use_overflow_semantics, from)) return build_polynomial_chrec (loop->num, base, step); /* If we cannot propagate the cast inside the chrec, just keep the cast. */ @@ -1347,7 +1351,7 @@ keep_cast: CHREC_LEFT (chrec)), fold_convert (utype, CHREC_RIGHT (chrec))); - res = chrec_convert_1 (type, res, at_stmt, use_overflow_semantics); + res = chrec_convert_1 (type, res, at_stmt, use_overflow_semantics, from); } else res = fold_convert (type, chrec); @@ -1395,14 +1399,16 @@ keep_cast: USE_OVERFLOW_SEMANTICS is true if this function should assume that the rules for overflow of the given language apply (e.g., that signed - arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary - tests, but also to enforce that the result follows them. */ + arithmetics in C does not overflow) -- i.e., to use them to avoid + unnecessary tests, but also to enforce that the result follows them. + + FROM is the source variable converted if it's not NULL. */ tree chrec_convert (tree type, tree chrec, gimple *at_stmt, - bool use_overflow_semantics) + bool use_overflow_semantics, tree from) { - return chrec_convert_1 (type, chrec, at_stmt, use_overflow_semantics); + return chrec_convert_1 (type, chrec, at_stmt, use_overflow_semantics, from); } /* Convert CHREC to TYPE, without regard to signed overflows. Returns the new diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h index 383271c..877330e 100644 --- a/gcc/tree-chrec.h +++ b/gcc/tree-chrec.h @@ -59,7 +59,7 @@ enum ev_direction scev_direction (const_tree); extern tree chrec_fold_plus (tree, tree, tree); extern tree chrec_fold_minus (tree, tree, tree); extern tree chrec_fold_multiply (tree, tree, tree); -extern tree chrec_convert (tree, tree, gimple *, bool = true); +extern tree chrec_convert (tree, tree, gimple *, bool = true, tree = NULL); extern tree chrec_convert_rhs (tree, tree, gimple *); extern tree chrec_convert_aggressive (tree, tree, bool *); @@ -75,7 +75,7 @@ extern tree reset_evolution_in_loop (unsigned, tree, tree); extern tree chrec_merge (tree, tree); extern void for_each_scev_op (tree *, bool (*) (tree *, void *), void *); extern bool convert_affine_scev (struct loop *, tree, tree *, tree *, gimple *, - bool); + bool, tree = NULL); /* Observers. */ extern bool eq_evolutions_p (const_tree, const_tree); diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c index ae5e907..1ba26f3 100644 --- a/gcc/tree-scalar-evolution.c +++ b/gcc/tree-scalar-evolution.c @@ -1933,7 +1933,7 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt, } else chrec1 = analyze_scalar_evolution (loop, rhs1); - res = chrec_convert (type, chrec1, at_stmt); + res = chrec_convert (type, chrec1, at_stmt, true, rhs1); break; case BIT_AND_EXPR: diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 0723752..79fddcd 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -3187,7 +3187,8 @@ idx_infer_loop_bounds (tree base, tree *idx, void *dta) /* If access is not executed on every iteration, we must ensure that overlow may not make the access valid later. */ if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt)) - && scev_probably_wraps_p (initial_condition_in_loop_num (ev, loop->num), + && scev_probably_wraps_p (NULL, + initial_condition_in_loop_num (ev, loop->num), step, data->stmt, loop, true)) upper = false; @@ -4252,6 +4253,99 @@ loop_exits_before_overflow (tree base, tree step, return false; } +/* VAR is scev variable whose evolution part is constant STEP, this function + proves that VAR can't overflow by using value range info. If VAR's value + range is [min, max], it can be proven by: + max + step doesn't overflow ; if step > 0 + or + min - step doesn't overflow ; if step < 0. + + We can only do this if var is computed in every loop iteration, i.e, var's + definition has to dominate loop latch. Consider below example: + + { + unsigned int i; + + <bb 3>: + + <bb 4>: + # RANGE [0, 4294967294] NONZERO 65535 + # i_21 = PHI <0(3), i_18(9)> + if (i_21 != 0) + goto <bb 6>; + else + goto <bb 8>; + + <bb 6>: + # RANGE [0, 65533] NONZERO 65535 + _6 = i_21 + 4294967295; + # RANGE [0, 65533] NONZERO 65535 + _7 = (long unsigned int) _6; + # RANGE [0, 524264] NONZERO 524280 + _8 = _7 * 8; + # PT = nonlocal escaped + _9 = a_14 + _8; + *_9 = 0; + + <bb 8>: + # RANGE [1, 65535] NONZERO 65535 + i_18 = i_21 + 1; + if (i_18 >= 65535) + goto <bb 10>; + else + goto <bb 9>; + + <bb 9>: + goto <bb 4>; + + <bb 10>: + return; + } + + VAR _6 doesn't overflow only with pre-condition (i_21 != 0), here we + can't use _6 to prove no-overlfow for _7. In fact, var _7 takes value + sequence (4294967295, 0, 1, ..., 65533) in loop life time, rather than + (4294967295, 4294967296, ...). */ + +static bool +scev_var_range_cant_overflow (tree var, tree step, struct loop *loop) +{ + tree type; + wide_int minv, maxv, diff, step_wi; + enum value_range_type rtype; + + /* Check if VAR evaluates in every loop iteration. */ + gimple *def; + if ((def = SSA_NAME_DEF_STMT (var)) != NULL + && !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (def))) + return false; + + if (TREE_CODE (step) != INTEGER_CST || !INTEGRAL_TYPE_P (TREE_TYPE (var))) + return false; + + rtype = get_range_info (var, &minv, &maxv); + if (rtype != VR_RANGE) + return false; + + step_wi = step; + type = TREE_TYPE (var); + if (tree_int_cst_sign_bit (step)) + { + diff = lower_bound_in_type (type, type); + diff = minv - diff; + step_wi = - step_wi; + } + else + { + diff = upper_bound_in_type (type, type); + diff = diff - maxv; + } + if (wi::gtu_p (diff, step_wi)) + return true; + + return false; +} + /* Return false only when the induction variable BASE + STEP * I is known to not overflow: i.e. when the number of iterations is small enough with respect to the step and initial condition in order to @@ -4260,10 +4354,13 @@ loop_exits_before_overflow (tree base, tree step, USE_OVERFLOW_SEMANTICS is true if this function should assume that the rules for overflow of the given language apply (e.g., that signed - arithmetics in C does not overflow). */ + arithmetics in C does not overflow). + + If VAR is a ssa variable, this function also returns false if VAR can + be proven not overflow with value range info. */ bool -scev_probably_wraps_p (tree base, tree step, +scev_probably_wraps_p (tree var, tree base, tree step, gimple *at_stmt, struct loop *loop, bool use_overflow_semantics) { @@ -4300,6 +4397,11 @@ scev_probably_wraps_p (tree base, tree step, if (TREE_CODE (step) != INTEGER_CST) return true; + /* Check if var can be proven not overflow with value range info. */ + if (var && TREE_CODE (var) == SSA_NAME + && scev_var_range_cant_overflow (var, step, loop)) + return false; + if (loop_exits_before_overflow (base, step, at_stmt, loop)) return false; diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h index 1b5d111..6fce752 100644 --- a/gcc/tree-ssa-loop-niter.h +++ b/gcc/tree-ssa-loop-niter.h @@ -46,7 +46,8 @@ extern bool estimated_stmt_executions (struct loop *, widest_int *); extern void estimate_numbers_of_iterations (void); extern bool stmt_dominates_stmt_p (gimple *, gimple *); extern bool nowrap_type_p (tree); -extern bool scev_probably_wraps_p (tree, tree, gimple *, struct loop *, bool); +extern bool scev_probably_wraps_p (tree, tree, tree, gimple *, + struct loop *, bool); extern void free_loop_control_ivs (struct loop *); extern void free_numbers_of_iterations_estimates_loop (struct loop *); extern void free_numbers_of_iterations_estimates (function *); diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 4333d60..2d9b80f 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -4156,7 +4156,7 @@ adjust_range_with_scev (value_range *vr, struct loop *loop, or decreases, ... */ dir == EV_DIR_UNKNOWN /* ... or if it may wrap. */ - || scev_probably_wraps_p (init, step, stmt, get_chrec_loop (chrec), + || scev_probably_wraps_p (NULL, init, step, stmt, get_chrec_loop (chrec), true)) return; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-15.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-15.c new file mode 100644 index 0000000..a0d2e59 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-15.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ldist" } */ + +void +foo (int *p) +{ + unsigned short i, j; + + for (i = 0; i < 100; i++) + for (j = 1; j < 101; j++) + { + unsigned int index = 100 * i + j; + p[index-1] = 0; + } +} + +/* Loop can be transformed into builtin memset since &p[...] is SCEV. */ +/* { dg-final { scan-tree-dump "builtin_memset" "ldist" } } */