On Thu, Sep 29, 2011 at 11:15 PM, Jakub Jelinek <[email protected]> wrote:
> Hi!
>
> This patch implements a fold_range_test-like optimization on GIMPLE, inside
> tree-ssa-reassoc, and tweaks fold-const.c so that most of the code can be
> shared between the two.
> The advantage of the reassoc optimization is that it doesn't attempt to
> merge just 2 ranges at a time; instead it sorts the ranges for the same
> SSA_NAME and thus can optimize even cases where the source code doesn't
> have the numbers in a range test in increasing or decreasing order (and
> also can optimize things that were spread across multiple statements in
> the source).
> Additionally, it optimizes cases like
> x == 1 || x == 3
> into (x & ~2) == 1.
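Just to spell out that last transform: 1 ^ 3 == 2 has a single bit set, so
clearing that bit maps both 1 and 3 onto 1 and nothing else onto it.  A
quick standalone sanity check of the identity (not part of the patch, and
nothing GIMPLE-specific about it):

#include <assert.h>

int
main (void)
{
  int x;
  /* x == 1 || x == 3 is equivalent to (x & ~2) == 1 because 1 ^ 3 == 2
     is a power of two.  */
  for (x = -16; x < 16; x++)
    assert ((x == 1 || x == 3) == ((x & ~2) == 1));
  return 0;
}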
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2011-09-27 Jakub Jelinek <[email protected]>
>
> PR tree-optimization/46309
> * fold-const.c (make_range, merge_ranges): Remove prototypes.
> (range_binop): Likewise, no longer static.
> (make_range_step): New function.
> (make_range): Use it.
> * tree.h (make_range_step, range_binop): New prototypes.
> * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
> * tree-ssa-reassoc.c: Include diagnostic-core.h.
> (struct range_entry): New type.
> (init_range_entry, range_entry_cmp, update_range_test,
> optimize_range_tests): New functions.
> (reassociate_bb): Call optimize_range_tests.
>
> * gcc.dg/pr46309.c: New test.
>
> --- gcc/fold-const.c.jj 2011-09-05 12:28:53.000000000 +0200
> +++ gcc/fold-const.c 2011-09-27 16:39:09.000000000 +0200
> @@ -112,12 +112,8 @@ static tree decode_field_reference (loca
> static int all_ones_mask_p (const_tree, int);
> static tree sign_bit_p (tree, const_tree);
> static int simple_operand_p (const_tree);
> -static tree range_binop (enum tree_code, tree, tree, int, tree, int);
> static tree range_predecessor (tree);
> static tree range_successor (tree);
> -extern tree make_range (tree, int *, tree *, tree *, bool *);
> -extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
> - tree, tree);
> static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
> static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree,
> tree);
> static tree unextend (tree, int, int, tree);
> @@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp)
> must be specified for a comparison. ARG1 will be converted to ARG0's
> type if both are specified. */
>
> -static tree
> +tree
> range_binop (enum tree_code code, tree type, tree arg0, int upper0_p,
> tree arg1, int upper1_p)
> {
> @@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t
> return constant_boolean_node (result, type);
> }
>
> +/* Helper routine for make_range. Perform one step for it, return
> + new expression if the loop should continue or NULL_TREE if it should
> + stop. */
> +
> +tree
> +make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
> + tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
> + bool *strict_overflow_p)
> +{
> + tree arg0_type = TREE_TYPE (arg0);
> + tree n_low, n_high, low = *p_low, high = *p_high;
> + int in_p = *p_in_p, n_in_p;
> +
> + switch (code)
> + {
> + case TRUTH_NOT_EXPR:
> + *p_in_p = ! in_p;
> + return arg0;
> +
> + case EQ_EXPR: case NE_EXPR:
> + case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
> + /* We can only do something if the range is testing for zero
> + and if the second operand is an integer constant. Note that
> + saying something is "in" the range we make is done by
> + complementing IN_P since it will set in the initial case of
> + being not equal to zero; "out" is leaving it alone. */
> + if (low == NULL_TREE || high == NULL_TREE
> + || ! integer_zerop (low) || ! integer_zerop (high)
> + || TREE_CODE (arg1) != INTEGER_CST)
> + return NULL_TREE;
> +
> + switch (code)
> + {
> + case NE_EXPR: /* - [c, c] */
> + low = high = arg1;
> + break;
> + case EQ_EXPR: /* + [c, c] */
> + in_p = ! in_p, low = high = arg1;
> + break;
> + case GT_EXPR: /* - [-, c] */
> + low = 0, high = arg1;
> + break;
> + case GE_EXPR: /* + [c, -] */
> + in_p = ! in_p, low = arg1, high = 0;
> + break;
> + case LT_EXPR: /* - [c, -] */
> + low = arg1, high = 0;
> + break;
> + case LE_EXPR: /* + [-, c] */
> + in_p = ! in_p, low = 0, high = arg1;
> + break;
> + default:
> + gcc_unreachable ();
> + }
> +
> + /* If this is an unsigned comparison, we also know that EXP is
> + greater than or equal to zero. We base the range tests we make
> + on that fact, so we record it here so we can parse existing
> + range tests. We test arg0_type since often the return type
> + of, e.g. EQ_EXPR, is boolean. */
> + if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
> + {
> + if (! merge_ranges (&n_in_p, &n_low, &n_high,
> + in_p, low, high, 1,
> + build_int_cst (arg0_type, 0),
> + NULL_TREE))
> + return NULL_TREE;
> +
> + in_p = n_in_p, low = n_low, high = n_high;
> +
> + /* If the high bound is missing, but we have a nonzero low
> + bound, reverse the range so it goes from zero to the low bound
> + minus 1. */
> + if (high == 0 && low && ! integer_zerop (low))
> + {
> + in_p = ! in_p;
> + high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
> + integer_one_node, 0);
> + low = build_int_cst (arg0_type, 0);
> + }
> + }
> +
> + *p_low = low;
> + *p_high = high;
> + *p_in_p = in_p;
> + return arg0;
> +
> + case NEGATE_EXPR:
> + /* (-x) IN [a,b] -> x in [-b, -a] */
> + n_low = range_binop (MINUS_EXPR, exp_type,
> + build_int_cst (exp_type, 0),
> + 0, high, 1);
> + n_high = range_binop (MINUS_EXPR, exp_type,
> + build_int_cst (exp_type, 0),
> + 0, low, 0);
> + if (n_high != 0 && TREE_OVERFLOW (n_high))
> + return NULL_TREE;
> + goto normalize;
> +
> + case BIT_NOT_EXPR:
> + /* ~ X -> -X - 1 */
> + return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
> + build_int_cst (exp_type, 1));
> +
> + case PLUS_EXPR:
> + case MINUS_EXPR:
> + if (TREE_CODE (arg1) != INTEGER_CST)
> + return NULL_TREE;
> +
> + /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
> + move a constant to the other side. */
> + if (!TYPE_UNSIGNED (arg0_type)
> + && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
> + return NULL_TREE;
> +
> + /* If EXP is signed, any overflow in the computation is undefined,
> + so we don't worry about it so long as our computations on
> + the bounds don't overflow. For unsigned, overflow is defined
> + and this is exactly the right thing. */
> + n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> + arg0_type, low, 0, arg1, 0);
> + n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> + arg0_type, high, 1, arg1, 0);
> + if ((n_low != 0 && TREE_OVERFLOW (n_low))
> + || (n_high != 0 && TREE_OVERFLOW (n_high)))
> + return NULL_TREE;
> +
> + if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
> + *strict_overflow_p = true;
> +
> + normalize:
> + /* Check for an unsigned range which has wrapped around the maximum
> + value thus making n_high < n_low, and normalize it. */
> + if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
> + {
> + low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
> + integer_one_node, 0);
> + high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
> + integer_one_node, 0);
> +
> + /* If the range is of the form +/- [ x+1, x ], we won't
> + be able to normalize it. But then, it represents the
> + whole range or the empty set, so make it
> + +/- [ -, - ]. */
> + if (tree_int_cst_equal (n_low, low)
> + && tree_int_cst_equal (n_high, high))
> + low = high = 0;
> + else
> + in_p = ! in_p;
> + }
> + else
> + low = n_low, high = n_high;
> +
> + *p_low = low;
> + *p_high = high;
> + *p_in_p = in_p;
> + return arg0;
> +
> + CASE_CONVERT:
> + case NON_LVALUE_EXPR:
> + if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
> + return NULL_TREE;
> +
> + if (! INTEGRAL_TYPE_P (arg0_type)
> + || (low != 0 && ! int_fits_type_p (low, arg0_type))
> + || (high != 0 && ! int_fits_type_p (high, arg0_type)))
> + return NULL_TREE;
> +
> + n_low = low, n_high = high;
> +
> + if (n_low != 0)
> + n_low = fold_convert_loc (loc, arg0_type, n_low);
> +
> + if (n_high != 0)
> + n_high = fold_convert_loc (loc, arg0_type, n_high);
> +
> + /* If we're converting arg0 from an unsigned type, to exp,
> + a signed type, we will be doing the comparison as unsigned.
> + The tests above have already verified that LOW and HIGH
> + are both positive.
> +
> + So we have to ensure that we will handle large unsigned
> + values the same way that the current signed bounds treat
> + negative values. */
> +
> + if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
> + {
> + tree high_positive;
> + tree equiv_type;
> + /* For fixed-point modes, we need to pass the saturating flag
> + as the 2nd parameter. */
> + if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
> + equiv_type
> + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type),
> + TYPE_SATURATING (arg0_type));
> + else
> + equiv_type
> + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1);
> +
> + /* A range without an upper bound is, naturally, unbounded.
> + Since convert would have cropped a very large value, use
> + the max value for the destination type. */
> + high_positive
> + = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
> + : TYPE_MAX_VALUE (arg0_type);
> +
> + if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
> + high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
> + fold_convert_loc (loc, arg0_type,
> + high_positive),
> + build_int_cst (arg0_type, 1));
> +
> + /* If the low bound is specified, "and" the range with the
> + range for which the original unsigned value will be
> + positive. */
> + if (low != 0)
> + {
> + if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high,
> + 1, fold_convert_loc (loc, arg0_type,
> + integer_zero_node),
> + high_positive))
> + return NULL_TREE;
> +
> + in_p = (n_in_p == in_p);
> + }
> + else
> + {
> + /* Otherwise, "or" the range with the range of the input
> + that will be interpreted as negative. */
> + if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high,
> + 1, fold_convert_loc (loc, arg0_type,
> + integer_zero_node),
> + high_positive))
> + return NULL_TREE;
> +
> + in_p = (in_p != n_in_p);
> + }
> + }
> +
> + *p_low = n_low;
> + *p_high = n_high;
> + *p_in_p = in_p;
> + return arg0;
> +
> + default:
> + return NULL_TREE;
> + }
> +}
> +
> /* Given EXP, a logical expression, set the range it is testing into
> variables denoted by PIN_P, PLOW, and PHIGH. Return the expression
> actually being tested. *PLOW and *PHIGH will be made of the same
> @@ -3804,10 +4049,10 @@ make_range (tree exp, int *pin_p, tree *
> bool *strict_overflow_p)
> {
> enum tree_code code;
> - tree arg0 = NULL_TREE, arg1 = NULL_TREE;
> - tree exp_type = NULL_TREE, arg0_type = NULL_TREE;
> - int in_p, n_in_p;
> - tree low, high, n_low, n_high;
> + tree arg0, arg1 = NULL_TREE;
> + tree exp_type, nexp;
> + int in_p;
> + tree low, high;
> location_t loc = EXPR_LOCATION (exp);
>
> /* Start with simply saying "EXP != 0" and then look at the code of EXP
> @@ -3823,255 +4068,26 @@ make_range (tree exp, int *pin_p, tree *
> {
> code = TREE_CODE (exp);
> exp_type = TREE_TYPE (exp);
> + arg0 = NULL_TREE;
>
> if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
> {
> if (TREE_OPERAND_LENGTH (exp) > 0)
> arg0 = TREE_OPERAND (exp, 0);
> - if (TREE_CODE_CLASS (code) == tcc_comparison
> - || TREE_CODE_CLASS (code) == tcc_unary
> - || TREE_CODE_CLASS (code) == tcc_binary)
> - arg0_type = TREE_TYPE (arg0);
> if (TREE_CODE_CLASS (code) == tcc_binary
> || TREE_CODE_CLASS (code) == tcc_comparison
> || (TREE_CODE_CLASS (code) == tcc_expression
> && TREE_OPERAND_LENGTH (exp) > 1))
> arg1 = TREE_OPERAND (exp, 1);
> }
> + if (arg0 == NULL_TREE)
> + break;
>
> - switch (code)
> - {
> - case TRUTH_NOT_EXPR:
> - in_p = ! in_p, exp = arg0;
> - continue;
> -
> - case EQ_EXPR: case NE_EXPR:
> - case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
> - /* We can only do something if the range is testing for zero
> - and if the second operand is an integer constant. Note that
> - saying something is "in" the range we make is done by
> - complementing IN_P since it will set in the initial case of
> - being not equal to zero; "out" is leaving it alone. */
> - if (low == 0 || high == 0
> - || ! integer_zerop (low) || ! integer_zerop (high)
> - || TREE_CODE (arg1) != INTEGER_CST)
> - break;
> -
> - switch (code)
> - {
> - case NE_EXPR: /* - [c, c] */
> - low = high = arg1;
> - break;
> - case EQ_EXPR: /* + [c, c] */
> - in_p = ! in_p, low = high = arg1;
> - break;
> - case GT_EXPR: /* - [-, c] */
> - low = 0, high = arg1;
> - break;
> - case GE_EXPR: /* + [c, -] */
> - in_p = ! in_p, low = arg1, high = 0;
> - break;
> - case LT_EXPR: /* - [c, -] */
> - low = arg1, high = 0;
> - break;
> - case LE_EXPR: /* + [-, c] */
> - in_p = ! in_p, low = 0, high = arg1;
> - break;
> - default:
> - gcc_unreachable ();
> - }
> -
> - /* If this is an unsigned comparison, we also know that EXP is
> - greater than or equal to zero. We base the range tests we make
> - on that fact, so we record it here so we can parse existing
> - range tests. We test arg0_type since often the return type
> - of, e.g. EQ_EXPR, is boolean. */
> - if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
> - {
> - if (! merge_ranges (&n_in_p, &n_low, &n_high,
> - in_p, low, high, 1,
> - build_int_cst (arg0_type, 0),
> - NULL_TREE))
> - break;
> -
> - in_p = n_in_p, low = n_low, high = n_high;
> -
> - /* If the high bound is missing, but we have a nonzero low
> - bound, reverse the range so it goes from zero to the low bound
> - minus 1. */
> - if (high == 0 && low && ! integer_zerop (low))
> - {
> - in_p = ! in_p;
> - high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
> - integer_one_node, 0);
> - low = build_int_cst (arg0_type, 0);
> - }
> - }
> -
> - exp = arg0;
> - continue;
> -
> - case NEGATE_EXPR:
> - /* (-x) IN [a,b] -> x in [-b, -a] */
> - n_low = range_binop (MINUS_EXPR, exp_type,
> - build_int_cst (exp_type, 0),
> - 0, high, 1);
> - n_high = range_binop (MINUS_EXPR, exp_type,
> - build_int_cst (exp_type, 0),
> - 0, low, 0);
> - if (n_high != 0 && TREE_OVERFLOW (n_high))
> - break;
> - goto normalize;
> -
> - case BIT_NOT_EXPR:
> - /* ~ X -> -X - 1 */
> - exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
> - build_int_cst (exp_type, 1));
> - continue;
> -
> - case PLUS_EXPR: case MINUS_EXPR:
> - if (TREE_CODE (arg1) != INTEGER_CST)
> - break;
> -
> - /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
> - move a constant to the other side. */
> - if (!TYPE_UNSIGNED (arg0_type)
> - && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
> - break;
> -
> - /* If EXP is signed, any overflow in the computation is undefined,
> - so we don't worry about it so long as our computations on
> - the bounds don't overflow. For unsigned, overflow is defined
> - and this is exactly the right thing. */
> - n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> - arg0_type, low, 0, arg1, 0);
> - n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> - arg0_type, high, 1, arg1, 0);
> - if ((n_low != 0 && TREE_OVERFLOW (n_low))
> - || (n_high != 0 && TREE_OVERFLOW (n_high)))
> - break;
> -
> - if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
> - *strict_overflow_p = true;
> -
> - normalize:
> - /* Check for an unsigned range which has wrapped around the maximum
> - value thus making n_high < n_low, and normalize it. */
> - if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
> - {
> - low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
> - integer_one_node, 0);
> - high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
> - integer_one_node, 0);
> -
> - /* If the range is of the form +/- [ x+1, x ], we won't
> - be able to normalize it. But then, it represents the
> - whole range or the empty set, so make it
> - +/- [ -, - ]. */
> - if (tree_int_cst_equal (n_low, low)
> - && tree_int_cst_equal (n_high, high))
> - low = high = 0;
> - else
> - in_p = ! in_p;
> - }
> - else
> - low = n_low, high = n_high;
> -
> - exp = arg0;
> - continue;
> -
> - CASE_CONVERT: case NON_LVALUE_EXPR:
> - if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
> - break;
> -
> - if (! INTEGRAL_TYPE_P (arg0_type)
> - || (low != 0 && ! int_fits_type_p (low, arg0_type))
> - || (high != 0 && ! int_fits_type_p (high, arg0_type)))
> - break;
> -
> - n_low = low, n_high = high;
> -
> - if (n_low != 0)
> - n_low = fold_convert_loc (loc, arg0_type, n_low);
> -
> - if (n_high != 0)
> - n_high = fold_convert_loc (loc, arg0_type, n_high);
> -
> -
> - /* If we're converting arg0 from an unsigned type, to exp,
> - a signed type, we will be doing the comparison as unsigned.
> - The tests above have already verified that LOW and HIGH
> - are both positive.
> -
> - So we have to ensure that we will handle large unsigned
> - values the same way that the current signed bounds treat
> - negative values. */
> -
> - if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
> - {
> - tree high_positive;
> - tree equiv_type;
> - /* For fixed-point modes, we need to pass the saturating flag
> - as the 2nd parameter. */
> - if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
> - equiv_type = lang_hooks.types.type_for_mode
> - (TYPE_MODE (arg0_type),
> - TYPE_SATURATING (arg0_type));
> - else
> - equiv_type = lang_hooks.types.type_for_mode
> - (TYPE_MODE (arg0_type), 1);
> -
> - /* A range without an upper bound is, naturally, unbounded.
> - Since convert would have cropped a very large value, use
> - the max value for the destination type. */
> - high_positive
> - = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
> - : TYPE_MAX_VALUE (arg0_type);
> -
> - if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
> - high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
> - fold_convert_loc (loc, arg0_type,
> - high_positive),
> - build_int_cst (arg0_type, 1));
> -
> - /* If the low bound is specified, "and" the range with the
> - range for which the original unsigned value will be
> - positive. */
> - if (low != 0)
> - {
> - if (! merge_ranges (&n_in_p, &n_low, &n_high,
> - 1, n_low, n_high, 1,
> - fold_convert_loc (loc, arg0_type,
> - integer_zero_node),
> - high_positive))
> - break;
> -
> - in_p = (n_in_p == in_p);
> - }
> - else
> - {
> - /* Otherwise, "or" the range with the range of the input
> - that will be interpreted as negative. */
> - if (! merge_ranges (&n_in_p, &n_low, &n_high,
> - 0, n_low, n_high, 1,
> - fold_convert_loc (loc, arg0_type,
> - integer_zero_node),
> - high_positive))
> - break;
> -
> - in_p = (in_p != n_in_p);
> - }
> - }
> -
> - exp = arg0;
> - low = n_low, high = n_high;
> - continue;
> -
> - default:
> - break;
> - }
> -
> - break;
> + nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low,
> + &high, &in_p, strict_overflow_p);
> + if (nexp == NULL_TREE)
> + break;
> + exp = nexp;
> }
>
> /* If EXP is a constant, we can evaluate whether this is true or false. */
> --- gcc/tree.h.jj 2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree.h 2011-09-27 16:38:58.000000000 +0200
> @@ -5384,7 +5384,10 @@ extern unsigned int get_pointer_alignmen
> extern tree fold_call_stmt (gimple, bool);
> extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function);
> extern tree make_range (tree, int *, tree *, tree *, bool *);
> +extern tree make_range_step (location_t, enum tree_code, tree, tree, tree,
> + tree *, tree *, int *, bool *);
> extern tree build_range_check (location_t, tree, tree, int, tree, tree);
> +extern tree range_binop (enum tree_code, tree, tree, int, tree, int);
> extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
> tree, tree);
> extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
> --- gcc/Makefile.in.jj 2011-09-18 21:20:04.000000000 +0200
> +++ gcc/Makefile.in 2011-09-29 14:09:42.000000000 +0200
> @@ -2641,7 +2641,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c
> $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \
> tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \
> $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \
> - tree-pretty-print.h gimple-pretty-print.h
> + tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H)
> tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
> $(TREE_H) $(TM_P_H) $(GGC_H) output.h \
> $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \
> --- gcc/tree-ssa-reassoc.c.jj 2011-09-08 11:21:10.000000000 +0200
> +++ gcc/tree-ssa-reassoc.c 2011-09-29 13:33:00.000000000 +0200
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.
> #include "flags.h"
> #include "target.h"
> #include "params.h"
> +#include "diagnostic-core.h"
>
> /* This is a simple global reassociation pass. It is, in part, based
> on the LLVM pass of the same name (They do some things more/less
> @@ -1568,6 +1569,422 @@ optimize_ops_list (enum tree_code opcode
> optimize_ops_list (opcode, ops);
> }
>
> +/* The following functions are subroutines to optimize_range_tests and allow
> + it to try to change a logical combination of comparisons into a range
> + test.
> +
> + For example, both
> + X == 2 || X == 5 || X == 3 || X == 4
> + and
> + X >= 2 && X <= 5
> + are converted to
> + (unsigned) (X - 2) <= 3
> +
> + For more information see comments above fold_range_test in fold-const.c,
> + this implementation is for GIMPLE. */
> +
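The (unsigned) (X - 2) <= 3 form is the usual trick: in unsigned arithmetic
the subtraction wraps X < 2 around to very large values, so a single compare
covers the whole [2, 5] range.  A standalone check of that equivalence (just
an illustration, nothing reassoc-specific):

#include <assert.h>

int
main (void)
{
  int x;
  /* X >= 2 && X <= 5 is equivalent to (unsigned) (X - 2) <= 3.  */
  for (x = -100; x < 100; x++)
    assert ((x >= 2 && x <= 5) == ((unsigned) (x - 2) <= 3));
  return 0;
}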
> +struct range_entry
> +{
> + tree exp;
> + tree low;
> + tree high;
> + bool in_p;
> + bool strict_overflow_p;
> + unsigned int idx, next;
> +};
> +
> +/* This is similar to make_range in fold-const.c, but on top of
> + GIMPLE instead of trees. */
> +
> +static void
> +init_range_entry (struct range_entry *r, tree exp)
> +{
> + int in_p;
> + tree low, high;
> + bool is_bool, strict_overflow_p;
> +
> + r->exp = NULL_TREE;
> + r->in_p = false;
> + r->strict_overflow_p = false;
> + r->low = NULL_TREE;
> + r->high = NULL_TREE;
> + if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
> + return;
> +
> + /* Start with simply saying "EXP != 0" and then look at the code of EXP
> + and see if we can refine the range. Some of the cases below may not
> + happen, but it doesn't seem worth worrying about this. We "continue"
> + the outer loop when we've changed something; otherwise we "break"
> + the switch, which will "break" the while. */
> + low = build_int_cst (TREE_TYPE (exp), 0);
> + high = low;
> + in_p = 0;
> + strict_overflow_p = false;
> + is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
Effective booleans are also TYPE_PRECISION () == 1 types.  Remember that
we don't preserve conversions from BOOLEAN_TYPE to such types.
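I.e. something along these lines, I suppose (just a sketch; whether signed
1-bit types should be accepted too probably needs more thought):

  is_bool = (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
             || (TYPE_PRECISION (TREE_TYPE (exp)) == 1
                 && TYPE_UNSIGNED (TREE_TYPE (exp))));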
> +
> + while (1)
> + {
> + gimple stmt;
> + enum tree_code code;
> + tree arg0, arg1, exp_type;
> + tree nexp;
> + location_t loc;
> +
> + if (TREE_CODE (exp) != SSA_NAME)
> + break;
> +
> + stmt = SSA_NAME_DEF_STMT (exp);
> + if (!is_gimple_assign (stmt))
> + break;
> +
> + code = gimple_assign_rhs_code (stmt);
> + arg0 = gimple_assign_rhs1 (stmt);
> + arg1 = gimple_assign_rhs2 (stmt);
> + exp_type = TREE_TYPE (exp);
> + loc = gimple_location (stmt);
> + switch (code)
> + {
> + case BIT_NOT_EXPR:
> + if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
See above.
> + {
> + in_p = !in_p;
> + exp = arg0;
> + continue;
> + }
> + break;
> + case SSA_NAME:
> + exp = arg0;
> + continue;
> + CASE_CONVERT:
> + is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
Likewise.  Though I wonder why |=?  Does it matter whether the extension
sign- or zero-extends?
> + goto do_default;
> + case EQ_EXPR:
> + case NE_EXPR:
> + case LT_EXPR:
> + case LE_EXPR:
> + case GE_EXPR:
> + case GT_EXPR:
> + is_bool = true;
> + /* FALLTHRU */
> + do_default:
> + default:
> + if (!is_bool)
> + return;
> + nexp = make_range_step (loc, code, arg0, arg1, exp_type,
> + &low, &high, &in_p,
> + &strict_overflow_p);
> + if (nexp != NULL_TREE)
> + {
> + exp = nexp;
> + gcc_assert (TREE_CODE (exp) == SSA_NAME);
> + continue;
> + }
> + break;
> + }
> + break;
> + }
> + if (is_bool)
> + {
> + r->exp = exp;
> + r->in_p = in_p;
> + r->low = low;
> + r->high = high;
> + r->strict_overflow_p = strict_overflow_p;
> + }
> +}
> +
> +/* Comparison function for qsort. Sort entries
> + without SSA_NAME exp first, then with SSA_NAMEs sorted
> + by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
> + by increasing ->low and if ->low is the same, by increasing
> + ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
> + maximum. */
> +
> +static int
> +range_entry_cmp (const void *a, const void *b)
> +{
> + const struct range_entry *p = (const struct range_entry *) a;
> + const struct range_entry *q = (const struct range_entry *) b;
> +
> + if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
> + {
> + if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
> + {
> + /* Group range_entries for the same SSA_NAME together. */
> + if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
> + return -1;
> + else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
> + return 1;
> + /* If ->low is different, NULL low goes first, then by
> + ascending low. */
> + if (p->low != NULL_TREE)
> + {
> + if (q->low != NULL_TREE)
> + {
> + if (integer_onep (range_binop (LT_EXPR, integer_type_node,
> + p->low, 0, q->low, 0)))
That's just

  tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low);
  if (tem && integer_onep (tem))
    return -1;

which avoids building the LT_EXPR tree if it doesn't fold.  Similar below.
(ISTR some integer_onep () variant that handles a NULL argument ...)
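Or, to also deal with the NULL bounds in one place, a small helper along
these lines might be tidier (just a sketch, name made up; NULL means
minimum resp. maximum depending on which bound is being compared):

static int
compare_range_bound (tree a, tree b, int null_is_min)
{
  tree tem;

  if (a == NULL_TREE || b == NULL_TREE)
    {
      if (a == b)
        return 0;
      if (a == NULL_TREE)
        return null_is_min ? -1 : 1;
      return null_is_min ? 1 : -1;
    }
  tem = fold_binary (LT_EXPR, boolean_type_node, a, b);
  if (tem && integer_onep (tem))
    return -1;
  tem = fold_binary (GT_EXPR, boolean_type_node, a, b);
  if (tem && integer_onep (tem))
    return 1;
  /* Neither comparison folded; treat the bounds as equal for sorting.  */
  return 0;
}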
> + return -1;
> + if (integer_onep (range_binop (GT_EXPR, integer_type_node,
> + p->low, 0, q->low, 0)))
> + return 1;
> + }
> + else
> + return 1;
> + }
> + else if (q->low != NULL_TREE)
> + return -1;
> + /* If ->high is different, NULL high goes last, before that by
> + ascending high. */
> + if (p->high != NULL_TREE)
> + {
> + if (q->high != NULL_TREE)
> + {
> + if (integer_onep (range_binop (LT_EXPR, integer_type_node,
> + p->high, 0, q->high, 0)))
> + return -1;
> + if (integer_onep (range_binop (GT_EXPR, integer_type_node,
> + p->high, 0, q->high, 0)))
> + return 1;
> + }
> + else
> + return -1;
> + }
> + else if (p->high != NULL_TREE)
> + return 1;
> + /* If both ranges are the same, sort below by ascending idx. */
> + }
> + else
> + return 1;
> + }
> + else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
> + return -1;
> +
> + if (p->idx < q->idx)
> + return -1;
> + else
> + {
> + gcc_checking_assert (p->idx > q->idx);
> + return 1;
> + }
> +}
> +
> +/* Helper routine of optimize_range_tests.
> + [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
> + RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
> + OPCODE and OPS are arguments of optimize_range_tests. Return
> + true if the range merge has been successful. */
> +
> +static bool
> +update_range_test (struct range_entry *range, struct range_entry *otherrange,
> + unsigned int count, enum tree_code opcode,
> + VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
> + tree low, tree high, bool strict_overflow_p)
> +{
> + tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
> + location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
> + tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
> + enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
> + gimple_stmt_iterator gsi;
> +
> + if (tem == NULL_TREE)
> + return false;
> +
> + if (strict_overflow_p && issue_strict_overflow_warning (wc))
> + warning_at (loc, OPT_Wstrict_overflow,
> + "assuming signed overflow does not occur "
> + "when simplifying range test");
> +
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + struct range_entry *r;
> + fprintf (dump_file, "Optimizing range tests ");
> + print_generic_expr (dump_file, range->exp, 0);
> + fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
> + print_generic_expr (dump_file, range->low, 0);
> + fprintf (dump_file, ", ");
> + print_generic_expr (dump_file, range->high, 0);
> + fprintf (dump_file, "]");
> + for (r = otherrange; r < otherrange + count; r++)
> + {
> + fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
> + print_generic_expr (dump_file, r->low, 0);
> + fprintf (dump_file, ", ");
> + print_generic_expr (dump_file, r->high, 0);
> + fprintf (dump_file, "]");
> + }
> + fprintf (dump_file, "\n into ");
> + print_generic_expr (dump_file, tem, 0);
> + fprintf (dump_file, "\n");
> + }
> +
> + if (opcode == BIT_IOR_EXPR)
> + tem = invert_truthvalue_loc (loc, tem);
> +
> + tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
> + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
> + tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
> + GSI_SAME_STMT);
> +
> + VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
> + range->exp = exp;
> + range->low = low;
> + range->high = high;
> + range->in_p = in_p;
> + range->strict_overflow_p = false;
> +
> + for (range = otherrange; range < otherrange + count; range++)
> + {
> + VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
> + range->exp = NULL_TREE;
> + }
> + return true;
> +}
> +
> +/* Optimize range tests, similarly to how fold_range_test optimizes
> + it on trees. The tree code for the binary
> + operation between all the operands is OPCODE. */
> +
> +static void
> +optimize_range_tests (enum tree_code opcode,
> + VEC (operand_entry_t, heap) **ops)
> +{
> + unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
> + operand_entry_t oe;
> + struct range_entry *ranges;
> + bool any_changes = false;
> +
> + if (length == 1)
> + return;
> +
> + ranges = XNEWVEC (struct range_entry, length);
> + for (i = 0; i < length; i++)
> + {
> + ranges[i].idx = i;
> + init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
> + /* For | invert it now, we will invert it again before emitting
> + the optimized expression. */
> + if (opcode == BIT_IOR_EXPR)
> + ranges[i].in_p = !ranges[i].in_p;
> + }
> +
> + qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
> + for (i = 0; i < length; i++)
> + if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
> + break;
> +
> + /* Try to merge ranges. */
> + for (first = i; i < length; i++)
> + {
> + tree low = ranges[i].low;
> + tree high = ranges[i].high;
> + int in_p = ranges[i].in_p;
> + bool strict_overflow_p = ranges[i].strict_overflow_p;
> +
> + for (j = i + 1; j < length; j++)
> + {
That looks quadratic - do we want to limit this with a --param, simply
partitioning the array into chunks of bounded size?
> + if (ranges[i].exp != ranges[j].exp)
> + break;
Or isn't it too bad because of this check?
> + if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
> + ranges[j].in_p, ranges[j].low, ranges[j].high))
> + break;
And this early out?  I suppose a comment on why, together with the sorting,
this is of limited complexity would help.
> + strict_overflow_p |= ranges[j].strict_overflow_p;
> + }
> +
> + if (j > i + 1
> + && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
> + ops, ranges[i].exp, in_p, low, high,
> + strict_overflow_p))
> + {
> + i = j - 1;
> + any_changes = true;
> + }
> + }
> +
> + /* Optimize X == CST1 || X == CST2
> + if popcount (CST1 ^ CST2) == 1 into
> + (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
> + Similarly for ranges. E.g.
> + X != 2 && X != 3 && X != 10 && X != 11
> + will be transformed by the above loop into
> + (X - 2U) <= 1U && (X - 10U) <= 1U
> + and this loop can transform that into
> + ((X & ~8) - 2U) <= 1U. */
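Spelling out the bit trick for that example (this is what the lowxor /
highxor checks below verify):

  lowi ^ lowj   == 2 ^ 10 == 8
  highi ^ highj == 3 ^ 11 == 8
  8 & (8 - 1)   == 0              (i.e. a single differing bit)
  X & ~8 maps 10 -> 2 and 11 -> 3 and leaves 2 and 3 alone

so the [2, 3] and [10, 11] subranges merge into a single test on X & ~8.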
> + for (i = first; i < length; i++)
> + {
> + tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
> +
> + if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
> + continue;
> + type = TREE_TYPE (ranges[i].exp);
> + if (!INTEGRAL_TYPE_P (type))
> + continue;
> + lowi = ranges[i].low;
> + if (lowi == NULL_TREE)
> + lowi = TYPE_MIN_VALUE (type);
> + highi = ranges[i].high;
> + if (highi == NULL_TREE)
> + continue;
> + for (j = i + 1; j < length && j < i + 64; j++)
> + {
> + if (ranges[j].exp == NULL_TREE)
> + continue;
> + if (ranges[i].exp != ranges[j].exp)
> + break;
> + if (ranges[j].in_p)
> + continue;
> + lowj = ranges[j].low;
> + if (lowj == NULL_TREE)
> + continue;
> + highj = ranges[j].high;
> + if (highj == NULL_TREE)
> + highj = TYPE_MAX_VALUE (type);
> + if (!integer_onep (range_binop (GT_EXPR, integer_type_node,
> + lowj, 0, highi, 0)))
> + continue;
See above.
> + lowxor = range_binop (BIT_XOR_EXPR, NULL_TREE, lowi, 0, lowj, 0);
> + if (lowxor == NULL_TREE)
> + continue;
Likewise.
> + gcc_checking_assert (!integer_zerop (lowxor));
> + tem = range_binop (MINUS_EXPR, NULL_TREE, lowxor, 0,
> + build_int_cst (type, 1), 0);
> + tem = range_binop (BIT_AND_EXPR, NULL_TREE, lowxor, 0, tem, 0);
> + if (!integer_zerop (tem))
> + continue;
> + highxor = range_binop (BIT_XOR_EXPR, NULL_TREE, highi, 0, highj, 0);
> + if (!tree_int_cst_equal (lowxor, highxor))
> + continue;
> + tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
> + exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
> + lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
> + highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
> + if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
> + ranges[i].in_p, lowj, highj,
> + ranges[i].strict_overflow_p
> + || ranges[j].strict_overflow_p))
> + {
> + any_changes = true;
> + break;
> + }
> + }
> + }
> +
> + if (any_changes)
> + {
> + j = 0;
> + FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
> + {
> + if (oe->op == error_mark_node)
> + continue;
> + else if (i != j)
> + VEC_replace (operand_entry_t, *ops, j, oe);
> + j++;
> + }
> + VEC_truncate (operand_entry_t, *ops, j);
> + }
> +
> + XDELETEVEC (ranges);
> +}
> +
> /* Return true if OPERAND is defined by a PHI node which uses the LHS
> of STMT in it's operands. This is also known as a "destructive
> update" operation. */
> @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb)
> optimize_ops_list (rhs_code, &ops);
> }
>
> + if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
I think we want to check that the type is boolean here, so we don't set up
the machinery needlessly?
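I.e. something like (sketch only; assuming the statement's lhs is still
available as "lhs" at that point, which I believe it is in reassociate_bb):

	  if ((rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
	      && TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
	    optimize_range_tests (rhs_code, &ops);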
Otherwise it looks like a nice improvement.
Thanks,
Richard.
> + optimize_range_tests (rhs_code, &ops);
> +
> if (VEC_length (operand_entry_t, ops) == 1)
> {
> if (dump_file && (dump_flags & TDF_DETAILS))
> --- gcc/testsuite/gcc.dg/pr46309.c.jj 2011-09-29 14:03:28.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr46309.c 2011-09-29 14:08:32.000000000 +0200
> @@ -0,0 +1,66 @@
> +/* PR tree-optimization/46309 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-reassoc-details" } */
> +
> +int
> +f1 (int a)
> +{
> + int v1 = (a == 3);
> + int v2 = (a == 1);
> + int v3 = (a == 4);
> + int v4 = (a == 2);
> + return v1 || v2 || v3 || v4;
> +}
> +
> +int
> +f2 (int a)
> +{
> + int v1 = (a == 1);
> + int v2 = (a == 2);
> + int v3 = (a == 3);
> + int v4 = (a == 4);
> + return v1 || v2 || v3 || v4;
> +}
> +
> +int
> +f3 (int a)
> +{
> + int v1 = (a == 3);
> + int v2 = (a == 1);
> + return v1 || v2;
> +}
> +
> +int
> +f4 (int a)
> +{
> + int v1 = (a == 1);
> + int v2 = (a == 2);
> + return v1 || v2;
> +}
> +
> +int
> +f5 (unsigned int a)
> +{
> + int v1 = (a <= 31);
> + int v2 = (a >= 64 && a <= 95);
> + return v1 || v2;
> +}
> +
> +int
> +f6 (unsigned int a)
> +{
> + int v1 = (a <= 31);
> + int v2 = (a >= 64 && a <= 95);
> + int v3 = (a >= 128 && a <= 159);
> + int v4 = (a >= 192 && a <= 223);
> + return v1 || v2 || v3 || v4;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.128, 159. and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } */
> +/* { dg-final { cleanup-tree-dump "reassoc1" } } */
> +/* { dg-final { cleanup-tree-dump "reassoc2" } } */
>
> Jakub
>