Hi, The solution for this PR addresses a mix of different issues. The first is of course the type-hoisting, but the PR also exposes some missing simplifications on integer values, and on equal and not-equal comparisons. The first patch adds to forward propagation the ability to do type-hoisting for some conversion operations and to simplify the inner binary operations involved. The most important part here is the adjustment of constant integer values in statement lists for a truncation. I limited that patch so that compare_equal_optimize_1 handles only bitwise-and operations inside a truncation cast. Of course, some more simplifications are possible for bitwise-xor/or operations. This patch just does the type-hoisting part. In a second patch I will add to compare_equal_optimize_1 the further simplifications required to fix this problem.
ChangeLog 2012-03-15 Kai Tietz <kti...@redhat.com> PR tree-optimization/45397 * tree-ssa-forwprop.c (compare_equal_optimize_1): New function. (combine_cond_expr_cond): Use compare_equal_optimize_1 function. (truncate_integers): New function. (combine_inner_conversion): Likewise. (ssa_forward_propagate_and_combine): Use for conversions combine_inner_conversion function. 2012-03-15 Kai Tietz <kti...@redhat.com> * gcc.dg/tree-ssa/pr45397-1.c: New testcase. Regression tested for all languages (including Ada and Obj-C) on x86_64-unknown-linux-gnu. Ok for apply? Regards, Kai Index: gcc-trunk/gcc/tree-ssa-forwprop.c =================================================================== --- gcc-trunk.orig/gcc/tree-ssa-forwprop.c +++ gcc-trunk/gcc/tree-ssa-forwprop.c @@ -362,6 +362,150 @@ rhs_to_tree (tree type, gimple stmt) gcc_unreachable (); } +/* This function does simplifications of comparison OP0 ==/!= OP1 while integral + typed operands + On success new statement's TREE is returned, otherwise NULL_TREE. */ + +static tree +compare_equal_optimize_1 (gimple stmt, enum tree_code code, tree type, + tree op0, tree op1) +{ + gimple_stmt_iterator gsi; + tree type_outer; + tree type_inner, op_inner; + tree op1_l, op1_r, tem; + gimple newop; + + type_outer = TREE_TYPE (op0); + if ((code != EQ_EXPR && code != NE_EXPR) + || !INTEGRAL_TYPE_P (type_outer)) + return NULL_TREE; + + /* If OP0 isn't a conversion, then check if OP1 might be one. If so + swap arguments, otherwise return NULL_TREE. */ + if (!CONVERT_EXPR_P (op0)) + { + if (!CONVERT_EXPR_P (op1)) + return NULL_TREE; + tem = op0; + op0 = op1; + op1 = tem; + } + + op_inner = TREE_OPERAND (op0, 0); + type_inner = TREE_TYPE (op_inner); + + /* Operations only apply to integral typed operands of cast. */ + if (!INTEGRAL_TYPE_P (type_inner)) + return NULL_TREE; + + /* If second operand is also a type-conversion, check that underlying operand + is integral typed. 
*/ + if (CONVERT_EXPR_P (op1) + && !INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (op1, 0)))) + return NULL_TREE; + + /* Simplify ((type) X ==/!= (type) X) to true/false, if X has no side-effects + and is integral typed. */ + if (CONVERT_EXPR_P (op1) + && TREE_OPERAND (op0, 0) == TREE_OPERAND (op1, 0) + && !TREE_SIDE_EFFECTS (TREE_OPERAND (op0, 0))) + return fold_convert (type, (code == EQ_EXPR ? boolean_true_node + : boolean_false_node)); + + /* Simplify (type) X ==/!= (type) Y to X ==/!= Y, if types of X and Y are + compatible and type-precision of X is smaller or equal to TYPE's. */ + if (CONVERT_EXPR_P (op1) + && TYPE_PRECISION (type_inner) <= TYPE_PRECISION (type_outer)) + { + tem = TREE_OPERAND (op1, 0); + if (!useless_type_conversion_p (type_inner, TREE_TYPE (tem))) + return NULL_TREE; + return fold_build2_loc (gimple_location (stmt), code, type, + op_inner, tem); + } + + /* Verify that for pattern 'OP0 = (type) X' the type of X is of integral kind, + is unsigned, and has smaller or equal precision to type TYPE. */ + if (TYPE_PRECISION (type_inner) > TYPE_PRECISION (type_outer) + || !TYPE_UNSIGNED (type_inner)) + return NULL_TREE; + + /* Simplify (type) X ==/!= CST to X ==/!= CST' with CST'=(type-of-X) CST. */ + if (TREE_CODE (op1) == INTEGER_CST) + { + tree new_cst = fold_convert (type_inner, op1); + /* If constant is out of range for (type) X, then return + constant result of comparison. */ + if (!operand_equal_p (op1, fold_convert (type_outer, new_cst), 0)) + return fold_convert (type, (code == EQ_EXPR ? boolean_false_node + : boolean_true_node)); + return fold_build2_loc (gimple_location (stmt), code, type, op_inner, new_cst); + } + + /* Simplify (type) X ==/!= Y & CST to X ==/!= (type-of-X) (Y & CST). If CST + is equal to ~(type-of-X)0, then simplify to X ==/!= (type-of-X). 
*/ + if (TREE_CODE (op1) != BIT_AND_EXPR) + return NULL_TREE; + + gsi = gsi_for_stmt (stmt); + + op1_l = TREE_OPERAND (op1, 0); + op1_r = TREE_OPERAND (op1, 1); + + /* Make sure OP1_R holds an integer-constant. */ + if (TREE_CODE (op1_l) == INTEGER_CST) + { + op1_l = op1_r; + op1_r = TREE_OPERAND (op1, 0); + } + if (TREE_CODE (op1_r) != INTEGER_CST) + return NULL_TREE; + + tem = fold_convert (type_inner, op1_r); + + /* If CST doesn't fit complete into precision of the type of X, + then we can't do anything here. + Remark: Type-precision is here of interested, not sign/unsigned + value-range. */ + if (!operand_equal_p (op1_r, fold_convert (TREE_TYPE (op1_r), tem), 0)) + return NULL_TREE; + op0 = op_inner; + + /* If CST has value of zero, then we can special case + to X == 0. */ + if (integer_zerop (tem)) + return fold_build2_loc (gimple_location (stmt), code, type, op0, tem); + + /* If CST has value of ~((type-of-X) 0), then we can special case + to X == (type-of-X) Y. */ + if (!integer_all_onesp (tem)) + { + tem = create_tmp_reg (TREE_TYPE (op1), NULL); + newop = gimple_build_assign_with_ops (TREE_CODE (op1), + tem, TREE_OPERAND (op1, 0), + TREE_OPERAND (op1, 1)); + tem = make_ssa_name (tem, newop); + gimple_assign_set_lhs (newop, tem); + gimple_set_location (newop, gimple_location (stmt)); + gsi_insert_before (&gsi, newop, GSI_SAME_STMT); + update_stmt (newop); + op1 = tem; + } + else + op1 = op1_l; + tem = create_tmp_reg (type_inner, NULL); + newop = gimple_build_assign_with_ops (NOP_EXPR, + tem, op1, NULL_TREE); + tem = make_ssa_name (tem, newop); + gimple_assign_set_lhs (newop, tem); + gimple_set_location (newop, gimple_location (stmt)); + gsi_insert_before (&gsi, newop, GSI_SAME_STMT); + update_stmt (newop); + op1 = tem; + return fold_build2_loc (gimple_location (stmt), code, type, op0, op1); +} + /* Combine OP0 CODE OP1 in the context of a COND_EXPR. 
Returns the folded result in a form suitable for COND_EXPR_COND or NULL_TREE, if there is no suitable simplified form. If @@ -378,6 +522,10 @@ combine_cond_expr_cond (gimple stmt, enu fold_defer_overflow_warnings (); t = fold_binary_loc (gimple_location (stmt), code, type, op0, op1); + + if (!t && !invariant_only) + t = compare_equal_optimize_1 (stmt, code, type, op0, op1); + if (!t) { fold_undefer_overflow_warnings (false, NULL, 0); @@ -2191,6 +2339,253 @@ out: return false; } +/* Function truncates all constant integer-values to precision of + type TCAST within STMT expressions made of +, -, ^, |, and &. + STMT has to be an valid gimple-assignment statement. */ + +static void +truncate_integers (gimple stmt, tree tcast) +{ + gimple_stmt_iterator gsi2; + gimple s; + enum tree_code code; + tree op1, op2, tem; + int need_update = 0; + + do + { + code = gimple_assign_rhs_code (stmt); + if (code != PLUS_EXPR && code != MINUS_EXPR + && code != BIT_AND_EXPR + && code != BIT_IOR_EXPR + && code != BIT_XOR_EXPR) + return; + + op1 = gimple_assign_rhs1 (stmt); + op2 = gimple_assign_rhs2 (stmt); + + if (code != MINUS_EXPR + && TREE_CODE (op1) == INTEGER_CST + && TREE_CODE (op2) != INTEGER_CST) + { + need_update = 1; + tem = op1; + op1 = op2; + op2 = tem; + } + + if (TREE_CODE (op1) == INTEGER_CST) + { + tem = fold_convert (tcast, op1); + tem = fold_convert (TREE_TYPE (op1), tem); + if (!operand_equal_p (op1, tem, 0)) + { + op1 = tem; + need_update = 1; + } + } + + if (TREE_CODE (op2) == INTEGER_CST) + { + tem = fold_convert (tcast, op2); + tem = fold_convert (TREE_TYPE (op2), tem); + if (!operand_equal_p (op2, tem, 0)) + { + op2 = tem; + need_update = 1; + } + } + + if (need_update) + { + gsi2 = gsi_for_stmt (stmt); + gimple_assign_set_rhs_with_ops (&gsi2, code, op1, op2); + update_stmt (stmt); + } + + if (TREE_CODE (op2) == SSA_NAME + && (s = SSA_NAME_DEF_STMT (op2)) != NULL + && has_single_use (op2) + && is_gimple_assign (s)) + truncate_integers (s, tcast); + } + while 
(TREE_CODE (op1) == SSA_NAME + && (stmt = SSA_NAME_DEF_STMT (op1)) != NULL + && has_single_use (op1) + && is_gimple_assign (stmt)); +} + +/* Convert (type1) ((type2) X [PLUS|MINUS|AND|IOR|XOR]_EXPR (type2) Y) to + X' [PLUS|MINUS|AND|IOR|XOR]_EXPR Y'. If X or Y have compatible type to TYPE1, + the types TYPE1, TYPE2, type of X, and type of Y have integral kind, and TYPE2 + has smaller or equal precision as TYPE1. + Returns 1 if there were any changes made, 2 if cfg-cleanup needs to + run. Else it returns 0. */ + +static int +combine_inner_conversion (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + tree op0, lhs, inner_op0, inner_op1; + tree new_op0, new_op1, tem; + gimple s, newop; + enum tree_code inner_code, code; + int sunken_cast = 0, require_cast = 0, has_cst = 0; + + if (!is_gimple_assign (stmt)) + return 0; + code = gimple_assign_rhs_code (stmt); + + if (!CONVERT_EXPR_CODE_P (code)) + return 0; + + new_op0 = new_op1 = NULL_TREE; + lhs = gimple_assign_lhs (stmt); + op0 = gimple_assign_rhs1 (stmt); + + /* Check that inner and outer type of conversion is of integral + kind, and that the outer type has smaller or equal precision + then the inner type. 
*/ + if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + || !INTEGRAL_TYPE_P (TREE_TYPE (op0)) + || TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (TREE_TYPE (op0))) + return 0; + + if (TREE_CODE (op0) != SSA_NAME + || (s = SSA_NAME_DEF_STMT (op0)) == NULL + || !has_single_use (op0) + || !is_gimple_assign (s)) + return 0; + + inner_code = gimple_assign_rhs_code (s); + if (inner_code != PLUS_EXPR && inner_code != MINUS_EXPR + && inner_code != BIT_AND_EXPR + && inner_code != BIT_IOR_EXPR + && inner_code != BIT_XOR_EXPR) + return 0; + + truncate_integers (s, TREE_TYPE (lhs)); + + inner_op0 = gimple_assign_rhs1 (s); + inner_op1 = gimple_assign_rhs2 (s); + + if (TREE_CODE (inner_op0) == INTEGER_CST) + { + new_op0 = fold_convert (TREE_TYPE (lhs), inner_op0); + has_cst++; + } + + if (TREE_CODE (inner_op1) == INTEGER_CST) + { + new_op1 = fold_convert (TREE_TYPE (lhs), inner_op1); + has_cst++; + } + + if (has_cst == 2) + { + tem = fold_build2 (inner_code, TREE_TYPE (lhs), new_op0, new_op1); + gimple_assign_set_rhs_with_ops (gsi, TREE_CODE (tem), tem, NULL_TREE); + update_stmt (gsi_stmt (*gsi)); + return 2; + } + + if ((inner_code == PLUS_EXPR || inner_code == MINUS_EXPR) + && !TYPE_UNSIGNED (TREE_TYPE (lhs))) + return 0; + + if (TREE_CODE (inner_op0) == SSA_NAME + && has_single_use (inner_op0) + && (s = SSA_NAME_DEF_STMT (inner_op0)) != NULL + && is_gimple_assign (s) + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (s)) + && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (s))) + && TYPE_PRECISION (TREE_TYPE (lhs)) + <= TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (s)))) + { + new_op0 = gimple_assign_rhs1 (s); + sunken_cast++; + } + else if (TREE_CODE (inner_op0) == SSA_NAME) + { + new_op0 = inner_op0; + require_cast++; + } + + if (TREE_CODE (inner_op1) == SSA_NAME + && has_single_use (inner_op1) + && (s = SSA_NAME_DEF_STMT (inner_op1)) != NULL + && is_gimple_assign (s) + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (s)) + && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (s))) + 
&& TYPE_PRECISION (TREE_TYPE (lhs)) + <= TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (s)))) + { + new_op1 = gimple_assign_rhs1 (s); + sunken_cast++; + } + else if (TREE_CODE (inner_op1) == SSA_NAME) + { + new_op1 = inner_op1; + require_cast++; + } + + if (!new_op0 || !new_op1) + return 0; + + if (require_cast == 2) + return 0; + + if (require_cast && sunken_cast + && !useless_type_conversion_p (TREE_TYPE (new_op0), TREE_TYPE (lhs)) + && !useless_type_conversion_p (TREE_TYPE (new_op1), TREE_TYPE (lhs))) + return 0; + + if (require_cast && has_cst) + { + if (TREE_CODE (new_op0) == INTEGER_CST) + new_op0 = fold_convert (TREE_TYPE (new_op1), new_op0); + if (TREE_CODE (new_op1) == INTEGER_CST) + new_op1 = fold_convert (TREE_TYPE (new_op0), new_op1); + + /* Can we simplify to (X op CST)? */ + if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_op0)) + && ((inner_code != PLUS_EXPR && inner_code != MINUS_EXPR) + || TYPE_UNSIGNED (TREE_TYPE (new_op0)))) + { + gimple_assign_set_rhs_with_ops (gsi, inner_code, new_op0, new_op1); + update_stmt (gsi_stmt (*gsi)); + return 2; + } + return 0; + } + + if (!useless_type_conversion_p (TREE_TYPE (new_op0), TREE_TYPE (lhs))) + { + tem = create_tmp_reg (TREE_TYPE (lhs), NULL); + newop = gimple_build_assign_with_ops (NOP_EXPR, tem, new_op0, NULL_TREE); + tem = make_ssa_name (tem, newop); + gimple_assign_set_lhs (newop, tem); + gimple_set_location (newop, gimple_location (stmt)); + gsi_insert_before (gsi, newop, GSI_SAME_STMT); + new_op0 = tem; + } + + if (!useless_type_conversion_p (TREE_TYPE (new_op1), TREE_TYPE (lhs))) + { + tem = create_tmp_reg (TREE_TYPE (lhs), NULL); + newop = gimple_build_assign_with_ops (NOP_EXPR, tem, new_op1, NULL_TREE); + tem = make_ssa_name (tem, newop); + gimple_assign_set_lhs (newop, tem); + gimple_set_location (newop, gimple_location (stmt)); + gsi_insert_before (gsi, newop, GSI_SAME_STMT); + new_op1 = tem; + } + + gimple_assign_set_rhs_with_ops (gsi, inner_code, new_op0, new_op1); + 
update_stmt (gsi_stmt (*gsi)); + return 2; +} + /* Combine two conversions in a row for the second conversion at *GSI. Returns 1 if there were any changes made, 2 if cfg-cleanup needs to run. Else it returns 0. */ @@ -2506,6 +2901,8 @@ ssa_forward_propagate_and_combine (void) || code == FIX_TRUNC_EXPR) { int did_something = combine_conversions (&gsi); + if (!did_something) + did_something = combine_inner_conversion (&gsi); if (did_something == 2) cfg_changed = true; changed = did_something != 0; Index: gcc-trunk/gcc/testsuite/gcc.dg/tree-ssa/pr45397-1.c =================================================================== --- /dev/null +++ gcc-trunk/gcc/testsuite/gcc.dg/tree-ssa/pr45397-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +signed char a[1024], b[1024]; + +void +baz (void) +{ + int i, s, t; + for (i = 0; i < 1024; i++) + { s = a[i]; t = b[i]; s += t + 0x12345600; a[i] = s; } +} + +/* { dg-final { scan-tree-dump-times "305419776" 0 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ +