The following fixes the missing saturating op pattern detection by teaching FRE to CSE a narrower (or sign-changed) add/sub/mul with a wider one and compensating with a cast (for sign-changing) or a bit-and (for narrowing and zero-extending).
This is a bit awkward (and it doesn't handle all of the testcases that queued up in the comments of PR45397) but apart from doing even more complicated pattern matching inside phiopt I don't see a good way of dealing with this. I didn't try to address the non-regression testcases, but the code can serve as a recipe for adding more "hacks" as needed. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Ok? Thanks, Richard. 2017-02-24 Richard Biener <rguent...@suse.de> PR tree-optimization/45397 * tree-ssa-pre.c (eliminate_insert): Handle BIT_AND_EXPR. * tree-ssa-sccvn.c (valueized_wider_op): New helper. (visit_nary_op): Add pattern matching for CSEing sign-changed or truncated operations with wider ones. * gcc.dg/tree-ssa/pr45397.c: New testcase. Index: gcc/tree-ssa-pre.c =================================================================== *** gcc/tree-ssa-pre.c (revision 245696) --- gcc/tree-ssa-pre.c (working copy) *************** eliminate_insert (gimple_stmt_iterator * *** 4103,4109 **** if (!is_gimple_assign (stmt) || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR ! && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF)) return NULL_TREE; tree op = gimple_assign_rhs1 (stmt); --- 4103,4111 ---- if (!is_gimple_assign (stmt) || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR ! && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF ! && (gimple_assign_rhs_code (stmt) != BIT_AND_EXPR ! 
|| TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST))) return NULL_TREE; tree op = gimple_assign_rhs1 (stmt); *************** eliminate_insert (gimple_stmt_iterator * *** 4121,4126 **** --- 4123,4131 ---- TREE_TYPE (val), leader, TREE_OPERAND (gimple_assign_rhs1 (stmt), 1), TREE_OPERAND (gimple_assign_rhs1 (stmt), 2)); + else if (gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) + res = gimple_build (&stmts, BIT_AND_EXPR, + TREE_TYPE (val), leader, gimple_assign_rhs2 (stmt)); else res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), TREE_TYPE (val), leader); Index: gcc/tree-ssa-sccvn.c =================================================================== *** gcc/tree-ssa-sccvn.c (revision 245696) --- gcc/tree-ssa-sccvn.c (working copy) *************** visit_copy (tree lhs, tree rhs) *** 3447,3469 **** return set_ssa_val_to (lhs, rhs); } /* Visit a nary operator RHS, value number it, and return true if the value number of LHS has changed as a result. */ static bool ! visit_nary_op (tree lhs, gimple *stmt) { - bool changed = false; tree result = vn_nary_op_lookup_stmt (stmt, NULL); - if (result) ! changed = set_ssa_val_to (lhs, result); ! else { ! changed = set_ssa_val_to (lhs, lhs); ! vn_nary_op_insert_stmt (stmt, lhs); } return changed; } --- 3447,3571 ---- return set_ssa_val_to (lhs, rhs); } + /* Lookup a value for OP in type WIDE_TYPE where the value in type of OP + is the same. */ + + static tree + valueized_wider_op (tree wide_type, tree op) + { + if (TREE_CODE (op) == SSA_NAME) + op = SSA_VAL (op); + + /* Either we have the op widened available. */ + tree ops[3] = {}; + ops[0] = op; + tree tem = vn_nary_op_lookup_pieces (1, NOP_EXPR, + wide_type, ops, NULL); + if (tem) + return tem; + + /* Or the op is truncated from some existing value. 
*/ + if (TREE_CODE (op) == SSA_NAME) + { + gimple *def = SSA_NAME_DEF_STMT (op); + if (is_gimple_assign (def) + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) + { + tem = gimple_assign_rhs1 (def); + if (useless_type_conversion_p (wide_type, TREE_TYPE (tem))) + { + if (TREE_CODE (tem) == SSA_NAME) + tem = SSA_VAL (tem); + return tem; + } + } + } + + /* For constants simply extend it. */ + if (TREE_CODE (op) == INTEGER_CST) + return wide_int_to_tree (wide_type, op); + + return NULL_TREE; + } + /* Visit a nary operator RHS, value number it, and return true if the value number of LHS has changed as a result. */ static bool ! visit_nary_op (tree lhs, gassign *stmt) { tree result = vn_nary_op_lookup_stmt (stmt, NULL); if (result) ! return set_ssa_val_to (lhs, result); ! ! /* Do some special pattern matching for redundancies of operations ! in different types. */ ! enum tree_code code = gimple_assign_rhs_code (stmt); ! tree type = TREE_TYPE (lhs); ! tree rhs1 = gimple_assign_rhs1 (stmt); ! switch (code) { ! CASE_CONVERT: ! /* Match arithmetic done in a different type where we can easily ! substitute the result from some earlier sign-changed or widened ! operation. */ ! if (INTEGRAL_TYPE_P (type) ! && TREE_CODE (rhs1) == SSA_NAME ! /* We only handle sign-changes or zero-extension -> & mask. */ ! && ((TYPE_UNSIGNED (TREE_TYPE (rhs1)) ! && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (rhs1))) ! || TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (rhs1)))) ! { ! gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1)); ! if (def ! && (gimple_assign_rhs_code (def) == PLUS_EXPR ! || gimple_assign_rhs_code (def) == MINUS_EXPR ! || gimple_assign_rhs_code (def) == MULT_EXPR)) ! { ! tree ops[3] = {}; ! /* Either we have the op widened available. */ ! ops[0] = valueized_wider_op (type, ! gimple_assign_rhs1 (def)); ! if (ops[0]) ! ops[1] = valueized_wider_op (type, ! gimple_assign_rhs2 (def)); ! if (ops[0] && ops[1]) ! { ! ops[0] = vn_nary_op_lookup_pieces ! 
(2, gimple_assign_rhs_code (def), type, ops, NULL); ! /* We have wider operation available. */ ! if (ops[0]) ! { ! unsigned lhs_prec = TYPE_PRECISION (type); ! unsigned rhs_prec = TYPE_PRECISION (TREE_TYPE (rhs1)); ! if (lhs_prec == rhs_prec) ! { ! ops[1] = NULL_TREE; ! result = vn_nary_build_or_lookup (NOP_EXPR, ! type, ops); ! if (result) ! return set_ssa_val_to (lhs, result); ! } ! else ! { ! ops[1] = wide_int_to_tree (type, ! wi::mask (rhs_prec, false, ! lhs_prec)); ! result = vn_nary_build_or_lookup (BIT_AND_EXPR, ! TREE_TYPE (lhs), ! ops); ! if (result) ! return set_ssa_val_to (lhs, result); ! } ! } ! } ! } ! } ! default:; } + bool changed = set_ssa_val_to (lhs, lhs); + vn_nary_op_insert_stmt (stmt, lhs); return changed; } Index: gcc/testsuite/gcc.dg/tree-ssa/pr45397.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr45397.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/pr45397.c (working copy) @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-phiopt1" } */ + +int foo_add (const unsigned char *tmp, int i, int val) +{ + return (unsigned char)(((tmp[i] + val)>0xFF)?0xFF:(((tmp[i] + val)<0)?0:(tmp[i] + val))); +} + +int foo_sub (const unsigned char *tmp, int i, int val) +{ + return (unsigned char)(((tmp[i] - val)>0xFF)?0xFF:(((tmp[i] - val)<0)?0:(tmp[i] - val))); +} + +int foo_mul (const unsigned char *tmp, int i, int val) +{ + return (unsigned char)(((tmp[i] * val)>0xFF)?0xFF:(((tmp[i] * val)<0)?0:(tmp[i] * val))); +} + +/* All cases should end up using min/max for the saturated operations and + have no control flow. */ +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 3 "phiopt1" } } */ +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "phiopt1" } } */ +/* { dg-final { scan-tree-dump-not "if " "phiopt1" } } */