https://gcc.gnu.org/g:05722b6d8799d08861dd65af06721b4784acc650
commit 05722b6d8799d08861dd65af06721b4784acc650
Author: Alexandre Oliva <ol...@adacore.com>
Date:   Tue Dec 17 22:19:07 2024 -0300

    ifcombine field merge: handle masks with sign extensions
    
    When a loaded field is sign extended, masked and compared, we used to
    drop from the mask the bits past the original field width, which is
    not correct.  Before clipping the mask to the field width, take note
    of whether it covered copies of the sign bit, and arrange to test the
    sign bit if we're comparing with zero.  Punt in other cases.
    
    If bits_test fails recoverably, try other ifcombine strategies.
    
    for gcc/ChangeLog
    
    	* gimple-fold.cc (decode_field_reference): Add psignbit parameter.
    	Set it if the mask references sign-extending bits.
    	(fold_truth_andor_for_ifcombine): Adjust calls with new variables.
    	Swap them along with other r?_* variables.  Handle extended sign
    	bit compares with zero.
    	* tree-ssa-ifcombine.cc (ifcombine_ifandif): If bits_test fails
    	in a way that doesn't prevent other ifcombine strategies from
    	passing, give them a try.
    
    for gcc/testsuite/ChangeLog
    
    	* gcc.dg/field-merge-16.c: New.
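
To make the affected pattern concrete: a signed field is loaded, sign
extended to a wider type, and masked with a bit above the field's width;
that bit is a copy of the field's sign bit, so a compare with zero must
reduce to a sign test instead of having the bit clipped away.  A minimal
sketch, distilled from the new gcc.dg/field-merge-16.c testcase below
(the struct layout and bit position are illustrative):

  struct s { short a; } p = { -1 };

  /* Bit 59 lies well above the 16-bit field a.  */
  const long long mask = 1ll << 59;

  int
  f (void)
  {
    /* p.a is sign extended to long long before the mask is applied,
       so for negative p.a bit 59 is a copy of the sign bit and this
       must behave like p.a < 0; clipping the mask to the field width
       would wrongly drop the test.  */
    return (p.a & mask) != 0;
  }

The fix records, before clipping, that the mask covered sign-bit copies
(*psignbit), and for compares with zero ORs the field's own sign bit
into the clipped mask.
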
Diff:
---
 gcc/gimple-fold.cc                    | 69 ++++++++++++++++++++++++++++++-----
 gcc/testsuite/gcc.dg/field-merge-16.c | 66 +++++++++++++++++++++++++++++++++
 gcc/tree-ssa-ifcombine.cc             |  5 ++-
 3 files changed, 128 insertions(+), 12 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index ee1da47b080b..25c150effcc8 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7514,6 +7514,9 @@ gimple_binop_def_p (enum tree_code code, tree t, tree op[2])
    is initially set to a mask with nonzero precision, that mask is
    combined with the found mask, or adjusted in precision to match.
 
+   *PSIGNBIT is set to TRUE if, before clipping to *PBITSIZE, the mask
+   encompassed bits that corresponded to extensions of the sign bit.
+
    *XOR_P is to be FALSE if EXP might be a XOR used in a compare, in which
    case, if XOR_CMP_OP is a zero constant, it will be overridden with *PEXP,
    *XOR_P will be set to TRUE, and the left-hand operand of the XOR will be
@@ -7533,7 +7536,8 @@ static tree
 decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, bool *punsignedp,
 			bool *preversep, bool *pvolatilep,
-			wide_int *pand_mask, bool *xor_p, tree *xor_cmp_op,
+			wide_int *pand_mask, bool *psignbit,
+			bool *xor_p, tree *xor_cmp_op,
 			gimple **load, location_t loc[4])
 {
   tree exp = *pexp;
@@ -7545,6 +7549,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
   machine_mode mode;
 
   *load = NULL;
+  *psignbit = false;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -7712,12 +7717,23 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
   if (outer_type && *pbitsize == TYPE_PRECISION (outer_type))
     *punsignedp = TYPE_UNSIGNED (outer_type);
 
-  /* Make the mask the expected width.  */
-  if (and_mask.get_precision () != 0)
-    and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
-
   if (pand_mask)
-    *pand_mask = and_mask;
+    {
+      /* Make the mask the expected width.  */
+      if (and_mask.get_precision () != 0)
+	{
+	  /* If the AND_MASK encompasses bits that would be extensions of
+	     the sign bit, set *PSIGNBIT.  */
+	  if (!unsignedp
+	      && and_mask.get_precision () > *pbitsize
+	      && (and_mask
+		  & wi::mask (*pbitsize, true, and_mask.get_precision ())) != 0)
+	    *psignbit = true;
+	  and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
+	}
+
+      *pand_mask = and_mask;
+    }
 
   return inner;
 }
@@ -7999,6 +8015,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
   HOST_WIDE_INT rnbitsize, rnbitpos, rnprec;
   bool ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   bool ll_reversep, lr_reversep, rl_reversep, rr_reversep;
+  bool ll_signbit, lr_signbit, rl_signbit, rr_signbit;
   scalar_int_mode lnmode, lnmode2, rnmode;
   wide_int ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   wide_int l_const, r_const;
@@ -8118,19 +8135,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
   bool l_xor = false, r_xor = false;
   ll_inner = decode_field_reference (&ll_arg, &ll_bitsize, &ll_bitpos,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_and_mask, &l_xor, &lr_arg,
+				     &ll_and_mask, &ll_signbit, &l_xor, &lr_arg,
 				     &ll_load, ll_loc);
   lr_inner = decode_field_reference (&lr_arg, &lr_bitsize, &lr_bitpos,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_and_mask, &l_xor, 0,
+				     &lr_and_mask, &lr_signbit, &l_xor, 0,
 				     &lr_load, lr_loc);
   rl_inner = decode_field_reference (&rl_arg, &rl_bitsize, &rl_bitpos,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_and_mask, &r_xor, &rr_arg,
+				     &rl_and_mask, &rl_signbit, &r_xor, &rr_arg,
 				     &rl_load, rl_loc);
   rr_inner = decode_field_reference (&rr_arg, &rr_bitsize, &rr_bitpos,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_and_mask, &r_xor, 0,
+				     &rr_and_mask, &rr_signbit, &r_xor, 0,
 				     &rr_load, rr_loc);
 
   /* It must be true that the inner operation on the lhs of each
@@ -8160,6 +8177,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
       std::swap (rl_unsignedp, rr_unsignedp);
       std::swap (rl_reversep, rr_reversep);
       std::swap (rl_and_mask, rr_and_mask);
+      std::swap (rl_signbit, rr_signbit);
       std::swap (rl_load, rr_load);
       std::swap (rl_loc, rr_loc);
     }
@@ -8169,6 +8187,37 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
 	   : (!ll_load != !rl_load))
     return 0;
 
+  /* ??? Can we do anything with these?  */
+  if (lr_signbit || rr_signbit)
+    return 0;
+
+  /* If the mask encompassed extensions of the sign bit before
+     clipping, try to include the sign bit in the test.  If we're not
+     comparing with zero, don't even try to deal with it (for now?).
+     If we've already committed to a sign test, the extended (before
+     clipping) mask could already be messing with it.  */
+  if (ll_signbit)
+    {
+      if (!integer_zerop (lr_arg) || lsignbit)
+	return 0;
+      wide_int sign = wi::mask (ll_bitsize - 1, true, ll_bitsize);
+      if (!ll_and_mask.get_precision ())
+	ll_and_mask = sign;
+      else
+	ll_and_mask |= sign;
+    }
+
+  if (rl_signbit)
+    {
+      if (!integer_zerop (rr_arg) || rsignbit)
+	return 0;
+      wide_int sign = wi::mask (rl_bitsize - 1, true, rl_bitsize);
+      if (!rl_and_mask.get_precision ())
+	rl_and_mask = sign;
+      else
+	rl_and_mask |= sign;
+    }
+
   if (TREE_CODE (lr_arg) == INTEGER_CST
       && TREE_CODE (rr_arg) == INTEGER_CST)
     {
diff --git a/gcc/testsuite/gcc.dg/field-merge-16.c b/gcc/testsuite/gcc.dg/field-merge-16.c
new file mode 100644
index 000000000000..2ca23ea663a4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-16.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ifcombine-details" } */
+
+/* Check that tests for sign-extension bits are handled correctly.  */
+
+struct s {
+  short a;
+  short b;
+  unsigned short c;
+  unsigned short d;
+} __attribute__ ((aligned (8)));
+
+struct s p = { -1, 0, 0, 0 };
+struct s q = { 0, -1, 0, 0 };
+struct s r = { 1, 1, 0, 0 };
+
+const long long mask = 1ll << (sizeof (long long) * __CHAR_BIT__ - 5);
+
+int fp ()
+{
+  if ((p.a & mask)
+      || (p.c & mask)
+      || p.d
+      || (p.b & mask))
+    return 1;
+  else
+    return -1;
+}
+
+int fq ()
+{
+  if ((q.a & mask)
+      || (q.c & mask)
+      || q.d
+      || (q.b & mask))
+    return 1;
+  else
+    return -1;
+}
+
+int fr ()
+{
+  if ((r.a & mask)
+      || (r.c & mask)
+      || r.d
+      || (r.b & mask))
+    return 1;
+  else
+    return -1;
+}
+
+int main () {
+  /* Unlikely, but play safe.  */
+  if (sizeof (long long) == sizeof (short))
+    return 0;
+  if (fp () < 0
+      || fq () < 0
+      || fr () > 0)
+    __builtin_abort ();
+  return 0;
+}
+
+/* We test .b after other fields instead of right after .a to give field
+   merging a chance, otherwise the masked compares with zero are combined by
+   other ifcombine logic.  The .c test is discarded by earlier optimizers.  */
+/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" } } */

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 02c2f5a29b56..c9399a106945 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -899,7 +899,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
       else if (bits1 == name2)
 	std::swap (name1, bits1);
       else
-	return false;
+	goto bits_test_failed;
 
       /* As we strip non-widening conversions in finding a common
          name that is tested make sure to end up with an integral
@@ -944,7 +944,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
     }
 
   /* See if we have two comparisons that we can merge into one.  */
-  else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
+  else bits_test_failed:
+  if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
 	   && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison)
     {
       tree t, ts = NULL_TREE;
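
The rewrite is sound because sign extension makes every bit at or above
the field's width a copy of the sign bit, so testing any such bit
against zero is equivalent to testing the field's own sign bit, which is
exactly the bit the patch ORs into the clipped mask.  A standalone
sanity check of that equivalence (illustrative only, not part of the
commit; bit 59 matches the testcase's mask on 64-bit long long, bit 15
the sign bit of its 16-bit fields):

  #include <assert.h>

  int
  main (void)
  {
    /* Exhaustively verify, for all 16-bit values, that after sign
       extension a mask bit in the extension region (bit 59) tests the
       same condition as the field's own sign bit (bit 15).  */
    for (int i = 0; i < 1 << 16; i++)
      {
	short x = (short) i;
	long long ext = x;	/* sign extension */
	int ext_test = (ext & (1ll << 59)) != 0;
	int sign_test = (x & 0x8000) != 0;
	assert (ext_test == sign_test);
      }
    return 0;
  }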