https://gcc.gnu.org/g:2cbd4409bcfaba2bd4200412090fd06db1948369
commit r15-6725-g2cbd4409bcfaba2bd4200412090fd06db1948369 Author: Jakub Jelinek <ja...@redhat.com> Date: Thu Jan 9 08:30:12 2025 +0100 match.pd: Avoid introducing UB in the a r<< (32-b) -> a r>> b optimization [PR117927] As mentioned in the PR, the a r<< (bitsize-b) to a r>> b and similar match.pd optimization which has been introduced in GCC 15 can introduce UB which wasn't there before, in particular if b is equal at runtime to bitsize, then a r<< 0 is turned into a r>> bitsize. The following patch fixes it by optimizing it early only if VRP tells us the count isn't equal to the bitsize, and late into a r>> (b & (bitsize - 1)) if bitsize is a power of two and the subtraction has a single use; on various targets the masking then goes away because their rotate instructions do masking already. The latter can't be done too early though, because then the expr_not_equal_to case is basically useless and we introduce the masking always and can't find out anymore that there was originally no masking. Even a cfun->after_inlining check would be too early, there is forwprop before vrp, so the patch introduces a new PROP for the start of the last forwprop pass. 2025-01-09 Jakub Jelinek <ja...@redhat.com> Andrew Pinski <quic_apin...@quicinc.com> PR tree-optimization/117927 * tree-pass.h (PROP_last_full_fold): Define. * passes.def: Add last= parameters to pass_forwprop. * tree-ssa-forwprop.cc (pass_forwprop): Add last_p non-static data member and initialize it in the ctor. (pass_forwprop::set_pass_param): New method. (pass_forwprop::execute): Set PROP_last_full_fold in curr_properties at the start if last_p. * match.pd (a rrotate (32-b) -> a lrotate b): Only optimize either if @2 is known not to be equal to prec or if during/after last forwprop the subtraction has single use and prec is power of two; in that case transform it into orotate by masked count. * gcc.dg/tree-ssa/pr117927.c: New test. 
Diff: --- gcc/match.pd | 28 +++++++-- gcc/passes.def | 8 +-- gcc/testsuite/gcc.dg/tree-ssa/pr117927.c | 97 ++++++++++++++++++++++++++++++++ gcc/tree-pass.h | 1 + gcc/tree-ssa-forwprop.cc | 12 +++- 5 files changed, 136 insertions(+), 10 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 1d0c9f58f99d..171930874988 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4950,14 +4950,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) build_int_cst (TREE_TYPE (@1), element_precision (type)), @1); })) -/* a rrotate (32-b) -> a lrotate b */ -/* a lrotate (32-b) -> a rrotate b */ +/* a rrotate (bitsize-b) -> a lrotate b */ +/* a lrotate (bitsize-b) -> a rrotate b */ +/* Only do the above when it is known that b != bitsize. + Otherwise canonicalize to a orotate (b & mask) when the subtraction + has single use and prec is a power of two. */ (for rotate (lrotate rrotate) orotate (rrotate lrotate) (simplify - (rotate @0 (minus INTEGER_CST@1 @2)) - (if (element_precision (TREE_TYPE (@0)) == wi::to_wide (@1)) - (orotate @0 @2)))) + (rotate @0 (minus@3 INTEGER_CST@1 @2)) + (with { auto prec = element_precision (TREE_TYPE (@0)); } + (if (prec == wi::to_wide (@1)) + (switch + (if (expr_not_equal_to (@2, wi::uhwi (prec, + TYPE_PRECISION (TREE_TYPE (@2))))) + (orotate @0 @2)) + (if (single_use (@3) + && pow2p_hwi (prec) + /* Defer this case until last forwprop, so that VRP could be run and + expr_not_equal_to had a chance to match. Otherwise we'd do + pretty much always just the second case. */ + && cfun + && ((cfun->curr_properties & PROP_last_full_fold) != 0 + || !flag_tree_vrp + || optimize_debug)) + (orotate @0 + (bit_and @2 { build_int_cst (TREE_TYPE (@2), prec - 1); })))))))) /* Turn (a OP c1) OP c2 into a OP (c1+c2). */ (for op (lrotate rrotate rshift lshift) diff --git a/gcc/passes.def b/gcc/passes.def index 9bfdc615d86e..74d38766a623 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -80,7 +80,7 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_ccp, false /* nonzero_p */); /* After CCP we rewrite no longer addressed locals into SSA form if possible. */ - NEXT_PASS (pass_forwprop); + NEXT_PASS (pass_forwprop, /*last=*/false); NEXT_PASS (pass_early_thread_jumps, /*first=*/true); NEXT_PASS (pass_sra_early); /* pass_build_ealias is a dummy pass that ensures that we @@ -214,7 +214,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_complete_unrolli); NEXT_PASS (pass_backprop); NEXT_PASS (pass_phiprop); - NEXT_PASS (pass_forwprop); + NEXT_PASS (pass_forwprop, /*last=*/false); /* pass_build_alias is a dummy pass that ensures that we execute TODO_rebuild_alias at this point. */ NEXT_PASS (pass_build_alias); @@ -254,7 +254,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_isolate_erroneous_paths); NEXT_PASS (pass_reassoc, true /* early_p */); NEXT_PASS (pass_dce); - NEXT_PASS (pass_forwprop); + NEXT_PASS (pass_forwprop, /*last=*/false); NEXT_PASS (pass_phiopt, false /* early_p */); NEXT_PASS (pass_ccp, true /* nonzero_p */); /* After CCP we rewrite no longer addressed locals into SSA @@ -356,7 +356,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_dce, true /* update_address_taken_p */, true /* remove_unused_locals */); /* After late DCE we rewrite no longer addressed locals into SSA form if possible. 
*/ - NEXT_PASS (pass_forwprop); + NEXT_PASS (pass_forwprop, /*last=*/true); NEXT_PASS (pass_sink_code, true /* unsplit edges */); NEXT_PASS (pass_phiopt, false /* early_p */); NEXT_PASS (pass_fold_builtins); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr117927.c b/gcc/testsuite/gcc.dg/tree-ssa/pr117927.c new file mode 100644 index 000000000000..18067bad460e --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr117927.c @@ -0,0 +1,97 @@ +/* PR tree-optimization/117927 */ +/* { dg-do compile { target { int32 && longlong64 } } } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times " r<< " 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " r>> " 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " & 31;" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " & 63;" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "32 - " "optimized" } } */ +/* { dg-final { scan-tree-dump-not "64 - " "optimized" } } */ + +static inline +unsigned lrotate32 (unsigned x, int t) +{ + unsigned tl = x << t; + unsigned th = x >> (-t & 31); + return tl | th; +} + +static inline +unsigned rrotate32 (unsigned x, int t) +{ + unsigned tl = x >> t; + unsigned th = x << (-t & 31); + return tl | th; +} + +static inline +unsigned long long lrotate64 (unsigned long long x, int t) +{ + unsigned long long tl = x << t; + unsigned long long th = x >> (-t & 63); + return tl | th; +} + +static inline +unsigned long long rrotate64 (unsigned long long x, int t) +{ + unsigned long long tl = x >> t; + unsigned long long th = x << (-t & 63); + return tl | th; +} + +unsigned +f1 (unsigned x, int t) +{ + return lrotate32 (x, 32 - t); +} + +unsigned long long +f2 (unsigned long long x, int t) +{ + return lrotate64 (x, 64 - t); +} + +unsigned +f3 (unsigned x, int t) +{ + if (t == 32) + __builtin_unreachable (); + return lrotate32 (x, 32 - t); +} + +unsigned long long +f4 (unsigned long long x, int t) +{ + if (t == 64) + __builtin_unreachable (); + 
return lrotate64 (x, 64 - t); +} + +unsigned +f5 (unsigned x, int t) +{ + return rrotate32 (x, 32 - t); +} + +unsigned long long +f6 (unsigned long long x, int t) +{ + return rrotate64 (x, 64 - t); +} + +unsigned +f7 (unsigned x, int t) +{ + if (t == 32) + __builtin_unreachable (); + return rrotate32 (x, 32 - t); +} + +unsigned long long +f8 (unsigned long long x, int t) +{ + if (t == 64) + __builtin_unreachable (); + return rrotate64 (x, 64 - t); +} diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index e764dc933dd1..d46a6b187094 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -230,6 +230,7 @@ protected: #define PROP_assumptions_done (1 << 19) /* Assume function kept around. */ #define PROP_gimple_lbitint (1 << 20) /* lowered large _BitInt */ +#define PROP_last_full_fold (1 << 21) /* Start of last forwprop. */ #define PROP_gimple \ (PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp) diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 2e09027db56f..0d62f2bf60db 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -4107,14 +4107,22 @@ class pass_forwprop : public gimple_opt_pass { public: pass_forwprop (gcc::context *ctxt) - : gimple_opt_pass (pass_data_forwprop, ctxt) + : gimple_opt_pass (pass_data_forwprop, ctxt), last_p (false) {} /* opt_pass methods: */ opt_pass * clone () final override { return new pass_forwprop (m_ctxt); } + void set_pass_param (unsigned int n, bool param) final override + { + gcc_assert (n == 0); + last_p = param; + } bool gate (function *) final override { return flag_tree_forwprop; } unsigned int execute (function *) final override; + private: + /* Determines whether the pass instance should set PROP_last_full_fold. 
*/ + bool last_p; }; // class pass_forwprop unsigned int @@ -4123,6 +4131,8 @@ pass_forwprop::execute (function *fun) unsigned int todoflags = 0; cfg_changed = false; + if (last_p) + fun->curr_properties |= PROP_last_full_fold; calculate_dominance_info (CDI_DOMINATORS);