Dear all, Here's my patch for PR120265. It was bootstrapped and regression-tested on aarch64 and causes no regressions. I also added a testcase. I'd be grateful if you could commit it.
Otherwise, feedback to improve it is welcome. Many thanks MCCCS From 1e901c3fa5c8cc3e55d4f1715b4aae4ae3d66714 Mon Sep 17 00:00:00 2001 From: MCCCS <mc...@gmx.com> Date: Thu, 15 May 2025 09:16:49 +0100 Subject: [PATCH] tree-optimization/120265 - Optimize modular counters This PR is about replacing trunc_mod with a simpler expression given the bounds of variables. PR tree-optimization/120265 * match.pd: X % M -> X for X in 0 to M-1 X % M -> (X == M) ? 0 : X for X in 0 to M X % M -> (X >= M) ? (X - M) : X for X in 0 to 2*M-1. * gcc.dg/pr120265.c: New testcase. --- gcc/match.pd | 27 ++++++++++++++++++++ gcc/testsuite/gcc.dg/pr120265.c | 44 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr120265.c diff --git a/gcc/match.pd b/gcc/match.pd index 79485f9678a..bd8950b4e10 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5602,6 +5602,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) optab_vector))) (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); }))) +#if GIMPLE +/* X % M -> X for X in 0 to M-1. */ +/* X % M -> (X == M) ? 0 : X for X in 0 to M. */ +/* X % M -> (X >= M) ? (X - M) : X for X in 0 to 2*M-1. */ +(simplify + (trunc_mod @0 @1) + (with { int_range_max vr0, vr1; } + (if (get_range_query (cfun)->range_of_expr (vr0, @0) + && get_range_query (cfun)->range_of_expr (vr1, @1) + && !vr0.undefined_p () + && !vr1.undefined_p () + && !integer_zerop (@1) + && (TYPE_UNSIGNED (type) + || (vr0.nonnegative_p () && vr1.nonnegative_p ()))) + (with + { wide_int twice = 2 * vr1.lower_bound (); } + (switch + (if (wi::gtu_p (vr1.lower_bound (), vr0.upper_bound ())) + @0) + (if (wi::geu_p (vr1.lower_bound (), vr0.upper_bound ())) + (cond (eq @0 @1) + { build_zero_cst (type); } + @0)) + (if (wi::gtu_p (twice, vr0.upper_bound ())) + (cond (ge @0 @1) (minus @0 @1) @0))))))) +#endif + /* ((X /[ex] C1) +- C2) * (C1 * C3) --> (X * C3) +- (C1 * C2 * C3). 
*/ (for op (plus minus) (simplify diff --git a/gcc/testsuite/gcc.dg/pr120265.c b/gcc/testsuite/gcc.dg/pr120265.c new file mode 100644 index 00000000000..2634af36226 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr120265.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ +__attribute__((noipa)) void g(int r) +{ + (void) r; +} + +int x; + +void a(void) +{ + unsigned m = 0; + for(int i = 0; i < 300; i++) + { + m++; + m %= 600; + g(m); + } +} + +void b(void) +{ + unsigned m = 0; + for(int i = 0; i < x; i++) + { + m++; + m %= 600; + g(m); + } +} + +void c(void) +{ + unsigned m = 0; + for(int i = 0; i < x; i++) + { + m += 7; + m %= 600; + g(m); + } +} + +/* { dg-final { scan-tree-dump-not "% 600" "optimized" } } */ + -- 2.45.2