On Wed, 1 Jul 2026, Philipp Tomsich wrote: > visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN > lookup, but not the reverse -- so whether VN discovers (T)A + (T)C == > (T)(A + C) depends on which form it sees first. Add a match.pd rule > that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier, > so the fold only fires when the narrow expression already has a value > number -- i.e. only inside VN via mprts_hook. > > Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for an unsigned inner type the > narrow op wraps mod 2^prec (defined) while the widened outer op does > not, changing the observed value (bitfld-5.c is the concrete miscompile > when the guard is loosened). > > Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the > fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's > 0xFFFF...FFFF) qualify.
OK. Thanks, Richard. > PR tree-optimization/124545 > > gcc/ChangeLog: > > * match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening > conversions from a signed inner type with undefined overflow. > > gcc/testsuite/ChangeLog: > > * gcc.dg/pr124545.c: New test. > * gcc.dg/pr124545-2.c: New test. > > Signed-off-by: Philipp Tomsich <[email protected]> > > --- > > gcc/match.pd | 32 ++++++++++++++++++ > gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++ > gcc/testsuite/gcc.dg/pr124545.c | 29 ++++++++++++++++ > 3 files changed, 116 insertions(+) > create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c > create mode 100644 gcc/testsuite/gcc.dg/pr124545.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index ddf3b61638ce..817a52499128 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (plus (convert @0) (op @2 (convert @1)))))) > #endif > > +/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a > + widening conversion from a type with undefined overflow and the outer > + type wraps. This allows VN to discover that (T)A + (T)C == (T)(A + C) > + regardless of which form appears first in program order. PR124545. > + The rewrite is unsound for unsigned inner types: the narrow op wraps > + mod 2^prec (defined) while the widened op does not, changing the > + observed value. Cover the unsigned case separately once ranger can > + prove no wrap. */ > +#if GIMPLE > + (for op (plus minus) > + (simplify > + (op (convert @0) INTEGER_CST@1) > + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE > + && TREE_CODE (type) == INTEGER_TYPE > + && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0)) > + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) > + && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0)) > + && TYPE_OVERFLOW_WRAPS (type) > + /* CST must be the sign-extension of its low inner-precision bits, > + otherwise narrowing changes the value. 
Use min_precision (.., > + SIGNED) rather than int_fits_type_p so that small negative offsets > + encoded as large unsigned constants (e.g. -1 as sizetype) still > + qualify. */ > + && wi::min_precision (wi::to_wide (@1), SIGNED) > + <= TYPE_PRECISION (TREE_TYPE (@0))) > + (with { > + wide_int c1 = wi::to_wide (@1); > + tree inner_cst = wide_int_to_tree (TREE_TYPE (@0), > + wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); } > + (convert (op! @0 { inner_cst; })))))) > +#endif > + > /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified > to a simple value. */ > (for op (plus minus) > diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c > b/gcc/testsuite/gcc.dg/pr124545-2.c > new file mode 100644 > index 000000000000..b4806567acce > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr124545-2.c > @@ -0,0 +1,55 @@ > +/* PR tree-optimization/124545 */ > +/* Runtime correctness for the inverse-widening VN rewrite > + (T)A +- CST -> (T)(A +- CST'). The rewrite must never change the > + computed value. In particular it must NOT fire when CST is not > + representable in the inner type (which would silently drop the bits > + above the inner precision), and it must stay correct for unsigned > + inner types where the narrow operation wraps. */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +/* CST = 2^32 does not fit in int: the value must be preserved. > + Before the fix this comparison folded to a constant 1. */ > +__attribute__((noipa)) int > +oor_eq (int a) > +{ > + return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a; > +} > + > +__attribute__((noipa)) unsigned long long > +oor_val (int a) > +{ > + return (unsigned long long) a + 0x100000000ULL; > +} > + > +/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not. > + The result must match the wide arithmetic for every input. 
*/ > +__attribute__((noipa)) int > +uns_carry (unsigned int a) > +{ > + unsigned int t = a + 100u; > + unsigned long w = (unsigned long) a + 100; > + return w == (unsigned long) t; > +} > + > +/* Legitimate in-range case (matches the PR): k == j - 1, so the two > + loads are the same address and the rewrite may fire. */ > +__attribute__((noipa)) int > +inrange_eq (int *p, int j) > +{ > + int k = j - 1; > + return p[j - 1] == p[k]; > +} > + > +int > +main (void) > +{ > + if (oor_eq (5) != 0) __builtin_abort (); > + if (oor_eq (-1) != 0) __builtin_abort (); > + if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort (); > + if (uns_carry (0xfffffff0u) != 0) __builtin_abort (); > + if (uns_carry (10) != 1) __builtin_abort (); > + int arr[4] = { 7, 7, 7, 7 }; > + if (inrange_eq (arr, 2) != 1) __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c > new file mode 100644 > index 000000000000..a21346b179c7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr124545.c > @@ -0,0 +1,29 @@ > +/* PR tree-optimization/124545 */ > +/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of > + operand order in the equality comparison. */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-fre1" } */ > + > +int func1(int *a, int j) { > + int k = j - 1; > + return a[j - 1] == a[k]; > +} > + > +int func2(int *a, int j) { > + int k = j - 1; > + return a[k] == a[j - 1]; > +} > + > +int func3(int *a, int j) { > + int k = j - 3; > + return a[k] == a[j - 3]; > +} > + > +int func4(int *a, int j) { > + int k = j + 2; > + return a[k] == a[j + 2]; > +} > + > +/* All four functions should fold to return 1 after FRE. */ > +/* The pattern is not applied on ilp32 targets (PR116845). 
*/ > +/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } > } } } */ > -- Richard Biener <[email protected]> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Jochen Jaser, Andrew McDonald; (HRB 36809, AG Nuernberg)
