On Wed, 1 Jul 2026, Philipp Tomsich wrote:

> visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
> lookup, but not the reverse -- so whether VN discovers (T)A + C ==
> (T)(A + C) depends on which form it sees first.  Add a match.pd rule
> that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
> so the fold only fires when the narrow expression already has a value
> number -- i.e. only inside VN via mprts_hook.
> 
> Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for unsigned inner the
> narrow op wraps mod 2^prec (defined) while the widened outer op does
> not, changing the observed value (bitfld-5.c is the concrete miscompile
> when the guard is loosened).
> 
> Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
> fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
> 0xFFFF...FFFF) qualify.

OK.

Thanks,
Richard.

>       PR tree-optimization/124545
> 
> gcc/ChangeLog:
> 
>       * match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
>       conversions from a signed inner type with undefined overflow.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/pr124545.c: New test.
>       * gcc.dg/pr124545-2.c: New test.
> 
> Signed-off-by: Philipp Tomsich <[email protected]>
> 
> ---
> 
>  gcc/match.pd                      | 32 ++++++++++++++++++
>  gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/pr124545.c   | 29 ++++++++++++++++
>  3 files changed, 116 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr124545.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ddf3b61638ce..817a52499128 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         (plus (convert @0) (op @2 (convert @1))))))
>  #endif
>  
> +/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
> +   widening conversion from a type with undefined overflow and the outer
> +   type wraps.  This allows VN to discover that (T)A + (T)C == (T)(A + C)
> +   regardless of which form appears first in program order.  PR124545.
> +   The rewrite is unsound for unsigned inner types: the narrow op wraps
> +   mod 2^prec (defined) while the widened op does not, changing the
> +   observed value.  Cover the unsigned case separately once ranger can
> +   prove no wrap.  */
> +#if GIMPLE
> +  (for op (plus minus)
> +   (simplify
> +    (op (convert @0) INTEGER_CST@1)
> +     (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> +       && TREE_CODE (type) == INTEGER_TYPE
> +       && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> +       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> +       && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> +       && TYPE_OVERFLOW_WRAPS (type)
> +       /* CST must be the sign-extension of its low inner-precision bits,
> +          otherwise narrowing changes the value.  Use min_precision (..,
> +          SIGNED) rather than int_fits_type_p so that small negative offsets
> +          encoded as large unsigned constants (e.g. -1 as sizetype) still
> +          qualify.  */
> +       && wi::min_precision (wi::to_wide (@1), SIGNED)
> +          <= TYPE_PRECISION (TREE_TYPE (@0)))
> +       (with {
> +       wide_int c1 = wi::to_wide (@1);
> +       tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
> +                          wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); }
> +     (convert (op! @0 { inner_cst; }))))))
> +#endif
> +
>  /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
>     to a simple value.  */
>    (for op (plus minus)
> diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
> new file mode 100644
> index 000000000000..b4806567acce
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr124545-2.c
> @@ -0,0 +1,55 @@
> +/* PR tree-optimization/124545 */
> +/* Runtime correctness for the inverse-widening VN rewrite
> +   (T)A +- CST -> (T)(A +- CST').  The rewrite must never change the
> +   computed value.  In particular it must NOT fire when CST is not
> +   representable in the inner type (which would silently drop the bits
> +   above the inner precision), and it must stay correct for unsigned
> +   inner types where the narrow operation wraps.  */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +/* CST = 2^32 does not fit in int: the value must be preserved.
> +   Before the fix this comparison folded to a constant 1.  */
> +__attribute__((noipa)) int
> +oor_eq (int a)
> +{
> +  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
> +}
> +
> +__attribute__((noipa)) unsigned long long
> +oor_val (int a)
> +{
> +  return (unsigned long long) a + 0x100000000ULL;
> +}
> +
> +/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
> +   The result must match the wide arithmetic for every input.  */
> +__attribute__((noipa)) int
> +uns_carry (unsigned int a)
> +{
> +  unsigned int t = a + 100u;
> +  unsigned long w = (unsigned long) a + 100;
> +  return w == (unsigned long) t;
> +}
> +
> +/* Legitimate in-range case (matches the PR): k == j - 1, so the two
> +   loads are the same address and the rewrite may fire.  */
> +__attribute__((noipa)) int
> +inrange_eq (int *p, int j)
> +{
> +  int k = j - 1;
> +  return p[j - 1] == p[k];
> +}
> +
> +int
> +main (void)
> +{
> +  if (oor_eq (5) != 0) __builtin_abort ();
> +  if (oor_eq (-1) != 0) __builtin_abort ();
> +  if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
> +  if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
> +  if (uns_carry (10) != 1) __builtin_abort ();
> +  int arr[4] = { 7, 7, 7, 7 };
> +  if (inrange_eq (arr, 2) != 1) __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c
> new file mode 100644
> index 000000000000..a21346b179c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr124545.c
> @@ -0,0 +1,29 @@
> +/* PR tree-optimization/124545 */
> +/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
> +   operand order in the equality comparison.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> +
> +int func1(int *a, int j) {
> +  int k = j - 1;
> +  return a[j - 1] == a[k];
> +}
> +
> +int func2(int *a, int j) {
> +  int k = j - 1;
> +  return a[k] == a[j - 1];
> +}
> +
> +int func3(int *a, int j) {
> +  int k = j - 3;
> +  return a[k] == a[j - 3];
> +}
> +
> +int func4(int *a, int j) {
> +  int k = j + 2;
> +  return a[k] == a[j + 2];
> +}
> +
> +/* All four functions should fold to return 1 after FRE.  */
> +/* The pattern is not applied on ilp32 targets (PR116845).  */
> +/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } } } } */
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald; (HRB 36809, AG Nuernberg)

Reply via email to