https://gcc.gnu.org/g:7841513ff437829304619a750a550a41d6918cc4
commit 7841513ff437829304619a750a550a41d6918cc4 Author: Jeff Law <j...@ventanamicro.com> Date: Mon Oct 21 13:37:21 2024 -0600 [committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in ext-dce A while back I noticed that the code to call carry_backpropagate was being called after the optimization step. Which seemed wrong, but at the time I didn't have a testcase showing it as a problem. Now I have 4 :-) The way things used to work, the extension would be stripped away before calling carry_backpropagte, meaning carry_backpropagate would never see a SIGN_EXTENSION. Thus the code trying to account for the sign extended bit was never reached. Getting that bit marked live is what's needed to fix these testcases. Fallout is minor with just an adjustment needed to sensibly deal with vector modes in a place where we didn't have them before. I'm still somewhat concerned about this code. Specifically whether or not we can get in here with arbitrarily complex RTL, and if so do we need to recurse down and look at those sub-expressions. So while this patch fixes the most pressing issue, I wouldn't be terribly surprised if we're back inside this code at some point. Bootstrapped and regression tested on x86_64, ppc64le, riscv64, s390x, mips64, loongarch, aarch64, m68k, alpha, hppa, sh4, sh4eb, perhaps something else that I've forgotten... Also tested on all the crosses in my tester. PR rtl-optimization/116488 PR rtl-optimization/116579 PR rtl-optimization/116915 PR rtl-optimization/117226 gcc/ * ext-dce.cc (carry_backpropagate): Properly handle SIGN_EXTEND, add ZERO_EXTEND handling as well. (ext_dce_process_uses): Call carry_backpropagate before the optimization step. gcc/testsuite/ * gcc.dg/torture/pr116488.c: New test. * gcc.dg/torture/pr116579.c: New test. * gcc.dg/torture/pr116915.c: New test. * gcc.dg/torture/pr117226.c: New test. (cherry picked from commit 36e91df7716d34aa5694533837551593ec28f22b) Diff: --- gcc/ext-dce.cc | 34 +++++++++++++++++++++++++++------ gcc/testsuite/gcc.dg/torture/pr116488.c | 20 +++++++++++++++++++ gcc/testsuite/gcc.dg/torture/pr116579.c | 18 +++++++++++++++++ gcc/testsuite/gcc.dg/torture/pr116915.c | 15 +++++++++++++++ gcc/testsuite/gcc.dg/torture/pr117226.c | 17 +++++++++++++++++ 5 files changed, 98 insertions(+), 6 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 2f3514ae7976..a449b9f6b49c 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -478,7 +478,12 @@ binop_implies_op2_fully_live (rtx_code code) holds true, and bits set in MASK are live in the result. Compute a mask of (potentially) live bits in the non-constant inputs. In case of binop_implies_op2_fully_live (e.g. shifts), the computed mask may - exclusively pertain to the first operand. */ + exclusively pertain to the first operand. + + This looks wrong as we may have some important operations embedded as + operands of another operation. For example, we might have an extension + wrapping a shift. It really feels like this needs to be recursing down + into operands much more often. */ unsigned HOST_WIDE_INT carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x) @@ -557,9 +562,26 @@ carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x) return mmask; case SIGN_EXTEND: - if (mask & ~GET_MODE_MASK (GET_MODE_INNER (GET_MODE (XEXP (x, 0))))) + if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant () + || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ()) + return -1; + + /* We want the mode of the inner object. We need to ensure its + sign bit is on in MASK. */ + mode = GET_MODE (XEXP (x, 0)); + if (mask & ~GET_MODE_MASK (GET_MODE_INNER (mode))) mask |= 1ULL << (GET_MODE_BITSIZE (mode).to_constant () - 1); - return mask; + + /* Recurse into the operand. */ + return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0)); + + case ZERO_EXTEND: + if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant () + || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ()) + return -1; + + /* Recurse into the operand. */ + return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0)); /* We propagate for the shifted operand, but not the shift count. The count is handled specially. */ @@ -670,6 +692,8 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, if (skipped_dest) dst_mask = -1; + dst_mask = carry_backpropagate (dst_mask, code, src); + /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT of the source specially to improve optimization. */ if (code == SIGN_EXTEND || code == ZERO_EXTEND) @@ -696,9 +720,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, /* Optimization is done at this point. We just want to make sure everything that should get marked as live is marked - from here onward. Shouldn't the backpropagate step happen - before optimization? */ - dst_mask = carry_backpropagate (dst_mask, code, src); + from here onward. */ /* We will handle the other operand of a binary operator at the bottom of the loop by resetting Y. */ diff --git a/gcc/testsuite/gcc.dg/torture/pr116488.c b/gcc/testsuite/gcc.dg/torture/pr116488.c new file mode 100644 index 000000000000..9ead1298eb14 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116488.c @@ -0,0 +1,20 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fno-forward-propagate" } */ +int a, b; +char c, e; +unsigned char d; +__attribute__ ((noinline,noclone,noipa)) +void f(int g, short h) { + for (; a < 2; a++) { + b = h >> 16; + e = b * h; + d = h; + h = c = d % g; + } +} +int main() +{ + f(129, 128); + if (b != -1) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.dg/torture/pr116579.c b/gcc/testsuite/gcc.dg/torture/pr116579.c new file mode 100644 index 000000000000..36d57784b665 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116579.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fno-thread-jumps" } */ + + +int a, c, d, e; +char b; +long f; +int main() { + unsigned char g; + int h = -2, i = 0; + g = a + 128; + d = 0 <= (c || (h = g)); + f = h < 0 || i >= 32 ? h : 0; + (b = h) | (e = f); + if (e != 0) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr116915.c b/gcc/testsuite/gcc.dg/torture/pr116915.c new file mode 100644 index 000000000000..9368113b364a --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116915.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ + +long a, b, *c = &b; +short d, e; +int main() { + int f = 0; + for (; f != 1; f = (short)(f - 1)) { + d = -f; + e = a && e; + *c = 0 > f; + } + if (b != 0) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr117226.c b/gcc/testsuite/gcc.dg/torture/pr117226.c new file mode 100644 index 000000000000..2bb35a12b2b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr117226.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32 } */ +/* { dg-additional-options "-fno-tree-forwprop" } */ + +int a = 128, b, d; +long e = -2, c; +int main() { + char f = a; + int g = f; + c = (g < 0) - e; + unsigned char h = g; + b = h % c; + d = 9000000000000 << b; + if (d > 1) + __builtin_abort(); + return 0; +}