https://gcc.gnu.org/g:24336625bb26102f393c8c58d63c1cb0f24cbb1a

commit r16-8398-g24336625bb26102f393c8c58d63c1cb0f24cbb1a
Author: Vladimir N. Makarov <[email protected]>
Date:   Wed Apr 1 15:08:01 2026 -0400

    [PR124696, LRA]: Fix LRA cycle involving reloads, secondary memory reloads, and subreg reloads
    
    The code for reloading the register inside a paradoxical subreg assumes
    that the subreg is located in more than one hard reg.  But the check for
    this was omitted, and this resulted in LRA cycling through a series of
    reloads, secondary memory reloads, and subreg reloads.  The patch fixes
    this.
    
    gcc/ChangeLog:
    
            PR rtl-optimization/124696
            * lra-constraints.cc (simplify_operand_subreg): Reload
            paradoxical subreg only if it requires more than one hard reg.
    
    gcc/testsuite/ChangeLog:
    
            PR rtl-optimization/124696
            * gcc.target/i386/pr124696.c: New.

Diff:
---
 gcc/lra-constraints.cc                   |  1 +
 gcc/testsuite/gcc.target/i386/pr124696.c | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index e24b56675eb7..8526a89f9ec7 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -1996,6 +1996,7 @@ simplify_operand_subreg (int nop, machine_mode reg_mode)
           && REGNO (reg) >= FIRST_PSEUDO_REGISTER
           && paradoxical_subreg_p (operand)
           && (inner_hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0
+          && hard_regno_nregs (inner_hard_regno, mode) > 1
           && ((hard_regno
                = simplify_subreg_regno (inner_hard_regno, innermode,
                                         SUBREG_BYTE (operand), mode)) < 0
diff --git a/gcc/testsuite/gcc.target/i386/pr124696.c b/gcc/testsuite/gcc.target/i386/pr124696.c
new file mode 100644
index 000000000000..19474edbdc26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr124696.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -w -mno-sse2 -mstringop-strategy=loop --param=hot-bb-frequency-fraction=0" } */
+typedef char U __attribute__((__vector_size__ (64)));
+typedef __int128 V __attribute__((__vector_size__ (64)));
+typedef short W __attribute__((__vector_size__ (64)));
+
+U u;
+V v;
+W w;
+
+V
+foo (int i, __int128 y)
+{
+  w = (W){};
+  u -= (U)(W){1, i, i};
+  return i + y + (V) u + v;
+}

Reply via email to