https://gcc.gnu.org/g:c86e1c54c6f8771d08a8c070717b80607f990f8a

commit r15-6464-gc86e1c54c6f8771d08a8c070717b80607f990f8a
Author: kelefth <konstantinos.elefther...@vrull.eu>
Date:   Mon Dec 16 14:36:59 2024 +0100

    avoid-store-forwarding: fix reg init on load-elimination [PR117835]
    
    During the initialization of the base register for the zero-offset
    store, in the case that we are eliminating the load, we used a
    paradoxical subreg assuming that we don't care about the higher bits
    of the register. This led to writing wrong values when we were not
    updating the whole register.
    
    This patch fixes the issue by zero-extending the value stored in the
    base register instead of using a paradoxical subreg.
    
    Bootstrapped/regtested on x86 and AArch64.
    
            PR rtl-optimization/117835
            PR rtl-optimization/117872
    
    gcc/ChangeLog:
    
            * avoid-store-forwarding.cc
            (store_forwarding_analyzer::process_store_forwarding):
            Zero-extend the value stored in the base register instead of
            using a paradoxical subreg.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr117835.c: New test.

Diff:
---
 gcc/avoid-store-forwarding.cc            |  6 +-----
 gcc/testsuite/gcc.target/i386/pr117835.c | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 1b8c35bc6cb7..fa83e10fedca 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -238,11 +238,7 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
        {
          start_sequence ();
 
-         /* We can use a paradoxical subreg to force this to a wider mode, as
-            the only use will be inserting the bits (i.e., we don't care about
-            the value of the higher bits).  */
-         rtx ext0 = lowpart_subreg (GET_MODE (dest), it->mov_reg,
-                                    GET_MODE (it->mov_reg));
+         rtx ext0 = gen_rtx_ZERO_EXTEND (GET_MODE (dest), it->mov_reg);
          if (ext0)
            {
              rtx_insn *move0 = emit_move_insn (dest, ext0);
diff --git a/gcc/testsuite/gcc.target/i386/pr117835.c b/gcc/testsuite/gcc.target/i386/pr117835.c
new file mode 100644
index 000000000000..eac71aac916b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117835.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-options "-O -favoid-store-forwarding -mno-push-args --param=store-forwarding-max-distance=0 -Wno-psabi" } */
+
+typedef __attribute__((__vector_size__ (64))) unsigned short V;
+
+__attribute__((__noipa__)) V
+foo (V v, V)
+{
+  return v;
+}
+
+int main ()
+{
+  V a = (V){3, 5, 0, 8, 9, 3, 5, 1, 3, 4, 2, 5, 5, 0, 5, 3, 61886};
+  V b = (V){6, 80, 15, 2, 2, 1, 1, 3, 5};
+  V x = foo (a, b);
+  for (unsigned i = 0; i < sizeof(x)/sizeof(x[0]); i++)
+    if (x[i] != a[i])
+      __builtin_abort();
+}
\ No newline at end of file

Reply via email to