https://gcc.gnu.org/g:ec5349c37afe972ee79b777ee749630b1a0a007e

commit r16-905-gec5349c37afe972ee79b777ee749630b1a0a007e
Author: Konstantinos Eleftheriou <konstantinos.elefther...@vrull.eu>
Date:   Mon May 19 13:00:05 2025 +0200

    asf: Fix calling of emit_move_insn on registers of different modes 
[PR119884]
    
    This patch uses `lowpart_subreg` for the base register initialization,
    instead of zero-extending it. We had tried this solution before, but
    we were leaving undefined bytes in the upper part of the register.
    This shouldn't be happening as we are supposed to write the whole
    register when the load is eliminated. This was occurring when having
    multiple stores with the same offset as the load, generating a
    register move for all of them, overwriting the bit inserts that were
    inserted before them.
    
    In order to overcome this, we are removing redundant stores from the
    sequence, i.e. stores that write to addresses that will be overwritten
    by stores that come after them in the sequence. We are using the same
    bitmap that is used for the load elimination check, to keep track of
    the bytes that are written by each store.
    
    Also, we are now allowing the load to be eliminated even when there
    are overlaps between the stores, as there is no obvious reason why we
    shouldn't do that, we just want the stores to cover all of the load's
    bytes.
    
    Bootstrapped/regtested on AArch64 and x86_64.
    
            PR rtl-optimization/119884
    
    gcc/ChangeLog:
    
            * avoid-store-forwarding.cc (process_store_forwarding):
            Use `lowpart_subreg` for the base register initialization
            and remove redundant stores from the store/load sequence.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr119884.c: New test.

Diff:
---
 gcc/avoid-store-forwarding.cc            | 51 +++++++++++++++++++++++++-------
 gcc/testsuite/gcc.target/i386/pr119884.c | 13 ++++++++
 2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 5d960adec359..6825d0426ecc 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -176,20 +176,28 @@ process_store_forwarding (vec<store_fwd_info> &stores, 
rtx_insn *load_insn,
   /* Memory sizes should be constants at this stage.  */
   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
 
-  /* If the stores cover all the bytes of the load without overlap then we can
-     eliminate the load entirely and use the computed value instead.  */
+  /* If the stores cover all the bytes of the load, then we can eliminate
+     the load entirely and use the computed value instead.
+     We can also eliminate stores on addresses that are overwritten
+     by later stores.  */
 
   sbitmap forwarded_bytes = sbitmap_alloc (load_size);
   bitmap_clear (forwarded_bytes);
 
   unsigned int i;
   store_fwd_info* it;
+  auto_vec<store_fwd_info> redundant_stores;
+  auto_vec<int> store_ind_to_remove;
   FOR_EACH_VEC_ELT (stores, i, it)
     {
       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
-      if (bitmap_bit_in_range_p (forwarded_bytes, it->offset,
-                                it->offset + store_size - 1))
-       break;
+      if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
+                                     it->offset + store_size - 1))
+       {
+         redundant_stores.safe_push (*it);
+         store_ind_to_remove.safe_push (i);
+         continue;
+       }
       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     }
 
@@ -215,6 +223,15 @@ process_store_forwarding (vec<store_fwd_info> &stores, 
rtx_insn *load_insn,
        fprintf (dump_file, "(Load elimination candidate)\n");
     }
 
+  /* Remove redundant stores from the vector.  Although this is quadratic,
+     there doesn't seem to be much point optimizing it.  The number of
+     redundant stores is expected to be low and the length of the list is
+     limited by a --param.  The dependence checking that we did earlier is
+     also quadratic in the size of this list.  */
+  store_ind_to_remove.reverse ();
+  for (int i : store_ind_to_remove)
+    stores.ordered_remove (i);
+
   rtx load = single_set (load_insn);
   rtx dest;
 
@@ -231,18 +248,16 @@ process_store_forwarding (vec<store_fwd_info> &stores, 
rtx_insn *load_insn,
     {
       it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
       rtx_insn *insns = NULL;
+      const bool has_zero_offset = it->offset == 0;
 
       /* If we're eliminating the load then find the store with zero offset
         and use it as the base register to avoid a bit insert if possible.  */
-      if (load_elim && it->offset == 0)
+      if (load_elim && has_zero_offset)
        {
          start_sequence ();
 
-         machine_mode dest_mode = GET_MODE (dest);
-         rtx base_reg = it->mov_reg;
-         if (known_gt (GET_MODE_BITSIZE (dest_mode),
-                       GET_MODE_BITSIZE (GET_MODE (it->mov_reg))))
-           base_reg = gen_rtx_ZERO_EXTEND (dest_mode, it->mov_reg);
+         rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
+                                        GET_MODE (it->mov_reg));
 
          if (base_reg)
            {
@@ -380,6 +395,16 @@ process_store_forwarding (vec<store_fwd_info> &stores, 
rtx_insn *load_insn,
              print_rtl_single (dump_file, insn);
            }
        }
+
+      if (redundant_stores.length () > 0)
+       {
+         fprintf (dump_file, "\nRedundant stores that have been removed:\n");
+         FOR_EACH_VEC_ELT (redundant_stores, i, it)
+           {
+             fprintf (dump_file, "  ");
+             print_rtl_single (dump_file, it->store_insn);
+           }
+       }
     }
 
   stats_sf_avoided++;
@@ -399,6 +424,10 @@ process_store_forwarding (vec<store_fwd_info> &stores, 
rtx_insn *load_insn,
       delete_insn (it->store_insn);
     }
 
+  /* Delete redundant stores.  */
+  FOR_EACH_VEC_ELT (redundant_stores, i, it)
+    delete_insn (it->store_insn);
+
   df_insn_rescan (load_insn);
 
   if (load_elim)
diff --git a/gcc/testsuite/gcc.target/i386/pr119884.c 
b/gcc/testsuite/gcc.target/i386/pr119884.c
new file mode 100644
index 000000000000..34d5b689244d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119884.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-dse -favoid-store-forwarding" } */
+
+typedef __attribute__((__vector_size__(64))) char V;
+char c;
+V v;
+
+char
+foo()
+{
+  v *= c;
+  return v[0];
+}
\ No newline at end of file

Reply via email to