https://gcc.gnu.org/g:524fedd7f658f9c57e5f230f21cadf406c5d5011

commit r15-6439-g524fedd7f658f9c57e5f230f21cadf406c5d5011
Author: Maciej W. Rozycki <ma...@orcam.me.uk>
Date:   Wed Dec 25 22:23:40 2024 +0000

    Alpha: Fix offset adjustment in unaligned access helpers
    
    Correct the offset adjustment made in the multi-word unaligned access
    helpers such that it is actually used by the unaligned load and store
    instructions, fixing a bug introduced with commit 1eb356b98df2 ("alpha
    gprel optimizations")[1] back in 2001, which replaced address changes
    made directly according to the argument of the MEM expression passed
    with one made according to an address previously extracted from said MEM
    expression.  The address is however incorrectly extracted from said MEM
    before an adjustment has been made to it for the offset supplied.
    
    This bug is usually covered by the fact that our block move and clear
    operations are hardly ever provided with correct block alignment data
    and we also usually fail to fetch that information from the MEM supplied
    (although PR target/115459 shows it does happen sometimes).  Instead the
    bit alignment of 8 is usually conservatively used, meaning that a zero
    offset is passed to `alpha_expand_unaligned_store_words' and then code
    has been written such that neither `alpha_expand_unaligned_load_words'
    nor `alpha_expand_unaligned_store_words' cannot ever be called with
    nonzero offset from `alpha_expand_block_move'.
    
    The only situation where `alpha_expand_unaligned_store_words' can be
    called with nonzero offset is from `alpha_expand_block_clear' with a BWX
    target for a misaligned block that has been embedded in a data object of
    a higher alignment such that there is a small unaligned prefix our code
    decides to handle so as to align further stores.
    
    For instance it happens when a block clear is called for a block of 9
    bytes embedded at offset 1 in a structure aligned to a 2-byte word, as
    illustrated by the test case included.  Now this test case does not work
    without the change that comes next applied, because the backend cannot
    see the word alignment of the struct and uses the bit alignment of 8
    instead.
    
    Should this change be swapped with the next one incorrect code such as:
    
            stb $31,1($16)
            lda $3,1($16)
            ldq_u $2,8($16)
            ldq_u $1,1($16)
            mskqh $2,$3,$2
            stq_u $2,8($16)
            mskql $1,$3,$1
            stq_u $1,1($16)
    
    would be produced, where the unadjusted offsets of 1/8 can be seen with
    the LDQ_U/STQ_U operations along with byte masks calculated accordingly
    rather than the expected offsets of 2/9.  As a result the byte at the
    offset of 9 fails to get cleared.  In these circumstances this would
    also show as execution failures with the memclr.c test:
    
    FAIL: gcc.c-torture/execute/memclr.c   -O1  execution test
    FAIL: gcc.c-torture/execute/memclr.c   -Os  execution test
    
    -- not at `-O0' though, as the higher alignment cannot be retrieved in
    that case, and then not at `-O2' or higher optimization levels either,
    because then we choose to open-code this block clear instead:
    
            ldbu $1,0($16)
            stw $31,8($16)
            stq $1,0($16)
    
    avoiding the bug in `alpha_expand_unaligned_store_words'.
    
    I am leaving the pattern match test case XFAIL-ed here for documentation
    purposes and it will be un-XFAIL-ed along with the fix to retrieve the
    correct alignment.  The run test is of course never expected to fail.
    
    References:
    
    [1] 
<https://inbox.sourceware.org/gcc-patches/20010909020708.a29...@twiddle.net/>
    
            gcc/
            * config/alpha/alpha.cc (alpha_expand_unaligned_load_words):
            Move address extraction until after the MEM referred has been
            adjusted for the offset supplied.
            (alpha_expand_unaligned_store_words): Likewise.
    
            gcc/testsuite/
            * gcc.target/alpha/memclr-a2-o1-c9-ptr.c: New file.
            * gcc.target/alpha/memclr-a2-o1-c9-run.c: New file.

Diff:
---
 gcc/config/alpha/alpha.cc                          | 16 +++----
 .../gcc.target/alpha/memclr-a2-o1-c9-ptr.c         | 50 ++++++++++++++++++++++
 .../gcc.target/alpha/memclr-a2-o1-c9-run.c         | 25 +++++++++++
 3 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 7c28743f2ee3..07753297c387 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3625,10 +3625,6 @@ alpha_expand_unaligned_load_words (rtx *out_regs, rtx 
smem,
   rtx sreg, areg, tmp, smema;
   HOST_WIDE_INT i;
 
-  smema = XEXP (smem, 0);
-  if (GET_CODE (smema) == LO_SUM)
-    smema = force_reg (Pmode, smema);
-
   /* Generate all the tmp registers we need.  */
   for (i = 0; i < words; ++i)
     {
@@ -3640,6 +3636,10 @@ alpha_expand_unaligned_load_words (rtx *out_regs, rtx 
smem,
   if (ofs != 0)
     smem = adjust_address (smem, GET_MODE (smem), ofs);
 
+  smema = XEXP (smem, 0);
+  if (GET_CODE (smema) == LO_SUM)
+    smema = force_reg (Pmode, smema);
+
   /* Load up all of the source data.  */
   for (i = 0; i < words; ++i)
     {
@@ -3698,10 +3698,6 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx 
dmem,
   rtx st_addr_1, st_addr_2, dmema;
   HOST_WIDE_INT i;
 
-  dmema = XEXP (dmem, 0);
-  if (GET_CODE (dmema) == LO_SUM)
-    dmema = force_reg (Pmode, dmema);
-
   /* Generate all the tmp registers we need.  */
   if (data_regs != NULL)
     for (i = 0; i < words; ++i)
@@ -3712,6 +3708,10 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx 
dmem,
   if (ofs != 0)
     dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
 
+  dmema = XEXP (dmem, 0);
+  if (GET_CODE (dmema) == LO_SUM)
+    dmema = force_reg (Pmode, dmema);
+
   st_addr_2 = change_address (dmem, DImode,
                              gen_rtx_AND (DImode,
                                           plus_constant (DImode, dmema,
diff --git a/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c 
b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
new file mode 100644
index 000000000000..06d0f0beffbc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (QI))) int08_t;
+typedef unsigned int __attribute__ ((mode (HI))) int16_t;
+
+typedef struct
+  {
+    int08_t v[9];
+  }
+s_t;
+
+typedef union
+  {
+    struct
+      {
+       int08_t c[1];
+       s_t s;
+       int08_t d[6];
+      };
+    int16_t a;
+  }
+u_t;
+
+void __attribute__ ((noinline))
+memclr_a2_o1_c9 (u_t *u)
+{
+  u->s = (s_t) { 0 };
+}
+
+/* Expect assembly such as:
+
+       ldq_u $2,9($16)
+       stb $31,1($16)
+       lda $3,2($16)
+       ldq_u $1,2($16)
+       mskqh $2,$3,$2
+       stq_u $2,9($16)
+       mskql $1,$3,$1
+       stq_u $1,2($16)
+
+   that is with a byte store at offset 1 and with two unaligned load/store
+   pairs at offsets 2 and 9 each.  */
+
+/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { 
xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-run.c 
b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-run.c
new file mode 100644
index 000000000000..43ba14701d41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-run.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "" } */
+
+#include "memclr-a2-o1-c9-ptr.c"
+
+u_t u = { { { [0] = 0xaa }, {{ [0 ... 8] = 0xaa }}, { [0 ... 5] = 0xaa } } };
+
+int
+main (void)
+{
+  int i;
+
+  memclr_a2_o1_c9 (&u);
+  asm ("" : : : "memory");
+  for (i = 0; i < sizeof (u.c); i++)
+    if (u.c[i] != 0xaa)
+      __builtin_abort ();
+  for (i = 0; i < sizeof (u.s.v); i++)
+    if (u.s.v[i] != 0x00)
+      __builtin_abort ();
+  for (i = 0; i < sizeof (u.d); i++)
+    if (u.d[i] != 0xaa)
+      __builtin_abort ();
+  return 0;
+}

Reply via email to