https://gcc.gnu.org/g:e0dae4da4c45e3959b0624551f80283c45a60446

commit r15-6440-ge0dae4da4c45e3959b0624551f80283c45a60446
Author: Maciej W. Rozycki <ma...@orcam.me.uk>
Date:   Wed Dec 25 22:23:40 2024 +0000

    Alpha: Also use tree information to get base block alignment
    
    We hardly ever emit code using machine instructions for aligned memory
    accesses for block move and clear operations.  The reason for this
    appears to be that suboptimal alignment is often passed by the caller
    and we then only try to find a better alignment by checking pseudo
    register pointer alignment information, which from observation is most
    often only set for stack frame references.
    
    This code originates from before Tree SSA days and we can do better
    nowadays, by looking up the original tree node associated with a MEM
    RTL, so implement this approach, factoring out repeating code from
    `alpha_expand_block_move' and `alpha_expand_block_clear' to a new
    function.
    
    In some cases, however, tree information is not available while pointer
    alignment is, such as with the case concerned with PR target/115459,
    where we have:
    
    (gdb) pr orig_src
    (mem:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ])
            (const_int 8368 [0x20b0])) [8  S18 A8])
    (gdb) pr orig_dst
    (mem/j/c:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ])
            (const_int 8208 [0x2010])) [8 MEM[(struct 
gnat__debug_pools__print_info_stdout__internal__L_18__B1182b__S1183b___PAD 
*)_339].F[1 ...]{lb: 1 sz: 1}+0 S18 A128])
    (gdb)
    
    showing no tree information and the alignment of 8 only for `orig_src',
    while indeed REGNO_POINTER_ALIGN returns 128 for pseudo 65.  So retain
    the old approach and return the largest alignment determined and its
    associated offset.
    
    Add test cases accordingly and remove XFAILs from memclr-a2-o1-c9-ptr.c
    now that aligned code does get produced for it.
    
            gcc/
            * config/alpha/alpha.cc
            (alpha_get_mem_rtx_alignment_and_offset): New function.
            (alpha_expand_block_move, alpha_expand_block_clear): Use it for
            alignment retrieval.
    
            gcc/testsuite/
            * gcc.target/alpha/memclr-a2-o1-c9-ptr.c: Remove XFAILs.
            * gcc.target/alpha/memcpy-di-aligned.c: New file.
            * gcc.target/alpha/memcpy-di-unaligned.c: New file.
            * gcc.target/alpha/memcpy-di-unaligned-dst.c: New file.
            * gcc.target/alpha/memcpy-di-unaligned-src.c: New file.

Diff:
---
 gcc/config/alpha/alpha.cc                          | 160 +++++++++++++--------
 .../gcc.target/alpha/memclr-a2-o1-c9-ptr.c         |  10 +-
 gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c |  16 +++
 .../gcc.target/alpha/memcpy-di-unaligned-dst.c     |  16 +++
 .../gcc.target/alpha/memcpy-di-unaligned-src.c     |  15 ++
 .../gcc.target/alpha/memcpy-di-unaligned.c         |  51 +++++++
 6 files changed, 206 insertions(+), 62 deletions(-)

diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 07753297c387..3b3a237a955f 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3771,6 +3771,78 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx 
dmem,
   emit_move_insn (st_addr_1, st_tmp_1);
 }
 
+/* Get the base alignment and offset of EXPR in A and O respectively.
+   Check for any pseudo register pointer alignment and for any tree
+   node information and return the largest alignment determined and
+   its associated offset.  */
+
+static void
+alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
+{
+  HOST_WIDE_INT tree_offset = 0, reg_offset = 0, mem_offset = 0;
+  int tree_align = 0, reg_align = 0, mem_align = MEM_ALIGN (expr);
+
+  gcc_assert (MEM_P (expr));
+
+  rtx addr = XEXP (expr, 0);
+  switch (GET_CODE (addr))
+    {
+    case REG:
+      reg_align = REGNO_POINTER_ALIGN (REGNO (addr));
+      break;
+
+    case PLUS:
+      if (REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
+       {
+         reg_offset = INTVAL (XEXP (addr, 1));
+         reg_align = REGNO_POINTER_ALIGN (REGNO (XEXP (addr, 0)));
+       }
+      break;
+
+    default:
+      break;
+    }
+
+  tree mem = MEM_EXPR (expr);
+  if (mem != NULL_TREE)
+    switch (TREE_CODE (mem))
+      {
+      case MEM_REF:
+       tree_offset = mem_ref_offset (mem).force_shwi ();
+       tree_align = get_object_alignment (get_base_address (mem));
+       break;
+
+      case COMPONENT_REF:
+       {
+         tree byte_offset = component_ref_field_offset (mem);
+         tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
+         poly_int64 offset;
+         if (!byte_offset
+             || !poly_int_tree_p (byte_offset, &offset)
+             || !tree_fits_shwi_p (bit_offset))
+           break;
+         tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+       }
+       tree_align = get_object_alignment (get_base_address (mem));
+       break;
+
+      default:
+       break;
+      }
+
+  if (reg_align > mem_align)
+    {
+      mem_offset = reg_offset;
+      mem_align = reg_align;
+    }
+  if (tree_align > mem_align)
+    {
+      mem_offset = tree_offset;
+      mem_align = tree_align;
+    }
+  o = mem_offset;
+  a = mem_align;
+}
 
 /* Expand string/block move operations.
 
@@ -3799,27 +3871,19 @@ alpha_expand_block_move (rtx operands[])
   else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
     return 0;
 
-  /* Look for additional alignment information from recorded register info.  */
+  /* Look for stricter alignment.  */
+  HOST_WIDE_INT c;
+  int a;
 
-  tmp = XEXP (orig_src, 0);
-  if (REG_P (tmp))
-    src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
-  else if (GET_CODE (tmp) == PLUS
-          && REG_P (XEXP (tmp, 0))
-          && CONST_INT_P (XEXP (tmp, 1)))
+  alpha_get_mem_rtx_alignment_and_offset (orig_src, a, c);
+  if (a > src_align)
     {
-      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
-
-      if (a > src_align)
-       {
-          if (a >= 64 && c % 8 == 0)
-           src_align = 64;
-          else if (a >= 32 && c % 4 == 0)
-           src_align = 32;
-          else if (a >= 16 && c % 2 == 0)
-           src_align = 16;
-       }
+      if (a >= 64 && c % 8 == 0)
+       src_align = 64;
+      else if (a >= 32 && c % 4 == 0)
+       src_align = 32;
+      else if (a >= 16 && c % 2 == 0)
+       src_align = 16;
 
       if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align)
        {
@@ -3828,25 +3892,15 @@ alpha_expand_block_move (rtx operands[])
        }
     }
 
-  tmp = XEXP (orig_dst, 0);
-  if (REG_P (tmp))
-    dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
-  else if (GET_CODE (tmp) == PLUS
-          && REG_P (XEXP (tmp, 0))
-          && CONST_INT_P (XEXP (tmp, 1)))
+  alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
+  if (a > dst_align)
     {
-      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
-
-      if (a > dst_align)
-       {
-          if (a >= 64 && c % 8 == 0)
-           dst_align = 64;
-          else if (a >= 32 && c % 4 == 0)
-           dst_align = 32;
-          else if (a >= 16 && c % 2 == 0)
-           dst_align = 16;
-       }
+      if (a >= 64 && c % 8 == 0)
+       dst_align = 64;
+      else if (a >= 32 && c % 4 == 0)
+       dst_align = 32;
+      else if (a >= 16 && c % 2 == 0)
+       dst_align = 16;
 
       if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align)
        {
@@ -4048,7 +4102,6 @@ alpha_expand_block_clear (rtx operands[])
   HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
   HOST_WIDE_INT alignofs = 0;
   rtx orig_dst = operands[0];
-  rtx tmp;
   int i, words, ofs = 0;
 
   if (orig_bytes <= 0)
@@ -4057,25 +4110,18 @@ alpha_expand_block_clear (rtx operands[])
     return 0;
 
   /* Look for stricter alignment.  */
-  tmp = XEXP (orig_dst, 0);
-  if (REG_P (tmp))
-    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
-  else if (GET_CODE (tmp) == PLUS
-          && REG_P (XEXP (tmp, 0))
-          && CONST_INT_P (XEXP (tmp, 1)))
-    {
-      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
-
-      if (a > align)
-       {
-          if (a >= 64)
-           align = a, alignofs = 8 - c % 8;
-          else if (a >= 32)
-           align = a, alignofs = 4 - c % 4;
-          else if (a >= 16)
-           align = a, alignofs = 2 - c % 2;
-       }
+  HOST_WIDE_INT c;
+  int a;
+
+  alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
+  if (a > align)
+    {
+      if (a >= 64)
+       align = a, alignofs = -c & 7;
+      else if (a >= 32)
+       align = a, alignofs = -c & 3;
+      else if (a >= 16)
+       align = a, alignofs = -c & 1;
 
       if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < align)
        {
diff --git a/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c 
b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
index 06d0f0beffbc..3f7edc890e44 100644
--- a/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
+++ b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
@@ -43,8 +43,8 @@ memclr_a2_o1_c9 (u_t *u)
    that is with a byte store at offset 1 and with two unaligned load/store
    pairs at offsets 2 and 9 each.  */
 
-/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { 
xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 } 
} */
+/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c 
b/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c
new file mode 100644
index 000000000000..fd3c2b90c572
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned long aligned_src_di[9] = { [0 ... 8] = 0xe6e7e8e9eaebeced };
+unsigned long aligned_dst_di[9] = { [0 ... 8] = 0xdcdbdad9d8d7d6d5 };
+
+void
+memcpy_aligned_data_di (void)
+{
+  __builtin_memcpy (aligned_dst_di + 1, aligned_src_di + 1, 56);
+}
+
+/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c 
b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c
new file mode 100644
index 000000000000..5e9b5c32e52c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned long unaligned_src_di[9] = { [0 ... 8] = 0xfefdfcfbfaf9f8f7 };
+
+void
+memcpy_unaligned_dst_di (void *dst)
+{
+  __builtin_memcpy (dst, unaligned_src_di + 1, 56);
+}
+
+/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 8 } } */
+/* { dg-final { scan-assembler-not "\\sstq\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c 
b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c
new file mode 100644
index 000000000000..912fa56dcc00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned long unaligned_dst_di[9] = { [0 ... 8] = 0xc4c5c6c7c8c9cacb };
+
+void
+memcpy_unaligned_src_di (const void *src)
+{
+  __builtin_memcpy (unaligned_dst_di + 1, src, 56);
+}
+
+/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq|stq_u)\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c 
b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c
new file mode 100644
index 000000000000..fe7fc9b1d17d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-additional-sources memcpy-di-aligned.c } */
+/* { dg-additional-sources memcpy-di-unaligned-src.c } */
+/* { dg-additional-sources memcpy-di-unaligned-dst.c } */
+/* { dg-options "" } */
+
+void memcpy_aligned_data_di (void);
+void memcpy_unaligned_dst_di (void *);
+void memcpy_unaligned_src_di (const void *);
+
+extern unsigned long aligned_src_di[];
+extern unsigned long aligned_dst_di[];
+extern unsigned long unaligned_src_di[];
+extern unsigned long unaligned_dst_di[];
+
+int
+main (void)
+{
+  unsigned long v;
+  int i;
+
+  for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808)
+    unaligned_src_di[i] = v;
+  asm ("" : : : "memory");
+  memcpy_unaligned_dst_di (aligned_src_di + 1);
+  asm ("" : : : "memory");
+  memcpy_aligned_data_di ();
+  asm ("" : : : "memory");
+  memcpy_unaligned_src_di (aligned_dst_di + 1);
+  asm ("" : : : "memory");
+  for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808)
+    if (unaligned_dst_di[i] != v)
+      return 1;
+  if (unaligned_src_di[0] != 0xfefdfcfbfaf9f8f7)
+      return 1;
+  if (unaligned_src_di[8] != 0xfefdfcfbfaf9f8f7)
+      return 1;
+  if (aligned_src_di[0] != 0xe6e7e8e9eaebeced)
+      return 1;
+  if (aligned_src_di[8] != 0xe6e7e8e9eaebeced)
+      return 1;
+  if (aligned_dst_di[0] != 0xdcdbdad9d8d7d6d5)
+      return 1;
+  if (aligned_dst_di[8] != 0xdcdbdad9d8d7d6d5)
+      return 1;
+  if (unaligned_dst_di[0] != 0xc4c5c6c7c8c9cacb)
+      return 1;
+  if (unaligned_dst_di[8] != 0xc4c5c6c7c8c9cacb)
+      return 1;
+  return 0;
+}

Reply via email to