https://gcc.gnu.org/g:1dd769b3d0d9251649dcb645d7ed6c4ba2202306
commit r16-33-g1dd769b3d0d9251649dcb645d7ed6c4ba2202306 Author: Maciej W. Rozycki <ma...@orcam.me.uk> Date: Sat Apr 19 14:10:25 2025 +0100 Alpha: Fix base block alignment calculation regression In determination of base block alignment we only examine a COMPONENT_REF tree node at hand without ever checking if its ultimate alignment has been reduced by the combined offset going back to the outermost object. Consequently cases have been observed where quadword accesses have been produced for a memory location referring a nested struct member only aligned to the longword boundary, causing emulation to trigger. Address this issue by recursing into COMPONENT_REF tree nodes until the outermost one has been reached, which is supposed to be a MEM_REF one, accumulating the offset as we go, fixing a commit e0dae4da4c45 ("Alpha: Also use tree information to get base block alignment") regression. Bail out and refrain from using tree information for alignment if we end up at something different or we are unable to calculate the offset at any point. gcc/ * config/alpha/alpha.cc (alpha_get_mem_rtx_alignment_and_offset): Recurse into COMPONENT_REF nodes. gcc/testsuite/ * gcc.target/alpha/memcpy-nested-offset-long.c: New file. * gcc.target/alpha/memcpy-nested-offset-quad.c: New file. Diff: --- gcc/config/alpha/alpha.cc | 23 +++---- .../gcc.target/alpha/memcpy-nested-offset-long.c | 76 ++++++++++++++++++++++ .../gcc.target/alpha/memcpy-nested-offset-quad.c | 64 ++++++++++++++++++ 3 files changed, 150 insertions(+), 13 deletions(-) diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index ba470d9e75ec..14e7da57ca6f 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) tree mem = MEM_EXPR (expr); if (mem != NULL_TREE) - switch (TREE_CODE (mem)) - { - case MEM_REF: - tree_offset = mem_ref_offset (mem).force_shwi (); - tree_align = get_object_alignment (get_base_address (mem)); - break; + { + HOST_WIDE_INT comp_offset = 0; - case COMPONENT_REF: + for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0)) { tree byte_offset = component_ref_field_offset (mem); tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1)); @@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) || !poly_int_tree_p (byte_offset, &offset) || !tree_fits_shwi_p (bit_offset)) break; - tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; + comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; } - tree_align = get_object_alignment (get_base_address (mem)); - break; - default: - break; - } + if (TREE_CODE (mem) == MEM_REF) + { + tree_offset = comp_offset + mem_ref_offset (mem).force_shwi (); + tree_align = get_object_alignment (get_base_address (mem)); + } + } if (reg_align > mem_align) { diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c new file mode 100644 index 000000000000..631d14f3de27 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c @@ -0,0 +1,76 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +typedef unsigned int __attribute__ ((mode (DI))) int64_t; +typedef unsigned int __attribute__ ((mode (SI))) int32_t; + +typedef union + { + int32_t l[8]; + } +val; + +typedef struct + { + int32_t l[2]; + val v; + } +tre; + +typedef struct + { + int32_t l[3]; + tre t; + } +due; + +typedef struct + { + val v; + int64_t q; + int32_t l[2]; + due d; + } +uno; + +void +memcpy_nested_offset_long (uno *u) +{ + u->d.t.v = u->v; +} + +/* Expect assembly such as: + + ldq $4,0($16) + ldq $3,8($16) + ldq $2,16($16) + srl $4,32,$7 + ldq $1,24($16) + srl $3,32,$6 + stl $4,68($16) + srl $2,32,$5 + stl $7,72($16) + srl $1,32,$4 + stl $3,76($16) + stl $6,80($16) + stl $2,84($16) + stl $5,88($16) + stl $1,92($16) + stl $4,96($16) + + that is with four quadword loads at offsets 0, 8, 16, 24 each and + eight longword stores at offsets 68, 72, 76, 80, 84, 88, 92, 96 each. */ + +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,68\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,76\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,84\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,92\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c new file mode 100644 index 000000000000..1d2227e0861c --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +typedef unsigned int __attribute__ ((mode (DI))) int64_t; +typedef unsigned int __attribute__ ((mode (SI))) int32_t; + +typedef union + { + int32_t l[8]; + } +val; + +typedef struct + { + int32_t l[2]; + val v; + } +tre; + +typedef struct + { + int32_t l[3]; + tre t; + } +due; + +typedef struct + { + val v; + int64_t q; + int32_t l[3]; + due d; + } +uno; + +void +memcpy_nested_offset_quad (uno *u) +{ + u->d.t.v = u->v; +} + +/* Expect assembly such as: + + ldq $4,0($16) + ldq $3,8($16) + ldq $2,16($16) + ldq $1,24($16) + stq $4,72($16) + stq $3,80($16) + stq $2,88($16) + stq $1,96($16) + + that is with four quadword loads at offsets 0, 8, 16, 24 each + and four quadword stores at offsets 72, 80, 88, 96 each. */ + +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */