On Thu, Feb 20, 2025 at 5:37 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Wed, Feb 19, 2025 at 10:09 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > ... > > > My algorithm keeps a list of registers which can access the stack > > > starting with SP and FP. If any registers are derived from the list, > > > add them to the list until all used registers are exhausted. If > > > any stores use the register on the list, update the stack alignment. > > > The missing part is that it doesn't check if the register is actually > > > used for memory access. > > > > > > Here is the v2 patch to also check if the register is used for memory > > > access. > > > > @@ -8473,16 +8482,15 @@ static void > > ix86_update_stack_alignment (rtx, const_rtx pat, void *data) > > { > > /* This insn may reference stack slot. Update the maximum stack slot > > - alignment. */ > > + alignment if the memory is reference by the specified register. */ > > > > referenced > > Fixed. > > > + stack_access_data *p = (stack_access_data *) data; > > subrtx_iterator::array_type array; > > FOR_EACH_SUBRTX (iter, array, pat, ALL) > > - if (MEM_P (*iter)) > > + if (MEM_P (*iter) && reg_mentioned_p (p->reg, *iter)) > > > > @@ -8494,7 +8502,7 @@ ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET > > &stack_slot_access, > > auto_bitmap &worklist) > > { > > rtx dest = SET_DEST (set); > > - if (!REG_P (dest)) > > + if (!GENERAL_REG_P (dest)) > > return; > > > > The above change is not needed now that the register in the memory > > reference is checked. The compiler can generate GPR->XMM->GPR > > sequence. > > Fixed. > > > @@ -8630,8 +8638,8 @@ ix86_find_max_used_stack_alignment (unsigned int > > &stack_alignment, > > if (!NONJUMP_INSN_P (insn)) > > continue; > > > > - note_stores (insn, ix86_update_stack_alignment, > > - &stack_alignment); > > + stack_access_data data = { DF_REF_REG (ref), &stack_alignment }; > > + note_stores (insn, ix86_update_stack_alignment, &data); > > > > Do we need FOR_EACH_SUBRTX here instead of note_stores to also process > > reads from stack? Reads should also be aligned. > > > > Yes, we do since ix86_update_stack_alignment gets the RTL pattern, > not operand: > > Breakpoint 1, ix86_update_stack_alignment (pat=0x7fffe966a108, > data=0x7fffffffce00) > at /export/gnu/import/git/gitlab/x86-gcc/gcc/config/i386/i386.cc:8486 > 8486 stack_access_data *p = (stack_access_data *) data; > (set (reg/f:DI 20 xmm0 [orig:126 _53 ] [126]) > (reg/f:DI 0 ax [orig:126 _53 ] [126])) > (gdb) > > FOR_EACH_SUBRTX is needed to check for the memory > operand referenced by the stack access register. > > Here is the v3 patch. OK for master? > > -- > H.J.
Here is the v4 patch to move stack_access_data data = { nullptr, &stack_alignment }; out of the loop. -- H.J.
From 1f8d9b3850333984eab7e70cf73fee908aec9e77 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Wed, 19 Feb 2025 19:48:07 +0800 Subject: [PATCH v4] x86: Check the stack access register for stack access Pass the stack access register to ix86_update_stack_alignment to check the stack access register for stack access. gcc/ PR target/118936 * config/i386/i386.cc (stack_access_data): New. (ix86_update_stack_alignment): Check if the memory is referenced by the stack access register. (ix86_find_max_used_stack_alignment): Also pass the stack access register to ix86_update_stack_alignment. gcc/testsuite/ PR target/118936 * gcc.target/i386/pr118936.c: New test. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.cc | 26 ++++++++++++++++-------- gcc/testsuite/gcc.target/i386/pr118936.c | 22 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr118936.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 560e6525b56..f70bcd9c63f 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -8466,6 +8466,15 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Data passed to ix86_update_stack_alignment. */ +struct stack_access_data +{ + /* The stack access register. */ + const_rtx reg; + /* Pointer to stack alignment. */ + unsigned int *stack_alignment; +}; + /* Update the maximum stack slot alignment from memory alignment in PAT. */ @@ -8473,16 +8482,16 @@ static void ix86_update_stack_alignment (rtx, const_rtx pat, void *data) { /* This insn may reference stack slot. Update the maximum stack slot - alignment. */ + alignment if the memory is referenced by the stack access register. + */ + stack_access_data *p = (stack_access_data *) data; subrtx_iterator::array_type array; FOR_EACH_SUBRTX (iter, array, pat, ALL) - if (MEM_P (*iter)) + if (MEM_P (*iter) && reg_mentioned_p (p->reg, *iter)) { unsigned int alignment = MEM_ALIGN (*iter); - unsigned int *stack_alignment - = (unsigned int *) data; - if (alignment > *stack_alignment) - *stack_alignment = alignment; + if (alignment > *p->stack_alignment) + *p->stack_alignment = alignment; break; } } @@ -8616,6 +8625,7 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, while (!bitmap_empty_p (worklist)); hard_reg_set_iterator hrsi; + stack_access_data data = { nullptr, &stack_alignment }; EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi) for (df_ref ref = DF_REG_USE_CHAIN (reg); @@ -8630,8 +8640,8 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, if (!NONJUMP_INSN_P (insn)) continue; - note_stores (insn, ix86_update_stack_alignment, - &stack_alignment); + data.reg = DF_REF_REG (ref); + note_stores (insn, ix86_update_stack_alignment, &data); } } diff --git a/gcc/testsuite/gcc.target/i386/pr118936.c b/gcc/testsuite/gcc.target/i386/pr118936.c new file mode 100644 index 00000000000..a3de8cbc27a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr118936.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-omit-frame-pointer -fno-toplevel-reorder" } */ + +struct S1 +{ + int f1 : 17; + int f2 : 23; + int f3 : 11; + int f4 : 31; + int f6; +}; +volatile struct S1 **g_1680; +volatile struct S1 ***g_1679[8][8]; +void +func_40 (struct S1 p_41, short *l_2436) +{ + char __trans_tmp_3; + __trans_tmp_3 = p_41.f6; + *l_2436 ^= __trans_tmp_3; + for (int i = 0; i < 8; i+= 1) + g_1679[1][i] = &g_1680; +} -- 2.48.1