On Wed, Feb 19, 2025 at 8:16 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Wed, Feb 19, 2025 at 12:53 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > Since stack can only be accessed by GPR, check GENERAL_REG_P, instead of
> > REG_P, in ix86_find_all_reg_use_1.
> >
> > gcc/
> >
> > PR target/118936
> > * config/i386/i386.cc (ix86_find_all_reg_use_1): Replace REG_P
> > with GENERAL_REG_P.
> >
> > gcc/testsuite/
> >
> > PR target/118936
> > * gcc.target/i386/pr118936.c: New test.
> >
> > OK for master?
>
> As said in the PR, this change "fixes" only this particular problem,
> where we have:
>
>    97: ax:DI='g_1680'
>   170: xmm0:DI=ax:DI
>    98: xmm0:V2DI=vec_duplicate(xmm0:DI)
>       REG_EQUIV vec_duplicate(`g_1680')
>   101: [`g_1679']=xmm0:V2DI
>   103: [const(`g_1679'+0x10)]=xmm0:V2DI
>   105: [const(`g_1679'+0x20)]=xmm0:V2DI
>   107: [const(`g_1679'+0x30)]=xmm0:V2DI
>
> But does not fix the fact that your algorithm considers these stores
> even when they are in no way connected to stack or frame pointer.
> These do not even use registers in their address.
>
> Previous approach did this:
> -           if (check_stack_slot)
> -             {
> -               /* Find the maximum stack alignment.  */
> -               subrtx_iterator::array_type array;
> -               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> -                 if (MEM_P (*iter)
> -                     && (reg_mentioned_p (stack_pointer_rtx,
> -                                          *iter)
> -                         || reg_mentioned_p (frame_pointer_rtx,
> -                                             *iter)))
> -                   {
> -                     unsigned int alignment = MEM_ALIGN (*iter);
> -                     if (alignment > stack_alignment)
> -                       stack_alignment = alignment;
> -                   }
> -             }
>
> So, anywhere in the insn SP or FP was mentioned (either memory store
> or load), the alignment was increased as requested by MEM. The issue
> here is that another register that is calculated from SP or FP can be
> used to access the stack slot. I'm not sure I know how your algorithm
> works id detail, it detects e.g.:
>
>   103: [const(`g_1679'+0x10)]=xmm0:V2DI
>
> as a candidate to increase the alignment, but its address doesn't even
> use registers, let alone SP or FP. Also,
>
> +       note_stores (insn, ix86_update_stack_alignment,
> +                    &stack_alignment);
>
> in your algorithm does not consider that loads from the stack also
> increase alignment requirements.
>
> Can you please explain the approach in your original patch in some
> more detail? I'd expect a variant of the original approach, where the
> compiler keeps an up-to-date list of registers that depend on SP or
> FP, and instead of a naive:
>
> reg_mentioned_p (stack_pointer_rtx, *iter)
>
> it goes through a list, formed of SP, FP and their dependent registers
> and updates stack alignment if the insn memory address uses the
> register on the list.
>
> Uros.

My algorithm keeps a list of registers which can access the stack
starting with SP and FP.  If any registers are derived from the list,
add them to the list until all used registers are exhausted.   If
any stores use the register on the list, update the stack alignment.
The missing part is that it doesn't check if the register is actually
used for memory access.

Here is the v2 patch to also check if the register is used for memory
access.

-- 
H.J.
From 76d42ce8afc9255c729c54062148c77e748147c4 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Wed, 19 Feb 2025 19:48:07 +0800
Subject: [PATCH v2] x86: Check register and GENERAL_REG_P for stack access

Check the register for stack access in ix86_update_stack_alignment.  Since
stack can only be accessed by GPR, check GENERAL_REG_P, instead of REG_P,
in ix86_find_all_reg_use_1.

gcc/

	PR target/118936
	* config/i386/i386.cc (stack_access_data): New.
	(ix86_update_stack_alignment): Check if the memory is reference
	by the specified register.
	(ix86_find_all_reg_use_1): Replace REG_P with GENERAL_REG_P.
	(ix86_find_max_used_stack_alignment): Pass the register to
	ix86_update_stack_alignment.

gcc/testsuite/

	PR target/118936
	* gcc.target/i386/pr118936.c: New test.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386.cc                  | 26 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr118936.c | 22 ++++++++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118936.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 560e6525b56..33bc8200876 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8466,6 +8466,15 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
+/* Data passed to ix86_update_stack_alignment.  */
+struct stack_access_data
+{
+  /* Register to check for memory access.  */
+  const_rtx reg;
+  /* Pointer to stack alignment.  */
+  unsigned int *stack_alignment;
+};
+
 /* Update the maximum stack slot alignment from memory alignment in
    PAT.  */
 
@@ -8473,16 +8482,15 @@ static void
 ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
 {
   /* This insn may reference stack slot.  Update the maximum stack slot
-     alignment.  */
+     alignment if the memory is reference by the specified register.  */
+  stack_access_data *p = (stack_access_data *) data;
   subrtx_iterator::array_type array;
   FOR_EACH_SUBRTX (iter, array, pat, ALL)
-    if (MEM_P (*iter))
+    if (MEM_P (*iter) && reg_mentioned_p (p->reg, *iter))
       {
 	unsigned int alignment = MEM_ALIGN (*iter);
-	unsigned int *stack_alignment
-	  = (unsigned int *) data;
-	if (alignment > *stack_alignment)
-	  *stack_alignment = alignment;
+	if (alignment > *p->stack_alignment)
+	  *p->stack_alignment = alignment;
 	break;
       }
 }
@@ -8494,7 +8502,7 @@ ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
 			 auto_bitmap &worklist)
 {
   rtx dest = SET_DEST (set);
-  if (!REG_P (dest))
+  if (!GENERAL_REG_P (dest))
     return;
 
   rtx src = SET_SRC (set);
@@ -8630,8 +8638,8 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
 	if (!NONJUMP_INSN_P (insn))
 	  continue;
 
-	note_stores (insn, ix86_update_stack_alignment,
-		     &stack_alignment);
+	stack_access_data data = { DF_REF_REG (ref), &stack_alignment };
+	note_stores (insn, ix86_update_stack_alignment, &data);
       }
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/pr118936.c b/gcc/testsuite/gcc.target/i386/pr118936.c
new file mode 100644
index 00000000000..a3de8cbc27a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118936.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-omit-frame-pointer -fno-toplevel-reorder" } */
+
+struct S1
+{
+  int f1 : 17;
+  int f2 : 23;
+  int f3 : 11;
+  int f4 : 31;
+  int f6;
+};
+volatile struct S1 **g_1680;
+volatile struct S1 ***g_1679[8][8];
+void
+func_40 (struct S1 p_41, short *l_2436)
+{
+  char __trans_tmp_3;
+  __trans_tmp_3 = p_41.f6;
+  *l_2436 ^= __trans_tmp_3;
+  for (int i = 0; i < 8; i+= 1)
+    g_1679[1][i] = &g_1680;
+}
-- 
2.48.1

Reply via email to