On Sun, Apr 20, 2025 at 11:26 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers.  We first find all registers defined by stack or frame
> registers.  Then check memory accesses by such registers, including
> stack and frame registers.
>
> gcc/
>
>         PR target/109780
>         PR target/109093
>         * config/i386/i386.cc (stack_access_data): New.
>         (ix86_update_stack_alignment): Likewise.
>         (ix86_find_all_reg_use_1): Likewise.
>         (ix86_find_all_reg_use): Likewise.
>         (ix86_find_max_used_stack_alignment): Also check memory accesses
>         from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
>         PR target/109780
>         PR target/109093
>         * g++.target/i386/pr109780-1.C: New test.
>         * gcc.target/i386/pr109093-1.c: Likewise.
>         * gcc.target/i386/pr109780-1.c: Likewise.
>         * gcc.target/i386/pr109780-2.c: Likewise.
>         * gcc.target/i386/pr109780-3.c: Likewise.
>
> Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> ---
>  gcc/config/i386/i386.cc                    | 174 ++++++++++++++++++---
>  gcc/testsuite/g++.target/i386/pr109780-1.C |  72 +++++++++
>  gcc/testsuite/gcc.target/i386/pr109093-1.c |  33 ++++
>  gcc/testsuite/gcc.target/i386/pr109780-1.c |  14 ++
>  gcc/testsuite/gcc.target/i386/pr109780-2.c |  21 +++
>  gcc/testsuite/gcc.target/i386/pr109780-3.c |  46 ++++++
>  6 files changed, 339 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 28603c2943e..9e4e76857e6 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -8473,6 +8473,103 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> +/* Data passed to ix86_update_stack_alignment.  */
> +struct stack_access_data
> +{
> +  /* The stack access register.  */
> +  const_rtx reg;
> +  /* Pointer to stack alignment.  */
> +  unsigned int *stack_alignment;
> +};
> +
> +/* Update the maximum stack slot alignment from memory alignment in
> +   PAT.  */
> +
> +static void
> +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> +{
> +  /* This insn may reference stack slot.  Update the maximum stack slot
> +     alignment if the memory is referenced by the stack access register.
> +   */
> +  stack_access_data *p = (stack_access_data *) data;
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SUBRTX (iter, array, pat, ALL)
> +    {
> +      auto op = *iter;
> +      if (GET_CODE (op) == ZERO_EXTEND)
> +       op = XEXP (op, 0);

No need for the above, FOR_EACH_SUBRTX will iterate over ZERO_EXTEND.

> +      if (MEM_P (op) && reg_mentioned_p (p->reg, op))
> +       {
> +         unsigned int alignment = MEM_ALIGN (op);
> +         if (alignment > *p->stack_alignment)
> +           *p->stack_alignment = alignment;
> +         break;
> +       }
> +    }
> +}
> +
> +/* Helper function for ix86_find_all_reg_use.  */
> +
> +static void
> +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> +                        auto_bitmap &worklist)
> +{
> +  rtx dest = SET_DEST (set);
> +  if (!REG_P (dest))
> +    return;
> +
> +  rtx src = SET_SRC (set);
> +
> +  if (GET_CODE (src) == ZERO_EXTEND)
> +    src = XEXP (src, 0);
> +
> +  if (MEM_P (src) || CONST_SCALAR_INT_P (src))
> +    return;
> +
> +  if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> +    return;
> +
> +  /* Add this register to stack_slot_access.  */
> +  add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> +  bitmap_set_bit (worklist, REGNO (dest));
> +}

The above should also use FOR_EACH_SUBRTX iterators. Also, the
function will also record non-integer registers, such as flags_reg and
XMM registers as candidates. Please see the attached patch that
implements my proposed version.

> +
> +/* Find all registers defined with REG.  */
> +
> +static void
> +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> +                      unsigned int reg, auto_bitmap &worklist)
> +{
> +  for (df_ref ref = DF_REG_USE_CHAIN (reg);
> +       ref != NULL;
> +       ref = DF_REF_NEXT_REG (ref))

This will still record the whole chain of dependencies. This is firmly
on the safe side, but perhaps is a slight overkill. OTOH, my
implementation of ix86_find_all_reg_use_1 will reject all non-integer
registers, so let's keep it as is.

Can you please test the attached patch?

Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d15f91ddd2c..288dc9d55ff 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8473,6 +8473,116 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
+/* Data passed to ix86_update_stack_alignment.  */
+struct stack_access_data
+{
+  /* The stack access register.  */
+  const_rtx reg;
+  /* Pointer to stack alignment.  */
+  unsigned int *stack_alignment;
+};
+
+/* Update the maximum stack slot alignment from memory alignment in PAT.  */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+  /* This insn may reference stack slot.  Update the maximum stack slot
+     alignment if the memory is referenced by the stack access register. */
+  stack_access_data *p = (stack_access_data *) data;
+
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, pat, ALL)
+    {
+      auto op = *iter;
+      if (MEM_P (op) && reg_mentioned_p (p->reg, op))
+       {
+         unsigned int alignment = MEM_ALIGN (op);
+
+         if (alignment > *p->stack_alignment)
+           *p->stack_alignment = alignment;
+         break;
+       }
+    }
+}
+
+/* Helper function for ix86_find_all_reg_use.  */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, unsigned int regno,
+                        HARD_REG_SET &stack_slot_access,
+                        auto_bitmap &worklist)
+{
+  rtx dest = SET_DEST (set);
+
+  if (!REG_P (dest))
+    return;
+
+  unsigned int dst_regno = REGNO (dest);
+
+  if (TEST_HARD_REG_BIT (stack_slot_access, dst_regno))
+    return;
+
+  if (!GENERAL_REGNO_P (dst_regno))
+    return;
+
+  rtx src = SET_SRC (set);
+
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
+    {
+      auto op = *iter;
+
+      if (MEM_P (op))
+       iter.skip_subrtxes ();
+
+      if (REG_P (op) && REGNO (op) == regno)
+       {
+         /* Add this register to stack_slot_access.  */
+         add_to_hard_reg_set (&stack_slot_access, Pmode, dst_regno);
+         bitmap_set_bit (worklist, dst_regno);
+       }
+    }
+}
+
+/* Find all registers defined with REG.  */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+                      unsigned int reg, auto_bitmap &worklist)
+{
+  for (df_ref ref = DF_REG_USE_CHAIN (reg);
+       ref != NULL;
+       ref = DF_REF_NEXT_REG (ref))
+    {
+      if (DF_REF_IS_ARTIFICIAL (ref))
+       continue;
+
+      rtx_insn *insn = DF_REF_INSN (ref);
+
+      if (!NONJUMP_INSN_P (insn))
+       continue;
+
+      rtx set = single_set (insn);
+      if (set)
+       ix86_find_all_reg_use_1 (set, DF_REF_REGNO (ref),
+                                stack_slot_access, worklist);
+
+      rtx pat = PATTERN (insn);
+      if (GET_CODE (pat) != PARALLEL)
+       continue;
+
+      for (int i = 0; i < XVECLEN (pat, 0); i++)
+       {
+         rtx exp = XVECEXP (pat, 0, i);
+
+         if (GET_CODE (exp) == SET)
+           ix86_find_all_reg_use_1 (exp, DF_REF_REGNO (ref),
+                                    stack_slot_access, worklist);
+       }
+    }
+}
+
 /* Set stack_frame_required to false if stack frame isn't required.
    Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
    slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
@@ -8491,10 +8601,6 @@ ix86_find_max_used_stack_alignment (unsigned int 
&stack_alignment,
   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                       HARD_FRAME_POINTER_REGNUM);
 
-  /* The preferred stack alignment is the minimum stack alignment.  */
-  if (stack_alignment > crtl->preferred_stack_boundary)
-    stack_alignment = crtl->preferred_stack_boundary;
-
   bool require_stack_frame = false;
 
   FOR_EACH_BB_FN (bb, cfun)
@@ -8506,27 +8612,66 @@ ix86_find_max_used_stack_alignment (unsigned int 
&stack_alignment,
                                       set_up_by_prologue))
          {
            require_stack_frame = true;
-
-           if (check_stack_slot)
-             {
-               /* Find the maximum stack alignment.  */
-               subrtx_iterator::array_type array;
-               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
-                 if (MEM_P (*iter)
-                     && (reg_mentioned_p (stack_pointer_rtx,
-                                          *iter)
-                         || reg_mentioned_p (frame_pointer_rtx,
-                                             *iter)))
-                   {
-                     unsigned int alignment = MEM_ALIGN (*iter);
-                     if (alignment > stack_alignment)
-                       stack_alignment = alignment;
-                   }
-             }
+           break;
          }
     }
 
   cfun->machine->stack_frame_required = require_stack_frame;
+
+  /* Stop if we don't need to check stack slot.  */
+  if (!check_stack_slot)
+    return;
+
+  /* The preferred stack alignment is the minimum stack alignment.  */
+  if (stack_alignment > crtl->preferred_stack_boundary)
+    stack_alignment = crtl->preferred_stack_boundary;
+
+  HARD_REG_SET stack_slot_access;
+  CLEAR_HARD_REG_SET (stack_slot_access);
+
+  /* Stack slot can be accessed by stack pointer, frame pointer or
+     registers defined by stack pointer or frame pointer.  */
+  auto_bitmap worklist;
+
+  add_to_hard_reg_set (&stack_slot_access, Pmode,
+                      STACK_POINTER_REGNUM);
+  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+  if (frame_pointer_needed)
+    {
+      add_to_hard_reg_set (&stack_slot_access, Pmode,
+                          HARD_FRAME_POINTER_REGNUM);
+      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+    }
+
+  unsigned int reg;
+
+  do
+    {
+      reg = bitmap_clear_first_set_bit (worklist);
+      ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+    }
+  while (!bitmap_empty_p (worklist));
+
+  hard_reg_set_iterator hrsi;
+  stack_access_data data = { nullptr, &stack_alignment };
+
+  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+    for (df_ref ref = DF_REG_USE_CHAIN (reg);
+        ref != NULL;
+        ref = DF_REF_NEXT_REG (ref))
+      {
+       if (DF_REF_IS_ARTIFICIAL (ref))
+         continue;
+
+       rtx_insn *insn = DF_REF_INSN (ref);
+
+       if (!NONJUMP_INSN_P (insn))
+         continue;
+
+       data.reg = DF_REF_REG (ref);
+       note_stores (insn, ix86_update_stack_alignment, &data);
+      }
 }
 
 /* Finalize stack_realign_needed and frame_pointer_needed flags, which

Reply via email to