On Sun, Apr 20, 2025 at 11:26 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > Don't assume that stack slots can only be accessed by stack or frame > registers. We first find all registers defined by stack or frame > registers. Then check memory accesses by such registers, including > stack and frame registers. > > gcc/ > > PR target/109780 > PR target/109093 > * config/i386/i386.cc (stack_access_data): New. > (ix86_update_stack_alignment): Likewise. > (ix86_find_all_reg_use_1): Likewise. > (ix86_find_all_reg_use): Likewise. > (ix86_find_max_used_stack_alignment): Also check memory accesses > from registers defined by stack or frame registers. > > gcc/testsuite/ > > PR target/109780 > PR target/109093 > * g++.target/i386/pr109780-1.C: New test. > * gcc.target/i386/pr109093-1.c: Likewise. > * gcc.target/i386/pr109780-1.c: Likewise. > * gcc.target/i386/pr109780-2.c: Likewise. > * gcc.target/i386/pr109780-3.c: Likewise. > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > --- > gcc/config/i386/i386.cc | 174 ++++++++++++++++++--- > gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++ > gcc/testsuite/gcc.target/i386/pr109093-1.c | 33 ++++ > gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 ++ > gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 +++ > gcc/testsuite/gcc.target/i386/pr109780-3.c | 46 ++++++ > 6 files changed, 339 insertions(+), 21 deletions(-) > create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C > create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 28603c2943e..9e4e76857e6 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -8473,6 +8473,103 @@ output_probe_stack_range (rtx reg, rtx end) > return ""; > } > > +/* Data passed to ix86_update_stack_alignment. */ > +struct stack_access_data > +{ > + /* The stack access register. */ > + const_rtx reg; > + /* Pointer to stack alignment. */ > + unsigned int *stack_alignment; > +}; > + > +/* Update the maximum stack slot alignment from memory alignment in > + PAT. */ > + > +static void > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) > +{ > + /* This insn may reference stack slot. Update the maximum stack slot > + alignment if the memory is referenced by the stack access register. > + */ > + stack_access_data *p = (stack_access_data *) data; > + subrtx_iterator::array_type array; > + FOR_EACH_SUBRTX (iter, array, pat, ALL) > + { > + auto op = *iter; > + if (GET_CODE (op) == ZERO_EXTEND) > + op = XEXP (op, 0);
No need for the above, FOR_EACH_SUBRTX will iterate over ZERO_EXTEND. > + if (MEM_P (op) && reg_mentioned_p (p->reg, op)) > + { > + unsigned int alignment = MEM_ALIGN (op); > + if (alignment > *p->stack_alignment) > + *p->stack_alignment = alignment; > + break; > + } > + } > +} > + > +/* Helper function for ix86_find_all_reg_use. */ > + > +static void > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access, > + auto_bitmap &worklist) > +{ > + rtx dest = SET_DEST (set); > + if (!REG_P (dest)) > + return; > + > + rtx src = SET_SRC (set); > + > + if (GET_CODE (src) == ZERO_EXTEND) > + src = XEXP (src, 0); > + > + if (MEM_P (src) || CONST_SCALAR_INT_P (src)) > + return; > + > + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) > + return; > + > + /* Add this register to stack_slot_access. */ > + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); > + bitmap_set_bit (worklist, REGNO (dest)); > +} The above should also use FOR_EACH_SUBRTX iterators. Also, the function will also record non-integer registers, such as flags_reg and XMM registers as candidates. Please see the attached patch that implements my proposed version. > + > +/* Find all registers defined with REG. */ > + > +static void > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, > + unsigned int reg, auto_bitmap &worklist) > +{ > + for (df_ref ref = DF_REG_USE_CHAIN (reg); > + ref != NULL; > + ref = DF_REF_NEXT_REG (ref)) This will still record the whole chain of dependencies. This is firmly on the safe side, but perhaps is a slight overkill. OTOH, my implementation of ix86_find_all_reg_use_1 will reject all non-integer registers, so let's keep it as is. Can you please test the attached patch? Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index d15f91ddd2c..288dc9d55ff 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -8473,6 +8473,116 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Data passed to ix86_update_stack_alignment. */ +struct stack_access_data +{ + /* The stack access register. */ + const_rtx reg; + /* Pointer to stack alignment. */ + unsigned int *stack_alignment; +}; + +/* Update the maximum stack slot alignment from memory alignment in PAT. */ + +static void +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) +{ + /* This insn may reference stack slot. Update the maximum stack slot + alignment if the memory is referenced by the stack access register. */ + stack_access_data *p = (stack_access_data *) data; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, pat, ALL) + { + auto op = *iter; + if (MEM_P (op) && reg_mentioned_p (p->reg, op)) + { + unsigned int alignment = MEM_ALIGN (op); + + if (alignment > *p->stack_alignment) + *p->stack_alignment = alignment; + break; + } + } +} + +/* Helper function for ix86_find_all_reg_use. */ + +static void +ix86_find_all_reg_use_1 (rtx set, unsigned int regno, + HARD_REG_SET &stack_slot_access, + auto_bitmap &worklist) +{ + rtx dest = SET_DEST (set); + + if (!REG_P (dest)) + return; + + unsigned int dst_regno = REGNO (dest); + + if (TEST_HARD_REG_BIT (stack_slot_access, dst_regno)) + return; + + if (!GENERAL_REGNO_P (dst_regno)) + return; + + rtx src = SET_SRC (set); + + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, src, ALL) + { + auto op = *iter; + + if (MEM_P (op)) + iter.skip_subrtxes (); + + if (REG_P (op) && REGNO (op) == regno) + { + /* Add this register to stack_slot_access. */ + add_to_hard_reg_set (&stack_slot_access, Pmode, dst_regno); + bitmap_set_bit (worklist, dst_regno); + } + } +} + +/* Find all registers defined with REG. */ + +static void +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, + unsigned int reg, auto_bitmap &worklist) +{ + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (set) + ix86_find_all_reg_use_1 (set, DF_REF_REGNO (ref), + stack_slot_access, worklist); + + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + continue; + + for (int i = 0; i < XVECLEN (pat, 0); i++) + { + rtx exp = XVECEXP (pat, 0, i); + + if (GET_CODE (exp) == SET) + ix86_find_all_reg_use_1 (exp, DF_REF_REGNO (ref), + stack_slot_access, worklist); + } + } +} + /* Set stack_frame_required to false if stack frame isn't required. Update STACK_ALIGNMENT to the largest alignment, in bits, of stack slot used if stack frame is required and CHECK_STACK_SLOT is true. */ @@ -8491,10 +8601,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, add_to_hard_reg_set (&set_up_by_prologue, Pmode, HARD_FRAME_POINTER_REGNUM); - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - bool require_stack_frame = false; FOR_EACH_BB_FN (bb, cfun) @@ -8506,27 +8612,66 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, set_up_by_prologue)) { require_stack_frame = true; - - if (check_stack_slot) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } + break; } } cfun->machine->stack_frame_required = require_stack_frame; + + /* Stop if we don't need to check stack slot. */ + if (!check_stack_slot) + return; + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer. */ + auto_bitmap worklist; + + add_to_hard_reg_set (&stack_slot_access, Pmode, + STACK_POINTER_REGNUM); + bitmap_set_bit (worklist, STACK_POINTER_REGNUM); + + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); + } + + unsigned int reg; + + do + { + reg = bitmap_clear_first_set_bit (worklist); + ix86_find_all_reg_use (stack_slot_access, reg, worklist); + } + while (!bitmap_empty_p (worklist)); + + hard_reg_set_iterator hrsi; + stack_access_data data = { nullptr, &stack_alignment }; + + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi) + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + data.reg = DF_REF_REG (ref); + note_stores (insn, ix86_update_stack_alignment, &data); + } } /* Finalize stack_realign_needed and frame_pointer_needed flags, which