On Wed, 5 Feb 2025, Jakub Jelinek wrote:

> On Tue, Feb 04, 2025 at 01:11:10PM +0100, Richard Biener wrote:
> > OK, fair enough.  I think there should be a comment indicating this
> > isn't a fully conservative approach but handles only a certain class
> > of accesses, with the hope that this is all that's actually needed.
> 
> Ok, here is an updated patch.
> 
> cselib_invalidate_mem (callmem[1]) is now done solely for
> !ACCUMULATE_OUTGOING_ARGS || cfun->calls_alloca functions.  I've
> renamed the variables to x_{base,off} and sp_{base,off} - the first
> pair is for x's address, the second pair for the current value of the
> stack pointer.  I've also tweaked/extended the comments, and
> sp_{base,off} is now initialized just once per cselib_invalidate_mem
> call, though still only lazily (if we actually need it for something).
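> 
> Just for illustration (not part of the patch, and ignoring the SPARC
> stack bias adjustment), once the MEM's address and the stack pointer
> are known to share the same SP-derived base VALUE, the new checks boil
> down to plain offset comparisons, roughly like this self-contained
> sketch:
> 
>   #include <stdbool.h>
> 
>   /* Illustrative only: x_off is the MEM's offset from the shared base,
>      sz its size and sp_off the stack pointer's offset from that same
>      base.  Return true if the MEM certainly isn't below (downward
>      growing stack) resp. above (upward growing stack) the stack
>      pointer and so needn't be invalidated for a const/pure call.  */
>   static bool
>   mem_not_below_sp (bool stack_grows_downward, long x_off,
>                     unsigned long sz, long sp_off)
>   {
>     if (stack_grows_downward)
>       /* The MEM starts at or above the current stack pointer.  */
>       return x_off >= sp_off;
>     /* The MEM ends at or below the current stack pointer.  */
>     return x_off < sp_off && (long) ((unsigned long) x_off + sz) <= sp_off;
>   }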
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

LGTM.

Richard.

> 2025-02-05  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR rtl-optimization/117239
>       * cselib.cc: Include predict.h.
>       (callmem): Change type from rtx to rtx[2].
>       (cselib_preserve_only_values): Use callmem[0] rather than callmem.
>       (cselib_invalidate_mem): For the mem_rtx == callmem[1] case,
>       optimize by not trying to invalidate MEMs which clearly can't
>       be below the stack pointer.
>       (cselib_process_insn): Use callmem[0] rather than callmem.
>       For const/pure calls also call cselib_invalidate_mem (callmem[1])
>       in !ACCUMULATE_OUTGOING_ARGS or cfun->calls_alloca functions.
>       (cselib_init): Initialize callmem[0] rather than callmem and also
>       initialize callmem[1].
> 
>       * gcc.dg/pr117239.c: New test.
> 
> --- gcc/cselib.cc.jj  2025-02-01 00:46:53.073275225 +0100
> +++ gcc/cselib.cc     2025-02-05 09:15:47.394065496 +0100
> @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.
>  #include "cselib.h"
>  #include "function-abi.h"
>  #include "alias.h"
> +#include "predict.h"
>  
>  /* A list of cselib_val structures.  */
>  struct elt_list
> @@ -248,8 +249,9 @@ static unsigned int *used_regs;
>  static unsigned int n_used_regs;
>  
>  /* We pass this to cselib_invalidate_mem to invalidate all of
> -   memory for a non-const call instruction.  */
> -static GTY(()) rtx callmem;
> +   memory for a non-const call instruction and memory below the stack
> +   pointer for const/pure calls.  */
> +static GTY(()) rtx callmem[2];
>  
>  /* Set by discard_useless_locs if it deleted the last location of any
>     value.  */
> @@ -808,7 +810,7 @@ cselib_preserve_only_values (void)
>    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>      cselib_invalidate_regno (i, reg_raw_mode[i]);
>  
> -  cselib_invalidate_mem (callmem);
> +  cselib_invalidate_mem (callmem[0]);
>  
>    remove_useless_values ();
>  
> @@ -2630,6 +2632,8 @@ cselib_invalidate_mem (rtx mem_rtx)
>        struct elt_loc_list **p = &v->locs;
>        bool had_locs = v->locs != NULL;
>        rtx_insn *setting_insn = v->locs ? v->locs->setting_insn : NULL;
> +      rtx sp_base = NULL_RTX;
> +      HOST_WIDE_INT sp_off = 0;
>  
>        while (*p)
>       {
> @@ -2644,6 +2648,114 @@ cselib_invalidate_mem (rtx mem_rtx)
>             p = &(*p)->next;
>             continue;
>           }
> +
> +       /* When invalidating memory below the stack pointer for const/pure
> +          calls and alloca/VLAs aren't used, attempt to optimize.  Values
> +          stored into an area sometimes below the stack pointer shouldn't
> +          be addressable and should be stored only through stack pointer
> +          derived expressions, so don't invalidate MEMs not using stack
> +          derived addresses, nor MEMs which clearly aren't below the stack
> +          pointer.  This isn't a fully conservative approach; the hope is
> +          that invalidating more MEMs than this isn't actually needed.  */
> +       if (mem_rtx == callmem[1]
> +           && num_mems < param_max_cselib_memory_locations
> +           && GET_CODE (XEXP (x, 0)) == VALUE
> +           && !cfun->calls_alloca)
> +         {
> +           cselib_val *v2 = CSELIB_VAL_PTR (XEXP (x, 0));
> +           rtx x_base = NULL_RTX;
> +           HOST_WIDE_INT x_off = 0;
> +           if (SP_DERIVED_VALUE_P (v2->val_rtx))
> +             x_base = v2->val_rtx;
> +           else
> +             for (struct elt_loc_list *l = v2->locs; l; l = l->next)
> +               if (GET_CODE (l->loc) == PLUS
> +                   && GET_CODE (XEXP (l->loc, 0)) == VALUE
> +                   && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
> +                   && CONST_INT_P (XEXP (l->loc, 1)))
> +                 {
> +                   x_base = XEXP (l->loc, 0);
> +                   x_off = INTVAL (XEXP (l->loc, 1));
> +                   break;
> +                 }
> +           /* If x_base is NULL here, don't invalidate x, as its address
> +              isn't derived from sp and so can't be in the outgoing
> +              argument area of some call in a !ACCUMULATE_OUTGOING_ARGS
> +              function.  */
> +           if (x_base)
> +             {
> +               if (sp_base == NULL_RTX)
> +                 {
> +                   if (cselib_val *v3
> +                       = cselib_lookup_1 (stack_pointer_rtx, Pmode, 0,
> +                                          VOIDmode))
> +                     {
> +                       if (SP_DERIVED_VALUE_P (v3->val_rtx))
> +                         sp_base = v3->val_rtx;
> +                       else
> +                         for (struct elt_loc_list *l = v3->locs;
> +                              l; l = l->next)
> +                           if (GET_CODE (l->loc) == PLUS
> +                               && GET_CODE (XEXP (l->loc, 0)) == VALUE
> +                               && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
> +                               && CONST_INT_P (XEXP (l->loc, 1)))
> +                             {
> +                               sp_base = XEXP (l->loc, 0);
> +                               sp_off = INTVAL (XEXP (l->loc, 1));
> +                               break;
> +                             }
> +                     }
> +                   if (sp_base == NULL_RTX)
> +                     sp_base = pc_rtx;
> +                 }
> +               /* Otherwise, if x_base and sp_base are the same, we know
> +                  that x_base + x_off is x's address and sp_base + sp_off
> +                  is the current value of the stack pointer, so try to
> +                  determine whether x is certainly not below the stack
> +                  pointer.  */
> +               if (sp_base == x_base)
> +                 {
> +                   if (STACK_GROWS_DOWNWARD)
> +                     {
> +                       HOST_WIDE_INT off = sp_off;
> +#ifdef STACK_ADDRESS_OFFSET
> +                       /* On SPARC take stack pointer bias into account as
> +                          well.  */
> +                       off += (STACK_ADDRESS_OFFSET
> +                               - FIRST_PARM_OFFSET (current_function_decl));
> +#endif
> +                       if (x_off >= off)
> +                         /* x is at or above the current stack pointer,
> +                            no need to invalidate it.  */
> +                         x_base = NULL_RTX;
> +                     }
> +                   else
> +                     {
> +                       HOST_WIDE_INT sz;
> +                       enum machine_mode mode = GET_MODE (x);
> +                       if ((MEM_SIZE_KNOWN_P (x)
> +                            && MEM_SIZE (x).is_constant (&sz))
> +                           || (mode != BLKmode
> +                               && GET_MODE_SIZE (mode).is_constant (&sz)))
> +                         if (x_off < sp_off
> +                             && ((HOST_WIDE_INT) ((unsigned HOST_WIDE_INT)
> +                                                  x_off + sz) <= sp_off))
> +                           /* x's end is below or at the current stack
> +                              pointer in !STACK_GROWS_DOWNWARD target,
> +                              no need to invalidate it.  */
> +                           x_base = NULL_RTX;
> +                     }
> +                 }
> +             }
> +           if (x_base == NULL_RTX)
> +             {
> +               has_mem = true;
> +               num_mems++;
> +               p = &(*p)->next;
> +               continue;
> +             }
> +         }
> +
>         if (num_mems < param_max_cselib_memory_locations
>             && ! canon_anti_dependence (x, false, mem_rtx,
>                                         GET_MODE (mem_rtx), mem_addr))
> @@ -3196,14 +3308,24 @@ cselib_process_insn (rtx_insn *insn)
>        as if they were regular functions.  */
>        if (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
>         || !(RTL_CONST_OR_PURE_CALL_P (insn)))
> -     cselib_invalidate_mem (callmem);
> +     cselib_invalidate_mem (callmem[0]);
>        else
> -     /* For const/pure calls, invalidate any argument slots because
> -        they are owned by the callee.  */
> -     for (x = CALL_INSN_FUNCTION_USAGE (insn); x; x = XEXP (x, 1))
> -       if (GET_CODE (XEXP (x, 0)) == USE
> -           && MEM_P (XEXP (XEXP (x, 0), 0)))
> -         cselib_invalidate_mem (XEXP (XEXP (x, 0), 0));
> +     {
> +       /* For const/pure calls, invalidate any argument slots because
> +          they are owned by the callee.  */
> +       for (x = CALL_INSN_FUNCTION_USAGE (insn); x; x = XEXP (x, 1))
> +         if (GET_CODE (XEXP (x, 0)) == USE
> +             && MEM_P (XEXP (XEXP (x, 0), 0)))
> +           cselib_invalidate_mem (XEXP (XEXP (x, 0), 0));
> +       /* And invalidate memory below the stack pointer (or above it for
> +          !STACK_GROWS_DOWNWARD), as even a const/pure call can clobber
> +          that.  Do this only if !ACCUMULATE_OUTGOING_ARGS or if
> +          cfun->calls_alloca; otherwise the stack pointer shouldn't be
> +          changing in the middle of the function and nothing should be
> +          stored below the stack pointer.  */
> +       if (!ACCUMULATE_OUTGOING_ARGS || cfun->calls_alloca)
> +         cselib_invalidate_mem (callmem[1]);
> +     }
>      }
>  
>    cselib_record_sets (insn);
> @@ -3256,8 +3378,31 @@ cselib_init (int record_what)
>  
>    /* (mem:BLK (scratch)) is a special mechanism to conflict with everything,
>       see canon_true_dependence.  This is only created once.  */
> -  if (! callmem)
> -    callmem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
> +  if (! callmem[0])
> +    callmem[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
> +  /* Similarly create a MEM representing roughly everything below the
> +     stack pointer for STACK_GROWS_DOWNWARD targets or everything above
> +     it otherwise.  Do this only when !ACCUMULATE_OUTGOING_ARGS or
> +     if cfun->calls_alloca; otherwise the stack pointer shouldn't be
> +     changing in the middle of the function and nothing should be stored
> +     below the stack pointer.  */
> +  if (!callmem[1] && (!ACCUMULATE_OUTGOING_ARGS || cfun->calls_alloca))
> +    {
> +      if (STACK_GROWS_DOWNWARD)
> +     {
> +       unsigned HOST_WIDE_INT off = -(GET_MODE_MASK (Pmode) >> 1);
> +#ifdef STACK_ADDRESS_OFFSET
> +       /* On SPARC take stack pointer bias into account as well.  */
> +       off += (STACK_ADDRESS_OFFSET
> +               - FIRST_PARM_OFFSET (current_function_decl));
> +#endif
> +       callmem[1] = plus_constant (Pmode, stack_pointer_rtx, off);
> +     }
> +      else
> +     callmem[1] = stack_pointer_rtx;
> +      callmem[1] = gen_rtx_MEM (BLKmode, callmem[1]);
> +      set_mem_size (callmem[1], GET_MODE_MASK (Pmode) >> 1);
> +    }
>  
>    cselib_nregs = max_reg_num ();
>  
> --- gcc/testsuite/gcc.dg/pr117239.c.jj        2025-02-03 11:13:59.399159640 +0100
> +++ gcc/testsuite/gcc.dg/pr117239.c   2025-02-03 11:13:59.399159640 +0100
> @@ -0,0 +1,42 @@
> +/* PR rtl-optimization/117239 */
> +/* { dg-do run } */
> +/* { dg-options "-fno-inline -O2" } */
> +/* { dg-additional-options "-fschedule-insns" { target i?86-*-* x86_64-*-* } } */
> +
> +int a, b, c = 1, d;
> +
> +int
> +foo (void)
> +{
> +  return a;
> +}
> +
> +struct A {
> +  int e, f, g, h;
> +  short i;
> +  int j;
> +};
> +
> +void
> +bar (int x, struct A y)
> +{
> +  if (y.j == 1)
> +    c = 0;
> +}
> +
> +int
> +baz (struct A x)
> +{
> +  return b;
> +}
> +
> +int
> +main ()
> +{
> +  struct A k = { 0, 0, 0, 0, 0, 1 };
> +  d = baz (k);
> +  bar (foo (), k);
> +  if (c != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
