> -----Original Message-----
> From: Andrew Pinski <[email protected]>
> Sent: 20 November 2025 19:04
> To: Alfie Richards <[email protected]>
> Cc: [email protected]; Richard Earnshaw
> <[email protected]>; Tamar Christina <[email protected]>;
> [email protected]; Alice Carlotti <[email protected]>; Alex Coplan
> <[email protected]>; Wilco Dijkstra <[email protected]>;
> [email protected]
> Subject: Re: [PATCH] aarch64: Cache the PCS value for a function
> 
> On Tue, Nov 18, 2025 at 9:03 AM Alfie Richards <[email protected]>
> wrote:
> >
> > Hi All,
> >
> > This fixes the compile time regression noted by Andrew Pinski.
> >
> > The compilation speed regression was caused by my patch requirig querying
> > fndecl_abi signifficantly more than previously. This fixes this by caching
> > the pcs in the machine_function struct.
> >
> > Regr tested and bootstrapped on aarch64-linux-gnu.
> >
> > Okay for master?
> >
> > Alfie
> >
> > -- >8 --
> >
> > As aarch64_function_arg_regno_p is a very hot function called many times
> for a
> > function it should not call fndecl_abi every time as it is expensive and
> > unecessasary.
> >
> > This caches the result and in doing so fixes a regression in compilation 
> > speed
> > introduced by r16-5076-g7197d8062fddc2.
> 
> I think this looks good and thanks for fixing the regressions I reported.
> I do want a second eyes on it though.
> 
> 
> >
> > gcc/ChangeLog:
> >
> >         * config/aarch64/aarch64.cc (aarch64_fndecl_abi): New function.
> >         (aarch64_function_arg_regno_p): Update to use aarch64_fndecl_abi.
> >         (aarch64_output_mi_thunk): Likewise.
> >         (aarch64_is_variant_pcs): Likewise.
> >         (aarch64_set_current_function): Update to initialize pcs value.
> >         * config/aarch64/aarch64.h (enum arm_pcs): Move location earlier in
> >         file.
> >         (machine_function) Add pcs value.
> > ---
> >  gcc/config/aarch64/aarch64.cc | 29 +++++++++++++++++++++++++----
> >  gcc/config/aarch64/aarch64.h  | 31 +++++++++++++++++--------------
> >  2 files changed, 42 insertions(+), 18 deletions(-)
> >
> > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> > index 60608a19078..2e6e468e62b 100644
> > --- a/gcc/config/aarch64/aarch64.cc
> > +++ b/gcc/config/aarch64/aarch64.cc
> > @@ -728,6 +728,22 @@ aarch64_merge_string_arguments (tree args, tree
> old_attr,
> >    return !use_old_attr;
> >  }
> >
> > +/* Get the PCS for a decl and store the result to avoid needing to do
> > +   too many calls to fndecl_abi which can be expensive.  */
> > +
> > +static arm_pcs
> > +aarch64_fndecl_abi (tree fn)
> > +{
> > +  gcc_assert (TREE_CODE (fn) == FUNCTION_DECL);
> Maybe gcc_checking_assert? I am not sure it will hurt here though.
> 
> > +  struct function *fun = DECL_STRUCT_FUNCTION (fn);
> > +  if (!fun)
> > +    return arm_pcs (fndecl_abi (fn).id ());
> > +  if (fun->machine->pcs == ARM_PCS_UNKNOWN)
> > +    fun->machine->pcs = arm_pcs (fndecl_abi (fn).id ());
> > +
> > +  return fun->machine->pcs;
> > +}
> > +
> >  /* Check whether an 'aarch64_vector_pcs' attribute is valid.  */
> >
> >  static tree
> > @@ -7896,8 +7912,7 @@ function_arg_preserve_none_regno_p (unsigned
> regno)
> >  bool
> >  aarch64_function_arg_regno_p (unsigned regno)
> >  {
> > -  enum arm_pcs pcs
> > -    = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64;
> > +  enum arm_pcs pcs = cfun ? aarch64_fndecl_abi (cfun->decl) :
> ARM_PCS_AAPCS64;
> 
> Why not just pass cfun directly and have a form of
> aarch64_function_abi which takes a `function*` as you grab from the
> decl the function anyways. So removes 2 loads and slightly faster.
> 

Agreed.  Patch looks good to me too with this change.

Thanks,
Tamar

> Thanks,
> Andrew Pinski
> 
> >
> >    switch (pcs)
> >      {
> > @@ -10764,7 +10779,7 @@ aarch64_output_mi_thunk (FILE *file, tree
> thunk ATTRIBUTE_UNUSED,
> >    funexp = XEXP (DECL_RTL (function), 0);
> >    funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
> >    auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function));
> > -  auto pcs_variant = arm_pcs (fndecl_abi (function).id ());
> > +  auto pcs_variant = aarch64_fndecl_abi (function);
> >    bool ir = lookup_attribute ("indirect_return",
> >                               TYPE_ATTRIBUTES (TREE_TYPE (function)));
> >    rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant, ir);
> > @@ -19735,6 +19750,7 @@ aarch64_init_machine_status (void)
> >  {
> >    struct machine_function *machine;
> >    machine = ggc_cleared_alloc<machine_function> ();
> > +  machine->pcs = ARM_PCS_UNKNOWN;
> >    return machine;
> >  }
> >
> > @@ -19891,6 +19907,11 @@ aarch64_set_current_function (tree fndecl)
> >
> >    aarch64_previous_fndecl = fndecl;
> >
> > +  /* Initialize the PCS value to UNKNOWN.  */
> > +  if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL)
> > +    if (function *fn = DECL_STRUCT_FUNCTION (fndecl))
> > +      fn->machine->pcs = ARM_PCS_UNKNOWN;
> > +
> >    /* First set the target options.  */
> >    cl_target_option_restore (&global_options, &global_options_set,
> >                             TREE_TARGET_OPTION (new_tree));
> > @@ -25648,7 +25669,7 @@ static bool
> >  aarch64_is_variant_pcs (tree fndecl)
> >  {
> >    /* Check for ABIs that preserve more registers than usual.  */
> > -  arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
> > +  arm_pcs pcs = aarch64_fndecl_abi (fndecl);
> >    if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs ==
> ARM_PCS_PRESERVE_NONE)
> >      return true;
> >
> > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> > index 0e596b59744..de0db894c7a 100644
> > --- a/gcc/config/aarch64/aarch64.h
> > +++ b/gcc/config/aarch64/aarch64.h
> > @@ -984,6 +984,20 @@ extern enum aarch64_cpu aarch64_tune;
> >
> >  #define DEFAULT_PCC_STRUCT_RETURN 0
> >
> > +/* The set of available Procedure Call Stardards.  */
> > +
> > +enum arm_pcs
> > +{
> > +  ARM_PCS_AAPCS64,             /* Base standard AAPCS for 64 bit.  */
> > +  ARM_PCS_SIMD,                        /* For aarch64_vector_pcs 
> > functions.  */
> > +  ARM_PCS_SVE,                 /* For functions that pass or return
> > +                                  values in SVE registers.  */
> > +  ARM_PCS_TLSDESC,             /* For targets of tlsdesc calls.  */
> > +  ARM_PCS_PRESERVE_NONE,       /* PCS variant with no call-preserved
> > +                                  registers except X29.  */
> > +  ARM_PCS_UNKNOWN
> > +};
> > +
> >  #if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
> >  struct GTY (()) aarch64_frame
> >  {
> > @@ -1151,6 +1165,9 @@ typedef struct GTY (()) machine_function
> >
> >    /* During SEH output, this is non-null.  */
> >    struct seh_frame_state * GTY ((skip (""))) seh;
> > +
> > +  /* The Procedure Call Standard for the function.  */
> > +  enum arm_pcs pcs;
> >  } machine_function;
> >  #endif
> >  #endif
> > @@ -1168,20 +1185,6 @@ enum aarch64_abi_type
> >
> >  #define TARGET_ILP32   (aarch64_abi & AARCH64_ABI_ILP32)
> >
> > -enum arm_pcs
> > -{
> > -  ARM_PCS_AAPCS64,             /* Base standard AAPCS for 64 bit.  */
> > -  ARM_PCS_SIMD,                        /* For aarch64_vector_pcs 
> > functions.  */
> > -  ARM_PCS_SVE,                 /* For functions that pass or return
> > -                                  values in SVE registers.  */
> > -  ARM_PCS_TLSDESC,             /* For targets of tlsdesc calls.  */
> > -  ARM_PCS_PRESERVE_NONE,       /* PCS variant with no call-preserved
> > -                                  registers except X29.  */
> > -  ARM_PCS_UNKNOWN
> > -};
> > -
> > -
> > -
> >
> >  /* We can't use machine_mode inside a generator file because it
> >     hasn't been created yet; we shouldn't be using any code that
> > --
> > 2.34.1
> >

Reply via email to