On Mon, Nov 22, 2021 at 10:36 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The target attribute handling is very expensive and for the common case
> from x86intrin.h where many functions get implicitly the same target
> attribute, we can speed up compilation a lot by caching it.
>
> The following patches both create a single entry cache, where they cache
> for a particular target attribute argument list the resulting
> DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
> values from ix86_valid_target_attribute_p and use the cache if the
> args are the same as last time and we start either from NULL values
> of those, or from the recorded values for those from last time.
>
> Compiling a simple:
>  #include <x86intrin.h>
>
>  int i;
> testcase with ./cc1 -quiet -O2 -isystem include/ test.c
> takes on my WS without the patches ~0.392s and with either of the
> patches ~0.182s, i.e. roughly half the time as before.
> For ./cc1plus -quiet -O2 -isystem include/ test.c
> it is slightly worse, the speed up is from ~0.613s to ~0.403s.
>
> The difference between the 2 patches is that the first one uses copy_list
> while the second one uses a vec, so I think the second one has the advantage
> of creating less GC garbage.
> I've verified both patches achieve the same content of those
> DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
> nodes as before on x86intrin.h by doing debug_tree on those and comparing
> the stderr from without these patches to with these patches.
>
> Both patches were bootstrapped/regtested on x86_64-linux and i686-linux,
> ok for trunk (and which one)?
>
> 2021-11-22  Jakub Jelinek  <ja...@redhat.com>
>
>         * attribs.h (simple_cst_list_equal): Declare.
>         * attribs.c (simple_cst_list_equal): No longer static.
>         * config/i386/i386-options.c (target_attribute_cache): New variable.
>         (ix86_valid_target_attribute_p): Cache DECL_FUNCTION_SPECIFIC_TARGET
>         and DECL_FUNCTION_SPECIFIC_OPTIMIZATION based on args.

LGTM, but really I'll just rubber-stamp the patch.

Uros.

> --- gcc/attribs.h.jj    2021-11-11 14:35:37.442350841 +0100
> +++ gcc/attribs.h       2021-11-19 11:52:08.843252645 +0100
> @@ -60,6 +60,7 @@ extern tree build_type_attribute_variant
>  extern tree build_decl_attribute_variant (tree, tree);
>  extern tree build_type_attribute_qual_variant (tree, tree, int);
>
> +extern bool simple_cst_list_equal (const_tree, const_tree);
>  extern bool attribute_value_equal (const_tree, const_tree);
>
>  /* Return 0 if the attributes for two types are incompatible, 1 if they
> --- gcc/attribs.c.jj    2021-11-11 14:35:37.442350841 +0100
> +++ gcc/attribs.c       2021-11-19 11:51:43.473615692 +0100
> @@ -1290,7 +1290,7 @@ cmp_attrib_identifiers (const_tree attr1
>  /* Compare two constructor-element-type constants.  Return 1 if the lists
>     are known to be equal; otherwise return 0.  */
>
> -static bool
> +bool
>  simple_cst_list_equal (const_tree l1, const_tree l2)
>  {
>    while (l1 != NULL_TREE && l2 != NULL_TREE)
> --- gcc/config/i386/i386-options.c.jj   2021-11-15 13:19:07.347900863 +0100
> +++ gcc/config/i386/i386-options.c      2021-11-20 00:27:32.919505947 +0100
> @@ -1403,6 +1403,8 @@ ix86_valid_target_attribute_tree (tree f
>    return t;
>  }
>
> +static GTY(()) tree target_attribute_cache[3];
> +
>  /* Hook to validate attribute((target("string"))).  */
>
>  bool
> @@ -1423,6 +1425,19 @@ ix86_valid_target_attribute_p (tree fnde
>        && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
>      return true;
>
> +  if ((DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == target_attribute_cache[1]
> +       || DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE)
> +      && (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
> +         == target_attribute_cache[2]
> +         || DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
> +      && simple_cst_list_equal (args, target_attribute_cache[0]))
> +    {
> +      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_attribute_cache[1];
> +      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
> +       = target_attribute_cache[2];
> +      return true;
> +    }
> +
>    tree old_optimize = build_optimization_node (&global_options,
>                                                &global_options_set);
>
> @@ -1456,8 +1471,17 @@ ix86_valid_target_attribute_p (tree fnde
>    if (new_target == error_mark_node)
>      ret = false;
>
> -  else if (fndecl && new_target)
> +  else if (new_target)
>      {
> +      if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE
> +         && DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
> +       {
> +         target_attribute_cache[0] = copy_list (args);
> +         target_attribute_cache[1] = new_target;
> +         target_attribute_cache[2]
> +           = old_optimize != new_optimize ? new_optimize : NULL_TREE;
> +       }
> +
>        DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
>
>        if (old_optimize != new_optimize)
>
>         Jakub

Reply via email to