The 10/03/2025 01:08, Yangyu Chen wrote:
> 
> 
> On 29/9/2025 19:50, Alfie Richards wrote:
> > On 28/09/2025 16:52, Yangyu Chen wrote:
> > > This patch adds support for target_clones table option. The
> > > target_clones table option allows users to specify multiple versions
> > > of a function and select the version at runtime based on the specified
> > > table.
> > > 
> > > The target clone table is a JSON object where function names serve as
> > > keys, and their values are nested objects. Each nested object maps
> > > architecture names to lists of target clone attributes. This structure
> > > allows specifying function clones for different architectures. Below is
> > > an example:
> > > 
> > > ```
> > > {
> > >    "foo": {
> > >      "x86_64": ["avx2", "avx512f"],
> > >      "riscv64": ["arch=+v", "arch=+zba,+zbb", ...],
> > >      ... // more architectures
> > >    },
> > >    // more functions
> > > }
> > > ```
> > Thanks for the patch! I like the idea a lot.>
> > > A example of the table is as follows on RISC-V:
> > > 
> > > C source code "ror32.c":
> > > 
> > > ```
> > > void ror32(unsigned int *a, unsigned int b, unsigned long size) {
> > >    for (unsigned long i = 0; i < size; i++) {
> > >      a[i] = a[i] >> b | (a[i] << (32 - b));
> > >    }
> > > }
> > > ```
> > > 
> > > Table "ror32.target_clones":
> > > 
> > > ```
> > > {
> > >    "ror32": {
> > >      "riscv64": ["arch=+zvbb,+zbb", "arch=+zbb"]
> > >    }
> > > }
> > > ```
> > > 
> > > Then use: gcc -O3 -ftarget-clones-table=ror32.target_clones -S ror32.c
> > > to compile the source code. This will generate 3 versions and its IFUNC
> > > resolver for the ror32 function which is "arch=+zvbb,+zbb" and
> > > "arch=+zbb" and the default version.
> > > 
> > > Signed-off-by: Yangyu Chen <[email protected]>
> > > 
> > > gcc/ChangeLog:
> > > 
> > >     * Makefile.in: Add -DTARGET_NAME to CFLAGS-multiple_target.o.
> > >     * common.opt: Add target clone table option.
> > >     * multiple_target.cc (expand_target_clones): Add support for
> > >     target_clones table option.
> > >     (node_versionable_function_p): New function to check if a function
> > >     can be versioned.
> > >     (init_clone_map): Ditto.
> > >     (ipa_target_clone): Ditto.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >     * gcc.target/i386/tct-0.c: New test.
> > >     * gcc.target/i386/tct-0.json: New test.
> > > ---
> > >   gcc/Makefile.in                          |   1 +
> > >   gcc/common.opt                           |   7 ++
> > >   gcc/multiple_target.cc                   | 138 +++++++++++++++++++++--
> > >   gcc/testsuite/gcc.target/i386/tct-0.c    |  11 ++
> > >   gcc/testsuite/gcc.target/i386/tct-0.json |   5 +
> > >   5 files changed, 152 insertions(+), 10 deletions(-)
> > >   create mode 100644 gcc/testsuite/gcc.target/i386/tct-0.c
> > >   create mode 100644 gcc/testsuite/gcc.target/i386/tct-0.json
> > > 
> > > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > > index 4503dab6037..99ed204f036 100644
> > > --- a/gcc/Makefile.in
> > > +++ b/gcc/Makefile.in
> > > @@ -2694,6 +2694,7 @@ s-bversion: BASE-VER
> > >       $(STAMP) s-bversion
> > >   CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\"
> > > +CFLAGS-multiple_target.o += -DTARGET_NAME=\"$(target_noncanonical)\"
> > >   CFLAGS-tree-diagnostic-client-data-hooks.o += -
> > > DTARGET_NAME=\"$(target_noncanonical)\"
> > >   CFLAGS-optinfo-emit-json.o += -
> > > DTARGET_NAME=\"$(target_noncanonical)\" $(ZLIBINC)
> > >   CFLAGS-analyzer/engine.o += $(ZLIBINC)
> > > diff --git a/gcc/common.opt b/gcc/common.opt
> > > index f6d93dc05fb..102e03496b8 100644
> > > --- a/gcc/common.opt
> > > +++ b/gcc/common.opt
> > > @@ -2718,6 +2718,13 @@ fprofile-reorder-functions
> > >   Common Var(flag_profile_reorder_functions) Optimization
> > >   Enable function reordering that improves code placement.
> > > +ftarget-clones-table=
> > > +Common Joined RejectNegative Var(target_clones_table)
> > > +Enable target clones attributes specified in the JSON file.
> > > +
> > > +Variable
> > > +const char *target_clones_table = NULL
> > > +
> > >   fpatchable-function-entry=
> > >   Common Var(flag_patchable_function_entry) Joined Optimization
> > >   Insert NOP instructions at each function entry.
> > > diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
> > > index e85d7e71442..33014449210 100644
> > > --- a/gcc/multiple_target.cc
> > > +++ b/gcc/multiple_target.cc
> > > @@ -21,6 +21,10 @@ along with GCC; see the file COPYING3.  If not see
> > >   <http://www.gnu.org/licenses/>.  */
> > >   #include "config.h"
> > > +#include <fstream>
> > > +#define INCLUDE_MAP
> > > +#define INCLUDE_STRING
> > > +#define INCLUDE_SSTREAM
> > >   #include "system.h"
> > >   #include "coretypes.h"
> > >   #include "backend.h"
> > > @@ -38,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
> > >   #include "gimple-walk.h"
> > >   #include "tree-inline.h"
> > >   #include "intl.h"
> > > +#include "json-parsing.h"
> > >   /* Walker callback that replaces all FUNCTION_DECL of a function that's
> > >      going to be versioned.  */
> > > @@ -252,25 +257,46 @@ create_target_clone (cgraph_node *node, bool
> > > definition, char *name,
> > >     return new_node;
> > >   }
> > > +/* Skip functions that are declared but not defined.  Also skip
> > > +   C++ virtual functions, as they cannot be cloned.  */
> > > +static bool node_versionable_function_p (cgraph_node *node)
> > > +{
> > > +  return (!node->definition
> > > +      || (!node->alias && tree_versionable_function_p (node->decl)))
> > > +      && !DECL_VIRTUAL_P (node->decl);
> > > +}
> > > +
> > >   /* If the function in NODE has multiple target attributes
> > >      create the appropriate clone for each valid target attribute.  */
> > >   static bool
> > > -expand_target_clones (struct cgraph_node *node, bool definition)
> > > +expand_target_clones (struct cgraph_node *node, bool definition,
> > > +              std::map <std::string, auto_vec<string_slice> >
> > > +              &clone_map)>   {
> > > -  /* Parsing target attributes separated by
> > > TARGET_CLONES_ATTR_SEPARATOR.  */
> > > -  tree attr_target = lookup_attribute ("target_clones",
> > > -                       DECL_ATTRIBUTES (node->decl));
> > > -  /* No targets specified.  */
> > > -  if (!attr_target)
> > > -    return false;
> > > -
> > >     int num_defaults = 0;
> > >     auto_vec<string_slice> attr_list = get_clone_versions (node->decl,
> > >                                &num_defaults);
> > > +  /* Skip functions that are declared but not defined.  */
> > > +  if (DECL_INITIAL (node->decl) != NULL_TREE)
> > > +    {
> > > +      auto it = clone_map.find (IDENTIFIER_POINTER (
> > > +                DECL_ASSEMBLER_NAME_RAW (node->decl)));
> > > +      if (it != clone_map.end () && node_versionable_function_p (node))
> > > +    {
> > > +      for (string_slice attr : it->second)
> > > +        attr_list.safe_push (attr);
> > > +
> > > +      if (num_defaults == 0) /* No default in the source, add one.  */
> > > +        {
> > > +          attr_list.safe_push ("default");
> > > +          num_defaults = 1;
> > > +        }
> > > +    }
> > > +    }
> > >     /* If the target clones list is empty after filtering, remove
> > > this node.  */
> > > -  if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE && attr_list.is_empty ())
> > > +  if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE || attr_list.is_empty ())
> > This seems wrong? I think this means that we will always delete the node
> > for targets with target_version semantics?>       {
> 
> Hi Alfie,
> 
> I didn't understand why we should remove the entire cgraph_node when
> attr_list is empty and !TARGET_HAS_FMV_TARGET_ATTRIBUTE (for target uses
> "target_version" like aarch64 and riscv currently). The original logic was
> written by you.

Hi Yangyu,

Yes, you're right that this is for the case where you have a target_clones
containing only garbage version strings.

In particular, it's spec'ed that for aarch64 an invalid target_clones version
string is a *warning* instead of an error, and that a target_clones doesn't
implicitly imply a default version.

Therefore, if we have:

int fn [[gnu::target_version ("default")]] (void) 
{return 1;}
int fn [[gnu::target_clones ("garbage1", "garbage2")]] (void) 
{return 2;}

Then this would be 2 warnings, and the second decl ends up completely removed.

This is not the case for x86 and Power. They do not allow target_clones to be
mixed with target_version, or multiple target_clones decls to be combined, so
always implicitly add default to target_clones.

> 
> This means that if there is an illegal target_clones attribute, the entire
> function will be removed. I didn't understand why we should do this instead
> of giving them an error. And I don't even understand why this only applies
> for aarch64 and riscv.

Yeah this is correct, if the target_clones is all invalid values, then warning's
are issues, and the node is removed. This is intended behaviour.
We have the check_target_clone_version hook to allow emitting diagnostics
at invalid versions.

The original intention of this is to allow a source with target_clones with
version strings for multiple targets (though with warnings).
Though that would require more alignment with other targets to work well.

This is (hopefully) described in the ACLE function multi-versioning section.

Thanks,
Alfie
> 
> Should we remove the "node->remove ();" and only return false in this case?
> 
> Thanks,
> Yangyu Chen
> 
> > >         node->remove ();
> > >         return false;
> > > @@ -506,11 +532,103 @@ is_simple_target_clones_case (cgraph_node *node)
> > >     return true;
> > >   }
> > > +/* Initialize the clone map from the target clone table JSON file.
> > > Specified
> > > +   by the -ftarget-clone-table option.  The map is a mapping from
> > > symbol name
> > > +   to a string with target clones attributes separated by
> > > +   TARGET_CLONES_ATTR_SEPARATOR.  */
> > > +static std::map <std::string, auto_vec<string_slice> >
> > > +init_clone_map (void)
> > > +{
> > > +  std::map <std::string, auto_vec<string_slice> > res;
> > > +  if (! target_clones_table)
> > > +    return res;
> > > +
> > > +  /* Take target string from TARGET_NAME, this macro looks like
> > > +     "x86_64-linux-gnu" and we need to strip all the suffixes
> > > +     after the first dash, so it becomes "x86_64".  */
> > > +  std::string target = TARGET_NAME;
> > > +  if (target.find ('-') != std::string::npos)
> > > +    target.erase (target.find ('-'));
> > > +
> > > +  /* Open the target clone table file and read to a string.  */
> > > +  std::ifstream json_file (target_clones_table);
> > > +  if (json_file.fail ())
> > > +    {
> > > +      error ("cannot open target clone table file %s",
> > > +         target_clones_table);
> > > +      return res;
> > > +    }
> > > +  std::stringstream ss_buf;
> > > +  ss_buf << json_file.rdbuf ();
> > > +  std::string json_str = ss_buf.str ();
> > > +
> > > +  /* Parse the JSON string.
> > > +     The JSON string format looks like this:
> > > +     {
> > > +       "symbol_name1": {
> > > +     "target1": ["clone1", "clone2", ...],
> > > +     "target2": ["clone1", "clone2", ...],
> > > +       },
> > > +       ...
> > > +     }
> > > +     where symbol_name is the ASM name of the function mangled by the
> > > +     frontend.  The target1 and target2 are the targets, which can be
> > > +     "x86_64", "aarch64", "riscv64", etc.  The clone1, clone2, etc
> > > are the
> > > +     target clones attributes, which can be "avx2", "avx512" etc.
> > > Note that
> > > +     there is no need to specify the "default" target clone, it is
> > > +     automatically added by the pass.  */
> > > +  json::parser_result_t result = json::parse_utf8_string (
> > > +    json_str.size (), json_str.c_str (), true, NULL);
> > > +  if (auto json_err = result.m_err.get ())
> > > +    {
> > > +      error ("error parsing target clone table file %s: %s",
> > > +         target_clones_table, json_err->get_msg ());
> > > +      return res;
> > > +    }
> > > +
> > > +  auto json_val = result.m_val.get ();
> > > +  auto kind = json_val->get_kind ();
> > > +  if (kind != json::JSON_OBJECT)
> > > +    {
> > > +      error ("target clone table file %s is not a JSON object",
> > > +         target_clones_table);
> > > +      return res;
> > > +    }
> > > +  auto json_obj = static_cast<const json::object *> (json_val);
> > > +  unsigned i;
> > > +  const char *symbol_name;
> > > +  FOR_EACH_VEC_ELT (*json_obj, i, symbol_name)
> > > +    {
> > > +      auto symbol_val = json_obj->get (symbol_name);
> > > +      if (!symbol_val || symbol_val->get_kind () != json::JSON_OBJECT)
> > > +    continue;
> > > +      auto symbol_obj = static_cast<const json::object *> (symbol_val);
> > > +      auto cur_target_val = symbol_obj->get (target.c_str ());
> > > +      if (!cur_target_val
> > > +      || cur_target_val->get_kind () != json::JSON_ARRAY)
> > > +    continue;
> > > +      auto cur_target_array = static_cast<const json::array *>
> > > +    (cur_target_val);
> > > +      for (unsigned j = 0; j < cur_target_array->length (); j++)
> > > +    {
> > > +      auto target_str_val = cur_target_array->get (j);
> > > +      if (target_str_val->get_kind () != json::JSON_STRING)
> > > +        error ("target clones attribute is not a string");
> > > +      res[symbol_name].safe_push (string_slice (
> > > +        ggc_strdup (static_cast<const json::string *>
> > > +            (target_str_val)->get_string ())));
> > > +    }
> > > +    }
> > > +  return res;
> > > +}
> > > +
> > >   static unsigned int
> > >   ipa_target_clone (bool early)
> > >   {
> > >     struct cgraph_node *node;
> > >     auto_vec<cgraph_node *> to_dispatch;
> > > +  std::map <std::string, auto_vec<string_slice> > clone_map
> > > +    = init_clone_map ();
> > >     /* Don't need to do anything early for target attribute
> > > semantics.  */
> > >     if (early && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
> > > @@ -543,7 +661,7 @@ ipa_target_clone (bool early)
> > >        the simple case.  Simple cases are dispatched in the later
> > > stage.  */
> > >         if (early == !is_simple_target_clones_case (node))
> > > -    if (expand_target_clones (node, node->definition)
> > > +    if (expand_target_clones (node, node->definition, clone_map)
> > >           && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
> > >         /* In non target_version semantics, dispatch all target
> > > clones.  */
> > >         to_dispatch.safe_push (node);
> > > diff --git a/gcc/testsuite/gcc.target/i386/tct-0.c b/gcc/testsuite/
> > > gcc.target/i386/tct-0.c
> > > new file mode 100644
> > > index 00000000000..0de0938e9fd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/tct-0.c
> > > @@ -0,0 +1,11 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-require-ifunc "" } */
> > > +/* { dg-options "-ftarget-clones-table=${srcdir}/gcc.target/i386/
> > > tct-0.json" } */
> > > +/* { dg-final { scan-assembler "foo\.default" } } */
> > > +/* { dg-final { scan-assembler "foo\.arch_x86_64_v2" } } */
> > > +/* { dg-final { scan-assembler "foo\.arch_x86_64_v3" } } */
> > > +/* { dg-final { scan-assembler "foo\.arch_x86_64_v4" } } */
> > > +
> > > +void foo() {
> > > +
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/tct-0.json
> > > b/gcc/testsuite/ gcc.target/i386/tct-0.json
> > > new file mode 100644
> > > index 00000000000..9dc712e66e2
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/tct-0.json
> > > @@ -0,0 +1,5 @@
> > > +{
> > > +    "foo": {
> > > +        "x86_64": ["arch=x86-64-v2", "arch=x86-64-v3", "arch=x86-64-v4"]
> > > +    }
> > > +}
> > 
> > LGTM other than that one issue. But I cannot approve.
> 

-- 
Alfie Richards

Reply via email to