> On 29 Sep 2025, at 19:50, Alfie Richards <[email protected]> wrote:
>
> On 28/09/2025 16:52, Yangyu Chen wrote:
>> This patch adds support for target_clones table option. The
>> target_clones table option allows users to specify multiple versions
>> of a function and select the version at runtime based on the specified
>> table.
>> The target clone table is a JSON object where function names serve as
>> keys, and their values are nested objects. Each nested object maps
>> architecture names to lists of target clone attributes. This structure
>> allows specifying function clones for different architectures. Below is
>> an example:
>> ```
>> {
>> "foo": {
>> "x86_64": ["avx2", "avx512f"],
>> "riscv64": ["arch=+v", "arch=+zba,+zbb", ...],
>> ... // more architectures
>> },
>> // more functions
>> }
>> ```
> Thanks for the patch! I like the idea a lot.>
>> A example of the table is as follows on RISC-V:
>> C source code "ror32.c":
>> ```
>> void ror32(unsigned int *a, unsigned int b, unsigned long size) {
>> for (unsigned long i = 0; i < size; i++) {
>> a[i] = a[i] >> b | (a[i] << (32 - b));
>> }
>> }
>> ```
>> Table "ror32.target_clones":
>> ```
>> {
>> "ror32": {
>> "riscv64": ["arch=+zvbb,+zbb", "arch=+zbb"]
>> }
>> }
>> ```
>> Then use: gcc -O3 -ftarget-clones-table=ror32.target_clones -S ror32.c
>> to compile the source code. This will generate 3 versions and its IFUNC
>> resolver for the ror32 function which is "arch=+zvbb,+zbb" and
>> "arch=+zbb" and the default version.
>> Signed-off-by: Yangyu Chen <[email protected]>
>> gcc/ChangeLog:
>> * Makefile.in: Add -DTARGET_NAME to CFLAGS-multiple_target.o.
>> * common.opt: Add target clone table option.
>> * multiple_target.cc (expand_target_clones): Add support for
>> target_clones table option.
>> (node_versionable_function_p): New function to check if a function
>> can be versioned.
>> (init_clone_map): Ditto.
>> (ipa_target_clone): Ditto.
>> gcc/testsuite/ChangeLog:
>> * gcc.target/i386/tct-0.c: New test.
>> * gcc.target/i386/tct-0.json: New test.
>> ---
>> gcc/Makefile.in | 1 +
>> gcc/common.opt | 7 ++
>> gcc/multiple_target.cc | 138 +++++++++++++++++++++--
>> gcc/testsuite/gcc.target/i386/tct-0.c | 11 ++
>> gcc/testsuite/gcc.target/i386/tct-0.json | 5 +
>> 5 files changed, 152 insertions(+), 10 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/i386/tct-0.c
>> create mode 100644 gcc/testsuite/gcc.target/i386/tct-0.json
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 4503dab6037..99ed204f036 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -2694,6 +2694,7 @@ s-bversion: BASE-VER
>> $(STAMP) s-bversion
>> CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> +CFLAGS-multiple_target.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> CFLAGS-tree-diagnostic-client-data-hooks.o +=
>> -DTARGET_NAME=\"$(target_noncanonical)\"
>> CFLAGS-optinfo-emit-json.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> $(ZLIBINC)
>> CFLAGS-analyzer/engine.o += $(ZLIBINC)
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index f6d93dc05fb..102e03496b8 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -2718,6 +2718,13 @@ fprofile-reorder-functions
>> Common Var(flag_profile_reorder_functions) Optimization
>> Enable function reordering that improves code placement.
>> +ftarget-clones-table=
>> +Common Joined RejectNegative Var(target_clones_table)
>> +Enable target clones attributes specified in the JSON file.
>> +
>> +Variable
>> +const char *target_clones_table = NULL
>> +
>> fpatchable-function-entry=
>> Common Var(flag_patchable_function_entry) Joined Optimization
>> Insert NOP instructions at each function entry.
>> diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
>> index e85d7e71442..33014449210 100644
>> --- a/gcc/multiple_target.cc
>> +++ b/gcc/multiple_target.cc
>> @@ -21,6 +21,10 @@ along with GCC; see the file COPYING3. If not see
>> <http://www.gnu.org/licenses/>. */
>> #include "config.h"
>> +#include <fstream>
>> +#define INCLUDE_MAP
>> +#define INCLUDE_STRING
>> +#define INCLUDE_SSTREAM
>> #include "system.h"
>> #include "coretypes.h"
>> #include "backend.h"
>> @@ -38,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "gimple-walk.h"
>> #include "tree-inline.h"
>> #include "intl.h"
>> +#include "json-parsing.h"
>> /* Walker callback that replaces all FUNCTION_DECL of a function that's
>> going to be versioned. */
>> @@ -252,25 +257,46 @@ create_target_clone (cgraph_node *node, bool
>> definition, char *name,
>> return new_node;
>> }
>> +/* Skip functions that are declared but not defined. Also skip
>> + C++ virtual functions, as they cannot be cloned. */
>> +static bool node_versionable_function_p (cgraph_node *node)
>> +{
>> + return (!node->definition
>> + || (!node->alias && tree_versionable_function_p (node->decl)))
>> + && !DECL_VIRTUAL_P (node->decl);
>> +}
>> +
>> /* If the function in NODE has multiple target attributes
>> create the appropriate clone for each valid target attribute. */
>> static bool
>> -expand_target_clones (struct cgraph_node *node, bool definition)
>> +expand_target_clones (struct cgraph_node *node, bool definition,
>> + std::map <std::string, auto_vec<string_slice> >
>> + &clone_map)> {
>> - /* Parsing target attributes separated by TARGET_CLONES_ATTR_SEPARATOR.
>> */
>> - tree attr_target = lookup_attribute ("target_clones",
>> - DECL_ATTRIBUTES (node->decl));
>> - /* No targets specified. */
>> - if (!attr_target)
>> - return false;
>> -
>> int num_defaults = 0;
>> auto_vec<string_slice> attr_list = get_clone_versions (node->decl,
>> &num_defaults);
>> + /* Skip functions that are declared but not defined. */
>> + if (DECL_INITIAL (node->decl) != NULL_TREE)
>> + {
>> + auto it = clone_map.find (IDENTIFIER_POINTER (
>> + DECL_ASSEMBLER_NAME_RAW (node->decl)));
>> + if (it != clone_map.end () && node_versionable_function_p (node))
>> + {
>> + for (string_slice attr : it->second)
>> + attr_list.safe_push (attr);
>> +
>> + if (num_defaults == 0) /* No default in the source, add one. */
>> + {
>> + attr_list.safe_push ("default");
>> + num_defaults = 1;
>> + }
>> + }
>> + }
>> /* If the target clones list is empty after filtering, remove this
>> node. */
>> - if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE && attr_list.is_empty ())
>> + if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE || attr_list.is_empty ())
> This seems wrong? I think this means that we will always delete the node for
> targets with target_version semantics?> {
Indeed. I think it should be:
if (attr_list.is_empty ())
{
if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE)
node->remove ();
return false;
}
Thanks,
Yangyu Chen
>> node->remove ();
>> return false;
>> @@ -506,11 +532,103 @@ is_simple_target_clones_case (cgraph_node *node)
>> return true;
>> }
>> +/* Initialize the clone map from the target clone table JSON file.
>> Specified
>> + by the -ftarget-clone-table option. The map is a mapping from symbol
>> name
>> + to a string with target clones attributes separated by
>> + TARGET_CLONES_ATTR_SEPARATOR. */
>> +static std::map <std::string, auto_vec<string_slice> >
>> +init_clone_map (void)
>> +{
>> + std::map <std::string, auto_vec<string_slice> > res;
>> + if (! target_clones_table)
>> + return res;
>> +
>> + /* Take target string from TARGET_NAME, this macro looks like
>> + "x86_64-linux-gnu" and we need to strip all the suffixes
>> + after the first dash, so it becomes "x86_64". */
>> + std::string target = TARGET_NAME;
>> + if (target.find ('-') != std::string::npos)
>> + target.erase (target.find ('-'));
>> +
>> + /* Open the target clone table file and read to a string. */
>> + std::ifstream json_file (target_clones_table);
>> + if (json_file.fail ())
>> + {
>> + error ("cannot open target clone table file %s",
>> + target_clones_table);
>> + return res;
>> + }
>> + std::stringstream ss_buf;
>> + ss_buf << json_file.rdbuf ();
>> + std::string json_str = ss_buf.str ();
>> +
>> + /* Parse the JSON string.
>> + The JSON string format looks like this:
>> + {
>> + "symbol_name1": {
>> + "target1": ["clone1", "clone2", ...],
>> + "target2": ["clone1", "clone2", ...],
>> + },
>> + ...
>> + }
>> + where symbol_name is the ASM name of the function mangled by the
>> + frontend. The target1 and target2 are the targets, which can be
>> + "x86_64", "aarch64", "riscv64", etc. The clone1, clone2, etc are the
>> + target clones attributes, which can be "avx2", "avx512" etc. Note that
>> + there is no need to specify the "default" target clone, it is
>> + automatically added by the pass. */
>> + json::parser_result_t result = json::parse_utf8_string (
>> + json_str.size (), json_str.c_str (), true, NULL);
>> + if (auto json_err = result.m_err.get ())
>> + {
>> + error ("error parsing target clone table file %s: %s",
>> + target_clones_table, json_err->get_msg ());
>> + return res;
>> + }
>> +
>> + auto json_val = result.m_val.get ();
>> + auto kind = json_val->get_kind ();
>> + if (kind != json::JSON_OBJECT)
>> + {
>> + error ("target clone table file %s is not a JSON object",
>> + target_clones_table);
>> + return res;
>> + }
>> + auto json_obj = static_cast<const json::object *> (json_val);
>> + unsigned i;
>> + const char *symbol_name;
>> + FOR_EACH_VEC_ELT (*json_obj, i, symbol_name)
>> + {
>> + auto symbol_val = json_obj->get (symbol_name);
>> + if (!symbol_val || symbol_val->get_kind () != json::JSON_OBJECT)
>> + continue;
>> + auto symbol_obj = static_cast<const json::object *> (symbol_val);
>> + auto cur_target_val = symbol_obj->get (target.c_str ());
>> + if (!cur_target_val
>> + || cur_target_val->get_kind () != json::JSON_ARRAY)
>> + continue;
>> + auto cur_target_array = static_cast<const json::array *>
>> + (cur_target_val);
>> + for (unsigned j = 0; j < cur_target_array->length (); j++)
>> + {
>> + auto target_str_val = cur_target_array->get (j);
>> + if (target_str_val->get_kind () != json::JSON_STRING)
>> + error ("target clones attribute is not a string");
>> + res[symbol_name].safe_push (string_slice (
>> + ggc_strdup (static_cast<const json::string *>
>> + (target_str_val)->get_string ())));
>> + }
>> + }
>> + return res;
>> +}
>> +
>> static unsigned int
>> ipa_target_clone (bool early)
>> {
>> struct cgraph_node *node;
>> auto_vec<cgraph_node *> to_dispatch;
>> + std::map <std::string, auto_vec<string_slice> > clone_map
>> + = init_clone_map ();
>> /* Don't need to do anything early for target attribute semantics. */
>> if (early && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
>> @@ -543,7 +661,7 @@ ipa_target_clone (bool early)
>> the simple case. Simple cases are dispatched in the later stage. */
>> if (early == !is_simple_target_clones_case (node))
>> - if (expand_target_clones (node, node->definition)
>> + if (expand_target_clones (node, node->definition, clone_map)
>> && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
>> /* In non target_version semantics, dispatch all target clones. */
>> to_dispatch.safe_push (node);
>> diff --git a/gcc/testsuite/gcc.target/i386/tct-0.c
>> b/gcc/testsuite/gcc.target/i386/tct-0.c
>> new file mode 100644
>> index 00000000000..0de0938e9fd
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/tct-0.c
>> @@ -0,0 +1,11 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-ifunc "" } */
>> +/* { dg-options
>> "-ftarget-clones-table=${srcdir}/gcc.target/i386/tct-0.json" } */
>> +/* { dg-final { scan-assembler "foo\.default" } } */
>> +/* { dg-final { scan-assembler "foo\.arch_x86_64_v2" } } */
>> +/* { dg-final { scan-assembler "foo\.arch_x86_64_v3" } } */
>> +/* { dg-final { scan-assembler "foo\.arch_x86_64_v4" } } */
>> +
>> +void foo() {
>> +
>> +}
>> diff --git a/gcc/testsuite/gcc.target/i386/tct-0.json
>> b/gcc/testsuite/gcc.target/i386/tct-0.json
>> new file mode 100644
>> index 00000000000..9dc712e66e2
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/tct-0.json
>> @@ -0,0 +1,5 @@
>> +{
>> + "foo": {
>> + "x86_64": ["arch=x86-64-v2", "arch=x86-64-v3", "arch=x86-64-v4"]
>> + }
>> +}
>
> LGTM other than that one issue. But I cannot approve.