> On 30 Jun 2025, at 17:55, Alfie Richards <alfie.richa...@arm.com> wrote:
>
> On 29/06/2025 19:57, Yangyu Chen wrote:
>> This patch adds support for target_clones table option. The
>> target_clones table option allows users to specify multiple versions
>> of a function and select the version at runtime based on the specified
>> table.
>> The target clone table is a JSON object where function names serve as
>> keys, and their values are nested objects. Each nested object maps
>> architecture names to lists of target clone attributes. This structure
>> allows specifying function clones for different architectures. Below is
>> an example:
>> ```
>> {
>> "foo": {
>> "x86_64": ["avx2", "avx512f"],
>> "riscv64": ["arch=+v", "arch=+zba,+zbb", ...],
>> ... // more architectures
>> },
>> // more functions
>> }
>> ```
>> A example of the table is as follows on RISC-V:
>> C source code "ror32.c":
>> ```
>> void ror32(unsigned int *a, unsigned int b, unsigned long size) {
>> for (unsigned long i = 0; i < size; i++) {
>> a[i] = a[i] >> b | (a[i] << (32 - b));
>> }
>> }
>> ```
>> Table "ror32.target_clones":
>> ```
>> {
>> "ror32": {
>> "riscv64": ["arch=+zvbb,+zbb", "arch=+zbb"]
>> }
>> }
>> ```
>> Then use: gcc -O3 -ftarget-clones-table=ror32.target_clones -S ror32.c
>> to compile the source code. This will generate 3 versions and its IFUNC
>> resolver for the ror32 function which is "arch=+zvbb,+zbb" and
>> "arch=+zbb" and the default version.
>> Signed-off-by: Yangyu Chen <c...@cyyself.name>
>> gcc/ChangeLog:
>> * Makefile.in: Add -DTARGET_NAME to CFLAGS-multiple_target.o.
>> * common.opt: Add target clone table option.
>> * multiple_target.cc (expand_target_clones): Add support for
>> target_clones table option.
>> (init_clone_map): Ditto.
>> (ipa_target_clone): Ditto.
>> ---
>> gcc/Makefile.in | 1 +
>> gcc/common.opt | 7 +++
>> gcc/multiple_target.cc | 124 ++++++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 129 insertions(+), 3 deletions(-)
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 9535804f7fb..9bf633367e2 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -2682,6 +2682,7 @@ s-bversion: BASE-VER
>> $(STAMP) s-bversion
>> CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> +CFLAGS-multiple_target.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> CFLAGS-tree-diagnostic-client-data-hooks.o +=
>> -DTARGET_NAME=\"$(target_noncanonical)\"
>> CFLAGS-optinfo-emit-json.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> $(ZLIBINC)
>> CFLAGS-analyzer/engine.o += $(ZLIBINC)
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index a76a6920b54..45d49a92b5f 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -2692,6 +2692,13 @@ fprofile-reorder-functions
>> Common Var(flag_profile_reorder_functions) Optimization
>> Enable function reordering that improves code placement.
>> +ftarget-clones-table=
>> +Common Joined RejectNegative Var(target_clones_table)
>> +Enable target clones attributes specified in the JSON file.
>> +
>> +Variable
>> +const char *target_clones_table = NULL
>> +
>> fpatchable-function-entry=
>> Common Var(flag_patchable_function_entry) Joined Optimization
>> Insert NOP instructions at each function entry.
>> diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
>> index d25277c0a93..b5e52d5b91f 100644
>> --- a/gcc/multiple_target.cc
>> +++ b/gcc/multiple_target.cc
>> @@ -21,6 +21,10 @@ along with GCC; see the file COPYING3. If not see
>> <http://www.gnu.org/licenses/>. */
>> #include "config.h"
>> +#include <fstream>
>> +#define INCLUDE_MAP
>> +#define INCLUDE_STRING
>> +#define INCLUDE_SSTREAM
>> #include "system.h"
>> #include "coretypes.h"
>> #include "backend.h"
>> @@ -38,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "gimple-walk.h"
>> #include "tree-inline.h"
>> #include "intl.h"
>> +#include "json-parsing.h"
>> /* Walker callback that replaces all FUNCTION_DECL of a function that's
>> going to be versioned. */
>> @@ -311,7 +316,8 @@ create_target_clone (cgraph_node *node, bool definition,
>> char *name,
>> create the appropriate clone for each valid target attribute. */
>> static bool
>> -expand_target_clones (struct cgraph_node *node, bool definition)
>> +expand_target_clones (struct cgraph_node *node, bool definition,
>> + std::map <std::string, std::string> &clone_map)
>> {
>> int i;
>> /* Parsing target attributes separated by TARGET_CLONES_ATTR_SEPARATOR.
>> */
>> @@ -319,7 +325,24 @@ expand_target_clones (struct cgraph_node *node, bool
>> definition)
>> DECL_ATTRIBUTES (node->decl));
>> /* No targets specified. */
>> if (!attr_target)
>> - return false;
>> + {
>
> This seems to only apply the file specified versions if there is no attribute
> in the source code. I think this could be confusing to users.
>
> My preference would be to merge the source code attr versions and the JSON
> file versions, but if you would rather not do that then I think we should at
> least issue a warning acknowledging the conflict.
>
Indeed! I think we should merge them and warn user this behavior exists.
>> + /* Skip functions that are declared but not defined. */
>> + if (DECL_INITIAL (node->decl) != NULL_TREE)
>> + {
>> + auto it = clone_map.find (IDENTIFIER_POINTER (
>> + DECL_ASSEMBLER_NAME_RAW (node->decl)));
>> + if (it != clone_map.end ())
>> + {
>> + attr_target = make_attribute ("target_clones",
>> + it->second.c_str (),
>> + DECL_ATTRIBUTES (node->decl));
>> + }
>> + else
>> + return false;
>> + }
>> + else
>> + return false;
>> + }
>> tree arglist = TREE_VALUE (attr_target);
>> int attr_len = get_target_clone_attr_len (arglist);
>> @@ -502,14 +525,109 @@ redirect_to_specific_clone (cgraph_node *node)
>> }
>> }
>> +/* Initialize the clone map from the target clone table JSON file.
>> Specified
>> + by the -ftarget-clone-table option. The map is a mapping from symbol
>> name
>> + to a string with target clones attributes separated by
>> + TARGET_CLONES_ATTR_SEPARATOR. */
>
> I would rather this return something like:
> std::map <std::string, auto_vec<std::string>>
> with a string for each version, to avoid reparsing the newly created string
> later? But this isn't a deal breaker for me.
It requires modifying existing code for parsing the string. I think we can
leave it for future work.
Thanks,
Yangyu Chen
>
>> +static std::map <std::string, std::string>
>> +init_clone_map (void)
>> +{
>> + std::map <std::string, std::string> res;
>> + if (! target_clones_table)
>> + return res;
>> +
>> + /* Take target string from TARGET_NAME, this macro looks like
>> + "x86_64-linux-gnu" and we need to strip all the suffixes
>> + after the first dash, so it becomes "x86_64". */
>> + std::string target = TARGET_NAME;
>> + if (target.find ('-') != std::string::npos)
>> + target.erase (target.find ('-'));
>> +
>> + /* Open the target clone table file and read to a string. */
>> + std::ifstream json_file (target_clones_table);
>> + if (json_file.fail ())
>> + {
>> + error ("cannot open target clone table file %s",
>> + target_clones_table);
>> + return res;
>> + }
>> + std::stringstream ss_buf;
>> + ss_buf << json_file.rdbuf ();
>> + std::string json_str = ss_buf.str ();
>> +
>> + /* Parse the JSON string.
>> + The JSON string format looks like this:
>> + {
>> + "symbol_name1": {
>> + "target1": ["clone1", "clone2", ...],
>> + "target2": ["clone1", "clone2", ...],
>> + },
>> + ...
>> + }
>> + where symbol_name is the ASM name of the function mangled by the
>> + frontend. The target1 and target2 are the targets, which can be
>> + "x86_64", "aarch64", "riscv64", etc. The clone1, clone2, etc are the
>> + target clones attributes, which can be "avx2", "avx512" etc. Note that
>> + there is no need to specify the "default" target clone, it is
>> + automatically added by the pass. */
>> + json::parser_result_t result = json::parse_utf8_string (
>> + json_str.size (), json_str.c_str (), true, NULL);
>> + if (auto json_err = result.m_err.get ())
>> + {
>> + error ("error parsing target clone table file %s: %s",
>> + target_clones_table, json_err->get_msg ());
>> + return res;
>> + }
>> +
>> + auto json_val = result.m_val.get ();
>> + auto kind = json_val->get_kind ();
>> + if (kind != json::JSON_OBJECT)
>> + {
>> + error ("target clone table file %s is not a JSON object",
>> + target_clones_table);
>> + return res;
>> + }
>> + auto json_obj = static_cast<const json::object *> (json_val);
>> + unsigned i;
>> + const char *symbol_name;
>> + FOR_EACH_VEC_ELT (*json_obj, i, symbol_name)
>> + {
>> + auto symbol_val = json_obj->get (symbol_name);
>> + if (!symbol_val || symbol_val->get_kind () != json::JSON_OBJECT)
>> + continue;
>> + auto symbol_obj = static_cast<const json::object *> (symbol_val);
>> + auto cur_target_val = symbol_obj->get (target.c_str ());
>> + if (!cur_target_val
>> + || cur_target_val->get_kind () != json::JSON_ARRAY)
>> + continue;
>> + auto cur_target_array = static_cast<const json::array *>
>> + (cur_target_val);
>> + std::string target_clones_str = "default";
>> + for (unsigned j = 0; j < cur_target_array->length (); j++)
>> + {
>> + auto target_str_val = cur_target_array->get (j);
>> + if (target_str_val->get_kind () != json::JSON_STRING)
>> + error ("target clones attribute is not a string");
>> + target_clones_str += TARGET_CLONES_ATTR_SEPARATOR;
>> + target_clones_str += static_cast<const json::string *>
>> + (target_str_val)->get_string ();
>> + }
>> + res[symbol_name] = target_clones_str;
>> + }
>> + return res;
>> +}
>> +
>> static unsigned int
>> ipa_target_clone (void)
>> {
>> struct cgraph_node *node;
>> auto_vec<cgraph_node *> to_dispatch;
>> + std::map <std::string, std::string> clone_map
>> + = init_clone_map ();
>> +
>> FOR_EACH_FUNCTION (node)
>> - if (expand_target_clones (node, node->definition))
>> + if (expand_target_clones (node, node->definition, clone_map))
>> to_dispatch.safe_push (node);
>> for (unsigned i = 0; i < to_dispatch.length (); i++)