This patch adds support for target_clones table option. The target_clones table option allows users to specify multiple versions of a function and select the version at runtime based on the specified table.
The target clone table is a JSON object where function names serve as keys, and their values are nested objects. Each nested object maps architecture names to lists of target clone attributes. This structure allows specifying function clones for different architectures. Below is an example: ``` { "foo": { "x86_64": ["avx2", "avx512f"], "riscv64": ["arch=+v", "arch=+zba,+zbb", ...], ... // more architectures }, // more functions } ``` A example of the table is as follows on RISC-V: C source code "ror32.c": ``` void ror32(unsigned int *a, unsigned int b, unsigned long size) { for (unsigned long i = 0; i < size; i++) { a[i] = a[i] >> b | (a[i] << (32 - b)); } } ``` Table "ror32.target_clones": ``` { "ror32": { "riscv64": ["arch=+zvbb,+zbb", "arch=+zbb"] } } ``` Then use: gcc -O3 -ftarget-clones-table=ror32.target_clones -S ror32.c to compile the source code. This will generate 3 versions and its IFUNC resolver for the ror32 function which is "arch=+zvbb,+zbb" and "arch=+zbb" and the default version. Signed-off-by: Yangyu Chen <c...@cyyself.name> gcc/ChangeLog: * Makefile.in: Add -DTARGET_NAME to CFLAGS-multiple_target.o. * common.opt: Add target clone table option. * multiple_target.cc (expand_target_clones): Add support for target_clones table option. (init_clone_map): Ditto. (ipa_target_clone): Ditto. --- gcc/Makefile.in | 1 + gcc/common.opt | 7 +++ gcc/multiple_target.cc | 124 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 129 insertions(+), 3 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9535804f7fb..9bf633367e2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2682,6 +2682,7 @@ s-bversion: BASE-VER $(STAMP) s-bversion CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\" +CFLAGS-multiple_target.o += -DTARGET_NAME=\"$(target_noncanonical)\" CFLAGS-tree-diagnostic-client-data-hooks.o += -DTARGET_NAME=\"$(target_noncanonical)\" CFLAGS-optinfo-emit-json.o += -DTARGET_NAME=\"$(target_noncanonical)\" $(ZLIBINC) CFLAGS-analyzer/engine.o += $(ZLIBINC) diff --git a/gcc/common.opt b/gcc/common.opt index a76a6920b54..45d49a92b5f 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2692,6 +2692,13 @@ fprofile-reorder-functions Common Var(flag_profile_reorder_functions) Optimization Enable function reordering that improves code placement. +ftarget-clones-table= +Common Joined RejectNegative Var(target_clones_table) +Enable target clones attributes specified in the JSON file. + +Variable +const char *target_clones_table = NULL + fpatchable-function-entry= Common Var(flag_patchable_function_entry) Joined Optimization Insert NOP instructions at each function entry. diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc index d25277c0a93..b5e52d5b91f 100644 --- a/gcc/multiple_target.cc +++ b/gcc/multiple_target.cc @@ -21,6 +21,10 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ #include "config.h" +#include <fstream> +#define INCLUDE_MAP +#define INCLUDE_STRING +#define INCLUDE_SSTREAM #include "system.h" #include "coretypes.h" #include "backend.h" @@ -38,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple-walk.h" #include "tree-inline.h" #include "intl.h" +#include "json-parsing.h" /* Walker callback that replaces all FUNCTION_DECL of a function that's going to be versioned. */ @@ -311,7 +316,8 @@ create_target_clone (cgraph_node *node, bool definition, char *name, create the appropriate clone for each valid target attribute. */ static bool -expand_target_clones (struct cgraph_node *node, bool definition) +expand_target_clones (struct cgraph_node *node, bool definition, + std::map <std::string, std::string> &clone_map) { int i; /* Parsing target attributes separated by TARGET_CLONES_ATTR_SEPARATOR. */ @@ -319,7 +325,24 @@ expand_target_clones (struct cgraph_node *node, bool definition) DECL_ATTRIBUTES (node->decl)); /* No targets specified. */ if (!attr_target) - return false; + { + /* Skip functions that are declared but not defined. */ + if (DECL_INITIAL (node->decl) != NULL_TREE) + { + auto it = clone_map.find (IDENTIFIER_POINTER ( + DECL_ASSEMBLER_NAME_RAW (node->decl))); + if (it != clone_map.end ()) + { + attr_target = make_attribute ("target_clones", + it->second.c_str (), + DECL_ATTRIBUTES (node->decl)); + } + else + return false; + } + else + return false; + } tree arglist = TREE_VALUE (attr_target); int attr_len = get_target_clone_attr_len (arglist); @@ -502,14 +525,109 @@ redirect_to_specific_clone (cgraph_node *node) } } +/* Initialize the clone map from the target clone table JSON file. Specified + by the -ftarget-clone-table option. The map is a mapping from symbol name + to a string with target clones attributes separated by + TARGET_CLONES_ATTR_SEPARATOR. */ +static std::map <std::string, std::string> +init_clone_map (void) +{ + std::map <std::string, std::string> res; + if (! target_clones_table) + return res; + + /* Take target string from TARGET_NAME, this macro looks like + "x86_64-linux-gnu" and we need to strip all the suffixes + after the first dash, so it becomes "x86_64". */ + std::string target = TARGET_NAME; + if (target.find ('-') != std::string::npos) + target.erase (target.find ('-')); + + /* Open the target clone table file and read to a string. */ + std::ifstream json_file (target_clones_table); + if (json_file.fail ()) + { + error ("cannot open target clone table file %s", + target_clones_table); + return res; + } + std::stringstream ss_buf; + ss_buf << json_file.rdbuf (); + std::string json_str = ss_buf.str (); + + /* Parse the JSON string. + The JSON string format looks like this: + { + "symbol_name1": { + "target1": ["clone1", "clone2", ...], + "target2": ["clone1", "clone2", ...], + }, + ... + } + where symbol_name is the ASM name of the function mangled by the + frontend. The target1 and target2 are the targets, which can be + "x86_64", "aarch64", "riscv64", etc. The clone1, clone2, etc are the + target clones attributes, which can be "avx2", "avx512" etc. Note that + there is no need to specify the "default" target clone, it is + automatically added by the pass. */ + json::parser_result_t result = json::parse_utf8_string ( + json_str.size (), json_str.c_str (), true, NULL); + if (auto json_err = result.m_err.get ()) + { + error ("error parsing target clone table file %s: %s", + target_clones_table, json_err->get_msg ()); + return res; + } + + auto json_val = result.m_val.get (); + auto kind = json_val->get_kind (); + if (kind != json::JSON_OBJECT) + { + error ("target clone table file %s is not a JSON object", + target_clones_table); + return res; + } + auto json_obj = static_cast<const json::object *> (json_val); + unsigned i; + const char *symbol_name; + FOR_EACH_VEC_ELT (*json_obj, i, symbol_name) + { + auto symbol_val = json_obj->get (symbol_name); + if (!symbol_val || symbol_val->get_kind () != json::JSON_OBJECT) + continue; + auto symbol_obj = static_cast<const json::object *> (symbol_val); + auto cur_target_val = symbol_obj->get (target.c_str ()); + if (!cur_target_val + || cur_target_val->get_kind () != json::JSON_ARRAY) + continue; + auto cur_target_array = static_cast<const json::array *> + (cur_target_val); + std::string target_clones_str = "default"; + for (unsigned j = 0; j < cur_target_array->length (); j++) + { + auto target_str_val = cur_target_array->get (j); + if (target_str_val->get_kind () != json::JSON_STRING) + error ("target clones attribute is not a string"); + target_clones_str += TARGET_CLONES_ATTR_SEPARATOR; + target_clones_str += static_cast<const json::string *> + (target_str_val)->get_string (); + } + res[symbol_name] = target_clones_str; + } + return res; +} + static unsigned int ipa_target_clone (void) { struct cgraph_node *node; auto_vec<cgraph_node *> to_dispatch; + std::map <std::string, std::string> clone_map + = init_clone_map (); + FOR_EACH_FUNCTION (node) - if (expand_target_clones (node, node->definition)) + if (expand_target_clones (node, node->definition, clone_map)) to_dispatch.safe_push (node); for (unsigned i = 0; i < to_dispatch.length (); i++) -- 2.49.0