> On 30 Jun 2025, at 17:55, Alfie Richards <alfie.richa...@arm.com> wrote:
> 
> On 29/06/2025 19:57, Yangyu Chen wrote:
>> This patch adds support for target_clones table option. The
>> target_clones table option allows users to specify multiple versions
>> of a function and select the version at runtime based on the specified
>> table.
>> The target clone table is a JSON object where function names serve as
>> keys, and their values are nested objects. Each nested object maps
>> architecture names to lists of target clone attributes. This structure
>> allows specifying function clones for different architectures. Below is
>> an example:
>> ```
>> {
>>   "foo": {
>>     "x86_64": ["avx2", "avx512f"],
>>     "riscv64": ["arch=+v", "arch=+zba,+zbb", ...],
>>     ... // more architectures
>>   },
>>   // more functions
>> }
>> ```
>> A example of the table is as follows on RISC-V:
>> C source code "ror32.c":
>> ```
>> void ror32(unsigned int *a, unsigned int b, unsigned long size) {
>>   for (unsigned long i = 0; i < size; i++) {
>>     a[i] = a[i] >> b | (a[i] << (32 - b));
>>   }
>> }
>> ```
>> Table "ror32.target_clones":
>> ```
>> {
>>   "ror32": {
>>     "riscv64": ["arch=+zvbb,+zbb", "arch=+zbb"]
>>   }
>> }
>> ```
>> Then use: gcc -O3 -ftarget-clones-table=ror32.target_clones -S ror32.c
>> to compile the source code. This will generate 3 versions and its IFUNC
>> resolver for the ror32 function which is "arch=+zvbb,+zbb" and
>> "arch=+zbb" and the default version.
>> Signed-off-by: Yangyu Chen <c...@cyyself.name>
>> gcc/ChangeLog:
>> * Makefile.in: Add -DTARGET_NAME to CFLAGS-multiple_target.o.
>> * common.opt: Add target clone table option.
>> * multiple_target.cc (expand_target_clones): Add support for
>> target_clones table option.
>> (init_clone_map): Ditto.
>> (ipa_target_clone): Ditto.
>> ---
>>  gcc/Makefile.in        |   1 +
>>  gcc/common.opt         |   7 +++
>>  gcc/multiple_target.cc | 124 ++++++++++++++++++++++++++++++++++++++++-
>>  3 files changed, 129 insertions(+), 3 deletions(-)
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 9535804f7fb..9bf633367e2 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -2682,6 +2682,7 @@ s-bversion: BASE-VER
>>   $(STAMP) s-bversion
>>    CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>> +CFLAGS-multiple_target.o += -DTARGET_NAME=\"$(target_noncanonical)\"
>>  CFLAGS-tree-diagnostic-client-data-hooks.o += 
>> -DTARGET_NAME=\"$(target_noncanonical)\"
>>  CFLAGS-optinfo-emit-json.o += -DTARGET_NAME=\"$(target_noncanonical)\" 
>> $(ZLIBINC)
>>  CFLAGS-analyzer/engine.o += $(ZLIBINC)
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index a76a6920b54..45d49a92b5f 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -2692,6 +2692,13 @@ fprofile-reorder-functions
>>  Common Var(flag_profile_reorder_functions) Optimization
>>  Enable function reordering that improves code placement.
>>  +ftarget-clones-table=
>> +Common Joined RejectNegative Var(target_clones_table)
>> +Enable target clones attributes specified in the JSON file.
>> +
>> +Variable
>> +const char *target_clones_table = NULL
>> +
>>  fpatchable-function-entry=
>>  Common Var(flag_patchable_function_entry) Joined Optimization
>>  Insert NOP instructions at each function entry.
>> diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
>> index d25277c0a93..b5e52d5b91f 100644
>> --- a/gcc/multiple_target.cc
>> +++ b/gcc/multiple_target.cc
>> @@ -21,6 +21,10 @@ along with GCC; see the file COPYING3.  If not see
>>  <http://www.gnu.org/licenses/>.  */
>>    #include "config.h"
>> +#include <fstream>
>> +#define INCLUDE_MAP
>> +#define INCLUDE_STRING
>> +#define INCLUDE_SSTREAM
>>  #include "system.h"
>>  #include "coretypes.h"
>>  #include "backend.h"
>> @@ -38,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "gimple-walk.h"
>>  #include "tree-inline.h"
>>  #include "intl.h"
>> +#include "json-parsing.h"
>>    /* Walker callback that replaces all FUNCTION_DECL of a function that's
>>     going to be versioned.  */
>> @@ -311,7 +316,8 @@ create_target_clone (cgraph_node *node, bool definition, 
>> char *name,
>>     create the appropriate clone for each valid target attribute.  */
>>    static bool
>> -expand_target_clones (struct cgraph_node *node, bool definition)
>> +expand_target_clones (struct cgraph_node *node, bool definition,
>> +       std::map <std::string, std::string> &clone_map)
>>  {
>>    int i;
>>    /* Parsing target attributes separated by TARGET_CLONES_ATTR_SEPARATOR.  
>> */
>> @@ -319,7 +325,24 @@ expand_target_clones (struct cgraph_node *node, bool 
>> definition)
>>          DECL_ATTRIBUTES (node->decl));
>>    /* No targets specified.  */
>>    if (!attr_target)
>> -    return false;
>> +    {
> 
> This seems to only apply the file specified versions if there is no attribute 
> in the source code. I think this could be confusing to users.
> 
> My preference would be to merge the source code attr versions and the JSON 
> file versions, but if you would rather not do that then I think we should at 
> least issue a warning acknowledging the conflict.
> 

Indeed! I think we should merge them and warn user this behavior exists.

>> +      /* Skip functions that are declared but not defined.  */
>> +      if (DECL_INITIAL (node->decl) != NULL_TREE)
>> + {
>> +   auto it = clone_map.find (IDENTIFIER_POINTER (
>> +     DECL_ASSEMBLER_NAME_RAW (node->decl)));
>> +   if (it != clone_map.end ())
>> +     {
>> +       attr_target = make_attribute ("target_clones",
>> +     it->second.c_str (),
>> +     DECL_ATTRIBUTES (node->decl));
>> +     }
>> +   else
>> +     return false;
>> + }
>> +      else
>> + return false;
>> +    }
>>      tree arglist = TREE_VALUE (attr_target);
>>    int attr_len = get_target_clone_attr_len (arglist);
>> @@ -502,14 +525,109 @@ redirect_to_specific_clone (cgraph_node *node)
>>      }
>>  }
>>  +/* Initialize the clone map from the target clone table JSON file.  
>> Specified
>> +   by the -ftarget-clone-table option.  The map is a mapping from symbol 
>> name
>> +   to a string with target clones attributes separated by
>> +   TARGET_CLONES_ATTR_SEPARATOR.  */
> 
> I would rather this return something like:
> std::map <std::string, auto_vec<std::string>>
> with a string for each version, to avoid reparsing the newly created string 
> later? But this isn't a deal breaker for me.

It requires modifying existing code for parsing the string. I think we can 
leave it for future work.

Thanks,
Yangyu Chen

> 
>> +static std::map <std::string, std::string>
>> +init_clone_map (void)
>> +{
>> +  std::map <std::string, std::string> res;
>> +  if (! target_clones_table)
>> +    return res;
>> +
>> +  /* Take target string from TARGET_NAME, this macro looks like
>> +     "x86_64-linux-gnu" and we need to strip all the suffixes
>> +     after the first dash, so it becomes "x86_64".  */
>> +  std::string target = TARGET_NAME;
>> +  if (target.find ('-') != std::string::npos)
>> +    target.erase (target.find ('-'));
>> +
>> +  /* Open the target clone table file and read to a string.  */
>> +  std::ifstream json_file (target_clones_table);
>> +  if (json_file.fail ())
>> +    {
>> +      error ("cannot open target clone table file %s",
>> +      target_clones_table);
>> +      return res;
>> +    }
>> +  std::stringstream ss_buf;
>> +  ss_buf << json_file.rdbuf ();
>> +  std::string json_str = ss_buf.str ();
>> +
>> +  /* Parse the JSON string.
>> +     The JSON string format looks like this:
>> +     {
>> +       "symbol_name1": {
>> +  "target1": ["clone1", "clone2", ...],
>> +  "target2": ["clone1", "clone2", ...],
>> +       },
>> +       ...
>> +     }
>> +     where symbol_name is the ASM name of the function mangled by the
>> +     frontend.  The target1 and target2 are the targets, which can be
>> +     "x86_64", "aarch64", "riscv64", etc.  The clone1, clone2, etc are the
>> +     target clones attributes, which can be "avx2", "avx512" etc.  Note that
>> +     there is no need to specify the "default" target clone, it is
>> +     automatically added by the pass.  */
>> +  json::parser_result_t result = json::parse_utf8_string (
>> +    json_str.size (), json_str.c_str (), true, NULL);
>> +  if (auto json_err = result.m_err.get ())
>> +    {
>> +      error ("error parsing target clone table file %s: %s",
>> +      target_clones_table, json_err->get_msg ());
>> +      return res;
>> +    }
>> +
>> +  auto json_val = result.m_val.get ();
>> +  auto kind = json_val->get_kind ();
>> +  if (kind != json::JSON_OBJECT)
>> +    {
>> +      error ("target clone table file %s is not a JSON object",
>> +      target_clones_table);
>> +      return res;
>> +    }
>> +  auto json_obj = static_cast<const json::object *> (json_val);
>> +  unsigned i;
>> +  const char *symbol_name;
>> +  FOR_EACH_VEC_ELT (*json_obj, i, symbol_name)
>> +    {
>> +      auto symbol_val = json_obj->get (symbol_name);
>> +      if (!symbol_val || symbol_val->get_kind () != json::JSON_OBJECT)
>> + continue;
>> +      auto symbol_obj = static_cast<const json::object *> (symbol_val);
>> +      auto cur_target_val = symbol_obj->get (target.c_str ());
>> +      if (!cur_target_val
>> +   || cur_target_val->get_kind () != json::JSON_ARRAY)
>> + continue;
>> +      auto cur_target_array = static_cast<const json::array *>
>> + (cur_target_val);
>> +      std::string target_clones_str = "default";
>> +      for (unsigned j = 0; j < cur_target_array->length (); j++)
>> + {
>> +   auto target_str_val = cur_target_array->get (j);
>> +   if (target_str_val->get_kind () != json::JSON_STRING)
>> +     error ("target clones attribute is not a string");
>> +   target_clones_str += TARGET_CLONES_ATTR_SEPARATOR;
>> +   target_clones_str += static_cast<const json::string *>
>> +     (target_str_val)->get_string ();
>> + }
>> +      res[symbol_name] = target_clones_str;
>> +    }
>> +  return res;
>> +}
>> +
>>  static unsigned int
>>  ipa_target_clone (void)
>>  {
>>    struct cgraph_node *node;
>>    auto_vec<cgraph_node *> to_dispatch;
>>  +  std::map <std::string, std::string> clone_map
>> +    = init_clone_map ();
>> +
>>    FOR_EACH_FUNCTION (node)
>> -    if (expand_target_clones (node, node->definition))
>> +    if (expand_target_clones (node, node->definition, clone_map))
>>        to_dispatch.safe_push (node);
>>      for (unsigned i = 0; i < to_dispatch.length (); i++)


Reply via email to