Prior to this commit, GCC -O2 generated quite bad code for this function: bool f() { return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); }
f: movl __cpu_model+12(%rip), %eax xorl %r8d, %r8d testb $4, %al je .L1 shrl $6, %eax movl %eax, %r8d andl $1, %r8d .L1: movl %r8d, %eax ret The problem was caused by the fact that internally every invocation of __builtin_cpu_supports built a new variable __cpu_model and a new type __processor_model. Because of this, GIMPLE-level optimizers weren't able to CSE the loads of __cpu_model and optimize the bit operations properly. This commit fixes the problem by caching the created __cpu_model variable and the __processor_model type. Now GCC -O2 generates: f: movl __cpu_model+12(%rip), %eax andl $68, %eax cmpl $68, %eax sete %al ret gcc/ChangeLog: PR target/91400 * config/i386/i386-builtins.c (fold_builtin_cpu): Extract building of __cpu_model and __processor_model into new function. * config/i386/i386-builtins.c (init_cpu_model_var): New. Cache creation of __cpu_model and __processor_model. gcc/testsuite/ChangeLog: PR target/91400 * gcc.target/i386/pr91400.c: New. --- gcc/config/i386/i386-builtins.c | 27 ++++++++++++++++++------- gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++++++++++ 2 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index 4fcdf4b89ee..96534318756 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -2085,6 +2085,25 @@ make_var_decl (tree type, const char *name) return new_decl; } +static GTY(()) tree __cpu_model_var; +static GTY(()) tree __processor_model_type; + +static void +init_cpu_model_var() +{ + if (__cpu_model_var != NULL_TREE) + { + gcc_assert(__processor_model_type != NULL_TREE); + return; + } + + __processor_model_type = build_processor_model_struct (); + __cpu_model_var = make_var_decl (__processor_model_type, + "__cpu_model"); + + varpool_node::add (__cpu_model_var); +} + /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded into an integer defined in 
libgcc/config/i386/cpuinfo.c */ @@ -2096,13 +2115,7 @@ fold_builtin_cpu (tree fndecl, tree *args) = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); tree param_string_cst = NULL; - tree __processor_model_type = build_processor_model_struct (); - tree __cpu_model_var = make_var_decl (__processor_model_type, - "__cpu_model"); - - - varpool_node::add (__cpu_model_var); - + init_cpu_model_var (); gcc_assert ((args != NULL) && (*args != NULL)); param_string_cst = *args; diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c b/gcc/testsuite/gcc.target/i386/pr91400.c new file mode 100644 index 00000000000..e8b7d9285f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91400.c @@ -0,0 +1,11 @@ +/* PR target/91400 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "andl" 1 } } */ +/* { dg-final { scan-assembler-times "68" 2 } } */ +/* { dg-final { scan-assembler-not "je" } } */ + +_Bool f() +{ + return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); +} -- 2.25.1