Prior to this commit GCC -O2 generated quite bad code for this
function:

bool f()
{
    return __builtin_cpu_supports("popcnt")
        && __builtin_cpu_supports("ssse3");
}

f:
        movl    __cpu_model+12(%rip), %eax
        xorl    %r8d, %r8d
        testb   $4, %al
        je      .L1
        shrl    $6, %eax
        movl    %eax, %r8d
        andl    $1, %r8d
.L1:
        movl    %r8d, %eax
        ret

The problem was caused by the fact that internally every invocation
of __builtin_cpu_supports built a new variable __cpu_model and a new
type __processor_model. Because of this GIMPLE level optimizers
weren't able to CSE the loads of __cpu_model and optimize
bit-operations properly.

This commit fixes the problem by caching created __cpu_model
variable and __processor_model type. Now the GCC -O2 generates:

f:
        movl    __cpu_model+12(%rip), %eax
        andl    $68, %eax
        cmpl    $68, %eax
        sete    %al
        ret

gcc/ChangeLog:

        PR target/91400
        * config/i386/i386-builtins.c (fold_builtin_cpu): Extract
        building of __cpu_model and __processor_model into new
        function.
        * config/i386/i386-builtins.c (init_cpu_model_var): New.
        Cache creation of __cpu_model and __processor_model.

gcc/testsuite/Changelog:

        PR target/91400
        * gcc.target/i386/pr91400.c: New.
---
 gcc/config/i386/i386-builtins.c         | 27 ++++++++++++++++++-------
 gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++++++++++
 2 files changed, 31 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c

diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index 4fcdf4b89ee..96534318756 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -2085,6 +2085,25 @@ make_var_decl (tree type, const char *name)
   return new_decl;
 }
 
+static GTY(()) tree __cpu_model_var;
+static GTY(()) tree __processor_model_type;
+
+static void
+init_cpu_model_var()
+{
+  if (__cpu_model_var != NULL_TREE)
+    {
+      gcc_assert(__processor_model_type != NULL_TREE);
+      return;
+    }
+
+  __processor_model_type = build_processor_model_struct ();
+  __cpu_model_var = make_var_decl (__processor_model_type,
+                                  "__cpu_model");
+
+  varpool_node::add (__cpu_model_var);
+}
+
 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
    into an integer defined in libgcc/config/i386/cpuinfo.c */
 
@@ -2096,13 +2115,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
     = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   tree param_string_cst = NULL;
 
-  tree __processor_model_type = build_processor_model_struct ();
-  tree __cpu_model_var = make_var_decl (__processor_model_type,
-                                       "__cpu_model");
-
-
-  varpool_node::add (__cpu_model_var);
-
+  init_cpu_model_var ();
   gcc_assert ((args != NULL) && (*args != NULL));
 
   param_string_cst = *args;
diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c 
b/gcc/testsuite/gcc.target/i386/pr91400.c
new file mode 100644
index 00000000000..e8b7d9285f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91400.c
@@ -0,0 +1,11 @@
+/* PR target/91400 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "andl" 1 } } */
+/* { dg-final { scan-assembler-times "68" 2 } } */
+/* { dg-final { scan-assembler-not "je" } } */
+
+_Bool f()
+{
+    return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3");
+}
-- 
2.25.1

Reply via email to