Hi!

This patch adds folding for the new ia32 md builtins.
If they can be folded into a constant, that is done in ix86_fold_builtin;
if they can be folded to the corresponding generic __builtin_c[lt]z* (which
has e.g. the advantage that VRP knows what values the result can have etc.),
that is done in the gimple_fold_builtin target hook.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-10-21  Jakub Jelinek  <ja...@redhat.com>

        PR target/78057
        * config/i386/i386.c: Include fold-const-call.h, tree-vrp.h
        and tree-ssanames.h.
        (ix86_fold_builtin): Fold IX86_BUILTIN_[LT]ZCNT{16,32,64}
        with INTEGER_CST argument.
        (ix86_gimple_fold_builtin): New function.
        (TARGET_GIMPLE_FOLD_BUILTIN): Define.

        * gcc.target/i386/pr78057.c: New test.

--- gcc/config/i386/i386.c.jj   2016-10-21 11:36:33.135677698 +0200
+++ gcc/config/i386/i386.c      2016-10-21 11:57:58.248530521 +0200
@@ -77,6 +77,9 @@ along with GCC; see the file COPYING3.
 #include "case-cfn-macros.h"
 #include "regrename.h"
 #include "dojump.h"
+#include "fold-const-call.h"
+#include "tree-vrp.h"
+#include "tree-ssanames.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -33332,6 +33335,40 @@ ix86_fold_builtin (tree fndecl, int n_ar
            return build_real (type, inf);
          }
 
+       case IX86_BUILTIN_TZCNT16:
+       case IX86_BUILTIN_TZCNT32:
+       case IX86_BUILTIN_TZCNT64:
+         gcc_assert (n_args == 1);
+         if (TREE_CODE (args[0]) == INTEGER_CST)
+           {
+             tree type = TREE_TYPE (TREE_TYPE (fndecl));
+             tree arg = args[0];
+             if (fn_code == IX86_BUILTIN_TZCNT16)
+               arg = fold_convert (short_unsigned_type_node, arg);
+             if (integer_zerop (arg))
+               return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
+             else
+               return fold_const_call (CFN_CTZ, type, arg);
+           }
+         break;
+
+       case IX86_BUILTIN_LZCNT16:
+       case IX86_BUILTIN_LZCNT32:
+       case IX86_BUILTIN_LZCNT64:
+         gcc_assert (n_args == 1);
+         if (TREE_CODE (args[0]) == INTEGER_CST)
+           {
+             tree type = TREE_TYPE (TREE_TYPE (fndecl));
+             tree arg = args[0];
+             if (fn_code == IX86_BUILTIN_LZCNT16)
+               arg = fold_convert (short_unsigned_type_node, arg);
+             if (integer_zerop (arg))
+               return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
+             else
+               return fold_const_call (CFN_CLZ, type, arg);
+           }
+         break;
+
        default:
          break;
        }
@@ -33344,6 +33381,67 @@ ix86_fold_builtin (tree fndecl, int n_ar
   return NULL_TREE;
 }
 
+/* Fold a MD builtin (use ix86_fold_builtin for folding into
+   constant) in GIMPLE.  */
+
+bool
+ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (stmt);
+  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
+  int n_args = gimple_call_num_args (stmt);
+  enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
+  tree decl = NULL_TREE;
+  tree arg0;
+
+  switch (fn_code)
+    {
+    case IX86_BUILTIN_TZCNT32:
+      decl = builtin_decl_implicit (BUILT_IN_CTZ);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_TZCNT64:
+      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_LZCNT32:
+      decl = builtin_decl_implicit (BUILT_IN_CLZ);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_LZCNT64:
+      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
+      goto fold_tzcnt_lzcnt;
+
+    fold_tzcnt_lzcnt:
+      gcc_assert (n_args == 1);
+      arg0 = gimple_call_arg (stmt, 0);
+      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
+       {
+         int prec = TYPE_PRECISION (TREE_TYPE (arg0));
+         if (!expr_not_equal_to (arg0, wi::zero (prec)))
+           return false;
+
+         location_t loc = gimple_location (stmt);
+         gimple *g = gimple_build_call (decl, 1, arg0);
+         gimple_set_location (g, loc);
+         tree lhs = make_ssa_name (integer_type_node);
+         gimple_call_set_lhs (g, lhs);
+         gsi_insert_before (gsi, g, GSI_SAME_STMT);
+         g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
+         gimple_set_location (g, loc);
+         gsi_replace (gsi, g, true);
+         return true;
+       }
+      break;
+
+    default:
+      break;
+    }
+
+  return false;
+}
+
 /* Make builtins to detect cpu type and features supported.  NAME is
    the builtin name, CODE is the builtin code, and FTYPE is the function
    type of the builtin.  */
@@ -50531,6 +50629,9 @@ ix86_addr_space_zero_address_valid (addr
 #undef TARGET_FOLD_BUILTIN
 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
 
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
+
 #undef TARGET_COMPARE_VERSION_PRIORITY
 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
 
--- gcc/testsuite/gcc.target/i386/pr78057.c.jj  2016-10-21 11:57:58.249530508 +0200
+++ gcc/testsuite/gcc.target/i386/pr78057.c     2016-10-21 11:57:58.249530508 +0200
@@ -0,0 +1,42 @@
+/* PR target/78057 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -mlzcnt -fdump-tree-optimized" } */
+
+extern void link_error (void);
+
+int
+foo (int x)
+{
+  if (__builtin_ia32_tzcnt_u16 (16) != 4
+      || __builtin_ia32_tzcnt_u16 (0) != 16
+      || __builtin_ia32_lzcnt_u16 (0x1ff) != 7
+      || __builtin_ia32_lzcnt_u16 (0) != 16
+      || __builtin_ia32_tzcnt_u32 (8) != 3
+      || __builtin_ia32_tzcnt_u32 (0) != 32
+      || __builtin_ia32_lzcnt_u32 (0x3fffffff) != 2
+      || __builtin_ia32_lzcnt_u32 (0) != 32
+#ifdef __x86_64__
+      || __builtin_ia32_tzcnt_u64 (4) != 2
+      || __builtin_ia32_tzcnt_u64 (0) != 64
+      || __builtin_ia32_lzcnt_u64 (0x1fffffff) != 35
+      || __builtin_ia32_lzcnt_u64 (0) != 64
+#endif
+     )
+    link_error ();
+  x += 2;
+  if (x == 0)
+    return 5;
+  return __builtin_ia32_tzcnt_u32 (x)
+         + __builtin_ia32_lzcnt_u32 (x)
+#ifdef __x86_64__
+        + __builtin_ia32_tzcnt_u64 (x)
+        + __builtin_ia32_lzcnt_u64 (x)
+#endif
+        ;
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_ia32_\[lt]zcnt" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ctz " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clz " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ctzll " 1 "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clzll " 1 "optimized" { target lp64 } } } */

        Jakub

Reply via email to