Hi! This patch adds folding for the new ia32 md builtins. If they can be folded into a constant, that is done in ix86_fold_builtin; if they can be folded to the corresponding generic __builtin_c[lt]z* builtins (which have e.g. the advantage that VRP knows what values they can have, etc.), that is done in the gimple_fold_builtin target hook.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-10-21 Jakub Jelinek <ja...@redhat.com> PR target/78057 * config/i386/i386.c: Include fold-const-call.h, tree-vrp.h and tree-ssanames.h. (ix86_fold_builtin): Fold IX86_BUILTIN_[LT]ZCNT{16,32,64} with INTEGER_CST argument. (ix86_gimple_fold_builtin): New function. (TARGET_GIMPLE_FOLD_BUILTIN): Define. * gcc.target/i386/pr78057.c: New test. --- gcc/config/i386/i386.c.jj 2016-10-21 11:36:33.135677698 +0200 +++ gcc/config/i386/i386.c 2016-10-21 11:57:58.248530521 +0200 @@ -77,6 +77,9 @@ along with GCC; see the file COPYING3. #include "case-cfn-macros.h" #include "regrename.h" #include "dojump.h" +#include "fold-const-call.h" +#include "tree-vrp.h" +#include "tree-ssanames.h" /* This file should be included last. */ #include "target-def.h" @@ -33332,6 +33335,40 @@ ix86_fold_builtin (tree fndecl, int n_ar return build_real (type, inf); } + case IX86_BUILTIN_TZCNT16: + case IX86_BUILTIN_TZCNT32: + case IX86_BUILTIN_TZCNT64: + gcc_assert (n_args == 1); + if (TREE_CODE (args[0]) == INTEGER_CST) + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg = args[0]; + if (fn_code == IX86_BUILTIN_TZCNT16) + arg = fold_convert (short_unsigned_type_node, arg); + if (integer_zerop (arg)) + return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); + else + return fold_const_call (CFN_CTZ, type, arg); + } + break; + + case IX86_BUILTIN_LZCNT16: + case IX86_BUILTIN_LZCNT32: + case IX86_BUILTIN_LZCNT64: + gcc_assert (n_args == 1); + if (TREE_CODE (args[0]) == INTEGER_CST) + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg = args[0]; + if (fn_code == IX86_BUILTIN_LZCNT16) + arg = fold_convert (short_unsigned_type_node, arg); + if (integer_zerop (arg)) + return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); + else + return fold_const_call (CFN_CLZ, type, arg); + } + break; + default: break; } @@ -33344,6 +33381,67 @@ ix86_fold_builtin (tree fndecl, int n_ar return NULL_TREE; } +/* Fold a 
MD builtin (use ix86_fold_builtin for folding into + constant) in GIMPLE. */ + +bool +ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); + int n_args = gimple_call_num_args (stmt); + enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); + tree decl = NULL_TREE; + tree arg0; + + switch (fn_code) + { + case IX86_BUILTIN_TZCNT32: + decl = builtin_decl_implicit (BUILT_IN_CTZ); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_TZCNT64: + decl = builtin_decl_implicit (BUILT_IN_CTZLL); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_LZCNT32: + decl = builtin_decl_implicit (BUILT_IN_CLZ); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_LZCNT64: + decl = builtin_decl_implicit (BUILT_IN_CLZLL); + goto fold_tzcnt_lzcnt; + + fold_tzcnt_lzcnt: + gcc_assert (n_args == 1); + arg0 = gimple_call_arg (stmt, 0); + if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) + { + int prec = TYPE_PRECISION (TREE_TYPE (arg0)); + if (!expr_not_equal_to (arg0, wi::zero (prec))) + return false; + + location_t loc = gimple_location (stmt); + gimple *g = gimple_build_call (decl, 1, arg0); + gimple_set_location (g, loc); + tree lhs = make_ssa_name (integer_type_node); + gimple_call_set_lhs (g, lhs); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + break; + + default: + break; + } + + return false; +} + /* Make builtins to detect cpu type and features supported. NAME is the builtin name, CODE is the builtin code, and FTYPE is the function type of the builtin. 
*/ @@ -50531,6 +50629,9 @@ ix86_addr_space_zero_address_valid (addr #undef TARGET_FOLD_BUILTIN #define TARGET_FOLD_BUILTIN ix86_fold_builtin +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin + #undef TARGET_COMPARE_VERSION_PRIORITY #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority --- gcc/testsuite/gcc.target/i386/pr78057.c.jj 2016-10-21 11:57:58.249530508 +0200 +++ gcc/testsuite/gcc.target/i386/pr78057.c 2016-10-21 11:57:58.249530508 +0200 @@ -0,0 +1,42 @@ +/* PR target/78057 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi -mlzcnt -fdump-tree-optimized" } */ + +extern void link_error (void); + +int +foo (int x) +{ + if (__builtin_ia32_tzcnt_u16 (16) != 4 + || __builtin_ia32_tzcnt_u16 (0) != 16 + || __builtin_ia32_lzcnt_u16 (0x1ff) != 7 + || __builtin_ia32_lzcnt_u16 (0) != 16 + || __builtin_ia32_tzcnt_u32 (8) != 3 + || __builtin_ia32_tzcnt_u32 (0) != 32 + || __builtin_ia32_lzcnt_u32 (0x3fffffff) != 2 + || __builtin_ia32_lzcnt_u32 (0) != 32 +#ifdef __x86_64__ + || __builtin_ia32_tzcnt_u64 (4) != 2 + || __builtin_ia32_tzcnt_u64 (0) != 64 + || __builtin_ia32_lzcnt_u64 (0x1fffffff) != 35 + || __builtin_ia32_lzcnt_u64 (0) != 64 +#endif + ) + link_error (); + x += 2; + if (x == 0) + return 5; + return __builtin_ia32_tzcnt_u32 (x) + + __builtin_ia32_lzcnt_u32 (x) +#ifdef __x86_64__ + + __builtin_ia32_tzcnt_u64 (x) + + __builtin_ia32_lzcnt_u64 (x) +#endif + ; +} + +/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "__builtin_ia32_\[lt]zcnt" "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ctz " 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_clz " 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ctzll " 1 "optimized" { target lp64 } } } */ +/* { dg-final { scan-tree-dump-times "__builtin_clzll " 1 "optimized" { target lp64 } } } */ Jakub