On Sat, 18 Nov 2023, Jakub Jelinek wrote:

> Hi!
> 
> On Fri, Nov 17, 2023 at 03:01:04PM +0100, Jakub Jelinek wrote:
> > As a follow-up, I'm considering changing in this routine the popcount
> > call to IFN_POPCOUNT with 2 arguments and during expansion test costs.
> 
> Here is the follow-up which does the rtx costs testing.
> While having to tweak internal-fn.def so that POPCOUNT can have a custom
> expand_POPCOUNT, I have noticed we are inconsistent, some DEF_INTERNAL*
> macros (most of them) were undefined at the end of internal-fn.def (but in
> some cases uselessly undefined again after inclusion), while others were not
> (and sometimes undefined after the inclusion).  I've changed it to always
> undefine at the end of internal-fn.def.

The last bit is cleanup that could go in independently.

> Ok for trunk if it passes bootstrap/regtest?
> 
> 2023-11-18  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/90693
>       * tree-ssa-math-opts.cc (match_single_bit_test): Mark POPCOUNT with
>       result only used in equality comparison against 1 with direct optab
>       support as .POPCOUNT call with 2 arguments.
>       * internal-fn.h (expand_POPCOUNT): Declare.
>       * internal-fn.def: Document missing DEF_INTERNAL* macros and make sure
>       they are all undefined at the end.
>       (DEF_INTERNAL_INT_EXT_FN): New macro.
>       (POPCOUNT): Use it instead of DEF_INTERNAL_INT_FN.
>       * internal-fn.cc (lookup_hilo_internal_fn, lookup_evenodd_internal_fn,
>       widening_fn_p, get_len_internal_fn): Don't undef DEF_INTERNAL_*FN
>       macros after inclusion of internal-fn.def.
>       (DEF_INTERNAL_INT_EXT_FN): Define to nothing before inclusion to
>       define expanders.
>       (expand_POPCOUNT): New function.
> 
> --- gcc/tree-ssa-math-opts.cc.jj      2023-11-18 09:38:03.460813910 +0100
> +++ gcc/tree-ssa-math-opts.cc 2023-11-18 10:25:18.751207936 +0100
> @@ -5195,7 +5195,16 @@ match_single_bit_test (gimple_stmt_itera
>    if (!INTEGRAL_TYPE_P (type))
>      return;
>    if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
> -    return;
> +    {
> +      /* Tell expand_POPCOUNT the popcount result is only used in equality
> +      comparison with one, so that it can decide based on rtx costs.  */
> +      gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
> +                                           integer_one_node);
> +      gimple_call_set_lhs (g, gimple_call_lhs (call));
> +      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
> +      gsi_replace (&gsi2, g, true);
> +      return;
> +    }
>    tree argm1 = make_ssa_name (type);
>    gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
>                                  build_int_cst (type, -1));
> --- gcc/internal-fn.h.jj      2023-11-02 12:15:12.223998565 +0100
> +++ gcc/internal-fn.h 2023-11-18 10:14:25.834340505 +0100
> @@ -262,6 +262,7 @@ extern void expand_MULBITINT (internal_f
>  extern void expand_DIVMODBITINT (internal_fn, gcall *);
>  extern void expand_FLOATTOBITINT (internal_fn, gcall *);
>  extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
> +extern void expand_POPCOUNT (internal_fn, gcall *);
>  
>  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
>  
> --- gcc/internal-fn.def.jj    2023-11-17 15:51:02.642802521 +0100
> +++ gcc/internal-fn.def       2023-11-18 10:12:10.329236626 +0100
> @@ -33,9 +33,13 @@ along with GCC; see the file COPYING3.
>       DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SIGNED_OPTAB,
>                                  UNSIGNED_OPTAB, TYPE)
>       DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
> +     DEF_INTERNAL_FLT_FLOATN_FN (NAME, FLAGS, OPTAB, TYPE)
>       DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
> +     DEF_INTERNAL_INT_EXT_FN (NAME, FLAGS, OPTAB, TYPE)
>       DEF_INTERNAL_COND_FN (NAME, FLAGS, OPTAB, TYPE)
>       DEF_INTERNAL_SIGNED_COND_FN (NAME, FLAGS, OPTAB, TYPE)
> +     DEF_INTERNAL_WIDENING_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB,
> +                                  TYPE)
>  
>     where NAME is the name of the function, FLAGS is a set of
>     ECF_* flags and FNSPEC is a string describing functions fnspec.
> @@ -97,6 +101,10 @@ along with GCC; see the file COPYING3.
>     says that the function extends the C-level BUILT_IN_<NAME>{,L,LL,IMAX}
>     group of functions to any integral mode (including vector modes).
>  
> +   DEF_INTERNAL_INT_EXT_FN is like DEF_INTERNAL_INT_FN, except that it
> +   has expand_##NAME defined in internal-fn.cc to override the
> +   DEF_INTERNAL_INT_FN expansion behavior.
> +
>     DEF_INTERNAL_WIDENING_OPTAB_FN is a wrapper that defines five internal
>     functions with DEF_INTERNAL_SIGNED_OPTAB_FN:
>     - one that describes a widening operation with the same number of elements
> @@ -160,6 +168,11 @@ along with GCC; see the file COPYING3.
>    DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
>  #endif
>  
> +#ifndef DEF_INTERNAL_INT_EXT_FN
> +#define DEF_INTERNAL_INT_EXT_FN(NAME, FLAGS, OPTAB, TYPE) \
> +  DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
> +#endif
> +
>  #ifndef DEF_INTERNAL_WIDENING_OPTAB_FN
>  #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, FLAGS, SELECTOR, SOPTAB, 
> UOPTAB, TYPE)              \
>    DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) 
>                     \
> @@ -432,7 +445,7 @@ DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | EC
>  DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
>  DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
>  DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
> -DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
> +DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
>  
>  DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
> @@ -572,6 +585,10 @@ DEF_INTERNAL_FN (DIVMODBITINT, ECF_LEAF,
>  DEF_INTERNAL_FN (FLOATTOBITINT, ECF_LEAF | ECF_NOTHROW, ". O . . ")
>  DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ")
>  
> +#undef DEF_INTERNAL_WIDENING_OPTAB_FN
> +#undef DEF_INTERNAL_SIGNED_COND_FN
> +#undef DEF_INTERNAL_COND_FN
> +#undef DEF_INTERNAL_INT_EXT_FN
>  #undef DEF_INTERNAL_INT_FN
>  #undef DEF_INTERNAL_FLT_FN
>  #undef DEF_INTERNAL_FLT_FLOATN_FN
> --- gcc/internal-fn.cc.jj     2023-11-07 08:32:01.741254155 +0100
> +++ gcc/internal-fn.cc        2023-11-18 11:06:31.740703230 +0100
> @@ -102,8 +102,6 @@ lookup_hilo_internal_fn (internal_fn ifn
>      {
>      default:
>        gcc_unreachable ();
> -#undef DEF_INTERNAL_FN
> -#undef DEF_INTERNAL_WIDENING_OPTAB_FN
>  #define DEF_INTERNAL_FN(NAME, FLAGS, TYPE)
>  #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T)        \
>      case IFN_##NAME:                                         \
> @@ -111,8 +109,6 @@ lookup_hilo_internal_fn (internal_fn ifn
>        *hi = internal_fn (IFN_##NAME##_HI);                   \
>        break;
>  #include "internal-fn.def"
> -#undef DEF_INTERNAL_FN
> -#undef DEF_INTERNAL_WIDENING_OPTAB_FN
>      }
>  }
>  
> @@ -129,8 +125,6 @@ lookup_evenodd_internal_fn (internal_fn
>      {
>      default:
>        gcc_unreachable ();
> -#undef DEF_INTERNAL_FN
> -#undef DEF_INTERNAL_WIDENING_OPTAB_FN
>  #define DEF_INTERNAL_FN(NAME, FLAGS, TYPE)
>  #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T)        \
>      case IFN_##NAME:                                         \
> @@ -138,8 +132,6 @@ lookup_evenodd_internal_fn (internal_fn
>        *odd = internal_fn (IFN_##NAME##_ODD);                 \
>        break;
>  #include "internal-fn.def"
> -#undef DEF_INTERNAL_FN
> -#undef DEF_INTERNAL_WIDENING_OPTAB_FN
>      }
>  }
>  
> @@ -4261,7 +4253,6 @@ widening_fn_p (code_helper code)
>    internal_fn fn = as_internal_fn ((combined_fn) code);
>    switch (fn)
>      {
> -    #undef DEF_INTERNAL_WIDENING_OPTAB_FN
>      #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \
>      case IFN_##NAME:                                           \
>      case IFN_##NAME##_HI:                                      \
> @@ -4270,7 +4261,6 @@ widening_fn_p (code_helper code)
>      case IFN_##NAME##_ODD:                                     \
>        return true;
>      #include "internal-fn.def"
> -    #undef DEF_INTERNAL_WIDENING_OPTAB_FN
>  
>      default:
>        return false;
> @@ -4295,6 +4285,7 @@ set_edom_supported_p (void)
>    {                                                  \
>      expand_##TYPE##_optab_fn (fn, stmt, OPTAB##_optab);      \
>    }
> +#define DEF_INTERNAL_INT_EXT_FN(CODE, FLAGS, OPTAB, TYPE)
>  #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
>                                    UNSIGNED_OPTAB, TYPE)              \
>    static void                                                                
> \
> @@ -4305,8 +4296,6 @@ set_edom_supported_p (void)
>      expand_##TYPE##_optab_fn (fn, stmt, which_optab);                        
> \
>    }
>  #include "internal-fn.def"
> -#undef DEF_INTERNAL_OPTAB_FN
> -#undef DEF_INTERNAL_SIGNED_OPTAB_FN
>  
>  /* Routines to expand each internal function, indexed by function number.
>     Each routine has the prototype:
> @@ -4465,8 +4454,6 @@ get_len_internal_fn (internal_fn fn)
>  {
>    switch (fn)
>      {
> -#undef DEF_INTERNAL_COND_FN
> -#undef DEF_INTERNAL_SIGNED_COND_FN
>  #define DEF_INTERNAL_COND_FN(NAME, ...)                                      
>   \
>    case IFN_COND_##NAME:                                                      
>   \
>      return IFN_COND_LEN_##NAME;
> @@ -4474,8 +4461,6 @@ get_len_internal_fn (internal_fn fn)
>    case IFN_COND_##NAME:                                                      
>   \
>      return IFN_COND_LEN_##NAME;
>  #include "internal-fn.def"
> -#undef DEF_INTERNAL_COND_FN
> -#undef DEF_INTERNAL_SIGNED_COND_FN
>      default:
>        return IFN_LAST;
>      }
> @@ -5113,3 +5098,67 @@ expand_BITINTTOFLOAT (internal_fn, gcall
>    if (val != target)
>      emit_move_insn (target, val);
>  }
> +
> +void
> +expand_POPCOUNT (internal_fn fn, gcall *stmt)
> +{
> +  if (gimple_call_num_args (stmt) == 1)
> +    {
> +      expand_unary_optab_fn (fn, stmt, popcount_optab);
> +      return;
> +    }
> +  /* If .POPCOUNT call has 2 arguments, match_single_bit_test marked it
> +     because the result is only used in an equality comparison against 1.
> +     Use rtx costs in that case to determine if .POPCOUNT (arg) == 1
> +     or (arg ^ (arg - 1)) > arg - 1 is cheaper.  */

We're doing plenty of RTX costing during GIMPLE optimizations, I wonder
if it could be done equally well there given that seq_cost of an RTX
sequence isn't going to capture the compare & branch cost very reliably
anyway ...

> +  bool speed_p = optimize_insn_for_speed_p ();
> +  tree lhs = gimple_call_lhs (stmt);
> +  tree arg = gimple_call_arg (stmt, 0);
> +  tree type = TREE_TYPE (arg);
> +  machine_mode mode = TYPE_MODE (type);
> +  do_pending_stack_adjust ();
> +  start_sequence ();
> +  expand_unary_optab_fn (fn, stmt, popcount_optab);
> +  rtx_insn *popcount_insns = get_insns ();
> +  end_sequence ();
> +  start_sequence ();
> +  rtx plhs = expand_normal (lhs);
> +  rtx pcmp = emit_store_flag (NULL_RTX, EQ, plhs, const1_rtx, mode, 0, 0);
> +  if (pcmp == NULL_RTX)
> +    {
> +    fail:
> +      end_sequence ();
> +      emit_insn (popcount_insns);
> +      return;
> +    }
> +  rtx_insn *popcount_cmp_insns = get_insns ();
> +  end_sequence ();
> +  start_sequence ();
> +  rtx op0 = expand_normal (arg);
> +  rtx argm1 = expand_simple_binop (mode, PLUS, op0, constm1_rtx, NULL_RTX,
> +                                1, OPTAB_DIRECT);
> +  if (argm1 == NULL_RTX)
> +    goto fail;
> +  rtx argxorargm1 = expand_simple_binop (mode, XOR, op0, argm1, NULL_RTX,
> +                                      1, OPTAB_DIRECT);
> +  if (argxorargm1 == NULL_RTX)
> +    goto fail;
> +  rtx cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1);
> +  if (cmp == NULL_RTX)
> +    goto fail;
> +  rtx_insn *cmp_insns = get_insns ();
> +  end_sequence ();
> +  unsigned popcount_cost = (seq_cost (popcount_insns, speed_p)
> +                         + seq_cost (popcount_cmp_insns, speed_p));
> +  unsigned cmp_cost = seq_cost (cmp_insns, speed_p);
> +  if (popcount_cost < cmp_cost)
> +    emit_insn (popcount_insns);
> +  else
> +    {
> +      emit_insn (cmp_insns);
> +      plhs = expand_normal (lhs);
> +      if (GET_MODE (cmp) != GET_MODE (plhs))
> +     cmp = convert_to_mode (GET_MODE (plhs), cmp, 1);
> +      emit_move_insn (plhs, cmp);
> +    }
> +}
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to