On Thu, 20 Apr 2023, Jakub Jelinek wrote:

> Hi!
> 
> The following patch allows vectorizing __builtin_ffs*/.FFS even if
> we just have vector .CTZ support, or __builtin_ffs*/.FFS/__builtin_ctz*/.CTZ
> if we just have vector .CLZ or .POPCOUNT support.
> It uses various expansions from the Hacker's Delight book as well as GCC's
> expansion, in particular:
> .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
> .CTZ (X) = .POPCOUNT ((X - 1) & ~X)
> .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
> .FFS (X) = PREC - .CLZ (X & -X)
> .CTZ (X) = PREC - .POPCOUNT (X | -X)
> .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X)
> .FFS (X) = .CTZ (X) + 1
> where the first one can only be used if both CTZ and CLZ have a value
> defined at zero (kind 2) and that value is PREC for both.
> For the other forms, if the original function has a value defined at zero
> and the replacement either doesn't, or yields a different value there,
> a COND_EXPR is added afterwards to select the original's value at zero.
> 
> The patch also modifies vect_recog_popcount_clz_ctz_ffs_pattern
> such that the two can work together.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, plus tested
> on the testcases on powerpc64le-linux and s390x-linux crosses, ok for trunk?

OK.

Thanks,
Richard.

> 2023-04-20  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/109011
>       * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern): New function.
>       (vect_recog_popcount_clz_ctz_ffs_pattern): Move vect_pattern_detected
>       call later.  Don't punt for IFN_CTZ or IFN_FFS if it doesn't have
>       direct optab support, but has instead IFN_CLZ, IFN_POPCOUNT or
>       for IFN_FFS IFN_CTZ support, use vect_recog_ctz_ffs_pattern for that
>       case.
>       (vect_vect_recog_func_ptrs): Add ctz_ffs entry.
> 
>       * gcc.dg/vect/pr109011-1.c: Remove -mpower9-vector from
>       dg-additional-options.
>       (baz, qux): Remove functions and corresponding dg-final.
>       * gcc.dg/vect/pr109011-2.c: New test.
>       * gcc.dg/vect/pr109011-3.c: New test.
>       * gcc.dg/vect/pr109011-4.c: New test.
>       * gcc.dg/vect/pr109011-5.c: New test.
> 
> --- gcc/tree-vect-patterns.cc.jj      2023-04-19 11:14:17.445843870 +0200
> +++ gcc/tree-vect-patterns.cc 2023-04-19 20:49:27.946432713 +0200
> @@ -1501,6 +1501,266 @@ vect_recog_widen_minus_pattern (vec_info
>                                     "vect_recog_widen_minus_pattern");
>  }
>  
> +/* Function vect_recog_ctz_ffs_pattern
> +
> +   Try to find the following pattern:
> +
> +   TYPE1 A;
> +   TYPE1 B;
> +
> +   B = __builtin_ctz{,l,ll} (A);
> +
> +   or
> +
> +   B = __builtin_ffs{,l,ll} (A);
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins.
> +   here it starts with B = __builtin_* (A);
> +
> +   Output:
> +
> +   * TYPE_OUT: The vector type of the output of this pattern.
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +   stmts that constitute the pattern, using clz or popcount builtins.  */
> +
> +static gimple *
> +vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +                         tree *type_out)
> +{
> +  gimple *call_stmt = stmt_vinfo->stmt;
> +  gimple *pattern_stmt;
> +  tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
> +  tree new_var;
> +  internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
> +  bool defined_at_zero = true, defined_at_zero_new = false;
> +  int val = 0, val_new = 0;
> +  int prec;
> +  int sub = 0, add = 0;
> +  location_t loc;
> +
> +  if (!is_gimple_call (call_stmt))
> +    return NULL;
> +
> +  if (gimple_call_num_args (call_stmt) != 1)
> +    return NULL;
> +
> +  rhs_oprnd = gimple_call_arg (call_stmt, 0);
> +  rhs_type = TREE_TYPE (rhs_oprnd);
> +  lhs_oprnd = gimple_call_lhs (call_stmt);
> +  if (!lhs_oprnd)
> +    return NULL;
> +  lhs_type = TREE_TYPE (lhs_oprnd);
> +  if (!INTEGRAL_TYPE_P (lhs_type)
> +      || !INTEGRAL_TYPE_P (rhs_type)
> +      || !type_has_mode_precision_p (rhs_type)
> +      || TREE_CODE (rhs_oprnd) != SSA_NAME)
> +    return NULL;
> +
> +  switch (gimple_call_combined_fn (call_stmt))
> +    {
> +    CASE_CFN_CTZ:
> +      ifn = IFN_CTZ;
> +      if (!gimple_call_internal_p (call_stmt)
> +       || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
> +                                     val) != 2)
> +     defined_at_zero = false;
> +      break;
> +    CASE_CFN_FFS:
> +      ifn = IFN_FFS;
> +      break;
> +    default:
> +      return NULL;
> +    }
> +
> +  prec = TYPE_PRECISION (rhs_type);
> +  loc = gimple_location (call_stmt);
> +
> +  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
> +  if (!vec_type)
> +    return NULL;
> +
> +  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
> +  if (!vec_rhs_type)
> +    return NULL;
> +
> +  /* Do it only if the backend doesn't have ctz<vector_mode>2 or
> +     ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
> +     popcount<vector_mode>2.  */
> +  if (!vec_type
> +      || direct_internal_fn_supported_p (ifn, vec_rhs_type,
> +                                      OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  if (ifn == IFN_FFS
> +      && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
> +                                      OPTIMIZE_FOR_SPEED))
> +    {
> +      ifnnew = IFN_CTZ;
> +      defined_at_zero_new
> +     = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
> +                                  val_new) == 2;
> +    }
> +  else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
> +                                        OPTIMIZE_FOR_SPEED))
> +    {
> +      ifnnew = IFN_CLZ;
> +      defined_at_zero_new
> +     = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
> +                                  val_new) == 2;
> +    }
> +  if ((ifnnew == IFN_LAST
> +       || (defined_at_zero && !defined_at_zero_new))
> +      && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
> +                                      OPTIMIZE_FOR_SPEED))
> +    {
> +      ifnnew = IFN_POPCOUNT;
> +      defined_at_zero_new = true;
> +      val_new = prec;
> +    }
> +  if (ifnnew == IFN_LAST)
> +    return NULL;
> +
> +  vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
> +
> +  if ((ifnnew == IFN_CLZ
> +       && defined_at_zero
> +       && defined_at_zero_new
> +       && val == prec
> +       && val_new == prec)
> +      || (ifnnew == IFN_POPCOUNT && ifn == IFN_CLZ))
> +    {
> +      /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
> +      .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
> +      if (ifnnew == IFN_CLZ)
> +     sub = prec;
> +      val_new = prec;
> +
> +      if (!TYPE_UNSIGNED (rhs_type))
> +     {
> +       rhs_type = unsigned_type_for (rhs_type);
> +       vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
> +       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
> +       pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
> +       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
> +                               vec_rhs_type);
> +       rhs_oprnd = new_var;
> +     }
> +
> +      tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
> +                                       build_int_cst (rhs_type, -1));
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +
> +      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +      rhs_oprnd = new_var;
> +
> +      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
> +                                       m1, rhs_oprnd);
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +      rhs_oprnd = new_var;
> +    }
> +  else if (ifnnew == IFN_CLZ)
> +    {
> +      /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
> +      .FFS (X) = PREC - .CLZ (X & -X).  */
> +      sub = prec - (ifn == IFN_CTZ);
> +      val_new = sub - val_new;
> +
> +      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +
> +      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
> +                                       rhs_oprnd, neg);
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +      rhs_oprnd = new_var;
> +    }
> +  else if (ifnnew == IFN_POPCOUNT)
> +    {
> +      /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
> +      .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
> +      sub = prec + (ifn == IFN_FFS);
> +      val_new = sub;
> +
> +      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +
> +      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
> +                                       rhs_oprnd, neg);
> +      gimple_set_location (pattern_stmt, loc);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
> +      rhs_oprnd = new_var;
> +    }
> +  else if (ifnnew == IFN_CTZ)
> +    {
> +      /* .FFS (X) = .CTZ (X) + 1.  */
> +      add = 1;
> +      val_new++;
> +    }
> +
> +  /* Create B = .IFNNEW (A).  */
> +  new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> +  pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
> +  gimple_call_set_lhs (pattern_stmt, new_var);
> +  gimple_set_location (pattern_stmt, loc);
> +  *type_out = vec_type;
> +
> +  if (sub)
> +    {
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
> +      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
> +                                       build_int_cst (lhs_type, sub),
> +                                       new_var);
> +      gimple_set_location (pattern_stmt, loc);
> +      new_var = ret_var;
> +    }
> +  else if (add)
> +    {
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
> +      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> +      pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
> +                                       build_int_cst (lhs_type, add));
> +      gimple_set_location (pattern_stmt, loc);
> +      new_var = ret_var;
> +    }
> +
> +  if (defined_at_zero
> +      && (!defined_at_zero_new || val != val_new))
> +    {
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
> +      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
> +      rhs_oprnd = gimple_call_arg (call_stmt, 0);
> +      rhs_type = TREE_TYPE (rhs_oprnd);
> +      tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
> +                          rhs_oprnd, build_zero_cst (rhs_type));
> +      pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
> +                                       new_var,
> +                                       build_int_cst (lhs_type, val));
> +    }
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "created pattern stmt: %G", pattern_stmt);
> +
> +  return pattern_stmt;
> +}
> +
>  /* Function vect_recog_popcount_clz_ctz_ffs_pattern
>  
>     Try to find the following pattern:
> @@ -1680,15 +1940,42 @@ vect_recog_popcount_clz_ctz_ffs_pattern
>       gcc_unreachable ();
>        }
>  
> -  vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
> -                      call_stmt);
>    vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
>    /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
> -  if (!vec_type
> -      || !direct_internal_fn_supported_p (ifn, vec_type,
> -                                       OPTIMIZE_FOR_SPEED))
> +  if (!vec_type)
>      return NULL;
>  
> +  bool supported
> +    = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
> +  if (!supported)
> +    switch (ifn)
> +      {
> +      case IFN_POPCOUNT:
> +      case IFN_CLZ:
> +     return NULL;
> +      case IFN_FFS:
> +     /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
> +     if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
> +                                         OPTIMIZE_FOR_SPEED))
> +       break;
> +     /* FALLTHRU */
> +      case IFN_CTZ:
> +     /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
> +        clz or popcount.  */
> +     if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
> +                                         OPTIMIZE_FOR_SPEED))
> +       break;
> +     if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
> +                                         OPTIMIZE_FOR_SPEED))
> +       break;
> +     return NULL;
> +      default:
> +     gcc_unreachable ();
> +      }
> +
> +  vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
> +                      call_stmt);
> +
>    /* Create B = .POPCOUNT (A).  */
>    new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
>    pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
> @@ -1702,11 +1989,26 @@ vect_recog_popcount_clz_ctz_ffs_pattern
>  
>    if (addend)
>      {
> +      gcc_assert (supported);
>        append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
>        tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
>        pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
>                                         build_int_cst (lhs_type, addend));
>      }
> +  else if (!supported)
> +    {
> +      stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
> +      STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
> +      pattern_stmt
> +     = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
> +      if (pattern_stmt == NULL)
> +     return NULL;
> +      if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
> +     {
> +       gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
> +       gimple_seq_add_seq_without_update (pseq, seq);
> +     }
> +    }
>    return pattern_stmt;
>  }
>  
> @@ -6150,6 +6452,7 @@ static vect_recog_func vect_vect_recog_f
>    { vect_recog_widen_sum_pattern, "widen_sum" },
>    { vect_recog_pow_pattern, "pow" },
>    { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
> +  { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
>    { vect_recog_widen_shift_pattern, "widen_shift" },
>    { vect_recog_rotate_pattern, "rotate" },
>    { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
> --- gcc/testsuite/gcc.dg/vect/pr109011-1.c.jj 2023-04-19 11:14:17.458843682 
> +0200
> +++ gcc/testsuite/gcc.dg/vect/pr109011-1.c    2023-04-19 20:59:52.080597720 
> +0200
> @@ -4,7 +4,6 @@
>  /* { dg-additional-options "-mavx512cd" { target { { i?86-*-* x86_64-*-* } 
> && avx512cd } } } */
>  /* { dg-additional-options "-mavx512vpopcntdq" { target { { i?86-*-* 
> x86_64-*-* } && avx512vpopcntdq } } } */
>  /* { dg-additional-options "-mpower8-vector" { target powerpc_p8vector_ok } 
> } */
> -/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } 
> } */
>  /* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */
>  
>  void
> @@ -28,21 +27,3 @@ bar (long long *p, long long *q)
>  
>  /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target 
> { { i?86-*-* x86_64-*-* } && avx512cd } } } } */
>  /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target 
> { powerpc_p8vector_ok || s390_vx } } } } */
> -
> -void
> -baz (long long *p, long long *q)
> -{
> -#pragma omp simd
> -  for (int i = 0; i < 2048; ++i)
> -    p[i] = __builtin_ctzll (q[i]);
> -}
> -
> -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 1 "optimized" { target 
> { powerpc_p9vector_ok || s390_vx } } } } */
> -
> -void
> -qux (long long *p, long long *q)
> -{
> -#pragma omp simd
> -  for (int i = 0; i < 2048; ++i)
> -    p[i] = __builtin_ffsll (q[i]);
> -}
> --- gcc/testsuite/gcc.dg/vect/pr109011-2.c.jj 2023-04-19 13:03:20.621977340 
> +0200
> +++ gcc/testsuite/gcc.dg/vect/pr109011-2.c    2023-04-19 20:53:30.205003402 
> +0200
> @@ -0,0 +1,35 @@
> +/* PR tree-optimization/109011 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 
> -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mavx512cd -mbmi -mlzcnt -mno-avx512vpopcntdq" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } */
> +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } 
> } */
> +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */
> +
> +void
> +foo (int *p, int *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ctz (q[i]);
> +}
> +
> +void
> +bar (int *p, int *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = q[i] ? __builtin_ctz (q[i]) : __SIZEOF_INT__ * __CHAR_BIT__;
> +}
> +
> +void
> +baz (int *p, int *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ffs (q[i]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target 
> { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 4 "optimized" { target 
> powerpc_p9vector_ok } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 2 "optimized" { target 
> s390_vx } } } */
> +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { 
> target s390_vx } } } */
> --- gcc/testsuite/gcc.dg/vect/pr109011-3.c.jj 2023-04-19 13:13:23.524284082 
> +0200
> +++ gcc/testsuite/gcc.dg/vect/pr109011-3.c    2023-04-19 20:58:19.517908001 
> +0200
> @@ -0,0 +1,32 @@
> +/* PR tree-optimization/109011 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 
> -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mno-avx512cd -mbmi -mlzcnt -mavx512vpopcntdq" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } 
> } } */
> +/* { dg-additional-options "-mpower8-vector -mno-power9-vector" { target 
> powerpc_p8vector_ok } } */
> +
> +void
> +foo (int *p, int *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ctz (q[i]);
> +}
> +
> +void
> +bar (int *p, int *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = q[i] ? __builtin_ctz (q[i]) : __SIZEOF_INT__ * __CHAR_BIT__;
> +}
> +
> +void
> +baz (int *p, int *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ffs (q[i]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 3 "optimized" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } 
> } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target 
> powerpc_p8vector_ok } } } */
> --- gcc/testsuite/gcc.dg/vect/pr109011-4.c.jj 2023-04-19 18:42:02.530527826 
> +0200
> +++ gcc/testsuite/gcc.dg/vect/pr109011-4.c    2023-04-19 20:57:17.813781462 
> +0200
> @@ -0,0 +1,35 @@
> +/* PR tree-optimization/109011 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 
> -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mavx512cd -mbmi -mlzcnt -mno-avx512vpopcntdq" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } */
> +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } 
> } */
> +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */
> +
> +void
> +foo (long long *p, long long *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ctzll (q[i]);
> +}
> +
> +void
> +bar (long long *p, long long *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = q[i] ? __builtin_ctzll (q[i]) : __SIZEOF_LONG_LONG__ * 
> __CHAR_BIT__;
> +}
> +
> +void
> +baz (long long *p, long long *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ffsll (q[i]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target 
> { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 4 "optimized" { target 
> powerpc_p9vector_ok } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 2 "optimized" { target 
> s390_vx } } } */
> +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { 
> target s390_vx } } } */
> --- gcc/testsuite/gcc.dg/vect/pr109011-5.c.jj 2023-04-19 18:42:52.249824866 
> +0200
> +++ gcc/testsuite/gcc.dg/vect/pr109011-5.c    2023-04-19 20:58:33.845705184 
> +0200
> @@ -0,0 +1,32 @@
> +/* PR tree-optimization/109011 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 
> -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mno-avx512cd -mbmi -mlzcnt -mavx512vpopcntdq" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } 
> } } */
> +/* { dg-additional-options "-mpower8-vector -mno-power9-vector" { target 
> powerpc_p8vector_ok } } */
> +
> +void
> +foo (long long *p, long long *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ctzll (q[i]);
> +}
> +
> +void
> +bar (long long *p, long long *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = q[i] ? __builtin_ctzll (q[i]) : __SIZEOF_LONG_LONG__ * 
> __CHAR_BIT__;
> +}
> +
> +void
> +baz (long long *p, long long *q)
> +{
> +#pragma omp simd
> +  for (int i = 0; i < 2048; ++i)
> +    p[i] = __builtin_ffsll (q[i]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 3 "optimized" { 
> target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } 
> } } } */
> +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target 
> powerpc_p8vector_ok } } } */
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Reply via email to