On Thu, 20 Apr 2023, Jakub Jelinek wrote: > Hi! > > The following patch allows vectorizing __builtin_ffs*/.FFS even if > we just have vector .CTZ support, or __builtin_ffs*/.FFS/__builtin_ctz*/.CTZ > if we just have vector .CLZ or .POPCOUNT support. > It uses various expansions from the Hacker's Delight book as well as GCC's > expansion, in particular: > .CTZ (X) = PREC - .CLZ ((X - 1) & ~X) > .CTZ (X) = .POPCOUNT ((X - 1) & ~X) > .CTZ (X) = (PREC - 1) - .CLZ (X & -X) > .FFS (X) = PREC - .CLZ (X & -X) > .CTZ (X) = PREC - .POPCOUNT (X | -X) > .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X) > .FFS (X) = .CTZ (X) + 1 > where the first one can only be used if both CTZ and CLZ have a value > defined at zero (kind 2) and both have the value PREC there. > If the original has a value defined at zero and the replacement either > doesn't (for the other forms) or doesn't yield a matching value for that case, > a COND_EXPR is added for that afterwards. > > The patch also modifies vect_recog_popcount_clz_ctz_ffs_pattern > such that the two can work together. > > Bootstrapped/regtested on x86_64-linux and i686-linux, plus tested > on the testcases on powerpc64le-linux and s390x-linux crosses, ok for trunk?
OK. Thanks, Richard. > 2023-04-20 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/109011 > * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern): New function. > (vect_recog_popcount_clz_ctz_ffs_pattern): Move vect_pattern_detected > call later. Don't punt for IFN_CTZ or IFN_FFS if it doesn't have > direct optab support, but has instead IFN_CLZ, IFN_POPCOUNT or > for IFN_FFS IFN_CTZ support, use vect_recog_ctz_ffs_pattern for that > case. > (vect_vect_recog_func_ptrs): Add ctz_ffs entry. > > * gcc.dg/vect/pr109011-1.c: Remove -mpower9-vector from > dg-additional-options. > (baz, qux): Remove functions and corresponding dg-final. > * gcc.dg/vect/pr109011-2.c: New test. > * gcc.dg/vect/pr109011-3.c: New test. > * gcc.dg/vect/pr109011-4.c: New test. > * gcc.dg/vect/pr109011-5.c: New test. > > --- gcc/tree-vect-patterns.cc.jj 2023-04-19 11:14:17.445843870 +0200 > +++ gcc/tree-vect-patterns.cc 2023-04-19 20:49:27.946432713 +0200 > @@ -1501,6 +1501,266 @@ vect_recog_widen_minus_pattern (vec_info > "vect_recog_widen_minus_pattern"); > } > > +/* Function vect_recog_ctz_ffs_pattern > + > + Try to find the following pattern: > + > + TYPE1 A; > + TYPE1 B; > + > + B = __builtin_ctz{,l,ll} (A); > + > + or > + > + B = __builtin_ffs{,l,ll} (A); > + > + Input: > + > + * STMT_VINFO: The stmt from which the pattern search begins. > + here it starts with B = __builtin_* (A); > + > + Output: > + > + * TYPE_OUT: The vector type of the output of this pattern. > + > + * Return value: A new stmt that will be used to replace the sequence of > + stmts that constitute the pattern, using clz or popcount builtins. 
*/ > + > +static gimple * > +vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, > + tree *type_out) > +{ > + gimple *call_stmt = stmt_vinfo->stmt; > + gimple *pattern_stmt; > + tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type; > + tree new_var; > + internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST; > + bool defined_at_zero = true, defined_at_zero_new = false; > + int val = 0, val_new = 0; > + int prec; > + int sub = 0, add = 0; > + location_t loc; > + > + if (!is_gimple_call (call_stmt)) > + return NULL; > + > + if (gimple_call_num_args (call_stmt) != 1) > + return NULL; > + > + rhs_oprnd = gimple_call_arg (call_stmt, 0); > + rhs_type = TREE_TYPE (rhs_oprnd); > + lhs_oprnd = gimple_call_lhs (call_stmt); > + if (!lhs_oprnd) > + return NULL; > + lhs_type = TREE_TYPE (lhs_oprnd); > + if (!INTEGRAL_TYPE_P (lhs_type) > + || !INTEGRAL_TYPE_P (rhs_type) > + || !type_has_mode_precision_p (rhs_type) > + || TREE_CODE (rhs_oprnd) != SSA_NAME) > + return NULL; > + > + switch (gimple_call_combined_fn (call_stmt)) > + { > + CASE_CFN_CTZ: > + ifn = IFN_CTZ; > + if (!gimple_call_internal_p (call_stmt) > + || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type), > + val) != 2) > + defined_at_zero = false; > + break; > + CASE_CFN_FFS: > + ifn = IFN_FFS; > + break; > + default: > + return NULL; > + } > + > + prec = TYPE_PRECISION (rhs_type); > + loc = gimple_location (call_stmt); > + > + vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); > + if (!vec_type) > + return NULL; > + > + vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type); > + if (!vec_rhs_type) > + return NULL; > + > + /* Do it only if the backend doesn't have ctz<vector_mode>2 or > + ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or > + popcount<vector_mode>2. 
*/ > + if (!vec_type > + || direct_internal_fn_supported_p (ifn, vec_rhs_type, > + OPTIMIZE_FOR_SPEED)) > + return NULL; > + > + if (ifn == IFN_FFS > + && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type, > + OPTIMIZE_FOR_SPEED)) > + { > + ifnnew = IFN_CTZ; > + defined_at_zero_new > + = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type), > + val_new) == 2; > + } > + else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type, > + OPTIMIZE_FOR_SPEED)) > + { > + ifnnew = IFN_CLZ; > + defined_at_zero_new > + = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type), > + val_new) == 2; > + } > + if ((ifnnew == IFN_LAST > + || (defined_at_zero && !defined_at_zero_new)) > + && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type, > + OPTIMIZE_FOR_SPEED)) > + { > + ifnnew = IFN_POPCOUNT; > + defined_at_zero_new = true; > + val_new = prec; > + } > + if (ifnnew == IFN_LAST) > + return NULL; > + > + vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt); > + > + if ((ifnnew == IFN_CLZ > + && defined_at_zero > + && defined_at_zero_new > + && val == prec > + && val_new == prec) > + || (ifnnew == IFN_POPCOUNT && ifn == IFN_CLZ)) > + { > + /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X) > + .CTZ (X) = .POPCOUNT ((X - 1) & ~X). 
*/ > + if (ifnnew == IFN_CLZ) > + sub = prec; > + val_new = prec; > + > + if (!TYPE_UNSIGNED (rhs_type)) > + { > + rhs_type = unsigned_type_for (rhs_type); > + vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type); > + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, > + vec_rhs_type); > + rhs_oprnd = new_var; > + } > + > + tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd, > + build_int_cst (rhs_type, -1)); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + > + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + rhs_oprnd = new_var; > + > + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR, > + m1, rhs_oprnd); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + rhs_oprnd = new_var; > + } > + else if (ifnnew == IFN_CLZ) > + { > + /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X) > + .FFS (X) = PREC - .CLZ (X & -X). 
*/ > + sub = prec - (ifn == IFN_CTZ); > + val_new = sub - val_new; > + > + tree neg = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + > + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR, > + rhs_oprnd, neg); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + rhs_oprnd = new_var; > + } > + else if (ifnnew == IFN_POPCOUNT) > + { > + /* .CTZ (X) = PREC - .POPCOUNT (X | -X) > + .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */ > + sub = prec + (ifn == IFN_FFS); > + val_new = sub; > + > + tree neg = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + > + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); > + pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR, > + rhs_oprnd, neg); > + gimple_set_location (pattern_stmt, loc); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); > + rhs_oprnd = new_var; > + } > + else if (ifnnew == IFN_CTZ) > + { > + /* .FFS (X) = .CTZ (X) + 1. */ > + add = 1; > + val_new++; > + } > + > + /* Create B = .IFNNEW (A). 
*/ > + new_var = vect_recog_temp_ssa_var (lhs_type, NULL); > + pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd); > + gimple_call_set_lhs (pattern_stmt, new_var); > + gimple_set_location (pattern_stmt, loc); > + *type_out = vec_type; > + > + if (sub) > + { > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); > + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); > + pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR, > + build_int_cst (lhs_type, sub), > + new_var); > + gimple_set_location (pattern_stmt, loc); > + new_var = ret_var; > + } > + else if (add) > + { > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); > + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); > + pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var, > + build_int_cst (lhs_type, add)); > + gimple_set_location (pattern_stmt, loc); > + new_var = ret_var; > + } > + > + if (defined_at_zero > + && (!defined_at_zero_new || val != val_new)) > + { > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); > + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); > + rhs_oprnd = gimple_call_arg (call_stmt, 0); > + rhs_type = TREE_TYPE (rhs_oprnd); > + tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node, > + rhs_oprnd, build_zero_cst (rhs_type)); > + pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp, > + new_var, > + build_int_cst (lhs_type, val)); > + } > + > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "created pattern stmt: %G", pattern_stmt); > + > + return pattern_stmt; > +} > + > /* Function vect_recog_popcount_clz_ctz_ffs_pattern > > Try to find the following pattern: > @@ -1680,15 +1940,42 @@ vect_recog_popcount_clz_ctz_ffs_pattern > gcc_unreachable (); > } > > - vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern", > - call_stmt); > vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); > /* Do it only if the backend has 
popcount<vector_mode>2 etc. pattern. */ > - if (!vec_type > - || !direct_internal_fn_supported_p (ifn, vec_type, > - OPTIMIZE_FOR_SPEED)) > + if (!vec_type) > return NULL; > > + bool supported > + = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED); > + if (!supported) > + switch (ifn) > + { > + case IFN_POPCOUNT: > + case IFN_CLZ: > + return NULL; > + case IFN_FFS: > + /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */ > + if (direct_internal_fn_supported_p (IFN_CTZ, vec_type, > + OPTIMIZE_FOR_SPEED)) > + break; > + /* FALLTHRU */ > + case IFN_CTZ: > + /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using > + clz or popcount. */ > + if (direct_internal_fn_supported_p (IFN_CLZ, vec_type, > + OPTIMIZE_FOR_SPEED)) > + break; > + if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type, > + OPTIMIZE_FOR_SPEED)) > + break; > + return NULL; > + default: > + gcc_unreachable (); > + } > + > + vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern", > + call_stmt); > + > /* Create B = .POPCOUNT (A). 
*/ > new_var = vect_recog_temp_ssa_var (lhs_type, NULL); > pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op); > @@ -1702,11 +1989,26 @@ vect_recog_popcount_clz_ctz_ffs_pattern > > if (addend) > { > + gcc_assert (supported); > append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); > tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); > pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var, > build_int_cst (lhs_type, addend)); > } > + else if (!supported) > + { > + stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt); > + STMT_VINFO_VECTYPE (new_stmt_info) = vec_type; > + pattern_stmt > + = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out); > + if (pattern_stmt == NULL) > + return NULL; > + if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info)) > + { > + gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo); > + gimple_seq_add_seq_without_update (pseq, seq); > + } > + } > return pattern_stmt; > } > > @@ -6150,6 +6452,7 @@ static vect_recog_func vect_vect_recog_f > { vect_recog_widen_sum_pattern, "widen_sum" }, > { vect_recog_pow_pattern, "pow" }, > { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" }, > + { vect_recog_ctz_ffs_pattern, "ctz_ffs" }, > { vect_recog_widen_shift_pattern, "widen_shift" }, > { vect_recog_rotate_pattern, "rotate" }, > { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, > --- gcc/testsuite/gcc.dg/vect/pr109011-1.c.jj 2023-04-19 11:14:17.458843682 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr109011-1.c 2023-04-19 20:59:52.080597720 > +0200 > @@ -4,7 +4,6 @@ > /* { dg-additional-options "-mavx512cd" { target { { i?86-*-* x86_64-*-* } > && avx512cd } } } */ > /* { dg-additional-options "-mavx512vpopcntdq" { target { { i?86-*-* > x86_64-*-* } && avx512vpopcntdq } } } */ > /* { dg-additional-options "-mpower8-vector" { target powerpc_p8vector_ok } > } */ > -/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } > } */ > /* 
{ dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ > > void > @@ -28,21 +27,3 @@ bar (long long *p, long long *q) > > /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target > { { i?86-*-* x86_64-*-* } && avx512cd } } } } */ > /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target > { powerpc_p8vector_ok || s390_vx } } } } */ > - > -void > -baz (long long *p, long long *q) > -{ > -#pragma omp simd > - for (int i = 0; i < 2048; ++i) > - p[i] = __builtin_ctzll (q[i]); > -} > - > -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 1 "optimized" { target > { powerpc_p9vector_ok || s390_vx } } } } */ > - > -void > -qux (long long *p, long long *q) > -{ > -#pragma omp simd > - for (int i = 0; i < 2048; ++i) > - p[i] = __builtin_ffsll (q[i]); > -} > --- gcc/testsuite/gcc.dg/vect/pr109011-2.c.jj 2023-04-19 13:03:20.621977340 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr109011-2.c 2023-04-19 20:53:30.205003402 > +0200 > @@ -0,0 +1,35 @@ > +/* PR tree-optimization/109011 */ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 > -fdump-tree-optimized" } */ > +/* { dg-additional-options "-mavx512cd -mbmi -mlzcnt -mno-avx512vpopcntdq" { > target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } */ > +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } > } */ > +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ > + > +void > +foo (int *p, int *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ctz (q[i]); > +} > + > +void > +bar (int *p, int *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = q[i] ? 
__builtin_ctz (q[i]) : __SIZEOF_INT__ * __CHAR_BIT__; > +} > + > +void > +baz (int *p, int *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ffs (q[i]); > +} > + > +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target > { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 4 "optimized" { target > powerpc_p9vector_ok } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 2 "optimized" { target > s390_vx } } } */ > +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { > target s390_vx } } } */ > --- gcc/testsuite/gcc.dg/vect/pr109011-3.c.jj 2023-04-19 13:13:23.524284082 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr109011-3.c 2023-04-19 20:58:19.517908001 > +0200 > @@ -0,0 +1,32 @@ > +/* PR tree-optimization/109011 */ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 > -fdump-tree-optimized" } */ > +/* { dg-additional-options "-mno-avx512cd -mbmi -mlzcnt -mavx512vpopcntdq" { > target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } > } } */ > +/* { dg-additional-options "-mpower8-vector -mno-power9-vector" { target > powerpc_p8vector_ok } } */ > + > +void > +foo (int *p, int *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ctz (q[i]); > +} > + > +void > +bar (int *p, int *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = q[i] ? 
__builtin_ctz (q[i]) : __SIZEOF_INT__ * __CHAR_BIT__; > +} > + > +void > +baz (int *p, int *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ffs (q[i]); > +} > + > +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 3 "optimized" { > target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } > } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target > powerpc_p8vector_ok } } } */ > --- gcc/testsuite/gcc.dg/vect/pr109011-4.c.jj 2023-04-19 18:42:02.530527826 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr109011-4.c 2023-04-19 20:57:17.813781462 > +0200 > @@ -0,0 +1,35 @@ > +/* PR tree-optimization/109011 */ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 > -fdump-tree-optimized" } */ > +/* { dg-additional-options "-mavx512cd -mbmi -mlzcnt -mno-avx512vpopcntdq" { > target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } */ > +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } > } */ > +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ > + > +void > +foo (long long *p, long long *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ctzll (q[i]); > +} > + > +void > +bar (long long *p, long long *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = q[i] ? 
__builtin_ctzll (q[i]) : __SIZEOF_LONG_LONG__ * > __CHAR_BIT__; > +} > + > +void > +baz (long long *p, long long *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ffsll (q[i]); > +} > + > +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target > { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 4 "optimized" { target > powerpc_p9vector_ok } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 2 "optimized" { target > s390_vx } } } */ > +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { > target s390_vx } } } */ > --- gcc/testsuite/gcc.dg/vect/pr109011-5.c.jj 2023-04-19 18:42:52.249824866 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr109011-5.c 2023-04-19 20:58:33.845705184 > +0200 > @@ -0,0 +1,32 @@ > +/* PR tree-optimization/109011 */ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 > -fdump-tree-optimized" } */ > +/* { dg-additional-options "-mno-avx512cd -mbmi -mlzcnt -mavx512vpopcntdq" { > target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } > } } */ > +/* { dg-additional-options "-mpower8-vector -mno-power9-vector" { target > powerpc_p8vector_ok } } */ > + > +void > +foo (long long *p, long long *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ctzll (q[i]); > +} > + > +void > +bar (long long *p, long long *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = q[i] ? 
__builtin_ctzll (q[i]) : __SIZEOF_LONG_LONG__ * > __CHAR_BIT__; > +} > + > +void > +baz (long long *p, long long *q) > +{ > +#pragma omp simd > + for (int i = 0; i < 2048; ++i) > + p[i] = __builtin_ffsll (q[i]); > +} > + > +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 3 "optimized" { > target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } > } } } */ > +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target > powerpc_p8vector_ok } } } */ > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)