The following allows us to emit a conditional move when the value of the table-based CTZ/CLZ implementation at zero differs from what the target implementation guarantees, or when we cannot easily fix it up otherwise. In that case emit val == 0 ? table-based-zero-result : ...
Bootstrapped and tested on x86_64-unknown-linux-gnu. PR tree-optimization/120032 * tree-ssa-forwprop.cc (simplify_count_zeroes): When we cannot use the IFN to determine the result at zero use a conditional move to reproduce the correct result from the table-based algorithm. * gcc.target/i386/pr120032-3.c: New testcase. --- gcc/testsuite/gcc.target/i386/pr120032-3.c | 20 ++++++++++++++++ gcc/tree-ssa-forwprop.cc | 27 +++++++++++++++------- 2 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr120032-3.c diff --git a/gcc/testsuite/gcc.target/i386/pr120032-3.c b/gcc/testsuite/gcc.target/i386/pr120032-3.c new file mode 100644 index 00000000000..9523bbb0f5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120032-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlzcnt" } */ + +unsigned int +ZSTD_countLeadingZeros32_fallback(unsigned int val) +{ + static const unsigned int DeBruijnClz[32] + = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; +} + +/* { dg-final { scan-assembler "lzcnt" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 0c2b10e92aa..43b1c9d696f 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2728,13 +2728,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi) nargs = 1; } - /* Skip if there is no value defined at zero, or if we can't easily - return the correct value for zero. 
*/ - if (!zero_ok) - return false; - if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == input_bits)) - return false; - gimple_seq seq = NULL; gimple *g; gcall *call = gimple_build_call_internal (fn, nargs, res_ops[0], @@ -2758,8 +2751,10 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi) prev_lhs = gimple_assign_lhs (g); } + if (zero_ok && zero_val == ctz_val) + ; /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */ - if (zero_val == 0 && ctz_val == input_bits) + else if (zero_ok && zero_val == 0 && ctz_val == input_bits) { g = gimple_build_assign (make_ssa_name (integer_type_node), BIT_AND_EXPR, prev_lhs, @@ -2769,6 +2764,22 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi) gimple_seq_add_stmt (&seq, g); prev_lhs = gimple_assign_lhs (g); } + /* As fallback emit a conditional move. */ + else + { + g = gimple_build_assign (make_ssa_name (boolean_type_node), EQ_EXPR, + res_ops[0], build_zero_cst (input_type)); + gimple_set_location (g, gimple_location (stmt)); + gimple_seq_add_stmt (&seq, g); + tree cond = gimple_assign_lhs (g); + g = gimple_build_assign (make_ssa_name (integer_type_node), + COND_EXPR, cond, + build_int_cst (integer_type_node, zero_val), + prev_lhs); + gimple_set_location (g, gimple_location (stmt)); + gimple_seq_add_stmt (&seq, g); + prev_lhs = gimple_assign_lhs (g); + } g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs); gimple_seq_add_stmt (&seq, g); -- 2.43.0