The following allows us to emit a conditional move when the value
of the table-based CTZ/CLZ implementation at zero differs from what
the target implementation guarantees or we cannot easily fix it up
otherwise.  In that case emit val == 0 ? table-based-zero-result : ...

Bootstrapped and tested on x86_64-unknown-linux-gnu.

        PR tree-optimization/120032
        * tree-ssa-forwprop.cc (simplify_count_zeroes): When we cannot use
        the IFN to determine the result at zero use a conditional move
        to reproduce the correct result from the table-based
        algorithm.

        * gcc.target/i386/pr120032-3.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr120032-3.c | 20 ++++++++++++++++
 gcc/tree-ssa-forwprop.cc                   | 27 +++++++++++++++-------
 2 files changed, 39 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120032-3.c

diff --git a/gcc/testsuite/gcc.target/i386/pr120032-3.c 
b/gcc/testsuite/gcc.target/i386/pr120032-3.c
new file mode 100644
index 00000000000..9523bbb0f5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120032-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlzcnt" } */
+
+unsigned int
+ZSTD_countLeadingZeros32_fallback(unsigned int val)
+{
+  static const unsigned int DeBruijnClz[32]
+    = { 0, 9, 1, 10, 13, 21, 2, 29,
+       11, 14, 16, 18, 22, 25, 3, 30,
+       8, 12, 20, 28, 15, 17, 24, 7,
+       19, 27, 23, 6, 26, 5, 4, 31};
+  val |= val >> 1;  /* Copy the highest set bit into every lower bit.  */
+  val |= val >> 2;
+  val |= val >> 4;
+  val |= val >> 8;
+  val |= val >> 16;
+  return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];  /* 31 when val == 0.  */
+}
+
+/* { dg-final { scan-assembler "lzcnt" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 0c2b10e92aa..43b1c9d696f 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -2728,13 +2728,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
       nargs = 1;
     }
 
-  /* Skip if there is no value defined at zero, or if we can't easily
-     return the correct value for zero.  */
-  if (!zero_ok)
-    return false;
-  if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == input_bits))
-    return false;
-
   gimple_seq seq = NULL;
   gimple *g;
   gcall *call = gimple_build_call_internal (fn, nargs, res_ops[0],
@@ -2758,8 +2751,10 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
       prev_lhs = gimple_assign_lhs (g);
     }
 
+  if (zero_ok && zero_val == ctz_val)
+    ;
   /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0.  */
-  if (zero_val == 0 && ctz_val == input_bits)
+  else if (zero_ok && zero_val == 0 && ctz_val == input_bits)
     {
       g = gimple_build_assign (make_ssa_name (integer_type_node),
                               BIT_AND_EXPR, prev_lhs,
@@ -2769,6 +2764,22 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
       gimple_seq_add_stmt (&seq, g);
       prev_lhs = gimple_assign_lhs (g);
     }
+  /* As fallback emit a conditional move.  */
+  else
+    {
+      g = gimple_build_assign (make_ssa_name (boolean_type_node), EQ_EXPR,
+                              res_ops[0], build_zero_cst (input_type));
+      gimple_set_location (g, gimple_location (stmt));
+      gimple_seq_add_stmt (&seq, g);
+      tree cond = gimple_assign_lhs (g);
+      g = gimple_build_assign (make_ssa_name (integer_type_node),
+                              COND_EXPR, cond,
+                              build_int_cst (integer_type_node, zero_val),
+                              prev_lhs);
+      gimple_set_location (g, gimple_location (stmt));
+      gimple_seq_add_stmt (&seq, g);
+      prev_lhs = gimple_assign_lhs (g);
+    }
 
   g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
   gimple_seq_add_stmt (&seq, g);
-- 
2.43.0

Reply via email to