This PR is about the pattern matching in tree-ssa-forwprop.cc not
working for the fallback implementation in ZSTD which uses a cast
around the negation of the value to be tested.  There's a pattern
eliding casts in (T')-(T)x already but that only covered an
inner widening conversion.  The following extends this to other
conversions given the negation will then be carried out in an
unsigned type.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/120031
        * match.pd ((nop_outer_cast)-(inner_cast)var -> -(outer_cast)(var)):
        Allow inner conversions that are not widenings when the outer
        type is unsigned.

        * gcc.target/i386/pr120031.c: New testcase.
---
 gcc/match.pd                             |  9 +++++----
 gcc/testsuite/gcc.target/i386/pr120031.c | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120031.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 2a63e4c7ddb..67896f30b43 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1123,9 +1123,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #endif
 
 /* (nop_outer_cast)-(inner_cast)var -> -(outer_cast)(var)
-   if var is smaller in precision.
-   This is always safe for both doing the negative in signed or unsigned
-   as the value for undefined will not show up.
+   If var is smaller in precision this is always safe for both doing
+   the negative in signed or unsigned as the value for undefined will not
+   show up.  Else it is safe if the negation is done in an unsigned type.
    Note the outer cast cannot be a boolean type as the only valid values
    are 0,-1/1 (depending on the signedness of the boolean) and the negative
    is there to get the correct value.  */
@@ -1133,7 +1133,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (convert (negate:s@1 (convert:s @0)))
  (if (INTEGRAL_TYPE_P (type)
       && tree_nop_conversion_p (type, TREE_TYPE (@1))
-      && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+      && (TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+         || TYPE_UNSIGNED (type))
       && TREE_CODE (type) != BOOLEAN_TYPE)
     (negate (convert @0))))
 
diff --git a/gcc/testsuite/gcc.target/i386/pr120031.c b/gcc/testsuite/gcc.target/i386/pr120031.c
new file mode 100644
index 00000000000..e329cbc2f54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120031.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+
+unsigned int
+ZSTD_countTrailingZeros32_fallback (unsigned int val)
+{
+  static const unsigned int DeBruijn[32]
+    = { 0, 1, 28, 2, 29, 14, 24, 3,
+       30, 22, 20, 15, 25, 17, 4, 8,
+       31, 27, 13, 23, 21, 19, 16, 7,
+       26, 12, 18, 6, 11, 5, 10, 9};
+  return DeBruijn[((unsigned int) ((val & -(int) val) * 0x077CB531U)) >> 27];
+}
+
+/* { dg-final { scan-assembler "tzcnt" } } */
-- 
2.43.0

Reply via email to