https://gcc.gnu.org/g:df9451936c6c9e4faea371e3f188e1fc6b6d39e3

commit r15-2053-gdf9451936c6c9e4faea371e3f188e1fc6b6d39e3
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Tue Jul 16 07:58:28 2024 +0100

    PR tree-optimization/114661: Generalize MULT_EXPR recognition in match.pd.
    
    This patch resolves PR tree-optimization/114661, by generalizing the set
    of expressions that we canonicalize to multiplication.  This extends the
    optimization(s) contributed (by me) back in July 2021.
    https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575999.html
    
    The existing transformation folds (X*C1)^(X<<C2) into X*C3 when
    allowed.  A subtlety is that for non-wrapping integer types, we
    actually fold this into (int)((unsigned)X*C3) so that we don't
    introduce an undefined overflow that wasn't in the original.
    Unfortunately, this transformation confuses itself, as the type-cast
    multiplication isn't recognized when further combining bit operations.
    Fixed here by allowing optional useless type conversions in transforms
    to turn (int)((unsigned)X*C1)^(X<<C2) into (int)((unsigned)X*C3) so
    that match.pd and EVRP can continue to construct multiplications.
    
    For the example given in the PR:
    
    unsigned mul(unsigned char c) {
        if (c > 3) __builtin_unreachable();
        return c << 18 | c << 15 |
               c << 12 | c << 9 |
               c << 6 | c << 3 | c;
    }
    
    GCC on x86_64 with -O2 previously generated:
    
    mul:    movzbl  %dil, %edi
            leal    (%rdi,%rdi,8), %edx
            leal    0(,%rdx,8), %eax
            movl    %edx, %ecx
            sall    $15, %edx
            orl     %edi, %eax
            sall    $9, %ecx
            orl     %ecx, %eax
            orl     %edx, %eax
            ret
    
    with this patch we now generate:
    
    mul:    movzbl  %dil, %eax
            imull   $299593, %eax, %eax
            ret
    
    2024-07-16  Roger Sayle  <ro...@nextmovesoftware.com>
                Richard Biener  <rguent...@suse.de>
    
    gcc/ChangeLog
            PR tree-optimization/114661
            * match.pd ((X*C1)|(X*C2) to X*(C1+C2)): Allow optional useless
            type conversions around multiplications, such as those inserted
            by this transformation.
    
    gcc/testsuite/ChangeLog
            PR tree-optimization/114661
            * gcc.dg/pr114661.c: New test case.

Diff:
---
 gcc/match.pd                    | 43 +++++++++++++++++++++++++----------------
 gcc/testsuite/gcc.dg/pr114661.c | 10 ++++++++++
 2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3759c64d461f..24a0bbead3e7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4171,30 +4171,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    Likewise, handle (X<<C3) and X as legitimate variants of X*C.  */
 (for op (bit_ior bit_xor)
  (simplify
-  (op (mult:s@0 @1 INTEGER_CST@2)
-      (mult:s@3 @1 INTEGER_CST@4))
-  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
-       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
-   (mult @1
-        { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); })))
+  (op (nop_convert?:s@6 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
+      (nop_convert?:s@7 (mult:s@3 (nop_convert? @5) INTEGER_CST@4)))
+  (if (INTEGRAL_TYPE_P (type)
+       && operand_equal_p (@1, @5, 0)
+       && (tree_nonzero_bits (@6) & tree_nonzero_bits (@7)) == 0)
+   (with { tree t = type;
+          if (!TYPE_OVERFLOW_WRAPS (t))
+            t = unsigned_type_for (t);
+          wide_int c = wi::add (wi::to_wide (@2), wi::to_wide (@4)); }
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
  (simplify
-  (op:c (mult:s@0 @1 INTEGER_CST@2)
+  (op:c (nop_convert?:s@5 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
        (lshift:s@3 @1 INTEGER_CST@4))
-  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+  (if (INTEGRAL_TYPE_P (type)
        && tree_int_cst_sgn (@4) > 0
-       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
-   (with { wide_int wone = wi::one (TYPE_PRECISION (type));
+       && (tree_nonzero_bits (@5) & tree_nonzero_bits (@3)) == 0)
+   (with { tree t = type;
+          if (!TYPE_OVERFLOW_WRAPS (t))
+            t = unsigned_type_for (t);
+          wide_int wone = wi::one (TYPE_PRECISION (type));
           wide_int c = wi::add (wi::to_wide (@2),
                                 wi::lshift (wone, wi::to_wide (@4))); }
-    (mult @1 { wide_int_to_tree (type, c); }))))
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
  (simplify
-  (op:c (mult:s@0 @1 INTEGER_CST@2)
+  (op:c (nop_convert?:s@3 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
        @1)
-  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
-       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
-   (mult @1
-        { wide_int_to_tree (type,
-                            wi::add (wi::to_wide (@2), 1)); })))
+  (if (INTEGRAL_TYPE_P (type)
+       && (tree_nonzero_bits (@3) & tree_nonzero_bits (@1)) == 0)
+   (with { tree t = type;
+          if (!TYPE_OVERFLOW_WRAPS (t))
+            t = unsigned_type_for (t);
+          wide_int c = wi::add (wi::to_wide (@2), 1); }
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
  (simplify
   (op (lshift:s@0 @1 INTEGER_CST@2)
       (lshift:s@3 @1 INTEGER_CST@4))
diff --git a/gcc/testsuite/gcc.dg/pr114661.c b/gcc/testsuite/gcc.dg/pr114661.c
new file mode 100644
index 000000000000..e6b5c69dba86
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114661.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+unsigned mul(unsigned char c) {
+    if (c > 3) __builtin_unreachable();
+    return c << 18 | c << 15 |
+        c << 12 | c << 9 |
+        c << 6 | c << 3 | c;
+}
+/* { dg-final { scan-tree-dump-times " \\* 299593" 1 "evrp" } } */

Reply via email to