This patch resolves PR tree-optimization/114661, by generalizing the set
of expressions that we canonicalize to multiplication. This extends the
optimization(s) contributed (by me) back in July 2021.
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575999.html
The existing transformation folds (X*C1)^(X<<C2) into X*C3 when
allowed. A subtlety is that for non-wrapping integer types, we
actually fold this into (int)((unsigned)X*C3) so that we don't
introduce an undefined overflow that wasn't in the original.
Unfortunately, this transformation confuses itself, as the type-safe
multiplication isn't recognized when further combining bit operations.
Fixed here by adding transforms to turn (int)((unsigned)X*C1)^(X<<C2)
into (int)((unsigned)X*C3) so that match.pd and EVRP can continue
to construct multiplications.
For the example given in the PR:
unsigned mul(unsigned char c) {
if (c > 3) __builtin_unreachable();
return c << 18 | c << 15 |
c << 12 | c << 9 |
c << 6 | c << 3 | c;
}
GCC on x86_64 with -O2 previously generated:
mul: movzbl %dil, %edi
leal (%rdi,%rdi,8), %edx
leal 0(,%rdx,8), %eax
movl %edx, %ecx
sall $15, %edx
orl %edi, %eax
sall $9, %ecx
orl %ecx, %eax
orl %edx, %eax
ret
with this patch we now generate:
mul: movzbl %dil, %eax
imull $299593, %eax, %eax
ret
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for mainline?
2024-07-09 Roger Sayle <[email protected]>
gcc/ChangeLog
PR tree-optimization/114661
* match.pd ((X*C1)|(X*C2) to X*(C1+C2)): Additionally recognize
multiplications surrounded by casts to an unsigned type and back
such as those generated by these transformations.
gcc/testsuite/ChangeLog
PR tree-optimization/114661
* gcc.dg/pr114661.c: New test case.
Thanks in advance,
Roger
--
diff --git a/gcc/match.pd b/gcc/match.pd
index 4edfa2a..228387f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4205,7 +4205,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
t = unsigned_type_for (t);
wide_int wone = wi::one (TYPE_PRECISION (t));
wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), wone); }
- (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); }))))))
+ (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
+ (simplify
+ (op:c (convert:s@0 (mult:s@1 (convert @2) INTEGER_CST@3))
+ (lshift:s@4 @2 INTEGER_CST@5))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+ && TREE_TYPE (@2) == type
+ && TYPE_UNSIGNED (TREE_TYPE (@1))
+ && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@1))
+ && tree_int_cst_sgn (@5) > 0
+ && (tree_nonzero_bits (@0) & tree_nonzero_bits (@4)) == 0)
+ (with { tree t = TREE_TYPE (@1);
+ wide_int wone = wi::one (TYPE_PRECISION (t));
+ wide_int c = wi::add (wi::to_wide (@3),
+ wi::lshift (wone, wi::to_wide (@5))); }
+ (convert (mult:t (convert:t @2) { wide_int_to_tree (t, c); })))))
+ (simplify
+ (op:c (convert:s@0 (mult:s@1 (convert @2) INTEGER_CST@3))
+ @2)
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+ && TREE_TYPE (@2) == type
+ && TYPE_UNSIGNED (TREE_TYPE (@1))
+ && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@1))
+ && (tree_nonzero_bits (@0) & tree_nonzero_bits (@2)) == 0)
+ (with { tree t = TREE_TYPE (@1);
+ wide_int wone = wi::one (TYPE_PRECISION (t));
+ wide_int c = wi::add (wi::to_wide (@3), wone); }
+ (convert (mult:t (convert:t @2) { wide_int_to_tree (t, c); }))))))
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
diff --git a/gcc/testsuite/gcc.dg/pr114661.c b/gcc/testsuite/gcc.dg/pr114661.c
new file mode 100644
index 0000000..e6b5c69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114661.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+unsigned mul(unsigned char c) {
+ if (c > 3) __builtin_unreachable();
+ return c << 18 | c << 15 |
+ c << 12 | c << 9 |
+ c << 6 | c << 3 | c;
+}
+/* { dg-final { scan-tree-dump-times " \\* 299593" 1 "evrp" } } */