On Fri, 18 Oct 2024, Richard Sandiford wrote:
> This patch adds a rule to simplify (X >> C1) * (C2 << C1) -> X * C2
> when the low C1 bits of X are known to be zero. As with the earlier
> X >> C1 << (C2 + C1) patch, any single conversion is allowed between
> the shift and the multiplication.
OK.
Thanks,
Richard.
> gcc/
> * match.pd: Simplify (X >> C1) * (C2 << C1) -> X * C2 if the
> low C1 bits of X are zero.
>
> gcc/testsuite/
> * gcc.dg/tree-ssa/shifts-3.c: New test.
> * gcc.dg/tree-ssa/shifts-4.c: Likewise.
> * gcc.target/aarch64/sve/cnt_fold_5.c: Likewise.
> ---
> gcc/match.pd | 13 ++++
> gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c | 65 +++++++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c | 23 +++++++
> .../gcc.target/aarch64/sve/cnt_fold_5.c | 38 +++++++++++
> 4 files changed, 139 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 41903554478..85f5eeefa08 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4915,6 +4915,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && wi::to_widest (@2) >= wi::to_widest (@1)
> && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)))
> (lshift (convert @0) (minus @2 @1))))
> +
> +/* (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero. */
> +(simplify
> + (mult (convert? (rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1))
> + poly_int_tree_p@2)
> + (with { poly_widest_int factor; }
> + (if (INTEGRAL_TYPE_P (type)
> + && wi::ltu_p (wi::to_wide (@1), element_precision (type))
> + && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))
> + && multiple_p (wi::to_poly_widest (@2),
> + widest_int (1) << tree_to_uhwi (@1),
> + &factor))
> + (mult (convert @0) { wide_int_to_tree (type, factor); }))))
> #endif
>
> /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
> new file mode 100644
> index 00000000000..dcff518e630
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
> @@ -0,0 +1,65 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +unsigned int
> +f1 (unsigned int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x >>= 2;
> + return x * 20;
> +}
> +
> +unsigned int
> +f2 (unsigned int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + unsigned char y = x;
> + y >>= 2;
> + return y * 36;
> +}
> +
> +unsigned long
> +f3 (unsigned int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x >>= 2;
> + return (unsigned long) x * 88;
> +}
> +
> +int
> +f4 (int x)
> +{
> + if (x & 15)
> + __builtin_unreachable ();
> + x >>= 4;
> + return x * 48;
> +}
> +
> +unsigned int
> +f5 (int x)
> +{
> + if (x & 31)
> + __builtin_unreachable ();
> + x >>= 5;
> + return x * 3200;
> +}
> +
> +unsigned int
> +f6 (unsigned int x)
> +{
> + if (x & 1)
> + __builtin_unreachable ();
> + x >>= 1;
> + return x * (~0U / 3 & -2);
> +}
> +
> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 9,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<(?:widen_)?mult_expr, [^,]*, [^,]*, 22,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 100,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 715827882,} "optimized" { target int32 } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
> new file mode 100644
> index 00000000000..5638653d0c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
> @@ -0,0 +1,23 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +unsigned int
> +f1 (unsigned int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x >>= 2;
> + return x * 10;
> +}
> +
> +unsigned int
> +f2 (unsigned int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x >>= 3;
> + return x * 24;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {<rshift_expr,} 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 10,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 24,} "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
> new file mode 100644
> index 00000000000..3f60e9b4941
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** f1:
> +** ...
> +** cntd [^\n]+
> +** ...
> +** mul [^\n]+
> +** ret
> +*/
> +uint64_t
> +f1 (int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x >>= 2;
> + return (uint64_t) x * svcnth ();
> +}
> +
> +/*
> +** f2:
> +** ...
> +** asr [^\n]+
> +** ...
> +** ret
> +*/
> +uint64_t
> +f2 (int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x >>= 2;
> + return (uint64_t) x * svcntw ();
> +}
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)