On Fri, 18 Oct 2024, Richard Sandiford wrote:
OK.
Thanks,
Richard.
> gcc/
> * match.pd: Simplify (X /[ex] C1) * (C1 * C2) -> X * C2.
>
> gcc/testsuite/
> * gcc.dg/tree-ssa/mulexactdiv-1.c: New test.
> * gcc.dg/tree-ssa/mulexactdiv-2.c: Likewise.
> * gcc.dg/tree-ssa/mulexactdiv-3.c: Likewise.
> * gcc.dg/tree-ssa/mulexactdiv-4.c: Likewise.
> * gcc.target/aarch64/sve/cnt_fold_1.c: Likewise.
> * gcc.target/aarch64/sve/cnt_fold_2.c: Likewise.
> ---
> gcc/match.pd | 8 ++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c | 23 ++++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c | 19 +++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c | 21 ++++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c | 14 +++
> .../gcc.target/aarch64/sve/cnt_fold_1.c | 110 ++++++++++++++++++
> .../gcc.target/aarch64/sve/cnt_fold_2.c | 55 +++++++++
> 7 files changed, 250 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 1b1d38cf105..6677bc06d80 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
> zerop
> initializer_each_zero_or_onep
> CONSTANT_CLASS_P
> + poly_int_tree_p
> tree_expr_nonnegative_p
> tree_expr_nonzero_p
> integer_valued_real_p
> @@ -5467,6 +5468,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (mult (convert1? (exact_div @0 @@1)) (convert2? @1))
> (convert @0))
>
> +/* (X /[ex] C1) * (C1 * C2) -> X * C2. */
> +(simplify
> + (mult (convert? (exact_div @0 INTEGER_CST@1)) poly_int_tree_p@2)
> + (with { poly_widest_int factor; }
> + (if (multiple_p (wi::to_poly_widest (@2), wi::to_widest (@1), &factor))
> + (mult (convert @0) { wide_int_to_tree (type, factor); }))))
> +
> /* Simplify (A / B) * B + (A % B) -> A. */
> (for div (trunc_div ceil_div floor_div round_div)
> mod (trunc_mod ceil_mod floor_mod round_mod)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c
> new file mode 100644
> index 00000000000..fa853eb7dff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +#define TEST_CMP(FN, DIV, MUL) \
> + int \
> + FN (int x) \
> + { \
> + if (x & 7) \
> + __builtin_unreachable (); \
> + x /= DIV; \
> + return x * MUL; \
> + }
> +
> +TEST_CMP (f1, 2, 6)
> +TEST_CMP (f2, 2, 10)
> +TEST_CMP (f3, 4, 80)
> +TEST_CMP (f4, 8, 200)
> +
> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr, } "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<rshift_expr, } "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 20,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 25,} "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c
> new file mode 100644
> index 00000000000..9df49690ab6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +#define TEST_CMP(FN, DIV, MUL) \
> + int \
> + FN (int x) \
> + { \
> + if (x & 7) \
> + __builtin_unreachable (); \
> + x /= DIV; \
> + return x * MUL; \
> + }
> +
> +TEST_CMP (f1, 2, 1)
> +TEST_CMP (f2, 2, 5)
> +TEST_CMP (f3, 4, 10)
> +TEST_CMP (f4, 8, 100)
> +TEST_CMP (f5, 16, 32)
> +
> +/* { dg-final { scan-tree-dump-times {<[a-z]*_div_expr, } 5 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c
> new file mode 100644
> index 00000000000..38778a0d7a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c
> @@ -0,0 +1,21 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +#define TEST_CMP(FN, TYPE1, DIV, TYPE2, MUL) \
> + TYPE2 \
> + FN (TYPE1 x) \
> + { \
> + if (x & 7) \
> + __builtin_unreachable (); \
> + x /= (TYPE1) (DIV); \
> + return (TYPE2) x * (TYPE2) (MUL); \
> + }
> +
> +TEST_CMP (f1, int, 2, long, (~0UL >> 1) & -2)
> +TEST_CMP (f2, int, 4, unsigned long, -8)
> +TEST_CMP (f3, int, 8, unsigned int, -24)
> +TEST_CMP (f4, long, 2, int, (~0U >> 1) & -2)
> +TEST_CMP (f5, long, 4, unsigned int, 100)
> +TEST_CMP (f6, long, 8, unsigned long, 200)
> +
> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr, } "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<rshift_expr, } "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c
> new file mode 100644
> index 00000000000..b641c0bff6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +int
> +f1 (int x)
> +{
> + if (x & 15)
> + __builtin_unreachable ();
> + x /= 2;
> + x = (unsigned short) x * 4;
> + return x;
> +}
> +
> +
> +/* { dg-final { scan-tree-dump {<exact_div_expr, } "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c
> new file mode 100644
> index 00000000000..afa50f93a38
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c
> @@ -0,0 +1,110 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** f1:
> +** cntd x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f1 (int x)
> +{
> + if (x & 1)
> + __builtin_unreachable ();
> + x /= 2;
> + return x * svcntw();
> +}
> +
> +/*
> +** f2:
> +** cntd x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f2 (int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x /= 4;
> + return x * svcnth();
> +}
> +
> +/*
> +** f3:
> +** cntd x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f3 (int x)
> +{
> + if (x & 7)
> + __builtin_unreachable ();
> + x /= 8;
> + return x * svcntb();
> +}
> +
> +/*
> +** f4:
> +** cntw x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f4 (int x)
> +{
> + if (x & 1)
> + __builtin_unreachable ();
> + x /= 2;
> + return x * svcnth();
> +}
> +
> +/*
> +** f5:
> +** cntw x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f5 (int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> + x /= 4;
> + return x * svcntb();
> +}
> +
> +/*
> +** f6:
> +** cnth x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f6 (int x)
> +{
> + if (x & 1)
> + __builtin_unreachable ();
> + x /= 2;
> + return x * svcntb();
> +}
> +
> +/*
> +** f7:
> +** cntb x([0-9]+)
> +** mul w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +int
> +f7 (int x)
> +{
> + if (x & 15)
> + __builtin_unreachable ();
> + x /= 16;
> + return x * svcntb() * 16;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c
> new file mode 100644
> index 00000000000..7412b7b964e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <arm_sve.h>
> +
> +int
> +f1 (int x)
> +{
> + x /= 2;
> + return x * svcntw();
> +}
> +
> +int
> +f2 (int x)
> +{
> + x /= 4;
> + return x * svcnth();
> +}
> +
> +int
> +f3 (int x)
> +{
> + x /= 8;
> + return x * svcntb();
> +}
> +
> +int
> +f4 (int x)
> +{
> + x /= 2;
> + return x * svcnth();
> +}
> +
> +int
> +f5 (int x)
> +{
> + x /= 4;
> + return x * svcntb();
> +}
> +
> +int
> +f6 (int x)
> +{
> + x /= 2;
> + return x * svcntb();
> +}
> +
> +int
> +f7 (int x)
> +{
> + x /= 16;
> + return x * svcntb() * 16;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tasr\t} 7 } } */
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)