https://gcc.gnu.org/g:5a674367c6da870184f3bdb7ec110b96aa91bb2b
commit r15-5201-g5a674367c6da870184f3bdb7ec110b96aa91bb2b Author: Soumya AR <soum...@nvidia.com> Date: Wed Nov 13 15:41:15 2024 +0530 Match: Fold pow calls to ldexp when possible [PR57492] This patch transforms the following POW calls to equivalent LDEXP calls, as discussed in PR57492: powi (powof2, i) -> ldexp (1.0, i * log2 (powof2)) powof2 * ldexp (x, i) -> ldexp (x, i + log2 (powof2)) a * ldexp (1., i) -> ldexp (a, i) This is especially helpful for SVE architectures as LDEXP calls can be implemented using the FSCALE instruction, as seen in the following patch: https://gcc.gnu.org/g:9b2915d95d855333d4d8f66b71a75f653ee0d076 SPEC2017 was run with this patch; while there are no noticeable improvements, there are no non-noise regressions either. The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions. Signed-off-by: Soumya AR <soum...@nvidia.com> gcc/ChangeLog: PR target/57492 * match.pd: Added patterns to fold calls to pow to ldexp and optimize specific ldexp calls. gcc/testsuite/ChangeLog: PR target/57492 * gcc.dg/tree-ssa/ldexp.c: New test. * gcc.dg/tree-ssa/pow-to-ldexp.c: New test. Diff: --- gcc/match.pd | 25 ++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/ldexp.c | 32 ++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c | 44 ++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 9d86d0fe395a..6fa1b59fc971 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8485,6 +8485,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && real_isfinite (TREE_REAL_CST_PTR (@0))) (exps (plus (mult (logs @0) @1) @2))))) + /* Simplify powi (powof2, i) to ldexp (1, i * log2 (powof2)). 
*/ + (simplify + (POWI REAL_CST@0 @1) + (with { HOST_WIDE_INT tmp = 0; } + (if (real_isinteger (&TREE_REAL_CST (@0), &tmp) + && tmp > 0 && pow2p_hwi (tmp)) + (LDEXP { build_one_cst (type); } + (mult @1 {build_int_cst (integer_type_node, + exact_log2 (tmp)); }))))) + + /* Simplify powof2 * ldexp (x, i) to ldexp (x, i + log2 (powof2)) */ + (simplify + (mult:c REAL_CST@0 (LDEXP @1 @2)) + (with { HOST_WIDE_INT tmp = 0; } + (if (real_isinteger (&TREE_REAL_CST (@0), &tmp) + && tmp > 0 && pow2p_hwi (tmp)) + (LDEXP @1 (plus {build_int_cst (integer_type_node, + exact_log2 (tmp)); } @2))))) + + /* Simplify a * ldexp (1., i) to ldexp (a, i). */ + (simplify + (mult:c @0 (LDEXP REAL_CST@1 @2)) + (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst1)) + (LDEXP @0 @2))) + (for sqrts (SQRT) cbrts (CBRT) pows (POW) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c new file mode 100644 index 000000000000..63b36f97a30d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-assembler-not "\tfmul\t" } } */ + +#define TEST(TYPE, BUILTIN, CONST, NAME) \ + TYPE test_##NAME##_1(TYPE a, int i) \ + { \ + return CONST * __builtin_##BUILTIN(a, i); \ + } \ + TYPE test_##NAME##_2(int i) \ + { \ + return 45 * __builtin_##BUILTIN(1.0, i); \ + } \ + TYPE test_##NAME##_3(TYPE a, int i) \ + { \ + return a * __builtin_##BUILTIN(1.0, i); \ + } \ + TYPE test_##NAME##_4(int i) \ + { \ + TYPE a = CONST; \ + return a * __builtin_##BUILTIN(a, i); \ + } \ + TYPE test_##NAME##_5(TYPE a, int i) \ + { \ + TYPE t1 = a; \ + return t1 * __builtin_##BUILTIN(1.0, i); \ + } + +TEST(double, ldexp, 8.0, double_ldexp) +TEST(float, ldexpf, 8.0f, float_ldexp) +TEST(long double, ldexpl, 8.0L, long_ldexp) \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c new file mode 100644 index 
000000000000..007949dbb539 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast -fdump-tree-optimized" } */ + +/* { dg-final { scan-tree-dump-times "__builtin_ldexp\ " 7 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ldexpf\ " 7 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ldexpl\ " 7 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_powi" 0 "optimized" } } */ + +#define TEST(TYPE, BUILTIN, CONST, NAME) \ + TYPE test_##NAME##_1(TYPE a, int i) \ + { \ + return a * __builtin_##BUILTIN(CONST, i); \ + } \ + TYPE test_##NAME##_2(int i) \ + { \ + return __builtin_##BUILTIN(CONST, i); \ + } \ + TYPE test_##NAME##_3(int i) \ + { \ + return CONST * __builtin_##BUILTIN(CONST, i); \ + } \ + TYPE test_##NAME##_4(TYPE a, int i) \ + { \ + TYPE t1 = a; \ + return t1 * __builtin_##BUILTIN(CONST, i); \ + } \ + TYPE test_##NAME##_5(int i) \ + { \ + TYPE powof2 = 8; \ + return powof2 * __builtin_##BUILTIN(CONST, i); \ + } \ + TYPE test_##NAME##_6(int i) \ + { \ + TYPE powof2 = 32; \ + return __builtin_##BUILTIN(powof2, i); \ + } \ + TYPE test_##NAME##_7(int i) \ + { \ + return 2 * __builtin_##BUILTIN(CONST, i); \ + } + +TEST(double, powi, 2.0, double_powi) +TEST(float, powif, 2.0f, float_powif) +TEST(long double, powil, 2.0, long_powil)