https://gcc.gnu.org/g:5a674367c6da870184f3bdb7ec110b96aa91bb2b

commit r15-5201-g5a674367c6da870184f3bdb7ec110b96aa91bb2b
Author: Soumya AR <soum...@nvidia.com>
Date:   Wed Nov 13 15:41:15 2024 +0530

    Match: Fold pow calls to ldexp when possible [PR57492]
    
    This patch transforms the following POW calls to equivalent LDEXP calls, as
    discussed in PR57492:
    
    powi (powof2, i) -> ldexp (1.0, i * log2 (powof2))
    
    powof2 * ldexp (x, i) -> ldexp (x, i + log2 (powof2))
    
    a * ldexp(1., i) -> ldexp (a, i)
    
    This is especially helpful for SVE architectures as LDEXP calls can be
    implemented using the FSCALE instruction, as seen in the following patch:
    https://gcc.gnu.org/g:9b2915d95d855333d4d8f66b71a75f653ee0d076
    
    SPEC2017 was run with this patch, while there are no noticeable improvements,
    there are no non-noise regressions either.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    
    Signed-off-by: Soumya AR <soum...@nvidia.com>
    
    gcc/ChangeLog:
            PR target/57492
            * match.pd: Added patterns to fold calls to pow to ldexp and optimize
            specific ldexp calls.
    
    gcc/testsuite/ChangeLog:
            PR target/57492
            * gcc.dg/tree-ssa/ldexp.c: New test.
            * gcc.dg/tree-ssa/pow-to-ldexp.c: New test.

Diff:
---
 gcc/match.pd                                 | 25 ++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/ldexp.c        | 32 ++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c | 44 ++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 9d86d0fe395a..6fa1b59fc971 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8485,6 +8485,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && real_isfinite (TREE_REAL_CST_PTR (@0)))
     (exps (plus (mult (logs @0) @1) @2)))))
 
+ /* Simplify powi (powof2, i) to ldexp (1, i * log2 (powof2)). */
+ (simplify
+  (POWI REAL_CST@0 @1)
+  (with { HOST_WIDE_INT tmp = 0; }
+   (if (real_isinteger (&TREE_REAL_CST (@0), &tmp)
+       && tmp > 0 && pow2p_hwi (tmp))
+    (LDEXP { build_one_cst (type); }
+       (mult @1 {build_int_cst (integer_type_node,
+            exact_log2 (tmp)); })))))
+
+ /* Simplify powof2 * ldexp (x, i) to ldexp (x, i + log2 (powof2)) */
+ (simplify
+  (mult:c REAL_CST@0 (LDEXP @1 @2))
+  (with { HOST_WIDE_INT tmp = 0; }
+   (if (real_isinteger (&TREE_REAL_CST (@0), &tmp)
+       && tmp > 0 && pow2p_hwi (tmp))
+    (LDEXP @1 (plus {build_int_cst (integer_type_node,
+            exact_log2 (tmp)); } @2)))))
+
+ /* Simplify a * ldexp (1., i) to ldexp (a, i). */
+ (simplify
+  (mult:c @0 (LDEXP REAL_CST@1 @2))
+  (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst1))
+   (LDEXP @0 @2)))
+
  (for sqrts (SQRT)
       cbrts (CBRT)
       pows (POW)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c
new file mode 100644
index 000000000000..63b36f97a30d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+
+/* { dg-final { scan-assembler-not "\tfmul\t" } } */
+
+#define TEST(TYPE, BUILTIN, CONST, NAME)                       \
+  TYPE test_##NAME##_1(TYPE a, int i)                          \
+  {                                                            \
+    return CONST * __builtin_##BUILTIN(a, i);                  \
+  }                                                            \
+  TYPE test_##NAME##_2(int i)                                  \
+  {                                                            \
+    return 45 * __builtin_##BUILTIN(1.0, i);                   \
+  }                                                            \
+  TYPE test_##NAME##_3(TYPE a, int i)                          \
+  {                                                            \
+    return a * __builtin_##BUILTIN(1.0, i);                    \
+  }                                                            \
+  TYPE test_##NAME##_4(int i)                                  \
+  {                                                            \
+    TYPE a = CONST;                                            \
+    return a * __builtin_##BUILTIN(a, i);                      \
+  }                                                            \
+  TYPE test_##NAME##_5(TYPE a, int i)                          \
+  {                                                            \
+    TYPE t1 = a;                                               \
+    return t1 * __builtin_##BUILTIN(1.0, i);                   \
+  }
+
+TEST(double, ldexp, 8.0, double_ldexp)
+TEST(float, ldexpf, 8.0f, float_ldexp)
+TEST(long double, ldexpl, 8.0L, long_ldexp)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c
new file mode 100644
index 000000000000..007949dbb539
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fdump-tree-optimized" } */
+
+/* { dg-final { scan-tree-dump-times "__builtin_ldexp\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ldexpf\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ldexpl\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_powi" 0 "optimized" } } */
+
+#define TEST(TYPE, BUILTIN, CONST, NAME)                        \
+  TYPE test_##NAME##_1(TYPE a, int i)                           \
+  {                                                             \
+    return a * __builtin_##BUILTIN(CONST, i);                   \
+  }                                                             \
+  TYPE test_##NAME##_2(int i)                                   \
+  {                                                             \
+    return __builtin_##BUILTIN(CONST, i);                       \
+  }                                                             \
+  TYPE test_##NAME##_3(int i)                                   \
+  {                                                             \
+    return CONST * __builtin_##BUILTIN(CONST, i);               \
+  }                                                             \
+  TYPE test_##NAME##_4(TYPE a, int i)                           \
+  {                                                             \
+    TYPE t1 = a;                                                \
+    return t1 * __builtin_##BUILTIN(CONST, i);                  \
+  }                                                             \
+  TYPE test_##NAME##_5(int i)                                   \
+  {                                                             \
+    TYPE powof2 = 8;                                            \
+    return powof2 * __builtin_##BUILTIN(CONST, i);              \
+  }                                                             \
+  TYPE test_##NAME##_6(int i)                                   \
+  {                                                             \
+    TYPE powof2 = 32;                                           \
+    return __builtin_##BUILTIN(powof2, i);                      \
+  }                                                             \
+  TYPE test_##NAME##_7(int i)                                   \
+  {                                                             \
+    return 2 * __builtin_##BUILTIN(CONST, i);                   \
+  }
+
+TEST(double, powi, 2.0, double_powi)
+TEST(float, powif, 2.0f, float_powif)
+TEST(long double, powil, 2.0, long_powil)

Reply via email to