https://gcc.gnu.org/g:07a8538d90763f0ae640dea822bdeb63ea17ec44

commit r15-4673-g07a8538d90763f0ae640dea822bdeb63ea17ec44
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Thu Oct 17 08:40:34 2024 -0700

    match.pd: Add std::pow folding optimizations.
    
    This patch adds the following two simplifications in match.pd for
    POW_ALL and POWI:
    - pow (1.0/x, y) to pow (x, -y), avoiding the division
    - pow (0.0, x) to 0.0, avoiding the call to pow.
    The patterns are guarded by flag_unsafe_math_optimizations,
    !flag_trapping_math, and !HONOR_INFINITIES.
    The POW_ALL patterns are also gated under !flag_errno_math.
    The second pattern (pow (0.0, x) -> 0.0) is additionally guarded by
    !HONOR_NANS and !HONOR_SIGNED_ZEROS.
    
    Tests were added to confirm the application of the transform for
    builtins pow, powf, powl, powi, powif, powil, and powf16.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu and
    x86_64-linux-gnu, no regression.
    OK for mainline?
    
    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
    
    gcc/
            * match.pd: Fold pow (1.0/x, y) -> pow (x, -y) and
            pow (0.0, x) -> 0.0.
    
    gcc/testsuite/
            * gcc.dg/tree-ssa/pow_fold_1.c: New test.

Diff:
---
 gcc/match.pd                               | 28 ++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c | 42 ++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index f16b733b8914..809c717bc862 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8285,6 +8285,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (rdiv @0 (exps:s @1))
     (mult @0 (exps (negate @1)))))
 
+ (for pow (POW_ALL)
+  (if (! HONOR_INFINITIES (type)
+       && ! flag_trapping_math
+       && ! flag_errno_math)
+   /* Simplify pow(1.0/x, y) into pow(x, -y).  */
+   (simplify
+    (pow (rdiv:s real_onep@0 @1) @2)
+     (pow @1 (negate @2)))
+
+   /* Simplify pow(0.0, x) into 0.0.  */
+   (if (! HONOR_NANS (type) && ! HONOR_SIGNED_ZEROS (type))
+    (simplify
+     (pow real_zerop@0 @1)
+      @0))))
+
  (if (! HONOR_SIGN_DEPENDENT_ROUNDING (type)
       && ! HONOR_NANS (type) && ! HONOR_INFINITIES (type)
       && ! flag_trapping_math
@@ -8643,6 +8658,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (mult (POW:s @0 @1) (POW:s @2 @1))
    (POW (mult @0 @2) @1))
 
+ (if (! HONOR_INFINITIES (type) && ! flag_trapping_math)
+  /* Simplify powi(1.0/x, y) into powi(x, -y).  */
+  (simplify
+   (POWI (rdiv@3 real_onep@0 @1) @2)
+   (if (single_use (@3))
+    (POWI @1 (negate @2))))
+
+  /* Simplify powi(0.0, x) into 0.0.  */
+  (if (! HONOR_NANS (type) && ! HONOR_SIGNED_ZEROS (type))
+   (simplify
+    (POWI real_zerop@0 @1)
+     @0)))
+
  /* Simplify powi(x,y) * powi(z,y) -> powi(x*z,y). */
  (simplify
   (mult (POWI:s @0 @1) (POWI:s @2 @1))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c b/gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c
new file mode 100644
index 000000000000..d98bcb0827e4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized -fexcess-precision=16" } */
+/* { dg-add-options float16 } */
+/* { dg-require-effective-target float16_runtime } */
+/* { dg-require-effective-target c99_runtime } */
+
+extern void link_error (void);
+
+#define POW1OVER(TYPE1, TYPE2, CTY, TY)                        \
+  void                                                 \
+  pow1over_##TY (TYPE1 x, TYPE2 y)                     \
+  {                                                    \
+    TYPE1 t1 = 1.0##CTY / x;                           \
+    TYPE1 t2 = __builtin_pow##TY (t1, y);              \
+    TYPE2 t3 = -y;                                     \
+    TYPE1 t4 = __builtin_pow##TY (x, t3);              \
+    if (t2 != t4)                                      \
+      link_error ();                                   \
+  }                                                    \
+
+#define POW0(TYPE1, TYPE2, CTY, TY)                    \
+  void                                                 \
+  pow0_##TY (TYPE2 x)                                  \
+  {                                                    \
+    TYPE1 t1 = __builtin_pow##TY (0.0##CTY, x);                \
+    if (t1 != 0.0##CTY)                                        \
+      link_error ();                                   \
+  }                                                    \
+
+#define TEST_ALL(TYPE1, TYPE2, CTY, TY)                        \
+  POW1OVER (TYPE1, TYPE2, CTY, TY)                     \
+  POW0 (TYPE1, TYPE2, CTY, TY)
+
+TEST_ALL (double, double, , )
+TEST_ALL (float, float, f, f)
+TEST_ALL (_Float16, _Float16, f16, f16)
+TEST_ALL (long double, long double, L, l)
+TEST_ALL (double, int, , i)
+TEST_ALL (float, int, f, if)
+TEST_ALL (long double, int, L, il)
+
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */

Reply via email to