This patch implements the copysign optimization for reassoc that I promised I'd look into.  I.e.,
CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0

After getting familiar with reassoc a bit, this wasn't that hard.  But I'm
hopeless when it comes to floating-point stuff, so I'd appreciate it if you
could glance over the tests.  The reassoc-40.c test should address Joseph's
comment in the audit trail (with -fno-rounding-math the optimization would
take place).  For 0.0 * copysign (cst, x), the result is folded into 0.0 way
before reassoc, so we probably don't have to pay attention to this case.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-13  Marek Polacek  <pola...@redhat.com>

	PR tree-optimization/67815
	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
	(reassociate_bb): Call it.

	* gcc.dg/tree-ssa/reassoc-39.c: New test.
	* gcc.dg/tree-ssa/reassoc-40.c: New test.

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..b8897b7 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
   return result;
 }
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst1 = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
+    return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+    {
+      if (TREE_CODE (oe->op) == SSA_NAME)
+	{
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+	  if (is_gimple_call (def_stmt))
+	    {
+	      tree fndecl = gimple_call_fndecl (def_stmt);
+	      tree cst2;
+	      switch (DECL_FUNCTION_CODE (fndecl))
+		{
+		CASE_FLT_FN (BUILT_IN_COPYSIGN):
+		  cst2 = gimple_call_arg (def_stmt, 0);
+		  /* The first argument of copysign must be a constant,
+		     otherwise there's nothing to do.  */
+		  if (TREE_CODE (cst2) == REAL_CST)
+		    {
+		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
+					      cst1, cst2);
+		      /* If we couldn't fold to a single constant, skip it.  */
+		      if (mul == NULL_TREE)
+			break;
+		      /* We're going to replace the copysign argument with
+			 the multiplication product.  Remove the constant.  */
+		      ops->pop ();
+		      gimple_call_set_arg (def_stmt, 0, mul);
+		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
+		      /* Handle the CST1 < 0 case -- negate the result.  */
+		      if (cst1_neg)
+			{
+			  tree lhs = gimple_call_lhs (def_stmt);
+			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+			  gimple *negate_stmt
+			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
+			  insert_stmt_after (negate_stmt, def_stmt);
+			  ops->ordered_remove (i);
+			  add_to_ops_vec (ops, negrhs);
+			}
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file, "Optimizing copysign: ");
+			  print_generic_expr (dump_file, cst1, 0);
+			  fprintf (dump_file, " * ");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, cst2, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, ") into %s",
+				   cst1_neg ? "-" : "");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, mul, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, ")\n");
+			}
+		      return;
+		    }
+		  break;
+		default:
+		  break;
+		}
+	    }
+	}
+    }
+}
+
 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
 
 static void
@@ -4764,6 +4853,9 @@ reassociate_bb (basic_block bb)
 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
 		optimize_range_tests (rhs_code, &ops);
 
+	      if (rhs_code == MULT_EXPR)
+		attempt_builtin_copysign (&ops);
+
 	      if (first_pass_instance
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)

Marek
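
P.S. To make the intended transform concrete, here's a source-level sketch
(illustration only, not part of the patch; the *_expected function names are
made up) of what f0 and f1 from reassoc-39.c should be equivalent to after
reassoc1.  7.5 * 2.0 folds exactly to 15.0, so no rounding concerns arise here:

float
f0_expected (float x)
{
  /* CST1 > 0: 7.5 * copysignf (2.0, x) -> copysignf (15.0, x).  */
  return __builtin_copysignf (15.0f, x);
}

float
f1_expected (float x)
{
  /* CST1 < 0: -7.5 * copysignf (2.0, x) -> -copysignf (15.0, x),
     i.e. the folded copysign call is additionally negated.  */
  return -__builtin_copysignf (15.0f, x);
}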