Hi all, For targets that have expensive shifts this may not get a better sequence right now, specially for AVR and MSP430 according to our tests. Before I start looking for a fix on those targets I want to know if someone has any advise or other concerns with this transformation.
Thanks, -- >8 -- This patch allows us to use the "splat the sign bit" idiom to efficiently select between 0 and 2^n-1. With that we can avoid branches and we got a shorter sequence than using conditional-zero instructions. The PLUS 2^n-1 sequence appears particularly for signed division by a power of two. gcc/ChangeLog: * gcc/tree-ssa-phiopt.cc (negative_ifconv_replacement): New function. (pass_phiopt::execute): Call negative_ifconv_replacement. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/phi-opt-46.c: New test. * gcc.target/sh/pr59533-1.c: Fix comments and expected output. --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c | 34 ++++++ gcc/testsuite/gcc.target/sh/pr59533-1.c | 20 ++-- gcc/tree-ssa-phiopt.cc | 123 +++++++++++++++++++++ 3 files changed, 167 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c new file mode 100644 index 00000000000..7995f378307 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-phiopt1" } */ + +long f1(long c, long a) +{ + return c < 0 ? a : a + 7; +} + +long f2(long c, long a) +{ + return c < 0 ? a + 7 : a; +} + +long f3(long c, long a) +{ + return c < 0 ? a : a | 15; +} + +long f4(long c, long a) +{ + return c < 0 ? a | 15 : a; +} + +long f5(long c, long a) +{ + return c < 0 ? a : a ^ 31; +} + +long f6(long c, long a) +{ + return c < 0 ? a ^ 31 : a; +} + +/* { dg-final { scan-tree-dump-not "if" "phiopt1" } } */ diff --git a/gcc/testsuite/gcc.target/sh/pr59533-1.c b/gcc/testsuite/gcc.target/sh/pr59533-1.c index b0469859df5..eb03b67c32c 100644 --- a/gcc/testsuite/gcc.target/sh/pr59533-1.c +++ b/gcc/testsuite/gcc.target/sh/pr59533-1.c @@ -2,19 +2,19 @@ /* { dg-do compile } */ /* { dg-options "-O1" } */ -/* { dg-final { scan-assembler-times "shll" 1 } } */ -/* { dg-final { scan-assembler-times "movt" 5 } } */ +/* { dg-final { scan-assembler-times "shll" 5 } } */ +/* { dg-final { scan-assembler-times "movt" 9 } } */ /* { dg-final { scan-assembler-times "rotcl" 1 } } */ /* { dg-final { scan-assembler-times "and" 3 } } */ /* { dg-final { scan-assembler-times "extu.b" 5 } } */ -/* { dg-final { scan-assembler-times "cmp/pz" 27 { target { ! sh2a } } } } */ +/* { dg-final { scan-assembler-times "cmp/pz" 23 { target { ! sh2a } } } } */ /* { dg-final { scan-assembler-times "addc" 4 { target { ! sh2a } } } } */ -/* { dg-final { scan-assembler-times "subc" 16 { target { ! sh2a } } } } */ +/* { dg-final { scan-assembler-times "subc" 12 { target { ! sh2a } } } } */ -/* { dg-final { scan-assembler-times "cmp/pz" 25 { target { sh2a } } } } */ +/* { dg-final { scan-assembler-times "cmp/pz" 21 { target { sh2a } } } } */ /* { dg-final { scan-assembler-times "addc" 6 { target { sh2a } } } } */ -/* { dg-final { scan-assembler-times "subc" 14 { target { sh2a } } } } */ +/* { dg-final { scan-assembler-times "subc" 10 { target { sh2a } } } } */ /* { dg-final { scan-assembler-times "bld" 2 { target { sh2a } } } } */ int @@ -186,28 +186,28 @@ test_22 (int x) int test_23 (int x) { - /* 1x cmp/pz, 1x subc */ + /* 1x shll, 1x add */ return x < 0 ? x + 1 : x; } unsigned int test_24 (unsigned int x) { - /* 1x cmp/pz, 1x subc */ + /* 1x shll, 1x add */ return x & 0x80000000 ? x + 1 : x; } unsigned int test_25 (unsigned int x) { - /* 1x cmp/pz, 1x subc */ + /* 1x shll, 1x add */ return x >> 31 ? x + 1 : x; } int test_26 (int x) { - /* 1x cmp/pz, 1x subc */ + /* 1x shll, 1x add */ return x >> 31 ? x + 1 : x; } diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index faecab6ab7a..d2dcd4e60ca 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -3983,6 +3983,127 @@ cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, return ok; } +/* Optimize A < 0 ? ARG1 OP CONST : ARG1. + When 0 OP ARG1 is ARG1 and CONST is 2^n - 1 we can eliminate the branch by + splatting the sign bit and masking off the high bits. + An arithmetic right shift by size - 1 will return 0 or -1, + then a following logical right shift by size - log2(CONST + 1) would generate + 0 or 2^n - 1 respectively. */ +static bool +negative_ifconv_replacement (basic_block cond_bb, edge e, gphi *phi, tree arg0, + tree arg1) +{ + gimple_seq seq = NULL; + enum tree_code op = ERROR_MARK; + tree base; + tree result = NULL_TREE; + bool invert = false; + + gimple *stmt = last_nondebug_stmt (cond_bb); + tree type = TREE_TYPE (gimple_phi_result (phi)); + enum tree_code comp_code = gimple_cond_code (stmt); + tree cmp0 = gimple_cond_lhs (stmt); + tree cmp1 = gimple_cond_rhs (stmt); + tree cmp_type = TREE_TYPE (cmp0); + + /* For A >= 0 try the inverted comparison, that is A < 0 ? ARG1 : ARG0. */ + if (comp_code == GE_EXPR) + { + comp_code = invert_tree_comparison (comp_code, HONOR_NANS (cmp0)); + if (comp_code == ERROR_MARK) + return false; + + std::swap (arg0, arg1); + } + + if (comp_code != LT_EXPR || TREE_CODE (arg1) != SSA_NAME + || TREE_CODE (arg0) != SSA_NAME || !integer_zerop (cmp1)) + return false; + + base = arg1; + gimple *op_def = SSA_NAME_DEF_STMT (arg0); + if (is_gimple_assign (op_def)) + op = gimple_assign_rhs_code (op_def); + + /* With a bit inversion step we can also optimize + A < 0 ? ARG0 : ARG0 OP CONST. */ + if (op != PLUS_EXPR && op != BIT_IOR_EXPR && op != BIT_XOR_EXPR) + { + op_def = SSA_NAME_DEF_STMT (arg1); + if (!is_gimple_assign (op_def)) + return false; + + op = gimple_assign_rhs_code (op_def); + base = arg0; + invert = true; + } + + tree other_base = gimple_assign_rhs1 (op_def); + if (!operand_equal_p (base, other_base)) + return false; + + /* This only works if (0 OP ARG1) == ARG1. */ + if (op != PLUS_EXPR && op != BIT_IOR_EXPR && op != BIT_XOR_EXPR) + return false; + + tree op_const = gimple_assign_rhs2 (op_def); + if (!tree_fits_shwi_p (op_const) || !tree_fits_shwi_p (TYPE_SIZE (type)) + || !tree_fits_shwi_p (TYPE_SIZE (cmp_type))) + return false; + + /* Check if CONST differs by 2^n - 1. */ + HOST_WIDE_INT const2n = tree_to_shwi (op_const); + if (!TYPE_UNSIGNED (cmp_type) && pow2p_hwi (const2n + 1)) + { + if (dump_file && (dump_flags & TDF_FOLDING)) + { + fprintf (dump_file, "\nphiopt negative if-conversion replacing:\n\t"); + print_generic_expr (dump_file, cmp0); + fprintf (dump_file, " < 0 ? "); + print_generic_expr (dump_file, arg1); + fprintf (dump_file, " : "); + print_generic_expr (dump_file, arg0); + fprintf (dump_file, "\n"); + } + + HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE (cmp_type)); + /* Splat the sign bit with an arithmetic shift right. */ + tree temp = gimple_build (&seq, RSHIFT_EXPR, cmp_type, cmp0, + build_int_cst (type, size - 1)); + if (invert) + temp = gimple_build (&seq, BIT_NOT_EXPR, cmp_type, temp); + + /* Do a logical right shift to mask off the high bits. + We need the type changes as the logical right shift is generated by an + unsigned RSHIFT_EXPR. */ + tree utype = unsigned_type_for (type); + temp = gimple_build (&seq, CONVERT_EXPR, utype, temp); + size = tree_to_shwi (TYPE_SIZE (type)); + tree shift + = build_int_cst (utype, size - exact_log2 (const2n + 1)); + temp = gimple_build (&seq, RSHIFT_EXPR, utype, temp, shift); + temp = gimple_build (&seq, CONVERT_EXPR, type, temp); + + /* Do OP with the base ARG. */ + result = gimple_build (&seq, op, type, temp, base); + + /* Insert the sequence. */ + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); + + if (dump_file && (dump_flags & TDF_FOLDING)) + fprintf (dump_file, "accepted the phiopt neg if-conversion sequence.\n"); + + replace_phi_edge_with_variable (cond_bb, e, phi, result); + + statistics_counter_event (cfun, "negative if-conversion replacement", 1); + + return true; + } + + return false; +} + /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */ static bool @@ -4586,6 +4707,8 @@ pass_phiopt::execute (function *) && !diamond_p && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) cfgchanged = true; + else if (negative_ifconv_replacement (bb, e2, phi, arg0, arg1)) + cfgchanged = true; }; execute_over_cond_phis (phiopt_exec); -- 2.47.0