On Fri, Jun 27, 2025, 11:06 AM Raphael Moreira Zinsly < rzin...@ventanamicro.com> wrote:
> Hi all, > > For targets that have expensive shifts this may not get a better > sequence right now, specially for AVR and MSP430 according to > our tests. > Before I start looking for a fix on those targets I want to know > if someone has any advise or other concerns with this transformation. > > > Thanks, > > -- >8 -- > > This patch allows us to use the "splat the sign bit" idiom to > efficiently select between 0 and 2^n-1. > With that we can avoid branches and we got a shorter sequence than > using conditional-zero instructions. > > The PLUS 2^n-1 sequence appears particularly for signed division > by a power of two. > > gcc/ChangeLog: > * gcc/tree-ssa-phiopt.cc > (negative_ifconv_replacement): New function. > (pass_phiopt::execute): Call negative_ifconv_replacement. > > gcc/testsuite/ChangeLog: > * gcc.dg/tree-ssa/phi-opt-46.c: New test. > * gcc.target/sh/pr59533-1.c: Fix comments and expected output. > --- > gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c | 34 ++++++ > gcc/testsuite/gcc.target/sh/pr59533-1.c | 20 ++-- > gcc/tree-ssa-phiopt.cc | 123 +++++++++++++++++++++ > 3 files changed, 167 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c > b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c > new file mode 100644 > index 00000000000..7995f378307 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c > @@ -0,0 +1,34 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-phiopt1" } */ > + > +long f1(long c, long a) > +{ > + return c < 0 ? a : a + 7; > +} > + > +long f2(long c, long a) > +{ > + return c < 0 ? a + 7 : a; > +} > + > +long f3(long c, long a) > +{ > + return c < 0 ? a : a | 15; > +} > + > +long f4(long c, long a) > +{ > + return c < 0 ? a | 15 : a; > +} > + > +long f5(long c, long a) > +{ > + return c < 0 ? a : a ^ 31; > +} > + > +long f6(long c, long a) > +{ > + return c < 0 ? a ^ 31 : a; > +} > + > +/* { dg-final { scan-tree-dump-not "if" "phiopt1" } } */ > diff --git a/gcc/testsuite/gcc.target/sh/pr59533-1.c > b/gcc/testsuite/gcc.target/sh/pr59533-1.c > index b0469859df5..eb03b67c32c 100644 > --- a/gcc/testsuite/gcc.target/sh/pr59533-1.c > +++ b/gcc/testsuite/gcc.target/sh/pr59533-1.c > @@ -2,19 +2,19 @@ > /* { dg-do compile } */ > /* { dg-options "-O1" } */ > > -/* { dg-final { scan-assembler-times "shll" 1 } } */ > -/* { dg-final { scan-assembler-times "movt" 5 } } */ > +/* { dg-final { scan-assembler-times "shll" 5 } } */ > +/* { dg-final { scan-assembler-times "movt" 9 } } */ > /* { dg-final { scan-assembler-times "rotcl" 1 } } */ > /* { dg-final { scan-assembler-times "and" 3 } } */ > /* { dg-final { scan-assembler-times "extu.b" 5 } } */ > > -/* { dg-final { scan-assembler-times "cmp/pz" 27 { target { ! sh2a } } } > } */ > +/* { dg-final { scan-assembler-times "cmp/pz" 23 { target { ! sh2a } } } > } */ > /* { dg-final { scan-assembler-times "addc" 4 { target { ! sh2a } } } } > */ > -/* { dg-final { scan-assembler-times "subc" 16 { target { ! sh2a } } } } > */ > +/* { dg-final { scan-assembler-times "subc" 12 { target { ! sh2a } } } } > */ > > -/* { dg-final { scan-assembler-times "cmp/pz" 25 { target { sh2a } } } } > */ > +/* { dg-final { scan-assembler-times "cmp/pz" 21 { target { sh2a } } } } > */ > /* { dg-final { scan-assembler-times "addc" 6 { target { sh2a } } } } */ > -/* { dg-final { scan-assembler-times "subc" 14 { target { sh2a } } } } */ > +/* { dg-final { scan-assembler-times "subc" 10 { target { sh2a } } } } */ > /* { dg-final { scan-assembler-times "bld" 2 { target { sh2a } } } } */ > > int > @@ -186,28 +186,28 @@ test_22 (int x) > int > test_23 (int x) > { > - /* 1x cmp/pz, 1x subc */ > + /* 1x shll, 1x add */ > return x < 0 ? x + 1 : x; > } > > unsigned int > test_24 (unsigned int x) > { > - /* 1x cmp/pz, 1x subc */ > + /* 1x shll, 1x add */ > return x & 0x80000000 ? x + 1 : x; > } > > unsigned int > test_25 (unsigned int x) > { > - /* 1x cmp/pz, 1x subc */ > + /* 1x shll, 1x add */ > return x >> 31 ? x + 1 : x; > } > > int > test_26 (int x) > { > - /* 1x cmp/pz, 1x subc */ > + /* 1x shll, 1x add */ > return x >> 31 ? x + 1 : x; > } > > diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc > index faecab6ab7a..d2dcd4e60ca 100644 > --- a/gcc/tree-ssa-phiopt.cc > +++ b/gcc/tree-ssa-phiopt.cc > @@ -3983,6 +3983,127 @@ cond_if_else_store_replacement (basic_block > then_bb, basic_block else_bb, > return ok; > } > > +/* Optimize A < 0 ? ARG1 OP CONST : ARG1. > + When 0 OP ARG1 is ARG1 and CONST is 2^n - 1 we can eliminate the > branch by > + splatting the sign bit and masking off the high bits. > + An arithmetic right shift by size - 1 will return 0 or -1, > + then a following logical right shift by size - log2(CONST + 1) would > generate > + 0 or 2^n - 1 respectively. */ > +static bool > +negative_ifconv_replacement (basic_block cond_bb, edge e, gphi *phi, tree > arg0, > + tree arg1) > +{ > + gimple_seq seq = NULL; > + enum tree_code op = ERROR_MARK; > + tree base; > + tree result = NULL_TREE; > + bool invert = false; > + > + gimple *stmt = last_nondebug_stmt (cond_bb); > + tree type = TREE_TYPE (gimple_phi_result (phi)); > + enum tree_code comp_code = gimple_cond_code (stmt); > + tree cmp0 = gimple_cond_lhs (stmt); > + tree cmp1 = gimple_cond_rhs (stmt); > + tree cmp_type = TREE_TYPE (cmp0); > + > + /* For A >= 0 try the inverted comparison, that is A < 0 ? ARG1 : > ARG0. */ > + if (comp_code == GE_EXPR) > + { > + comp_code = invert_tree_comparison (comp_code, HONOR_NANS (cmp0)); > + if (comp_code == ERROR_MARK) > + return false; > + > + std::swap (arg0, arg1); > + } > + > + if (comp_code != LT_EXPR || TREE_CODE (arg1) != SSA_NAME > + || TREE_CODE (arg0) != SSA_NAME || !integer_zerop (cmp1)) > + return false; > + > + base = arg1; > + gimple *op_def = SSA_NAME_DEF_STMT (arg0); > + if (is_gimple_assign (op_def)) > + op = gimple_assign_rhs_code (op_def); > + > + /* With a bit inversion step we can also optimize > + A < 0 ? ARG0 : ARG0 OP CONST. */ > + if (op != PLUS_EXPR && op != BIT_IOR_EXPR && op != BIT_XOR_EXPR) > + { > + op_def = SSA_NAME_DEF_STMT (arg1); > + if (!is_gimple_assign (op_def)) > + return false; > + > + op = gimple_assign_rhs_code (op_def); > + base = arg0; > + invert = true; > + } > + > + tree other_base = gimple_assign_rhs1 (op_def); > + if (!operand_equal_p (base, other_base)) > + return false; > + > + /* This only works if (0 OP ARG1) == ARG1. */ > + if (op != PLUS_EXPR && op != BIT_IOR_EXPR && op != BIT_XOR_EXPR) > + return false; > + > + tree op_const = gimple_assign_rhs2 (op_def); > + if (!tree_fits_shwi_p (op_const) || !tree_fits_shwi_p (TYPE_SIZE (type)) > + || !tree_fits_shwi_p (TYPE_SIZE (cmp_type))) > + return false; > + > + /* Check if CONST differs by 2^n - 1. */ > + HOST_WIDE_INT const2n = tree_to_shwi (op_const); > I suspect using wi::wide_int here would be better here rather using HWI. Plus you could just use only_sign_bit_p to check if the only the last bit is set. + if (!TYPE_UNSIGNED (cmp_type) && pow2p_hwi (const2n + 1)) > This could get sign integer overflow if you have HWINT_MAX. + { > + if (dump_file && (dump_flags & TDF_FOLDING)) > + { > + fprintf (dump_file, "\nphiopt negative if-conversion > replacing:\n\t"); > + print_generic_expr (dump_file, cmp0); > + fprintf (dump_file, " < 0 ? "); > + print_generic_expr (dump_file, arg1); > + fprintf (dump_file, " : "); > + print_generic_expr (dump_file, arg0); > + fprintf (dump_file, "\n"); > + } > + > + HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE (cmp_type)); > + /* Splat the sign bit with an arithmetic shift right. */ > + tree temp = gimple_build (&seq, RSHIFT_EXPR, cmp_type, cmp0, > + build_int_cst (type, size - 1)); > + if (invert) > + temp = gimple_build (&seq, BIT_NOT_EXPR, cmp_type, temp); > + > + /* Do a logical right shift to mask off the high bits. > + We need the type changes as the logical right shift is generated > by an > + unsigned RSHIFT_EXPR. */ > + tree utype = unsigned_type_for (type); > + temp = gimple_build (&seq, CONVERT_EXPR, utype, temp); > + size = tree_to_shwi (TYPE_SIZE (type)); > + tree shift > + = build_int_cst (utype, size - exact_log2 (const2n + 1)); > + temp = gimple_build (&seq, RSHIFT_EXPR, utype, temp, shift); > + temp = gimple_build (&seq, CONVERT_EXPR, type, temp); > + > + /* Do OP with the base ARG. */ > + result = gimple_build (&seq, op, type, temp, base); > + > + /* Insert the sequence. */ > + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); > + gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); > + > + if (dump_file && (dump_flags & TDF_FOLDING)) > + fprintf (dump_file, "accepted the phiopt neg if-conversion > sequence.\n"); > + > + replace_phi_edge_with_variable (cond_bb, e, phi, result); > + > + statistics_counter_event (cfun, "negative if-conversion > replacement", 1); > + > + return true; > + } > + > + return false; > +} > + > I have been trying to most of the phiopt to over to use match and simplify (via match.pd patterns). Is there an issue why this can't be a match pattern instead? It seems like a good fit too. It should simplify the code added even. Thanks, Andrew /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */ > > static bool > @@ -4586,6 +4707,8 @@ pass_phiopt::execute (function *) > && !diamond_p > && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) > cfgchanged = true; > + else if (negative_ifconv_replacement (bb, e2, phi, arg0, arg1)) > + cfgchanged = true; > }; > > execute_over_cond_phis (phiopt_exec); > -- > 2.47.0 > >