On Fri, Jun 27, 2025, 11:06 AM Raphael Moreira Zinsly <
rzin...@ventanamicro.com> wrote:

> Hi all,
>
> For targets that have expensive shifts this may not get a better
> sequence right now, specially for AVR and MSP430 according to
> our tests.
> Before I start looking for a fix on those targets I want to know
> if someone has any advise or other concerns with this transformation.
>
>
> Thanks,
>
> -- >8 --
>
> This patch allows us to use the "splat the sign bit" idiom to
> efficiently select between 0 and 2^n-1.
> With that we can avoid branches and we got a shorter sequence than
> using conditional-zero instructions.
>
> The PLUS 2^n-1 sequence appears particularly for signed division
> by a power of two.
>
> gcc/ChangeLog:
>         * gcc/tree-ssa-phiopt.cc
>         (negative_ifconv_replacement): New function.
>         (pass_phiopt::execute): Call negative_ifconv_replacement.
>
> gcc/testsuite/ChangeLog:
>         * gcc.dg/tree-ssa/phi-opt-46.c: New test.
>         * gcc.target/sh/pr59533-1.c: Fix comments and expected output.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c |  34 ++++++
>  gcc/testsuite/gcc.target/sh/pr59533-1.c    |  20 ++--
>  gcc/tree-ssa-phiopt.cc                     | 123 +++++++++++++++++++++
>  3 files changed, 167 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c
> b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c
> new file mode 100644
> index 00000000000..7995f378307
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-46.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-phiopt1" } */
> +
> +long f1(long c, long a)
> +{
> +  return c < 0 ? a : a + 7;
> +}
> +
> +long f2(long c, long a)
> +{
> +  return c < 0 ? a + 7 : a;
> +}
> +
> +long f3(long c, long a)
> +{
> +  return c < 0 ? a : a | 15;
> +}
> +
> +long f4(long c, long a)
> +{
> +  return c < 0 ? a | 15 : a;
> +}
> +
> +long f5(long c, long a)
> +{
> +  return c < 0 ? a : a ^ 31;
> +}
> +
> +long f6(long c, long a)
> +{
> +  return c < 0 ? a ^ 31 : a;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "if" "phiopt1" } } */
> diff --git a/gcc/testsuite/gcc.target/sh/pr59533-1.c
> b/gcc/testsuite/gcc.target/sh/pr59533-1.c
> index b0469859df5..eb03b67c32c 100644
> --- a/gcc/testsuite/gcc.target/sh/pr59533-1.c
> +++ b/gcc/testsuite/gcc.target/sh/pr59533-1.c
> @@ -2,19 +2,19 @@
>  /* { dg-do compile }  */
>  /* { dg-options "-O1" } */
>
> -/* { dg-final { scan-assembler-times "shll" 1 } }  */
> -/* { dg-final { scan-assembler-times "movt" 5 } }  */
> +/* { dg-final { scan-assembler-times "shll" 5 } }  */
> +/* { dg-final { scan-assembler-times "movt" 9 } }  */
>  /* { dg-final { scan-assembler-times "rotcl" 1 } }  */
>  /* { dg-final { scan-assembler-times "and" 3 } }  */
>  /* { dg-final { scan-assembler-times "extu.b" 5 } }  */
>
> -/* { dg-final { scan-assembler-times "cmp/pz" 27 { target { ! sh2a } } }
> }  */
> +/* { dg-final { scan-assembler-times "cmp/pz" 23 { target { ! sh2a } } }
> }  */
>  /* { dg-final { scan-assembler-times "addc" 4 { target { ! sh2a } } } }
> */
> -/* { dg-final { scan-assembler-times "subc" 16 { target { ! sh2a } } } }
> */
> +/* { dg-final { scan-assembler-times "subc" 12 { target { ! sh2a } } } }
> */
>
> -/* { dg-final { scan-assembler-times "cmp/pz" 25 { target { sh2a } } } }
> */
> +/* { dg-final { scan-assembler-times "cmp/pz" 21 { target { sh2a } } } }
> */
>  /* { dg-final { scan-assembler-times "addc" 6 { target { sh2a } } } }  */
> -/* { dg-final { scan-assembler-times "subc" 14 { target { sh2a } } } }  */
> +/* { dg-final { scan-assembler-times "subc" 10 { target { sh2a } } } }  */
>  /* { dg-final { scan-assembler-times "bld" 2 { target { sh2a } } } }  */
>
>  int
> @@ -186,28 +186,28 @@ test_22 (int x)
>  int
>  test_23 (int x)
>  {
> -  /* 1x cmp/pz, 1x subc */
> +  /* 1x shll, 1x add */
>    return x < 0 ? x + 1 : x;
>  }
>
>  unsigned int
>  test_24 (unsigned int x)
>  {
> -  /* 1x cmp/pz, 1x subc */
> +  /* 1x shll, 1x add */
>    return x & 0x80000000 ? x + 1 : x;
>  }
>
>  unsigned int
>  test_25 (unsigned int x)
>  {
> -  /* 1x cmp/pz, 1x subc */
> +  /* 1x shll, 1x add */
>    return x >> 31 ? x + 1 : x;
>  }
>
>  int
>  test_26 (int x)
>  {
> -  /* 1x cmp/pz, 1x subc  */
> +  /* 1x shll, 1x add */
>    return x >> 31 ? x + 1 : x;
>  }
>
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index faecab6ab7a..d2dcd4e60ca 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -3983,6 +3983,127 @@ cond_if_else_store_replacement (basic_block
> then_bb, basic_block else_bb,
>    return ok;
>  }
>
> +/* Optimize A < 0 ? ARG1 OP CONST : ARG1.
> +   When 0 OP ARG1 is ARG1 and CONST is 2^n - 1 we can eliminate the
> branch by
> +   splatting the sign bit and masking off the high bits.
> +   An arithmetic right shift by size - 1 will return 0 or -1,
> +   then a following logical right shift by size - log2(CONST + 1) would
> generate
> +   0 or 2^n - 1 respectively.  */
> +static bool
> +negative_ifconv_replacement (basic_block cond_bb, edge e, gphi *phi, tree
> arg0,
> +                         tree arg1)
> +{
> +  gimple_seq seq = NULL;
> +  enum tree_code op = ERROR_MARK;
> +  tree base;
> +  tree result = NULL_TREE;
> +  bool invert = false;
> +
> +  gimple *stmt = last_nondebug_stmt (cond_bb);
> +  tree type = TREE_TYPE (gimple_phi_result (phi));
> +  enum tree_code comp_code = gimple_cond_code (stmt);
> +  tree cmp0 = gimple_cond_lhs (stmt);
> +  tree cmp1 = gimple_cond_rhs (stmt);
> +  tree cmp_type = TREE_TYPE (cmp0);
> +
> +  /* For A >= 0 try the inverted comparison, that is A < 0 ? ARG1 :
> ARG0.  */
> +  if (comp_code == GE_EXPR)
> +    {
> +      comp_code = invert_tree_comparison (comp_code, HONOR_NANS (cmp0));
> +      if (comp_code == ERROR_MARK)
> +       return false;
> +
> +      std::swap (arg0, arg1);
> +    }
> +
> +  if (comp_code != LT_EXPR || TREE_CODE (arg1) != SSA_NAME
> +      || TREE_CODE (arg0) != SSA_NAME || !integer_zerop (cmp1))
> +    return false;
> +
> +  base = arg1;
> +  gimple *op_def = SSA_NAME_DEF_STMT (arg0);
> +  if (is_gimple_assign (op_def))
> +    op = gimple_assign_rhs_code (op_def);
> +
> +  /* With a bit inversion step we can also optimize
> +     A < 0 ? ARG0 : ARG0 OP CONST.  */
> +  if (op != PLUS_EXPR && op != BIT_IOR_EXPR && op != BIT_XOR_EXPR)
> +    {
> +      op_def = SSA_NAME_DEF_STMT (arg1);
> +      if (!is_gimple_assign (op_def))
> +       return false;
> +
> +      op = gimple_assign_rhs_code (op_def);
> +      base = arg0;
> +      invert = true;
> +    }
> +
> +  tree other_base = gimple_assign_rhs1 (op_def);
> +  if (!operand_equal_p (base, other_base))
> +    return false;
> +
> +  /* This only works if (0 OP ARG1) == ARG1.  */
> +  if (op != PLUS_EXPR && op != BIT_IOR_EXPR && op != BIT_XOR_EXPR)
> +    return false;
> +
> +  tree op_const = gimple_assign_rhs2 (op_def);
> +  if (!tree_fits_shwi_p (op_const) || !tree_fits_shwi_p (TYPE_SIZE (type))
> +      || !tree_fits_shwi_p (TYPE_SIZE (cmp_type)))
> +    return false;
> +
> +  /* Check if CONST differs by 2^n - 1.  */
> +  HOST_WIDE_INT const2n = tree_to_shwi (op_const);
>

I suspect using wi::wide_int here would be better here rather using HWI.
Plus you could just use only_sign_bit_p to check if the only the last bit
is set.

+  if (!TYPE_UNSIGNED (cmp_type) && pow2p_hwi (const2n + 1))
>

This could get sign integer overflow if you have HWINT_MAX.

+    {
> +      if (dump_file && (dump_flags & TDF_FOLDING))
> +       {
> +         fprintf (dump_file, "\nphiopt negative if-conversion
> replacing:\n\t");
> +         print_generic_expr (dump_file, cmp0);
> +         fprintf (dump_file, " < 0 ? ");
> +         print_generic_expr (dump_file, arg1);
> +         fprintf (dump_file, " : ");
> +         print_generic_expr (dump_file, arg0);
> +         fprintf (dump_file, "\n");
> +       }
> +
> +      HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE (cmp_type));
> +      /* Splat the sign bit with an arithmetic shift right.  */
> +      tree temp = gimple_build (&seq, RSHIFT_EXPR, cmp_type, cmp0,
> +                               build_int_cst (type, size - 1));
> +      if (invert)
> +       temp = gimple_build (&seq, BIT_NOT_EXPR, cmp_type, temp);
> +
> +      /* Do a logical right shift to mask off the high bits.
> +        We need the type changes as the logical right shift is generated
> by an
> +        unsigned RSHIFT_EXPR.  */
> +      tree utype = unsigned_type_for (type);
> +      temp = gimple_build (&seq, CONVERT_EXPR, utype, temp);
> +      size = tree_to_shwi (TYPE_SIZE (type));
> +      tree shift
> +       = build_int_cst (utype, size - exact_log2 (const2n + 1));
> +      temp = gimple_build (&seq, RSHIFT_EXPR, utype, temp, shift);
> +      temp = gimple_build (&seq, CONVERT_EXPR, type, temp);
> +
> +      /* Do OP with the base ARG.  */
> +      result = gimple_build (&seq, op, type, temp, base);
> +
> +      /* Insert the sequence.  */
> +      gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> +      gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING);
> +
> +      if (dump_file && (dump_flags & TDF_FOLDING))
> +       fprintf (dump_file, "accepted the phiopt neg if-conversion
> sequence.\n");
> +
> +      replace_phi_edge_with_variable (cond_bb, e, phi, result);
> +
> +      statistics_counter_event (cfun, "negative if-conversion
> replacement", 1);
> +
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>

I have been trying to most of the phiopt to over to use match and simplify
(via match.pd patterns). Is there an issue why this can't be a match
pattern instead? It seems like a good fit too.
It should simplify the code added even.

Thanks,
Andrew


 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB.  */
>
>  static bool
> @@ -4586,6 +4707,8 @@ pass_phiopt::execute (function *)
>                && !diamond_p
>                && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>         cfgchanged = true;
> +      else if (negative_ifconv_replacement (bb, e2, phi, arg0, arg1))
> +       cfgchanged = true;
>      };
>
>    execute_over_cond_phis (phiopt_exec);
> --
> 2.47.0
>
>

Reply via email to