On Fri, Nov 1, 2024 at 4:06 PM Andrew Pinski <pins...@gmail.com> wrote:
>
> On Tue, Oct 29, 2024 at 10:10 AM Andrew Pinski <pins...@gmail.com> wrote:
> >
> > On Tue, Oct 29, 2024 at 5:59 AM Richard Biener
> > <richard.guent...@gmail.com> wrote:
> > >
> > > On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apin...@quicinc.com> 
> > > wrote:
> > > >
> > > > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the 
> > > > ifs
> > > > into using either AND or OR. But it only allowed the inner condition
> > > > basic block having the conditional only. This changes to allow up to 2 
> > > > defining
> > > > statements as long as they are just nop conversions for either the lhs 
> > > > or rhs
> > > > of the conditional.
> > > >
> > > > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more 
> > > > than before.
> > > >
> > > > Boootstrapped and tested on x86_64-linux-gnu.
> > > >
> > > >         PR tree-optimization/85605
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): 
> > > > New function.
> > > >         (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit 
> > > > instead of checking
> > > >         if iterator is one before the last statement.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
> > > >         * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
> > > >         * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
> > > >
> > > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> > > > ---
> > > >  .../g++.dg/tree-ssa/ifcombine-ccmp-1.C        | 27 +++++++++++++
> > > >  .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c    | 18 +++++++++
> > > >  .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c    | 19 +++++++++
> > > >  gcc/tree-ssa-ifcombine.cc                     | 39 ++++++++++++++++++-
> > > >  4 files changed, 101 insertions(+), 2 deletions(-)
> > > >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > >
> > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C 
> > > > b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > > new file mode 100644
> > > > index 00000000000..282cec8c628
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > > @@ -0,0 +1,27 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> > > > logical-op-non-short-circuit=1" } */
> > > > +
> > > > +/* PR tree-optimization/85605 */
> > > > +#include <stdint.h>
> > > > +
> > > > +template<class T,class T2>
> > > > +inline bool cmp(T a, T2 b) {
> > > > +  return a<0 ? true : T2(a) < b;
> > > > +}
> > > > +
> > > > +template<class T,class T2>
> > > > +inline bool cmp2(T a, T2 b) {
> > > > +  return (a<0) | (T2(a) < b);
> > > > +}
> > > > +
> > > > +bool f(int a, int b) {
> > > > +    return cmp(int64_t(a), unsigned(b));
> > > > +}
> > > > +
> > > > +bool f2(int a, int b) {
> > > > +    return cmp2(int64_t(a), unsigned(b));
> > > > +}
> > > > +
> > > > +
> > > > +/* Both of these functions should be optimized to the same, and have 
> > > > an | in them. */
> > > > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c 
> > > > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > > new file mode 100644
> > > > index 00000000000..1bdbb9358b4
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > > @@ -0,0 +1,18 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> > > > logical-op-non-short-circuit=1" } */
> > > > +
> > > > +/* PR tree-optimization/85605 */
> > > > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to 
> > > > signed in the
> > > > +   inner bb which should be able to move too. */
> > > > +
> > > > +int t (int a, unsigned b)
> > > > +{
> > > > +  if (a > 0)
> > > > +  {
> > > > +    signed t = b;
> > > > +    if (t > 0)
> > > > +      return 0;
> > > > +  }
> > > > +  return 1;
> > > > +}
> > > > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c 
> > > > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > > new file mode 100644
> > > > index 00000000000..8d74b4932c5
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > > @@ -0,0 +1,19 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> > > > logical-op-non-short-circuit=1" } */
> > > > +
> > > > +/* PR tree-optimization/85605 */
> > > > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to 
> > > > signed in the
> > > > +   inner bb which should be able to move too. */
> > > > +
> > > > +int t (int a, unsigned b)
> > > > +{
> > > > +  if (a > 0)
> > > > +    goto L1;
> > > > +  signed t = b;
> > > > +  if (t > 0)
> > > > +    goto L1;
> > > > +  return 0;
> > > > +L1:
> > > > +  return 1;
> > > > +}
> > > > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> > > > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> > > > index 39702929fc0..3acecda31cc 100644
> > > > --- a/gcc/tree-ssa-ifcombine.cc
> > > > +++ b/gcc/tree-ssa-ifcombine.cc
> > > > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block 
> > > > inner_cond_bb,
> > > >    outer2->probability = profile_probability::never ();
> > > >  }
> > > >
> > > > +/* Returns true if inner_cond_bb contains just the condition or 1/2 
> > > > statements
> > > > +   that define lhs or rhs with a nop conversion. */
> > > > +
> > > > +static bool
> > > > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree 
> > > > lhs, tree rhs)
> > > > +{
> > > > +  gimple_stmt_iterator gsi;
> > > > +  gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> > > > +  /* If only the condition, this should be allowed. */
> > > > +  if (gsi_one_before_end_p (gsi))
> > > > +    return true;
> > > > +  /* Can have up to 2 statements defining each of lhs/rhs. */
> > > > +  for (int i = 0; i < 2; i++)
> > > > +    {
> > > > +      gimple *stmt = gsi_stmt (gsi);
> > > > +      if (!gimple_assign_cast_p (stmt))
> > > > +       return false;
> > > > +      if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> > > > +                                 TREE_TYPE (gimple_assign_rhs1 
> > > > (stmt))))
> > >
> > > I would say non-nop conversions should be fine as well - not sure why
> > > gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
> > > FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
> > > would be a better check?
> >
> > I will check on that in a little bit but I think so.
>
> Just a quick update on this. Without checking for a nop_conversion,
> gcc.dg/tree-ssa/pr111456-1.c fails.
> I am looking into a way of fixing that still. I think there is a
> missed optimization still, plus the way ifcombine uses fold_build2_loc
> to build the full tree and then does force_gimple_operand_gsi_1 also
> seems like it is not helping (though I could be wrong).

This is what I committed in the end now all of the known regressions
are fixed. It just checks CONVERT_EXPR_CODE_P rather than a
nop_conversion.
Also uses `!=` rather than `!operand_equal_p`.

Thanks,
Andrew Pinski

>
> Thanks,
> Andrew
>
> >
> > >
> > > > +       return false;
> > > > +      /* The defining statement needs to match either the lhs or rhs of
> > > > +         the condition. */
> > > > +      if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> > > > +          && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
> > >
> > > I think you can use == here.
> >
> > Yes I was thinking using `!=` here was fine but I was not 100% sure at
> > the time I wrote it.
> >
> > >
> > > OK with that change but note this might conflict with the series from 
> > > Alexandre.
> >
> > I looked and I think it won't conflict, Alexandre's changes did not
> > touch the logical_op_non_short_circuit case as far as I could see.
> >
> > Note I am going to wait on applying this until I get the patch for PR
> > 117346 approved (I should be posting it later today) as this patch
> > exposed a missed optimization with ccmp on aarch64.
> >
> > Thanks,
> > Andrew
> >
> > >
> > > Richard.
> > >
> > > > +        return false;
> > > > +      gsi_next_nondebug (&gsi);
> > > > +      if (gsi_one_before_end_p (gsi))
> > > > +       return true;
> > > > +    }
> > > > +  return false;
> > > > +}
> > > > +
> > > >  /* If-convert on a and pattern with a common else block.  The inner
> > > >     if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
> > > >     inner_inv, outer_inv and result_inv indicate whether the conditions
> > > > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
> > > > inner_inv,
> > > >               = param_logical_op_non_short_circuit;
> > > >           if (!logical_op_non_short_circuit || sanitize_coverage_p ())
> > > >             return false;
> > > > -         /* Only do this optimization if the inner bb contains only 
> > > > the conditional. */
> > > > -         if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb 
> > > > (inner_cond_bb)))
> > > > +         /* Only do this optimization if the inner bb contains only 
> > > > the conditional
> > > > +            or there is one or 2 statements which are nop conversion 
> > > > for the comparison. */
> > > > +         if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> > > > +                                                  gimple_cond_lhs 
> > > > (inner_cond),
> > > > +                                                  gimple_cond_rhs 
> > > > (inner_cond)))
> > > >             return false;
> > > >           t1 = fold_build2_loc (gimple_location (inner_cond),
> > > >                                 inner_cond_code,
> > > > --
> > > > 2.43.0
> > > >

Attachment: 0001-ifcombine-For-short-circuit-case-allow-2-convert-def.patch
Description: Binary data

Reply via email to