On Thu, Aug 28, 2025 at 9:43 PM Andrew Pinski
<[email protected]> wrote:
>
> On Thu, Aug 28, 2025 at 9:32 PM Andrew Pinski
> <[email protected]> wrote:
> >
> > To better optimize code dealing with `memcmp == 0` where we have
> > a small constant size, we can inline the memcmp in those cases.
> > There is code to do this in strlen but that is run too late in
> > the case where we can figure out the value of one of the arguments
> > to memcmp. So this copies the optimization to forwprop.
> >
> > An example of where this helps is:
> > ```
> > bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; 
> > }
> > ```
> >
> > Where the above should be optimized to just `return a.size() == 1 && a[0] 
> > == 10;`.
> >
> > Note the pr44130.c testcase needed to change, as otherwise the memcmp
> > checks would now be optimized away.
> > Note the loop in pr44130.c is also vectorized, which it was not before.
> >
> > Note the optimization remains in strlen as the other part (memcmp -> 
> > memcmp_eq)
> > should move to either isel or fab and I didn't want to remove it just yet.
> >
> > Bootstrapped and tested on x86_64-linux-gnu.
> >
> >         PR tree-optimization/116651
>
> Note this fixes PR 93265 and PR 103647.
And is the first part of PR 52171.

Thanks,
Andrew

>
> >
> > gcc/ChangeLog:
> >
> >         * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function.
> >         (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp
> >         and memcmp_eq builtins.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr44130.c: Add an inline-asm clobber.
> >         * g++.dg/tree-ssa/vector-compare-1.C: New test.
> >
> > Signed-off-by: Andrew Pinski <[email protected]>
> > ---
> >  .../g++.dg/tree-ssa/vector-compare-1.C        | 24 ++++++++
> >  gcc/testsuite/gcc.target/i386/pr44130.c       |  2 +
> >  gcc/tree-ssa-forwprop.cc                      | 57 +++++++++++++++++++
> >  3 files changed, 83 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> >
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C 
> > b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> > new file mode 100644
> > index 00000000000..d9b2bc2533e
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> > @@ -0,0 +1,24 @@
> > +// { dg-do compile { target c++11 } }
> > +// { dg-options "-O2 -fdump-tree-optimized" }
> > +
> > +// PR tree-optimization/116651
> > +
> > +#include <vector>
> > +
> > +bool test1(const std::vector<int>& in) {
> > +    return in == std::vector<int>{24};
> > +}
> > +
> > +/* We should be able to optimize this to:
> > +   int *b = in.bptr;
> > +   int *e = in.eptr;
> > +   auto size = e - b;
> > +   if (size != 4)
> > +     return false;
> > +   int v = *b;
> > +   return v == 24;
> > +
> > +*/
> > +
> > +// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } } */
> > +// { dg-final { scan-tree-dump-times "== 4" 1 "optimized"  { target int32 
> > } } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c 
> > b/gcc/testsuite/gcc.target/i386/pr44130.c
> > index 2ad740993c1..6269dc89f5e 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr44130.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr44130.c
> > @@ -21,6 +21,8 @@ void testf (void)
> >    xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]);
> >    xxxxx[6] = __builtin_copysignf (__builtin_inff (), Yf[6]);
> >    xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]);
> > +
> > +  asm("":"=m"(xxxxx));
> >    for (i = 0; i < 8; ++i)
> >      if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0)
> >        abort ();
> > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> > index 447004ef048..f5cd3a8f390 100644
> > --- a/gcc/tree-ssa-forwprop.cc
> > +++ b/gcc/tree-ssa-forwprop.cc
> > @@ -1593,6 +1593,60 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
> >    return changed;
> >  }
> >
> > +/* Optimizes builtin memcmps for small constant sizes.
> > +   GSI_P is the GSI for the call. STMT is the call itself.
> > +   */
> > +
> > +static bool
> > +simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt)
> > +{
> > +  tree res = gimple_call_lhs (stmt);
> > +
> > +  if (!res || !use_in_zero_equality (res))
> > +    return false;
> > +  tree arg1 = gimple_call_arg (stmt, 0);
> > +  tree arg2 = gimple_call_arg (stmt, 1);
> > +  tree len = gimple_call_arg (stmt, 2);
> > +  unsigned HOST_WIDE_INT leni;
> > +
> > +  if (tree_fits_uhwi_p (len)
> > +      && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
> > +      && pow2p_hwi (leni))
> > +    {
> > +      leni *= CHAR_TYPE_SIZE;
> > +      unsigned align1 = get_pointer_alignment (arg1);
> > +      unsigned align2 = get_pointer_alignment (arg2);
> > +      unsigned align = MIN (align1, align2);
> > +      scalar_int_mode mode;
> > +      if (int_mode_for_size (leni, 1).exists (&mode)
> > +         && (align >= leni || !targetm.slow_unaligned_access (mode, 
> > align)))
> > +       {
> > +         location_t loc = gimple_location (stmt);
> > +         tree type, off;
> > +         type = build_nonstandard_integer_type (leni, 1);
> > +         gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
> > +         tree ptrtype = build_pointer_type_for_mode (char_type_node,
> > +                                                     ptr_mode, true);
> > +         off = build_int_cst (ptrtype, 0);
> > +         arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
> > +         arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
> > +         tree tem1 = fold_const_aggregate_ref (arg1);
> > +         if (tem1)
> > +           arg1 = tem1;
> > +         tree tem2 = fold_const_aggregate_ref (arg2);
> > +         if (tem2)
> > +           arg2 = tem2;
> > +         res = fold_convert_loc (loc, TREE_TYPE (res),
> > +                                 fold_build2_loc (loc, NE_EXPR,
> > +                                                  boolean_type_node,
> > +                                                  arg1, arg2));
> > +         gimplify_and_update_call_from_tree (gsi_p, res);
> > +         return true;
> > +       }
> > +    }
> > +  return false;
> > +}
> > +
> >  /* *GSI_P is a GIMPLE_CALL to a builtin function.
> >     Optimize
> >     memcpy (p, "abcd", 4);
> > @@ -1630,6 +1684,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, 
> > tree callee2, bool full_walk
> >
> >    switch (DECL_FUNCTION_CODE (callee2))
> >      {
> > +    case BUILT_IN_MEMCMP:
> > +    case BUILT_IN_MEMCMP_EQ:
> > +      return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2));
> >      case BUILT_IN_MEMCHR:
> >        if (gimple_call_num_args (stmt2) == 3
> >           && (res = gimple_call_lhs (stmt2)) != nullptr
> > --
> > 2.43.0
> >

Reply via email to