On Fri, Aug 29, 2025 at 12:31 AM Richard Biener <[email protected]> wrote: > > On Fri, Aug 29, 2025 at 6:33 AM Andrew Pinski > <[email protected]> wrote: > > > > To better optimize code dealing with `memcmp == 0` where we have > > a small constant size, we can inline the memcmp in those cases. > > There is code to do this in strlen but that is run too late in > > the case where we can figure out the value of one of the arguments > > to memcmp. So this copies the optimization to forwprop. > > > > An example of where this helps is: > > ``` > > bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; > > } > > ``` > > > > Where the above should be optimized to just `return a.size() == 1 && a[0] > > == 10;`. > > > > Note pr44130.c testcase needed to change as now it will be optimized away > > otherwise. > > Note the loop in pr44130.c os also vectorized which it was not before. > > > > Note the optimization remains in strlen as the other part (memcmp -> > > memcmp_eq) > > should move to either isel or fab and I didn't want to remove it just yet. > > > > Bootstrapped and tested on x86_64-linux-gnu. > > > > PR tree-optimization/116651 > > > > gcc/ChangeLog: > > > > * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function. > > (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp > > memcmp_eq builtins. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr44130.c: Add an inline-asm clobber. > > * g++.dg/tree-ssa/vector-compare-1.C: New test. 
> > > > Signed-off-by: Andrew Pinski <[email protected]> > > --- > > .../g++.dg/tree-ssa/vector-compare-1.C | 24 ++++++++ > > gcc/testsuite/gcc.target/i386/pr44130.c | 2 + > > gcc/tree-ssa-forwprop.cc | 57 +++++++++++++++++++ > > 3 files changed, 83 insertions(+) > > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > new file mode 100644 > > index 00000000000..d9b2bc2533e > > --- /dev/null > > +++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > @@ -0,0 +1,24 @@ > > +// { dg-do compile { target c++11 } } > > +// { dg-options "-O2 -fdump-tree-optimized" } > > + > > +// PR tree-optimization/116651 > > + > > +#include <vector> > > + > > +bool test1(const std::vector<int>& in) { > > + return in == std::vector<int>{24}; > > +} > > + > > +/* We should be to optimize this to: > > + int *b = in.bptr; > > + int *e = in.eptr; > > + auto size = e - b; > > + if (size != 4) > > + return false; > > + int v = *b; > > + return v == 24; > > + > > +*/ > > + > > +// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } } */ > > +// { dg-final { scan-tree-dump-times "== 4" 1 "optimized" { target int32 > > } } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c > > b/gcc/testsuite/gcc.target/i386/pr44130.c > > index 2ad740993c1..6269dc89f5e 100644 > > --- a/gcc/testsuite/gcc.target/i386/pr44130.c > > +++ b/gcc/testsuite/gcc.target/i386/pr44130.c > > @@ -21,6 +21,8 @@ void testf (void) > > xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]); > > xxxxx[6] = __builtin_copysignf (__builtin_inff (), Yf[6]); > > xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]); > > + > > + asm("":"=m"(xxxxx)); > > for (i = 0; i < 8; ++i) > > if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0) > > abort (); > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 447004ef048..f5cd3a8f390 100644 > > --- 
a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -1593,6 +1593,60 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip) > > return changed; > > } > > > > +/* Optimizes builtin memcmps for small constant sizes. > > + GSI_P is the GSI for the call. STMT is the call itself. > > + */ > > + > > +static bool > > +simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt) > > +{ > > + tree res = gimple_call_lhs (stmt); > > + > > + if (!res || !use_in_zero_equality (res)) > > + return false; > > + tree arg1 = gimple_call_arg (stmt, 0); > > + tree arg2 = gimple_call_arg (stmt, 1); > > + tree len = gimple_call_arg (stmt, 2); > > you need to check the number of arguments are actually here and > arg1/2 are pointers. > > Otherwise looks OK to me.
Attached is what I pushed.
I added the checks here and also added references to the other bug
reports in the commit message.
Note that the number of arguments and their types are not checked in
the strlen pass for any of the builtins, which means that pass could
possibly crash.
Thanks,
Andrew Pinski
>
> Thanks,
> Richard.
>
> > + unsigned HOST_WIDE_INT leni;
> > +
> > + if (tree_fits_uhwi_p (len)
> > + && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
> > + && pow2p_hwi (leni))
> > + {
> > + leni *= CHAR_TYPE_SIZE;
> > + unsigned align1 = get_pointer_alignment (arg1);
> > + unsigned align2 = get_pointer_alignment (arg2);
> > + unsigned align = MIN (align1, align2);
> > + scalar_int_mode mode;
> > + if (int_mode_for_size (leni, 1).exists (&mode)
> > + && (align >= leni || !targetm.slow_unaligned_access (mode,
> > align)))
> > + {
> > + location_t loc = gimple_location (stmt);
> > + tree type, off;
> > + type = build_nonstandard_integer_type (leni, 1);
> > + gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
> > + tree ptrtype = build_pointer_type_for_mode (char_type_node,
> > + ptr_mode, true);
> > + off = build_int_cst (ptrtype, 0);
> > + arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
> > + arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
> > + tree tem1 = fold_const_aggregate_ref (arg1);
> > + if (tem1)
> > + arg1 = tem1;
> > + tree tem2 = fold_const_aggregate_ref (arg2);
> > + if (tem2)
> > + arg2 = tem2;
> > + res = fold_convert_loc (loc, TREE_TYPE (res),
> > + fold_build2_loc (loc, NE_EXPR,
> > + boolean_type_node,
> > + arg1, arg2));
> > + gimplify_and_update_call_from_tree (gsi_p, res);
> > + return true;
> > + }
> > + }
> > + return false;
> > +}
> > +
> > /* *GSI_P is a GIMPLE_CALL to a builtin function.
> > Optimize
> > memcpy (p, "abcd", 4);
> > @@ -1630,6 +1684,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p,
> > tree callee2, bool full_walk
> >
> > switch (DECL_FUNCTION_CODE (callee2))
> > {
> > + case BUILT_IN_MEMCMP:
> > + case BUILT_IN_MEMCMP_EQ:
> > + return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2));
> > case BUILT_IN_MEMCHR:
> > if (gimple_call_num_args (stmt2) == 3
> > && (res = gimple_call_lhs (stmt2)) != nullptr
> > --
> > 2.43.0
> >
v2-0001-forwprop-Copy-the-memcmp-optimization-from-strlen.patch
Description: Binary data
