On Thu, Aug 28, 2025 at 9:43 PM Andrew Pinski <[email protected]> wrote: > > On Thu, Aug 28, 2025 at 9:32 PM Andrew Pinski > <[email protected]> wrote: > > > > To better optimize code dealing with `memcmp == 0` where we have > > a small constant size, we can inline the memcmp in those cases. > > There is code to do this in strlen but that is run too late in > > the case where we can figure out the value of one of the arguments > > to memcmp. So this copies the optimization to forwprop. > > > > An example of where this helps is: > > ``` > > bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; > > } > > ``` > > > > Where the above should be optimized to just `return a.size() == 1 && a[0] > > == 10;`. > > > > Note pr44130.c testcase needed to change as now it will be optimized away > > otherwise. > > Note the loop in pr44130.c is also vectorized which it was not before. > > > > Note the optimization remains in strlen as the other part (memcmp -> > > memcmp_eq) > > should move to either isel or fab and I didn't want to remove it just yet. > > > > Bootstrapped and tested on x86_64-linux-gnu. > > > > PR tree-optimization/116651 > > Note this fixes PR 93265 and PR 103647, and is the first part of PR 52171.
Thanks, Andrew > > > > > gcc/ChangeLog: > > > > * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function. > > (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp and > > memcmp_eq builtins. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr44130.c: Add an inline-asm clobber. > > * g++.dg/tree-ssa/vector-compare-1.C: New test. > > > > Signed-off-by: Andrew Pinski <[email protected]> > > --- > > .../g++.dg/tree-ssa/vector-compare-1.C | 24 ++++++++ > > gcc/testsuite/gcc.target/i386/pr44130.c | 2 + > > gcc/tree-ssa-forwprop.cc | 57 +++++++++++++++++++ > > 3 files changed, 83 insertions(+) > > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > new file mode 100644 > > index 00000000000..d9b2bc2533e > > --- /dev/null > > +++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > @@ -0,0 +1,24 @@ > > +// { dg-do compile { target c++11 } } > > +// { dg-options "-O2 -fdump-tree-optimized" } > > + > > +// PR tree-optimization/116651 > > + > > +#include <vector> > > + > > +bool test1(const std::vector<int>& in) { > > + return in == std::vector<int>{24}; > > +} > > + > > +/* We should be able to optimize this to: > > + int *b = in.bptr; > > + int *e = in.eptr; > > + auto size = e - b; > > + if (size != 4) > > + return false; > > + int v = *b; > > + return v == 24; > > + > > +*/ > > + > > +// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } } */ > > +// { dg-final { scan-tree-dump-times "== 4" 1 "optimized" { target int32 > > } } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c > > b/gcc/testsuite/gcc.target/i386/pr44130.c > > index 2ad740993c1..6269dc89f5e 100644 > > --- a/gcc/testsuite/gcc.target/i386/pr44130.c > > +++ b/gcc/testsuite/gcc.target/i386/pr44130.c > > @@ -21,6 +21,8 @@ void testf (void) > > xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]); > > xxxxx[6] = __builtin_copysignf 
(__builtin_inff (), Yf[6]); > > xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]); > > + > > + asm("":"=m"(xxxxx)); > > for (i = 0; i < 8; ++i) > > if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0) > > abort (); > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 447004ef048..f5cd3a8f390 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -1593,6 +1593,60 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip) > > return changed; > > } > > > > +/* Optimizes builtin memcmps for small constant sizes. > > + GSI_P is the GSI for the call. STMT is the call itself. > > + */ > > + > > +static bool > > +simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt) > > +{ > > + tree res = gimple_call_lhs (stmt); > > + > > + if (!res || !use_in_zero_equality (res)) > > + return false; > > + tree arg1 = gimple_call_arg (stmt, 0); > > + tree arg2 = gimple_call_arg (stmt, 1); > > + tree len = gimple_call_arg (stmt, 2); > > + unsigned HOST_WIDE_INT leni; > > + > > + if (tree_fits_uhwi_p (len) > > + && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode) > > + && pow2p_hwi (leni)) > > + { > > + leni *= CHAR_TYPE_SIZE; > > + unsigned align1 = get_pointer_alignment (arg1); > > + unsigned align2 = get_pointer_alignment (arg2); > > + unsigned align = MIN (align1, align2); > > + scalar_int_mode mode; > > + if (int_mode_for_size (leni, 1).exists (&mode) > > + && (align >= leni || !targetm.slow_unaligned_access (mode, > > align))) > > + { > > + location_t loc = gimple_location (stmt); > > + tree type, off; > > + type = build_nonstandard_integer_type (leni, 1); > > + gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni)); > > + tree ptrtype = build_pointer_type_for_mode (char_type_node, > > + ptr_mode, true); > > + off = build_int_cst (ptrtype, 0); > > + arg1 = build2_loc (loc, MEM_REF, type, arg1, off); > > + arg2 = build2_loc (loc, MEM_REF, type, arg2, off); > > + tree tem1 = 
fold_const_aggregate_ref (arg1); > > + if (tem1) > > + arg1 = tem1; > > + tree tem2 = fold_const_aggregate_ref (arg2); > > + if (tem2) > > + arg2 = tem2; > > + res = fold_convert_loc (loc, TREE_TYPE (res), > > + fold_build2_loc (loc, NE_EXPR, > > + boolean_type_node, > > + arg1, arg2)); > > + gimplify_and_update_call_from_tree (gsi_p, res); > > + return true; > > + } > > + } > > + return false; > > +} > > + > > /* *GSI_P is a GIMPLE_CALL to a builtin function. > > Optimize > > memcpy (p, "abcd", 4); > > @@ -1630,6 +1684,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, > > tree callee2, bool full_walk > > > > switch (DECL_FUNCTION_CODE (callee2)) > > { > > + case BUILT_IN_MEMCMP: > > + case BUILT_IN_MEMCMP_EQ: > > + return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2)); > > case BUILT_IN_MEMCHR: > > if (gimple_call_num_args (stmt2) == 3 > > && (res = gimple_call_lhs (stmt2)) != nullptr > > -- > > 2.43.0 > >
