On Fri, Aug 29, 2025 at 6:33 AM Andrew Pinski
<[email protected]> wrote:
>
> To better optimize code dealing with `memcmp == 0` where we have
> a small constant size, we can inline the memcmp in those cases.
> There is code to do this in strlen but that is run too late in
> the case where we can figure out the value of one of the arguments
> to memcmp. So this copies the optimization to forwprop.
>
> An example of where this helps is:
> ```
> bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; }
> ```
>
> Where the above should be optimized to just `return a.size() == 1 && a[0] ==
> 10;`.
>
> Note the pr44130.c testcase needed to change, as otherwise the memcmp
> checks would now be optimized away.
> Note the loop in pr44130.c is also vectorized, which it was not before.
>
> Note the optimization remains in strlen as the other part (memcmp ->
> memcmp_eq)
> should move to either isel or fab and I didn't want to remove it just yet.
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
> PR tree-optimization/116651
>
> gcc/ChangeLog:
>
> * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function.
> (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp
> and memcmp_eq builtins.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr44130.c: Add an inline-asm clobber.
> * g++.dg/tree-ssa/vector-compare-1.C: New test.
>
> Signed-off-by: Andrew Pinski <[email protected]>
> ---
> .../g++.dg/tree-ssa/vector-compare-1.C | 24 ++++++++
> gcc/testsuite/gcc.target/i386/pr44130.c | 2 +
> gcc/tree-ssa-forwprop.cc | 57 +++++++++++++++++++
> 3 files changed, 83 insertions(+)
> create mode 100644 gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> new file mode 100644
> index 00000000000..d9b2bc2533e
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> @@ -0,0 +1,24 @@
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-O2 -fdump-tree-optimized" }
> +
> +// PR tree-optimization/116651
> +
> +#include <vector>
> +
> +bool test1(const std::vector<int>& in) {
> + return in == std::vector<int>{24};
> +}
> +
> +/* We should be able to optimize this to:
> + int *b = in.bptr;
> + int *e = in.eptr;
> + auto size = e - b;
> + if (size != 4)
> + return false;
> + int v = *b;
> + return v == 24;
> +
> +*/
> +
> +// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } } */
> +// { dg-final { scan-tree-dump-times "== 4" 1 "optimized" { target int32 }
> } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c
> b/gcc/testsuite/gcc.target/i386/pr44130.c
> index 2ad740993c1..6269dc89f5e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr44130.c
> +++ b/gcc/testsuite/gcc.target/i386/pr44130.c
> @@ -21,6 +21,8 @@ void testf (void)
> xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]);
> xxxxx[6] = __builtin_copysignf (__builtin_inff (), Yf[6]);
> xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]);
> +
> + asm("":"=m"(xxxxx));
> for (i = 0; i < 8; ++i)
> if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0)
> abort ();
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 447004ef048..f5cd3a8f390 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1593,6 +1593,60 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
> return changed;
> }
>
> +/* Optimizes builtin memcmps for small constant sizes.
> + GSI_P is the GSI for the call. STMT is the call itself.
> + */
> +
> +static bool
> +simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt)
> +{
> + tree res = gimple_call_lhs (stmt);
> +
> + if (!res || !use_in_zero_equality (res))
> + return false;
> + tree arg1 = gimple_call_arg (stmt, 0);
> + tree arg2 = gimple_call_arg (stmt, 1);
> + tree len = gimple_call_arg (stmt, 2);
You need to check that the call actually has the expected number of
arguments (three) and that arg1/arg2 are pointers before using them.
Otherwise looks OK to me.
Thanks,
Richard.
> + unsigned HOST_WIDE_INT leni;
> +
> + if (tree_fits_uhwi_p (len)
> + && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
> + && pow2p_hwi (leni))
> + {
> + leni *= CHAR_TYPE_SIZE;
> + unsigned align1 = get_pointer_alignment (arg1);
> + unsigned align2 = get_pointer_alignment (arg2);
> + unsigned align = MIN (align1, align2);
> + scalar_int_mode mode;
> + if (int_mode_for_size (leni, 1).exists (&mode)
> + && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
> + {
> + location_t loc = gimple_location (stmt);
> + tree type, off;
> + type = build_nonstandard_integer_type (leni, 1);
> + gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
> + tree ptrtype = build_pointer_type_for_mode (char_type_node,
> + ptr_mode, true);
> + off = build_int_cst (ptrtype, 0);
> + arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
> + arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
> + tree tem1 = fold_const_aggregate_ref (arg1);
> + if (tem1)
> + arg1 = tem1;
> + tree tem2 = fold_const_aggregate_ref (arg2);
> + if (tem2)
> + arg2 = tem2;
> + res = fold_convert_loc (loc, TREE_TYPE (res),
> + fold_build2_loc (loc, NE_EXPR,
> + boolean_type_node,
> + arg1, arg2));
> + gimplify_and_update_call_from_tree (gsi_p, res);
> + return true;
> + }
> + }
> + return false;
> +}
> +
> /* *GSI_P is a GIMPLE_CALL to a builtin function.
> Optimize
> memcpy (p, "abcd", 4);
> @@ -1630,6 +1684,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p,
> tree callee2, bool full_walk
>
> switch (DECL_FUNCTION_CODE (callee2))
> {
> + case BUILT_IN_MEMCMP:
> + case BUILT_IN_MEMCMP_EQ:
> + return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2));
> case BUILT_IN_MEMCHR:
> if (gimple_call_num_args (stmt2) == 3
> && (res = gimple_call_lhs (stmt2)) != nullptr
> --
> 2.43.0
>