On Fri, May 23, 2025 at 10:12 PM Andrew Pinski <[email protected]> wrote:
>
> This improves copy prop for aggregates by working over statements that don't
> modify the access
> just like how it is done for copying zeros.
> To speed up things, we should only have one loop back on the vuse instead of
> doing it twice for
> each load/store assignments.
I will be submitting this after the first patch is approved. Note
there is a bug with the check of stmt_may_clobber_ref_p_1 when it
comes to checking stores, we can't use TBAA.
Otherwise you miscompile the following:
```
struct f{unsigned t;};
struct p{unsigned t;};
unsigned int tt;
static inline
void g(struct f *a, struct p *b)
{
struct f c = *a;
b->t = 1;
*a = c;
}
struct f a = {};
int h(void)
{
g(&a, (struct p*)&a);
return a.t;
}
```
Thanks,
Andrew
>
> PR tree-optimization/14295
>
> gcc/ChangeLog:
>
> * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Split rewriting
> part out into ...
> (optimize_memcpy_to_memset_1): This function.
> (optimize_agr_copyprop): Walk back until we get a
> statement that may clobber the read. Call optimize_memcpy_to_memset_1.
> Check that the src variable is not clobbered between the 2 statements.
> (pass_forwprop::execute): Call optimize_agr_copyprop only instead of
> optimize_memcpy_to_memset too.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/copy-prop-arg-1.c: New test.
> * gcc.dg/tree-ssa/copy-prop-arg-2.c: New test.
> * gcc.dg/tree-ssa/copy-prop-arg-3.c: New test.
>
> Signed-off-by: Andrew Pinski <[email protected]>
> ---
> .../gcc.dg/tree-ssa/copy-prop-arg-1.c | 37 +++++
> .../gcc.dg/tree-ssa/copy-prop-arg-2.c | 35 ++++
> .../gcc.dg/tree-ssa/copy-prop-arg-3.c | 41 +++++
> gcc/tree-ssa-forwprop.cc | 149 ++++++++++++------
> 4 files changed, 212 insertions(+), 50 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> new file mode 100644
> index 00000000000..876556fc464
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs. */
> +
> +struct s
> +{
> + char d;
> + int a, b;
> + double m;
> + int t[1024];
> +};
> +
> +void g();
> +struct s foo (struct s r)
> +{
> + struct s temp_struct1;
> + struct s temp_struct2;
> + struct s temp_struct3;
> + temp_struct1 = r;
> + temp_struct2 = temp_struct1;
> + temp_struct3 = temp_struct2;
> + /* The call here should not make
> + a difference in removing all of the copies,
> + this is true as we end up with r = r; */
> + g();
> + r = temp_struct3;
> + return r;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> + temporaries. */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> new file mode 100644
> index 00000000000..b1a76b48953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs. */
> +
> +struct s
> +{
> + char d;
> + int a, b;
> + double m;
> + int t[1024];
> +};
> +
> +void g();
> +struct s foo (struct s r)
> +{
> + struct s temp_struct1;
> + struct s temp_struct2;
> + struct s temp_struct3;
> + temp_struct1 = r;
> + temp_struct2 = temp_struct1;
> + temp_struct3 = temp_struct2;
> + /* The call here should not make
> + a difference in removing all of the copies. */
> + g();
> + return temp_struct3;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> + temporaries. */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
> new file mode 100644
> index 00000000000..96efafbe20c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra -fdump-tree-optimized
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs. */
> +
> +struct s
> +{
> + int t[1024];
> +};
> +
> +[[gnu::noinline]]
> +void g(float *a)
> +{
> + *a = 1.0;
> +}
> +
> +struct s r;
> +struct s foo (float *a)
> +{
> + struct s temp_struct1;
> + struct s temp_struct2;
> + struct s temp_struct3;
> + temp_struct1 = r;
> + temp_struct2 = temp_struct1;
> + temp_struct3 = temp_struct2;
> + /* The call here should not make
> + a difference in removing all of the copies,
> + this is true as we end up with r = r; g should be known only to change
> + float types so can be ignored with TBAA. */
> + g(a);
> + r = temp_struct3;
> + return r;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> + temporaries. */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 242538ccc62..ec03a93da6d 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1191,55 +1191,17 @@ constant_pointer_difference (tree p1, tree p2)
> return NULL_TREE;
> }
>
> -
> -/* Optimize
> - a = {};
> - b = a;
> - into
> - a = {};
> - b = {};
> - Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> - and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */
> -
> static bool
> -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src,
> tree len)
> +optimize_memcpy_to_memset_1 (gimple_stmt_iterator *gsip, tree dest, tree
> src, tree len, gimple *stmt, gimple *defstmt)
> {
> - ao_ref read;
> - gimple *stmt = gsi_stmt (*gsip);
> - if (gimple_has_volatile_ops (stmt))
> - return false;
> -
> - tree src2 = NULL_TREE, len2 = NULL_TREE;
> poly_int64 offset, offset2;
> tree val = integer_zero_node;
> - bool len_was_null = len == NULL_TREE;
> - if (len == NULL_TREE)
> - len = (TREE_CODE (src) == COMPONENT_REF
> - ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> - : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> + tree len2 = NULL_TREE;
> + tree src2 = NULL_TREE;
> if (len == NULL_TREE
> || !poly_int_tree_p (len))
> return false;
>
> - ao_ref_init (&read, src);
> - tree vuse = gimple_vuse (stmt);
> - gimple *defstmt;
> - unsigned limit = param_sccvn_max_alias_queries_per_access;
> - do {
> - if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> - return false;
> - defstmt = SSA_NAME_DEF_STMT (vuse);
> - if (is_a <gphi*>(defstmt))
> - return false;
> - if (limit-- == 0)
> - return false;
> - /* If the len was null, then we can use TBBA. */
> - if (stmt_may_clobber_ref_p_1 (defstmt, &read,
> - /* tbaa_p = */ len_was_null))
> - break;
> - vuse = gimple_vuse (defstmt);
> - } while (true);
> -
> if (gimple_store_p (defstmt)
> && gimple_assign_single_p (defstmt)
> && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST
> @@ -1341,6 +1303,50 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip,
> tree dest, tree src, tree
> }
> return true;
> }
> +
> +/* Optimize
> + a = {};
> + b = a;
> + into
> + a = {};
> + b = {};
> + Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> + and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */
> +
> +static bool
> +optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src,
> tree len)
> +{
> + gcc_assert (len);
> + ao_ref read;
> + gimple *stmt = gsi_stmt (*gsip);
> + if (gimple_has_volatile_ops (stmt))
> + return false;
> +
> + if (!poly_int_tree_p (len))
> + return false;
> +
> + ao_ref_init (&read, src);
> + tree vuse = gimple_vuse (stmt);
> + gimple *defstmt;
> + unsigned limit = param_sccvn_max_alias_queries_per_access;
> + do {
> + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> + return false;
> + defstmt = SSA_NAME_DEF_STMT (vuse);
> + if (is_a <gphi*>(defstmt))
> + return false;
> + if (limit-- == 0)
> + return false;
> + if (stmt_may_clobber_ref_p_1 (defstmt, &read,
> + /* tbaa_p = */ false))
> + break;
> + vuse = gimple_vuse (defstmt);
> + } while (true);
> +
> + return optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt);
> +}
> +
> +
> /* Optimizes
> a = c;
> b = a;
> @@ -1374,7 +1380,33 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip,
> tree dest, tree src)
> gsi_replace (gsip, gimple_build_nop (), false);
> return true;
> }
> - gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> + gimple *defstmt;
> + bool callinbetween = false;
> +
> + ao_ref read;
> + ao_ref_init (&read, src);
> + vuse = gimple_vuse (stmt);
> + unsigned limit = param_sccvn_max_alias_queries_per_access;
> + do {
> + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> + return false;
> + defstmt = SSA_NAME_DEF_STMT (vuse);
> + if (is_a <gphi*>(defstmt))
> + return false;
> + if (limit-- == 0)
> + return false;
> + callinbetween |= is_a <gcall *>(defstmt);
> + if (stmt_may_clobber_ref_p_1 (defstmt, &read, true))
> + break;
> + vuse = gimple_vuse (defstmt);
> + } while (true);
> +
> + tree len = (TREE_CODE (src) == COMPONENT_REF
> + ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> + : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> + if (optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt))
> + return true;
> +
> if (!gimple_assign_load_p (defstmt)
> || !gimple_store_p (defstmt))
> return false;
> @@ -1390,6 +1422,30 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip,
> tree dest, tree src)
> if (!operand_equal_p (src, dest2, 0))
> return false;
>
> + /* Check that the src2 has not changed. */
> + ao_ref_init (&read, src2);
> + vuse = gimple_vuse (stmt);
> + limit = param_sccvn_max_alias_queries_per_access;
> + gimple *defstmt1;
> + do {
> + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> + return false;
> + defstmt1 = SSA_NAME_DEF_STMT (vuse);
> + if (defstmt == defstmt1)
> + break;
> + if (is_a <gphi*>(defstmt1))
> + return false;
> + if (limit-- == 0)
> + return false;
> + if (stmt_may_clobber_ref_p_1 (defstmt1, &read, true))
> + {
> + if (defstmt != defstmt1)
> + return false;
> + break;
> + }
> + vuse = gimple_vuse (defstmt1);
> + } while (true);
> +
> /* If replacing with the same thing, then change the statement
> into a NOP. */
> if (operand_equal_p (src2, dest, 0))
> @@ -1407,6 +1463,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree
> dest, tree src)
> gsi_replace (gsip, gimple_build_nop (), false);
> return true;
> }
> +
> /* For 2 memory refences and using a temporary to do the copy,
> don't remove the temporary as the 2 memory references might overlap.
> Note t does not need to be decl as it could be field.
> @@ -4806,14 +4863,6 @@ pass_forwprop::execute (function *fun)
> enum tree_code code = gimple_assign_rhs_code (stmt);
> if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
> {
> - if (optimize_memcpy_to_memset (&gsi,
> - gimple_assign_lhs
> (stmt),
> - gimple_assign_rhs1
> (stmt),
> - /* len = */NULL_TREE))
> - {
> - changed = true;
> - break;
> - }
> if (optimize_agr_copyprop (&gsi, gimple_assign_lhs
> (stmt),
> gimple_assign_rhs1 (stmt)))
> {
> --
> 2.43.0
>