On Mon, Sep 8, 2025 at 5:30 AM Andrew Pinski <[email protected]> wrote: > > It turns out easy to add support for memcpy copy prop when the memcpy > has changed into `MEM<char[N]>` copy. > Instead of rejecting right out we need to figure out that > `a` and `MEM<char[N]>[&a]` are equivalent in terms of address and size. > And then create a VIEW_CONVER_EXPR from the original src to the new type. > > Note this also allows for `a.b` and `a` being considered equivalent if b is > the > only field (PR 121751). > > Bootstrapped and tested on x86_64-linux-gnu. > > PR tree-optimization/121751 > PR tree-optimization/121418 > PR tree-optimization/121417 > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (optimize_agr_copyprop_1): Allow for the same > address but different type accesses via a VCE. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/copy-prop-aggregate-1.c: New test. > * gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c: New test. > * gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c: New test. > > Signed-off-by: Andrew Pinski <[email protected]> > --- > .../gcc.dg/tree-ssa/copy-prop-aggregate-1.c | 33 +++++++++++++++ > .../tree-ssa/copy-prop-aggregate-memcpy-1.c | 18 ++++++++ > .../tree-ssa/copy-prop-aggregate-memcpy-2.c | 20 +++++++++ > gcc/tree-ssa-forwprop.cc | 42 ++++++++++++++++++- > 4 files changed, 112 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c > create mode 100644 > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c > create mode 100644 > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c > new file mode 100644 > index 00000000000..1094c4d768b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c > @@ -0,0 +1,33 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } > */ > +/* PR 
tree-optimization/121751 */ > + > + > +struct s1 > +{ > + int t[1024]; > +}; > + > +struct s2 { > + struct s1 t; > +}; > + > +struct s3 > +{ > + struct s2 t; > +}; > + > +void g(struct s3*); > + > +void f(struct s1 s) > +{ > + struct s2 removeme; > + removeme.t = s; > + struct s3 t1; > + t1.t = removeme; > + g(&t1); > +} > + > + > +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "removeme " "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c > new file mode 100644 > index 00000000000..5faf6d0bf9b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } > */ > +/* PR tree-optimization/121418 */ > + > +struct s1 > +{ > + unsigned char t[1024]; > +}; > + > +struct s1 f(struct s1 a) > +{ > + struct s1 removeme1 = a; > + __builtin_memcpy (&removeme1, &a, sizeof(struct s1)); > + return removeme1; > +} > + > +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "removeme1 " "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c > new file mode 100644 > index 00000000000..b1ba30d0aba > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } > */ > +/* PR tree-optimization/121417 */ > + > +struct s1 > +{ > + unsigned char t[1024]; > +}; > + > +struct s1 f(struct s1 a) > +{ > + struct s1 removeme1 = a; > + struct s1 removeme2; > + __builtin_memcpy (&removeme2, &removeme1, sizeof(struct s1)); > + return removeme2; > +} > + > +/* { dg-final { 
scan-tree-dump-times "after previous" 2 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "removeme1 " "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "removeme2 " "optimized" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 9c6f4b355d6..ba9be862e8e 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -1434,8 +1434,48 @@ optimize_agr_copyprop_1 (gimple *stmt, gimple > *use_stmt, > /* If the new store is `src2 = src2;` skip over it. */ > if (operand_equal_p (src2, dest2, 0)) > return false; > + /* If the second src is not exactly the same as dest, > + try to handle it seperately; see it is address/size equivalent. > + Handles `a` and `a.b` and `MEM<char[N]>(&a)` which all have > + the same size and offsets as address/size equivalent. > + This allows copying over a memcpy and also one for copying > + where one field is the same size as the whole struct. */ > if (!operand_equal_p (dest, src2, 0)) > - return false; > + { > + poly_int64 offset1, offset2; > + tree base1 = get_addr_base_and_unit_offset (dest, &offset1); > + tree base2 = get_addr_base_and_unit_offset (src2, &offset2); > + /* The bases have been equivalent. */ > + if (!base1 || !base2 > + || !operand_equal_p (base1, base2)) > + return false; > + /* Offsets from the base needs to be the same. */ > + if (!known_eq (offset1, offset2)) > + return false; > + tree len1 = (TREE_CODE (dest) == COMPONENT_REF > + ? DECL_SIZE_UNIT (TREE_OPERAND (dest, 1)) > + : TYPE_SIZE_UNIT (TREE_TYPE (dest))); > + tree len2 = (TREE_CODE (src2) == COMPONENT_REF > + ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1)) > + : TYPE_SIZE_UNIT (TREE_TYPE (src2)));
I think it's better to use get_inner_reference or get_ref_base_and_extent above, since that gets you a proper size (and you want to reject non-byte-sized accesses/offsets). Also, please do the IMAGPART_EXPR/REALPART_EXPR/BIT_FIELD_REF check from below earlier. > + /* The sizes need to be constant. */ > + if (!poly_int_tree_p (len1) > + || !poly_int_tree_p (len1)) > + return false; > + auto l1 = wi::to_poly_offset (len1); > + auto l2 = wi::to_poly_offset (len2); > + /* The sizes of the 2 accesses need to be the same. */ > + if (!known_eq (l1, l2)) > + return false; > + /* A VCE can't be used with imag/real or BFR so reject them. */ > + if (TREE_CODE (src) == IMAGPART_EXPR > + || TREE_CODE (src) == REALPART_EXPR > + || TREE_CODE (src) == BIT_FIELD_REF) > + return false; > + src = fold_build1_loc (gimple_location (use_stmt), > + VIEW_CONVERT_EXPR, > + TREE_TYPE (src2), src); > + } > /* For 2 memory refences and using a temporary to do the copy, > don't remove the temporary as the 2 memory references might overlap. > Note t does not need to be decl as it could be field. > -- > 2.43.0 >
