On Tue, 5 Dec 2017, Martin Jambor wrote:

> On Tue, Dec 05 2017, Martin Jambor wrote:
> > On Tue, Dec 05 2017, Martin Jambor wrote:
> > Hi,
> >
> >> Hi,
> >>
> >> this is a followup to Richi's
> >> https://gcc.gnu.org/ml/gcc-patches/2017-11/msg02396.html to fix PR
> >> 83141.  The basic idea is simple: be just as conservative about
> >> type-changing MEM_REFs as we are about actual VCEs.
> >>
> >> I have checked how that would affect compilation of SPEC 2006 and
> >> (non-LTO) Mozilla Firefox and am happy to report that the difference
> >> was tiny.  However, I had to make the test less strict; otherwise,
> >> testcase gcc.dg/guality/pr54970.c kept failing because it contains a
> >> folded memcpy and expects us to track values across:
> >>
> >>   int a[] = { 1, 2, 3 };
> >>   /* ... */
> >>   __builtin_memcpy (&a, (int [3]) { 4, 5, 6 }, sizeof (a));
> >>                            /* { dg-final { gdb-test 31 "a\[0\]" "4" } } */
> >>                            /* { dg-final { gdb-test 31 "a\[1\]" "5" } } */
> >>                            /* { dg-final { gdb-test 31 "a\[2\]" "6" } } */
> >>   
> >> SRA is able to load the replacement of a[0] directly from the temporary
> >> array, which is apparently necessary to generate proper debug info.  I
> >> have therefore allowed the current transformation to go forward if the
> >> source does not contain any padding or if it is a read-only declaration.
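> >>
> >> For illustration (sizes assume a typical LP64 target; these structs
> >> are made up for this note and are not from the testcase):
> >>
> >>   struct padded   { short s; long l; };    /* 6 bytes of padding after s */
> >>   struct unpadded { long l; short s[4]; }; /* 16 bytes, no padding */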
> >
> > Ah, the read-only test is of course bogus, it was a last minute addition
> > when I was apparently already too tired to think it through.  Please
> > disregard that line in the patch (it has passed bootstrap and testing
> > without it).
> >
> > Sorry for the noise,
> >
> > Martin
> >
> 
> And for the record, below is the actual patch, after a fresh round of
> re-testing to double-check I did not mess up anything else.  As before,
> I'd like to ask for review, especially of the type_contains_padding_p
> predicate, and would then like to commit it to trunk.

Comments below...

> Thanks,
> 
> Martin
> 
> 
> 2017-12-05  Martin Jambor  <mjam...@suse.cz>
> 
>       PR tree-optimization/83141
>       * tree-sra.c (type_contains_padding_p): New function.
>       (contains_vce_or_bfcref_p): Move up in the file, also test for
>       MEM_REFs implicitly changing types with padding.  Remove inline
>       keyword.
>       (build_accesses_from_assign): Check contains_vce_or_bfcref_p
>       before setting bit in should_scalarize_away_bitmap.
> 
> testsuite/
>       * gcc.dg/tree-ssa/pr83141.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr83141.c |  31 +++++++++
>  gcc/tree-sra.c                          | 115 ++++++++++++++++++++++++++------
>  2 files changed, 127 insertions(+), 19 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
> new file mode 100644
> index 00000000000..86caf66025b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
> @@ -0,0 +1,31 @@
> +/* { dg-do run } */
> +/* { dg-options "-O -fdump-tree-esra-details" } */
> +
> +volatile short vs;
> +volatile long vl;
> +
> +struct A { short s; long i; long j; };
> +struct A a, b;
> +void foo ()
> +{
> +  struct A c;
> +  __builtin_memcpy (&c, &b, sizeof (struct A));
> +  __builtin_memcpy (&a, &c, sizeof (struct A));
> +
> +  vs = c.s;
> +  vl = c.i;
> +  vl = c.j;
> +}
> +int main()
> +{
> +  __builtin_memset (&b, 0, sizeof (struct A));
> +  b.s = 1;
> +  __builtin_memcpy ((char *)&b+2, &b, 2);
> +  foo ();
> +  __builtin_memcpy (&a, (char *)&a+2, 2);
> +  if (a.s != 1)
> +    __builtin_abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Will attempt to totally scalarize" "esra" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 866cff0edb0..df9f10f83b6 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -1141,6 +1141,100 @@ contains_view_convert_expr_p (const_tree ref)
>    return false;
>  }
>  
> +/* Return false if we can determine that TYPE has no padding, otherwise
> +   return true.  */
> +
> +static bool
> +type_contains_padding_p (tree type)
> +{
> +  if (is_gimple_reg_type (type))
> +    return false;
> +
> +  if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
> +    return true;
> +
> +  switch (TREE_CODE (type))
> +    {
> +    case RECORD_TYPE:
> +      {
> +     unsigned HOST_WIDE_INT pos = 0;
> +     for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> +       if (TREE_CODE (fld) == FIELD_DECL)
> +         {
> +           tree ft = TREE_TYPE (fld);
> +
> +           if (DECL_BIT_FIELD (fld)
> +               || DECL_PADDING_P (fld)
> +               || !tree_fits_uhwi_p (TYPE_SIZE (ft)))
> +             return true;
> +
> +           tree t = bit_position (fld);
> +           if (!tree_fits_uhwi_p (t))
> +             return true;
> +           unsigned HOST_WIDE_INT bp = tree_to_uhwi (t);
> +           if (pos != bp)
> +             return true;
> +
> +           pos += tree_to_uhwi (TYPE_SIZE (ft));
> +           if (type_contains_padding_p (ft))
> +             return true;
> +         }
> +
> +     if (pos != tree_to_uhwi (TYPE_SIZE (type)))
> +       return true;
> +
> +     return false;
> +      }
> +
> +    case ARRAY_TYPE:
> +      return type_contains_padding_p (TREE_TYPE (type));
> +
> +    default:
> +      return true;
> +    }
> +  gcc_unreachable ();
> +}

The possibly repeated walk over all fields and subfields of an aggregate
type looks expensive to me - some caching of the result on the type's
main variant would seem advisable.
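
Something like this untested sketch (the wrapper and the cache are
hypothetical, not existing tree-sra.c code):

  /* Hypothetical cache keyed on main variants; the pass would need to
     allocate and free it.  */
  static hash_map<tree, bool> *padding_cache;

  static bool
  cached_type_contains_padding_p (tree type)
  {
    type = TYPE_MAIN_VARIANT (type);
    if (!padding_cache)
      padding_cache = new hash_map<tree, bool>;
    bool *cached = padding_cache->get (type);
    if (cached)
      return *cached;
    /* Compute before inserting so that recursive calls for field types
       cannot invalidate the slot we write.  */
    bool res = type_contains_padding_p (type);
    padding_cache->put (type, res);
    return res;
  }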

> +/* Return true if REF contains a MEM_REF that performs a type conversion
> +   from a type with padding, a VIEW_CONVERT_EXPR, or a COMPONENT_REF with
> +   a bit-field field declaration.  */
> +
> +static bool
> +contains_vce_or_bfcref_p (const_tree ref)
> +{
> +  while (true)
> +    {
> +      if (TREE_CODE (ref) == MEM_REF)
> +     {
> +       tree op0 = TREE_OPERAND (ref, 0);
> +       if (TREE_CODE (op0) == ADDR_EXPR)
> +         {
> +           tree mem = TREE_OPERAND (op0, 0);
> +           if ((TYPE_MAIN_VARIANT (TREE_TYPE (ref))
> +                != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
> +               && type_contains_padding_p (TREE_TYPE (mem)))
> +             return true;
> +
> +           ref = mem;
> +           continue;
> +         }
> +       else
> +         return false;
> +     }
> +
> +      if (!handled_component_p (ref))
> +     return false;
> +
> +      if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
> +       || (TREE_CODE (ref) == COMPONENT_REF
> +           && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
> +     return true;
> +      ref = TREE_OPERAND (ref, 0);
> +    }

It is best to retain the old code and simply append the

  if (TREE_CODE (ref) == MEM_REF)

check after the while (handled_component_p ()) loop.  A MEM_REF can only
appear in the innermost position; see the sketch below.
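
An untested sketch of that shape (the same logic as your patch, just
restructured; not actual tree-sra.c code):

  static bool
  contains_vce_or_bfcref_p (const_tree ref)
  {
    while (handled_component_p (ref))
      {
        if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
            || (TREE_CODE (ref) == COMPONENT_REF
                && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
          return true;
        ref = TREE_OPERAND (ref, 0);
      }

    if (TREE_CODE (ref) != MEM_REF
        || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
      return false;

    tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
    return (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
            != TYPE_MAIN_VARIANT (TREE_TYPE (mem))
            && type_contains_padding_p (TREE_TYPE (mem)));
  }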

I'm not sure that we really want to retain SRAing structures
based on their fields when we see a VCEd MEM_REF accessing them.
This might for example do floating-point accesses on data that
isn't floating-point, and on some architectures that may raise
exceptions (IA64 comes to mind).  The memcpy folding code explicitly
disallows float modes and also BOOLEAN_TYPE and ENUMERAL_TYPE because
their size might not match their precision:

      /* Make sure we are not copying using a floating-point mode or
         a type whose size possibly does not match its precision.  */
      if (FLOAT_MODE_P (TYPE_MODE (desttype))
          || TREE_CODE (desttype) == BOOLEAN_TYPE
          || TREE_CODE (desttype) == ENUMERAL_TYPE)
        desttype = bitwise_type_for_mode (TYPE_MODE (desttype));
      if (FLOAT_MODE_P (TYPE_MODE (srctype))
          || TREE_CODE (srctype) == BOOLEAN_TYPE
          || TREE_CODE (srctype) == ENUMERAL_TYPE)
        srctype = bitwise_type_for_mode (TYPE_MODE (srctype));

So - what's the reason you think you need to retain SRAing memcpied
structs?  Via total scalarization, I mean - if we see the "real" accesses
besides the aggregate copy, then we of course win.

Do we check anywhere when seeing an aggregate copy that lhs and rhs
have matching TYPE_MAIN_VARIANT?  GIMPLE allows matching TYPE_CANONICAL,
which can have mismatched fields (LTO, cross-language), or even any
type if TYPE_STRUCTURAL_EQUALITY.  That is, what happens if we mark
two distinct enough decls for total scalarization that are copied
to each other?  Do we end up re-materializing them if their
"structure" doesn't match when transforming the assignment?

Thanks,
Richard.

> +  return false;
> +}
> +
>  /* Search the given tree for a declaration by skipping handled components and
>     exclude it from the candidates.  */
>  
> @@ -1338,7 +1432,8 @@ build_accesses_from_assign (gimple *stmt)
>      {
>        racc->grp_assignment_read = 1;
>        if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
> -       && !is_gimple_reg_type (racc->type))
> +       && !is_gimple_reg_type (racc->type)
> +       && !contains_vce_or_bfcref_p (rhs))
>       bitmap_set_bit (should_scalarize_away_bitmap, DECL_UID (racc->base));
>        if (storage_order_barrier_p (lhs))
>       racc->grp_unscalarizable_region = 1;
> @@ -3416,24 +3511,6 @@ get_repl_default_def_ssa_name (struct access *racc)
>    return get_or_create_ssa_default_def (cfun, racc->replacement_decl);
>  }
>  
> -/* Return true if REF has an VIEW_CONVERT_EXPR or a COMPONENT_REF with a
> -   bit-field field declaration somewhere in it.  */
> -
> -static inline bool
> -contains_vce_or_bfcref_p (const_tree ref)
> -{
> -  while (handled_component_p (ref))
> -    {
> -      if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
> -       || (TREE_CODE (ref) == COMPONENT_REF
> -           && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
> -     return true;
> -      ref = TREE_OPERAND (ref, 0);
> -    }
> -
> -  return false;
> -}
> -
>  /* Examine both sides of the assignment statement pointed to by STMT, replace
>     them with a scalar replacement if there is one and generate copying of
>     replacements if scalarized aggregates have been used in the assignment.  GSI
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
