On Tue, Jun 28, 2011 at 2:50 PM, Martin Jambor <mjam...@suse.cz> wrote:
> Hi,
>
> at the moment SRA can get confused by alignment padding and think
> that the padding actually contains data for which no replacement is
> planned, and thus it might leave some aggregate loads and stores in
> place instead of removing them.  This is perhaps the biggest problem
> when we attempt total scalarization of simple structures, which we do
> exactly in order to get rid of these loads and stores and of the
> variables altogether.
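>
> A minimal illustration of the kind of code affected (not the new
> testcase below; any structure with internal or trailing padding
> behaves similarly):
>
>   struct A
>   {
>     int i;
>     char c;     /* followed by padding bytes on most targets */
>   };
>
>   void f (struct A *p)
>   {
>     struct A a = *p;  /* replacements exist for a.i and a.c,      */
>     a.i++;            /* but nothing covers the padding, so the   */
>     *p = a;           /* aggregate copies could be left in place. */
>   }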
>
> I've pondered for quite a while how best to deal with this.  One
> option was simply to make total scalarization stronger.  I have also
> contemplated creating phantom accesses for padding I could detect
> (i.e. in simple structures), which would be more general, but that
> would complicate parts of SRA which are already quite convoluted, and
> I was not really sure it was worth it.
>
> Eventually I decided on the total scalarization option.  This patch
> changes it so that the flag is propagated down the access tree but
> also, if scalarization does not work out, is reset on the way back
> up.  If the flag survives, the access tree is considered "covered" by
> scalar replacements and thus known not to contain unscalarized data.
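>
> In terms of the patch below, roughly: when descending,
>
>   if (parent->grp_total_scalarization)
>     root->grp_total_scalarization = 1;
>
> and when returning from the children,
>
>   root->grp_total_scalarization &= child->grp_total_scalarization;
>
> so a single subtree that cannot be fully scalarized clears the flag
> at each level above it.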
>
> While changing function analyze_access_subtree I have simplified the
> way we compute the hole flag and also fixed one comparison which we
> currently have the wrong way round.  Fortunately that does not matter
> because, if there is a hole, covered_to will never add up to the
> total size.  I'll probably post a separate patch against 4.6 just in
> case someone attempts to read the source.
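>
> Concretely, the current gap test in the child loop is
>
>   if (!hole && child->offset < covered_to)
>     hole = true;
>
> whereas a gap before the child is detected by
>
>   hole |= covered_to < child->offset;
>
> as in the hunk below.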
>
> Bootstrapped and tested on x86_64-linux, OK for trunk?

So, what will it do for the testcase?

The following is what I _think_ it should do:

<bb 2>:
  l = *p_1(D);
  l$i_6 = p_1(D)->i;
  D.2700_2 = l$i_6;
  D.2701_3 = D.2700_2 + 1;
  l$i_12 = D.2701_3;
  *p_1(D) = l;
  p_1(D)->i = l$i_12;

and let FRE/DSE do their job (which they don't do, unfortunately).
So does your patch then remove the load/store from/to l but keep
the elementwise loads/stores (which are probably cleaned up by FRE)?
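
If FRE does clean them up, I'd expect the end result to be roughly
(hand-sketched, not an actual dump):

<bb 2>:
  l$i_6 = p_1(D)->i;
  D.2701_3 = l$i_6 + 1;
  p_1(D)->i = D.2701_3;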

Richard.


> Thanks,
>
> Martin
>
>
> 2011-06-24  Martin Jambor  <mjam...@suse.cz>
>
>        * tree-sra.c (struct access): Rename total_scalarization to
>        grp_total_scalarization.
>        (completely_scalarize_var): New function.
>        (sort_and_splice_var_accesses): Set grp_total_scalarization in
>        the representative access.
>        (analyze_access_subtree): Propagate total scalarization across the
>        tree, assume no holes in totally scalarized trees, simplify coverage
>        computation.
>        (analyze_all_variable_accesses): Call completely_scalarize_var instead
>        of completely_scalarize_record.
>
>        * testsuite/gcc.dg/tree-ssa/sra-12.c: New test.
>
> Index: src/gcc/tree-sra.c
> ===================================================================
> *** src.orig/gcc/tree-sra.c
> --- src/gcc/tree-sra.c
> *************** struct access
> *** 170,179 ****
>    /* Is this particular access write access? */
>    unsigned write : 1;
>
> -   /* Is this access an artificial one created to scalarize some record
> -      entirely? */
> -   unsigned total_scalarization : 1;
> -
>    /* Is this access an access to a non-addressable field? */
>    unsigned non_addressable : 1;
>
> --- 170,175 ----
> *************** struct access
> *** 204,209 ****
> --- 200,209 ----
>       is not propagated in the access tree in any direction.  */
>    unsigned grp_scalar_write : 1;
>
> +   /* Is this access an artificial one created to scalarize some record
> +      entirely? */
> +   unsigned grp_total_scalarization : 1;
> +
>    /* Other passes of the analysis use this bit to make function
>       analyze_access_subtree create scalar replacements for this group if
>       possible.  */
> *************** dump_access (FILE *f, struct access *acc
> *** 377,402 ****
>    fprintf (f, ", type = ");
>    print_generic_expr (f, access->type, 0);
>    if (grp)
> !     fprintf (f, ", total_scalarization = %d, grp_read = %d, grp_write = %d, 
> "
> !            "grp_assignment_read = %d, grp_assignment_write = %d, "
> !            "grp_scalar_read = %d, grp_scalar_write = %d, "
>             "grp_hint = %d, grp_covered = %d, "
>             "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
>             "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
>             "grp_maybe_modified = %d, "
>             "grp_not_necessarilly_dereferenced = %d\n",
> !            access->total_scalarization, access->grp_read, access->grp_write,
> !            access->grp_assignment_read, access->grp_assignment_write,
> !            access->grp_scalar_read, access->grp_scalar_write,
>             access->grp_hint, access->grp_covered,
>             access->grp_unscalarizable_region, access->grp_unscalarized_data,
>             access->grp_partial_lhs, access->grp_to_be_replaced,
>             access->grp_maybe_modified,
>             access->grp_not_necessarilly_dereferenced);
>    else
> !     fprintf (f, ", write = %d, total_scalarization = %d, "
>             "grp_partial_lhs = %d\n",
> !            access->write, access->total_scalarization,
>             access->grp_partial_lhs);
>  }
>
> --- 377,402 ----
>    fprintf (f, ", type = ");
>    print_generic_expr (f, access->type, 0);
>    if (grp)
> !     fprintf (f, ", grp_read = %d, grp_write = %d, grp_assignment_read = %d, 
> "
> !            "grp_assignment_write = %d, grp_scalar_read = %d, "
> !            "grp_scalar_write = %d, grp_total_scalarization = %d, "
>             "grp_hint = %d, grp_covered = %d, "
>             "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
>             "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
>             "grp_maybe_modified = %d, "
>             "grp_not_necessarilly_dereferenced = %d\n",
> !            access->grp_read, access->grp_write, access->grp_assignment_read,
> !            access->grp_assignment_write, access->grp_scalar_read,
> !            access->grp_scalar_write, access->grp_total_scalarization,
>             access->grp_hint, access->grp_covered,
>             access->grp_unscalarizable_region, access->grp_unscalarized_data,
>             access->grp_partial_lhs, access->grp_to_be_replaced,
>             access->grp_maybe_modified,
>             access->grp_not_necessarilly_dereferenced);
>    else
> !     fprintf (f, ", write = %d, grp_total_scalarization = %d, "
>             "grp_partial_lhs = %d\n",
> !            access->write, access->grp_total_scalarization,
>             access->grp_partial_lhs);
>  }
>
> *************** completely_scalarize_record (tree base,
> *** 924,930 ****
>            access = create_access_1 (base, pos, size);
>            access->expr = nref;
>            access->type = ft;
> !           access->total_scalarization = 1;
>            /* Accesses for intraprocedural SRA can have their stmt NULL.  */
>          }
>        else
> --- 924,930 ----
>            access = create_access_1 (base, pos, size);
>            access->expr = nref;
>            access->type = ft;
> !           access->grp_total_scalarization = 1;
>            /* Accesses for intraprocedural SRA can have their stmt NULL.  */
>          }
>        else
> *************** completely_scalarize_record (tree base,
> *** 932,937 ****
> --- 932,954 ----
>        }
>  }
>
> + /* Create total_scalarization accesses for all scalar type fields in VAR and
> +    for VAR as a whole.  VAR must be of a RECORD_TYPE conforming to
> +    type_consists_of_records_p.  */
> +
> + static void
> + completely_scalarize_var (tree var)
> + {
> +   HOST_WIDE_INT size = tree_low_cst (DECL_SIZE (var), 1);
> +   struct access *access;
> +
> +   access = create_access_1 (var, 0, size);
> +   access->expr = var;
> +   access->type = TREE_TYPE (var);
> +   access->grp_total_scalarization = 1;
> +
> +   completely_scalarize_record (var, var, 0, var);
> + }
>
>  /* Search the given tree for a declaration by skipping handled components and
>     exclude it from the candidates.  */
> *************** sort_and_splice_var_accesses (tree var)
> *** 1713,1719 ****
>        bool grp_assignment_read = access->grp_assignment_read;
>        bool grp_assignment_write = access->grp_assignment_write;
>        bool multiple_scalar_reads = false;
> !       bool total_scalarization = access->total_scalarization;
>        bool grp_partial_lhs = access->grp_partial_lhs;
>        bool first_scalar = is_gimple_reg_type (access->type);
>        bool unscalarizable_region = access->grp_unscalarizable_region;
> --- 1730,1736 ----
>        bool grp_assignment_read = access->grp_assignment_read;
>        bool grp_assignment_write = access->grp_assignment_write;
>        bool multiple_scalar_reads = false;
> !       bool total_scalarization = access->grp_total_scalarization;
>        bool grp_partial_lhs = access->grp_partial_lhs;
>        bool first_scalar = is_gimple_reg_type (access->type);
>        bool unscalarizable_region = access->grp_unscalarizable_region;
> *************** sort_and_splice_var_accesses (tree var)
> *** 1757,1763 ****
>          grp_assignment_write |= ac2->grp_assignment_write;
>          grp_partial_lhs |= ac2->grp_partial_lhs;
>          unscalarizable_region |= ac2->grp_unscalarizable_region;
> !         total_scalarization |= ac2->total_scalarization;
>          relink_to_new_repr (access, ac2);
>
>          /* If there are both aggregate-type and scalar-type accesses with
> --- 1774,1780 ----
>          grp_assignment_write |= ac2->grp_assignment_write;
>          grp_partial_lhs |= ac2->grp_partial_lhs;
>          unscalarizable_region |= ac2->grp_unscalarizable_region;
> !         total_scalarization |= ac2->grp_total_scalarization;
>          relink_to_new_repr (access, ac2);
>
>          /* If there are both aggregate-type and scalar-type accesses with
> *************** sort_and_splice_var_accesses (tree var)
> *** 1778,1783 ****
> --- 1795,1801 ----
>        access->grp_assignment_read = grp_assignment_read;
>        access->grp_assignment_write = grp_assignment_write;
>        access->grp_hint = multiple_scalar_reads || total_scalarization;
> +       access->grp_total_scalarization = total_scalarization;
>        access->grp_partial_lhs = grp_partial_lhs;
>        access->grp_unscalarizable_region = unscalarizable_region;
>        if (access->first_link)
> *************** analyze_access_subtree (struct access *r
> *** 2023,2028 ****
> --- 2041,2048 ----
>        root->grp_write = 1;
>        if (parent->grp_assignment_write)
>        root->grp_assignment_write = 1;
> +       if (parent->grp_total_scalarization)
> +       root->grp_total_scalarization = 1;
>      }
>
>    if (root->grp_unscalarizable_region)
> *************** analyze_access_subtree (struct access *r
> *** 2033,2048 ****
>
>    for (child = root->first_child; child; child = child->next_sibling)
>      {
> !       if (!hole && child->offset < covered_to)
> !       hole = true;
> !       else
> !       covered_to += child->size;
> !
>        sth_created |= analyze_access_subtree (child, root,
>                                             allow_replacements && !scalar);
>
>        root->grp_unscalarized_data |= child->grp_unscalarized_data;
> !       hole |= !child->grp_covered;
>      }
>
>    if (allow_replacements && scalar && !root->first_child
> --- 2053,2068 ----
>
>    for (child = root->first_child; child; child = child->next_sibling)
>      {
> !       hole |= covered_to < child->offset;
>        sth_created |= analyze_access_subtree (child, root,
>                                             allow_replacements && !scalar);
>
>        root->grp_unscalarized_data |= child->grp_unscalarized_data;
> !       root->grp_total_scalarization &= child->grp_total_scalarization;
> !       if (child->grp_covered)
> !       covered_to += child->size;
> !       else
> !       hole = true;
>      }
>
>    if (allow_replacements && scalar && !root->first_child
> *************** analyze_access_subtree (struct access *r
> *** 2063,2072 ****
>        sth_created = true;
>        hole = false;
>      }
> !   else if (covered_to < limit)
> !     hole = true;
>
> !   if (sth_created && !hole)
>      {
>        root->grp_covered = 1;
>        return true;
> --- 2083,2098 ----
>        sth_created = true;
>        hole = false;
>      }
> !   else
> !     {
> !       if (covered_to < limit)
> !       hole = true;
> !       if (scalar)
> !       root->grp_total_scalarization = 0;
> !     }
>
> !   if (sth_created
> !       && (!hole || root->grp_total_scalarization))
>      {
>        root->grp_covered = 1;
>        return true;
> *************** analyze_all_variable_accesses (void)
> *** 2288,2294 ****
>                <= max_total_scalarization_size)
>            && type_consists_of_records_p (TREE_TYPE (var)))
>          {
> !           completely_scalarize_record (var, var, 0, var);
>            if (dump_file && (dump_flags & TDF_DETAILS))
>              {
>                fprintf (dump_file, "Will attempt to totally scalarize ");
> --- 2314,2320 ----
>                <= max_total_scalarization_size)
>            && type_consists_of_records_p (TREE_TYPE (var)))
>          {
> !           completely_scalarize_var (var);
>            if (dump_file && (dump_flags & TDF_DETAILS))
>              {
>                fprintf (dump_file, "Will attempt to totally scalarize ");
> Index: src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
> ===================================================================
> *** /dev/null
> --- src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
> ***************
> *** 0 ****
> --- 1,25 ----
> + /* Verify that SRA total scalarization will not be confused by padding.  */
> +
> + /* { dg-do compile } */
> + /* { dg-options "-O1 -fdump-tree-release_ssa" } */
> +
> + struct S
> + {
> +   int i;
> +   unsigned short f1;
> +   char f2;
> +   unsigned short f3, f4;
> + };
> +
> +
> + int foo (struct S *p)
> + {
> +   struct S l;
> +
> +   l = *p;
> +   l.i++;
> +   *p = l;
> + }
> +
> + /* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
> + /* { dg-final { cleanup-tree-dump "release_ssa" } } */
>
>
