Richard Sandiford <richard.sandif...@arm.com> writes:
> predcom has the following code to stop one rogue load from
> interfering with other store-load opportunities:
>
>       /* If A is read and B write or vice versa and there is unsuitable
>        dependence, instead of merging both components into a component
>        that will certainly not pass suitable_component_p, just put the
>        read into bad component, perhaps at least the write together with
>        all the other data refs in it's component will be optimizable.  */
>
> But when store-store commoning was added later, this had the effect
> of ignoring loads that occur between two candidate stores.
>
> There is code further up to handle loads and stores with unknown
> dependences:
>
>       /* Don't do store elimination if there is any unknown dependence for
>        any store data reference.  */
>       if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
>         && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
>             || DDR_NUM_DIST_VECTS (ddr) == 0))
>       eliminate_store_p = false;
>
> But the store-load code above skips loads for *known* dependences
> if (a) the load has already been marked "bad" or (b) the data-ref
> machinery knows the dependence distance, but determine_offsets
> can't handle the combination.
>
> (a) happens to be the problem in the testcase, but a different
> sequence could have given (b) instead.  We have writes to individual
> fields of a structure and reads from the whole structure.  Since
> determine_offsets requires the types to be the same, it returns false
> for each such read/write combination.
>
> This patch records which components have had loads removed and
> prevents store-store commoning for them.  It's a bit too pessimistic,
> since there shouldn't be a problem if a "bad" load dominates all stores
> in a component.  But (a) we can't AFAIK use pcom_stmt_dominates_stmt_p
> here and (b) the handling for that case would probably need to be
> removed again if we handled more exotic cases in future.

Forgot to add:

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for master
and eventually for backports?

Thanks,
Richard

>
> 2020-01-28  Richard Sandiford  <richard.sandif...@arm.com>
>
> gcc/
>       PR tree-optimization/93434
>       * tree-predcom.c (split_data_refs_to_components): Record which
>       components have had aliasing loads removed.  Prevent store-store
>       commoning for all such components.
>
> gcc/testsuite/
>       PR tree-optimization/93434
>       * gcc.c-torture/execute/pr93434.c: New test.
> ---
>  gcc/testsuite/gcc.c-torture/execute/pr93434.c | 36 +++++++++++++++++++
>  gcc/tree-predcom.c                            | 24 +++++++++++--
>  2 files changed, 58 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr93434.c
>
> diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
> index 047c7fa9583..d2dcfe7f42d 100644
> --- a/gcc/tree-predcom.c
> +++ b/gcc/tree-predcom.c
> @@ -767,6 +767,7 @@ split_data_refs_to_components (class loop *loop,
>    /* Don't do store elimination if loop has multiple exit edges.  */
>    bool eliminate_store_p = single_exit (loop) != NULL;
>    basic_block last_always_executed = last_always_executed_block (loop);
> +  auto_bitmap no_store_store_comps;
>  
>    FOR_EACH_VEC_ELT (datarefs, i, dr)
>      {
> @@ -838,9 +839,13 @@ split_data_refs_to_components (class loop *loop,
>        else if (DR_IS_READ (dra) && ib != bad)
>       {
>         if (ia == bad)
> -         continue;
> +         {
> +           bitmap_set_bit (no_store_store_comps, ib);
> +           continue;
> +         }
>         else if (!determine_offset (dra, drb, &dummy_off))
>           {
> +           bitmap_set_bit (no_store_store_comps, ib);
>             merge_comps (comp_father, comp_size, bad, ia);
>             continue;
>           }
> @@ -848,9 +853,13 @@ split_data_refs_to_components (class loop *loop,
>        else if (DR_IS_READ (drb) && ia != bad)
>       {
>         if (ib == bad)
> -         continue;
> +         {
> +           bitmap_set_bit (no_store_store_comps, ia);
> +           continue;
> +         }
>         else if (!determine_offset (dra, drb, &dummy_off))
>           {
> +           bitmap_set_bit (no_store_store_comps, ia);
>             merge_comps (comp_father, comp_size, bad, ib);
>             continue;
>           }
> @@ -908,6 +917,17 @@ split_data_refs_to_components (class loop *loop,
>        comp->refs.quick_push (dataref);
>      }
>  
> +  if (eliminate_store_p)
> +    {
> +      bitmap_iterator bi;
> +      EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
> +     {
> +       ca = component_of (comp_father, ia);
> +       if (ca != bad)
> +         comps[ca]->eliminate_store_p = false;
> +     }
> +    }
> +
>    for (i = 0; i < n; i++)
>      {
>        comp = comps[i];
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr93434.c 
> b/gcc/testsuite/gcc.c-torture/execute/pr93434.c
> new file mode 100644
> index 00000000000..e786252794b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr93434.c
> @@ -0,0 +1,36 @@
> +typedef struct creal_T {
> +  double re;
> +  double im;
> +} creal_T;
> +
> +#define N 16
> +int main() {
> +  int k;
> +  int i;
> +  int j;
> +  creal_T t2[N];
> +  double inval;
> +
> +  inval = 1.0;
> +  for (j = 0; j < N; ++j) {
> +    t2[j].re = 0;
> +    t2[j].im = 0;
> +  }
> +
> +  for (j = 0; j < N/4; j++) {
> +    i = j * 4;
> +    t2[i].re = inval;
> +    t2[i].im = inval;
> +    k = i + 3;
> +    t2[k].re = inval;
> +    t2[k].im = inval;
> +    t2[i] = t2[k];
> +    t2[k].re = inval;
> +  }
> +
> +  for (i = 0; i < 2; ++i)
> +    if (t2[i].re != !i || t2[i].im != !i)
> +      __builtin_abort ();
> +
> +  return 0;
> +}

Reply via email to