Hi!

My recent change for PR86844 broke the following testcases.

The problem is that we are processing the stores in the bitpos/bitsize
order.  The change PR86844 change was about if we try to merge two
overlapping stores of INTEGER_CST, we need to go through all other stores
that are overlapping those and come in program order before the last of
those; if they are INTEGER_CST stores, we can merge them, if they are other
stores, we punt completely.  If there are any stores ordered in the
bitpos/bitsize ordering in between those (thus also overlapping), we keep
them unmerged as is.

The problem is that if we this way skip any stores, we need to ensure that
we don't merge this store group with any statements beyond that, because
merged store group is stored at the point of the last statement, and that
would overwrite the skipped stores.  Let's use the testcase:
  i.f.i[0] = 0; // bitpos 0, bitsize 32
  i.f.i[1] = 0; // bitpos 32, bitsize 32
  i.f.i[2] = 0; // bitpos 64, bitsize 32
  i.f.q.f7 = 1; // all other stores are bitpos the number after f, bitsize 1
  i.f.q.f2 = 1;
  i.f.q.f21 = 1;
  i.f.q.f19 = 1;
  i.f.q.f14 = 1;
  i.f.q.f5 = 1;
  i.f.q.f0 = 1;
  i.f.q.f15 = 1;
  i.f.q.f16 = 1;
  i.f.q.f6 = 1;
  i.f.q.f9 = 1;
  i.f.q.f17 = 1;
  i.f.q.f1 = 1;
  i.f.q.f8 = 1;
  i.f.q.f13 = 1;
  i.f.q.f66 = 1;
In the bitpos/bitsize order, this is:
  i.f.i[0] = 0; // bitpos 0, bitsize 32
  i.f.q.f0 = 1;
  i.f.q.f1 = 1;
  i.f.q.f2 = 1;
  i.f.q.f5 = 1;
  i.f.q.f6 = 1;
  i.f.q.f7 = 1;
  i.f.q.f8 = 1;
  i.f.q.f9 = 1;
  i.f.q.f13 = 1;
  i.f.q.f14 = 1;
  i.f.q.f15 = 1;
  i.f.q.f16 = 1;
  i.f.q.f17 = 1;
  i.f.q.f19 = 1;
  i.f.q.f21 = 1;
  i.f.i[1] = 0; // bitpos 32, bitsize 32
  i.f.i[2] = 0; // bitpos 64, bitsize 32
  i.f.q.f66 = 1;
and when trying to merge overlapping i.f.i[0] = 0; with i.f.q.f0 = 1;,
we also merge in overlapping f7, f2, f21, f19, f14, f5 and skip
overlapping, but coming after the f0 store, stores f1 .. f17 other than the
above mentioned ones (so those stores remain in the source).  Now, in this
case the merged store group would be emitted where the f0 store is and the
optimization would be still correct.  The problem is that we keep going and
merge that store group with i[1] and i[2] stores (that is ok, those are
before the f0 store, so again, still at the spot of f0 store), but then also
merge with f66 store and that makes it incorrect, the
  i.f.q.f15 = 1;
  i.f.q.f16 = 1;
  i.f.q.f6 = 1;
  i.f.q.f9 = 1;
  i.f.q.f17 = 1;
  i.f.q.f1 = 1;
  i.f.q.f8 = 1;
  i.f.q.f13 = 1;
stores remain in the IL, but because the last store of the merged group is
now at f66 store, that is where those bits are overwritten with the store
of 96 bits together.  The following patch fixes that by making sure that if
we skip this way any stores (keep them in the IL), then we ensure that we
don't grow the store group beyond that last_order point.

Now, the first testcase also shows that we skip those stmts completely
unnecessarily, while they are overlapping stores that come after the
latter of the two overlapping stores being merged, they are still
INTEGER_CST stores and in the past we've merged them all into a constant
store just fine.  The reason PR86844 was done is that there can be
non-INTEGER_CST stores that prevent the merging.  So, this patch also
remembers which of the overlapping stores we would skip are INTEGER_CST
stores and which are other stores; if we see any to be skipped INTEGER_CSTs,
we retry the analysis with including those too; if that works out, we use
that, if it doesn't, we revert to what we chose before (skipping those
stores with magic to ensure we don't merge with other stores later in
program order).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and 8.3?

2018-11-05  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/87859
        * gimple-ssa-store-merging.c (struct merged_store_group): Add
        only_constants and first_nonmergeable_order members.
        (merged_store_group::merged_store_group): Initialize them.
        (merged_store_group::do_merge): Clear only_constants member if
        adding something other than INTEGER_CST store.
        (imm_store_chain_info::coalesce_immediate_stores): Don't merge
        stores with order >= first_nonmergeable_order.  Use
        merged_store->only_constants instead of always recomputing it.
        Set merged_store->first_nonmergeable_order if we've skipped any
        stores.  Attempt to merge overlapping INTEGER_CST stores that
        we would otherwise skip.

        * gcc.dg/store_merging_24.c: New test.
        * gcc.dg/store_merging_25.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2018-11-02 15:36:30.802999681 +0100
+++ gcc/gimple-ssa-store-merging.c      2018-11-05 10:20:25.980370963 +0100
@@ -1429,6 +1429,8 @@ struct merged_store_group
   unsigned int first_order;
   unsigned int last_order;
   bool bit_insertion;
+  bool only_constants;
+  unsigned int first_nonmergeable_order;
 
   auto_vec<store_immediate_info *> stores;
   /* We record the first and last original statements in the sequence because
@@ -1821,6 +1823,8 @@ merged_store_group::merged_store_group (
   val = NULL;
   mask = NULL;
   bit_insertion = false;
+  only_constants = info->rhs_code == INTEGER_CST;
+  first_nonmergeable_order = ~0U;
   unsigned HOST_WIDE_INT align_bitpos = 0;
   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
                          &align, &align_bitpos);
@@ -1936,6 +1940,8 @@ merged_store_group::do_merge (store_imme
       first_order = info->order;
       first_stmt = stmt;
     }
+  if (info->rhs_code != INTEGER_CST)
+    only_constants = false;
 }
 
 /* Merge a store recorded by INFO into this merged store.
@@ -2696,32 +2702,25 @@ imm_store_chain_info::coalesce_immediate
            }
        }
 
+      if (info->order >= merged_store->first_nonmergeable_order)
+       ;
+
       /* |---store 1---|
               |---store 2---|
         Overlapping stores.  */
-      if (IN_RANGE (info->bitpos, merged_store->start,
-                   merged_store->start + merged_store->width - 1))
+      else if (IN_RANGE (info->bitpos, merged_store->start,
+                        merged_store->start + merged_store->width - 1))
        {
          /* Only allow overlapping stores of constants.  */
-         if (info->rhs_code == INTEGER_CST)
+         if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
            {
-             bool only_constants = true;
-             store_immediate_info *infoj;
-             unsigned int j;
-             FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
-               if (infoj->rhs_code != INTEGER_CST)
-                 {
-                   only_constants = false;
-                   break;
-                 }
              unsigned int last_order
                = MAX (merged_store->last_order, info->order);
              unsigned HOST_WIDE_INT end
                = MAX (merged_store->start + merged_store->width,
                       info->bitpos + info->bitsize);
-             if (only_constants
-                 && check_no_overlap (m_store_info, i, INTEGER_CST,
-                                      last_order, end))
+             if (check_no_overlap (m_store_info, i, INTEGER_CST,
+                                   last_order, end))
                {
                  /* check_no_overlap call above made sure there are no
                     overlapping stores with non-INTEGER_CST rhs_code
@@ -2741,36 +2740,97 @@ imm_store_chain_info::coalesce_immediate
                     store_by_bitpos order it comes after the last store that
                     we can't merge with them.  We can merge the first 3 stores
                     and keep the last store as is though.  */
-                 unsigned int len = m_store_info.length (), k = i;
-                 for (unsigned int j = i + 1; j < len; ++j)
+                 unsigned int len = m_store_info.length ();
+                 unsigned int try_order = last_order;
+                 unsigned int first_nonmergeable_order;
+                 unsigned int k;
+                 bool last_iter = false;
+                 int attempts = 0;
+                 do
                    {
-                     store_immediate_info *info2 = m_store_info[j];
-                     if (info2->bitpos >= end)
-                       break;
-                     if (info2->order < last_order)
+                     unsigned int max_order = 0;
+                     unsigned first_nonmergeable_int_order = ~0U;
+                     unsigned HOST_WIDE_INT this_end = end;
+                     k = i;
+                     first_nonmergeable_order = ~0U;
+                     for (unsigned int j = i + 1; j < len; ++j)
                        {
-                         if (info2->rhs_code != INTEGER_CST)
+                         store_immediate_info *info2 = m_store_info[j];
+                         if (info2->bitpos >= this_end)
+                           break;
+                         if (info2->order < try_order)
                            {
-                             /* Normally check_no_overlap makes sure this
-                                doesn't happen, but if end grows below, then
-                                we need to process more stores than
-                                check_no_overlap verified.  Example:
-                                   MEM[(int *)p_5] = 0;
-                                   MEM[(short *)p_5 + 3B] = 1;
-                                   MEM[(char *)p_5 + 4B] = _9;
-                                   MEM[(char *)p_5 + 2B] = 2;  */
-                             k = 0;
-                             break;
+                             if (info2->rhs_code != INTEGER_CST)
+                               {
+                                 /* Normally check_no_overlap makes sure this
+                                    doesn't happen, but if end grows below,
+                                    then we need to process more stores than
+                                    check_no_overlap verified.  Example:
+                                     MEM[(int *)p_5] = 0;
+                                     MEM[(short *)p_5 + 3B] = 1;
+                                     MEM[(char *)p_5 + 4B] = _9;
+                                     MEM[(char *)p_5 + 2B] = 2;  */
+                                 k = 0;
+                                 break;
+                               }
+                             k = j;
+                             this_end = MAX (this_end,
+                                             info2->bitpos + info2->bitsize);
                            }
-                         k = j;
-                         end = MAX (end, info2->bitpos + info2->bitsize);
+                         else if (info2->rhs_code == INTEGER_CST
+                                  && !last_iter)
+                           {
+                             max_order = MAX (max_order, info2->order + 1);
+                             first_nonmergeable_int_order
+                               = MIN (first_nonmergeable_int_order,
+                                      info2->order);
+                           }
+                         else
+                           first_nonmergeable_order
+                             = MIN (first_nonmergeable_order, info2->order);
                        }
+                     if (k == 0)
+                       {
+                         if (last_order == try_order)
+                           break;
+                         /* If this failed, but only because we grew
+                            try_order, retry with the last working one,
+                            so that we merge at least something.  */
+                         try_order = last_order;
+                         last_iter = true;
+                         continue;
+                       }
+                     last_order = try_order;
+                     /* Retry with a larger try_order to see if we could
+                        merge some further INTEGER_CST stores.  */
+                     if (max_order
+                         && (first_nonmergeable_int_order
+                             < first_nonmergeable_order))
+                       {
+                         try_order = MIN (max_order,
+                                          first_nonmergeable_order);
+                         try_order
+                           = MIN (try_order,
+                                  merged_store->first_nonmergeable_order);
+                         if (try_order > last_order && ++attempts < 16)
+                           continue;
+                       }
+                     first_nonmergeable_order
+                       = MIN (first_nonmergeable_order,
+                              first_nonmergeable_int_order);
+                     end = this_end;
+                     break;
                    }
+                 while (1);
 
                  if (k != 0)
                    {
                      merged_store->merge_overlapping (info);
 
+                     merged_store->first_nonmergeable_order
+                       = MIN (merged_store->first_nonmergeable_order,
+                              first_nonmergeable_order);
+
                      for (unsigned int j = i + 1; j <= k; j++)
                        {
                          store_immediate_info *info2 = m_store_info[j];
@@ -2778,7 +2838,8 @@ imm_store_chain_info::coalesce_immediate
                          if (info2->order < last_order)
                            {
                              gcc_assert (info2->rhs_code == INTEGER_CST);
-                             merged_store->merge_overlapping (info2);
+                             if (info != info2)
+                               merged_store->merge_overlapping (info2);
                            }
                          /* Other stores are kept and not merged in any
                             way.  */
--- gcc/testsuite/gcc.dg/store_merging_24.c.jj  2018-11-02 15:36:59.829518310 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_24.c     2018-11-02 15:36:59.829518310 
+0100
@@ -0,0 +1,75 @@
+/* PR tree-optimization/87859 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-store-merging-details" } */
+/* { dg-final { scan-tree-dump "New sequence of \[23] stores to replace old 
one of 19 stores" "store-merging" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "New sequence of 1 stores to replace old one of 
6 stores" "store-merging" { target i?86-*-* x86_64-*-* } } } */
+
+struct S {
+  union F {
+    struct T {
+#define A(n) unsigned n:1;
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) \
+            A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+      B(f) B(f1) B(f2) B(f3) B(f4) B(f5)
+      A(f60) A(f61) A(f62) A(f63) A(f64) A(f65) A(f66)
+    } q;
+    unsigned int i[3];
+  } f;
+};
+
+struct S s = {
+  .f.q.f0 = 1, .f.q.f1 = 1, .f.q.f2 = 1, .f.q.f5 = 1, .f.q.f6 = 1,
+  .f.q.f7 = 1, .f.q.f8 = 1, .f.q.f9 = 1, .f.q.f13 = 1, .f.q.f14 = 1,
+  .f.q.f15 = 1, .f.q.f16 = 1, .f.q.f17 = 1, .f.q.f19 = 1, .f.q.f21 = 1,
+  .f.q.f66 = 1
+};
+
+__attribute__((noipa)) void
+bar (unsigned *x)
+{
+  if (x[0] != s.f.i[0] || x[1] != s.f.i[1] || x[2] != s.f.i[2])
+    __builtin_abort ();
+}
+
+__attribute__((noipa)) void
+foo (unsigned char *state)
+{
+  struct S i;
+  i.f.i[0] = 0;
+  i.f.i[1] = 0;
+  i.f.i[2] = 0;
+  i.f.q.f7 = 1;
+  i.f.q.f2 = 1;
+  i.f.q.f21 = 1;
+  i.f.q.f19 = 1;
+  i.f.q.f14 = 1;
+  i.f.q.f5 = 1;
+  i.f.q.f0 = 1;
+  i.f.q.f15 = 1;
+  i.f.q.f16 = 1;
+  i.f.q.f6 = 1;
+  i.f.q.f9 = 1;
+  i.f.q.f17 = 1;
+  i.f.q.f1 = 1;
+  i.f.q.f8 = 1;
+  i.f.q.f13 = 1;
+  i.f.q.f66 = 1;
+  if (*state)
+    {
+      i.f.q.f37 = 1;
+      i.f.q.f38 = 1;
+      i.f.q.f39 = 1;
+      i.f.q.f40 = 1;
+      i.f.q.f41 = 1;
+      i.f.q.f36 = 1;
+    }
+  bar (i.f.i);
+}
+
+int
+main ()
+{
+  unsigned char z = 0;
+  foo (&z);
+  return 0;
+}
--- gcc/testsuite/gcc.dg/store_merging_25.c.jj  2018-11-05 10:28:41.544249761 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_25.c     2018-11-05 10:28:04.033864475 
+0100
@@ -0,0 +1,75 @@
+/* PR tree-optimization/87859 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-store-merging-details" } */
+/* { dg-final { scan-tree-dump "New sequence of \[23] stores to replace old 
one of 14 stores" "store-merging" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "New sequence of 1 stores to replace old one of 
6 stores" "store-merging" { target i?86-*-* x86_64-*-* } } } */
+
+struct S {
+  union F {
+    struct T {
+#define A(n) unsigned n:1;
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) \
+            A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+      B(f) B(f1) B(f2) B(f3) B(f4) B(f5)
+      A(f60) A(f61) A(f62) A(f63) A(f64) A(f65) A(f66)
+    } q;
+    unsigned int i[3];
+  } f;
+};
+
+struct S s = {
+  .f.q.f0 = 1, .f.q.f1 = 1, .f.q.f2 = 1, .f.q.f5 = 1, .f.q.f6 = 1,
+  .f.q.f7 = 1, .f.q.f8 = 1, .f.q.f9 = 1, .f.q.f13 = 1, .f.q.f14 = 1,
+  .f.q.f15 = 1, .f.q.f16 = 1, .f.q.f17 = 1, .f.q.f19 = 1, .f.q.f21 = 1,
+  .f.q.f66 = 1
+};
+
+__attribute__((noipa)) void
+bar (unsigned *x)
+{
+  if (x[0] != s.f.i[0] || x[1] != s.f.i[1] || x[2] != s.f.i[2])
+    __builtin_abort ();
+}
+
+__attribute__((noipa)) void
+foo (unsigned char *state, unsigned char c)
+{
+  struct S i;
+  i.f.i[0] = 0;
+  i.f.i[1] = 0;
+  i.f.i[2] = 0;
+  i.f.q.f7 = 1;
+  i.f.q.f2 = 1;
+  i.f.q.f21 = 1;
+  i.f.q.f19 = 1;
+  i.f.q.f14 = 1;
+  i.f.q.f5 = 1;
+  i.f.q.f0 = 1;
+  i.f.q.f15 = 1;
+  i.f.q.f16 = 1;
+  i.f.q.f6 = 1;
+  i.f.q.f9 = 1;
+  i.f.q.f17 = c;
+  i.f.q.f1 = 1;
+  i.f.q.f8 = 1;
+  i.f.q.f13 = 1;
+  i.f.q.f66 = 1;
+  if (*state)
+    {
+      i.f.q.f37 = 1;
+      i.f.q.f38 = 1;
+      i.f.q.f39 = 1;
+      i.f.q.f40 = 1;
+      i.f.q.f41 = 1;
+      i.f.q.f36 = 1;
+    }
+  bar (i.f.i);
+}
+
+int
+main ()
+{
+  unsigned char z = 0;
+  foo (&z, 1);
+  return 0;
+}

        Jakub

Reply via email to