Hi All,

The patch is refactored a little according to the last comment. Do you have 
more comments? If not, I will commit it later.

Tested on X86_64 and AArch64.

gcc/:

        PR tree-optimization/89430
        * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking
        to support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if
        there is a dominating load of a local variable whose address does not
        escape, a store does not trap (as the local stack is always writable).
        The logic is also simplified to ignore other loads, as they don't
        help to check whether a store can trap (the memory may be read-only).

gcc/testsuite/:

        PR tree-optimization/89430
        * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
        * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
        * gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c: New test.
        * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.

---
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-
 .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++
 .../gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c  |  15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-
 gcc/tree-ssa-phiopt.c                         | 129 ++++++++++--------
 8 files changed, 107 insertions(+), 64 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
index ce242ba569b..8ee1850ac63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
index 90ae36bfce2..9b96875ac7a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
index c633cbe947d..b2d04119381 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -13,4 +13,4 @@ int test(int b, int k) {
     return a.data[0] + a.data[1];
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
index 7cad563128d..8d3c4f7cc6a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -16,4 +16,4 @@ int test(int b, int k) {
     return a.data[0].x + a.data[1].x;
 }

-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
new file mode 100644
index 00000000000..c35a2afc70b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+typedef union {
+  int i;
+  float f;
+} U;
+
+int foo(U *u, int b, int i)
+{
+  u->i = 0;
+  if (b)
+    u->i = i;
+  return u->i;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
new file mode 100644
index 00000000000..f9e66aefb13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int *t;
+
+int f1 (int tt)
+{
+  int *t1 = t;
+  *t1 = -5;
+  if (*t1 < tt)
+    *((unsigned *) t1) = 5;
+  return *t1;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
index 09313716598..a06f339f0bb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */

 typedef union {
   int i;
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index bb97dcf63b4..9d69a9565b4 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1987,26 +1987,33 @@ abs_replacement (basic_block cond_bb, basic_block 
middle_bb,

    ??? We currently are very conservative and assume that a load might
    trap even if a store doesn't (write-only memory).  This probably is
-   overly conservative.  */
+   overly conservative.

-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
-   through it was seen, which would constitute a no-trap region for
-   same accesses.  */
-struct name_to_bb
+   We currently support a special case that for !TREE_ADDRESSABLE automatic
+   variables, it could ignore whether something is a load or store because the
+   local stack should be always writable.  */
+
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
+   basic block an *_REF through it was seen, which would constitute a
+   no-trap region for same accesses.
+
+   Size is needed to support 2 MEM_REFs of different types, like
+   MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with
+   OEP_ADDRESS_OF.  */
+struct ref_to_bb
 {
-  unsigned int ssa_name_ver;
+  tree exp;
+  HOST_WIDE_INT size;
   unsigned int phase;
-  bool store;
-  HOST_WIDE_INT offset, size;
   basic_block bb;
 };

 /* Hashtable helpers.  */

-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
+struct refs_hasher : free_ptr_hash<ref_to_bb>
 {
-  static inline hashval_t hash (const name_to_bb *);
-  static inline bool equal (const name_to_bb *, const name_to_bb *);
+  static inline hashval_t hash (const ref_to_bb *);
+  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
 };

 /* Used for quick clearing of the hash-table when we see calls.
@@ -2016,28 +2023,29 @@ static unsigned int nt_call_phase;
 /* The hash function.  */

 inline hashval_t
-ssa_names_hasher::hash (const name_to_bb *n)
+refs_hasher::hash (const ref_to_bb *n)
 {
-  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
-         ^ (n->offset << 6) ^ (n->size << 3);
+  inchash::hash hstate;
+  inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF);
+  hstate.add_hwi (n->size);
+  return hstate.end ();
 }

 /* The equality function of *P1 and *P2.  */

 inline bool
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
 {
-  return n1->ssa_name_ver == n2->ssa_name_ver
-         && n1->store == n2->store
-         && n1->offset == n2->offset
-         && n1->size == n2->size;
+  return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF)
+        && n1->size == n2->size;
 }

 class nontrapping_dom_walker : public dom_walker
 {
 public:
   nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
-    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
+    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
+  {}

   virtual edge before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
@@ -2054,7 +2062,7 @@ private:
   hash_set<tree> *m_nontrapping;

   /* The hash table for remembering what we've seen.  */
-  hash_table<ssa_names_hasher> m_seen_ssa_names;
+  hash_table<refs_hasher> m_seen_refs;
 };

 /* Called by walk_dominator_tree, when entering the block BB.  */
@@ -2103,65 +2111,68 @@ nontrapping_dom_walker::after_dom_children (basic_block 
bb)
 }

 /* We see the expression EXP in basic block BB.  If it's an interesting
-   expression (an MEM_REF through an SSA_NAME) possibly insert the
-   expression into the set NONTRAP or the hash table of seen expressions.
-   STORE is true if this expression is on the LHS, otherwise it's on
-   the RHS.  */
+   expression of:
+     1) MEM_REF
+     2) ARRAY_REF
+     3) COMPONENT_REF
+   possibly insert the expression into the set NONTRAP or the hash table
+   of seen expressions.  STORE is true if this expression is on the LHS,
+   otherwise it's on the RHS.  */
 void
 nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
 {
   HOST_WIDE_INT size;

-  if (TREE_CODE (exp) == MEM_REF
-      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
-      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
+  if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
+       || TREE_CODE (exp) == COMPONENT_REF)
       && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
     {
-      tree name = TREE_OPERAND (exp, 0);
-      struct name_to_bb map;
-      name_to_bb **slot;
-      struct name_to_bb *n2bb;
+      struct ref_to_bb map;
+      ref_to_bb **slot;
+      struct ref_to_bb *r2bb;
       basic_block found_bb = 0;

-      /* Try to find the last seen MEM_REF through the same
-         SSA_NAME, which can trap.  */
-      map.ssa_name_ver = SSA_NAME_VERSION (name);
-      map.phase = 0;
-      map.bb = 0;
-      map.store = store;
-      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
-      map.size = size;
+      if (!store)
+       {
+         tree base = get_base_address (exp);
+         /* Only record a LOAD of a local variable without address-taken, as
+            the local stack is always writable.  This allows cselim on a STORE
+            with a dominating LOAD.  */
+         if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+           return;
+       }

-      slot = m_seen_ssa_names.find_slot (&map, INSERT);
-      n2bb = *slot;
-      if (n2bb && n2bb->phase >= nt_call_phase)
-        found_bb = n2bb->bb;
+      /* Try to find the last seen *_REF, which can trap.  */
+      map.exp = exp;
+      map.size = size;
+      slot = m_seen_refs.find_slot (&map, INSERT);
+      r2bb = *slot;
+      if (r2bb && r2bb->phase >= nt_call_phase)
+       found_bb = r2bb->bb;

-      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
-         (it's in a basic block on the path from us to the dominator root)
+      /* If we've found a trapping *_REF, _and_ it dominates EXP
+        (it's in a basic block on the path from us to the dominator root)
         then we can't trap.  */
-      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
+      if (found_bb && (((size_t) found_bb->aux) & 1) == 1)
        {
          m_nontrapping->add (exp);
        }
       else
-        {
+       {
          /* EXP might trap, so insert it into the hash table.  */
-         if (n2bb)
+         if (r2bb)
            {
-             n2bb->phase = nt_call_phase;
-             n2bb->bb = bb;
+             r2bb->phase = nt_call_phase;
+             r2bb->bb = bb;
            }
          else
            {
-             n2bb = XNEW (struct name_to_bb);
-             n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
-             n2bb->phase = nt_call_phase;
-             n2bb->bb = bb;
-             n2bb->store = store;
-             n2bb->offset = map.offset;
-             n2bb->size = size;
-             *slot = n2bb;
+             r2bb = XNEW (struct ref_to_bb);
+             r2bb->phase = nt_call_phase;
+             r2bb->bb = bb;
+             r2bb->exp = exp;
+             r2bb->size = size;
+             *slot = r2bb;
            }
        }
     }
--
2.17.1

________________________________________
From: Richard Biener <rguent...@suse.de>
Sent: Tuesday, June 2, 2020 19:29
To: Hao Liu OS
Cc: gcc-patches@gcc.gnu.org; Jakub Jelinek
Subject: RE: [PATCH] extend cselim to check non-trapping for more references 
(PR tree-optimization/89430)

On Fri, 29 May 2020, Hao Liu OS wrote:

> Hi Richard,
>
> Thanks for your comments. It's a good idea to simplify the code and remove 
> get_inner_reference. I've updated the patch accordingly. I also simplified 
> the code to ignore other loads, which can not help to check if a store can be 
> trapped.
>
> About tests:
>     1.  All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430 
> are passed with this patch.
>     2.  ssa-pre-17.c is failed as cselim optimizes the conditional store, so 
> "-fno-tree-cselim" is added. That case is added as a new test case for 
> pr89430.
>     3.  Other test cases (including the regression test for pr94734) in gcc 
> testsuit are not affected by this patch, according to gcc "make check".
>     4.  Some other benchmarks are also tested for correctness and 
> performance. The performance regression mentioned in pr89430 can be fixed.
>
> Review, please.

Looks mostly good now, some more comments inline below

> gcc/ChangeLog:
>
>     PR tree-optimization/89430
>     * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to
>     support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if there 
> is
>     a dominating load of local variable without address escape, a store is not
>     trapped (as local stack is always writable).  Other loads are ignored for
>     simplicity, as they don't help to check if a store can be trapped.
>
> gcc/testsuite/ChangeLog:
>
>     PR tree-optimization/89430
>     * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
>     * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.
>
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-
>  .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-
>  gcc/tree-ssa-phiopt.c                         | 140 +++++++++---------
>  7 files changed, 91 insertions(+), 76 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> index ce242ba569b..8ee1850ac63 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
>          return a[0]+a[1];
>  }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } 
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> index 90ae36bfce2..9b96875ac7a 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
>          return a[0]+a[1];
>  }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } 
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> index c633cbe947d..b2d04119381 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> @@ -13,4 +13,4 @@ int test(int b, int k) {
>      return a.data[0] + a.data[1];
>  }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } 
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> index 7cad563128d..8d3c4f7cc6a 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> @@ -16,4 +16,4 @@ int test(int b, int k) {
>      return a.data[0].x + a.data[1].x;
>  }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } 
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> new file mode 100644
> index 00000000000..c35a2afc70b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cselim-details" } */
> +
> +typedef union {
> +  int i;
> +  float f;
> +} U;
> +
> +int foo(U *u, int b, int i)
> +{
> +  u->i = 0;
> +  if (b)
> +    u->i = i;
> +  return u->i;
> +}
> +
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } 
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> index 09313716598..a06f339f0bb 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-pre-stats" } */
> +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
>
>  typedef union {
>    int i;
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index b1e0dce93d8..7c67b75486c 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>    return true;
>  }
>
> -/* Auxiliary functions to determine the set of memory accesses which
> +/* Auxiliary functions to determine the set of memory write accesses which
>     can't trap because they are preceded by accesses to the same memory
> -   portion.  We do that for MEM_REFs, so we only need to track
> -   the SSA_NAME of the pointer indirectly referenced.  The algorithm
> +   portion.  We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs.  The 
> algorithm
>     simply is a walk over all instructions in dominator order.  When
> -   we see an MEM_REF we determine if we've already seen a same
> +   we see an *_REF we determine if we've already seen a same
>     ref anywhere up to the root of the dominator tree.  If we do the
>     current access can't trap.  If we don't see any dominating access
>     the current access might trap, but might also make later accesses
>     non-trapping, so we remember it.  We need to be careful with loads
>     or stores, for instance a load might not trap, while a store would,
>     so if we see a dominating read access this doesn't mean that a later
> -   write access would not trap.  Hence we also need to differentiate the
> -   type of access(es) seen.
> +   write access would not trap.
>
> -   ??? We currently are very conservative and assume that a load might
> -   trap even if a store doesn't (write-only memory).  This probably is
> -   overly conservative.  */
> +   We care about following memory accesses:
> +     1) a store.
> +     2) a load of local variable without address-taken (as local stack is
> +     always writable).  It helps to optimize a conditoinal store with
> +     a dominating load.
>
> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
> -   through it was seen, which would constitute a no-trap region for
> -   same accesses.  */
> -struct name_to_bb
> +   other loads are ignored as we don't know if the accessed memory is 
> writable,
> +   so they don't help conditional store replacement.  */
> +
> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
> +   basic block an *_REF through it was seen, which would constitute a
> +   no-trap region for same accesses.  */
> +struct ref_to_bb
>  {
> -  unsigned int ssa_name_ver;
> +  tree exp;
>    unsigned int phase;
> -  bool store;
> -  HOST_WIDE_INT offset, size;
>    basic_block bb;
>  };
>
>  /* Hashtable helpers.  */
>
> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>
> +struct refs_hasher : free_ptr_hash<ref_to_bb>
>  {
> -  static inline hashval_t hash (const name_to_bb *);
> -  static inline bool equal (const name_to_bb *, const name_to_bb *);
> +  static inline hashval_t hash (const ref_to_bb *);
> +  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
>  };
>
>  /* Used for quick clearing of the hash-table when we see calls.
> @@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase;
>  /* The hash function.  */
>
>  inline hashval_t
> -ssa_names_hasher::hash (const name_to_bb *n)
> +refs_hasher::hash (const ref_to_bb *n)
>  {
> -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
> -         ^ (n->offset << 6) ^ (n->size << 3);
> +  inchash::hash hstate;
> +  inchash::add_expr (n->exp, hstate);
> +  return hstate.end ();
>  }
>
>  /* The equality function of *P1 and *P2.  */
>
>  inline bool
> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
>  {
> -  return n1->ssa_name_ver == n2->ssa_name_ver
> -         && n1->store == n2->store
> -         && n1->offset == n2->offset
> -         && n1->size == n2->size;
> +  return operand_equal_p (n1->exp, n2->exp, 0);

So I figured we're going to regress some cases here that do not
have semantically agreeing MEM_REFs, like MEM<double>(s_1)
and MEM<long>(s_1) they would not compare equal but did before,
just considering offset and size.  So we'd like to pass
OEP_ADDRESS_OF as flag argument to operand_equal_p here
and to add_expr above.  This part would then just check whether
the accesses are to the same address.

This means you have to preserve the ->size member (and check it).

The rest of the patch looks good to me.

Thanks,
Richard.

>  }
>
>  class nontrapping_dom_walker : public dom_walker
>  {
>  public:
>    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
> -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
> +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
> +  {}
>
>    virtual edge before_dom_children (basic_block);
>    virtual void after_dom_children (basic_block);
> @@ -2053,7 +2053,7 @@ private:
>    hash_set<tree> *m_nontrapping;
>
>    /* The hash table for remembering what we've seen.  */
> -  hash_table<ssa_names_hasher> m_seen_ssa_names;
> +  hash_table<refs_hasher> m_seen_refs;
>  };
>
>  /* Called by walk_dominator_tree, when entering the block BB.  */
> @@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children 
> (basic_block bb)
>  }
>
>  /* We see the expression EXP in basic block BB.  If it's an interesting
> -   expression (an MEM_REF through an SSA_NAME) possibly insert the
> -   expression into the set NONTRAP or the hash table of seen expressions.
> -   STORE is true if this expression is on the LHS, otherwise it's on
> -   the RHS.  */
> +   expression of:
> +     1) MEM_REF
> +     2) ARRAY_REF
> +     3) COMPONENT_REF
> +   possibly insert the expression into the set NONTRAP or the hash table
> +   of seen expressions.  STORE is true if this expression is on the LHS,
> +   otherwise it's on the RHS.  */
>  void
>  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool 
> store)
>  {
> -  HOST_WIDE_INT size;
> -
> -  if (TREE_CODE (exp) == MEM_REF
> -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
> -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
> -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
> +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
> +      || TREE_CODE (exp) == COMPONENT_REF)
>      {
> -      tree name = TREE_OPERAND (exp, 0);
> -      struct name_to_bb map;
> -      name_to_bb **slot;
> -      struct name_to_bb *n2bb;
> +      struct ref_to_bb map;
> +      ref_to_bb **slot;
> +      struct ref_to_bb *r2bb;
>        basic_block found_bb = 0;
>
> -      /* Try to find the last seen MEM_REF through the same
> -         SSA_NAME, which can trap.  */
> -      map.ssa_name_ver = SSA_NAME_VERSION (name);
> -      map.phase = 0;
> -      map.bb = 0;
> -      map.store = store;
> -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
> -      map.size = size;
> -
> -      slot = m_seen_ssa_names.find_slot (&map, INSERT);
> -      n2bb = *slot;
> -      if (n2bb && n2bb->phase >= nt_call_phase)
> -        found_bb = n2bb->bb;
> -
> -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
> -         (it's in a basic block on the path from us to the dominator root)
> +      if (!store)
> +     {
> +       tree base = get_base_address (exp);
> +       /* Only record a LOAD of local variable without address-taken, as
> +          the local stack is always writable.  This allows cselim on a STORE
> +          with a dominating LOAD.  */
> +       if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
> +         return;
> +     }
> +
> +      /* Try to find the last seen *_REF, which can trap.  */
> +      map.exp = exp;
> +      slot = m_seen_refs.find_slot (&map, INSERT);
> +      r2bb = *slot;
> +      if (r2bb && r2bb->phase >= nt_call_phase)
> +     found_bb = r2bb->bb;
> +
> +      /* If we've found a trapping *_REF, _and_ it dominates EXP
> +      (it's in a basic block on the path from us to the dominator root)
>        then we can't trap.  */
>        if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
>       {
>         m_nontrapping->add (exp);
>       }
>        else
> -        {
> +     {
>         /* EXP might trap, so insert it into the hash table.  */
> -       if (n2bb)
> +       if (r2bb)
>           {
> -           n2bb->phase = nt_call_phase;
> -           n2bb->bb = bb;
> +           r2bb->phase = nt_call_phase;
> +           r2bb->bb = bb;
>           }
>         else
>           {
> -           n2bb = XNEW (struct name_to_bb);
> -           n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
> -           n2bb->phase = nt_call_phase;
> -           n2bb->bb = bb;
> -           n2bb->store = store;
> -           n2bb->offset = map.offset;
> -           n2bb->size = size;
> -           *slot = n2bb;
> +           r2bb = XNEW (struct ref_to_bb);
> +           r2bb->phase = nt_call_phase;
> +           r2bb->bb = bb;
> +           r2bb->exp = exp;
> +           *slot = r2bb;
>           }
>       }
>      }
>

--
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to