Hi All, The patch is refactored a little according to the last comment. Do you have more comments? If no, I will commit it later.
Tested on X86_64 and AArch64. gcc/: PR tree-optimization/89430 * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to support ARRAY_REFs and COMPONENT_REFs. Support a special case: if there is a dominating load of local variable without address escape, a store is not trapped (as local stack is always writable). The logic is also simplified to ignore other loads, as they don't help to check if a store is trapped (may be read-only). gcc/testsuite/: PR tree-optimization/89430 * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail. * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail. * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail. * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail. * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test. * gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c: New test. * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim. --- gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c | 2 +- .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c | 17 +++ .../gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c | 15 ++ gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c | 2 +- gcc/tree-ssa-phiopt.c | 129 ++++++++++-------- 8 files changed, 107 insertions(+), 64 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c index ce242ba569b..8ee1850ac63 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { return a[0]+a[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c index 90ae36bfce2..9b96875ac7a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { return a[0]+a[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c index c633cbe947d..b2d04119381 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c @@ -13,4 +13,4 @@ int test(int b, int k) { return a.data[0] + a.data[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c index 7cad563128d..8d3c4f7cc6a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c @@ -16,4 +16,4 @@ int test(int b, int k) { return a.data[0].x + a.data[1].x; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c new file mode 100644 index 00000000000..c35a2afc70b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cselim-details" } */ + +typedef union { + int i; + float f; +} U; + +int foo(U *u, int b, int i) +{ + u->i = 0; + if (b) + u->i = i; + return u->i; +} + +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c new file mode 100644 index 00000000000..f9e66aefb13 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cselim-details" } */ + +int *t; + +int f1 (int tt) +{ + int *t1 = t; + *t1 = -5; + if (*t1 < tt) + *((unsigned *) t1) = 5; + return *t1; +} + +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c index 09313716598..a06f339f0bb 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-pre-stats" } */ +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */ typedef union { int i; diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index bb97dcf63b4..9d69a9565b4 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -1987,26 +1987,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, ??? We currently are very conservative and assume that a load might trap even if a store doesn't (write-only memory). This probably is - overly conservative. */ + overly conservative. -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF - through it was seen, which would constitute a no-trap region for - same accesses. */ -struct name_to_bb + We currently support a special case that for !TREE_ADDRESSABLE automatic + variables, it could ignore whether something is a load or store because the + local stack should be always writable. */ + +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which + basic block an *_REF through it was seen, which would constitute a + no-trap region for same accesses. + + Size is needed to support 2 MEM_REFs of different types, like + MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with + OEP_ADDRESS_OF. */ +struct ref_to_bb { - unsigned int ssa_name_ver; + tree exp; + HOST_WIDE_INT size; unsigned int phase; - bool store; - HOST_WIDE_INT offset, size; basic_block bb; }; /* Hashtable helpers. */ -struct ssa_names_hasher : free_ptr_hash <name_to_bb> +struct refs_hasher : free_ptr_hash<ref_to_bb> { - static inline hashval_t hash (const name_to_bb *); - static inline bool equal (const name_to_bb *, const name_to_bb *); + static inline hashval_t hash (const ref_to_bb *); + static inline bool equal (const ref_to_bb *, const ref_to_bb *); }; /* Used for quick clearing of the hash-table when we see calls. @@ -2016,28 +2023,29 @@ static unsigned int nt_call_phase; /* The hash function. */ inline hashval_t -ssa_names_hasher::hash (const name_to_bb *n) +refs_hasher::hash (const ref_to_bb *n) { - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) - ^ (n->offset << 6) ^ (n->size << 3); + inchash::hash hstate; + inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF); + hstate.add_hwi (n->size); + return hstate.end (); } /* The equality function of *P1 and *P2. */ inline bool -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) { - return n1->ssa_name_ver == n2->ssa_name_ver - && n1->store == n2->store - && n1->offset == n2->offset - && n1->size == n2->size; + return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF) + && n1->size == n2->size; } class nontrapping_dom_walker : public dom_walker { public: nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128) + {} virtual edge before_dom_children (basic_block); virtual void after_dom_children (basic_block); @@ -2054,7 +2062,7 @@ private: hash_set<tree> *m_nontrapping; /* The hash table for remembering what we've seen. */ - hash_table<ssa_names_hasher> m_seen_ssa_names; + hash_table<refs_hasher> m_seen_refs; }; /* Called by walk_dominator_tree, when entering the block BB. */ @@ -2103,65 +2111,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb) } /* We see the expression EXP in basic block BB. If it's an interesting - expression (an MEM_REF through an SSA_NAME) possibly insert the - expression into the set NONTRAP or the hash table of seen expressions. - STORE is true if this expression is on the LHS, otherwise it's on - the RHS. */ + expression of: + 1) MEM_REF + 2) ARRAY_REF + 3) COMPONENT_REF + possibly insert the expression into the set NONTRAP or the hash table + of seen expressions. STORE is true if this expression is on the LHS, + otherwise it's on the RHS. */ void nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) { HOST_WIDE_INT size; - if (TREE_CODE (exp) == MEM_REF - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME - && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) + if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF + || TREE_CODE (exp) == COMPONENT_REF) && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) { - tree name = TREE_OPERAND (exp, 0); - struct name_to_bb map; - name_to_bb **slot; - struct name_to_bb *n2bb; + struct ref_to_bb map; + ref_to_bb **slot; + struct ref_to_bb *r2bb; basic_block found_bb = 0; - /* Try to find the last seen MEM_REF through the same - SSA_NAME, which can trap. */ - map.ssa_name_ver = SSA_NAME_VERSION (name); - map.phase = 0; - map.bb = 0; - map.store = store; - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); - map.size = size; + if (!store) + { + tree base = get_base_address (exp); + /* Only record a LOAD of a local variable without address-taken, as + the local stack is always writable. This allows cselim on a STORE + with a dominating LOAD. */ + if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) + return; + } - slot = m_seen_ssa_names.find_slot (&map, INSERT); - n2bb = *slot; - if (n2bb && n2bb->phase >= nt_call_phase) - found_bb = n2bb->bb; + /* Try to find the last seen *_REF, which can trap. */ + map.exp = exp; + map.size = size; + slot = m_seen_refs.find_slot (&map, INSERT); + r2bb = *slot; + if (r2bb && r2bb->phase >= nt_call_phase) + found_bb = r2bb->bb; - /* If we've found a trapping MEM_REF, _and_ it dominates EXP - (it's in a basic block on the path from us to the dominator root) + /* If we've found a trapping *_REF, _and_ it dominates EXP + (it's in a basic block on the path from us to the dominator root) then we can't trap. */ - if (found_bb && (((size_t)found_bb->aux) & 1) == 1) + if (found_bb && (((size_t) found_bb->aux) & 1) == 1) { m_nontrapping->add (exp); } else - { + { /* EXP might trap, so insert it into the hash table. */ - if (n2bb) + if (r2bb) { - n2bb->phase = nt_call_phase; - n2bb->bb = bb; + r2bb->phase = nt_call_phase; + r2bb->bb = bb; } else { - n2bb = XNEW (struct name_to_bb); - n2bb->ssa_name_ver = SSA_NAME_VERSION (name); - n2bb->phase = nt_call_phase; - n2bb->bb = bb; - n2bb->store = store; - n2bb->offset = map.offset; - n2bb->size = size; - *slot = n2bb; + r2bb = XNEW (struct ref_to_bb); + r2bb->phase = nt_call_phase; + r2bb->bb = bb; + r2bb->exp = exp; + r2bb->size = size; + *slot = r2bb; } } } -- 2.17.1 ________________________________________ From: Richard Biener <rguent...@suse.de> Sent: Tuesday, June 2, 2020 19:29 To: Hao Liu OS Cc: gcc-patches@gcc.gnu.org; Jakub Jelinek Subject: RE: [PATCH] extend cselim to check non-trapping for more references (PR tree-optimizaton/89430) On Fri, 29 May 2020, Hao Liu OS wrote: > Hi Richard, > > Thanks for your comments. It's a good idea to simplify the code and remove > get_inner_reference. I've updated the patch accordingly. I also simplified > the code to ignore other loads, which can not help to check if a store can be > trapped. > > About tests: > 1. All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430 > are passed with this patch. > 2. ssa-pre-17.c is failed as cselim optimizes the conditional store, so > "-fno-tree-cselim" is added. That case is added as a new test case for > pr89430. > 3. Other test cases (including the regression test for pr94734) in gcc > testsuit are not affected by this patch, according to gcc "make check". > 4. Some other benchmarks are also tested for correctness and > performance. The performance regression mentioned in pr89430 can be fixed. > > Review, please. Looks mostly good now, some more comments inline below > gcc/ChangeLog: > > PR tree-optimization/89430 > * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to > support ARRAY_REFs and COMPONENT_REFs. Support a special case: if there > is > a dominating load of local variable without address escape, a store is not > trapped (as local stack is always writable). Other loads are ignored for > simplicity, as they don't help to check if a store can be trapped. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/89430 > * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail. > * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail. > * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail. > * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail. > * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test. > * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim. > > --- > gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c | 2 +- > .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c | 17 +++ > gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c | 2 +- > gcc/tree-ssa-phiopt.c | 140 +++++++++--------- > 7 files changed, 91 insertions(+), 76 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c > index ce242ba569b..8ee1850ac63 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c > @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { > return a[0]+a[1]; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c > index 90ae36bfce2..9b96875ac7a 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c > @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { > return a[0]+a[1]; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c > index c633cbe947d..b2d04119381 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c > @@ -13,4 +13,4 @@ int test(int b, int k) { > return a.data[0] + a.data[1]; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c > index 7cad563128d..8d3c4f7cc6a 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c > @@ -16,4 +16,4 @@ int test(int b, int k) { > return a.data[0].x + a.data[1].x; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c > new file mode 100644 > index 00000000000..c35a2afc70b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-cselim-details" } */ > + > +typedef union { > + int i; > + float f; > +} U; > + > +int foo(U *u, int b, int i) > +{ > + u->i = 0; > + if (b) > + u->i = i; > + return u->i; > +} > + > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c > index 09313716598..a06f339f0bb 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-pre-stats" } */ > +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */ > > typedef union { > int i; > diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c > index b1e0dce93d8..7c67b75486c 100644 > --- a/gcc/tree-ssa-phiopt.c > +++ b/gcc/tree-ssa-phiopt.c > @@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block > middle_bb, > return true; > } > > -/* Auxiliary functions to determine the set of memory accesses which > +/* Auxiliary functions to determine the set of memory write accesses which > can't trap because they are preceded by accesses to the same memory > - portion. We do that for MEM_REFs, so we only need to track > - the SSA_NAME of the pointer indirectly referenced. The algorithm > + portion. We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs. The > algorithm > simply is a walk over all instructions in dominator order. When > - we see an MEM_REF we determine if we've already seen a same > + we see an *_REF we determine if we've already seen a same > ref anywhere up to the root of the dominator tree. If we do the > current access can't trap. If we don't see any dominating access > the current access might trap, but might also make later accesses > non-trapping, so we remember it. We need to be careful with loads > or stores, for instance a load might not trap, while a store would, > so if we see a dominating read access this doesn't mean that a later > - write access would not trap. Hence we also need to differentiate the > - type of access(es) seen. > + write access would not trap. > > - ??? We currently are very conservative and assume that a load might > - trap even if a store doesn't (write-only memory). This probably is > - overly conservative. */ > + We care about following memory accesses: > + 1) a store. > + 2) a load of local variable without address-taken (as local stack is > + always writable). It helps to optimize a conditoinal store with > + a dominating load. > > -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF > - through it was seen, which would constitute a no-trap region for > - same accesses. */ > -struct name_to_bb > + other loads are ignored as we don't know if the accessed memory is > writable, > + so they don't help conditional store replacement. */ > + > +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which > + basic block an *_REF through it was seen, which would constitute a > + no-trap region for same accesses. */ > +struct ref_to_bb > { > - unsigned int ssa_name_ver; > + tree exp; > unsigned int phase; > - bool store; > - HOST_WIDE_INT offset, size; > basic_block bb; > }; > > /* Hashtable helpers. */ > > -struct ssa_names_hasher : free_ptr_hash <name_to_bb> > +struct refs_hasher : free_ptr_hash<ref_to_bb> > { > - static inline hashval_t hash (const name_to_bb *); > - static inline bool equal (const name_to_bb *, const name_to_bb *); > + static inline hashval_t hash (const ref_to_bb *); > + static inline bool equal (const ref_to_bb *, const ref_to_bb *); > }; > > /* Used for quick clearing of the hash-table when we see calls. > @@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase; > /* The hash function. */ > > inline hashval_t > -ssa_names_hasher::hash (const name_to_bb *n) > +refs_hasher::hash (const ref_to_bb *n) > { > - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) > - ^ (n->offset << 6) ^ (n->size << 3); > + inchash::hash hstate; > + inchash::add_expr (n->exp, hstate); > + return hstate.end (); > } > > /* The equality function of *P1 and *P2. */ > > inline bool > -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) > +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) > { > - return n1->ssa_name_ver == n2->ssa_name_ver > - && n1->store == n2->store > - && n1->offset == n2->offset > - && n1->size == n2->size; > + return operand_equal_p (n1->exp, n2->exp, 0); So I figured we're going to regress some cases here that do not have semantically agreeing MEM_REFs, like MEM<double>(s_1) and MEM<long>(s_1) they would not compare equal but did before, just considering offset and size. So we'd like to pass OEP_ADDRESS_OF as flag argument to operand_equal_p here and to add_expr above. This part would then just check whether the accesses are to the same address. This means you have to preserve the ->size member (and check it). The rest of the patch looks good to me. Thanks, Richard. > } > > class nontrapping_dom_walker : public dom_walker > { > public: > nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) > - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} > + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128) > + {} > > virtual edge before_dom_children (basic_block); > virtual void after_dom_children (basic_block); > @@ -2053,7 +2053,7 @@ private: > hash_set<tree> *m_nontrapping; > > /* The hash table for remembering what we've seen. */ > - hash_table<ssa_names_hasher> m_seen_ssa_names; > + hash_table<refs_hasher> m_seen_refs; > }; > > /* Called by walk_dominator_tree, when entering the block BB. */ > @@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children > (basic_block bb) > } > > /* We see the expression EXP in basic block BB. If it's an interesting > - expression (an MEM_REF through an SSA_NAME) possibly insert the > - expression into the set NONTRAP or the hash table of seen expressions. > - STORE is true if this expression is on the LHS, otherwise it's on > - the RHS. */ > + expression of: > + 1) MEM_REF > + 2) ARRAY_REF > + 3) COMPONENT_REF > + possibly insert the expression into the set NONTRAP or the hash table > + of seen expressions. STORE is true if this expression is on the LHS, > + otherwise it's on the RHS. */ > void > nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool > store) > { > - HOST_WIDE_INT size; > - > - if (TREE_CODE (exp) == MEM_REF > - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME > - && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) > - && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) > + if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF > + || TREE_CODE (exp) == COMPONENT_REF) > { > - tree name = TREE_OPERAND (exp, 0); > - struct name_to_bb map; > - name_to_bb **slot; > - struct name_to_bb *n2bb; > + struct ref_to_bb map; > + ref_to_bb **slot; > + struct ref_to_bb *r2bb; > basic_block found_bb = 0; > > - /* Try to find the last seen MEM_REF through the same > - SSA_NAME, which can trap. */ > - map.ssa_name_ver = SSA_NAME_VERSION (name); > - map.phase = 0; > - map.bb = 0; > - map.store = store; > - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); > - map.size = size; > - > - slot = m_seen_ssa_names.find_slot (&map, INSERT); > - n2bb = *slot; > - if (n2bb && n2bb->phase >= nt_call_phase) > - found_bb = n2bb->bb; > - > - /* If we've found a trapping MEM_REF, _and_ it dominates EXP > - (it's in a basic block on the path from us to the dominator root) > + if (!store) > + { > + tree base = get_base_address (exp); > + /* Only record a LOAD of local variable without address-taken, as > + the local stack is always writable. This allows cselim on a STORE > + with a dominating LOAD. */ > + if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) > + return; > + } > + > + /* Try to find the last seen *_REF, which can trap. */ > + map.exp = exp; > + slot = m_seen_refs.find_slot (&map, INSERT); > + r2bb = *slot; > + if (r2bb && r2bb->phase >= nt_call_phase) > + found_bb = r2bb->bb; > + > + /* If we've found a trapping *_REF, _and_ it dominates EXP > + (it's in a basic block on the path from us to the dominator root) > then we can't trap. */ > if (found_bb && (((size_t)found_bb->aux) & 1) == 1) > { > m_nontrapping->add (exp); > } > else > - { > + { > /* EXP might trap, so insert it into the hash table. */ > - if (n2bb) > + if (r2bb) > { > - n2bb->phase = nt_call_phase; > - n2bb->bb = bb; > + r2bb->phase = nt_call_phase; > + r2bb->bb = bb; > } > else > { > - n2bb = XNEW (struct name_to_bb); > - n2bb->ssa_name_ver = SSA_NAME_VERSION (name); > - n2bb->phase = nt_call_phase; > - n2bb->bb = bb; > - n2bb->store = store; > - n2bb->offset = map.offset; > - n2bb->size = size; > - *slot = n2bb; > + r2bb = XNEW (struct ref_to_bb); > + r2bb->phase = nt_call_phase; > + r2bb->bb = bb; > + r2bb->exp = exp; > + *slot = r2bb; > } > } > } > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imend?rffer; HRB 36809 (AG Nuernberg)