On Fri, 24 Apr 2020, Jakub Jelinek wrote: > Hi! > > As the new testcase shows, it is not safe to assume we can optimize > a conditional store into an automatic non-addressable var, we can do it > only if we can prove that the unconditional load or store actually will > not be outside of the boundaries of the variable. > If the offset and size are constant, we can, but this is already all > checked in !tree_could_trap_p, otherwise we really need to check for > a dominating unconditional store, or for the specific case of automatic > non-addressable variables, it is enough if there is a dominating load > (that is what those 4 testcases have). tree-ssa-phiopt.c has some > infrastructure for this already, see the add_or_mark_expr method etc., > but right now it handles only MEM_REFs with SSA_NAME first operand > and some integral offset. So, I think it can be for GCC11 extended > to handle other memory references, possibly up to just doing > get_inner_reference and hasing based on the base, offset expressions > and bit_offset and bit_size, and have also a special case that for > !TREE_ADDRESSABLE automatic variables it could ignore whether something > is a load or store because the local stack should be always writable. > But it feels way too dangerous to do this this late for GCC10, so this > patch just restricts the optimization to the safe case (where lhs doesn't > trap), and on Richi's request also ignores TREE_ADDRESSABLE bit if > flag_store_data_races, because my understanding the reason for > TREE_ADDRESSABLE check is that we want to avoid introducing > store data races (if address of an automatic var escapes, some other thread > could be accessing it concurrently). > > Bootstrapped/regtested on powerpc64{,le}-linux, ok for trunk?
OK. Thanks, Richard. > 2020-04-24 Jakub Jelinek <ja...@redhat.com> > Richard Biener <rguent...@suse.de> > > PR tree-optimization/94734 > PR tree-optimization/89430 > * tree-ssa-phiopt.c: Include tree-eh.h. > (cond_store_replacement): Return false if an automatic variable > access could trap. If -fstore-data-races, don't return false > just because an automatic variable is addressable. > > * gcc.dg/tree-ssa/pr89430-1.c: Add xfail. > * gcc.dg/tree-ssa/pr89430-2.c: Add xfail. > * gcc.dg/tree-ssa/pr89430-5.c: Add xfail. > * gcc.dg/tree-ssa/pr89430-6.c: Add xfail. > * gcc.c-torture/execute/pr94734.c: New test. > > --- gcc/tree-ssa-phiopt.c.jj 2020-03-19 10:23:50.542872359 +0100 > +++ gcc/tree-ssa-phiopt.c 2020-04-24 12:33:02.368355735 +0200 > @@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. > #include "tree-scalar-evolution.h" > #include "tree-inline.h" > #include "case-cfn-macros.h" > +#include "tree-eh.h" > > static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); > static bool two_value_replacement (basic_block, basic_block, edge, gphi *, > @@ -2237,10 +2238,13 @@ cond_store_replacement (basic_block midd > whose value is not available readily, which we want to avoid. */ > if (!nontrap->contains (lhs)) > { > - /* If LHS is a local variable without address-taken, we could > + /* If LHS is an access to a local variable without address-taken > + (or when we allow data races) and known not to trap, we could > always safely move down the store. */ > tree base = get_base_address (lhs); > - if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) > + if (!auto_var_p (base) > + || (TREE_ADDRESSABLE (base) && !flag_store_data_races) > + || tree_could_trap_p (lhs)) > return false; > } > > --- gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c.jj 2020-01-12 > 11:54:37.606395410 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 2020-04-24 11:26:45.021425675 > +0200 > @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { > return a[0]+a[1]; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > --- gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c.jj 2020-01-12 > 11:54:37.606395410 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 2020-04-24 11:26:52.319316212 > +0200 > @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { > return a[0]+a[1]; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > --- gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c.jj 2020-01-12 > 11:54:37.606395410 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 2020-04-24 11:26:59.782204277 > +0200 > @@ -13,4 +13,4 @@ int test(int b, int k) { > return a.data[0] + a.data[1]; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > --- gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c.jj 2020-01-12 > 11:54:37.606395410 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c 2020-04-24 11:27:06.456104170 > +0200 > @@ -16,4 +16,4 @@ int test(int b, int k) { > return a.data[0].x + a.data[1].x; > } > > -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } > */ > +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { > xfail *-*-* } } } */ > --- gcc/testsuite/gcc.c-torture/execute/pr94734.c.jj 2020-04-24 > 11:19:13.745211960 +0200 > +++ gcc/testsuite/gcc.c-torture/execute/pr94734.c 2020-04-24 > 11:18:45.426639974 +0200 > @@ -0,0 +1,59 @@ > +/* PR tree-optimization/94734 */ > + > +__attribute__((noipa)) int > +foo (int n) > +{ > + int arr[16], s = 0; > + for (int i = 0; i < n; i++) > + { > + if (i < 16) > + arr[i] = i; > + } > + for (int i = 0; i < 16; i++) > + s += arr[i]; > + return s; > +} > + > +__attribute__((noipa)) int > +bar (int n, int x, unsigned long y, unsigned long z) > +{ > + int arr[16], s = 0; > + arr[4] = 42; > + for (int i = 0; i < n; i++) > + { > + if (x == (i & 0x25)) > + arr[y] = i; > + } > + return arr[z]; > +} > + > +__attribute__((noipa)) int > +baz (int n, int x, unsigned long z) > +{ > + int arr[16], s = 0; > + arr[12] = 42; > + for (int i = 0; i < n; i++) > + { > + if (x == (i & 0x25)) > + arr[7] = i; > + } > + return arr[z]; > +} > + > +int > +main () > +{ > + if (foo (10374) != 15 * 16 / 2) > + __builtin_abort (); > + if (bar (25, 0x25, (unsigned long) 0xdeadbeefbeefdeadULL, 4) != 42) > + __builtin_abort (); > + if (bar (25, 4, 15, 15) != 22) > + __builtin_abort (); > + if (baz (25, 0x25, 12) != 42) > + __builtin_abort (); > + if (baz (25, 4, 7) != 22) > + __builtin_abort (); > + if (baz (25, 4, 12) != 42) > + __builtin_abort (); > + return 0; > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)