On Fri, 28 Feb 2020, Jakub Jelinek wrote:
> Hi!
>
> As mentioned in the PR and discussed on IRC, the following patch is the
> one that fixes the originally reported issue.
> We have the following there because of the premature bitfield comparison
> -> BIT_FIELD_REF optimization:
>   s$s4_19 = 0;
>   s.s4 = s$s4_19;
>   _10 = BIT_FIELD_REF <s, 8, 0>;
>   _13 = _10 & 8;
> and no other s fields are initialized.  If they were all initialized with
> constants, my earlier PR93582 bitfield handling patches would already
> handle it, but if at least one of the bits we ignore after the BIT_AND_EXPR
> masking is not initialized, or is initialized earlier to a non-constant,
> we aren't able to look through it until combine, which is too late for the
> warnings on the dead code.
> This patch handles BIT_AND_EXPR where the first operand is an SSA_NAME
> initialized with a memory load and the second operand is an INTEGER_CST,
> by trying a partial def lookup after pushing the ranges of 0 bits in the
> mask as artificial initializers.  In the above case on little-endian, we
> push an offset 0, size 3 {} partial def and an offset 4, size 4 one (the
> result is unsigned char) and then perform the normal partial def handling.
> My initial version of the patch failed miserably during bootstrap, because
> data->finish (...) called vn_reference_lookup_or_insert_for_pieces,
> which I believe tried to remember the masked value rather than the real
> one for the reference, or, for a failed lookup, visit_reference_op_load
> called vn_reference_insert.  The following version makes sure we aren't
> calling either of those functions in the masked case, as we don't know
> anything better about the reference than whatever has been discovered when
> the load stmt was visited; the patch just calls vn_nary_op_insert_stmt on
> failure with the lhs (apparently calling it with the INTEGER_CST doesn't
> work).
>
> Bootstrapped/regtested on powerpc64{,le}-linux.  I've additionally
> gathered statistics on successful BIT_AND_EXPR optimizations (attached;
> the first column is the uniq -c count, the second BITS_PER_WORD, then the
> filename, function name, the mask on the BIT_AND_EXPR and finally the
> value it returned).
> Ok for trunk if it also passes bootstrap/regtest on x86_64-linux and
> i686-linux?
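For illustration only, here is a minimal standalone sketch (plain C with a
hypothetical helper name, not the wide_int-based code from the patch below)
of how the zero bits of such a mask decompose into the (offset, size) ranges
that get pushed as artificial {} partial defs; for mask 8 with 8 bits of
precision it prints exactly the offset 0 size 3 and offset 4 size 4 ranges
mentioned above.  Little-endian bit numbering is assumed.

/* Sketch, not the GCC implementation: enumerate the runs of zero bits in
   MASK (PREC significant bits) as (offset, size) pairs, i.e. the ranges
   the patch pushes as artificial {} partial defs before the partial-def
   lookup.  */
#include <stdio.h>

static void
zero_runs (unsigned long long mask, unsigned prec)
{
  unsigned pos = 0;
  while (pos < prec)
    {
      if (!((mask >> pos) & 1))
	{
	  /* Found the start of a run of zero bits; advance to its end.  */
	  unsigned start = pos;
	  while (pos < prec && !((mask >> pos) & 1))
	    pos++;
	  printf ("offset %u size %u -> artificial {} partial def\n",
		  start, pos - start);
	}
      else
	pos++;
    }
}

int
main (void)
{
  zero_runs (8, 8);   /* mask 0b00001000: offset 0 size 3, offset 4 size 4 */
  return 0;
}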
Comments below.

> 2020-02-28  Jakub Jelinek  <ja...@redhat.com>
>
> 	PR tree-optimization/93582
> 	* tree-ssa-sccvn.h (vn_reference_lookup): Add mask argument.
> 	* tree-ssa-sccvn.c (struct vn_walk_cb_data): Add masked_p member,
> 	initialize it in the constructor.
> 	(vn_walk_cb_data::finish): If masked_p is true, return val instead
> 	of calling vn_reference_lookup_or_insert_for_pieces.  Formatting fix.
> 	(vn_reference_lookup_pieces): Adjust vn_walk_cb_data initialization.
> 	Formatting fix.
> 	(vn_reference_lookup): Add mask argument.  If non-NULL, don't call
> 	fully_constant_vn_reference_p nor vn_reference_lookup_1 and
> 	artificially push partial {} defs for the portions of the mask that
> 	contain zeros.
> 	(visit_nary_op): Handle BIT_AND_EXPR of a memory load and INTEGER_CST
> 	mask.
> 	(visit_reference_op_load): Add mask argument, pass it through
> 	to vn_reference_lookup.  If non-NULL, don't call vn_reference_insert.
> 	(visit_stmt): Adjust visit_reference_op_load caller.  Formatting fix.
>
> 	* gcc.dg/tree-ssa/pr93582-10.c: New test.
> 	* gcc.dg/pr93582.c: New test.
> 	* gcc.c-torture/execute/pr93582.c: New test.
>
> --- gcc/tree-ssa-sccvn.h.jj	2020-02-28 11:56:39.506941888 +0100
> +++ gcc/tree-ssa-sccvn.h	2020-02-28 12:01:42.677404902 +0100
> @@ -256,7 +256,7 @@ tree vn_reference_lookup_pieces (tree, a
>  				 vec<vn_reference_op_s> ,
>  				 vn_reference_t *, vn_lookup_kind);
>  tree vn_reference_lookup (tree, tree, vn_lookup_kind, vn_reference_t *, bool,
> -			  tree * = NULL);
> +			  tree * = NULL, tree = NULL_TREE);
>  void vn_reference_lookup_call (gcall *, vn_reference_t *, vn_reference_t);
>  vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, tree,
>  					   vec<vn_reference_op_s> ,
> --- gcc/tree-ssa-sccvn.c.jj	2020-02-28 11:56:39.506941888 +0100
> +++ gcc/tree-ssa-sccvn.c	2020-02-28 12:53:58.500459041 +0100
> @@ -1686,9 +1686,9 @@ struct pd_data
>  struct vn_walk_cb_data
>  {
>    vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
> -		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
> +		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_, bool masked_p_)
>      : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
> -      vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_),
> +      vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_), masked_p (masked_p_),
>        saved_operands (vNULL), first_set (-2), known_ranges (NULL)
>    {
>      if (!last_vuse_ptr)
> @@ -1705,6 +1705,7 @@ struct vn_walk_cb_data
>    tree last_vuse;
>    vn_lookup_kind vn_walk_kind;
>    bool tbaa_p;
> +  bool masked_p;
>    vec<vn_reference_op_s> saved_operands;
>
>    /* The VDEFs of partial defs we come along.  */
> @@ -1731,9 +1732,12 @@ vn_walk_cb_data::finish (alias_set_type
>  {
>    if (first_set != -2)
>      set = first_set;
> -  return vn_reference_lookup_or_insert_for_pieces
> -      (last_vuse, set, vr->type,
> -       saved_operands.exists () ? saved_operands : vr->operands, val);
> +  if (masked_p)
> +    return (void *) val;
> +  vec<vn_reference_op_s> &operands
> +    = saved_operands.exists () ? saved_operands : vr->operands;
> +  return vn_reference_lookup_or_insert_for_pieces (last_vuse, set, vr->type,
> +						   operands, val);
>  }
>
>  /* pd_range splay-tree helpers.  */
> @@ -3380,13 +3384,13 @@ vn_reference_lookup_pieces (tree vuse, a
>      {
>        ao_ref r;
>        unsigned limit = param_sccvn_max_alias_queries_per_access;
> -      vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true);
> +      vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, false);
>        if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
> -	*vnresult =
> -	  (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
> -						  vn_reference_lookup_2,
> -						  vn_reference_lookup_3,
> -						  vuse_valueize, limit, &data);
> +	*vnresult
> +	  = ((vn_reference_t)
> +	     walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2,
> +				     vn_reference_lookup_3, vuse_valueize,
> +				     limit, &data));
>        gcc_checking_assert (vr1.operands == shared_lookup_references);
>      }
>
> @@ -3402,15 +3406,19 @@ vn_reference_lookup_pieces (tree vuse, a
>     was NULL..  VNRESULT will be filled in with the vn_reference_t
>     stored in the hashtable if one exists.  When TBAA_P is false assume
>     we are looking up a store and treat it as having alias-set zero.
> -   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.  */
> +   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.
> +   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
> +   load is bitwise anded with MASK and so we are only interested in a subset
> +   of the bits and can ignore if the other bits are uninitialized or
> +   not initialized with constants.  */
>
>  tree
>  vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
> -		     vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
> +		     vn_reference_t *vnresult, bool tbaa_p,
> +		     tree *last_vuse_ptr, tree mask)
>  {
>    vec<vn_reference_op_s> operands;
>    struct vn_reference_s vr1;
> -  tree cst;
>    bool valuezied_anything;
>
>    if (vnresult)
> @@ -3422,11 +3430,11 @@ vn_reference_lookup (tree op, tree vuse,
>    vr1.type = TREE_TYPE (op);
>    vr1.set = get_alias_set (op);
>    vr1.hashcode = vn_reference_compute_hash (&vr1);
> -  if ((cst = fully_constant_vn_reference_p (&vr1)))
> -    return cst;
> +  if (mask == NULL_TREE)
> +    if (tree cst = fully_constant_vn_reference_p (&vr1))
> +      return cst;
>
> -  if (kind != VN_NOWALK
> -      && vr1.vuse)
> +  if (kind != VN_NOWALK && vr1.vuse)
>      {
>        vn_reference_t wvnresult;
>        ao_ref r;
> @@ -3438,12 +3446,63 @@ vn_reference_lookup (tree op, tree vuse,
>  					 vr1.operands))
>  	ao_ref_init (&r, op);
>        vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
> -			    last_vuse_ptr, kind, tbaa_p);
> -      wvnresult =
> -	(vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
> -						vn_reference_lookup_2,
> -						vn_reference_lookup_3,
> -						vuse_valueize, limit, &data);
> +			    last_vuse_ptr, kind, tbaa_p, mask != NULL_TREE);
> +      if (mask)
> +	{
> +	  wide_int w = wi::to_wide (mask);
> +	  unsigned int pos = 0, prec = w.get_precision ();
> +	  pd_data pd;
> +	  pd.rhs = build_constructor (NULL_TREE, NULL);
> +	  /* When bitwise and with a constant is done on a memory load,
> +	     we don't really need all the bits to be defined or defined
> +	     to constants, we don't really care what is in the position
> +	     corresponding to 0 bits in the mask.
> +	     So, push the ranges of those 0 bits in the mask as artificial
> +	     zero stores and let the partial def handling code do the
> +	     rest.  */
> +	  while (pos < prec)
> +	    {
> +	      int tz = wi::ctz (w);
> +	      if (pos + tz > prec)
> +		tz = prec - pos;
> +	      if (tz)
> +		{
> +		  if (BYTES_BIG_ENDIAN)
> +		    pd.offset = prec - pos - tz;
> +		  else
> +		    pd.offset = pos;
> +		  pd.size = tz;
> +		  void *r = data.push_partial_def (pd, 0, prec);
> +		  if (r == (void *) -1)
> +		    return NULL_TREE;
> +		  gcc_assert (r == NULL_TREE);
> +		}
> +	      pos += tz;
> +	      if (pos == prec)
> +		break;
> +	      w = wi::lrshift (w, tz);
> +	      tz = wi::ctz (wi::bit_not (w));
> +	      if (pos + tz > prec)
> +		tz = prec - pos;
> +	      pos += tz;
> +	      w = wi::lrshift (w, tz);
> +	    }

I'd do this in the vn_walk_cb_data CTOR instead - you pass mask != NULL_TREE
anyway, so you can just as well pass the mask itself.

> +	  gcc_assert (vnresult == NULL);
> +	  tree res
> +	    = ((tree)

Ick.

> +	       walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
> +				       vn_reference_lookup_2,
> +				       vn_reference_lookup_3, vuse_valueize,
> +				       limit, &data));
> +	  return res;
> +	}
> +
> +      wvnresult
> +	= ((vn_reference_t)
> +	   walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2,
> +				   vn_reference_lookup_3, vuse_valueize, limit,
> +				   &data));

I wonder if we can instead make the above return NULL (have finish return
(void *) -1) and do something like

  if (!wvnresult && mask)
    return data.masked_result;

and thus avoid the type-"unsafe" return value frobbing by storing the result
value in an extra member of the vn_walk_cb_data struct.

>        gcc_checking_assert (vr1.operands == shared_lookup_references);
>        if (wvnresult)
>  	{
> @@ -3457,6 +3516,8 @@ vn_reference_lookup (tree op, tree vuse,
>
>    if (last_vuse_ptr)
>      *last_vuse_ptr = vr1.vuse;
> +  if (mask)
> +    return NULL_TREE;
>    return vn_reference_lookup_1 (&vr1, vnresult);
>  }
>
> @@ -4565,6 +4626,8 @@ valueized_wider_op (tree wide_type, tree
>    return NULL_TREE;
>  }
>
> +static bool visit_reference_op_load (tree, tree, gimple *, tree);
> +
>  /* Visit a nary operator RHS, value number it, and return true if the
>     value number of LHS has changed as a result.  */
>
> @@ -4664,7 +4727,35 @@ visit_nary_op (tree lhs, gassign *stmt)
>  	    }
>  	}
>        }
> -    default:;
> +      break;
> +    case BIT_AND_EXPR:
> +      if (INTEGRAL_TYPE_P (type)
> +	  && TREE_CODE (rhs1) == SSA_NAME
> +	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
> +	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)
> +	  && default_vn_walk_kind != VN_NOWALK
> +	  && CHAR_BIT == 8
> +	  && BITS_PER_UNIT == 8
> +	  && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
> +	  && !integer_all_onesp (gimple_assign_rhs2 (stmt))
> +	  && !integer_zerop (gimple_assign_rhs2 (stmt)))
> +	{
> +	  gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1));
> +	  if (ass
> +	      && !gimple_has_volatile_ops (ass)
> +	      && vn_get_stmt_kind (ass) == VN_REFERENCE)
> +	    {
> +	      bool changed
> +		= visit_reference_op_load (lhs, gimple_assign_rhs1 (ass),
> +					   ass, gimple_assign_rhs2 (stmt));

Any reason you piggy-back on visit_reference_op_load instead of using
vn_reference_lookup directly?  I'd very much prefer that since it doesn't
even try to mess with the SSA lattice.

> +	      if (SSA_VAL (lhs) == lhs)
> +		vn_nary_op_insert_stmt (stmt, lhs);
> +	      return changed;
> +	    }
> +	}
> +      break;
> +    default:
> +      break;
>      }
>
>    bool changed = set_ssa_val_to (lhs, lhs);
> @@ -4751,18 +4842,22 @@ visit_reference_op_call (tree lhs, gcall
>  }
>
>  /* Visit a load from a reference operator RHS, part of STMT, value number it,
> -   and return true if the value number of the LHS has changed as a result.  */
> +   and return true if the value number of the LHS has changed as a result.
> +   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
> +   load is bitwise anded with MASK and so we are only interested in a subset
> +   of the bits and can ignore if the other bits are uninitialized or
> +   not initialized with constants.  */
>
>  static bool
> -visit_reference_op_load (tree lhs, tree op, gimple *stmt)
> +visit_reference_op_load (tree lhs, tree op, gimple *stmt, tree mask)
>  {
>    bool changed = false;
>    tree last_vuse;
>    tree result;
>
>    last_vuse = gimple_vuse (stmt);
> -  result = vn_reference_lookup (op, gimple_vuse (stmt),
> -				default_vn_walk_kind, NULL, true, &last_vuse);
> +  result = vn_reference_lookup (op, gimple_vuse (stmt), default_vn_walk_kind,
> +				NULL, true, &last_vuse, mask);
>
>    /* We handle type-punning through unions by value-numbering based
>       on offset and size of the access.  Be prepared to handle a
> @@ -4788,7 +4883,8 @@ visit_reference_op_load (tree lhs, tree
>    else
>      {
>        changed = set_ssa_val_to (lhs, lhs);
> -      vn_reference_insert (op, lhs, last_vuse, NULL_TREE);
> +      if (mask == NULL_TREE)
> +	vn_reference_insert (op, lhs, last_vuse, NULL_TREE);
>      }
>
>    return changed;
> @@ -5175,14 +5271,15 @@ visit_stmt (gimple *stmt, bool backedges
>  	      switch (vn_get_stmt_kind (ass))
>  		{
>  		case VN_NARY:
> -		  changed = visit_nary_op (lhs, ass);
> -		  break;
> +		  changed = visit_nary_op (lhs, ass);
> +		  break;
>  		case VN_REFERENCE:
> -		  changed = visit_reference_op_load (lhs, rhs1, ass);
> -		  break;
> +		  changed = visit_reference_op_load (lhs, rhs1, ass,
> +						     NULL_TREE);
> +		  break;
>  		default:
> -		  changed = defs_to_varying (ass);
> -		  break;
> +		  changed = defs_to_varying (ass);
> +		  break;
>  		}
>  	    }
>  	}
> --- gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c.jj	2020-02-28 12:01:42.680404857 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c	2020-02-28 12:01:42.679404872 +0100
> @@ -0,0 +1,29 @@
> +/* PR tree-optimization/93582 */
> +/* { dg-do compile { target int32 } } */
> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> +/* { dg-final { scan-tree-dump "return 72876566;" "fre1" { target le } } } */
> +/* { dg-final { scan-tree-dump "return 559957376;" "fre1" { target be } } } */
> +
> +union U {
> +  struct S { int a : 12, b : 5, c : 10, d : 5; } s;
> +  unsigned int i;
> +};
> +struct A { char a[12]; union U u; };
> +void bar (struct A *);
> +
> +unsigned
> +foo (void)
> +{
> +  struct A a;
> +  bar (&a);
> +  a.u.s.a = 1590;
> +  a.u.s.c = -404;
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +#define M 0x67e0a5f
> +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> +#define M 0xa5f067e0
> +#else
> +#define M 0
> +#endif
> +  return a.u.i & M;
> +}
> --- gcc/testsuite/gcc.dg/pr93582.c.jj	2020-02-28 12:01:42.679404872 +0100
> +++ gcc/testsuite/gcc.dg/pr93582.c	2020-02-28 12:01:42.679404872 +0100
> @@ -0,0 +1,57 @@
> +/* PR tree-optimization/93582 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -Warray-bounds" } */
> +
> +struct S {
> +  unsigned int s1:1;
> +  unsigned int s2:1;
> +  unsigned int s3:1;
> +  unsigned int s4:1;
> +  unsigned int s5:4;
> +  unsigned char s6;
> +  unsigned short s7;
> +  unsigned short s8;
> +};
> +struct T {
> +  int t1;
> +  int t2;
> +};
> +
> +static inline int
> +bar (struct S *x)
> +{
> +  if (x->s4)
> +    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;	/* { dg-bogus "array subscript 1 is outside array bounds of" } */
> +  else
> +    return 0;
> +}
> +
> +int
> +foo (int x, int y)
> +{
> +  struct S s;			/* { dg-bogus "while referencing" } */
> +  s.s6 = x;
> +  s.s7 = y & 0x1FFF;
> +  s.s4 = 0;
> +  return bar (&s);
> +}
> +
> +static inline int
> +qux (struct S *x)
> +{
> +  int s4 = x->s4;
> +  if (s4)
> +    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;
> +  else
> +    return 0;
> +}
> +
> +int
> +baz (int x, int y)
> +{
> +  struct S s;
> +  s.s6 = x;
> +  s.s7 = y & 0x1FFF;
> +  s.s4 = 0;
> +  return qux (&s);
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr93582.c.jj	2020-02-28 12:27:51.280925113 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr93582.c	2020-02-28 12:26:17.272332573 +0100
> @@ -0,0 +1,22 @@
> +/* PR tree-optimization/93582 */
> +
> +short a;
> +int b, c;
> +
> +__attribute__((noipa)) void
> +foo (void)
> +{
> +  b = c;
> +  a &= 7;
> +}
> +
> +int
> +main ()
> +{
> +  c = 27;
> +  a = 14;
> +  foo ();
> +  if (b != 27 || a != 6)
> +    __builtin_abort ();
> +  return 0;
> +}
>
> 	Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)