On Fri, 28 Feb 2020, Jakub Jelinek wrote:
> Hi!
>
> As mentioned in the PR and discussed on IRC, the following patch is the
> patch that fixes the originally reported issue.
> Because of the premature bitfield comparison -> BIT_FIELD_REF optimization,
> we have there:
> s$s4_19 = 0;
> s.s4 = s$s4_19;
> _10 = BIT_FIELD_REF <s, 8, 0>;
> _13 = _10 & 8;
> and no other s fields are initialized. If they were all initialized with
> constants, then my earlier PR93582 bitfield handling patches would handle it
> already, but if at least one of the bits we ignore after the BIT_AND_EXPR
> masking is not initialized, or is initialized earlier to a non-constant, we
> aren't able to look through it until combine, which is too late for the
> warnings on the dead code.
> This patch handles BIT_AND_EXPR where the first operand is an SSA_NAME
> initialized with a memory load and the second operand is an INTEGER_CST, by
> trying a partial def lookup after pushing the ranges of 0 bits in the mask
> as artificial initializers. In the above case on little-endian, we push an
> offset 0 size 3 {} partial def and an offset 4 size 4 one (the result is
> unsigned char) and then perform normal partial def handling.
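> For illustration, the walk over the mask below is equivalent to this
> standalone sketch in plain unsigned arithmetic (little-endian only; the
> function name is made up for the example and the printf stands in for
> pushing the artificial zero partial def):
>   #include <stdio.h>
>   /* Print the (offset, size) ranges of 0 bits in MASK, PREC being the
>      precision of the loaded value; these are the ranges pushed as
>      artificial zero partial defs.  */
>   void
>   print_zero_runs (unsigned long long mask, unsigned int prec)
>   {
>     unsigned int pos = 0;
>     while (pos < prec)
>       {
>         /* Measure the run of 0 bits starting at POS.  */
>         unsigned int len = 0;
>         while (pos + len < prec && ((mask >> (pos + len)) & 1) == 0)
>           len++;
>         if (len)
>           printf ("offset %u size %u\n", pos, len);
>         pos += len;
>         /* Skip the following run of 1 bits.  */
>         while (pos < prec && ((mask >> pos) & 1) != 0)
>           pos++;
>       }
>   }
> For mask 8 and precision 8 this prints offset 0 size 3 and offset 4 size 4,
> matching the partial defs mentioned above.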
> My initial version of the patch failed miserably during bootstrap, because
> data->finish (...) called vn_reference_lookup_or_insert_for_pieces, which
> I believe tried to remember the masked value rather than the real one for
> the reference, or, for a failed lookup, visit_reference_op_load called
> vn_reference_insert. The following version makes sure we call neither of
> those functions in the masked case, as we don't know anything better about
> the reference than whatever has been discovered when the load stmt itself
> was visited; on failure the patch just calls vn_nary_op_insert_stmt with
> the lhs (apparently calling it with the INTEGER_CST doesn't work).
>
> Bootstrapped/regtested on powerpc64{,le}-linux. I've additionally
> gathered statistics on successful BIT_AND_EXPR optimizations (attached; the
> first column is the uniq -c count, the second BITS_PER_WORD, then filename,
> function name, the mask on the BIT_AND_EXPR and finally the value it
> returned).
> Ok for trunk if it also passes bootstrap/regtest on x86_64-linux and
> i686-linux?
Comments below.
> 2020-02-28 Jakub Jelinek <[email protected]>
>
> PR tree-optimization/93582
> * tree-ssa-sccvn.h (vn_reference_lookup): Add mask argument.
> * tree-ssa-sccvn.c (struct vn_walk_cb_data): Add masked_p member,
> initialize it in the constructor.
> (vn_walk_cb_data::finish): If masked_p is true, return val instead
> of calling vn_reference_lookup_or_insert_for_pieces. Formatting fix.
> (vn_reference_lookup_pieces): Adjust vn_walk_cb_data initialization.
> Formatting fix.
> (vn_reference_lookup): Add mask argument. If non-NULL, don't call
> fully_constant_vn_reference_p nor vn_reference_lookup_1 and
> 	artificially push partial {} defs for the portions of the mask that
> 	contain zeros.
> (visit_nary_op): Handle BIT_AND_EXPR of a memory load and INTEGER_CST
> mask.
> (visit_reference_op_load): Add mask argument, pass it through
> to vn_reference_lookup. If non-NULL, don't call vn_reference_insert.
> (visit_stmt): Adjust visit_reference_op_load caller. Formatting fix.
>
> * gcc.dg/tree-ssa/pr93582-10.c: New test.
> * gcc.dg/pr93582.c: New test.
> * gcc.c-torture/execute/pr93582.c: New test.
>
> --- gcc/tree-ssa-sccvn.h.jj 2020-02-28 11:56:39.506941888 +0100
> +++ gcc/tree-ssa-sccvn.h 2020-02-28 12:01:42.677404902 +0100
> @@ -256,7 +256,7 @@ tree vn_reference_lookup_pieces (tree, a
> vec<vn_reference_op_s> ,
> vn_reference_t *, vn_lookup_kind);
> tree vn_reference_lookup (tree, tree, vn_lookup_kind, vn_reference_t *, bool,
> - tree * = NULL);
> + tree * = NULL, tree = NULL_TREE);
> void vn_reference_lookup_call (gcall *, vn_reference_t *, vn_reference_t);
> vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, tree,
> vec<vn_reference_op_s> ,
> --- gcc/tree-ssa-sccvn.c.jj 2020-02-28 11:56:39.506941888 +0100
> +++ gcc/tree-ssa-sccvn.c 2020-02-28 12:53:58.500459041 +0100
> @@ -1686,9 +1686,9 @@ struct pd_data
> struct vn_walk_cb_data
> {
> vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
> - vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
> + vn_lookup_kind vn_walk_kind_, bool tbaa_p_, bool masked_p_)
> : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
> - vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_),
> + vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_), masked_p (masked_p_),
> saved_operands (vNULL), first_set (-2), known_ranges (NULL)
> {
> if (!last_vuse_ptr)
> @@ -1705,6 +1705,7 @@ struct vn_walk_cb_data
> tree last_vuse;
> vn_lookup_kind vn_walk_kind;
> bool tbaa_p;
> + bool masked_p;
> vec<vn_reference_op_s> saved_operands;
>
> /* The VDEFs of partial defs we come along. */
> @@ -1731,9 +1732,12 @@ vn_walk_cb_data::finish (alias_set_type
> {
> if (first_set != -2)
> set = first_set;
> - return vn_reference_lookup_or_insert_for_pieces
> - (last_vuse, set, vr->type,
> - saved_operands.exists () ? saved_operands : vr->operands, val);
> + if (masked_p)
> + return (void *) val;
> + vec<vn_reference_op_s> &operands
> + = saved_operands.exists () ? saved_operands : vr->operands;
> + return vn_reference_lookup_or_insert_for_pieces (last_vuse, set, vr->type,
> + operands, val);
> }
>
> /* pd_range splay-tree helpers. */
> @@ -3380,13 +3384,13 @@ vn_reference_lookup_pieces (tree vuse, a
> {
> ao_ref r;
> unsigned limit = param_sccvn_max_alias_queries_per_access;
> - vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true);
> + vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, false);
> if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
> - *vnresult =
> - (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
> - vn_reference_lookup_2,
> - vn_reference_lookup_3,
> - vuse_valueize, limit, &data);
> + *vnresult
> + = ((vn_reference_t)
> + walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2,
> + vn_reference_lookup_3, vuse_valueize,
> + limit, &data));
> gcc_checking_assert (vr1.operands == shared_lookup_references);
> }
>
> @@ -3402,15 +3406,19 @@ vn_reference_lookup_pieces (tree vuse, a
> was NULL.. VNRESULT will be filled in with the vn_reference_t
> stored in the hashtable if one exists. When TBAA_P is false assume
> we are looking up a store and treat it as having alias-set zero.
> - *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.  */
> + *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.
> + MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
> + load is bitwise anded with MASK and so we are only interested in a subset
> + of the bits and can ignore if the other bits are uninitialized or
> + not initialized with constants. */
>
> tree
> vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
> - vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
> + vn_reference_t *vnresult, bool tbaa_p,
> + tree *last_vuse_ptr, tree mask)
> {
> vec<vn_reference_op_s> operands;
> struct vn_reference_s vr1;
> - tree cst;
> bool valuezied_anything;
>
> if (vnresult)
> @@ -3422,11 +3430,11 @@ vn_reference_lookup (tree op, tree vuse,
> vr1.type = TREE_TYPE (op);
> vr1.set = get_alias_set (op);
> vr1.hashcode = vn_reference_compute_hash (&vr1);
> - if ((cst = fully_constant_vn_reference_p (&vr1)))
> - return cst;
> + if (mask == NULL_TREE)
> + if (tree cst = fully_constant_vn_reference_p (&vr1))
> + return cst;
>
> - if (kind != VN_NOWALK
> - && vr1.vuse)
> + if (kind != VN_NOWALK && vr1.vuse)
> {
> vn_reference_t wvnresult;
> ao_ref r;
> @@ -3438,12 +3446,63 @@ vn_reference_lookup (tree op, tree vuse,
> vr1.operands))
> ao_ref_init (&r, op);
> vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
> - last_vuse_ptr, kind, tbaa_p);
> - wvnresult =
> - (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
> - vn_reference_lookup_2,
> - vn_reference_lookup_3,
> - vuse_valueize, limit, &data);
> + last_vuse_ptr, kind, tbaa_p, mask != NULL_TREE);
> + if (mask)
> + {
> + wide_int w = wi::to_wide (mask);
> + unsigned int pos = 0, prec = w.get_precision ();
> + pd_data pd;
> + pd.rhs = build_constructor (NULL_TREE, NULL);
> + /* When bitwise and with a constant is done on a memory load,
> + we don't really need all the bits to be defined or defined
> + to constants, we don't really care what is in the position
> + corresponding to 0 bits in the mask.
> + So, push the ranges of those 0 bits in the mask as artificial
> + zero stores and let the partial def handling code do the
> + rest. */
> + while (pos < prec)
> + {
> + int tz = wi::ctz (w);
> + if (pos + tz > prec)
> + tz = prec - pos;
> + if (tz)
> + {
> + if (BYTES_BIG_ENDIAN)
> + pd.offset = prec - pos - tz;
> + else
> + pd.offset = pos;
> + pd.size = tz;
> + void *r = data.push_partial_def (pd, 0, prec);
> + if (r == (void *) -1)
> + return NULL_TREE;
> + gcc_assert (r == NULL_TREE);
> + }
> + pos += tz;
> + if (pos == prec)
> + break;
> + w = wi::lrshift (w, tz);
> + tz = wi::ctz (wi::bit_not (w));
> + if (pos + tz > prec)
> + tz = prec - pos;
> + pos += tz;
> + w = wi::lrshift (w, tz);
> + }
I'd do this in the vn_walk_cb_data CTOR instead - you pass mask !=
NULL_TREE anyway, so you might as well pass the mask itself.
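Roughly, the CTOR would then take the mask itself, say

  vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
                   vn_lookup_kind vn_walk_kind_, bool tbaa_p_, tree mask_)

and do the wi::ctz walk over mask_ in its body, so vn_reference_lookup just
passes the mask through (only a sketch, untested).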
> + gcc_assert (vnresult == NULL);
> + tree res
> + = ((tree)
Ick.
> + walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
> + vn_reference_lookup_2,
> + vn_reference_lookup_3, vuse_valueize,
> + limit, &data));
> + return res;
> + }
> +
> + wvnresult
> + = ((vn_reference_t)
> + walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2,
> + vn_reference_lookup_3, vuse_valueize, limit,
> + &data));
I wonder if we can instead make the above return NULL (have finish
return (void *) -1) and do something like
if (!wvnresult && mask)
return data.masked_result;
and thus avoid the type-"unsafe" return frobbing by storing the
result value in an extra member of the vn_walk_cb_data struct.
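I.e. (member name just for illustration, untested):

  /* New member in vn_walk_cb_data, cleared in the CTOR.  */
  tree masked_result;

  /* And in vn_walk_cb_data::finish.  */
  if (masked_p)
    {
      masked_result = val;
      return (void *) -1;
    }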
> gcc_checking_assert (vr1.operands == shared_lookup_references);
> if (wvnresult)
> {
> @@ -3457,6 +3516,8 @@ vn_reference_lookup (tree op, tree vuse,
>
> if (last_vuse_ptr)
> *last_vuse_ptr = vr1.vuse;
> + if (mask)
> + return NULL_TREE;
> return vn_reference_lookup_1 (&vr1, vnresult);
> }
>
> @@ -4565,6 +4626,8 @@ valueized_wider_op (tree wide_type, tree
> return NULL_TREE;
> }
>
> +static bool visit_reference_op_load (tree, tree, gimple *, tree);
> +
> /* Visit a nary operator RHS, value number it, and return true if the
> value number of LHS has changed as a result. */
>
> @@ -4664,7 +4727,35 @@ visit_nary_op (tree lhs, gassign *stmt)
> }
> }
> }
> - default:;
> + break;
> + case BIT_AND_EXPR:
> + if (INTEGRAL_TYPE_P (type)
> + && TREE_CODE (rhs1) == SSA_NAME
> + && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)
> + && default_vn_walk_kind != VN_NOWALK
> + && CHAR_BIT == 8
> + && BITS_PER_UNIT == 8
> + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
> + && !integer_all_onesp (gimple_assign_rhs2 (stmt))
> + && !integer_zerop (gimple_assign_rhs2 (stmt)))
> + {
> + gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1));
> + if (ass
> + && !gimple_has_volatile_ops (ass)
> + && vn_get_stmt_kind (ass) == VN_REFERENCE)
> + {
> + bool changed
> + = visit_reference_op_load (lhs, gimple_assign_rhs1 (ass),
> + ass, gimple_assign_rhs2 (stmt));
Any reason you piggy-back on visit_reference_op_load instead of using
vn_reference_lookup directly? I'd very much prefer that since it
doesn't even try to mess with the SSA lattice.
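I.e. something like (untested):

  tree last_vuse = gimple_vuse (ass);
  tree result
    = vn_reference_lookup (gimple_assign_rhs1 (ass), gimple_vuse (ass),
                           default_vn_walk_kind, NULL, true, &last_vuse,
                           gimple_assign_rhs2 (stmt));
  if (result)
    return set_ssa_val_to (lhs, result);
  /* and otherwise fall through to the vn_nary_op_insert_stmt handling.  */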
> + if (SSA_VAL (lhs) == lhs)
> + vn_nary_op_insert_stmt (stmt, lhs);
> + return changed;
> + }
> + }
> + break;
> + default:
> + break;
> }
>
> bool changed = set_ssa_val_to (lhs, lhs);
> @@ -4751,18 +4842,22 @@ visit_reference_op_call (tree lhs, gcall
> }
>
> /* Visit a load from a reference operator RHS, part of STMT, value number it,
> - and return true if the value number of the LHS has changed as a result.
> */
> + and return true if the value number of the LHS has changed as a result.
> + MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
> + load is bitwise anded with MASK and so we are only interested in a subset
> + of the bits and can ignore if the other bits are uninitialized or
> + not initialized with constants. */
>
> static bool
> -visit_reference_op_load (tree lhs, tree op, gimple *stmt)
> +visit_reference_op_load (tree lhs, tree op, gimple *stmt, tree mask)
> {
> bool changed = false;
> tree last_vuse;
> tree result;
>
> last_vuse = gimple_vuse (stmt);
> - result = vn_reference_lookup (op, gimple_vuse (stmt),
> - default_vn_walk_kind, NULL, true, &last_vuse);
> + result = vn_reference_lookup (op, gimple_vuse (stmt), default_vn_walk_kind,
> + NULL, true, &last_vuse, mask);
>
> /* We handle type-punning through unions by value-numbering based
> on offset and size of the access. Be prepared to handle a
> @@ -4788,7 +4883,8 @@ visit_reference_op_load (tree lhs, tree
> else
> {
> changed = set_ssa_val_to (lhs, lhs);
> - vn_reference_insert (op, lhs, last_vuse, NULL_TREE);
> + if (mask == NULL_TREE)
> + vn_reference_insert (op, lhs, last_vuse, NULL_TREE);
> }
>
> return changed;
> @@ -5175,14 +5271,15 @@ visit_stmt (gimple *stmt, bool backedges
> switch (vn_get_stmt_kind (ass))
> {
> case VN_NARY:
> - changed = visit_nary_op (lhs, ass);
> - break;
> + changed = visit_nary_op (lhs, ass);
> + break;
> case VN_REFERENCE:
> - changed = visit_reference_op_load (lhs, rhs1, ass);
> - break;
> + changed = visit_reference_op_load (lhs, rhs1, ass,
> + NULL_TREE);
> + break;
> default:
> - changed = defs_to_varying (ass);
> - break;
> + changed = defs_to_varying (ass);
> + break;
> }
> }
> }
> --- gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c.jj 2020-02-28 12:01:42.680404857 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c 2020-02-28 12:01:42.679404872 +0100
> @@ -0,0 +1,29 @@
> +/* PR tree-optimization/93582 */
> +/* { dg-do compile { target int32 } } */
> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> +/* { dg-final { scan-tree-dump "return 72876566;" "fre1" { target le } } } */
> +/* { dg-final { scan-tree-dump "return 559957376;" "fre1" { target be } } }
> */
> +
> +union U {
> + struct S { int a : 12, b : 5, c : 10, d : 5; } s;
> + unsigned int i;
> +};
> +struct A { char a[12]; union U u; };
> +void bar (struct A *);
> +
> +unsigned
> +foo (void)
> +{
> + struct A a;
> + bar (&a);
> + a.u.s.a = 1590;
> + a.u.s.c = -404;
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +#define M 0x67e0a5f
> +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> +#define M 0xa5f067e0
> +#else
> +#define M 0
> +#endif
> + return a.u.i & M;
> +}
> --- gcc/testsuite/gcc.dg/pr93582.c.jj 2020-02-28 12:01:42.679404872 +0100
> +++ gcc/testsuite/gcc.dg/pr93582.c 2020-02-28 12:01:42.679404872 +0100
> @@ -0,0 +1,57 @@
> +/* PR tree-optimization/93582 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -Warray-bounds" } */
> +
> +struct S {
> + unsigned int s1:1;
> + unsigned int s2:1;
> + unsigned int s3:1;
> + unsigned int s4:1;
> + unsigned int s5:4;
> + unsigned char s6;
> + unsigned short s7;
> + unsigned short s8;
> +};
> +struct T {
> + int t1;
> + int t2;
> +};
> +
> +static inline int
> +bar (struct S *x)
> +{
> + if (x->s4)
> + return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2; /* { dg-bogus "array subscript 1 is outside array bounds of" } */
> + else
> + return 0;
> +}
> +
> +int
> +foo (int x, int y)
> +{
> + struct S s; /* { dg-bogus "while referencing" } */
> + s.s6 = x;
> + s.s7 = y & 0x1FFF;
> + s.s4 = 0;
> + return bar (&s);
> +}
> +
> +static inline int
> +qux (struct S *x)
> +{
> + int s4 = x->s4;
> + if (s4)
> + return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;
> + else
> + return 0;
> +}
> +
> +int
> +baz (int x, int y)
> +{
> + struct S s;
> + s.s6 = x;
> + s.s7 = y & 0x1FFF;
> + s.s4 = 0;
> + return qux (&s);
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr93582.c.jj 2020-02-28 12:27:51.280925113 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr93582.c 2020-02-28 12:26:17.272332573 +0100
> @@ -0,0 +1,22 @@
> +/* PR tree-optimization/93582 */
> +
> +short a;
> +int b, c;
> +
> +__attribute__((noipa)) void
> +foo (void)
> +{
> + b = c;
> + a &= 7;
> +}
> +
> +int
> +main ()
> +{
> + c = 27;
> + a = 14;
> + foo ();
> + if (b != 27 || a != 6)
> + __builtin_abort ();
> + return 0;
> +}
>
> Jakub
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)