On Fri, 28 Feb 2020, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR and discussed on IRC, the following patch is the
> patch that fixes the originally reported issue.
> We end up with the following there because of the premature bitfield
> comparison -> BIT_FIELD_REF optimization:
>   s$s4_19 = 0;
>   s.s4 = s$s4_19;
>   _10 = BIT_FIELD_REF <s, 8, 0>;
>   _13 = _10 & 8;
> and no other fields of s are initialized.  If they were all initialized with
> constants, my earlier PR93582 bitfield handling patches would handle it
> already, but if at least one of the bits we ignore after the BIT_AND_EXPR
> masking is uninitialized, or is initialized earlier to something non-constant,
> we aren't able to look through it until combine, which is too late for the
> warnings on the dead code.
> This patch handles BIT_AND_EXPR where the first operand is an SSA_NAME
> initialized with a memory load and the second operand is an INTEGER_CST, by
> trying a partial def lookup after pushing the ranges of 0 bits in the mask as
> artificial initializers.  In the above case on little-endian, we push an
> offset 0 size 3 {} partial def and an offset 4 size 4 one (the result is
> unsigned char) and then perform the normal partial def handling.
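
To make the described decomposition concrete, here is a rough standalone
sketch of it using plain unsigned int arithmetic in place of wide_int (just
an illustration of the loop in the patch below, not GCC code):

  #include <stdio.h>

  /* Enumerate the runs of 0 bits in MASK (of PREC bits), i.e. the ranges
     that get pushed as artificial {} partial defs; on big-endian the
     offset would additionally be flipped to prec - pos - tz.  For mask 8
     and prec 8 this prints offset 0 size 3 and offset 4 size 4, matching
     the description above.  */
  static void
  zero_runs (unsigned int mask, unsigned int prec)
  {
    unsigned int pos = 0;
    while (pos < prec)
      {
        unsigned int tz = 0;
        while (pos + tz < prec && !((mask >> (pos + tz)) & 1))
          tz++;                        /* length of the run of 0 bits */
        if (tz)
          printf ("offset %u size %u\n", pos, tz);
        pos += tz;
        while (pos < prec && ((mask >> pos) & 1))
          pos++;                       /* skip the following run of 1 bits */
      }
  }
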
> My initial version of the patch failed miserably during bootstrap, because
> data->finish (...) called vn_reference_lookup_or_insert_for_pieces, which
> I believe tried to remember the masked value rather than the real one for the
> reference, or, for a failed lookup, visit_reference_op_load called
> vn_reference_insert.  The following version makes sure we don't call either
> of those functions in the masked case, as we don't learn anything better
> about the reference than what was already discovered when the load stmt
> itself was visited; on failure the patch just calls vn_nary_op_insert_stmt
> with the lhs (apparently calling it with the INTEGER_CST doesn't work).
> 
> Bootstrapped/regtested on powerpc64{,le}-linux.  I've additionally gathered
> statistics on successful BIT_AND_EXPR optimizations (attached; the first
> column is the uniq -c count, the second BITS_PER_WORD, then filename, function
> name, the mask on the BIT_AND_EXPR and finally the value it returned).
> Ok for trunk if it also passes bootstrap/regtest on x86_64-linux and
> i686-linux?

Comments below.

> 2020-02-28  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/93582
>       * tree-ssa-sccvn.h (vn_reference_lookup): Add mask argument.
>       * tree-ssa-sccvn.c (struct vn_walk_cb_data): Add masked_p member,
>       initialize it in the constructor.
>       (vn_walk_cb_data::finish): If masked_p is true, return val instead
>       of calling vn_reference_lookup_or_insert_for_pieces.  Formatting fix.
>       (vn_reference_lookup_pieces): Adjust vn_walk_cb_data initialization.
>       Formatting fix.
>       (vn_reference_lookup): Add mask argument.  If non-NULL, don't call
>       fully_constant_vn_reference_p nor vn_reference_lookup_1 and
>       artificially push partial {} defs for the portions of the mask that
>       contain zeros.
>       (visit_nary_op): Handle BIT_AND_EXPR of a memory load and INTEGER_CST
>       mask.
>       (visit_reference_op_load): Add mask argument, pass it through
>       to vn_reference_lookup.  If non-NULL, don't call vn_reference_insert.
>       (visit_stmt): Adjust visit_reference_op_load caller.  Formatting fix.
> 
>       * gcc.dg/tree-ssa/pr93582-10.c: New test.
>       * gcc.dg/pr93582.c: New test.
>       * gcc.c-torture/execute/pr93582.c: New test.
> 
> --- gcc/tree-ssa-sccvn.h.jj   2020-02-28 11:56:39.506941888 +0100
> +++ gcc/tree-ssa-sccvn.h      2020-02-28 12:01:42.677404902 +0100
> @@ -256,7 +256,7 @@ tree vn_reference_lookup_pieces (tree, a
>                                vec<vn_reference_op_s> ,
>                                vn_reference_t *, vn_lookup_kind);
>  tree vn_reference_lookup (tree, tree, vn_lookup_kind, vn_reference_t *, bool,
> -                       tree * = NULL);
> +                       tree * = NULL, tree = NULL_TREE);
>  void vn_reference_lookup_call (gcall *, vn_reference_t *, vn_reference_t);
>  vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, tree,
>                                          vec<vn_reference_op_s> ,
> --- gcc/tree-ssa-sccvn.c.jj   2020-02-28 11:56:39.506941888 +0100
> +++ gcc/tree-ssa-sccvn.c      2020-02-28 12:53:58.500459041 +0100
> @@ -1686,9 +1686,9 @@ struct pd_data
>  struct vn_walk_cb_data
>  {
>    vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
> -                vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
> +                vn_lookup_kind vn_walk_kind_, bool tbaa_p_, bool masked_p_)
>      : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
> -      vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_),
> +      vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_), masked_p (masked_p_),
>        saved_operands (vNULL), first_set (-2), known_ranges (NULL)
>     {
>       if (!last_vuse_ptr)
> @@ -1705,6 +1705,7 @@ struct vn_walk_cb_data
>    tree last_vuse;
>    vn_lookup_kind vn_walk_kind;
>    bool tbaa_p;
> +  bool masked_p;
>    vec<vn_reference_op_s> saved_operands;
>  
>    /* The VDEFs of partial defs we come along.  */
> @@ -1731,9 +1732,12 @@ vn_walk_cb_data::finish (alias_set_type
>  {
>    if (first_set != -2)
>      set = first_set;
> -  return vn_reference_lookup_or_insert_for_pieces
> -      (last_vuse, set, vr->type,
> -       saved_operands.exists () ? saved_operands : vr->operands, val);
> +  if (masked_p)
> +    return (void *) val;
> +  vec<vn_reference_op_s> &operands
> +    = saved_operands.exists () ? saved_operands : vr->operands;
> +  return vn_reference_lookup_or_insert_for_pieces (last_vuse, set, vr->type,
> +                                                operands, val);
>  }
>  
>  /* pd_range splay-tree helpers.  */
> @@ -3380,13 +3384,13 @@ vn_reference_lookup_pieces (tree vuse, a
>      {
>        ao_ref r;
>        unsigned limit = param_sccvn_max_alias_queries_per_access;
> -      vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true);
> +      vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, false);
>        if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
> -     *vnresult =
> -       (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
> -                                               vn_reference_lookup_2,
> -                                               vn_reference_lookup_3,
> -                                               vuse_valueize, limit, &data);
> +     *vnresult
> +       = ((vn_reference_t)
> +          walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2,
> +                                  vn_reference_lookup_3, vuse_valueize,
> +                                  limit, &data));
>        gcc_checking_assert (vr1.operands == shared_lookup_references);
>      }
>  
> @@ -3402,15 +3406,19 @@ vn_reference_lookup_pieces (tree vuse, a
>     was NULL..  VNRESULT will be filled in with the vn_reference_t
>     stored in the hashtable if one exists.  When TBAA_P is false assume
>     we are looking up a store and treat it as having alias-set zero.
> -   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.  */
> +   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.
> +   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
> +   load is bitwise anded with MASK and so we are only interested in a subset
> +   of the bits and can ignore if the other bits are uninitialized or
> +   not initialized with constants.  */
>  
>  tree
>  vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
> -                  vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
> +                  vn_reference_t *vnresult, bool tbaa_p,
> +                  tree *last_vuse_ptr, tree mask)
>  {
>    vec<vn_reference_op_s> operands;
>    struct vn_reference_s vr1;
> -  tree cst;
>    bool valuezied_anything;
>  
>    if (vnresult)
> @@ -3422,11 +3430,11 @@ vn_reference_lookup (tree op, tree vuse,
>    vr1.type = TREE_TYPE (op);
>    vr1.set = get_alias_set (op);
>    vr1.hashcode = vn_reference_compute_hash (&vr1);
> -  if ((cst = fully_constant_vn_reference_p (&vr1)))
> -    return cst;
> +  if (mask == NULL_TREE)
> +    if (tree cst = fully_constant_vn_reference_p (&vr1))
> +      return cst;
>  
> -  if (kind != VN_NOWALK
> -      && vr1.vuse)
> +  if (kind != VN_NOWALK && vr1.vuse)
>      {
>        vn_reference_t wvnresult;
>        ao_ref r;
> @@ -3438,12 +3446,63 @@ vn_reference_lookup (tree op, tree vuse,
>                                            vr1.operands))
>       ao_ref_init (&r, op);
>        vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
> -                         last_vuse_ptr, kind, tbaa_p);
> -      wvnresult =
> -     (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
> -                                             vn_reference_lookup_2,
> -                                             vn_reference_lookup_3,
> -                                             vuse_valueize, limit, &data);
> +                         last_vuse_ptr, kind, tbaa_p, mask != NULL_TREE);
> +      if (mask)
> +     {
> +       wide_int w = wi::to_wide (mask);
> +       unsigned int pos = 0, prec = w.get_precision ();
> +       pd_data pd;
> +       pd.rhs = build_constructor (NULL_TREE, NULL);
> +       /* When bitwise and with a constant is done on a memory load,
> +          we don't really need all the bits to be defined or defined
> +          to constants, we don't really care what is in the position
> +          corresponding to 0 bits in the mask.
> +          So, push the ranges of those 0 bits in the mask as artificial
> +          zero stores and let the partial def handling code do the
> +          rest.  */
> +       while (pos < prec)
> +         {
> +           int tz = wi::ctz (w);
> +           if (pos + tz > prec)
> +             tz = prec - pos;
> +           if (tz)
> +             {
> +               if (BYTES_BIG_ENDIAN)
> +                 pd.offset = prec - pos - tz;
> +               else
> +                 pd.offset = pos;
> +               pd.size = tz;
> +               void *r = data.push_partial_def (pd, 0, prec);
> +               if (r == (void *) -1)
> +                 return NULL_TREE;
> +               gcc_assert (r == NULL_TREE);
> +             }
> +           pos += tz;
> +           if (pos == prec)
> +             break;
> +           w = wi::lrshift (w, tz);
> +           tz = wi::ctz (wi::bit_not (w));
> +           if (pos + tz > prec)
> +             tz = prec - pos;
> +           pos += tz;
> +           w = wi::lrshift (w, tz);
> +         }

I'd do this in the vn_walk_cb_data constructor instead - you already pass
mask != NULL_TREE, so you can just as well pass mask itself.
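
I.e. something along these lines (untested sketch; the masked_p flag then
becomes the mask tree itself, and since the partial-def set starts out empty
here, the (void *) -1 early-out from push_partial_def presumably turns into
an assert):

  vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
                   vn_lookup_kind vn_walk_kind_, bool tbaa_p_, tree mask_)
    : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
      vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_), mask (mask_),
      saved_operands (vNULL), first_set (-2), known_ranges (NULL)
    {
      /* ... existing body ... */
      if (mask)
        {
          /* The while loop over the zero-bit runs of MASK from
             vn_reference_lookup, pushing the {} partial defs.  */
          ...
        }
    }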

> +       gcc_assert (vnresult == NULL);
> +       tree res
> +         = ((tree)

Ick.

> +            walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
> +                                    vn_reference_lookup_2,
> +                                    vn_reference_lookup_3, vuse_valueize,
> +                                    limit, &data));
> +       return res;
> +     }
> +
> +      wvnresult
> +     = ((vn_reference_t)
> +        walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2,
> +                                vn_reference_lookup_3, vuse_valueize, limit,
> +                                &data));

I wonder if we can instead make the above return NULL (by having finish
return (void *) -1) and do something like

         if (!wvnresult && mask)
           return data.masked_result;

and thus avoid the type-"unsafe" return frobbing by storing the
result value in an extra member of the vn_walk_cb_data struct.
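
I.e. with a new tree masked_result member in vn_walk_cb_data, something like
(untested):

      void *
      vn_walk_cb_data::finish (alias_set_type set, tree val)
      {
        if (first_set != -2)
          set = first_set;
        if (masked_p)
          {
            masked_result = val;
            return (void *) -1;
          }
        return vn_reference_lookup_or_insert_for_pieces
            (last_vuse, set, vr->type,
             saved_operands.exists () ? saved_operands : vr->operands, val);
      }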

>        gcc_checking_assert (vr1.operands == shared_lookup_references);
>        if (wvnresult)
>       {
> @@ -3457,6 +3516,8 @@ vn_reference_lookup (tree op, tree vuse,
>  
>    if (last_vuse_ptr)
>      *last_vuse_ptr = vr1.vuse;
> +  if (mask)
> +    return NULL_TREE;
>    return vn_reference_lookup_1 (&vr1, vnresult);
>  }
>  
> @@ -4565,6 +4626,8 @@ valueized_wider_op (tree wide_type, tree
>    return NULL_TREE;
>  }
>  
> +static bool visit_reference_op_load (tree, tree, gimple *, tree);
> +
>  /* Visit a nary operator RHS, value number it, and return true if the
>     value number of LHS has changed as a result.  */
>  
> @@ -4664,7 +4727,35 @@ visit_nary_op (tree lhs, gassign *stmt)
>               }
>           }
>       }
> -    default:;
> +      break;
> +    case BIT_AND_EXPR:
> +      if (INTEGRAL_TYPE_P (type)
> +       && TREE_CODE (rhs1) == SSA_NAME
> +       && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
> +       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)
> +       && default_vn_walk_kind != VN_NOWALK
> +       && CHAR_BIT == 8
> +       && BITS_PER_UNIT == 8
> +       && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
> +       && !integer_all_onesp (gimple_assign_rhs2 (stmt))
> +       && !integer_zerop (gimple_assign_rhs2 (stmt)))
> +     {
> +       gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1));
> +       if (ass
> +           && !gimple_has_volatile_ops (ass)
> +           && vn_get_stmt_kind (ass) == VN_REFERENCE)
> +         {
> +           bool changed
> +             = visit_reference_op_load (lhs, gimple_assign_rhs1 (ass),
> +                                        ass, gimple_assign_rhs2 (stmt));

Any reason you piggy-back on visit_reference_op_load instead of calling
vn_reference_lookup directly?  I'd very much prefer the latter since it
doesn't even try to mess with the SSA lattice.
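
That is, the BIT_AND_EXPR case could do the lookup itself inside the
if (ass && ...) block, something like (untested; when the lookup fails you
just fall through to the generic handling):

          tree last_vuse = gimple_vuse (ass);
          tree result
            = vn_reference_lookup (gimple_assign_rhs1 (ass),
                                   gimple_vuse (ass), default_vn_walk_kind,
                                   NULL, true, &last_vuse,
                                   gimple_assign_rhs2 (stmt));
          if (result
              && useless_type_conversion_p (type, TREE_TYPE (result)))
            return set_ssa_val_to (lhs, result);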

> +           if (SSA_VAL (lhs) == lhs)
> +             vn_nary_op_insert_stmt (stmt, lhs);
> +           return changed;
> +         }
> +     }
> +      break;
> +    default:
> +      break;
>      }
>  
>    bool changed = set_ssa_val_to (lhs, lhs);
> @@ -4751,18 +4842,22 @@ visit_reference_op_call (tree lhs, gcall
>  }
>  
>  /* Visit a load from a reference operator RHS, part of STMT, value number it,
> -   and return true if the value number of the LHS has changed as a result.  */
> +   and return true if the value number of the LHS has changed as a result.
> +   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
> +   load is bitwise anded with MASK and so we are only interested in a subset
> +   of the bits and can ignore if the other bits are uninitialized or
> +   not initialized with constants.  */
>  
>  static bool
> -visit_reference_op_load (tree lhs, tree op, gimple *stmt)
> +visit_reference_op_load (tree lhs, tree op, gimple *stmt, tree mask)
>  {
>    bool changed = false;
>    tree last_vuse;
>    tree result;
>  
>    last_vuse = gimple_vuse (stmt);
> -  result = vn_reference_lookup (op, gimple_vuse (stmt),
> -                             default_vn_walk_kind, NULL, true, &last_vuse);
> +  result = vn_reference_lookup (op, gimple_vuse (stmt), default_vn_walk_kind,
> +                             NULL, true, &last_vuse, mask);
>  
>    /* We handle type-punning through unions by value-numbering based
>       on offset and size of the access.  Be prepared to handle a
> @@ -4788,7 +4883,8 @@ visit_reference_op_load (tree lhs, tree
>    else
>      {
>        changed = set_ssa_val_to (lhs, lhs);
> -      vn_reference_insert (op, lhs, last_vuse, NULL_TREE);
> +      if (mask == NULL_TREE)
> +     vn_reference_insert (op, lhs, last_vuse, NULL_TREE);
>      }
>  
>    return changed;
> @@ -5175,14 +5271,15 @@ visit_stmt (gimple *stmt, bool backedges
>             switch (vn_get_stmt_kind (ass))
>               {
>               case VN_NARY:
> -             changed = visit_nary_op (lhs, ass);
> -             break;
> +               changed = visit_nary_op (lhs, ass);
> +               break;
>               case VN_REFERENCE:
> -             changed = visit_reference_op_load (lhs, rhs1, ass);
> -             break;
> +               changed = visit_reference_op_load (lhs, rhs1, ass,
> +                                                  NULL_TREE);
> +               break;
>               default:
> -             changed = defs_to_varying (ass);
> -             break;
> +               changed = defs_to_varying (ass);
> +               break;
>               }
>           }
>       }
> --- gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c.jj     2020-02-28 12:01:42.680404857 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c        2020-02-28 12:01:42.679404872 +0100
> @@ -0,0 +1,29 @@
> +/* PR tree-optimization/93582 */
> +/* { dg-do compile { target int32 } } */
> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> +/* { dg-final { scan-tree-dump "return 72876566;" "fre1" { target le } } } */
> +/* { dg-final { scan-tree-dump "return 559957376;" "fre1" { target be } } } */
> +
> +union U {
> +  struct S { int a : 12, b : 5, c : 10, d : 5; } s;
> +  unsigned int i;
> +};
> +struct A { char a[12]; union U u; };
> +void bar (struct A *);
> +
> +unsigned
> +foo (void)
> +{
> +  struct A a;
> +  bar (&a);
> +  a.u.s.a = 1590;
> +  a.u.s.c = -404;
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +#define M 0x67e0a5f
> +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> +#define M 0xa5f067e0
> +#else
> +#define M 0
> +#endif
> +  return a.u.i & M;
> +}
> --- gcc/testsuite/gcc.dg/pr93582.c.jj 2020-02-28 12:01:42.679404872 +0100
> +++ gcc/testsuite/gcc.dg/pr93582.c    2020-02-28 12:01:42.679404872 +0100
> @@ -0,0 +1,57 @@
> +/* PR tree-optimization/93582 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -Warray-bounds" } */
> +
> +struct S {
> +  unsigned int s1:1;
> +  unsigned int s2:1;
> +  unsigned int s3:1;
> +  unsigned int s4:1;
> +  unsigned int s5:4;
> +  unsigned char s6;
> +  unsigned short s7;
> +  unsigned short s8;
> +};
> +struct T {
> +  int t1;
> +  int t2;
> +};
> +
> +static inline int
> +bar (struct S *x)
> +{
> +  if (x->s4)
> +    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;    /* { dg-bogus "array subscript 1 is outside array bounds of" } */
> +  else
> +    return 0;
> +}
> +
> +int
> +foo (int x, int y)
> +{
> +  struct S s;                                                                /* { dg-bogus "while referencing" } */
> +  s.s6 = x;
> +  s.s7 = y & 0x1FFF;
> +  s.s4 = 0;
> +  return bar (&s);
> +}
> +
> +static inline int
> +qux (struct S *x)
> +{
> +  int s4 = x->s4;
> +  if (s4)
> +    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;
> +  else
> +    return 0;
> +}
> +
> +int
> +baz (int x, int y)
> +{
> +  struct S s;
> +  s.s6 = x;
> +  s.s7 = y & 0x1FFF;
> +  s.s4 = 0;
> +  return qux (&s);
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr93582.c.jj  2020-02-28 12:27:51.280925113 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr93582.c     2020-02-28 12:26:17.272332573 +0100
> @@ -0,0 +1,22 @@
> +/* PR tree-optimization/93582 */
> +
> +short a;
> +int b, c;
> +
> +__attribute__((noipa)) void
> +foo (void)
> +{
> +  b = c;
> +  a &= 7;
> +}
> +
> +int
> +main ()
> +{
> +  c = 27;
> +  a = 14;
> +  foo ();
> +  if (b != 27 || a != 6)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
>       Jakub
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
