On 5 August 2016 at 18:06, Martin Jambor <mjam...@suse.cz> wrote:
> Hi,
Hi Martin,
Thanks for the review. Please find my responses inline.
>
> generally speaking, the ipa-cp.c and ipa-cp.[hc] bits look reasonable,
> but I have a few comments:
>
> On Thu, Aug 04, 2016 at 12:06:18PM +0530, Prathamesh Kulkarni wrote:
>> Hi,
>> This is a prototype patch for propagating known/unknown bits
>> inter-procedurally for integral types, using info obtained from
>> get_nonzero_bits ().
>>
>> Patch required making following changes:
>> a) To make info from get_nonzero_bits() available to ipa
>
> in the IPA phase, you should not be looking at SSA_NAMEs; those will not
> be available with LTO when you do not have access to function bodies
> and thus also not to SSA_NAMEs.  In IPA, you should only rely on what
> you have in jump functions.
>
>> , I had to remove
>> the !nonzero_p guard in ccp_finalize.  However, that triggered the following ICE
>> in get_ptr_info () for default_none.f95 (and several other Fortran tests)
>> with options: -fopenacc -O2
>> ICE: http://pastebin.com/KjD7HMQi
>> I confirmed with Richard that this was a latent issue.
>>
>>
>> I suppose we would want to gate this on some flag, say -fipa-bit-cp ?
>
> Yes, definitely.
Added -fipa-cp-bit (mirroring -fipa-cp-alignment).
>
>> I haven't yet gated it on the flag; I will do that in the next version of the patch.
>> I have added some very simple test-cases; I will try to add more
>> meaningful ones.
>
>
>>
>> The patch passes bootstrap+test on x86_64-unknown-linux-gnu,
>> and was cross-tested on arm*-*-* and aarch64*-*-*, with the exception
>> of some Fortran tests failing due to the above ICE.
>>
>> As next steps, I am planning to extend it to handle alignment propagation,
>> and do further testing (lto-bootstrap, chromium).
>> I would be grateful for feedback on the current patch.
>>
>> Thanks,
>> Prathamesh
>
>> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
>> index 5b6cb9a..b770f6a 100644
>> --- a/gcc/ipa-cp.c
>> +++ b/gcc/ipa-cp.c
>> @@ -120,6 +120,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "params.h"
>>  #include "ipa-inline.h"
>>  #include "ipa-utils.h"
>> +#include "tree-ssa-ccp.h"
>>
>>  template <typename valtype> class ipcp_value;
>>
>> @@ -266,6 +267,40 @@ private:
>>    bool meet_with_1 (unsigned new_align, unsigned new_misalign);
>>  };
>>
>> +/* Lattice of known bits, only capable of holding one value.
>> +   Similar to ccp_prop_value_t, mask represents which bits of value are constant.
>> +   If a bit in mask is set to 0, then the corresponding bit in
>> +   value is known to be constant.  */
>> +
>> +class ipcp_bits_lattice
>> +{
>> +public:
>> +  bool bottom_p () { return lattice_val == IPA_BITS_VARYING; }
>> +  bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; }
>> +  bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; }
>> +  bool set_to_bottom ();
>> +  bool set_to_constant (widest_int, widest_int, signop, unsigned);
>> +
>> +  widest_int get_value () { return value; }
>> +  widest_int get_mask () { return mask; }
>> +  signop get_sign () { return sgn; }
>> +  unsigned get_precision () { return precision; }
>> +
>> +  bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree);
>> +  bool meet_with (widest_int, widest_int, signop, unsigned);
>> +
>> +  void print (FILE *);
>> +
>> +private:
>> +  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } lattice_val;
>> +  widest_int value, mask;
>> +  signop sgn;
>> +  unsigned precision;
>> +
>> +  bool meet_with_1 (widest_int, widest_int);
>> +  void get_value_and_mask (tree, widest_int *, widest_int *);
>> +};
>> +
>>  /* Structure containing lattices for a parameter itself and for pieces of
>>     aggregates that are passed in the parameter or by a reference in a parameter
>>     plus some other useful flags.  */
>> @@ -281,6 +316,8 @@ public:
>>    ipcp_agg_lattice *aggs;
>>    /* Lattice describing known alignment.  */
>>    ipcp_alignment_lattice alignment;
>> +  /* Lattice describing known bits.  */
>> +  ipcp_bits_lattice bits_lattice;
>>    /* Number of aggregate lattices */
>>    int aggs_count;
>>    /* True if aggregate data were passed by reference (as opposed to by
>> @@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f)
>>      fprintf (f, "         Alignment %u, misalignment %u\n", align, misalign);
>>  }
>>
>> +void
>> +ipcp_bits_lattice::print (FILE *f)
>> +{
>> +  if (top_p ())
>> +    fprintf (f, "         Bits unknown (TOP)\n");
>> +  else if (bottom_p ())
>> +    fprintf (f, "         Bits unusable (BOTTOM)\n");
>> +  else
>> +    {
>> +      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
>> +      fprintf (f, ", mask = "); print_hex (get_mask (), f);
>> +      fprintf (f, "\n");
>> +    }
>> +}
>> +
>>  /* Print all ipcp_lattices of all functions to F.  */
>>
>>  static void
>> @@ -484,6 +536,7 @@ print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
>>         fprintf (f, "         ctxs: ");
>>         plats->ctxlat.print (f, dump_sources, dump_benefits);
>>         plats->alignment.print (f);
>> +       plats->bits_lattice.print (f);
>>         if (plats->virt_call)
>>           fprintf (f, "        virt_call flag set\n");
>>
>> @@ -911,6 +964,161 @@ ipcp_alignment_lattice::meet_with (const ipcp_alignment_lattice &other,
>>    return meet_with_1 (other.align, adjusted_misalign);
>>  }
>>
>> +/* Set lattice value to bottom, if it already isn't the case.  */
>> +
>> +bool
>> +ipcp_bits_lattice::set_to_bottom ()
>> +{
>> +  if (bottom_p ())
>> +    return false;
>> +  lattice_val = IPA_BITS_VARYING;
>> +  value = 0;
>> +  mask = -1;
>> +  return true;
>> +}
>> +
>> +/* Set to constant if it isn't already. Only meant to be called
>> +   when switching state from TOP.  */
>> +
>> +bool
>> +ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask,
>> +                                 signop sgn, unsigned precision)
>> +{
>> +  gcc_assert (top_p ());
>> +  this->lattice_val = IPA_BITS_CONSTANT;
>> +  this->value = value;
>> +  this->mask = mask;
>> +  this->sgn = sgn;
>> +  this->precision = precision;
>> +  return true;
>> +}
>> +
>> +/* Convert operand to value, mask form.  */
>> +
>> +void
>> +ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
>> +{
>> +  wide_int get_nonzero_bits (const_tree);
>> +
>> +  if (TREE_CODE (operand) == INTEGER_CST)
>> +    {
>> +      *valuep = wi::to_widest (operand);
>> +      *maskp = 0;
>> +    }
>> +  else if (TREE_CODE (operand) == SSA_NAME)
>
> IIUC, operand is the operand from a pass-through jump function and that
> should never be an SSA_NAME.  I have even looked at how we generate
> them and it seems fairly safe to say that they never are.  If you have
> seen an SSA_NAME here, it is a bug and please let me know because
> sooner or later it will cause an assert.
>
>> +    {
>> +      *valuep = 0;
>> +      *maskp = widest_int::from (get_nonzero_bits (operand), UNSIGNED);
>> +    }
>> +  else
>> +    gcc_unreachable ();
>
> The operand, however, can be any other is_gimple_ip_invariant tree.
> I assume that you could hit this gcc_unreachable only in a program
> with undefined behavior (or with a Fortran CONST_DECL?) but you should
> not ICE here.
Changed to:
if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }

I am not sure how to extract nonzero bits for an is_gimple_ip_invariant
operand if it's not an INTEGER_CST, so I set it to unknown
(value = 0, mask = -1).
Does this look OK?
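
For reference, here is a tiny self-contained illustration of the
(value, mask) encoding the lattice uses, written with plain uint64_t
instead of widest_int and with invented helper names (so just a sketch,
not the patch itself): a 0 bit in mask means the corresponding bit of
value is known, a 1 bit means it is unknown.

#include <stdint.h>

struct bits_pair { uint64_t value, mask; };

/* INTEGER_CST c: every bit is known.  */
static struct bits_pair
from_constant (uint64_t c)
{
  struct bits_pair b = { c, 0 };
  return b;
}

/* Any other invariant: nothing is known.  */
static struct bits_pair
from_unknown (void)
{
  struct bits_pair b = { 0, UINT64_MAX };
  return b;
}

/* SSA name argument, as recorded in the jump function:
   value 0, mask = its nonzero bits.  */
static struct bits_pair
from_nonzero_bits (uint64_t nz)
{
  struct bits_pair b = { 0, nz };
  return b;
}

int main (void)
{
  struct bits_pair c = from_constant (0x5);      /* value 101b, mask 000b */
  struct bits_pair s = from_nonzero_bits (0xff); /* only low 8 bits may be set */
  struct bits_pair u = from_unknown ();          /* completely varying */
  (void) c; (void) s; (void) u;
  return 0;
}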
>
>
>> +}
>> +
>> +/* Meet operation, similar to ccp_lattice_meet, we xor values
>> +   if this->value, value have different values at same bit positions, we want
>> +   to drop that bit to varying. Return true if mask is changed.
>> +   This function assumes that the lattice value is in CONSTANT state  */
>> +
>> +bool
>> +ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask)
>> +{
>> +  gcc_assert (constant_p ());
>> +
>> +  widest_int old_mask = this->mask;
>> +  this->mask = (this->mask | mask) | (this->value ^ value);
>> +
>> +  if (wi::sext (this->mask, this->precision) == -1)
>> +    return set_to_bottom ();
>> +
>> +  bool changed = this->mask != old_mask;
>> +  return changed;
>> +}
>> +
>> +/* Meet the bits lattice with operand
>> +   described by <value, mask, sgn, precision.  */
>> +
>> +bool
>> +ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
>> +                           signop sgn, unsigned precision)
>> +{
>> +  if (bottom_p ())
>> +    return false;
>> +
>> +  if (top_p ())
>> +    {
>> +      if (wi::sext (mask, precision) == -1)
>> +     return set_to_bottom ();
>> +      return set_to_constant (value, mask, sgn, precision);
>> +    }
>> +
>> +  return meet_with_1 (value, mask);
>
> What if precisions do not match?
Sorry, I don't understand. Since we extend to widest_int, wouldn't the
precision be the same?
bit_value_binop_1 requires the original precision for a few cases
(shifts, rotates, plus, mult), so
I was preserving the original precision in the jump function.
Later, in ipcp_update_bits (), the mask is set after narrowing to the
precision of the parameter.
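
To make that last point concrete, here is a rough plain-integer sketch of
what ipcp_update_bits ends up recording (the real code uses wide_int and
handles signedness; this ignores those details):

#include <stdint.h>

/* Bits that are unknown (set in mask) or known to be 1 (set in value)
   may be non-zero; everything is narrowed to the parameter's precision
   before being fed to set_nonzero_bits.  */
static uint64_t
nonzero_bits_for_param (uint64_t value, uint64_t mask, unsigned prec)
{
  uint64_t keep = prec >= 64 ? UINT64_MAX : (UINT64_C (1) << prec) - 1;
  return (mask | value) & keep;
}

So e.g. value = 0, mask = 0xff, prec = 32 gives nonzero bits 0xff for the
parameter's default def.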
>
>> +}
>> +
>> +/* Meet bits lattice with the result of bit_value_binop_1 (other, operand)
>> +   if code is binary operation or bit_value_unop_1 (other) if code is unary op.
>> +   In the case when code is nop_expr, no adjustment is required. */
>> +
>> +bool
>> +ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, enum tree_code code, tree operand)
>> +{
>> +  if (other.bottom_p ())
>> +    return set_to_bottom ();
>> +
>> +  if (bottom_p () || other.top_p ())
>> +    return false;
>> +
>> +  widest_int adjusted_value, adjusted_mask;
>> +
>> +  if (TREE_CODE_CLASS (code) == tcc_binary)
>> +    {
>> +      tree type = TREE_TYPE (operand);
>> +      gcc_assert (INTEGRAL_TYPE_P (type));
>> +      widest_int o_value, o_mask;
>> +      get_value_and_mask (operand, &o_value, &o_mask);
>> +
>> +      signop sgn = other.get_sign ();
>> +      unsigned prec = other.get_precision ();
>> +
>> +      bit_value_binop_1 (code, sgn, prec, &adjusted_value, &adjusted_mask,
>> +                      sgn, prec, other.get_value (), other.get_mask (),
>> +                      TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
>
> It is probably just me not being particularly sharp on a Friday
> afternoon and I might not understand the semantics of mask well (also,
> you did not document it :-), but... assume that we are looking at a
> binary AND operation, other comes from an SSA pointer and its mask
> would be binary 100 and its value 0 because that's what you set for
> ssa names in ipa-prop.h, and the operand is binary value 101, which
> means that get_value_and_mask returns mask 0 and value 101.  Now,
> bit_value_binop_1 would return value 0 & 101 = 0 and mask according to
>
> (m1 | m2) & ((v1 | m1) & (v2 | m2))
>
> so in our case
>
> (100b & 0) & ((0 | 100b) & (101b | 0)) = 0 & 100b = 0.
Shouldn't this be:
(100b | 0) & ((0 | 100b) & (101b | 0)) = 100b & 100b = 100b ;-)
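
Just to double-check the arithmetic, here is the AND case written out with
plain integers, using the value/mask formulas you quoted (value = v1 & v2,
mask = (m1 | m2) & ((v1 | m1) & (v2 | m2))):

#include <inttypes.h>
#include <stdio.h>

int main (void)
{
  /* 'other' (the SSA operand): value 000b, mask 100b.  */
  uint64_t v1 = 0x0, m1 = 0x4;
  /* The constant operand 101b: value 101b, mask 000b.  */
  uint64_t v2 = 0x5, m2 = 0x0;

  uint64_t value = v1 & v2;
  uint64_t mask = (m1 | m2) & ((v1 | m1) & (v2 | m2));

  /* Prints value = 0x0, mask = 0x4: bit 2 of the result stays unknown,
     the other bits are known zero, which matches the corrected
     computation above.  */
  printf ("value = 0x%" PRIx64 ", mask = 0x%" PRIx64 "\n", value, mask);
  return 0;
}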
>
> So both value and mask would be zero, which, if this was the only
> incoming value to the lattice, I am afraid you would later on in
> ipcp_update_bits interpret as meaning that no bits can be non-zero.  Yet the
> third bit clearly could be.
>
> I think that this is the only place where you interpret mask as a mask
> and actually use it for and-ing; everywhere else you interpret it
> basically only as the result of get_nonzero_bits and use it for
> or-ing.
>
>> +
>> +      if (wi::sext (adjusted_mask, prec) == -1)
>> +     return set_to_bottom ();
>> +    }
>> +
>> +  else if (TREE_CODE_CLASS (code) == tcc_unary)
>> +    {
>> +      signop sgn = other.get_sign ();
>> +      unsigned prec = other.get_precision ();
>> +
>> +      bit_value_unop_1 (code, sgn, prec, &adjusted_value,
>> +                     &adjusted_mask, sgn, prec, other.get_value (),
>> +                     other.get_mask ());
>> +
>> +      if (wi::sext (adjusted_mask, prec) == -1)
>> +     return set_to_bottom ();
>> +    }
>> +
>> +  else if (code == NOP_EXPR)
>> +    {
>> +      adjusted_value = other.value;
>> +      adjusted_mask = other.mask;
>> +    }
>> +
>> +  else
>> +    return set_to_bottom ();
>> +
>> +  if (top_p ())
>> +    {
>> +      if (wi::sext (adjusted_mask, other.get_precision ()) == -1)
>> +     return set_to_bottom ();
>> +      return set_to_constant (adjusted_value, adjusted_mask, other.get_sign (), other.get_precision ());
>> +    }
>> +  else
>> +    return meet_with_1 (adjusted_value, adjusted_mask);
>
> Again, What if precisions do not match?
>
>> +}
>> +
>>  /* Mark bot aggregate and scalar lattices as containing an unknown variable,
>>     return true is any of them has not been marked as such so far.  */
>>
>> @@ -922,6 +1130,7 @@ set_all_contains_variable (struct ipcp_param_lattices *plats)
>>    ret |= plats->ctxlat.set_contains_variable ();
>>    ret |= set_agg_lats_contain_variable (plats);
>>    ret |= plats->alignment.set_to_bottom ();
>> +  ret |= plats->bits_lattice.set_to_bottom ();
>>    return ret;
>>  }
>>
>> @@ -1003,6 +1212,7 @@ initialize_node_lattices (struct cgraph_node *node)
>>             plats->ctxlat.set_to_bottom ();
>>             set_agg_lats_to_bottom (plats);
>>             plats->alignment.set_to_bottom ();
>> +           plats->bits_lattice.set_to_bottom ();
>>           }
>>         else
>>           set_all_contains_variable (plats);
>> @@ -1621,6 +1831,57 @@ propagate_alignment_accross_jump_function (cgraph_edge *cs,
>>      }
>>  }
>>
>> +/* Propagate bits across jfunc that is associated with
>> +   edge cs and update dest_lattice accordingly.  */
>> +
>> +bool
>> +propagate_bits_accross_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
>> +                                   ipcp_bits_lattice *dest_lattice)
>> +{
>> +  if (dest_lattice->bottom_p ())
>> +    return false;
>> +
>> +  if (jfunc->type == IPA_JF_PASS_THROUGH)
>> +    {
>> +      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
>> +      enum tree_code code = ipa_get_jf_pass_through_operation (jfunc);
>> +      tree operand = NULL_TREE;
>> +
>> +      if (code != NOP_EXPR)
>> +     operand = ipa_get_jf_pass_through_operand (jfunc);
>> +
>> +      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
>> +      struct ipcp_param_lattices *src_lats
>> +     = ipa_get_parm_lattices (caller_info, src_idx);
>> +
>> +      /* Try to proapgate bits if src_lattice is bottom, but jfunc is known.
>
> propagate
oops, corrected in attached version.
>
>> +      for eg consider:
>> +      int f(int x)
>> +      {
>> +        g (x & 0xff);
>> +      }
>> +      Assume lattice for x is bottom, however we can still propagate
>> +      result of x & 0xff == 0xff, which gets computed during ccp1 pass
>> +      and we store it in jump function during analysis stage.  */
>> +
>> +      if (src_lats->bits_lattice.bottom_p ()
>> +       && jfunc->bits.known)
>> +     return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask,
>> +                                     jfunc->bits.sgn, jfunc->bits.precision);
>> +      else
>> +     return dest_lattice->meet_with (src_lats->bits_lattice, code, operand);
>> +    }
>> +
>> +  else if (jfunc->type == IPA_JF_ANCESTOR)
>> +    return dest_lattice->set_to_bottom ();
>> +
>> +  else if (jfunc->bits.known)
>> +    return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask,
>> +                                 jfunc->bits.sgn, jfunc->bits.precision);
>> +  else
>> +    return dest_lattice->set_to_bottom ();
>> +}
>> +
>
> ...
>
>> diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
>> index e32d078..d69a071 100644
>> --- a/gcc/ipa-prop.h
>> +++ b/gcc/ipa-prop.h
>> @@ -154,6 +154,16 @@ struct GTY(()) ipa_alignment
>>    unsigned misalign;
>>  };
>>
>> +/* Information about zero/non-zero bits.  */
>> +struct GTY(()) ipa_bits
>> +{
>> +  bool known;
>> +  widest_int value;
>> +  widest_int mask;
>> +  enum signop sgn;
>> +  unsigned precision;
>> +};
>
> Please order the fields according to their size, the largest first and
> add a comment describing each one of them.  As I explained above, it
> is not immediately clear why the mask would be a mask, for example.
Reordered the fields.
>
> Nevertheless, thanks for looking into this.  It would be nice to have
> this for pointers too, not least because we could represent
> non-NULL-ness this way, which could be very interesting for cloning
> and inlining.  But those are further steps, once normal propagation
> works.
Does this version look OK ?

Thanks,
Prathamesh
>
> Martin
diff --git a/gcc/common.opt b/gcc/common.opt
index 8a292ed..8bac0a2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1561,6 +1561,10 @@ fipa-cp-alignment
 Common Report Var(flag_ipa_cp_alignment) Optimization
 Perform alignment discovery and propagation to make Interprocedural constant propagation stronger.
 
+fipa-cp-bit
+Common Report Var(flag_ipa_cp_bit) Optimization
+Perform interprocedural bitwise constant propagation.
+
 fipa-profile
 Common Report Var(flag_ipa_profile) Init(0) Optimization
 Perform interprocedural profile propagation.
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 5b6cb9a..a26a1a6 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -120,6 +120,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "ipa-inline.h"
 #include "ipa-utils.h"
+#include "tree-ssa-ccp.h"
 
 template <typename valtype> class ipcp_value;
 
@@ -266,6 +267,40 @@ private:
   bool meet_with_1 (unsigned new_align, unsigned new_misalign);
 };
 
+/* Lattice of known bits, only capable of holding one value.
+   Similar to ccp_prop_value_t, mask represents which bits of value are constant.
+   If a bit in mask is set to 0, then the corresponding bit in
+   value is known to be constant.  */
+
+class ipcp_bits_lattice
+{
+public:
+  bool bottom_p () { return lattice_val == IPA_BITS_VARYING; }
+  bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; }
+  bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; }
+  bool set_to_bottom ();
+  bool set_to_constant (widest_int, widest_int, signop, unsigned);
+ 
+  widest_int get_value () { return value; }
+  widest_int get_mask () { return mask; }
+  signop get_sign () { return sgn; }
+  unsigned get_precision () { return precision; }
+
+  bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree);
+  bool meet_with (widest_int, widest_int, signop, unsigned);
+
+  void print (FILE *);
+
+private:
+  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } lattice_val;
+  widest_int value, mask;
+  signop sgn;
+  unsigned precision;
+
+  bool meet_with_1 (widest_int, widest_int); 
+  void get_value_and_mask (tree, widest_int *, widest_int *);
+}; 
+
 /* Structure containing lattices for a parameter itself and for pieces of
    aggregates that are passed in the parameter or by a reference in a parameter
    plus some other useful flags.  */
@@ -281,6 +316,8 @@ public:
   ipcp_agg_lattice *aggs;
   /* Lattice describing known alignment.  */
   ipcp_alignment_lattice alignment;
+  /* Lattice describing known bits.  */
+  ipcp_bits_lattice bits_lattice;
   /* Number of aggregate lattices */
   int aggs_count;
   /* True if aggregate data were passed by reference (as opposed to by
@@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f)
     fprintf (f, "         Alignment %u, misalignment %u\n", align, misalign);
 }
 
+void
+ipcp_bits_lattice::print (FILE *f)
+{
+  if (top_p ())
+    fprintf (f, "         Bits unknown (TOP)\n");
+  else if (bottom_p ())
+    fprintf (f, "         Bits unusable (BOTTOM)\n");
+  else
+    {
+      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
+      fprintf (f, ", mask = "); print_hex (get_mask (), f);
+      fprintf (f, "\n");
+    }
+}
+
 /* Print all ipcp_lattices of all functions to F.  */
 
 static void
@@ -484,6 +536,7 @@ print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
          fprintf (f, "         ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->alignment.print (f);
+         plats->bits_lattice.print (f);
          if (plats->virt_call)
            fprintf (f, "        virt_call flag set\n");
 
@@ -911,6 +964,159 @@ ipcp_alignment_lattice::meet_with (const ipcp_alignment_lattice &other,
   return meet_with_1 (other.align, adjusted_misalign);
 }
 
+/* Set lattice value to bottom, if it already isn't the case.  */
+
+bool
+ipcp_bits_lattice::set_to_bottom ()
+{
+  if (bottom_p ())
+    return false;
+  lattice_val = IPA_BITS_VARYING;
+  value = 0;
+  mask = -1;
+  return true;
+}
+
+/* Set to constant if it isn't already. Only meant to be called
+   when switching state from TOP.  */
+
+bool
+ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask,
+                                   signop sgn, unsigned precision)
+{
+  gcc_assert (top_p ());
+  this->lattice_val = IPA_BITS_CONSTANT;
+  this->value = value;
+  this->mask = mask;
+  this->sgn = sgn;
+  this->precision = precision;
+  return true;
+}
+
+/* Convert operand to value, mask form.  */
+
+void
+ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
+{
+  wide_int get_nonzero_bits (const_tree);
+
+  if (TREE_CODE (operand) == INTEGER_CST)
+    {
+      *valuep = wi::to_widest (operand); 
+      *maskp = 0;
+    }
+  else
+    {
+      *valuep = 0;
+      *maskp = -1;
+    }
+}
+
+/* Meet operation, similar to ccp_lattice_meet: we xor values; if this->value
+   and value differ at the same bit position, we want to drop that bit to
+   varying.  Return true if mask is changed.
+   This function assumes that the lattice value is in CONSTANT state.  */
+
+bool
+ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask)
+{
+  gcc_assert (constant_p ());
+  
+  widest_int old_mask = this->mask;
+  this->mask = (this->mask | mask) | (this->value ^ value);
+
+  if (wi::sext (this->mask, this->precision) == -1)
+    return set_to_bottom ();
+
+  bool changed = this->mask != old_mask;
+  return changed;
+}
+
+/* Meet the bits lattice with operand
+   described by <value, mask, sgn, precision>.  */
+
+bool
+ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
+                             signop sgn, unsigned precision)
+{
+  if (bottom_p ())
+    return false;
+
+  if (top_p ())
+    {
+      if (wi::sext (mask, precision) == -1)
+       return set_to_bottom ();
+      return set_to_constant (value, mask, sgn, precision);
+    }
+
+  return meet_with_1 (value, mask);
+}
+
+/* Meet bits lattice with the result of bit_value_binop (other, operand)
+   if code is binary operation or bit_value_unop (other) if code is unary op.
+   In the case when code is NOP_EXPR, no adjustment is required.  */
+
+bool
+ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, enum tree_code code, tree operand)
+{
+  if (other.bottom_p ())
+    return set_to_bottom ();
+
+  if (bottom_p () || other.top_p ())
+    return false;
+
+  widest_int adjusted_value, adjusted_mask;
+
+  if (TREE_CODE_CLASS (code) == tcc_binary)
+    {
+      tree type = TREE_TYPE (operand);
+      gcc_assert (INTEGRAL_TYPE_P (type));
+      widest_int o_value, o_mask;
+      get_value_and_mask (operand, &o_value, &o_mask);
+
+      signop sgn = other.get_sign ();
+      unsigned prec = other.get_precision ();
+
+      bit_value_binop (code, sgn, prec, &adjusted_value, &adjusted_mask,
+                      sgn, prec, other.get_value (), other.get_mask (),
+                      TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
+
+      if (wi::sext (adjusted_mask, prec) == -1)
+       return set_to_bottom ();
+    }
+
+  else if (TREE_CODE_CLASS (code) == tcc_unary)
+    {
+      signop sgn = other.get_sign ();
+      unsigned prec = other.get_precision ();
+
+      bit_value_unop (code, sgn, prec, &adjusted_value,
+                     &adjusted_mask, sgn, prec, other.get_value (),
+                     other.get_mask ());
+
+      if (wi::sext (adjusted_mask, prec) == -1)
+       return set_to_bottom ();
+    }
+
+  else if (code == NOP_EXPR)
+    {
+      adjusted_value = other.value;
+      adjusted_mask = other.mask;
+    }
+
+  else
+    return set_to_bottom ();
+
+  if (top_p ())
+    {
+      if (wi::sext (adjusted_mask, other.get_precision ()) == -1)
+       return set_to_bottom ();
+      return set_to_constant (adjusted_value, adjusted_mask, other.get_sign (), other.get_precision ());
+    }
+  else
+    return meet_with_1 (adjusted_value, adjusted_mask);
+}
+
 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
    return true is any of them has not been marked as such so far.  */
 
@@ -922,6 +1128,7 @@ set_all_contains_variable (struct ipcp_param_lattices *plats)
   ret |= plats->ctxlat.set_contains_variable ();
   ret |= set_agg_lats_contain_variable (plats);
   ret |= plats->alignment.set_to_bottom ();
+  ret |= plats->bits_lattice.set_to_bottom ();
   return ret;
 }
 
@@ -1003,6 +1210,7 @@ initialize_node_lattices (struct cgraph_node *node)
              plats->ctxlat.set_to_bottom ();
              set_agg_lats_to_bottom (plats);
              plats->alignment.set_to_bottom ();
+             plats->bits_lattice.set_to_bottom ();
            }
          else
            set_all_contains_variable (plats);
@@ -1621,6 +1829,57 @@ propagate_alignment_accross_jump_function (cgraph_edge *cs,
     }
 }
 
+/* Propagate bits across jfunc that is associated with
+   edge cs and update dest_lattice accordingly.  */
+
+bool
+propagate_bits_accross_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
+                                     ipcp_bits_lattice *dest_lattice)
+{
+  if (dest_lattice->bottom_p ())
+    return false;
+
+  if (jfunc->type == IPA_JF_PASS_THROUGH)
+    {
+      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      enum tree_code code = ipa_get_jf_pass_through_operation (jfunc);
+      tree operand = NULL_TREE;
+
+      if (code != NOP_EXPR)
+       operand = ipa_get_jf_pass_through_operand (jfunc);
+
+      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
+      struct ipcp_param_lattices *src_lats
+       = ipa_get_parm_lattices (caller_info, src_idx);
+
+      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
+	 For example, consider:
+	 int f(int x)
+	 {
+	   g (x & 0xff);
+	 }
+	 Assume lattice for x is bottom; however, we can still propagate
+	 the nonzero bits of x & 0xff, which are 0xff, computed during the
+	 ccp1 pass and stored in the jump function during the analysis stage.  */
+
+      if (src_lats->bits_lattice.bottom_p ()
+         && jfunc->bits.known)
+       return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask,
+                                       jfunc->bits.sgn, jfunc->bits.precision);
+      else
+       return dest_lattice->meet_with (src_lats->bits_lattice, code, operand);
+    }
+
+  else if (jfunc->type == IPA_JF_ANCESTOR)
+    return dest_lattice->set_to_bottom ();
+
+  else if (jfunc->bits.known) 
+    return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask,
+                                   jfunc->bits.sgn, jfunc->bits.precision);
+  else
+    return dest_lattice->set_to_bottom ();
+}
+
 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
    NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
    other cases, return false).  If there are no aggregate items, set
@@ -1968,6 +2227,8 @@ propagate_constants_accross_call (struct cgraph_edge *cs)
                                                          &dest_plats->ctxlat);
	  ret |= propagate_alignment_accross_jump_function (cs, jump_func,
							    &dest_plats->alignment);
+	  ret |= propagate_bits_accross_jump_function (cs, jump_func,
+						       &dest_plats->bits_lattice);
          ret |= propagate_aggs_accross_jump_function (cs, jump_func,
                                                       dest_plats);
        }
@@ -4592,6 +4853,83 @@ ipcp_store_alignment_results (void)
   }
 }
 
+/* Look up all the bits information that we have discovered and copy it over
+   to the transformation summary.  */
+
+static void
+ipcp_store_bits_results (void)
+{
+  cgraph_node *node;
+
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+    {
+      ipa_node_params *info = IPA_NODE_REF (node);
+      bool dumped_sth = false;
+      bool found_useful_result = false;
+
+      if (!opt_for_fn (node->decl, flag_ipa_cp_bit))
+       {
+         if (dump_file)
+	    fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
+                               "; -fipa-cp-bit: disabled.\n",
+                               node->name ());
+         continue;
+       }
+
+      if (info->ipcp_orig_node)
+       info = IPA_NODE_REF (info->ipcp_orig_node);
+
+      unsigned count = ipa_get_param_count (info);
+      for (unsigned i = 0; i < count; i++)
+       {
+         ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+         if (plats->bits_lattice.constant_p ())
+           {
+             found_useful_result = true;
+             break;
+           }
+       }
+
+    if (!found_useful_result)
+      continue;
+
+    ipcp_grow_transformations_if_necessary ();
+    ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node);
+    vec_safe_reserve_exact (ts->bits, count);
+
+    for (unsigned i = 0; i < count; i++)
+      {
+       ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+       ipa_bits bits_jfunc;                     
+
+       if (plats->bits_lattice.constant_p ())
+         {
+           bits_jfunc.known = true;
+           bits_jfunc.value = plats->bits_lattice.get_value ();
+           bits_jfunc.mask = plats->bits_lattice.get_mask ();
+           bits_jfunc.sgn = plats->bits_lattice.get_sign ();
+           bits_jfunc.precision = plats->bits_lattice.get_precision ();
+         }
+       else
+         bits_jfunc.known = false;
+
+       ts->bits->quick_push (bits_jfunc);
+       if (!dump_file || !bits_jfunc.known)
+         continue;
+       if (!dumped_sth)
+         {
+           fprintf (dump_file, "Propagated bits info for function %s/%i:\n",
+                               node->name (), node->order);
+           dumped_sth = true;
+         }
+       fprintf (dump_file, " param %i: value = ", i);
+       print_hex (bits_jfunc.value, dump_file);
+       fprintf (dump_file, ", mask = ");
+       print_hex (bits_jfunc.mask, dump_file);
+       fprintf (dump_file, "\n");
+      }
+    }
+}
 /* The IPCP driver.  */
 
 static unsigned int
@@ -4625,6 +4963,8 @@ ipcp_driver (void)
   ipcp_decision_stage (&topo);
   /* Store results of alignment propagation. */
   ipcp_store_alignment_results ();
+  /* Store results of bits propagation.  */
+  ipcp_store_bits_results ();
 
   /* Free all IPCP structures.  */
   free_toporder_info (&topo);
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 132b622..46955bb 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -302,6 +302,15 @@ ipa_print_node_jump_functions_for_edge (FILE *f, struct cgraph_edge *cs)
        }
       else
        fprintf (f, "         Unknown alignment\n");
+
+      if (jump_func->bits.known)
+       {
+         fprintf (f, "         value: "); print_hex (jump_func->bits.value, f);
+         fprintf (f, ", mask: "); print_hex (jump_func->bits.mask, f);
+         fprintf (f, "\n");
+       }
+      else
+       fprintf (f, "         Unknown bits\n");
     }
 }
 
@@ -381,6 +390,7 @@ ipa_set_jf_unknown (struct ipa_jump_func *jfunc)
 {
   jfunc->type = IPA_JF_UNKNOWN;
   jfunc->alignment.known = false;
+  jfunc->bits.known = false;
 }
 
 /* Set JFUNC to be a copy of another jmp (to be used by jump function
@@ -1674,6 +1684,27 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi,
       else
        gcc_assert (!jfunc->alignment.known);
 
+      if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
+         && (TREE_CODE (arg) == SSA_NAME || TREE_CODE (arg) == INTEGER_CST))
+       {
+         jfunc->bits.known = true;
+         jfunc->bits.sgn = TYPE_SIGN (TREE_TYPE (arg));
+         jfunc->bits.precision = TYPE_PRECISION (TREE_TYPE (arg));
+         
+         if (TREE_CODE (arg) == SSA_NAME)
+           {
+             jfunc->bits.value = 0;
+	      jfunc->bits.mask = widest_int::from (get_nonzero_bits (arg), UNSIGNED);
+           }
+         else
+           {
+             jfunc->bits.value = wi::to_widest (arg);
+             jfunc->bits.mask = 0;
+           }
+       }
+      else
+       gcc_assert (!jfunc->bits.known);
+
       if (is_gimple_ip_invariant (arg)
          || (TREE_CODE (arg) == VAR_DECL
              && is_global_var (arg)
@@ -3690,6 +3721,18 @@ ipa_node_params_t::duplicate(cgraph_node *src, cgraph_node *dst,
       for (unsigned i = 0; i < src_alignments->length (); ++i)
        dst_alignments->quick_push ((*src_alignments)[i]);
     }
+
+  if (src_trans && vec_safe_length (src_trans->bits) > 0)
+    {
+      ipcp_grow_transformations_if_necessary ();
+      src_trans = ipcp_get_transformation_summary (src);
+      const vec<ipa_bits, va_gc> *src_bits = src_trans->bits;
+      vec<ipa_bits, va_gc> *&dst_bits
+       = ipcp_get_transformation_summary (dst)->bits;
+      vec_safe_reserve_exact (dst_bits, src_bits->length ());
+      for (unsigned i = 0; i < src_bits->length (); ++i)
+       dst_bits->quick_push ((*src_bits)[i]);
+    }
 }
 
 /* Register our cgraph hooks if they are not already there.  */
@@ -4609,6 +4652,17 @@ ipa_write_jump_function (struct output_block *ob,
       streamer_write_uhwi (ob, jump_func->alignment.align);
       streamer_write_uhwi (ob, jump_func->alignment.misalign);
     }
+
+  bp = bitpack_create (ob->main_stream);
+  bp_pack_value (&bp, jump_func->bits.known, 1);
+  streamer_write_bitpack (&bp);
+  if (jump_func->bits.known)
+    {
+      streamer_write_widest_int (ob, jump_func->bits.value);
+      streamer_write_widest_int (ob, jump_func->bits.mask);
+      streamer_write_enum (ob->main_stream, signop, UNSIGNED + 1, jump_func->bits.sgn);
+      streamer_write_uhwi (ob, jump_func->bits.precision);
+    }   
 }
 
 /* Read in jump function JUMP_FUNC from IB.  */
@@ -4685,6 +4739,19 @@ ipa_read_jump_function (struct lto_input_block *ib,
     }
   else
     jump_func->alignment.known = false;
+
+  bp = streamer_read_bitpack (ib);
+  bool bits_known = bp_unpack_value (&bp, 1);
+  if (bits_known)
+    {
+      jump_func->bits.known = true;
+      jump_func->bits.value = streamer_read_widest_int (ib);
+      jump_func->bits.mask = streamer_read_widest_int (ib);
+      jump_func->bits.sgn = streamer_read_enum (ib, signop, UNSIGNED + 1);
+      jump_func->bits.precision = streamer_read_uhwi (ib); 
+    }
+  else
+    jump_func->bits.known = false;
 }
 
 /* Stream out parts of cgraph_indirect_call_info corresponding to CS that are
@@ -5050,6 +5117,31 @@ write_ipcp_transformation_info (output_block *ob, cgraph_node *node)
     }
   else
     streamer_write_uhwi (ob, 0);
+
+  ts = ipcp_get_transformation_summary (node);
+  if (ts && vec_safe_length (ts->bits) > 0)
+    {
+      count = ts->bits->length ();
+      streamer_write_uhwi (ob, count);
+
+      for (unsigned i = 0; i < count; ++i)
+       {
+         const ipa_bits& bits_jfunc = (*ts->bits)[i];
+         struct bitpack_d bp = bitpack_create (ob->main_stream);
+         bp_pack_value (&bp, bits_jfunc.known, 1);
+         streamer_write_bitpack (&bp);
+         if (bits_jfunc.known)
+           {
+             streamer_write_widest_int (ob, bits_jfunc.value);
+             streamer_write_widest_int (ob, bits_jfunc.mask);
+             streamer_write_enum (ob->main_stream, signop,
+                                  UNSIGNED + 1, bits_jfunc.sgn);
+             streamer_write_uhwi (ob, bits_jfunc.precision);
+           }
+       }
+    }
+  else
+    streamer_write_uhwi (ob, 0);
 }
 
 /* Stream in the aggregate value replacement chain for NODE from IB.  */
@@ -5102,6 +5194,28 @@ read_ipcp_transformation_info (lto_input_block *ib, cgraph_node *node,
            }
        }
     }
+
+  count = streamer_read_uhwi (ib);
+  if (count > 0)
+    {
+      ipcp_grow_transformations_if_necessary ();
+      ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node);
+      vec_safe_grow_cleared (ts->bits, count);
+
+      for (i = 0; i < count; i++)
+       {
+         ipa_bits& bits_jfunc = (*ts->bits)[i];
+         struct bitpack_d bp = streamer_read_bitpack (ib);
+         bits_jfunc.known = bp_unpack_value (&bp, 1);
+         if (bits_jfunc.known)
+           {
+             bits_jfunc.value = streamer_read_widest_int (ib);
+             bits_jfunc.mask = streamer_read_widest_int (ib);
+             bits_jfunc.sgn = streamer_read_enum (ib, signop, UNSIGNED + 1);
+             bits_jfunc.precision = streamer_read_uhwi (ib);
+           }
+       }
+    }
 }
 
 /* Write all aggregate replacement for nodes in set.  */
@@ -5404,6 +5518,54 @@ ipcp_update_alignments (struct cgraph_node *node)
     }
 }
 
+/* Update bits info of formal parameters as described in
+   ipcp_transformation_summary.  */
+
+static void
+ipcp_update_bits (struct cgraph_node *node)
+{
+  tree parm = DECL_ARGUMENTS (node->decl);
+  tree next_parm = parm;
+  ipcp_transformation_summary *ts = ipcp_get_transformation_summary (node);
+
+  if (!ts || vec_safe_length (ts->bits) == 0)
+    return;
+
+  vec<ipa_bits, va_gc> &bits = *ts->bits;
+  unsigned count = bits.length ();
+
+  for (unsigned i = 0; i < count; ++i, parm = next_parm)
+    {
+      if (node->clone.combined_args_to_skip
+         && bitmap_bit_p (node->clone.combined_args_to_skip, i))
+       continue;
+
+      gcc_checking_assert (parm);
+      next_parm = DECL_CHAIN (parm);
+
+      if (!bits[i].known
+         || !INTEGRAL_TYPE_P (TREE_TYPE (parm))
+         || !is_gimple_reg (parm))
+       continue;       
+
+      tree ddef = ssa_default_def (DECL_STRUCT_FUNCTION (node->decl), parm);
+      if (!ddef)
+       continue;
+
+      if (dump_file)
+       {
+         fprintf (dump_file, "Adjusting mask for param %u to ", i); 
+         print_hex (bits[i].mask, dump_file);
+         fprintf (dump_file, "\n");
+       }
+
+      unsigned prec = TYPE_PRECISION (TREE_TYPE (ddef));
+      wide_int nonzero_bits = wide_int::from (bits[i].mask, prec, UNSIGNED)
+			      | wide_int::from (bits[i].value, prec, bits[i].sgn);
+      set_nonzero_bits (ddef, nonzero_bits);
+    }
+}
+
 /* IPCP transformation phase doing propagation of aggregate values.  */
 
 unsigned int
@@ -5423,6 +5585,7 @@ ipcp_transform_function (struct cgraph_node *node)
             node->name (), node->order);
 
   ipcp_update_alignments (node);
+  ipcp_update_bits (node);
   aggval = ipa_get_agg_replacements_for_node (node);
   if (!aggval)
       return 0;
diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
index e32d078..1b9d0ef 100644
--- a/gcc/ipa-prop.h
+++ b/gcc/ipa-prop.h
@@ -154,6 +154,23 @@ struct GTY(()) ipa_alignment
   unsigned misalign;
 };
 
+/* Information about zero/non-zero bits.  */
+struct GTY(()) ipa_bits
+{
+  /* The propagated value.  */
+  widest_int value;
+  /* Mask corresponding to the value.
+     Similar to ccp_prop_value_t, if the xth bit of mask is 0,
+     then the xth bit of value is constant.  */
+  widest_int mask;
+  /* Original precision of the value.  */
+  unsigned precision;
+  /* Sign obtained from TYPE_SIGN.  */
+  enum signop sgn;
+  /* True if jump function is known.  */
+  bool known;
+};
+
 /* A jump function for a callsite represents the values passed as actual
    arguments of the callsite. See enum jump_func_type for the various
    types of jump functions supported.  */
@@ -166,6 +183,9 @@ struct GTY (()) ipa_jump_func
   /* Information about alignment of pointers. */
   struct ipa_alignment alignment;
 
+  /* Information about zero/non-zero bits.  */
+  struct ipa_bits bits;
+
   enum jump_func_type type;
   /* Represents a value of a jump function.  pass_through is used only in jump
     function context.  constant represents the actual constant in constant jump
@@ -482,6 +502,8 @@ struct GTY(()) ipcp_transformation_summary
   ipa_agg_replacement_value *agg_values;
   /* Alignment information for pointers.  */
   vec<ipa_alignment, va_gc> *alignments;
+  /* Known bits information.  */
+  vec<ipa_bits, va_gc> *bits;
 };
 
 void ipa_set_node_agg_value_chain (struct cgraph_node *node,
diff --git a/gcc/opts.c b/gcc/opts.c
index 4053fb1..cde9a7b 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -505,6 +505,7 @@ static const struct default_options default_options_table[] =
     { OPT_LEVELS_2_PLUS, OPT_ftree_switch_conversion, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fipa_cp, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fipa_cp_alignment, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_fipa_cp_bit, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fipa_sra, NULL, 1 },
@@ -1422,6 +1423,9 @@ enable_fdo_optimizations (struct gcc_options *opts,
   if (!opts_set->x_flag_ipa_cp_alignment
       && value && opts->x_flag_ipa_cp)
     opts->x_flag_ipa_cp_alignment = value;
+  if (!opts_set->x_flag_ipa_cp_bit
+      && value && opts->x_flag_ipa_cp)
+    opts->x_flag_ipa_cp_bit = value;
   if (!opts_set->x_flag_predictive_commoning)
     opts->x_flag_predictive_commoning = value;
   if (!opts_set->x_flag_unswitch_loops)
