https://gcc.gnu.org/g:ae42ade2c98f31a4095e78267e00aaa591b46f01

commit ae42ade2c98f31a4095e78267e00aaa591b46f01
Author: Alexandre Oliva <ol...@adacore.com>
Date:   Tue Dec 17 22:19:07 2024 -0300

    ifcombine field merge: handle masks with sign extensions
    
    When a loaded field is sign-extended, masked and compared, we used to
    drop from the mask the bits past the original field width, which is
    not correct.

    Record that the mask covered copies of the sign bit before clipping
    it, and arrange to test the sign bit if we're comparing with zero.
    Punt in other cases.
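
    For illustration, a minimal sketch of the affected pattern, modeled on
    the new test case; the struct, mask value and function name below are
    invented for the example:

        struct s { short a; };

        /* Bit 59 of the sign-extended value is a copy of a's sign bit.
           Before this fix, clipping the mask to the 16-bit field width
           dropped that bit and lost the sign information the test
           relies on.  */
        static const long long mask = 1ll << 59;

        int is_negative (struct s *p)
        {
          return (p->a & mask) != 0;  /* true iff p->a < 0 */
        }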
    
    
    for  gcc/ChangeLog
    
            * gimple-fold.cc (decode_field_reference): Add psignbit
            parameter.  Set it if the mask references sign-extending
            bits.
            (fold_truth_andor_for_ifcombine): Adjust calls with new
            variables.  Swap them along with other r?_* variables.  Handle
            extended sign bit compares with zero.
    
    for  gcc/testsuite/ChangeLog
    
            * gcc.dg/field-merge-16.c: New.

Diff:
---
 gcc/gimple-fold.cc                    | 69 ++++++++++++++++++++++++++++++-----
 gcc/testsuite/gcc.dg/field-merge-16.c | 66 +++++++++++++++++++++++++++++++++
 gcc/tree-ssa-ifcombine.cc             |  5 ++-
 3 files changed, 128 insertions(+), 12 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 856ee5369a8c..b84269d6b417 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7514,6 +7514,9 @@ gimple_binop_def_p (enum tree_code code, tree t, tree op[2])
    is initially set to a mask with nonzero precision, that mask is
    combined with the found mask, or adjusted in precision to match.
 
+   *PSIGNBIT is set to TRUE if, before clipping to *PBITSIZE, the mask
+   encompassed bits that corresponded to extensions of the sign bit.
+
    *XOR_P is to be FALSE if EXP might be a XOR used in a compare, in which
    case, if XOR_CMP_OP is a zero constant, it will be overridden with *PEXP,
    *XOR_P will be set to TRUE, and the left-hand operand of the XOR will be
@@ -7533,7 +7536,8 @@ static tree
 decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
                        HOST_WIDE_INT *pbitpos,
                        bool *punsignedp, bool *preversep, bool *pvolatilep,
-                       wide_int *pand_mask, bool *xor_p, tree *xor_cmp_op,
+                       wide_int *pand_mask, bool *psignbit,
+                       bool *xor_p, tree *xor_cmp_op,
                        gimple **load, location_t loc[4])
 {
   tree exp = *pexp;
@@ -7545,6 +7549,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
   machine_mode mode;
 
   *load = NULL;
+  *psignbit = false;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -7708,12 +7713,23 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
   if (outer_type && *pbitsize == TYPE_PRECISION (outer_type))
     *punsignedp = TYPE_UNSIGNED (outer_type);
 
-  /* Make the mask the expected width.  */
-  if (and_mask.get_precision () != 0)
-    and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
-
   if (pand_mask)
-    *pand_mask = and_mask;
+    {
+      /* Make the mask the expected width.  */
+      if (and_mask.get_precision () != 0)
+       {
+         /* If the AND_MASK encompasses bits that would be extensions of
+            the sign bit, set *PSIGNBIT.  */
+         if (!unsignedp
+             && and_mask.get_precision () > *pbitsize
+             && (and_mask
+                 & wi::mask (*pbitsize, true, and_mask.get_precision ())) != 0)
+           *psignbit = true;
+         and_mask = wide_int::from (and_mask, *pbitsize, UNSIGNED);
+       }
+
+      *pand_mask = and_mask;
+    }
 
   return inner;
 }
@@ -7995,6 +8011,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
   HOST_WIDE_INT rnbitsize, rnbitpos, rnprec;
   bool ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   bool ll_reversep, lr_reversep, rl_reversep, rr_reversep;
+  bool ll_signbit, lr_signbit, rl_signbit, rr_signbit;
   scalar_int_mode lnmode, lnmode2, rnmode;
   wide_int ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   wide_int l_const, r_const;
@@ -8114,19 +8131,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
   bool l_xor = false, r_xor = false;
   ll_inner = decode_field_reference (&ll_arg, &ll_bitsize, &ll_bitpos,
                                     &ll_unsignedp, &ll_reversep, &volatilep,
-                                    &ll_and_mask, &l_xor, &lr_arg,
+                                    &ll_and_mask, &ll_signbit, &l_xor, &lr_arg,
                                     &ll_load, ll_loc);
   lr_inner = decode_field_reference (&lr_arg, &lr_bitsize, &lr_bitpos,
                                     &lr_unsignedp, &lr_reversep, &volatilep,
-                                    &lr_and_mask, &l_xor, 0,
+                                    &lr_and_mask, &lr_signbit, &l_xor, 0,
                                     &lr_load, lr_loc);
   rl_inner = decode_field_reference (&rl_arg, &rl_bitsize, &rl_bitpos,
                                     &rl_unsignedp, &rl_reversep, &volatilep,
-                                    &rl_and_mask, &r_xor, &rr_arg,
+                                    &rl_and_mask, &rl_signbit, &r_xor, &rr_arg,
                                     &rl_load, rl_loc);
   rr_inner = decode_field_reference (&rr_arg, &rr_bitsize, &rr_bitpos,
                                     &rr_unsignedp, &rr_reversep, &volatilep,
-                                    &rr_and_mask, &r_xor, 0,
+                                    &rr_and_mask, &rr_signbit, &r_xor, 0,
                                     &rr_load, rr_loc);
 
   /* It must be true that the inner operation on the lhs of each
@@ -8156,6 +8173,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
       std::swap (rl_unsignedp, rr_unsignedp);
       std::swap (rl_reversep, rr_reversep);
       std::swap (rl_and_mask, rr_and_mask);
+      std::swap (rl_signbit, rr_signbit);
       std::swap (rl_load, rr_load);
       std::swap (rl_loc, rr_loc);
     }
@@ -8165,6 +8183,37 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
       : (!ll_load != !rl_load))
     return 0;
 
+  /* ??? Can we do anything with these?  */
+  if (lr_signbit || rr_signbit)
+    return 0;
+
+  /* If the mask encompassed extensions of the sign bit before
+     clipping, try to include the sign bit in the test.  If we're not
+     comparing with zero, don't even try to deal with it (for now?).
+     If we've already commited to a sign test, the extended (before
+     clipping) mask could already be messing with it.  */
+  if (ll_signbit)
+    {
+      if (!integer_zerop (lr_arg) || lsignbit)
+       return 0;
+      wide_int sign = wi::mask (ll_bitsize - 1, true, ll_bitsize);
+      if (!ll_and_mask.get_precision ())
+       ll_and_mask = sign;
+      else
+       ll_and_mask |= sign;
+    }
+
+  if (rl_signbit)
+    {
+      if (!integer_zerop (rr_arg) || rsignbit)
+       return 0;
+      wide_int sign = wi::mask (rl_bitsize - 1, true, rl_bitsize);
+      if (!rl_and_mask.get_precision ())
+       rl_and_mask = sign;
+      else
+       rl_and_mask |= sign;
+    }
+
   if (TREE_CODE (lr_arg) == INTEGER_CST
       && TREE_CODE (rr_arg) == INTEGER_CST)
     {
diff --git a/gcc/testsuite/gcc.dg/field-merge-16.c b/gcc/testsuite/gcc.dg/field-merge-16.c
new file mode 100644
index 000000000000..2ca23ea663a4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-16.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ifcombine-details" } */
+
+/* Check that tests for sign-extension bits are handled correctly.  */
+
+struct s {
+  short a;
+  short b;
+  unsigned short c;
+  unsigned short d;
+} __attribute__ ((aligned (8)));
+
+struct s p = { -1,  0, 0, 0 };
+struct s q = {  0, -1, 0, 0 };
+struct s r = {  1,  1, 0, 0 };
+
+const long long mask = 1ll << (sizeof (long long) * __CHAR_BIT__ - 5);
+
+int fp ()
+{
+  if ((p.a & mask)
+      || (p.c & mask)
+      || p.d
+      || (p.b & mask))
+    return 1;
+  else
+    return -1;
+}
+
+int fq ()
+{
+  if ((q.a & mask)
+      || (q.c & mask)
+      || q.d
+      || (q.b & mask))
+    return 1;
+  else
+    return -1;
+}
+
+int fr ()
+{
+  if ((r.a & mask)
+      || (r.c & mask)
+      || r.d
+      || (r.b & mask))
+    return 1;
+  else
+    return -1;
+}
+
+int main () {
+  /* Unlikely, but play safe.  */
+  if (sizeof (long long) == sizeof (short))
+    return 0;
+  if (fp () < 0
+      || fq () < 0
+      || fr () > 0)
+    __builtin_abort ();
+  return 0;
+}
+
+/* We test .b after other fields instead of right after .a to give field
+   merging a chance, otherwise the masked compares with zero are combined by
+   other ifcombine logic.  The .c test is discarded by earlier optimizers.  */
+/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" } } */
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 02c2f5a29b56..c9399a106945 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -899,7 +899,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
       else if (bits1 == name2)
        std::swap (name1, bits1);
       else
-       return false;
+       goto bits_test_failed;
 
       /* As we strip non-widening conversions in finding a common
          name that is tested make sure to end up with an integral
@@ -944,7 +944,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
     }
 
   /* See if we have two comparisons that we can merge into one.  */
-  else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
+  else bits_test_failed:
+    if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
           && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison)
     {
       tree t, ts = NULL_TREE;

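Taken together, the new handling amounts to the following, modeled here
with ordinary integers rather than GCC's wide_int API; this is only a
sketch, it assumes the field is compared against zero (the patch punts
otherwise), and the helper name is invented:

  /* Clip AND_MASK, wider than the signed BITSIZE-bit field, to the field
     width; if the mask covered copies of the sign bit, keep testing the
     sign by setting the field's own sign bit in the clipped mask.  */
  static unsigned long long
  clip_mask_keeping_sign (unsigned long long and_mask, unsigned bitsize)
  {
    unsigned long long field = bitsize < 64 ? (1ull << bitsize) - 1 : ~0ull;
    unsigned long long clipped = and_mask & field;  /* clip to field width */
    if (and_mask & ~field)                 /* mask reached sign-bit copies */
      clipped |= 1ull << (bitsize - 1);    /* so test the sign bit itself */
    return clipped;
  }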