https://gcc.gnu.org/g:3eb6e1e3335fe0aabc79e75bf4d71366727c3971

commit r15-6174-g3eb6e1e3335fe0aabc79e75bf4d71366727c3971
Author: Alexandre Oliva <ol...@adacore.com>
Date:   Thu Dec 12 11:43:09 2024 -0300

    ifcombine field-merge: set upper bound for get_best_mode
    
    A bootstrap on aarch64-linux-gnu revealed that sometimes (for example,
    when building shorten_branches in final.cc) we will find such things
    as MEM <unsigned int>, where unsigned int happens to be a variant of
    the original unsigned int type, but with 64-bit alignment.  This
    unusual alignment caused (i) get_inner_reference not to look inside
    the MEM, and (ii) get_best_mode to choose DImode instead of SImode to
    access the object, so we built a BIT_FIELD_REF that attempted to
    select all 64 bits of a 32-bit object, which failed gimple
    verification ("position plus size exceeds size of referenced object")
    because there aren't that many bits in the unsigned int object.
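
    As a purely illustrative sketch (not the actual reproducer from
    final.cc; type and field names are made up), the scenario involves a
    32-bit unsigned int variant whose alignment has been raised to 64
    bits:

      /* Hypothetical example of an over-aligned unsigned int variant.  */
      typedef unsigned int ui32_a8 __attribute__ ((aligned (8)));

      struct s { ui32_a8 a; unsigned int b; };

      int
      f (struct s *p)
      {
        /* Two field tests that ifcombine may try to merge into a single
           wider access; for an over-aligned 32-bit object like p->a,
           get_best_mode used to pick DImode, wider than the object
           itself.  */
        return (p->a & 1) && (p->b & 2);
      }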
    
    This patch avoids that failure mode by limiting the bit region passed
    to get_best_mode to the size of the inner object, if that size is a
    known constant.
    
    This enables us to avoid creating a BIT_FIELD_REF and to reuse the
    load expr instead, but we still introduced a separate load that would
    presumably get optimized out.  That duplicate load is easy enough to
    avoid in the first place by reusing the SSA_NAME the value was
    originally loaded into, so I implemented that in make_bit_field_load.
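
    Condensed from the hunks below (a simplified sketch, not the exact
    patch text), the two changes amount to:

      /* Cap the bit region handed to get_best_mode at the inner
         object's type size, when that size is a known constant, so the
         chosen mode cannot extend past the object.  */
      poly_uint64 ll_end_region = 0;
      if (TYPE_SIZE (TREE_TYPE (ll_inner))
          && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (ll_inner))))
        ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner)));

      /* In make_bit_field_load, if the insertion point already loads
         the very same ref, reuse its SSA_NAME instead of issuing a
         duplicate load.  */
      if (gimple_assign_load_p (point)
          && operand_equal_p (ref, gimple_assign_rhs1 (point)))
        return gimple_assign_lhs (point);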
    
    
    for  gcc/ChangeLog
    
            * gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit the
            size of the bitregion in get_best_mode calls by the inner
            object's type size, if known.
            (make_bit_field_load): Reuse SSA_NAME if we're attempting to
            issue an identical load.

Diff:
---
 gcc/gimple-fold.cc | 52 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 5f69ffce591f..e1b4b65dd662 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7751,6 +7751,15 @@ make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
   if (!point)
     return ref;
 
+  /* If we're remaking the same load, reuse the SSA NAME it is already loaded
+     into.  */
+  if (gimple_assign_load_p (point)
+      && operand_equal_p (ref, gimple_assign_rhs1 (point)))
+    {
+      gcc_checking_assert (TREE_CODE (gimple_assign_lhs (point)) == SSA_NAME);
+      return gimple_assign_lhs (point);
+    }
+
   gimple_seq stmts = NULL;
   tree ret = force_gimple_operand (ref, &stmts, true, NULL_TREE);
 
@@ -8204,24 +8213,27 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
      to be relative to a field of that size.  */
   first_bit = MIN (ll_bitpos, rl_bitpos);
   end_bit = MAX (ll_bitpos + ll_bitsize, rl_bitpos + rl_bitsize);
-  if (get_best_mode (end_bit - first_bit, first_bit, 0, 0,
-                    TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
-                    volatilep, &lnmode))
+  HOST_WIDE_INT ll_align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+  poly_uint64 ll_end_region = 0;
+  if (TYPE_SIZE (TREE_TYPE (ll_inner))
+      && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (ll_inner))))
+    ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner)));
+  if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
+                    ll_align, BITS_PER_WORD, volatilep, &lnmode))
     l_split_load = false;
   else
     {
       /* Consider the possibility of recombining loads if any of the
         fields straddles across an alignment boundary, so that either
         part can be loaded along with the other field.  */
-      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
       HOST_WIDE_INT boundary = compute_split_boundary_from_align
-       (align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+       (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
 
       if (boundary < 0
-         || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
-                            align, BITS_PER_WORD, volatilep, &lnmode)
-         || !get_best_mode (end_bit - boundary, boundary, 0, 0,
-                            align, BITS_PER_WORD, volatilep, &lnmode2))
+         || !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region,
+                            ll_align, BITS_PER_WORD, volatilep, &lnmode)
+         || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region,
+                            ll_align, BITS_PER_WORD, volatilep, &lnmode2))
        return 0;
 
       /* If we can't have a single load, but can with two, figure out whether
@@ -8368,16 +8380,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
         and then we use two separate compares.  */
       first_bit = MIN (lr_bitpos, rr_bitpos);
       end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
-      if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
-                         TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
-                         volatilep, &rnmode))
+      HOST_WIDE_INT lr_align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+      poly_uint64 lr_end_region = 0;
+      if (TYPE_SIZE (TREE_TYPE (lr_inner))
+         && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (lr_inner))))
+       lr_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (lr_inner)));
+      if (!get_best_mode (end_bit - first_bit, first_bit, 0, lr_end_region,
+                         lr_align, BITS_PER_WORD, volatilep, &rnmode))
        {
          /* Consider the possibility of recombining loads if any of the
             fields straddles across an alignment boundary, so that either
             part can be loaded along with the other field.  */
-         HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
          HOST_WIDE_INT boundary = compute_split_boundary_from_align
-           (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+           (lr_align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
 
          if (boundary < 0
              /* If we're to split both, make sure the split point is
@@ -8385,10 +8400,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
              || (l_split_load
                  && (boundary - lr_bitpos
                      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
-             || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
-                                align, BITS_PER_WORD, volatilep, &rnmode)
-             || !get_best_mode (end_bit - boundary, boundary, 0, 0,
-                                align, BITS_PER_WORD, volatilep, &rnmode2))
+             || !get_best_mode (boundary - first_bit, first_bit,
+                                0, lr_end_region,
+                                lr_align, BITS_PER_WORD, volatilep, &rnmode)
+             || !get_best_mode (end_bit - boundary, boundary, 0, lr_end_region,
+                                lr_align, BITS_PER_WORD, volatilep, &rnmode2))
            return 0;
 
          r_split_load = true;
