https://gcc.gnu.org/g:3eb6e1e3335fe0aabc79e75bf4d71366727c3971

commit r15-6174-g3eb6e1e3335fe0aabc79e75bf4d71366727c3971
Author: Alexandre Oliva <ol...@adacore.com>
Date:   Thu Dec 12 11:43:09 2024 -0300

    ifcombine field-merge: set upper bound for get_best_mode

    A bootstrap on aarch64-linux-gnu revealed that sometimes (for example,
    when building shorten_branches in final.cc) we will find such things
    as MEM <unsigned int>, where unsigned int happens to be a variant of
    the original unsigned int type, but with 64-bit alignment.

    This unusual alignment caused (i) get_inner_reference not to look
    inside the MEM, and (ii) get_best_mode to choose DImode instead of
    SImode to access the object.  We therefore built a BIT_FIELD_REF that
    attempted to select all 64 bits of a 32-bit object, which failed
    gimple verification ("position plus size exceeds size of referenced
    object") because there aren't that many bits in the unsigned int
    object.

    This patch avoids this failure mode by limiting the bitfield range to
    the size of the inner object, if that size is a known constant.

    With the bound in place we no longer create the BIT_FIELD_REF and
    instead reuse the load expr, but that still introduced a separate
    load.  It would presumably get optimized out later, but it is easy
    enough to avoid in the first place by reusing the SSA_NAME the value
    was originally loaded into, so I implemented that in
    make_bit_field_load.

    for gcc/ChangeLog

        * gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit the
        size of the bitregion in get_best_mode calls by the inner
        object's type size, if known.
        (make_bit_field_load): Reuse SSA_NAME if we're attempting to
        issue an identical load.

Diff:
---
 gcc/gimple-fold.cc | 52 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 5f69ffce591f..e1b4b65dd662 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7751,6 +7751,15 @@ make_bit_field_load (location_t loc, tree inner, tree orig_inner, tree type,
   if (!point)
     return ref;
 
+  /* If we're remaking the same load, reuse the SSA NAME it is already loaded
+     into.  */
+  if (gimple_assign_load_p (point)
+      && operand_equal_p (ref, gimple_assign_rhs1 (point)))
+    {
+      gcc_checking_assert (TREE_CODE (gimple_assign_lhs (point)) == SSA_NAME);
+      return gimple_assign_lhs (point);
+    }
+
   gimple_seq stmts = NULL;
   tree ret = force_gimple_operand (ref, &stmts, true, NULL_TREE);
@@ -8204,24 +8213,27 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
      to be relative to a field of that size.  */
   first_bit = MIN (ll_bitpos, rl_bitpos);
   end_bit = MAX (ll_bitpos + ll_bitsize, rl_bitpos + rl_bitsize);
-  if (get_best_mode (end_bit - first_bit, first_bit, 0, 0,
-                     TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
-                     volatilep, &lnmode))
+  HOST_WIDE_INT ll_align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+  poly_uint64 ll_end_region = 0;
+  if (TYPE_SIZE (TREE_TYPE (ll_inner))
+      && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (ll_inner))))
+    ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner)));
+  if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
+                     ll_align, BITS_PER_WORD, volatilep, &lnmode))
     l_split_load = false;
   else
     {
       /* Consider the possibility of recombining loads if any of the
          fields straddles across an alignment boundary, so that either
          part can be loaded along with the other field.  */
-      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
       HOST_WIDE_INT boundary = compute_split_boundary_from_align
-        (align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+        (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
 
       if (boundary < 0
-          || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
-                             align, BITS_PER_WORD, volatilep, &lnmode)
-          || !get_best_mode (end_bit - boundary, boundary, 0, 0,
-                             align, BITS_PER_WORD, volatilep, &lnmode2))
+          || !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region,
+                             ll_align, BITS_PER_WORD, volatilep, &lnmode)
+          || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region,
+                             ll_align, BITS_PER_WORD, volatilep, &lnmode2))
         return 0;
 
       /* If we can't have a single load, but can with two, figure out whether
@@ -8368,16 +8380,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
         and then we use two separate compares.  */
      first_bit = MIN (lr_bitpos, rr_bitpos);
      end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
-      if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
-                          TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
-                          volatilep, &rnmode))
+      HOST_WIDE_INT lr_align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+      poly_uint64 lr_end_region = 0;
+      if (TYPE_SIZE (TREE_TYPE (lr_inner))
+          && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (lr_inner))))
+        lr_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (lr_inner)));
+      if (!get_best_mode (end_bit - first_bit, first_bit, 0, lr_end_region,
+                          lr_align, BITS_PER_WORD, volatilep, &rnmode))
        {
          /* Consider the possibility of recombining loads if any of the
             fields straddles across an alignment boundary, so that either
             part can be loaded along with the other field.  */
-          HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
          HOST_WIDE_INT boundary = compute_split_boundary_from_align
-            (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+            (lr_align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
 
          if (boundary < 0
              /* If we're to split both, make sure the split point is
@@ -8385,10 +8400,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
              || (l_split_load
                  && (boundary - lr_bitpos
                      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
-              || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
-                                 align, BITS_PER_WORD, volatilep, &rnmode)
-              || !get_best_mode (end_bit - boundary, boundary, 0, 0,
-                                 align, BITS_PER_WORD, volatilep, &rnmode2))
+              || !get_best_mode (boundary - first_bit, first_bit,
+                                 0, lr_end_region,
+                                 lr_align, BITS_PER_WORD, volatilep, &rnmode)
+              || !get_best_mode (end_bit - boundary, boundary, 0, lr_end_region,
+                                 lr_align, BITS_PER_WORD, volatilep, &rnmode2))
            return 0;
 
          r_split_load = true;
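
For readers unfamiliar with the pass, here is a minimal, hypothetical C
sketch of the kind of source the ifcombine field-merge targets, plus a
rough analogue of the over-aligned 32-bit object described above.  It is
not the reduced testcase from the bootstrap failure; the struct, typedef
and function names are made up.

/* Hypothetical sketch, not the commit's testcase.  Two compares of the
   same object joined by &&, the shape fold_truth_andor_for_ifcombine
   may merge into a single wider load, mask and compare, provided
   get_best_mode approves a covering mode.  */

struct two_fields
{
  unsigned int lo;  /* would occupy bits 0..31 of a merged access */
  unsigned int hi;  /* would occupy bits 32..63 of a merged access */
};

int
both_match (const struct two_fields *p)
{
  return p->lo == 1 && p->hi == 2;
}

/* The failure mode involved a lone 32-bit object whose type variant
   carries 64-bit alignment, roughly like this made-up typedef:  */
typedef unsigned int u32_align8 __attribute__ ((aligned (8)));

u32_align8 g;

int
over_aligned (void)
{
  /* Before the fix, the 64-bit alignment could let get_best_mode pick
     DImode for the merged access, yielding a BIT_FIELD_REF of 64 bits
     out of a 32-bit object; bounding the bitregion by TYPE_SIZE of the
     inner type keeps the chosen mode within SImode.  */
  return (g & 1) == 1 && (g & 2) == 2;
}

The 0/ll_end_region and 0/lr_end_region pairs in the get_best_mode calls
above are the bitregion bounds; a bitregion end of 0 means unconstrained,
which is why passing the inner type's size there, when it is a known
constant, stops get_best_mode from choosing a mode that would read past
the end of the object.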