This is a generalized version of the patch I sent last week. This variant fixes the remainder of the insv_1 and insv_2 failures on aarch64. In combination with an updated patch from Steve, this should be enough to fix PR 87763.
A couple of notes.

First, it reuses combine's make_field_assignment in the aarch64
backend, so we don't have to duplicate any of that logic.  I scanned
make_field_assignment and its children to make sure they didn't use
any data that is only valid during the combine pass, but I could have
missed something.  This is one of those cases where encapsulating the
pass-specific bits in a class really helps avoid problems...  Just
saying...

Second, it relaxes the main insv pattern to allow an immediate in the
operand predicate, but forces it into a register during LRA.  I don't
generally like having predicates that are looser than the constraints,
but it really helps here: basically, we want the pattern to see the
constant field we're going to insert.

I'm primarily looking for feedback from the aarch64 maintainers on the
pattern and from Segher on the reuse of make_field_assignment.

I've bootstrapped and regression tested this on aarch64 and aarch64_be,
as well as x86_64, ppc64le, ppc64, i686, etc.  It's also been
regression tested on a nice variety of *-elf targets.

Segher, Richard/James, OK for the trunk?

Jeff
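For concreteness, the kind of source the new path targets looks
roughly like this (an illustrative sketch written for this message --
the function name and constants are mine, not a testcase from the PR
or the testsuite):

  /* Insert the constant 0x2a into bits [16,23] of X.  With the patch,
     the and/ior combination below should be recognized as a bitfield
     insertion and come out as a single bfi (after the constant is
     forced into a register).  */
  unsigned int
  f (unsigned int x)
  {
    return (x & ~(0xffu << 16)) | (0x2au << 16);
  }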
diff --git a/gcc/combine.c b/gcc/combine.c
index 5616e6b1bac..03be306f5bc 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -449,8 +449,6 @@ static rtx expand_compound_operation (rtx);
 static const_rtx expand_field_assignment (const_rtx);
 static rtx make_extraction (machine_mode, rtx, HOST_WIDE_INT, rtx,
			    unsigned HOST_WIDE_INT, int, int, int);
-static int get_pos_from_mask (unsigned HOST_WIDE_INT,
-			      unsigned HOST_WIDE_INT *);
 static rtx canon_reg_for_combine (rtx, rtx);
 static rtx force_int_to_mode (rtx, scalar_int_mode, scalar_int_mode,
			      scalar_int_mode, unsigned HOST_WIDE_INT, int);
@@ -459,7 +457,6 @@ static rtx force_to_mode (rtx, machine_mode,
 static rtx if_then_else_cond (rtx, rtx *, rtx *);
 static rtx known_cond (rtx, enum rtx_code, rtx, rtx);
 static int rtx_equal_for_field_assignment_p (rtx, rtx, bool = false);
-static rtx make_field_assignment (rtx);
 static rtx apply_distributive_law (rtx);
 static rtx distribute_and_simplify_rtx (rtx, int);
 static rtx simplify_and_const_int_1 (scalar_int_mode, rtx,
@@ -8569,7 +8566,7 @@ make_compound_operation (rtx x, enum rtx_code in_code)
 
    *PLEN is set to the length of the field.  */
 
-static int
+int
 get_pos_from_mask (unsigned HOST_WIDE_INT m, unsigned HOST_WIDE_INT *plen)
 {
   /* Get the bit number of the first 1 bit from the right, -1 if none.  */
@@ -8584,7 +8581,8 @@ get_pos_from_mask (unsigned HOST_WIDE_INT m, unsigned HOST_WIDE_INT *plen)
   if (len <= 0)
     pos = -1;
 
-  *plen = len;
+  if (plen)
+    *plen = len;
   return pos;
 }
 
@@ -9745,7 +9743,7 @@ rtx_equal_for_field_assignment_p (rtx x, rtx y, bool widen_x)
 
    We only handle the most common cases.  */
 
-static rtx
+rtx
 make_field_assignment (rtx x)
 {
   rtx dest = SET_DEST (x);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5a1894063a1..a2a0a86cb51 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5473,7 +5473,7 @@
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
			  (match_operand 1 "const_int_operand" "n")
			  (match_operand 2 "const_int_operand" "n"))
-	(match_operand:GPI 3 "register_operand" "r"))]
+	(match_operand:GPI 3 "aarch64_reg_or_imm" "r"))]
   "!(UINTVAL (operands[1]) == 0
      || (UINTVAL (operands[2]) + UINTVAL (operands[1])
	 > GET_MODE_BITSIZE (<MODE>mode)))"
@@ -5481,6 +5481,43 @@
   [(set_attr "type" "bfm")]
 )
 
+;; This must be split before register allocation and reloading, as
+;; we need to verify it's actually a bitfield insertion by
+;; examining both immediate operands.  After reload we will have
+;; lost knowledge of operands[3]'s constant status.
+;;
+(define_insn_and_split ""
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (match_operand:GPI 3 "const_int_operand" "n")))]
+  "(!reload_completed
+    /* make_field_assignment doesn't handle subregs well.  */
+    && REG_P (operands[0])
+    && get_pos_from_mask (~UINTVAL (operands[2]), NULL) >= 0
+    && (UINTVAL (operands[2]) & UINTVAL (operands[3])) == 0)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  "{
+     /* If we do not have an RMW operand, then copy the input
+	to the output before this insn.  Also modify the existing
+	insn in-place so we can have make_field_assignment actually
+	generate a suitable extraction.  */
+     if (!rtx_equal_p (operands[0], operands[1]))
+       {
+	 emit_move_insn (operands[0], operands[1]);
+	 XEXP (XEXP (SET_SRC (PATTERN (curr_insn)), 0), 0) = copy_rtx (operands[0]);
+       }
+
+     rtx make_field_assignment (rtx);
+     rtx newpat = make_field_assignment (PATTERN (curr_insn));
+     gcc_assert (newpat);
+     emit_insn (newpat);
+     DONE;
+  }"
+)
+
 (define_insn "*aarch64_bfi<GPI:mode><ALLX:mode>4"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
			  (match_operand 1 "const_int_operand" "n")
diff --git a/gcc/rtl.h b/gcc/rtl.h
index b4a906f9181..5824d4a7770 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -4015,6 +4015,8 @@ extern rtx remove_death (unsigned int, rtx_insn *);
 extern void dump_combine_stats (FILE *);
 extern void dump_combine_total_stats (FILE *);
 extern rtx make_compound_operation (rtx, enum rtx_code);
+extern int get_pos_from_mask (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT *);
+extern rtx make_field_assignment (rtx);
 
 /* In sched-rgn.c.  */
 extern void schedule_insns (void);
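P.S. To make the mechanics of the new split concrete on the sketch
above (RTL written by hand, pseudo-register number invented), combine
leaves us with something like

  (set (reg:SI 93)
       (ior:SI (and:SI (reg:SI 93) (const_int 0xff00ffff))
	       (const_int 0x2a0000)))

The split's condition checks that the complement of the AND mask forms
a single contiguous field (via get_pos_from_mask) and that the IOR'd
constant lies entirely within that field.  make_field_assignment then
rewrites the insn as a bitfield insertion, roughly

  (set (zero_extract:SI (reg:SI 93) (const_int 8) (const_int 16))
       (const_int 42))

which the relaxed insv pattern accepts; LRA forces the 42 into a
register and we should end up with a mov plus a single bfi.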