Hi Richard, Segher, On 17/09/2020 08:10, Richard Sandiford wrote: > Alex Coplan <alex.cop...@arm.com> writes: > > Hi Richard, > > > > On 10/09/2020 19:18, Richard Sandiford wrote: > >> Alex Coplan <alex.cop...@arm.com> writes: > >> > Hello, > >> > > >> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a > >> > canonicalization from mult to shift on address reloads, a missing > >> > pattern in the AArch64 backend was exposed. > >> > > >> > In particular, we were missing the ashift variant of > >> > *add_<optab><mode>_multp2 (this mult variant is redundant and was > >> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8). > >> > > >> > This patch adds that missing pattern (fixing PR96998), updates > >> > aarch64_is_extend_from_extract() to work for the shift pattern (instead > >> > of the redundant mult variant) and updates callers in the cost > >> > calculations to apply the costing for the shift variant instead. > >> > >> Hmm. I think we should instead fix this in combine. > > > > Ok, thanks for the review. Please find a revised patch attached which > > fixes this in combine instead. > > Thanks for doing this, looks like a really nice clean-up. > > > @@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, > > HOST_WIDE_INT pos, > > is_mode = GET_MODE (SUBREG_REG (inner)); > > inner = SUBREG_REG (inner); > > } > > - else if (GET_CODE (inner) == ASHIFT > > + else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT) > > && CONST_INT_P (XEXP (inner, 1)) > > - && pos_rtx == 0 && pos == 0 > > - && len > UINTVAL (XEXP (inner, 1))) > > - { > > - /* We're extracting the least significant bits of an rtx > > - (ashift X (const_int C)), where LEN > C. Extract the > > - least significant (LEN - C) bits of X, giving an rtx > > - whose mode is MODE, then shift it left C times. */ > > - new_rtx = make_extraction (mode, XEXP (inner, 0), > > - 0, 0, len - INTVAL (XEXP (inner, 1)), > > - unsignedp, in_dest, in_compare); > > - if (new_rtx != 0) > > - return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); > > + && pos_rtx == 0 && pos == 0) > > + { > > + const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1)); > > + const bool mult = GET_CODE (inner) == MULT; > > + const int shift_amt = mult ? exact_log2 (ci) : ci; > > Not going to be a problem in practice but: HOST_WIDE_INT would be better > than int here, so that we don't truncate the value for ASHIFT before it > has been tested. Similarly: > > > + > > + if (shift_amt > 0 && len > (unsigned)shift_amt) > > unsigned HOST_WIDE_INT here.
Makes sense, thanks. > > > + { > > + /* We're extracting the least significant bits of an rtx > > + (ashift X (const_int C)) or (mult X (const_int (2^C))), > > + where LEN > C. Extract the least significant (LEN - C) bits > > + of X, giving an rtx whose mode is MODE, then shift it left > > + C times. */ > > + new_rtx = make_extraction (mode, XEXP (inner, 0), > > + 0, 0, len - shift_amt, > > + unsignedp, in_dest, in_compare); > > + if (new_rtx) > > + return mult > > + ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1)) > > + : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); > > Easier as gen_rtx_fmt_ee (GET_CODE (inner), mode, …); Ah, I thought there must have been an easier way to do that! > > The combine parts LGTM otherwise, but Segher should have the > final say. Great. FWIW, I tested the previous patch on aarch64, arm, and x86 and there were no regressions there. I've attached a revised patch with these fixes and that bootstraps/regtests fine on aarch64-none-linux-gnu. <snip> > The aarch64 changes are OK with those (incredibly inconsequential) > comments fixed. Great, thanks. I'll wait for Segher's verdict on the combine bits. Alex --- gcc/ChangeLog: * combine.c (expand_compound_operation): Tweak variable name in comment to match source. (make_extraction): Handle mult by power of two in addition to ashift. * config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete. (aarch64_classify_index): Remove redundant extend-as-extract handling. (aarch64_strip_extend): Likewise. (aarch64_rtx_arith_op_extract_p): Likewise, remove now-unused parameter. Update callers... (aarch64_rtx_costs): ... here. gcc/testsuite/ChangeLog: * gcc.c-torture/compile/pr96998.c: New test.
diff --git a/gcc/combine.c b/gcc/combine.c index c88382efbd3..fe8eff2b464 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -7419,8 +7419,8 @@ expand_compound_operation (rtx x) } /* If we reach here, we want to return a pair of shifts. The inner - shift is a left shift of BITSIZE - POS - LEN bits. The outer - shift is a right shift of BITSIZE - LEN bits. It is arithmetic or + shift is a left shift of MODEWIDTH - POS - LEN bits. The outer + shift is a right shift of MODEWIDTH - LEN bits. It is arithmetic or logical depending on the value of UNSIGNEDP. If this was a ZERO_EXTEND or ZERO_EXTRACT, this pair of shifts will be @@ -7650,20 +7650,27 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos, is_mode = GET_MODE (SUBREG_REG (inner)); inner = SUBREG_REG (inner); } - else if (GET_CODE (inner) == ASHIFT + else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT) && CONST_INT_P (XEXP (inner, 1)) - && pos_rtx == 0 && pos == 0 - && len > UINTVAL (XEXP (inner, 1))) - { - /* We're extracting the least significant bits of an rtx - (ashift X (const_int C)), where LEN > C. Extract the - least significant (LEN - C) bits of X, giving an rtx - whose mode is MODE, then shift it left C times. */ - new_rtx = make_extraction (mode, XEXP (inner, 0), - 0, 0, len - INTVAL (XEXP (inner, 1)), - unsignedp, in_dest, in_compare); - if (new_rtx != 0) - return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); + && pos_rtx == 0 && pos == 0) + { + const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1)); + const auto code = GET_CODE (inner); + const HOST_WIDE_INT shift_amt = (code == MULT) ? exact_log2 (ci) : ci; + + if (shift_amt > 0 && len > (unsigned HOST_WIDE_INT)shift_amt) + { + /* We're extracting the least significant bits of an rtx + (ashift X (const_int C)) or (mult X (const_int (2^C))), + where LEN > C. Extract the least significant (LEN - C) bits + of X, giving an rtx whose mode is MODE, then shift it left + C times. */ + new_rtx = make_extraction (mode, XEXP (inner, 0), + 0, 0, len - shift_amt, + unsignedp, in_dest, in_compare); + if (new_rtx) + return gen_rtx_fmt_ee (code, mode, new_rtx, XEXP (inner, 1)); + } } else if (GET_CODE (inner) == TRUNCATE /* If trying or potentionally trying to extract diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b251f3947e2..8bb7c310a77 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym) return false; } -/* Return true if the offsets to a zero/sign-extract operation - represent an expression that matches an extend operation. The - operands represent the parameters from - - (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ -bool -aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm, - rtx extract_imm) -{ - HOST_WIDE_INT mult_val, extract_val; - - if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) - return false; - - mult_val = INTVAL (mult_imm); - extract_val = INTVAL (extract_imm); - - if (extract_val > 8 - && extract_val < GET_MODE_BITSIZE (mode) - && exact_log2 (extract_val & ~7) > 0 - && (extract_val & 7) <= 4 - && mult_val == (1 << (extract_val & 7))) - return true; - - return false; -} - /* Emit an insn that's a simple single-set. Both the operands must be known to be valid. */ inline static rtx_insn * @@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, index = XEXP (XEXP (x, 0), 0); shift = INTVAL (XEXP (x, 1)); } - /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */ - else if ((GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - && GET_MODE (x) == DImode - && GET_CODE (XEXP (x, 0)) == MULT - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { - type = (GET_CODE (x) == SIGN_EXTRACT) - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; - index = XEXP (XEXP (x, 0), 0); - shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); - if (INTVAL (XEXP (x, 1)) != 32 + shift - || INTVAL (XEXP (x, 2)) != 0) - shift = -1; - } /* (and:DI (mult:DI (reg:DI) (const_int scale)) (const_int 0xffffffff<<shift)) */ else if (GET_CODE (x) == AND @@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) shift = -1; } - /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */ - else if ((GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - && GET_MODE (x) == DImode - && GET_CODE (XEXP (x, 0)) == ASHIFT - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { - type = (GET_CODE (x) == SIGN_EXTRACT) - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; - index = XEXP (XEXP (x, 0), 0); - shift = INTVAL (XEXP (XEXP (x, 0), 1)); - if (INTVAL (XEXP (x, 1)) != 32 + shift - || INTVAL (XEXP (x, 2)) != 0) - shift = -1; - } /* (and:DI (ashift:DI (reg:DI) (const_int shift)) (const_int 0xffffffff<<shift)) */ else if (GET_CODE (x) == AND @@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift) if (!is_a <scalar_int_mode> (GET_MODE (op), &mode)) return op; - /* Zero and sign extraction of a widened value. */ - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) - && XEXP (op, 2) == const0_rtx - && GET_CODE (XEXP (op, 0)) == MULT - && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1), - XEXP (op, 1))) - return XEXP (XEXP (op, 0), 0); - - /* It can also be represented (for zero-extend) as an AND with an - immediate. */ if (GET_CODE (op) == AND && GET_CODE (XEXP (op, 0)) == MULT && CONST_INT_P (XEXP (XEXP (op, 0), 1)) @@ -11606,32 +11537,12 @@ aarch64_branch_cost (bool speed_p, bool predictable_p) /* Return true if the RTX X in mode MODE is a zero or sign extract usable in an ADD or SUB (extended register) instruction. */ static bool -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode) -{ - /* Catch add with a sign extract. - This is add_<optab><mode>_multp2. */ - if (GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - { - rtx op0 = XEXP (x, 0); - rtx op1 = XEXP (x, 1); - rtx op2 = XEXP (x, 2); - - if (GET_CODE (op0) == MULT - && CONST_INT_P (op1) - && op2 == const0_rtx - && CONST_INT_P (XEXP (op0, 1)) - && aarch64_is_extend_from_extract (mode, - XEXP (op0, 1), - op1)) - { - return true; - } - } +aarch64_rtx_arith_op_extract_p (rtx x) +{ /* The simple case <ARITH>, XD, XN, XM, [us]xt. No shift. */ - else if (GET_CODE (x) == SIGN_EXTEND - || GET_CODE (x) == ZERO_EXTEND) + if (GET_CODE (x) == SIGN_EXTEND + || GET_CODE (x) == ZERO_EXTEND) return REG_P (XEXP (x, 0)); return false; @@ -12318,8 +12229,8 @@ cost_minus: } /* Look for SUB (extended register). */ - if (is_a <scalar_int_mode> (mode, &int_mode) - && aarch64_rtx_arith_op_extract_p (op1, int_mode)) + if (is_a <scalar_int_mode> (mode) + && aarch64_rtx_arith_op_extract_p (op1)) { if (speed) *cost += extra_cost->alu.extend_arith; @@ -12398,8 +12309,8 @@ cost_plus: *cost += rtx_cost (op1, mode, PLUS, 1, speed); /* Look for ADD (extended register). */ - if (is_a <scalar_int_mode> (mode, &int_mode) - && aarch64_rtx_arith_op_extract_p (op0, int_mode)) + if (is_a <scalar_int_mode> (mode) + && aarch64_rtx_arith_op_extract_p (op0)) { if (speed) *cost += extra_cost->alu.extend_arith; diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c b/gcc/testsuite/gcc.c-torture/compile/pr96998.c new file mode 100644 index 00000000000..a75d5dcfe08 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */ + +int h(void); +struct c d; +struct c { + int e[1]; +}; + +void f(void) { + int g; + for (;; g = h()) { + int *i = &d.e[g]; + asm("" : "=Q"(*i)); + } +}