Hi Richard, Segher,
On 17/09/2020 08:10, Richard Sandiford wrote:
> Alex Coplan <[email protected]> writes:
> > Hi Richard,
> >
> > On 10/09/2020 19:18, Richard Sandiford wrote:
> >> Alex Coplan <[email protected]> writes:
> >> > Hello,
> >> >
> >> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a
> >> > canonicalization from mult to shift on address reloads, a missing
> >> > pattern in the AArch64 backend was exposed.
> >> >
> >> > In particular, we were missing the ashift variant of
> >> > *add_<optab><mode>_multp2 (this mult variant is redundant and was
> >> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8).
> >> >
> >> > This patch adds that missing pattern (fixing PR96998), updates
> >> > aarch64_is_extend_from_extract() to work for the shift pattern (instead
> >> > of the redundant mult variant) and updates callers in the cost
> >> > calculations to apply the costing for the shift variant instead.
> >>
> >> Hmm. I think we should instead fix this in combine.
> >
> > Ok, thanks for the review. Please find a revised patch attached which
> > fixes this in combine instead.
>
> Thanks for doing this, looks like a really nice clean-up.
>
> > @@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner,
> > HOST_WIDE_INT pos,
> > is_mode = GET_MODE (SUBREG_REG (inner));
> > inner = SUBREG_REG (inner);
> > }
> > - else if (GET_CODE (inner) == ASHIFT
> > + else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT)
> > && CONST_INT_P (XEXP (inner, 1))
> > - && pos_rtx == 0 && pos == 0
> > - && len > UINTVAL (XEXP (inner, 1)))
> > - {
> > - /* We're extracting the least significant bits of an rtx
> > - (ashift X (const_int C)), where LEN > C. Extract the
> > - least significant (LEN - C) bits of X, giving an rtx
> > - whose mode is MODE, then shift it left C times. */
> > - new_rtx = make_extraction (mode, XEXP (inner, 0),
> > - 0, 0, len - INTVAL (XEXP (inner, 1)),
> > - unsignedp, in_dest, in_compare);
> > - if (new_rtx != 0)
> > - return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
> > + && pos_rtx == 0 && pos == 0)
> > + {
> > + const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
> > + const bool mult = GET_CODE (inner) == MULT;
> > + const int shift_amt = mult ? exact_log2 (ci) : ci;
>
> Not going to be a problem in practice but: HOST_WIDE_INT would be better
> than int here, so that we don't truncate the value for ASHIFT before it
> has been tested. Similarly:
>
> > +
> > + if (shift_amt > 0 && len > (unsigned)shift_amt)
>
> unsigned HOST_WIDE_INT here.
Makes sense, thanks.
>
> > + {
> > + /* We're extracting the least significant bits of an rtx
> > + (ashift X (const_int C)) or (mult X (const_int (2^C))),
> > + where LEN > C. Extract the least significant (LEN - C) bits
> > + of X, giving an rtx whose mode is MODE, then shift it left
> > + C times. */
> > + new_rtx = make_extraction (mode, XEXP (inner, 0),
> > + 0, 0, len - shift_amt,
> > + unsignedp, in_dest, in_compare);
> > + if (new_rtx)
> > + return mult
> > + ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1))
> > + : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
>
> Easier as gen_rtx_fmt_ee (GET_CODE (inner), mode, …);
Ah, I thought there must have been an easier way to do that!
>
> The combine parts LGTM otherwise, but Segher should have the
> final say.
Great. FWIW, I tested the previous patch on aarch64, arm, and x86 and
there were no regressions there. I've attached a revised patch with
these fixes and that bootstraps/regtests fine on aarch64-none-linux-gnu.
<snip>
> The aarch64 changes are OK with those (incredibly inconsequential)
> comments fixed.
Great, thanks. I'll wait for Segher's verdict on the combine bits.
Alex
---
gcc/ChangeLog:
* combine.c (expand_compound_operation): Tweak variable name in
comment to match source.
(make_extraction): Handle mult by power of two in addition to
ashift.
* config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete.
(aarch64_classify_index): Remove redundant extend-as-extract handling.
(aarch64_strip_extend): Likewise.
(aarch64_rtx_arith_op_extract_p): Likewise, remove now-unused
parameter. Update callers...
(aarch64_rtx_costs): ... here.
gcc/testsuite/ChangeLog:
* gcc.c-torture/compile/pr96998.c: New test.
diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..fe8eff2b464 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7419,8 +7419,8 @@ expand_compound_operation (rtx x)
}
/* If we reach here, we want to return a pair of shifts. The inner
- shift is a left shift of BITSIZE - POS - LEN bits. The outer
- shift is a right shift of BITSIZE - LEN bits. It is arithmetic or
+ shift is a left shift of MODEWIDTH - POS - LEN bits. The outer
+ shift is a right shift of MODEWIDTH - LEN bits. It is arithmetic or
logical depending on the value of UNSIGNEDP.
If this was a ZERO_EXTEND or ZERO_EXTRACT, this pair of shifts will be
@@ -7650,20 +7650,27 @@ make_extraction (machine_mode mode, rtx inner,
HOST_WIDE_INT pos,
is_mode = GET_MODE (SUBREG_REG (inner));
inner = SUBREG_REG (inner);
}
- else if (GET_CODE (inner) == ASHIFT
+ else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT)
&& CONST_INT_P (XEXP (inner, 1))
- && pos_rtx == 0 && pos == 0
- && len > UINTVAL (XEXP (inner, 1)))
- {
- /* We're extracting the least significant bits of an rtx
- (ashift X (const_int C)), where LEN > C. Extract the
- least significant (LEN - C) bits of X, giving an rtx
- whose mode is MODE, then shift it left C times. */
- new_rtx = make_extraction (mode, XEXP (inner, 0),
- 0, 0, len - INTVAL (XEXP (inner, 1)),
- unsignedp, in_dest, in_compare);
- if (new_rtx != 0)
- return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
+ && pos_rtx == 0 && pos == 0)
+ {
+ const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
+ const auto code = GET_CODE (inner);
+ const HOST_WIDE_INT shift_amt = (code == MULT) ? exact_log2 (ci) : ci;
+
+ if (shift_amt > 0 && len > (unsigned HOST_WIDE_INT)shift_amt)
+ {
+ /* We're extracting the least significant bits of an rtx
+ (ashift X (const_int C)) or (mult X (const_int (2^C))),
+ where LEN > C. Extract the least significant (LEN - C) bits
+ of X, giving an rtx whose mode is MODE, then shift it left
+ C times. */
+ new_rtx = make_extraction (mode, XEXP (inner, 0),
+ 0, 0, len - shift_amt,
+ unsignedp, in_dest, in_compare);
+ if (new_rtx)
+ return gen_rtx_fmt_ee (code, mode, new_rtx, XEXP (inner, 1));
+ }
}
else if (GET_CODE (inner) == TRUNCATE
/* If trying or potentionally trying to extract
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b251f3947e2..8bb7c310a77 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym)
return false;
}
-/* Return true if the offsets to a zero/sign-extract operation
- represent an expression that matches an extend operation. The
- operands represent the parameters from
-
- (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
-bool
-aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
- rtx extract_imm)
-{
- HOST_WIDE_INT mult_val, extract_val;
-
- if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
- return false;
-
- mult_val = INTVAL (mult_imm);
- extract_val = INTVAL (extract_imm);
-
- if (extract_val > 8
- && extract_val < GET_MODE_BITSIZE (mode)
- && exact_log2 (extract_val & ~7) > 0
- && (extract_val & 7) <= 4
- && mult_val == (1 << (extract_val & 7)))
- return true;
-
- return false;
-}
-
/* Emit an insn that's a simple single-set. Both the operands must be
known to be valid. */
inline static rtx_insn *
@@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info
*info, rtx x,
index = XEXP (XEXP (x, 0), 0);
shift = INTVAL (XEXP (x, 1));
}
- /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
- else if ((GET_CODE (x) == SIGN_EXTRACT
- || GET_CODE (x) == ZERO_EXTRACT)
- && GET_MODE (x) == DImode
- && GET_CODE (XEXP (x, 0)) == MULT
- && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
- && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
- {
- type = (GET_CODE (x) == SIGN_EXTRACT)
- ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
- index = XEXP (XEXP (x, 0), 0);
- shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
- if (INTVAL (XEXP (x, 1)) != 32 + shift
- || INTVAL (XEXP (x, 2)) != 0)
- shift = -1;
- }
/* (and:DI (mult:DI (reg:DI) (const_int scale))
(const_int 0xffffffff<<shift)) */
else if (GET_CODE (x) == AND
@@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info
*info, rtx x,
if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
shift = -1;
}
- /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
- else if ((GET_CODE (x) == SIGN_EXTRACT
- || GET_CODE (x) == ZERO_EXTRACT)
- && GET_MODE (x) == DImode
- && GET_CODE (XEXP (x, 0)) == ASHIFT
- && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
- && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
- {
- type = (GET_CODE (x) == SIGN_EXTRACT)
- ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
- index = XEXP (XEXP (x, 0), 0);
- shift = INTVAL (XEXP (XEXP (x, 0), 1));
- if (INTVAL (XEXP (x, 1)) != 32 + shift
- || INTVAL (XEXP (x, 2)) != 0)
- shift = -1;
- }
/* (and:DI (ashift:DI (reg:DI) (const_int shift))
(const_int 0xffffffff<<shift)) */
else if (GET_CODE (x) == AND
@@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift)
if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
return op;
- /* Zero and sign extraction of a widened value. */
- if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
- && XEXP (op, 2) == const0_rtx
- && GET_CODE (XEXP (op, 0)) == MULT
- && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
- XEXP (op, 1)))
- return XEXP (XEXP (op, 0), 0);
-
- /* It can also be represented (for zero-extend) as an AND with an
- immediate. */
if (GET_CODE (op) == AND
&& GET_CODE (XEXP (op, 0)) == MULT
&& CONST_INT_P (XEXP (XEXP (op, 0), 1))
@@ -11606,32 +11537,12 @@ aarch64_branch_cost (bool speed_p, bool predictable_p)
/* Return true if the RTX X in mode MODE is a zero or sign extract
usable in an ADD or SUB (extended register) instruction. */
static bool
-aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
-{
- /* Catch add with a sign extract.
- This is add_<optab><mode>_multp2. */
- if (GET_CODE (x) == SIGN_EXTRACT
- || GET_CODE (x) == ZERO_EXTRACT)
- {
- rtx op0 = XEXP (x, 0);
- rtx op1 = XEXP (x, 1);
- rtx op2 = XEXP (x, 2);
-
- if (GET_CODE (op0) == MULT
- && CONST_INT_P (op1)
- && op2 == const0_rtx
- && CONST_INT_P (XEXP (op0, 1))
- && aarch64_is_extend_from_extract (mode,
- XEXP (op0, 1),
- op1))
- {
- return true;
- }
- }
+aarch64_rtx_arith_op_extract_p (rtx x)
+{
/* The simple case <ARITH>, XD, XN, XM, [us]xt.
No shift. */
- else if (GET_CODE (x) == SIGN_EXTEND
- || GET_CODE (x) == ZERO_EXTEND)
+ if (GET_CODE (x) == SIGN_EXTEND
+ || GET_CODE (x) == ZERO_EXTEND)
return REG_P (XEXP (x, 0));
return false;
@@ -12318,8 +12229,8 @@ cost_minus:
}
/* Look for SUB (extended register). */
- if (is_a <scalar_int_mode> (mode, &int_mode)
- && aarch64_rtx_arith_op_extract_p (op1, int_mode))
+ if (is_a <scalar_int_mode> (mode)
+ && aarch64_rtx_arith_op_extract_p (op1))
{
if (speed)
*cost += extra_cost->alu.extend_arith;
@@ -12398,8 +12309,8 @@ cost_plus:
*cost += rtx_cost (op1, mode, PLUS, 1, speed);
/* Look for ADD (extended register). */
- if (is_a <scalar_int_mode> (mode, &int_mode)
- && aarch64_rtx_arith_op_extract_p (op0, int_mode))
+ if (is_a <scalar_int_mode> (mode)
+ && aarch64_rtx_arith_op_extract_p (op0))
{
if (speed)
*cost += extra_cost->alu.extend_arith;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c
b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
new file mode 100644
index 00000000000..a75d5dcfe08
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */
+
+int h(void);
+struct c d;
+struct c {
+ int e[1];
+};
+
+void f(void) {
+ int g;
+ for (;; g = h()) {
+ int *i = &d.e[g];
+ asm("" : "=Q"(*i));
+ }
+}