Hi Kyrill,
On 19/03/2021 17:18, Kyrylo Tkachov wrote:
> Hi Alex,
>
> > -----Original Message-----
> > From: Alex Coplan <[email protected]>
> > Sent: 19 March 2021 16:45
> > To: [email protected]
> > Cc: [email protected]; Richard Earnshaw <[email protected]>;
> > Ramana Radhakrishnan <[email protected]>; Kyrylo
> > Tkachov <[email protected]>
> > Subject: [PATCH] arm: Fix MVE ICEs with vector moves and -mpure-code
> > [PR97252]
> >
> > Hi all,
> >
> > This patch fixes around 500 ICEs in the testsuite which can be seen when
> > testing with -march=armv8.1-m.main+mve -mfloat-abi=hard -mpure-code
> > (leaving the testsuite free of ICEs in this configuration). All of the
> > ICEs are in arm_print_operand (which is expecting a mem and gets another
> > rtx, e.g. a const_vector) when running the output code for
> > *mve_mov<mode> in alternative 4.
<snip>
> > OK for trunk and eventual backport to GCC 10?
> >
>
> Thanks for doing this.
>
> static rtx
> -neon_vdup_constant (rtx vals)
> +neon_vdup_constant (rtx vals, bool check_only_p)
>
> Please document the new parameters in the function comment.
> Also, other similar functions in the backend use a "generate" parameter to
> guard whether to generate code or not.
> See arm_gen_constant, for example. I'd rather we were consistent with that
> style, so let's call this parameter "generate" and invert the logic.
>
> Ok with those changes.
Thanks for the review. Attached is the patch I'm pushing to the trunk
which incorporates these changes.
Alex
---
gcc/ChangeLog:
PR target/97252
* config/arm/arm-protos.h (neon_make_constant): Add generate
argument to guard emitting insns, default to true.
* config/arm/arm.c (arm_legitimate_constant_p_1): Reject
CONST_VECTORs which neon_make_constant can't handle.
(neon_vdup_constant): Add generate argument, avoid emitting
insns if it's not set.
(neon_make_constant): Plumb new generate argument through.
* config/arm/constraints.md (Ui): New. Use it...
* config/arm/mve.md (*mve_mov<mode>): ... here.
* config/arm/vec-common.md (movv8hf): Use neon_make_constant to
synthesize constants.
> Thanks,
> Kyrill
>
> {
> machine_mode mode = GET_MODE (vals);
> machine_mode inner_mode = GET_MODE_INNER (mode);
> @@ -13046,6 +13049,9 @@ neon_vdup_constant (rtx vals)
> vdup.i16). */
> return NULL_RTX;
>
> + if (check_only_p)
> + return x;
> +
>
> > Thanks,
> > Alex
> >
> > gcc/ChangeLog:
> >
> > PR target/97252
> > * config/arm/arm-protos.h (neon_make_constant): Add new
> > check_only_p
> > argument, default to false.
> > * config/arm/arm.c (arm_legitimate_constant_p_1): Reject
> > COST_VECTORs
> > which neon_make_constant can't handle.
> > (neon_vdup_constant): Add check_only_p, avoid emitting insns if it's
> > set.
> > (neon_make_constant): Plumb new check_only_p argument through.
> > * config/arm/constraints.md (Ui): New. Use it...
> > * config/arm/mve.md (*mve_mov<mode>): ... here.
> > * config/arm/vec-common.md (movv8hf): Use neon_make_constant
> > to
> > synthesize constants.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index bb5d3a2b133..952a8256a19 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -101,7 +101,7 @@ extern char *neon_output_shift_immediate (const char *,
char, rtx *,
machine_mode, int, bool);
extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
rtx (*) (rtx, rtx, rtx));
-extern rtx neon_make_constant (rtx);
+extern rtx neon_make_constant (rtx, bool generate = true);
extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
extern void neon_expand_vector_init (rtx, rtx);
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 49635bc2d86..e89f5e24d3b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9461,6 +9461,9 @@ arm_tls_referenced_p (rtx x)
static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
+ if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
+ return false;
+
return flag_pic || !label_mentioned_p (x);
}
@@ -13025,12 +13028,14 @@ neon_pairwise_reduce (rtx op0, rtx op1, machine_mode
mode,
}
}
-/* If VALS is a vector constant that can be loaded into a register
- using VDUP, generate instructions to do so and return an RTX to
- assign to the register. Otherwise return NULL_RTX. */
+/* Return a non-NULL RTX iff VALS is a vector constant that can be
+ loaded into a register using VDUP.
+
+ If this is the case, and GENERATE is set, we also generate
+ instructions to do this and return an RTX to assign to the register. */
static rtx
-neon_vdup_constant (rtx vals)
+neon_vdup_constant (rtx vals, bool generate)
{
machine_mode mode = GET_MODE (vals);
machine_mode inner_mode = GET_MODE_INNER (mode);
@@ -13046,6 +13051,9 @@ neon_vdup_constant (rtx vals)
vdup.i16). */
return NULL_RTX;
+ if (!generate)
+ return x;
+
/* We can load this constant by using VDUP and a constant in a
single ARM register. This will be cheaper than a vector
load. */
@@ -13054,13 +13062,15 @@ neon_vdup_constant (rtx vals)
return gen_vec_duplicate (mode, x);
}
-/* Generate code to load VALS, which is a PARALLEL containing only
- constants (for vec_init) or CONST_VECTOR, efficiently into a
- register. Returns an RTX to copy into the register, or NULL_RTX
- for a PARALLEL that cannot be converted into a CONST_VECTOR. */
+/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, can be effeciently loaded
+ into a register.
+
+ If this is the case, and GENERATE is set, we also generate code to do
+ this and return an RTX to copy into the register. */
rtx
-neon_make_constant (rtx vals)
+neon_make_constant (rtx vals, bool generate)
{
machine_mode mode = GET_MODE (vals);
rtx target;
@@ -13092,7 +13102,7 @@ neon_make_constant (rtx vals)
&& simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
/* Load using VMOV. On Cortex-A8 this takes one cycle. */
return const_vec;
- else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
/* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
pipeline cycle; creating the constant takes one or two ARM
pipeline cycles. */
@@ -13102,7 +13112,7 @@ neon_make_constant (rtx vals)
(for either double or quad vectors). We cannot take advantage
of single-cycle VLD1 because we need a PC-relative addressing
mode. */
- return const_vec;
+ return arm_disable_literal_pool ? NULL_RTX : const_vec;
else
/* A PARALLEL containing something not valid inside CONST_VECTOR.
We cannot construct an initializer. */
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 919f2995f1b..46c1eb57800 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -498,6 +498,13 @@ (define_memory_constraint "Ux"
&& mve_vector_mem_operand (GET_MODE (op),
XEXP (op, 0), true)")))
+(define_constraint "Ui"
+ "@internal
+ Match a constant (as per the 'i' constraint) provided that we have the
+ literal pool available. This is useful for load insns that would need
+ to move such constants to the literal pool after RA."
+ (match_test "!arm_disable_literal_pool && satisfies_constraint_i (op)"))
+
(define_memory_constraint "Uq"
"@internal
In ARM state an address valid in ldrsb instructions."
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index ec0ef7b8f71..440fd6adbac 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -19,7 +19,7 @@
(define_insn "*mve_mov<mode>"
[(set (match_operand:MVE_types 0 "nonimmediate_operand"
"=w,w,r,w,w,r,w,Ux,w")
- (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))]
+ (match_operand:MVE_types 1 "general_operand"
"w,r,w,Dn,UxUi,r,Dm,w,Ul"))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
{
if (which_alternative == 3 || which_alternative == 6)
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 345ada05523..ebf9e449b61 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -74,6 +74,11 @@ (define_expand "movv8hf"
{
if (!REG_P (operands[0]))
operands[1] = force_reg (E_V8HFmode, operands[1]);
+ else if (TARGET_HAVE_MVE_FLOAT && CONSTANT_P (operands[1]))
+ {
+ operands[1] = neon_make_constant (operands[1]);
+ gcc_assert (operands[1] != NULL_RTX);
+ }
}
})