Hi Kyrill, On 19/03/2021 17:18, Kyrylo Tkachov wrote: > Hi Alex, > > > -----Original Message----- > > From: Alex Coplan <alex.cop...@arm.com> > > Sent: 19 March 2021 16:45 > > To: gcc-patches@gcc.gnu.org > > Cc: ni...@redhat.com; Richard Earnshaw <richard.earns...@arm.com>; > > Ramana Radhakrishnan <ramana.radhakrish...@arm.com>; Kyrylo > > Tkachov <kyrylo.tkac...@arm.com> > > Subject: [PATCH] arm: Fix MVE ICEs with vector moves and -mpure-code > > [PR97252] > > > > Hi all, > > > > This patch fixes around 500 ICEs in the testsuite which can be seen when > > testing with -march=armv8.1-m.main+mve -mfloat-abi=hard -mpure-code > > (leaving the testsuite free of ICEs in this configuration). All of the > > ICEs are in arm_print_operand (which is expecting a mem and gets another > > rtx, e.g. a const_vector) when running the output code for > > *mve_mov<mode> in alternative 4. <snip> > > OK for trunk and eventual backport to GCC 10? > > > > Thanks for doing this. > > static rtx > -neon_vdup_constant (rtx vals) > +neon_vdup_constant (rtx vals, bool check_only_p) > > Please document the new parameters in the function comment. > Also, other similar functions in the backend use a "generate" parameter to > guard whether to generate code or not. > See arm_gen_constant, for example. I'd rather we were consistent with that > style, so let's call this parameter "generate" and invert the logic. > > Ok with those changes.
Thanks for the review. Attached is the patch I'm pushing to the trunk which incorporates these changes. Alex --- gcc/ChangeLog: PR target/97252 * config/arm/arm-protos.h (neon_make_constant): Add generate argument to guard emitting insns, default to true. * config/arm/arm.c (arm_legitimate_constant_p_1): Reject CONST_VECTORs which neon_make_constant can't handle. (neon_vdup_constant): Add generate argument, avoid emitting insns if it's not set. (neon_make_constant): Plumb new generate argument through. * config/arm/constraints.md (Ui): New. Use it... * config/arm/mve.md (*mve_mov<mode>): ... here. * config/arm/vec-common.md (movv8hf): Use neon_make_constant to synthesize constants. > Thanks, > Kyrill > > { > machine_mode mode = GET_MODE (vals); > machine_mode inner_mode = GET_MODE_INNER (mode); > @@ -13046,6 +13049,9 @@ neon_vdup_constant (rtx vals) > vdup.i16). */ > return NULL_RTX; > > + if (check_only_p) > + return x; > + > > > Thanks, > > Alex > > > > gcc/ChangeLog: > > > > PR target/97252 > > * config/arm/arm-protos.h (neon_make_constant): Add new > > check_only_p > > argument, default to false. > > * config/arm/arm.c (arm_legitimate_constant_p_1): Reject > > COST_VECTORs > > which neon_make_constant can't handle. > > (neon_vdup_constant): Add check_only_p, avoid emitting insns if it's > > set. > > (neon_make_constant): Plumb new check_only_p argument through. > > * config/arm/constraints.md (Ui): New. Use it... > > * config/arm/mve.md (*mve_mov<mode>): ... here. > > * config/arm/vec-common.md (movv8hf): Use neon_make_constant > > to > > synthesize constants.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index bb5d3a2b133..952a8256a19 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -101,7 +101,7 @@ extern char *neon_output_shift_immediate (const char *, char, rtx *, machine_mode, int, bool); extern void neon_pairwise_reduce (rtx, rtx, machine_mode, rtx (*) (rtx, rtx, rtx)); -extern rtx neon_make_constant (rtx); +extern rtx neon_make_constant (rtx, bool generate = true); extern tree arm_builtin_vectorized_function (unsigned int, tree, tree); extern void neon_expand_vector_init (rtx, rtx); extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 49635bc2d86..e89f5e24d3b 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -9461,6 +9461,9 @@ arm_tls_referenced_p (rtx x) static bool arm_legitimate_constant_p_1 (machine_mode, rtx x) { + if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false)) + return false; + return flag_pic || !label_mentioned_p (x); } @@ -13025,12 +13028,14 @@ neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode, } } -/* If VALS is a vector constant that can be loaded into a register - using VDUP, generate instructions to do so and return an RTX to - assign to the register. Otherwise return NULL_RTX. */ +/* Return a non-NULL RTX iff VALS is a vector constant that can be + loaded into a register using VDUP. + + If this is the case, and GENERATE is set, we also generate + instructions to do this and return an RTX to assign to the register. */ static rtx -neon_vdup_constant (rtx vals) +neon_vdup_constant (rtx vals, bool generate) { machine_mode mode = GET_MODE (vals); machine_mode inner_mode = GET_MODE_INNER (mode); @@ -13046,6 +13051,9 @@ neon_vdup_constant (rtx vals) vdup.i16). */ return NULL_RTX; + if (!generate) + return x; + /* We can load this constant by using VDUP and a constant in a single ARM register. This will be cheaper than a vector load. */ @@ -13054,13 +13062,15 @@ neon_vdup_constant (rtx vals) return gen_vec_duplicate (mode, x); } -/* Generate code to load VALS, which is a PARALLEL containing only - constants (for vec_init) or CONST_VECTOR, efficiently into a - register. Returns an RTX to copy into the register, or NULL_RTX - for a PARALLEL that cannot be converted into a CONST_VECTOR. */ +/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, can be effeciently loaded + into a register. + + If this is the case, and GENERATE is set, we also generate code to do + this and return an RTX to copy into the register. */ rtx -neon_make_constant (rtx vals) +neon_make_constant (rtx vals, bool generate) { machine_mode mode = GET_MODE (vals); rtx target; @@ -13092,7 +13102,7 @@ neon_make_constant (rtx vals) && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL)) /* Load using VMOV. On Cortex-A8 this takes one cycle. */ return const_vec; - else if ((target = neon_vdup_constant (vals)) != NULL_RTX) + else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX) /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */ @@ -13102,7 +13112,7 @@ neon_make_constant (rtx vals) (for either double or quad vectors). We cannot take advantage of single-cycle VLD1 because we need a PC-relative addressing mode. */ - return const_vec; + return arm_disable_literal_pool ? NULL_RTX : const_vec; else /* A PARALLEL containing something not valid inside CONST_VECTOR. We cannot construct an initializer. */ diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 919f2995f1b..46c1eb57800 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -498,6 +498,13 @@ (define_memory_constraint "Ux" && mve_vector_mem_operand (GET_MODE (op), XEXP (op, 0), true)"))) +(define_constraint "Ui" + "@internal + Match a constant (as per the 'i' constraint) provided that we have the + literal pool available. This is useful for load insns that would need + to move such constants to the literal pool after RA." + (match_test "!arm_disable_literal_pool && satisfies_constraint_i (op)")) + (define_memory_constraint "Uq" "@internal In ARM state an address valid in ldrsb instructions." diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index ec0ef7b8f71..440fd6adbac 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -19,7 +19,7 @@ (define_insn "*mve_mov<mode>" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Ux,w") - (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))] + (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,UxUi,r,Dm,w,Ul"))] "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT" { if (which_alternative == 3 || which_alternative == 6) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 345ada05523..ebf9e449b61 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -74,6 +74,11 @@ (define_expand "movv8hf" { if (!REG_P (operands[0])) operands[1] = force_reg (E_V8HFmode, operands[1]); + else if (TARGET_HAVE_MVE_FLOAT && CONSTANT_P (operands[1])) + { + operands[1] = neon_make_constant (operands[1]); + gcc_assert (operands[1] != NULL_RTX); + } } })