On 27/06/2024 17:25, Wilco Dijkstra wrote:
> Hi Richard,
>
>> The Linaro CI is reporting an ICE while building libgfortran with this
>> change.
>
> So it looks like Thumb-2 oddly enough restricts the negative range of DFmode
> eventhough that is unnecessary and inefficient. The easiest workaround turned
> out to avoid using checked adjust_address.
It is necessary because DFmode has constraints such as "r" (GENERAL_REGS) and
"m" in a load, so the range needs to be legitimate here as well as when a
VFP_REG is used.
It might be possible to fix this, but it would involve a lot of rewriting the
code to get it right.
But you're right that we don't need to validate this change: we just assume
it's right anyway and if it were wrong something bigger has already gone wrong
earlier and we'd end up with invalid offsets in the assembly code.
OK.
R.
>
> Cheers,
> Wilco
>
>
> v3: Use adjust_address_nv with DFmode.
>
> The valid offset range of LDRD in arm_legitimate_index_p is increased to
> -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
> Fix this by moving the LDRD check earlier.
>
> Passes bootstrap & regress, OK for commit and backport to GCC14.2?
>
> gcc:
> PR target/115153
> * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before
> NEON.
> (thumb2_legitimate_index_p): Update comments.
> (output_move_neon): Use DFmode for vldr/vstr and non-checking
> adjust_address.
>
> gcc/testsuite:
> PR target/115153
> * gcc.target/arm/pr115153.c: Add new test.
> * lib/target-supports.exp: Add arm_arch_v7ve_neon target support.
>
> ---
>
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index
> 7d67d2cfee9f4edc91f187e940be40c07ff726cd..5e6608a30f17bf8185464e3fd0b202a71ff83fc8
> 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index,
> RTX_CODE outer,
> && INTVAL (index) > -1024
> && (INTVAL (index) & 3) == 0);
>
> + if (arm_address_register_rtx_p (index, strict_p)
> + && (GET_MODE_SIZE (mode) <= 4))
> + return 1;
> +
> + /* This handles DFmode only if !TARGET_HARD_FLOAT. */
> + if (mode == DImode || mode == DFmode)
> + {
> + if (code == CONST_INT)
> + {
> + HOST_WIDE_INT val = INTVAL (index);
> +
> + /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> + If vldr is selected it uses arm_coproc_mem_operand. */
> + if (TARGET_LDRD)
> + return val > -256 && val < 256;
> + else
> + return val > -4096 && val < 4092;
> + }
> +
> + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> + }
> +
> /* For quad modes, we restrict the constant offset to be slightly less
> than what the instruction format permits. We do this because for
> quad mode moves, we will actually decompose them into two separate
> @@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index,
> RTX_CODE outer,
> && (INTVAL (index) & 3) == 0);
>
> /* We have no such constraint on double mode offsets, so we permit the
> - full range of the instruction format. */
> + full range of the instruction format. Note DImode is included here. */
> if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
> return (code == CONST_INT
> && INTVAL (index) < 1024
> @@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index,
> RTX_CODE outer,
> && INTVAL (index) > -1024
> && (INTVAL (index) & 3) == 0);
>
> - if (arm_address_register_rtx_p (index, strict_p)
> - && (GET_MODE_SIZE (mode) <= 4))
> - return 1;
> -
> - if (mode == DImode || mode == DFmode)
> - {
> - if (code == CONST_INT)
> - {
> - HOST_WIDE_INT val = INTVAL (index);
> -
> - /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> - If vldr is selected it uses arm_coproc_mem_operand. */
> - if (TARGET_LDRD)
> - return val > -256 && val < 256;
> - else
> - return val > -4096 && val < 4092;
> - }
> -
> - return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> - }
> -
> if (GET_MODE_SIZE (mode) <= 4
> && ! (arm_arch4
> && (mode == HImode
> @@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx
> index, int strict_p)
> && (INTVAL (index) & 3) == 0);
>
> /* We have no such constraint on double mode offsets, so we permit the
> - full range of the instruction format. */
> + full range of the instruction format. Note DImode is included here. */
> if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
> return (code == CONST_INT
> && INTVAL (index) < 1024
> @@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx
> index, int strict_p)
> && (GET_MODE_SIZE (mode) <= 4))
> return 1;
>
> + /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */
> if (mode == DImode || mode == DFmode)
> {
> if (code == CONST_INT)
> @@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands)
> int overlap = -1;
> for (i = 0; i < nregs; i++)
> {
> - /* We're only using DImode here because it's a convenient
> - size. */
> - ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
> - ops[1] = adjust_address (mem, DImode, 8 * i);
> + /* Use DFmode for vldr/vstr. */
> + ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
> + ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
> if (reg_overlap_mentioned_p (ops[0], mem))
> {
> gcc_assert (overlap == -1);
> @@ -20885,8 +20886,8 @@ output_move_neon (rtx *operands)
> }
> if (overlap != -1)
> {
> - ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
> - ops[1] = adjust_address (mem, SImode, 8 * overlap);
> + ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
> + ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
> if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
> sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
> else
> diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c
> b/gcc/testsuite/gcc.target/arm/pr115153.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr115153.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -marm" } */
> +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
> +/* { dg-add-options arm_arch_v7ve_neon } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** f1:
> +** add r0, r0, #256
> +** ldrd r0, r1, \[r0\]
> +** bx lr
> +*/
> +long long f1 (long long *p)
> +{
> + return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index
> ed30cd18ad6935b4d19f573285197cd71cf743bb..500329e0fb78a83d6cd8a4b1e0db4fd61350f17f
> 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5564,6 +5564,8 @@ foreach { armfunc armflag armdefs } {
> v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
> v7ve "-march=armv7ve+fp -marm"
> "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
> + v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
> + "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
> v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
> v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard"
> __ARM_ARCH_8A__
> v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__
>