Re: [PATCH v3] Arm: Fix ldrd offset range [PR115153]

Richard Earnshaw (lists) Fri, 05 Jul 2024 09:14:34 -0700

On 27/06/2024 17:25, Wilco Dijkstra wrote:
> Hi Richard,
> 
>> The Linaro CI is reporting an ICE while building libgfortran with this 
>> change.
> 
> So it looks like Thumb-2 oddly enough restricts the negative range of DFmode
> eventhough that is unnecessary and inefficient. The easiest workaround turned
> out to avoid using checked adjust_address.


It is necessary because DFmode has constraints such as "r" (GENERAL_REGS) and 
"m" in a load, so the range needs to be legitimate here as well as when a 
VFP_REG is used.

It might be possible to fix this, but it would involve a lot of rewriting the 
code to get it right.

But you're right that we don't need to validate this change: we just assume 
it's right anyway and if it were wrong something bigger has already gone wrong 
earlier and we'd end up with invalid offsets in the assembly code.

OK.

R.

> 
> Cheers,
> Wilco
> 
> 
> v3: Use adjust_address_nv with DFmode.
> 
> The valid offset range of LDRD in arm_legitimate_index_p is increased to
> -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
> Fix this by moving the LDRD check earlier.
> 
> Passes bootstrap & regress, OK for commit and backport to GCC14.2?
> 
> gcc:
>         PR target/115153
>         * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before 
> NEON.
>         (thumb2_legitimate_index_p): Update comments.
>         (output_move_neon): Use DFmode for vldr/vstr and non-checking 
> adjust_address.
> 
> gcc/testsuite:
>         PR target/115153
>         * gcc.target/arm/pr115153.c: Add new test.
>         * lib/target-supports.exp: Add arm_arch_v7ve_neon target support.
> 
> ---
> 
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index 
> 7d67d2cfee9f4edc91f187e940be40c07ff726cd..5e6608a30f17bf8185464e3fd0b202a71ff83fc8
>  100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
> RTX_CODE outer,
>           && INTVAL (index) > -1024
>           && (INTVAL (index) & 3) == 0);
>  
> +  if (arm_address_register_rtx_p (index, strict_p)
> +      && (GET_MODE_SIZE (mode) <= 4))
> +    return 1;
> +
> +  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
> +  if (mode == DImode || mode == DFmode)
> +    {
> +      if (code == CONST_INT)
> +     {
> +       HOST_WIDE_INT val = INTVAL (index);
> +
> +       /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> +          If vldr is selected it uses arm_coproc_mem_operand.  */
> +       if (TARGET_LDRD)
> +         return val > -256 && val < 256;
> +       else
> +         return val > -4096 && val < 4092;
> +     }
> +
> +      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> +    }
> +
>    /* For quad modes, we restrict the constant offset to be slightly less
>       than what the instruction format permits.  We do this because for
>       quad mode moves, we will actually decompose them into two separate
> @@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
> RTX_CODE outer,
>           && (INTVAL (index) & 3) == 0);
>  
>    /* We have no such constraint on double mode offsets, so we permit the
> -     full range of the instruction format.  */
> +     full range of the instruction format.  Note DImode is included here.  */
>    if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
>      return (code == CONST_INT
>           && INTVAL (index) < 1024
> @@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
> RTX_CODE outer,
>           && INTVAL (index) > -1024
>           && (INTVAL (index) & 3) == 0);
>  
> -  if (arm_address_register_rtx_p (index, strict_p)
> -      && (GET_MODE_SIZE (mode) <= 4))
> -    return 1;
> -
> -  if (mode == DImode || mode == DFmode)
> -    {
> -      if (code == CONST_INT)
> -     {
> -       HOST_WIDE_INT val = INTVAL (index);
> -
> -       /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> -          If vldr is selected it uses arm_coproc_mem_operand.  */
> -       if (TARGET_LDRD)
> -         return val > -256 && val < 256;
> -       else
> -         return val > -4096 && val < 4092;
> -     }
> -
> -      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> -    }
> -
>    if (GET_MODE_SIZE (mode) <= 4
>        && ! (arm_arch4
>           && (mode == HImode
> @@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx 
> index, int strict_p)
>           && (INTVAL (index) & 3) == 0);
>  
>    /* We have no such constraint on double mode offsets, so we permit the
> -     full range of the instruction format.  */
> +     full range of the instruction format.  Note DImode is included here.  */
>    if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
>      return (code == CONST_INT
>           && INTVAL (index) < 1024
> @@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx 
> index, int strict_p)
>        && (GET_MODE_SIZE (mode) <= 4))
>      return 1;
>  
> +  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
>    if (mode == DImode || mode == DFmode)
>      {
>        if (code == CONST_INT)
> @@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands)
>       int overlap = -1;
>       for (i = 0; i < nregs; i++)
>         {
> -         /* We're only using DImode here because it's a convenient
> -            size.  */
> -         ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
> -         ops[1] = adjust_address (mem, DImode, 8 * i);
> +         /* Use DFmode for vldr/vstr.  */
> +         ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
> +         ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
>           if (reg_overlap_mentioned_p (ops[0], mem))
>             {
>               gcc_assert (overlap == -1);
> @@ -20885,8 +20886,8 @@ output_move_neon (rtx *operands)
>         }
>       if (overlap != -1)
>         {
> -         ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
> -         ops[1] = adjust_address (mem, SImode, 8 * overlap);
> +         ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
> +         ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
>           if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
>             sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
>           else
> diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c 
> b/gcc/testsuite/gcc.target/arm/pr115153.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr115153.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -marm" } */
> +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
> +/* { dg-add-options arm_arch_v7ve_neon } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** f1:
> +**   add     r0, r0, #256
> +**   ldrd    r0, r1, \[r0\]
> +**   bx      lr
> +*/
> +long long f1 (long long *p)
> +{
> +  return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 
> ed30cd18ad6935b4d19f573285197cd71cf743bb..500329e0fb78a83d6cd8a4b1e0db4fd61350f17f
>  100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5564,6 +5564,8 @@ foreach { armfunc armflag armdefs } {
>       v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
>       v7ve "-march=armv7ve+fp -marm"
>               "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
> +     v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
> +               "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
>       v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
>       v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" 
> __ARM_ARCH_8A__
>       v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__
>

Re: [PATCH v3] Arm: Fix ldrd offset range [PR115153]

Reply via email to