On 27/06/2024 17:25, Wilco Dijkstra wrote: > Hi Richard, > >> The Linaro CI is reporting an ICE while building libgfortran with this >> change. > > So it looks like Thumb-2 oddly enough restricts the negative range of DFmode > eventhough that is unnecessary and inefficient. The easiest workaround turned > out to avoid using checked adjust_address.
It is necessary because DFmode has constraints such as "r" (GENERAL_REGS) and "m" in a load, so the range needs to be legitimate here as well as when a VFP_REG is used. It might be possible to fix this, but it would involve a lot of rewriting the code to get it right. But you're right that we don't need to validate this change: we just assume it's right anyway and if it were wrong something bigger has already gone wrong earlier and we'd end up with invalid offsets in the assembly code. OK. R. > > Cheers, > Wilco > > > v3: Use adjust_address_nv with DFmode. > > The valid offset range of LDRD in arm_legitimate_index_p is increased to > -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode. > Fix this by moving the LDRD check earlier. > > Passes bootstrap & regress, OK for commit and backport to GCC14.2? > > gcc: > PR target/115153 > * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before > NEON. > (thumb2_legitimate_index_p): Update comments. > (output_move_neon): Use DFmode for vldr/vstr and non-checking > adjust_address. > > gcc/testsuite: > PR target/115153 > * gcc.target/arm/pr115153.c: Add new test. > * lib/target-supports.exp: Add arm_arch_v7ve_neon target support. > > --- > > diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc > index > 7d67d2cfee9f4edc91f187e940be40c07ff726cd..5e6608a30f17bf8185464e3fd0b202a71ff83fc8 > 100644 > --- a/gcc/config/arm/arm.cc > +++ b/gcc/config/arm/arm.cc > @@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, > RTX_CODE outer, > && INTVAL (index) > -1024 > && (INTVAL (index) & 3) == 0); > > + if (arm_address_register_rtx_p (index, strict_p) > + && (GET_MODE_SIZE (mode) <= 4)) > + return 1; > + > + /* This handles DFmode only if !TARGET_HARD_FLOAT. */ > + if (mode == DImode || mode == DFmode) > + { > + if (code == CONST_INT) > + { > + HOST_WIDE_INT val = INTVAL (index); > + > + /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD. > + If vldr is selected it uses arm_coproc_mem_operand. */ > + if (TARGET_LDRD) > + return val > -256 && val < 256; > + else > + return val > -4096 && val < 4092; > + } > + > + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); > + } > + > /* For quad modes, we restrict the constant offset to be slightly less > than what the instruction format permits. We do this because for > quad mode moves, we will actually decompose them into two separate > @@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, > RTX_CODE outer, > && (INTVAL (index) & 3) == 0); > > /* We have no such constraint on double mode offsets, so we permit the > - full range of the instruction format. */ > + full range of the instruction format. Note DImode is included here. */ > if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) > return (code == CONST_INT > && INTVAL (index) < 1024 > @@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, > RTX_CODE outer, > && INTVAL (index) > -1024 > && (INTVAL (index) & 3) == 0); > > - if (arm_address_register_rtx_p (index, strict_p) > - && (GET_MODE_SIZE (mode) <= 4)) > - return 1; > - > - if (mode == DImode || mode == DFmode) > - { > - if (code == CONST_INT) > - { > - HOST_WIDE_INT val = INTVAL (index); > - > - /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD. > - If vldr is selected it uses arm_coproc_mem_operand. */ > - if (TARGET_LDRD) > - return val > -256 && val < 256; > - else > - return val > -4096 && val < 4092; > - } > - > - return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); > - } > - > if (GET_MODE_SIZE (mode) <= 4 > && ! (arm_arch4 > && (mode == HImode > @@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx > index, int strict_p) > && (INTVAL (index) & 3) == 0); > > /* We have no such constraint on double mode offsets, so we permit the > - full range of the instruction format. */ > + full range of the instruction format. Note DImode is included here. */ > if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) > return (code == CONST_INT > && INTVAL (index) < 1024 > @@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx > index, int strict_p) > && (GET_MODE_SIZE (mode) <= 4)) > return 1; > > + /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */ > if (mode == DImode || mode == DFmode) > { > if (code == CONST_INT) > @@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands) > int overlap = -1; > for (i = 0; i < nregs; i++) > { > - /* We're only using DImode here because it's a convenient > - size. */ > - ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); > - ops[1] = adjust_address (mem, DImode, 8 * i); > + /* Use DFmode for vldr/vstr. */ > + ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i); > + ops[1] = adjust_address_nv (mem, DFmode, 8 * i); > if (reg_overlap_mentioned_p (ops[0], mem)) > { > gcc_assert (overlap == -1); > @@ -20885,8 +20886,8 @@ output_move_neon (rtx *operands) > } > if (overlap != -1) > { > - ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); > - ops[1] = adjust_address (mem, SImode, 8 * overlap); > + ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap); > + ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap); > if (TARGET_HAVE_MVE && LABEL_REF_P (addr)) > sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st"); > else > diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c > b/gcc/testsuite/gcc.target/arm/pr115153.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr115153.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -marm" } */ > +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */ > +/* { dg-add-options arm_arch_v7ve_neon } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +/* > +** f1: > +** add r0, r0, #256 > +** ldrd r0, r1, \[r0\] > +** bx lr > +*/ > +long long f1 (long long *p) > +{ > + return __atomic_load_n (p + 32, __ATOMIC_RELAXED); > +} > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index > ed30cd18ad6935b4d19f573285197cd71cf743bb..500329e0fb78a83d6cd8a4b1e0db4fd61350f17f > 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -5564,6 +5564,8 @@ foreach { armfunc armflag armdefs } { > v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__ > v7ve "-march=armv7ve+fp -marm" > "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV" > + v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp" > + "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__" > v8a "-march=armv8-a+simd" __ARM_ARCH_8A__ > v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" > __ARM_ARCH_8A__ > v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__ >