On 11/06/2024 17:42, Wilco Dijkstra wrote: > v2: use a new arm_arch_v7ve_neon, fix use of DImode in output_move_neon > > The valid offset range of LDRD in arm_legitimate_index_p is increased to > -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode. > Fix this by moving the LDRD check earlier. > > Passes bootstrap & regress, OK for commit? > > gcc: > PR target/115153 > * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before > NEON. > (thumb2_legitimate_index_p): Update comments. > (output_move_neon): Use DFmode for vldr/vstr. > * lib/target-supports.exp: Add arm_arch_v7ve_neon target support. > > gcc/testsuite: > PR target/115153 > * gcc.target/arm/pr115153.c: Add new test.
The Linaro CI is reporting an ICE while building libgfortran with this change. # 00:14:58 /home/tcwg-build/workspace/tcwg_gnu_3/abe/snapshots/gcc.git~master/libgfortran/generated/matmul_i1.c:3006:1: internal compiler error: in change_address_1, at emit-rtl.cc:2299 # 00:14:58 make[3]: *** [Makefile:4262: generated/matmul_i1.lo] Error 1 # 00:14:58 make[2]: *** [Makefile:1861: all] Error 2 # 00:14:58 make[1]: *** [Makefile:15767: all-target-libgfortran] Error 2 # 00:14:58 make: *** [Makefile:1065: all] Error 2 Could you investigate please? R. > > --- > > diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc > index > ea0c963a4d67ecd70e1571624e84dfe46d757df9..7dec0254f5a953050c9c52aa297fad7f3dfb6c74 > 100644 > --- a/gcc/config/arm/arm.cc > +++ b/gcc/config/arm/arm.cc > @@ -8852,6 +8852,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, > RTX_CODE outer, > && INTVAL (index) > -1024 > && (INTVAL (index) & 3) == 0); > > + if (arm_address_register_rtx_p (index, strict_p) > + && (GET_MODE_SIZE (mode) <= 4)) > + return 1; > + > + /* This handles DFmode only if !TARGET_HARD_FLOAT. */ > + if (mode == DImode || mode == DFmode) > + { > + if (code == CONST_INT) > + { > + HOST_WIDE_INT val = INTVAL (index); > + > + /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD. > + If vldr is selected it uses arm_coproc_mem_operand. */ > + if (TARGET_LDRD) > + return val > -256 && val < 256; > + else > + return val > -4096 && val < 4092; > + } > + > + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); > + } > + > /* For quad modes, we restrict the constant offset to be slightly less > than what the instruction format permits. We do this because for > quad mode moves, we will actually decompose them into two separate > @@ -8864,7 +8886,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, > RTX_CODE outer, > && (INTVAL (index) & 3) == 0); > > /* We have no such constraint on double mode offsets, so we permit the > - full range of the instruction format. 
*/ > + full range of the instruction format. Note DImode is included here. */ > if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) > return (code == CONST_INT > && INTVAL (index) < 1024 > @@ -8877,27 +8899,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, > RTX_CODE outer, > && INTVAL (index) > -1024 > && (INTVAL (index) & 3) == 0); > > - if (arm_address_register_rtx_p (index, strict_p) > - && (GET_MODE_SIZE (mode) <= 4)) > - return 1; > - > - if (mode == DImode || mode == DFmode) > - { > - if (code == CONST_INT) > - { > - HOST_WIDE_INT val = INTVAL (index); > - > - /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD. > - If vldr is selected it uses arm_coproc_mem_operand. */ > - if (TARGET_LDRD) > - return val > -256 && val < 256; > - else > - return val > -4096 && val < 4092; > - } > - > - return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); > - } > - > if (GET_MODE_SIZE (mode) <= 4 > && ! (arm_arch4 > && (mode == HImode > @@ -9000,7 +9001,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx > index, int strict_p) > && (INTVAL (index) & 3) == 0); > > /* We have no such constraint on double mode offsets, so we permit the > - full range of the instruction format. */ > + full range of the instruction format. Note DImode is included here. */ > if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) > return (code == CONST_INT > && INTVAL (index) < 1024 > @@ -9011,6 +9012,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx > index, int strict_p) > && (GET_MODE_SIZE (mode) <= 4)) > return 1; > > + /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */ > if (mode == DImode || mode == DFmode) > { > if (code == CONST_INT) > @@ -20854,7 +20856,7 @@ output_move_neon (rtx *operands) > /* We're only using DImode here because it's a convenient > size. 
*/ > ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); > - ops[1] = adjust_address (mem, DImode, 8 * i); > + ops[1] = adjust_address (mem, DFmode, 8 * i); > if (reg_overlap_mentioned_p (ops[0], mem)) > { > gcc_assert (overlap == -1); > @@ -20872,7 +20874,7 @@ output_move_neon (rtx *operands) > if (overlap != -1) > { > ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); > - ops[1] = adjust_address (mem, SImode, 8 * overlap); > + ops[1] = adjust_address (mem, DFmode, 8 * overlap); > if (TARGET_HAVE_MVE && LABEL_REF_P (addr)) > sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st"); > else > diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c > b/gcc/testsuite/gcc.target/arm/pr115153.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr115153.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -marm" } */ > +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */ > +/* { dg-add-options arm_arch_v7ve_neon } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +/* > +** f1: > +** add r0, r0, #256 > +** ldrd r0, r1, \[r0\] > +** bx lr > +*/ > +long long f1 (long long *p) > +{ > + return __atomic_load_n (p + 32, __ATOMIC_RELAXED); > +} > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index > 5c0a3dade222777402d2a44e63cb5be305341471..20ea318bba65ff7685d1378bb898a15cb6c4f00b > 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -5518,6 +5518,8 @@ foreach { armfunc armflag armdefs } { > v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__ > v7ve "-march=armv7ve+fp -marm" > "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV" > + v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp" > + "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__" > v8a "-march=armv8-a+simd" __ARM_ARCH_8A__ > v8a_hard "-march=armv8-a+simd 
-mfpu=auto -mfloat-abi=hard" > __ARM_ARCH_8A__ > v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__ >