GCC 15 is going to be the first release to support FPMR.  While working
on a follow-up patch, I noticed that, for:

  (set (reg:DI R) ...)
  ...
  (set (reg:DI fpmr) (reg:DI R))

IRA would prefer to spill R to memory rather than allocate a GPR.
This is because the register move cost from GENERAL_REGS to
MOVEABLE_SYSREGS is very high:

  /* Moves to/from sysregs are expensive, and must go via GPR.  */
  if (from == MOVEABLE_SYSREGS)
    return 80 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
  if (to == MOVEABLE_SYSREGS)
    return 80 + aarch64_register_move_cost (mode, from, GENERAL_REGS);

but the memory cost for MOVEABLE_SYSREGS was the same as for
GENERAL_REGS, making memory much cheaper.
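To put rough numbers on the choice IRA sees (illustrative values only,
not any particular tuning: say an integer load or store costs 4 and a
GPR-to-GPR move costs 1):

  GPR -> FPMR register move:     80 + 1 = 81
  FPMR spill/reload (old cost):  4

so spilling looked some twenty times cheaper than allocating a GPR.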
Loading and storing FPMR involves a GPR temporary, so the cost should
account for moving into and out of that temporary.
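Concretely, FPMR is only accessible via MRS/MSR, so reloading a spilled
value into FPMR looks something like this (an illustrative sequence,
not taken from any particular test):

	str	x0, [sp, 16]	// spill the pseudo
	...
	ldr	x1, [sp, 16]	// reload into a GPR temporary
	msr	fpmr, x1	// FPMR can only be written from a GPR

That is, the "load" into FPMR is really an integer load plus a
GPR->FPMR move.  With the illustrative numbers above, the memory path
then costs (80 + 1) + 4 = 85 rather than 4, so allocating a GPR for R
becomes the cheaper option again.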
This did show up indirectly in some of the existing asm tests, where
the stack frame allocated 16 bytes for callee saves (D8) and another
16 bytes for spilling a temporary register.

It's possible that other registers need the same treatment, and it's
more than probable that this code needs a rework.  None of that seems
suitable for stage 4 though.

Tested on aarch64-linux-gnu.  I'll push in about 24 hours if there
are no comments before then.

Richard


gcc/
	* config/aarch64/aarch64.cc (aarch64_memory_move_cost): Account
	for the cost of moving in and out of MOVEABLE_SYSREGS.

gcc/testsuite/
	* gcc.target/aarch64/acle/fpmr-5.c: New test.
	* gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c: Don't expect
	a spill slot to be allocated.
	* gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc                  | 11 +++++++++--
 gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c | 16 ++++++++++++++++
 .../aarch64/sve2/acle/asm/dot_lane_mf8.c       |  4 ++--
 .../aarch64/sve2/acle/asm/mlalb_lane_mf8.c     |  2 +-
 .../aarch64/sve2/acle/asm/mlallbb_lane_mf8.c   |  2 +-
 .../aarch64/sve2/acle/asm/mlallbt_lane_mf8.c   |  2 +-
 .../aarch64/sve2/acle/asm/mlalltb_lane_mf8.c   |  2 +-
 .../aarch64/sve2/acle/asm/mlalltt_lane_mf8.c   |  2 +-
 .../aarch64/sve2/acle/asm/mlalt_lane_mf8.c     |  2 +-
 9 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index dba779a8e51..a1f5619a615 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15858,9 +15858,16 @@ aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
 	    ? aarch64_tune_params.memmov_cost.load_fp
 	    : aarch64_tune_params.memmov_cost.store_fp);
 
+  /* If the move needs to go through GPRs, add the cost of doing that.  */
+  int base = 0;
+  if (rclass_i == MOVEABLE_SYSREGS)
+    base += (in
+	     ? aarch64_register_move_cost (DImode, GENERAL_REGS, rclass_i)
+	     : aarch64_register_move_cost (DImode, rclass_i, GENERAL_REGS));
+
   return (in
-	  ? aarch64_tune_params.memmov_cost.load_int
-	  : aarch64_tune_params.memmov_cost.store_int);
+	  ? base + aarch64_tune_params.memmov_cost.load_int
+	  : base + aarch64_tune_params.memmov_cost.store_int);
 }
 
 /* Implement TARGET_INSN_COST.
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c b/gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c
new file mode 100644
index 00000000000..da6d7f62f90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c
@@ -0,0 +1,16 @@
+/* { dg-options "-O" } */
+
+#include <stdint.h>
+
+void f(int cond)
+{
+  uint64_t x;
+  asm volatile ("" : "=r" (x));
+  if (cond)
+    {
+      register uint64_t fpmr asm ("fpmr") = x;
+      asm volatile ("" :: "Umv" (fpmr));
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tsub\tsp,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
index 9e54cd11c4b..83fe5cff5d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (dot_lane_1_f16, svfloat16_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fdot z0\.h, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (dot_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
@@ -151,7 +151,7 @@ TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fdot z0\.s, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
index e7af1b6dcc6..39a272f60e0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalb_lane_1_f16, svfloat16_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fmlalb z0\.h, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (mlalb_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
index 07a529d8dc9..2d084515c85 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlallbb_lane_1_f32, svfloat32_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fmlallbb z0\.s, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (mlallbb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
index 9da29fbfb0b..6dadbcf2e17 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlallbt_lane_1_f32, svfloat32_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fmlallbt z0\.s, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (mlallbt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
index cbe297c188b..6b9e06c7550 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalltb_lane_1_f32, svfloat32_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fmlalltb z0\.s, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (mlalltb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
index fc5bfba7877..f33c4fe7965 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalltt_lane_1_f32, svfloat32_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fmlalltt z0\.s, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (mlalltt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
index 4f5a1045420..604d3325d60 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalt_lane_1_f16, svfloat16_t, svmfloat8_t,
 ** msr fpmr, x0
 ** mov (z[0-7])\.d, z8\.d
 ** fmlalt z0\.h, z1\.b, \1\.b\[1\]
-** ldr d8, \[sp\], 32
+** ldr d8, \[sp\], 16
 ** ret
 */
 TEST_DUAL_LANE_REG (mlalt_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
-- 
2.25.1