https://gcc.gnu.org/g:ce6fc67da7f600b63985abeb39ba85440cbad549

commit r15-7160-gce6fc67da7f600b63985abeb39ba85440cbad549
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Thu Jan 23 13:57:01 2025 +0000

    aarch64: Fix memory cost for FPM_REGNUM
    
    GCC 15 is going to be the first release to support FPMR.
    While working on a follow-up patch, I noticed that for:
    
        (set (reg:DI R) ...)
        ...
        (set (reg:DI fpmr) (reg:DI R))
    
    IRA would prefer to spill R to memory rather than allocate a GPR.
    This is because the register move cost for GENERAL_REGS to
    MOVEABLE_SYSREGS is very high:
    
      /* Moves to/from sysregs are expensive, and must go via GPR.  */
      if (from == MOVEABLE_SYSREGS)
        return 80 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
      if (to == MOVEABLE_SYSREGS)
        return 80 + aarch64_register_move_cost (mode, from, GENERAL_REGS);
    
    but the memory cost for MOVEABLE_SYSREGS was the same as for
    GENERAL_REGS, making memory much cheaper.
    
    Loading and storing FPMR involves a GPR temporary, so the cost should
    account for moving into and out of that temporary.
    
    This did show up indirectly in some of the existing asm tests,
    where the stack frame allocated 16 bytes for callee saves (D8)
    and another 16 bytes for spilling a temporary register.
    
    It's possible that other registers need the same treatment
    and it's more than probable that this code needs a rework.
    None of that seems suitable for stage 4 though.
    
    gcc/
            * config/aarch64/aarch64.cc (aarch64_memory_move_cost): Account
            for the cost of moving in and out of MOVEABLE_SYSREGS.
    
    gcc/testsuite/
            * gcc.target/aarch64/acle/fpmr-5.c: New test.
            * gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c: Don't expect
            a spill slot to be allocated.
            * gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64.cc                            | 11 +++++++++--
 gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c           | 16 ++++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c      |  4 ++--
 .../gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c    |  2 +-
 .../gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c  |  2 +-
 .../gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c  |  2 +-
 .../gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c  |  2 +-
 .../gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c  |  2 +-
 .../gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c    |  2 +-
 9 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index dba779a8e51e..a1f5619a6152 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15858,9 +15858,16 @@ aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
            ? aarch64_tune_params.memmov_cost.load_fp
            : aarch64_tune_params.memmov_cost.store_fp);
 
+  /* If the move needs to go through GPRs, add the cost of doing that.  */
+  int base = 0;
+  if (rclass_i == MOVEABLE_SYSREGS)
+    base += (in
+            ? aarch64_register_move_cost (DImode, GENERAL_REGS, rclass_i)
+            : aarch64_register_move_cost (DImode, rclass_i, GENERAL_REGS));
+
   return (in
-         ? aarch64_tune_params.memmov_cost.load_int
-         : aarch64_tune_params.memmov_cost.store_int);
+         ? base + aarch64_tune_params.memmov_cost.load_int
+         : base + aarch64_tune_params.memmov_cost.store_int);
 }
 
 /* Implement TARGET_INSN_COST.  We have the opportunity to do something
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c b/gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c
new file mode 100644
index 000000000000..da6d7f62f908
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/fpmr-5.c
@@ -0,0 +1,16 @@
+/* { dg-options "-O" } */
+
+#include <stdint.h>
+
+void f(int cond)
+{
+  uint64_t x;
+  asm volatile ("" : "=r" (x));
+  if (cond)
+    {
+      register uint64_t fpmr asm ("fpmr") = x;
+      asm volatile ("" :: "Umv" (fpmr));
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tsub\tsp,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
index 9e54cd11c4b7..83fe5cff5d3c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (dot_lane_1_f16, svfloat16_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fdot    z0\.h, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (dot_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
@@ -151,7 +151,7 @@ TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fdot    z0\.s, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
index e7af1b6dcc69..39a272f60e0e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalb_lane_1_f16, svfloat16_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fmlalb  z0\.h, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (mlalb_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
index 07a529d8dc9b..2d084515c856 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlallbb_lane_1_f32, svfloat32_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fmlallbb        z0\.s, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (mlallbb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
index 9da29fbfb0b7..6dadbcf2e173 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlallbt_lane_1_f32, svfloat32_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fmlallbt        z0\.s, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (mlallbt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
index cbe297c188b7..6b9e06c7550c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalltb_lane_1_f32, svfloat32_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fmlalltb        z0\.s, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (mlalltb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
index fc5bfba7877c..f33c4fe79658 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalltt_lane_1_f32, svfloat32_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fmlalltt        z0\.s, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (mlalltt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
index 4f5a1045420a..604d3325d605 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
@@ -70,7 +70,7 @@ TEST_DUAL_Z (mlalt_lane_1_f16, svfloat16_t, svmfloat8_t,
 **     msr     fpmr, x0
 **     mov     (z[0-7])\.d, z8\.d
 **     fmlalt  z0\.h, z1\.b, \1\.b\[1\]
-**     ldr     d8, \[sp\], 32
+**     ldr     d8, \[sp\], 16
 **     ret
 */
 TEST_DUAL_LANE_REG (mlalt_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,

Reply via email to