The IRA combine_and_move pass runs if the scheduler is disabled and aggressively
combines moves. The movhf/sf/df patterns accept all FP immediates because they
rely on a later split pattern to materialize them. However, splits do not happen
during IRA, so an FP immediate created there ends up as an extra literal load.
To avoid this, use a more accurate check that blocks creating FP immediates
that need a split during combine_and_move.
double f(void) { return 128.0; }
Before the patch, -O2 -fno-schedule-insns gives:
adrp x0, .LC0
ldr d0, [x0, #:lo12:.LC0]
ret
After patch:
mov x0, 4638707616191610880
fmov d0, x0
ret
Passes bootstrap & regress, OK for commit?
gcc/ChangeLog:
* config/aarch64/aarch64.md (movhf_aarch64): Use aarch64_valid_fp_move.
(movsf_aarch64): Likewise.
(movdf_aarch64): Likewise.
* config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function.
* config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise.
---
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index
9be64913091443a62dc6d1a80c295dc52aaeb950..f4839413cf3e995871b728e2a36e332b89cd6abf
100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -758,6 +758,7 @@ bool aarch64_advsimd_struct_mode_p (machine_mode mode);
opt_machine_mode aarch64_vq_mode (scalar_mode);
opt_machine_mode aarch64_full_sve_mode (scalar_mode);
bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
+bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index
a6cc00e74abd4d96fa47f5612f271eb4fc95e7a1..130c1ff1e363db253b008e71c7e8e5deec8c46c8
100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11144,6 +11144,37 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
return aarch64_simd_valid_mov_imm (v_op);
}
+/* Return TRUE if moving SRC into DST in mode MODE is a valid fp move.
+   This is used as the insn condition of the FP *mov<mode>_aarch64 patterns.
+   The checks are applied in this order:
+   - Nothing is valid without TARGET_FLOAT.
+   - A SRC accepted by aarch64_reg_or_fp_zero is valid for any DST.
+   - Every other SRC requires DST to be a register operand.
+   - With a register DST, a memory SRC is always valid.
+   - For non-decimal float modes, a constant SRC accepted by
+     aarch64_can_const_movi_rtx_p, aarch64_float_const_representable_p or
+     aarch64_float_const_zero_rtx_p is always valid; any other SRC accepted
+     by aarch64_float_const_rtx_p is valid only while pseudos can still be
+     created and IRA is not in progress.
+   - Any remaining SRC is valid only while can_create_pseudo_p () holds. */
+bool
+aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
+{
+ if (!TARGET_FLOAT)
+ return false;
+
+ if (aarch64_reg_or_fp_zero (src, mode))
+ return true;
+
+ if (!register_operand (dst, mode))
+ return false;
+
+ if (MEM_P (src))
+ return true;
+
+ if (!DECIMAL_FLOAT_MODE_P (mode))
+ {
+ if (aarch64_can_const_movi_rtx_p (src, mode)
+ || aarch64_float_const_representable_p (src)
+ || aarch64_float_const_zero_rtx_p (src))
+ return true;
+
+ /* The remaining FP constants rely on a split pattern, and splits do
+ not happen during IRA. Block the combine_and_move pass from creating
+ such FP immediates during IRA (the result would be an extra literal
+ load) - only allow this before regalloc. */
+ if (aarch64_float_const_rtx_p (src))
+ return can_create_pseudo_p () && !ira_in_progress;
+ }
+
+ return can_create_pseudo_p ();
+}
/* Return the fixed registers used for condition codes. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index
20956fc49d8232763b127629ded17037ad7d7960..5d3fa9628952031f52474291e160b957d774b011
100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1644,8 +1644,7 @@ (define_expand "mov<mode>"
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:HFBF 0 "nonimmediate_operand")
(match_operand:HFBF 1 "general_operand"))]
- "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
- || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
{@ [ cons: =0 , 1 ; attrs: type , arch ]
[ w , Y ; neon_move , simd ] movi\t%0.4h, #0
[ w , ?rY ; f_mcr , fp16 ] fmov\t%h0, %w1
@@ -1668,8 +1667,7 @@ (define_insn "*mov<mode>_aarch64"
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:SFD 0 "nonimmediate_operand")
(match_operand:SFD 1 "general_operand"))]
- "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
- || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
{@ [ cons: =0 , 1 ; attrs: type , arch ]
[ w , Y ; neon_move , simd ] movi\t%0.2s, #0
[ w , ?rY ; f_mcr , * ] fmov\t%s0, %w1
@@ -1689,8 +1687,7 @@ (define_insn "*mov<mode>_aarch64"
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand")
(match_operand:DFD 1 "general_operand"))]
- "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
- || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
{@ [ cons: =0 , 1 ; attrs: type , arch ]
[ w , Y ; neon_move , simd ] movi\t%d0, #0
[ w , ?rY ; f_mcr , * ] fmov\t%d0, %x1