The IRA combine_and_move pass runs if the scheduler is disabled and aggressively combines moves. The movsf/df patterns allow all FP immediates since they rely on a split pattern. However, splits do not happen during IRA, so the result is extra literal loads. To avoid this, use a more accurate check that blocks the creation of FP immediates that would need a split during combine_and_move.
double f(void) { return 128.0; } -O2 -fno-schedule-insns gives: adrp x0, .LC0 ldr d0, [x0, #:lo12:.LC0] ret After patch: mov x0, 4638707616191610880 fmov d0, x0 ret Passes bootstrap & regress, OK for commit? gcc/ChangeLog: * config/aarch64/aarch64.md (movhf_aarch64): Use aarch64_valid_fp_move. (movsf_aarch64): Likewise. (movdf_aarch64): Likewise. * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function. * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise. --- diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 9be64913091443a62dc6d1a80c295dc52aaeb950..f4839413cf3e995871b728e2a36e332b89cd6abf 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -758,6 +758,7 @@ bool aarch64_advsimd_struct_mode_p (machine_mode mode); opt_machine_mode aarch64_vq_mode (scalar_mode); opt_machine_mode aarch64_full_sve_mode (scalar_mode); bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); +bool aarch64_valid_fp_move (rtx, rtx, machine_mode); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index a6cc00e74abd4d96fa47f5612f271eb4fc95e7a1..130c1ff1e363db253b008e71c7e8e5deec8c46c8 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -11144,6 +11144,37 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) return aarch64_simd_valid_mov_imm (v_op); } +/* Return TRUE if DST and SRC with mode MODE is a valid fp move. 
*/ +bool +aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode) +{ + if (!TARGET_FLOAT) + return false; + + if (aarch64_reg_or_fp_zero (src, mode)) + return true; + + if (!register_operand (dst, mode)) + return false; + + if (MEM_P (src)) + return true; + + if (!DECIMAL_FLOAT_MODE_P (mode)) + { + if (aarch64_can_const_movi_rtx_p (src, mode) + || aarch64_float_const_representable_p (src) + || aarch64_float_const_zero_rtx_p (src)) + return true; + + /* Block combine_and_move pass from creating FP immediates which + require a split during IRA - only allow this before regalloc. */ + if (aarch64_float_const_rtx_p (src)) + return can_create_pseudo_p () && !ira_in_progress; + } + + return can_create_pseudo_p (); +} /* Return the fixed registers used for condition codes. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 20956fc49d8232763b127629ded17037ad7d7960..5d3fa9628952031f52474291e160b957d774b011 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1644,8 +1644,7 @@ (define_expand "mov<mode>" (define_insn "*mov<mode>_aarch64" [(set (match_operand:HFBF 0 "nonimmediate_operand") (match_operand:HFBF 1 "general_operand"))] - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)" {@ [ cons: =0 , 1 ; attrs: type , arch ] [ w , Y ; neon_move , simd ] movi\t%0.4h, #0 [ w , ?rY ; f_mcr , fp16 ] fmov\t%h0, %w1 @@ -1668,8 +1667,7 @@ (define_insn "*mov<mode>_aarch64" (define_insn "*mov<mode>_aarch64" [(set (match_operand:SFD 0 "nonimmediate_operand") (match_operand:SFD 1 "general_operand"))] - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)" {@ [ cons: =0 , 1 ; attrs: type , arch ] [ w , Y ; neon_move , simd ] movi\t%0.2s, #0 [ w , ?rY ; f_mcr , * ] 
fmov\t%s0, %w1 @@ -1689,8 +1687,7 @@ (define_insn "*mov<mode>_aarch64" (define_insn "*mov<mode>_aarch64" [(set (match_operand:DFD 0 "nonimmediate_operand") (match_operand:DFD 1 "general_operand"))] - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)" {@ [ cons: =0 , 1 ; attrs: type , arch ] [ w , Y ; neon_move , simd ] movi\t%d0, #0 [ w , ?rY ; f_mcr , * ] fmov\t%d0, %x1