Hi, This patch adds decimal floating-point (DFP) support to aarch64. It is only the base support: since there is no hardware support for DFP, the patch just defines the ABI. The ABI I chose treats _Decimal32 like float, _Decimal64 like double, and _Decimal128 like long double; that is, they are passed in the floating-point registers (sN, dN, qN). Is this ABI OK?
Is the patch OK? Bootstrapped and tested on aarch64-linux-gnu with --enable-decimal-float with no regressions; all of the DFP testcases pass. Thanks, Andrew Pinski gcc/ChangeLog: * config/aarch64/aarch64.c (aarch64_split_128bit_move): Handle TDmode. (aarch64_classify_address): Likewise. (aarch64_legitimize_address_displacement): Likewise. (aarch64_legitimize_address): Likewise. (aarch64_constant_pool_reload_icode): Handle SD, DD, and TD modes. (aarch64_secondary_reload): Handle TDmode. (aarch64_valid_floating_const): For decimal floating point return false. (aarch64_gimplify_va_arg_expr): Handle SD, DD, and TD modes. (aapcs_vfp_sub_candidate): Likewise. (aarch64_vfp_is_call_or_return_candidate): Handle MODE_DECIMAL_FLOAT. (aarch64_scalar_mode_supported_p): For DECIMAL_FLOAT_MODE_P, return default_decimal_float_supported_p. * config/aarch64/iterators.md (GPF_TF_F16): Add SD, DD, and TD modes. (SFD): New iterator. (DFD): New iterator. (TFD): New iterator. (GPF_TF): Add SD, DD, and TD modes. (TX): Add TD mode. * config/aarch64/aarch64.md (*movsf_aarch64): Use SFD iterator. (*movdf_aarch64): Use DFD iterator. (*movtf_aarch64): Use TFD iterator. (define_split for TF): Use TFD iterator. gcc/testsuite/ChangeLog: * c-c++-common/dfp/pr39986.c: Allow for word instead of just long. libgcc/ChangeLog: * config.host (aarch64*-*-elf): Add t-dfprules to tmake_file. (aarch64*-*-freebsd*): Likewise. (aarch64*-*-linux*): Likewise.
Index: config/aarch64/aarch64.c =================================================================== --- config/aarch64/aarch64.c (revision 250186) +++ config/aarch64/aarch64.c (working copy) @@ -1653,7 +1653,7 @@ aarch64_split_128bit_move (rtx dst, rtx machine_mode mode = GET_MODE (dst); - gcc_assert (mode == TImode || mode == TFmode); + gcc_assert (mode == TImode || mode == TFmode || mode == TDmode); gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); @@ -1673,11 +1673,16 @@ aarch64_split_128bit_move (rtx dst, rtx emit_insn (gen_aarch64_movtilow_di (dst, src_lo)); emit_insn (gen_aarch64_movtihigh_di (dst, src_hi)); } - else + else if (mode == TFmode) { emit_insn (gen_aarch64_movtflow_di (dst, src_lo)); emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi)); } + else + { + emit_insn (gen_aarch64_movtdlow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtdhigh_di (dst, src_hi)); + } return; } else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) @@ -1690,11 +1695,16 @@ aarch64_split_128bit_move (rtx dst, rtx emit_insn (gen_aarch64_movdi_tilow (dst_lo, src)); emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src)); } - else + else if (mode == TFmode) { emit_insn (gen_aarch64_movdi_tflow (dst_lo, src)); emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src)); } + else if (mode == TDmode) + { + emit_insn (gen_aarch64_movdi_tdlow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tdhigh (dst_hi, src)); + } return; } } @@ -4420,10 +4430,11 @@ aarch64_classify_address (struct aarch64 rtx op0, op1; /* On BE, we use load/store pair for all large int mode load/stores. - TI/TFmode may also use a load/store pair. */ + TI/TF/TDmode may also use a load/store pair. 
*/ bool load_store_pair_p = (outer_code == PARALLEL || mode == TImode || mode == TFmode + || mode == TDmode || (BYTES_BIG_ENDIAN && aarch64_vect_struct_mode_p (mode))); @@ -4473,7 +4484,7 @@ aarch64_classify_address (struct aarch64 info->base = op0; info->offset = op1; - /* TImode and TFmode values are allowed in both pairs of X + /* TImode and TFmode and TDmode values are allowed in both pairs of X registers and individual Q registers. The available address modes are: X,X: 7-bit signed scaled offset @@ -4482,7 +4493,7 @@ aarch64_classify_address (struct aarch64 When performing the check for pairs of X registers i.e. LDP/STP pass down DImode since that is the natural size of the LDP/STP instruction memory accesses. */ - if (mode == TImode || mode == TFmode) + if (mode == TImode || mode == TFmode || mode == TDmode) return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) && (offset_9bit_signed_unscaled_p (mode, offset) || offset_12bit_unsigned_scaled_p (mode, offset))); @@ -4558,14 +4569,14 @@ aarch64_classify_address (struct aarch64 info->offset = XEXP (XEXP (x, 1), 1); offset = INTVAL (info->offset); - /* TImode and TFmode values are allowed in both pairs of X + /* TImode and TFmode and TDmode values are allowed in both pairs of X registers and individual Q registers. The available address modes are: X,X: 7-bit signed scaled offset Q: 9-bit signed offset We conservatively require an offset representable in either mode. */ - if (mode == TImode || mode == TFmode) + if (mode == TImode || mode == TFmode || mode == TDmode) return (aarch64_offset_7bit_signed_scaled_p (mode, offset) && offset_9bit_signed_unscaled_p (mode, offset)); @@ -4714,7 +4725,7 @@ aarch64_legitimize_address_displacement HOST_WIDE_INT offset = INTVAL (*disp); HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 
0xfff : 0x3ffc); - if (mode == TImode || mode == TFmode + if (mode == TImode || mode == TFmode || mode == TDmode || (offset & (GET_MODE_SIZE (mode) - 1)) != 0) base = (offset + 0x100) & ~0x1ff; @@ -5649,7 +5660,7 @@ aarch64_legitimize_address (rtx x, rtx / /* Small negative offsets are supported. */ else if (IN_RANGE (offset, -256, 0)) base_offset = 0; - else if (mode == TImode || mode == TFmode) + else if (mode == TImode || mode == TFmode || mode == TDmode) base_offset = (offset + 0x100) & ~0x1ff; /* Use 12-bit offset by access size. */ else @@ -5681,6 +5692,15 @@ aarch64_constant_pool_reload_icode (mach case TFmode: return CODE_FOR_aarch64_reload_movcptfdi; + case SDmode: + return CODE_FOR_aarch64_reload_movcpsddi; + + case DDmode: + return CODE_FOR_aarch64_reload_movcpdddi; + + case TDmode: + return CODE_FOR_aarch64_reload_movcptddi; + case V8QImode: return CODE_FOR_aarch64_reload_movcpv8qidi; @@ -5731,25 +5751,27 @@ aarch64_secondary_reload (bool in_p ATTR /* Without the TARGET_SIMD instructions we cannot move a Q register to a Q register directly. We need a scratch. */ - if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x) + if (REG_P (x) && (mode == TFmode || mode == TDmode || mode == TImode) && mode == GET_MODE (x) && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD && reg_class_subset_p (rclass, FP_REGS)) { if (mode == TFmode) sri->icode = CODE_FOR_aarch64_reload_movtf; + else if (mode == TDmode) + sri->icode = CODE_FOR_aarch64_reload_movtd; else if (mode == TImode) sri->icode = CODE_FOR_aarch64_reload_movti; return NO_REGS; } - /* A TFmode or TImode memory access should be handled via an FP_REGS + /* A TFmode or TImode or TDmode memory access should be handled via an FP_REGS because AArch64 has richer addressing modes for LDR/STR instructions than LDP/STP instructions. 
*/ if (TARGET_FLOAT && rclass == GENERAL_REGS && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) return FP_REGS; - if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) + if (rclass == FP_REGS && (mode == TImode || mode == TFmode || mode == TDmode) && CONSTANT_P(x)) return GENERAL_REGS; return NO_REGS; @@ -10215,6 +10237,10 @@ aarch64_legitimate_pic_operand_p (rtx x) static bool aarch64_valid_floating_const (machine_mode mode, rtx x) { + /* FIXME: Handle decimal floating point constants. */ + if (GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT) + return false; + if (!CONST_DOUBLE_P (x)) return false; @@ -10636,6 +10662,18 @@ aarch64_gimplify_va_arg_expr (tree valis field_t = long_double_type_node; field_ptr_t = long_double_ptr_type_node; break; + case SDmode: + field_t = dfloat32_type_node; + field_ptr_t = dfloat32_ptr_type_node; + break; + case DDmode: + field_t = dfloat64_type_node; + field_ptr_t = dfloat64_ptr_type_node; + break; + case TDmode: + field_t = dfloat128_type_node; + field_ptr_t = dfloat128_ptr_type_node; + break; case HFmode: field_t = aarch64_fp16_type_node; field_ptr_t = aarch64_fp16_ptr_type_node; @@ -10802,7 +10840,9 @@ aapcs_vfp_sub_candidate (const_tree type case REAL_TYPE: mode = TYPE_MODE (type); if (mode != DFmode && mode != SFmode - && mode != TFmode && mode != HFmode) + && mode != TFmode && mode != HFmode + && mode != DDmode && mode != SDmode + && mode != TDmode) return -1; if (*modep == VOIDmode) @@ -11030,7 +11070,9 @@ aarch64_vfp_is_call_or_return_candidate if (is_ha != NULL) *is_ha = false; - if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) + if ((!composite_p + && (GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT)) || aarch64_short_vector_p (type, mode)) { *count = 1; @@ -15028,6 +15070,9 @@ aarch64_libgcc_floating_mode_supported_p static bool aarch64_scalar_mode_supported_p (machine_mode mode) { + if (DECIMAL_FLOAT_MODE_P (mode)) + return default_decimal_float_supported_p (); 
+ return (mode == HFmode ? true : default_scalar_mode_supported_p (mode)); Index: config/aarch64/aarch64.md =================================================================== --- config/aarch64/aarch64.md (revision 250186) +++ config/aarch64/aarch64.md (working copy) @@ -1081,11 +1081,11 @@ (define_insn "*movhf_aarch64" (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")] ) -(define_insn "*movsf_aarch64" - [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], SFmode) - || aarch64_reg_or_fp_zero (operands[1], SFmode))" +(define_insn "*mov<mode>_aarch64" + [(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") + (match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) + || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" "@ movi\\t%0.2s, #0 fmov\\t%s0, %w1 @@ -1102,11 +1102,11 @@ (define_insn "*movsf_aarch64" (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] ) -(define_insn "*movdf_aarch64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], DFmode) - || aarch64_reg_or_fp_zero (operands[1], DFmode))" +(define_insn "*mov<mode>_aarch64" + [(set (match_operand:DFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") + (match_operand:DFD 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) + || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" "@ movi\\t%d0, #0 fmov\\t%d0, %x1 @@ -1123,13 +1123,13 @@ (define_insn "*movdf_aarch64" (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] ) -(define_insn "*movtf_aarch64" - [(set (match_operand:TF 0 +(define_insn "*mov<mode>_aarch64" + [(set (match_operand:TFD 0 "nonimmediate_operand" "=w,?&r,w 
,?r,w,?w,w,m,?r,m ,m") - (match_operand:TF 1 + (match_operand:TFD 1 "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] - "TARGET_FLOAT && (register_operand (operands[0], TFmode) - || aarch64_reg_or_fp_zero (operands[1], TFmode))" + "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) + || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" "@ mov\\t%0.16b, %1.16b # @@ -1149,8 +1149,8 @@ (define_insn "*movtf_aarch64" ) (define_split - [(set (match_operand:TF 0 "register_operand" "") - (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + [(set (match_operand:TFD 0 "register_operand" "") + (match_operand:TFD 1 "aarch64_reg_or_imm" ""))] "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" [(const_int 0)] { Index: config/aarch64/iterators.md =================================================================== --- config/aarch64/iterators.md (revision 250186) +++ config/aarch64/iterators.md (working copy) @@ -44,14 +44,23 @@ (define_mode_iterator GPF [SF DF]) ;; Iterator for all scalar floating point modes (HF, SF, DF) (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF]) -;; Iterator for all scalar floating point modes (HF, SF, DF and TF) -(define_mode_iterator GPF_TF_F16 [HF SF DF TF]) +;; Iterator for all scalar floating point modes (HF, SF, DF and TF [SD, DD, and TD]) +(define_mode_iterator GPF_TF_F16 [HF SF DF TF SD DD TD]) + +;; Iterator for scalar 32bit fp modes (SF, SD) +(define_mode_iterator SFD [SD SF]) + +;; Iterator for scalar 64bit fp modes (DF, DD) +(define_mode_iterator DFD [DD DF]) + +;; Iterator for scalar 128bit fp modes (TF, TD) +(define_mode_iterator TFD [TD TF]) ;; Double vector modes. (define_mode_iterator VDF [V2SF V4HF]) -;; Iterator for all scalar floating point modes (SF, DF and TF) -(define_mode_iterator GPF_TF [SF DF TF]) +;; Iterator for all scalar floating point modes (SF, DF and TF [SD, DD, and TD]) +(define_mode_iterator GPF_TF [SF DF TF SD DD TD]) ;; Integer vector modes. 
(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) @@ -224,7 +233,7 @@ (define_mode_iterator VB [V8QI V16QI]) ;; 2 and 4 lane SI modes. (define_mode_iterator VS [V2SI V4SI]) -(define_mode_iterator TX [TI TF]) +(define_mode_iterator TX [TI TF TD]) ;; Opaque structure modes. (define_mode_iterator VSTRUCT [OI CI XI]) Index: testsuite/c-c++-common/dfp/pr39986.c =================================================================== --- testsuite/c-c++-common/dfp/pr39986.c (revision 250186) +++ testsuite/c-c++-common/dfp/pr39986.c (working copy) @@ -14,19 +14,19 @@ _Decimal128 f = -678901.234e-6dl; /* The first value is DPD, the second is BID. The order differs depending on whether the target is big-endian or little-endian. */ -/* { dg-final { scan-assembler ".long\t(572653859|822183807)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(572653859|822183807)\n" } } */ -/* { dg-final { scan-assembler ".long\t(-1572863965|-1308622825)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(-1572863965|-1308622825)\n" } } */ -/* { dg-final { scan-assembler ".long\t(52|34)\n" } } */ -/* { dg-final { scan-assembler ".long\t(572784640|824180736)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(52|34)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(572784640|824180736)\n" } } */ -/* { dg-final { scan-assembler ".long\t(4736|4500)\n" } } */ -/* { dg-final { scan-assembler ".long\t(-1574174720|-1319108608)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(4736|4500)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(-1574174720|-1319108608)\n" } } */ -/* { dg-final { scan-assembler ".long\t(-1975952433|957645077)\n" } } */ -/* { dg-final { scan-assembler ".long\t(190215|132222)\n" } } */ -/* { dg-final { scan-assembler ".long\t(574193664|835452928)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(-1975952433|957645077)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(190215|132222)\n" } } */ +/* { dg-final 
{ scan-assembler ".(word|long)\t(574193664|835452928)\n" } } */ -/* { dg-final { scan-assembler ".long\t(931280180|678901234)\n" } } */ -/* { dg-final { scan-assembler ".long\t(-1576681472|-1339162624)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(931280180|678901234)\n" } } */ +/* { dg-final { scan-assembler ".(word|long)\t(-1576681472|-1339162624)\n" } } */