https://gcc.gnu.org/g:12c60ff1ffd03410eb583c4231188f431ae81fa8
commit r15-5503-g12c60ff1ffd03410eb583c4231188f431ae81fa8
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Wed Nov 20 13:27:38 2024 +0000

    aarch64: Rework sme_2mode_function insns

    Many of the SME ZA intrinsics have two type suffixes: one for ZA
    and one for the vectors.  The ZA suffix only conveys an element
    size, while the vector suffix conveys both an element type and
    an element size.  Internally, the ZA suffix maps to an integer
    mode; e.g. za32 maps to VNx4SI.

    For SME2, it was relatively convenient to use the modes associated
    with both suffixes directly.  For example, the (non-widening) FMLA
    intrinsics used SME_ZA_SDF_I to iterate over the possible ZA modes,
    used SME_ZA_SDFx24 to iterate over the possible vector tuple modes,
    and used a C++ condition to make sure that the element sizes agree.

    However, for later patches it's more convenient to rely only on
    the vector mode in cases where the ZA and vector element sizes
    are the same.  This means splitting the widening MOPA/S patterns
    from the non-widening ones, but otherwise it's not a big change.

    gcc/
            * config/aarch64/iterators.md (SME_ZA_SDF_I): Delete.
            (SME_MOP_HSDF): Replace with...
            (SME_MOP_SDF): ...this.
            * config/aarch64/aarch64-sme.md: Change the non-widening FMLA
            and FMLS patterns so that both mode parameters are the same,
            rather than using both SME_ZA_SDF_I and SME_ZA_SDFx24 and
            checking that their element sizes are the same.  Split the
            FMOPA and FMOPS patterns into separate non-widening and
            widening forms, then update the non-widening forms in a
            similar way to FMLA and FMLS.
            * config/aarch64/aarch64-sve-builtins-functions.h
            (sme_2mode_function_t::expand): If the two type suffixes have
            the same element size, use the vector tuple mode for both mode
            parameters.

Diff: --- gcc/config/aarch64/aarch64-sme.md | 114 +++++++++++---------- .../aarch64/aarch64-sve-builtins-functions.h | 15 ++- gcc/config/aarch64/iterators.md | 5 +- 3 files changed, 73 insertions(+), 61 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 8fca138314c2..088bdd8d869d 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -1633,54 +1633,51 @@ ;; - FMLS ;; ------------------------------------------------------------------------- -(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:SI 0 "register_operand" "Uci") (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2" ) -(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "*aarch64_sme_<optab><mode><mode>_plus" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (plus:SI (match_operand:SI 0 "register_operand" "Uci") (match_operand:SI 1 "const_0_to_7_operand")) (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>") (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && 
<SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3" ) -(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_single_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:SI 0 "register_operand" "Uci") (match_operand:SME_ZA_SDFx24 1 "register_operand" "w") (vec_duplicate:SME_ZA_SDFx24 (match_operand:<VSINGLE> 2 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>" ) -(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "*aarch64_sme_single_<optab><mode><mode>_plus" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (plus:SI (match_operand:SI 0 "register_operand" "Uci") (match_operand:SI 1 "const_0_to_7_operand")) @@ -1688,15 +1685,14 @@ (vec_duplicate:SME_ZA_SDFx24 (match_operand:<VSINGLE> 3 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>" ) -(define_insn 
"@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_lane_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:SI 0 "register_operand" "Uci") (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") @@ -1705,15 +1701,14 @@ (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]" ) -(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "*aarch64_sme_lane_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (plus:SI (match_operand:SI 0 "register_operand" "Uci") (match_operand:SI 1 "const_0_to_7_operand")) @@ -1723,9 +1718,8 @@ (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]" ) ;; ------------------------------------------------------------------------- @@ -1876,20 +1870,34 @@ ;; - FMOPS ;; ------------------------------------------------------------------------- -(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>" - [(set (reg:SME_ZA_SDF_I 
ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_<optab><mode><mode>" + [(set (reg:SME_MOP_SDF ZA_REGNUM) + (unspec:SME_MOP_SDF + [(reg:SME_MOP_SDF ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:DI 0 "const_int_operand") - (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl") - (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl") - (match_operand:SME_MOP_HSDF 3 "register_operand" "w") - (match_operand:SME_MOP_HSDF 4 "register_operand" "w")] + (match_operand:<VPRED> 1 "register_operand" "Upl") + (match_operand:<VPRED> 2 "register_operand" "Upl") + (match_operand:SME_MOP_SDF 3 "register_operand" "w") + (match_operand:SME_MOP_SDF 4 "register_operand" "w")] SME_FP_MOP))] - "TARGET_STREAMING - && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)" - "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>" + "TARGET_STREAMING" + "<b><optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>" +) + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") + (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_FULL_HF 3 "register_operand" "w") + (match_operand:SVE_FULL_HF 4 "register_operand" "w")] + SME_FP_MOP))] + "TARGET_STREAMING" + "<b><optab>\tza%0.<VNx4SI_ONLY:Vetype>, %1/m, %2/m, %3.<SVE_FULL_HF:Vetype>, %4.<SVE_FULL_HF:Vetype>" ) ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index 08443ebd5bb9..409062ca3ddd 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ 
b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -443,7 +443,11 @@ public: }; /* General SME unspec-based functions, parameterized on both the ZA mode - and the vector mode. */ + and the vector mode. If the elements of the ZA and vector modes are + the same size (e.g. _za64_f64 or _za32_s32) then the two mode arguments + are equal, otherwise the first mode argument is the single-vector integer + mode associated with the ZA suffix and the second mode argument is the + tuple mode associated with the vector suffix. */ template<insn_code (*CODE) (int, machine_mode, machine_mode), insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)> class sme_2mode_function_t : public read_write_za<unspec_based_function_base> @@ -460,11 +464,14 @@ public: expand (function_expander &e) const override { insn_code icode; + machine_mode za_mode = e.vector_mode (0); + machine_mode v_mode = e.tuple_mode (1); + if (GET_MODE_UNIT_BITSIZE (za_mode) == GET_MODE_UNIT_BITSIZE (v_mode)) + za_mode = v_mode; if (e.mode_suffix_id == MODE_single) - icode = CODE_SINGLE (unspec_for (e), e.vector_mode (0), - e.tuple_mode (1)); + icode = CODE_SINGLE (unspec_for (e), za_mode, v_mode); else - icode = CODE (unspec_for (e), e.vector_mode (0), e.tuple_mode (1)); + icode = CODE (unspec_for (e), za_mode, v_mode); return e.use_exact_insn (icode); } }; diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d7cb27e18852..bded779de480 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -652,8 +652,6 @@ (define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI]) (define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")]) -(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")]) - (define_mode_iterator SME_ZA_BIx24 [VNx32QI VNx64QI]) (define_mode_iterator SME_ZA_BHIx124 [VNx16QI VNx32QI VNx64QI @@ -678,8 +676,7 @@ ;; The modes for which outer product instructions are supported. 
(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")]) -(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF - (VNx2DF "TARGET_SME_F64F64")]) +(define_mode_iterator SME_MOP_SDF [VNx4SF (VNx2DF "TARGET_SME_F64F64")]) ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into