Many of the SME ZA intrinsics have two type suffixes: one for ZA and one for the vectors. The ZA suffix only conveys an element size, while the vector suffix conveys both an element type and an element size. Internally, the ZA suffix maps to an integer mode; e.g. za32 maps to VNx4SI.
For SME2, it was relatively convenient to use the modes associated with both suffixes directly. For example, the (non-widening) FMLA intrinsics used SME_ZA_SDF_I to iterate over the possible ZA modes, used SME_ZA_SDFx24 to iterate over the possible vector tuple modes, and used a C++ condition to make sure that the element sizes agree. However, for later patches it's more convenient to rely only on the vector mode in cases where the ZA and vector element sizes are the same. This means splitting the widening MOPA/S patterns from the non-widening ones, but otherwise it's not a big change.

gcc/
	* config/aarch64/iterators.md (SME_ZA_SDF_I): Delete.
	(SME_MOP_HSDF): Replace with...
	(SME_MOP_SDF): ...this.
	* config/aarch64/aarch64-sme.md: Change the non-widening FMLA and
	FMLS patterns so that both mode parameters are the same, rather than
	using both SME_ZA_SDF_I and SME_ZA_SDFx24 and checking that their
	element sizes are the same.  Split the FMOPA and FMOPS patterns into
	separate non-widening and widening forms, then update the non-widening
	forms in a similar way to FMLA and FMLS.
	* config/aarch64/aarch64-sve-builtins-functions.h
	(sme_2mode_function_t::expand): If the two type suffixes have the
	same element size, use the vector tuple mode for both mode parameters.
--- gcc/config/aarch64/aarch64-sme.md | 114 ++++++++++-------- .../aarch64/aarch64-sve-builtins-functions.h | 15 ++- gcc/config/aarch64/iterators.md | 5 +- 3 files changed, 73 insertions(+), 61 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 8fca138314c..088bdd8d869 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -1633,54 +1633,51 @@ (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plu ;; - FMLS ;; ------------------------------------------------------------------------- -(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:SI 0 "register_operand" "Uci") (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2" ) -(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "*aarch64_sme_<optab><mode><mode>_plus" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (plus:SI (match_operand:SI 0 "register_operand" "Uci") (match_operand:SI 1 "const_0_to_7_operand")) (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>") (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" 
"Uw<vector_count>")] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3" ) -(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_single_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:SI 0 "register_operand" "Uci") (match_operand:SME_ZA_SDFx24 1 "register_operand" "w") (vec_duplicate:SME_ZA_SDFx24 (match_operand:<VSINGLE> 2 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>" ) -(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "*aarch64_sme_single_<optab><mode><mode>_plus" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (plus:SI (match_operand:SI 0 "register_operand" "Uci") (match_operand:SI 1 "const_0_to_7_operand")) @@ -1688,15 +1685,14 @@ (define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode> (vec_duplicate:SME_ZA_SDFx24 (match_operand:<VSINGLE> 3 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>" + 
"TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>" ) -(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_lane_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:SI 0 "register_operand" "Uci") (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") @@ -1705,15 +1701,14 @@ (define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]" ) -(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "*aarch64_sme_lane_<optab><mode><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (plus:SI (match_operand:SI 0 "register_operand" "Uci") (match_operand:SI 1 "const_0_to_7_operand")) @@ -1723,9 +1718,8 @@ (define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SME_FP_TERNARY_SLICE))] - "TARGET_STREAMING_SME2 - && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" - "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]" + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]" ) ;; 
------------------------------------------------------------------------- @@ -1876,20 +1870,34 @@ (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>" ;; - FMOPS ;; ------------------------------------------------------------------------- -(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>" - [(set (reg:SME_ZA_SDF_I ZA_REGNUM) - (unspec:SME_ZA_SDF_I - [(reg:SME_ZA_SDF_I ZA_REGNUM) +(define_insn "@aarch64_sme_<optab><mode><mode>" + [(set (reg:SME_MOP_SDF ZA_REGNUM) + (unspec:SME_MOP_SDF + [(reg:SME_MOP_SDF ZA_REGNUM) (reg:DI SME_STATE_REGNUM) (match_operand:DI 0 "const_int_operand") - (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl") - (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl") - (match_operand:SME_MOP_HSDF 3 "register_operand" "w") - (match_operand:SME_MOP_HSDF 4 "register_operand" "w")] + (match_operand:<VPRED> 1 "register_operand" "Upl") + (match_operand:<VPRED> 2 "register_operand" "Upl") + (match_operand:SME_MOP_SDF 3 "register_operand" "w") + (match_operand:SME_MOP_SDF 4 "register_operand" "w")] SME_FP_MOP))] - "TARGET_STREAMING - && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)" - "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>" + "TARGET_STREAMING" + "<b><optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>" +) + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") + (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_FULL_HF 3 "register_operand" "w") + (match_operand:SVE_FULL_HF 4 "register_operand" "w")] + SME_FP_MOP))] + "TARGET_STREAMING" + "<b><optab>\tza%0.<VNx4SI_ONLY:Vetype>, %1/m, %2/m, %3.<SVE_FULL_HF:Vetype>, 
%4.<SVE_FULL_HF:Vetype>" ) ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index 08443ebd5bb..409062ca3dd 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -443,7 +443,11 @@ public: }; /* General SME unspec-based functions, parameterized on both the ZA mode - and the vector mode. */ + and the vector mode. If the elements of the ZA and vector modes are + the same size (e.g. _za64_f64 or _za32_s32) then the two mode arguments + are equal, otherwise the first mode argument is the single-vector integer + mode associated with the ZA suffix and the second mode argument is the + tuple mode associated with the vector suffix. */ template<insn_code (*CODE) (int, machine_mode, machine_mode), insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)> class sme_2mode_function_t : public read_write_za<unspec_based_function_base> @@ -460,11 +464,14 @@ public: expand (function_expander &e) const override { insn_code icode; + machine_mode za_mode = e.vector_mode (0); + machine_mode v_mode = e.tuple_mode (1); + if (GET_MODE_UNIT_BITSIZE (za_mode) == GET_MODE_UNIT_BITSIZE (v_mode)) + za_mode = v_mode; if (e.mode_suffix_id == MODE_single) - icode = CODE_SINGLE (unspec_for (e), e.vector_mode (0), - e.tuple_mode (1)); + icode = CODE_SINGLE (unspec_for (e), za_mode, v_mode); else - icode = CODE (unspec_for (e), e.vector_mode (0), e.tuple_mode (1)); + icode = CODE (unspec_for (e), za_mode, v_mode); return e.use_exact_insn (icode); } }; diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index ce8f032c141..147cc95c269 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -652,8 +652,6 @@ (define_mode_iterator SVE_SFx24 [VNx8SF VNx16SF]) (define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI]) 
(define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")]) -(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")]) - (define_mode_iterator SME_ZA_BIx24 [VNx32QI VNx64QI]) (define_mode_iterator SME_ZA_BHIx124 [VNx16QI VNx32QI VNx64QI @@ -678,8 +676,7 @@ (define_mode_iterator SME_ZA_SDFx24 [VNx8SF (VNx4DF "TARGET_SME_F64F64") ;; The modes for which outer product instructions are supported. (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")]) -(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF - (VNx2DF "TARGET_SME_F64F64")]) +(define_mode_iterator SME_MOP_SDF [VNx4SF (VNx2DF "TARGET_SME_F64F64")]) ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into -- 2.25.1