https://gcc.gnu.org/g:12c60ff1ffd03410eb583c4231188f431ae81fa8

commit r15-5503-g12c60ff1ffd03410eb583c4231188f431ae81fa8
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Wed Nov 20 13:27:38 2024 +0000

    aarch64: Rework sme_2mode_function insns
    
    Many of the SME ZA intrinsics have two type suffixes: one for ZA
    and one for the vectors.  The ZA suffix only conveys an element
    size, while the vector suffix conveys both an element type and
    an element size.  Internally, the ZA suffix maps to an integer mode;
    e.g. za32 maps to VNx4SI.
    
    For SME2, it was relatively convenient to use the modes associated
    with both suffixes directly.  For example, the (non-widening) FMLA
    intrinsics used SME_ZA_SDF_I to iterate over the possible ZA modes,
    used SME_ZA_SDFx24 to iterate over the possible vector tuple modes,
    and used a C++ condition to make sure that the element sizes agree.
    
    However, for later patches it's more convenient to rely only on
    the vector mode in cases where the ZA and vector element sizes
    are the same.  This means splitting the widening MOPA/S patterns
    from the non-widening ones, but otherwise it's not a big change.
    
    gcc/
            * config/aarch64/iterators.md (SME_ZA_SDF_I): Delete.
            (SME_MOP_HSDF): Replace with...
            (SME_MOP_SDF): ...this.
            * config/aarch64/aarch64-sme.md: Change the non-widening FMLA and
            FMLS patterns so that both mode parameters are the same, rather than
            using both SME_ZA_SDF_I and SME_ZA_SDFx24 and checking that their
            element sizes are the same.  Split the FMOPA and FMOPS patterns
            into separate non-widening and widening forms, then update the
            non-widening forms in a similar way to FMLA and FMLS.
            * config/aarch64/aarch64-sve-builtins-functions.h
            (sme_2mode_function_t::expand): If the two type suffixes have the same
            element size, use the vector tuple mode for both mode parameters.

Diff:
---
 gcc/config/aarch64/aarch64-sme.md                  | 114 +++++++++++----------
 .../aarch64/aarch64-sve-builtins-functions.h       |  15 ++-
 gcc/config/aarch64/iterators.md                    |   5 +-
 3 files changed, 73 insertions(+), 61 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index 8fca138314c2..088bdd8d869d 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -1633,54 +1633,51 @@
 ;; - FMLS
 ;; -------------------------------------------------------------------------
 
-(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+       (unspec:SME_ZA_SDFx24
+         [(reg:SME_ZA_SDFx24 ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (match_operand:SI 0 "register_operand" "Uci")
           (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
           (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
          SME_FP_TERNARY_SLICE))]
-  "TARGET_STREAMING_SME2
-   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
-  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
 )
 
-(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+       (unspec:SME_ZA_SDFx24
+         [(reg:SME_ZA_SDFx24 ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (plus:SI (match_operand:SI 0 "register_operand" "Uci")
                    (match_operand:SI 1 "const_0_to_7_operand"))
           (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
           (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")
          SME_FP_TERNARY_SLICE))]
-  "TARGET_STREAMING_SME2
-   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
-  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
 )
 
-(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "@aarch64_sme_single_<optab><mode><mode>"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+       (unspec:SME_ZA_SDFx24
+         [(reg:SME_ZA_SDFx24 ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (match_operand:SI 0 "register_operand" "Uci")
           (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
           (vec_duplicate:SME_ZA_SDFx24
             (match_operand:<VSINGLE> 2 "register_operand" "x"))]
          SME_FP_TERNARY_SLICE))]
-  "TARGET_STREAMING_SME2
-   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
-  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>"
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
 )
 
-(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "*aarch64_sme_single_<optab><mode><mode>_plus"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+       (unspec:SME_ZA_SDFx24
+         [(reg:SME_ZA_SDFx24 ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (plus:SI (match_operand:SI 0 "register_operand" "Uci")
                    (match_operand:SI 1 "const_0_to_7_operand"))
@@ -1688,15 +1685,14 @@
           (vec_duplicate:SME_ZA_SDFx24
             (match_operand:<VSINGLE> 3 "register_operand" "x"))]
          SME_FP_TERNARY_SLICE))]
-  "TARGET_STREAMING_SME2
-   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
-  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>"
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
 )
 
-(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "@aarch64_sme_lane_<optab><mode><mode>"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+       (unspec:SME_ZA_SDFx24
+         [(reg:SME_ZA_SDFx24 ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (match_operand:SI 0 "register_operand" "Uci")
           (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
@@ -1705,15 +1701,14 @@
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SME_FP_TERNARY_SLICE))]
-  "TARGET_STREAMING_SME2
-   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
-  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]"
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]"
 )
 
-(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "*aarch64_sme_lane_<optab><mode><mode>"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+       (unspec:SME_ZA_SDFx24
+         [(reg:SME_ZA_SDFx24 ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (plus:SI (match_operand:SI 0 "register_operand" "Uci")
                    (match_operand:SI 1 "const_0_to_7_operand"))
@@ -1723,9 +1718,8 @@
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SME_FP_TERNARY_SLICE))]
-  "TARGET_STREAMING_SME2
-   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
-  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]"
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]"
 )
 
 ;; -------------------------------------------------------------------------
@@ -1876,20 +1870,34 @@
 ;; - FMOPS
 ;; -------------------------------------------------------------------------
 
-(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>"
-  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
-       (unspec:SME_ZA_SDF_I
-         [(reg:SME_ZA_SDF_I ZA_REGNUM)
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+  [(set (reg:SME_MOP_SDF ZA_REGNUM)
+       (unspec:SME_MOP_SDF
+         [(reg:SME_MOP_SDF ZA_REGNUM)
           (reg:DI SME_STATE_REGNUM)
           (match_operand:DI 0 "const_int_operand")
-          (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl")
-          (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl")
-          (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
-          (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
+          (match_operand:<VPRED> 1 "register_operand" "Upl")
+          (match_operand:<VPRED> 2 "register_operand" "Upl")
+          (match_operand:SME_MOP_SDF 3 "register_operand" "w")
+          (match_operand:SME_MOP_SDF 4 "register_operand" "w")]
          SME_FP_MOP))]
-  "TARGET_STREAMING
-   && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
-  "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
+  "TARGET_STREAMING"
+  "<b><optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>"
+)
+
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+       (unspec:VNx4SI_ONLY
+         [(reg:VNx4SI_ONLY ZA_REGNUM)
+          (reg:DI SME_STATE_REGNUM)
+          (match_operand:DI 0 "const_int_operand")
+          (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
+          (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
+          (match_operand:SVE_FULL_HF 3 "register_operand" "w")
+          (match_operand:SVE_FULL_HF 4 "register_operand" "w")]
+         SME_FP_MOP))]
+  "TARGET_STREAMING"
+  "<b><optab>\tza%0.<VNx4SI_ONLY:Vetype>, %1/m, %2/m, %3.<SVE_FULL_HF:Vetype>, %4.<SVE_FULL_HF:Vetype>"
 )
 
 ;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 08443ebd5bb9..409062ca3ddd 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -443,7 +443,11 @@ public:
 };
 
 /* General SME unspec-based functions, parameterized on both the ZA mode
-   and the vector mode.  */
+   and the vector mode.  If the elements of the ZA and vector modes are
+   the same size (e.g. _za64_f64 or _za32_s32) then the two mode arguments
+   are equal, otherwise the first mode argument is the single-vector integer
+   mode associated with the ZA suffix and the second mode argument is the
+   tuple mode associated with the vector suffix.  */
 template<insn_code (*CODE) (int, machine_mode, machine_mode),
         insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)>
 class sme_2mode_function_t : public read_write_za<unspec_based_function_base>
@@ -460,11 +464,14 @@ public:
   expand (function_expander &e) const override
   {
     insn_code icode;
+    machine_mode za_mode = e.vector_mode (0);
+    machine_mode v_mode = e.tuple_mode (1);
+    if (GET_MODE_UNIT_BITSIZE (za_mode) == GET_MODE_UNIT_BITSIZE (v_mode))
+      za_mode = v_mode;
     if (e.mode_suffix_id == MODE_single)
-      icode = CODE_SINGLE (unspec_for (e), e.vector_mode (0),
-                          e.tuple_mode (1));
+      icode = CODE_SINGLE (unspec_for (e), za_mode, v_mode);
     else
-      icode = CODE (unspec_for (e), e.vector_mode (0), e.tuple_mode (1));
+      icode = CODE (unspec_for (e), za_mode, v_mode);
     return e.use_exact_insn (icode);
   }
 };
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d7cb27e18852..bded779de480 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -652,8 +652,6 @@
 (define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI])
 (define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")])
 
-(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")])
-
 (define_mode_iterator SME_ZA_BIx24 [VNx32QI VNx64QI])
 
 (define_mode_iterator SME_ZA_BHIx124 [VNx16QI VNx32QI VNx64QI
@@ -678,8 +676,7 @@
 
 ;; The modes for which outer product instructions are supported.
 (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
-(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF
-                                   (VNx2DF "TARGET_SME_F64F64")])
+(define_mode_iterator SME_MOP_SDF [VNx4SF (VNx2DF "TARGET_SME_F64F64")])
 
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into

Reply via email to