This patch adds support for FEAT_SME2p1. There are two sets of new instructions: MOVAZ to read from ZA and zero the source data, and new forms of ZERO. All of them require streaming mode.
MOVAZ can't reuse the existing UNSPEC_SME_READ* patterns because
of the write to ZA. I did wonder about trying to use a define_subst,
but it seemed a bit too awkward.
gcc/
* config/aarch64/aarch64-option-extensions.def (sme2p1): New extension.
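* doc/invoke.texi: Document it.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_SME2p1 when the extension is enabled.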
* config/aarch64/aarch64.h (TARGET_STREAMING_SME2p1): New macro.
* config/aarch64/iterators.md (UNSPEC_SME_READZ, UNSPEC_SME_READZ_HOR)
(UNSPEC_SME_READZ_VER): New unspecs.
(optab, hv): Handle them.
(SME_READZ_HV): New int iterator.
* config/aarch64/aarch64-sme.md
(UNSPEC_SME_ZERO_SLICES): New unspec.
(@aarch64_sme_<SME_READZ_HV:optab><v_int_container><mode>)
(*aarch64_sme_<SME_READZ_HV:optab><v_int_container><mode>_plus)
(@aarch64_sme_<SME_READZ_HV:optab><VNx1TI_ONLY:mode><SVE_FULL:mode>)
(@aarch64_sme_<SME_READZ_HV:optab><SVE_FULLx24:mode><mode>)
(*aarch64_sme_<SME_READZ_HV:optab><SVE_FULLx24:mode><mode>_plus)
(@aarch64_sme_readz<mode>, *aarch64_sme_readz<mode>_plus)
(@aarch64_sme_zero_za_slices<mode>): New patterns.
(*aarch64_sme_zero_za_slices<mode>_plus): Likewise.
* config/aarch64/aarch64-sve-builtins-shapes.h
(inherent_za_slice): Declare.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(inherent_za_slice_def, inherent_za_slice): New shape.
* config/aarch64/aarch64-sve-builtins-sme.h (svreadz_za)
(svreadz_hor_za, svreadz_ver_za): Declare.
* config/aarch64/aarch64-sve-builtins-sme.cc
(svread_za_slice_base): New class, split out from...
(svread_za_impl): ...here.
(svreadz_za_impl, svreadz_za_tile_impl): New type aliases.
(zero_slices_mode): New function.
(svzero_za_impl::expand): Handle the slice forms.
(svreadz_za, svreadz_hor_za, svreadz_ver_za): New functions.
* config/aarch64/aarch64-sve-builtins-sme.def: Add the SME2p1
instructions.
gcc/testsuite/
* lib/target-supports.exp: Test the assembler for sme2p1 support.
* gcc.target/aarch64/pragma_cpp_predefs_4.c: Test __ARM_FEATURE_SME2p1.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c: New test.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za16.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za16_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za16_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za32.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za32_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za32_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za64.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za64_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za64_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za16.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za16_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za16_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za32.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za32_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za32_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za64.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za64_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za64_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za16_vg1x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za16_vg1x4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za32_vg1x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za32_vg1x4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za64_vg1x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za64_vg1x4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg1x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg1x4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x1.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x4.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x1.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x2.c: Likewise.
* gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x4.c: Likewise.
---
gcc/config/aarch64/aarch64-c.cc | 2 +
.../aarch64/aarch64-option-extensions.def | 2 +
gcc/config/aarch64/aarch64-sme.md | 217 +++++++++++++++++-
.../aarch64/aarch64-sve-builtins-shapes.cc | 11 +
.../aarch64/aarch64-sve-builtins-shapes.h | 1 +
.../aarch64/aarch64-sve-builtins-sme.cc | 61 ++++-
.../aarch64/aarch64-sve-builtins-sme.def | 11 +
gcc/config/aarch64/aarch64-sve-builtins-sme.h | 3 +
gcc/config/aarch64/aarch64.h | 2 +
gcc/config/aarch64/iterators.md | 8 +
gcc/doc/invoke.texi | 3 +
.../gcc.target/aarch64/pragma_cpp_predefs_4.c | 11 +
.../aarch64/sme2/acle-asm/readz_hor_za128.c | 187 +++++++++++++++
.../aarch64/sme2/acle-asm/readz_hor_za16.c | 127 ++++++++++
.../sme2/acle-asm/readz_hor_za16_vg2.c | 144 ++++++++++++
.../sme2/acle-asm/readz_hor_za16_vg4.c | 142 ++++++++++++
.../aarch64/sme2/acle-asm/readz_hor_za32.c | 137 +++++++++++
.../sme2/acle-asm/readz_hor_za32_vg2.c | 116 ++++++++++
.../sme2/acle-asm/readz_hor_za32_vg4.c | 133 +++++++++++
.../aarch64/sme2/acle-asm/readz_hor_za64.c | 127 ++++++++++
.../sme2/acle-asm/readz_hor_za64_vg2.c | 117 ++++++++++
.../sme2/acle-asm/readz_hor_za64_vg4.c | 133 +++++++++++
.../aarch64/sme2/acle-asm/readz_hor_za8.c | 87 +++++++
.../aarch64/sme2/acle-asm/readz_hor_za8_vg2.c | 144 ++++++++++++
.../aarch64/sme2/acle-asm/readz_hor_za8_vg4.c | 160 +++++++++++++
.../aarch64/sme2/acle-asm/readz_ver_za16.c | 127 ++++++++++
.../sme2/acle-asm/readz_ver_za16_vg2.c | 144 ++++++++++++
.../sme2/acle-asm/readz_ver_za16_vg4.c | 142 ++++++++++++
.../aarch64/sme2/acle-asm/readz_ver_za32.c | 137 +++++++++++
.../sme2/acle-asm/readz_ver_za32_vg2.c | 116 ++++++++++
.../sme2/acle-asm/readz_ver_za32_vg4.c | 133 +++++++++++
.../aarch64/sme2/acle-asm/readz_ver_za64.c | 127 ++++++++++
.../sme2/acle-asm/readz_ver_za64_vg2.c | 117 ++++++++++
.../sme2/acle-asm/readz_ver_za64_vg4.c | 133 +++++++++++
.../aarch64/sme2/acle-asm/readz_ver_za8.c | 87 +++++++
.../aarch64/sme2/acle-asm/readz_ver_za8_vg2.c | 144 ++++++++++++
.../aarch64/sme2/acle-asm/readz_ver_za8_vg4.c | 160 +++++++++++++
.../aarch64/sme2/acle-asm/readz_za16_vg1x2.c | 126 ++++++++++
.../aarch64/sme2/acle-asm/readz_za16_vg1x4.c | 141 ++++++++++++
.../aarch64/sme2/acle-asm/readz_za32_vg1x2.c | 126 ++++++++++
.../aarch64/sme2/acle-asm/readz_za32_vg1x4.c | 141 ++++++++++++
.../aarch64/sme2/acle-asm/readz_za64_vg1x2.c | 126 ++++++++++
.../aarch64/sme2/acle-asm/readz_za64_vg1x4.c | 141 ++++++++++++
.../aarch64/sme2/acle-asm/readz_za8_vg1x2.c | 126 ++++++++++
.../aarch64/sme2/acle-asm/readz_za8_vg1x4.c | 141 ++++++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg1x2.c | 97 ++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg1x4.c | 97 ++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg2x1.c | 117 ++++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg2x2.c | 97 ++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg2x4.c | 97 ++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg4x1.c | 127 ++++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg4x2.c | 97 ++++++++
.../aarch64/sme2/acle-asm/zero_za64_vg4x4.c | 97 ++++++++
gcc/testsuite/lib/target-supports.exp | 2 +-
54 files changed, 5538 insertions(+), 11 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za16.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za16_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za16_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za32.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za32_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za32_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za64.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za64_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za64_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za16.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za16_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za16_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za32.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za32_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za32_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za64.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za64_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za64_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za16_vg1x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za16_vg1x4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za32_vg1x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za32_vg1x4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za64_vg1x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za64_vg1x4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg1x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg1x4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x1.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x4.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x1.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x2.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x4.c
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index b2b7c8d238a..d6dc609a00a 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -274,6 +274,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
"__ARM_FEATURE_SME_F16F16", pfile);
aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
+ aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
+ "__ARM_FEATURE_SME2p1", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def
b/gcc/config/aarch64/aarch64-option-extensions.def
index 7ad966ac7f0..f4cf6618238 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -225,6 +225,8 @@ AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64))
AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
+AARCH64_OPT_EXTENSION("sme2p1", SME2p1, (SME2), (), (), "sme2p1")
+
AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "")
AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "")
diff --git a/gcc/config/aarch64/aarch64-sme.md
b/gcc/config/aarch64/aarch64-sme.md
index 2dda831b7c0..0f362671f75 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -651,6 +651,7 @@ (define_insn "aarch64_sme_str_zt0"
;; -------------------------------------------------------------------------
;; Includes:
;; - MOVA
+;; - MOVAZ
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
@@ -697,6 +698,72 @@ (define_insn
"@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
"mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
)
+(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (match_operand:SI 2 "register_operand" "Ucj")
+ (const_int 0)]
+ SME_READZ_HV))
+ (set (reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (unspec:<V_INT_CONTAINER>
+ [(reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_dup 1)
+ (match_dup 2)
+ (const_int 1)]
+ SME_READZ_HV))]
+ "TARGET_STREAMING_SME2p1"
+ "movaz\t%0.<Vetype>, za%1<hv>.<Vetype>[%w2, 0]"
+)
+
+(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (plus:SI (match_operand:SI 2 "register_operand" "Ucj")
+ (match_operand:SI 3 "const_int_operand"))
+ (const_int 0)]
+ SME_READZ_HV))
+ (set (reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (unspec:<V_INT_CONTAINER>
+ [(reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_dup 1)
+ (plus:SI (match_dup 2)
+ (match_dup 3))
+ (const_int 1)]
+ SME_READZ_HV))]
+ "TARGET_STREAMING_SME2p1
+ && UINTVAL (operands[3]) < 128 / <elem_bits>"
+ "movaz\t%0.<Vetype>, za%1<hv>.<Vetype>[%w2, %3]"
+)
+
+(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(reg:VNx1TI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (match_operand:SI 2 "register_operand" "Ucj")
+ (const_int 0)] ; 0 selects the value read into operand 0.
+ SME_READZ_HV))
+ (set (reg:VNx1TI_ONLY ZA_REGNUM)
+ (unspec:VNx1TI_ONLY
+ [(reg:VNx1TI_ONLY ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_dup 1)
+ (match_dup 2)
+ (const_int 1)] ; 1 selects the new ZA contents, as in the other MOVAZ patterns.
+ SME_READZ_HV))]
+ "TARGET_STREAMING_SME2p1"
+ "movaz\t%0.q, za%1<hv>.q[%w2, 0]"
+)
+
(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
[(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
(unspec:<V_INT_CONTAINER>
@@ -746,6 +813,7 @@ (define_insn
"@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
;; -------------------------------------------------------------------------
;; Includes:
;; - MOVA
+;; - MOVAZ
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sme_<optab><mode><mode>"
@@ -782,6 +850,60 @@ (define_insn "*aarch64_sme_<optab><mode><mode>_plus"
}
)
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+ [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand"
"=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (match_operand:SI 2 "register_operand" "Ucj")
+ (const_int 0)]
+ SME_READZ_HV))
+ (set (reg:SVE_FULLx24 ZA_REGNUM)
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_dup 1)
+ (match_dup 2)
+ (const_int 1)]
+ SME_READZ_HV))]
+ "TARGET_STREAMING_SME2p1"
+ {
+ operands[3] = GEN_INT (<vector_count> - 1);
+ return "movaz\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]";
+ }
+)
+
+(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
+ [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand"
"=Uw<vector_count>")
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:DI 1 "const_int_operand")
+ (plus:SI
+ (match_operand:SI 2 "register_operand" "Ucj")
+ (match_operand:SI 3 "const_int_operand"))
+ (const_int 0)]
+ SME_READZ_HV))
+ (set (reg:SVE_FULLx24 ZA_REGNUM)
+ (unspec:SVE_FULLx24
+ [(reg:SVE_FULLx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_dup 1)
+ (plus:SI
+ (match_dup 2)
+ (match_dup 3))
+ (const_int 1)]
+ SME_READZ_HV))]
+ "TARGET_STREAMING_SME2p1
+ && UINTVAL (operands[3]) % <vector_count> == 0
+ && UINTVAL (operands[3]) < 128 / <elem_bits>"
+ {
+ operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1);
+ return "movaz\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]";
+ }
+)
+
(define_insn "@aarch64_sme_read<mode>"
[(set (match_operand:SVE_DIx24 0 "aligned_register_operand"
"=Uw<vector_count>")
(unspec:SVE_DIx24
@@ -805,6 +927,46 @@ (define_insn "*aarch64_sme_read<mode>_plus"
"mova\t%0, za.d[%w1, %2, vgx<vector_count>]"
)
+(define_insn "@aarch64_sme_readz<mode>"
+ [(set (match_operand:SVE_DIx24 0 "aligned_register_operand"
"=Uw<vector_count>")
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_operand:SI 1 "register_operand" "Uci")
+ (const_int 0)]
+ UNSPEC_SME_READZ))
+ (set (reg:SVE_DIx24 ZA_REGNUM)
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (match_dup 1)
+ (const_int 1)]
+ UNSPEC_SME_READZ))]
+ "TARGET_STREAMING_SME2p1"
+ "movaz\t%0, za.d[%w1, 0, vgx<vector_count>]"
+)
+
+(define_insn "*aarch64_sme_readz<mode>_plus"
+ [(set (match_operand:SVE_DIx24 0 "aligned_register_operand"
"=Uw<vector_count>")
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:SI 2 "const_0_to_7_operand"))
+ (const_int 0)]
+ UNSPEC_SME_READZ))
+ (set (reg:SVE_DIx24 ZA_REGNUM)
+ (unspec:SVE_DIx24
+ [(reg:SVE_DIx24 ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (plus:SI (match_dup 1)
+ (match_dup 2))
+ (const_int 1)]
+ UNSPEC_SME_READZ))]
+ "TARGET_STREAMING_SME2p1"
+ "movaz\t%0, za.d[%w1, %2, vgx<vector_count>]"
+)
+
(define_insn "@aarch64_sme_<optab><mode><mode>"
[(set (reg:SVE_FULLx24 ZA_REGNUM)
(unspec:SVE_FULLx24
@@ -873,7 +1035,7 @@ (define_insn "*aarch64_sme_write<mode>_plus"
;; - ZERO
;; -------------------------------------------------------------------------
-(define_c_enum "unspec" [UNSPEC_SME_ZERO])
+(define_c_enum "unspec" [UNSPEC_SME_ZERO UNSPEC_SME_ZERO_SLICES])
(define_insn "aarch64_sme_zero_za"
[(set (reg:VNx16QI ZA_REGNUM)
@@ -887,6 +1049,59 @@ (define_insn "aarch64_sme_zero_za"
}
)
+(define_insn "@aarch64_sme_zero_za_slices<mode>"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI
+ [(reg:VNx16QI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (scratch:SME_ZA_SDIx24)
+ (match_operand:SI 0 "register_operand" "Uci")]
+ UNSPEC_SME_ZERO_SLICES))]
+ "TARGET_STREAMING_SME2p1"
+ "zero\tza.d[%w0, 0, vgx<vector_count>]"
+)
+
+(define_insn "*aarch64_sme_zero_za_slices<mode>_plus"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI
+ [(reg:VNx16QI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (scratch:SME_ZA_SDIx24)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_0_to_7_operand"))]
+ UNSPEC_SME_ZERO_SLICES))]
+ "TARGET_STREAMING_SME2p1"
+ "zero\tza.d[%w0, %1, vgx<vector_count>]"
+)
+
+(define_insn "@aarch64_sme_zero_za_slices<mode>"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI
+ [(reg:VNx16QI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (scratch:SME_ZA_BHIx124)
+ (match_operand:SI 0 "register_operand" "Uci")]
+ UNSPEC_SME_ZERO_SLICES))]
+ "TARGET_STREAMING_SME2p1"
+ "zero\tza.d[%w0, 0:<za32_last_offset><vg_modifier>]"
+)
+
+(define_insn "*aarch64_sme_zero_za_slices<mode>_plus"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI
+ [(reg:VNx16QI ZA_REGNUM)
+ (reg:DI SME_STATE_REGNUM)
+ (scratch:SME_ZA_BHIx124)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_<za32_offset_range>_operand"))]
+ UNSPEC_SME_ZERO_SLICES))]
+ "TARGET_STREAMING_SME2p1"
+ {
+ operands[2] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+ return "zero\tza.d[%w0, %1:%2<vg_modifier>]";
+ }
+)
+
(define_insn "aarch64_sme_zero_zt0"
[(set (reg:V8DI ZT0_REGNUM)
(const_int 0))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 072d3a965c0..371507513c3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -2765,6 +2765,17 @@ struct inherent_za_def : public nonoverloaded_base
};
SHAPE (inherent_za)
+/* void svfoo_t0(uint32_t). */
+struct inherent_za_slice_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su32", group, MODE_none);
+ }
+};
+SHAPE (inherent_za_slice)
+
/* void svfoo_zt(uint64_t)
where the argument must be zero. */
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index 12ef2c99238..e1d661c5a46 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -140,6 +140,7 @@ namespace aarch64_sve
extern const function_shape *const inherent;
extern const function_shape *const inherent_b;
extern const function_shape *const inherent_za;
+ extern const function_shape *const inherent_za_slice;
extern const function_shape *const inherent_zt;
extern const function_shape *const inherent_mask_za;
extern const function_shape *const ldr_zt;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
index b66b35ae60b..022b2a6ade5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
@@ -352,27 +352,31 @@ public:
unsigned int m_bits;
};
-class svread_za_impl : public function_base
+template<insn_code (*CODE) (machine_mode)>
+class svread_za_slice_base : public function_base
{
public:
- unsigned int
- call_properties (const function_instance &) const override
- {
- return CP_READ_ZA;
- }
-
rtx
expand (function_expander &e) const override
{
machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
- rtx res = e.use_exact_insn (code_for_aarch64_sme_read (mode));
+ rtx res = e.use_exact_insn (CODE (mode));
return aarch64_sve_reinterpret (e.result_mode (), res);
}
};
+using svread_za_impl = add_call_properties
+ <svread_za_slice_base<code_for_aarch64_sme_read>, CP_READ_ZA>;
+
using svread_za_tile_impl = add_call_properties<read_write_za_base,
CP_READ_ZA>;
+using svreadz_za_impl = add_call_properties
+ <svread_za_slice_base<code_for_aarch64_sme_readz>, CP_READ_ZA | CP_WRITE_ZA>;
+
+using svreadz_za_tile_impl = add_call_properties<read_write_za_base,
+ CP_READ_ZA | CP_WRITE_ZA>;
+
class svst1_za_impl : public store_za_base
{
public:
@@ -476,12 +480,48 @@ public:
}
};
+/* Return the mode iterator value that is used to represent a zeroing
+ of the ZA vectors described by GROUP. */
+static machine_mode
+zero_slices_mode (group_suffix_index group)
+{
+ switch (group)
+ {
+ case GROUP_vg1x2:
+ return VNx8SImode;
+ case GROUP_vg1x4:
+ return VNx16SImode;
+
+ case GROUP_vg2x1:
+ return VNx8HImode;
+ case GROUP_vg2x2:
+ return VNx16HImode;
+ case GROUP_vg2x4:
+ return VNx32HImode;
+
+ case GROUP_vg4x1:
+ return VNx16QImode;
+ case GROUP_vg4x2:
+ return VNx32QImode;
+ case GROUP_vg4x4:
+ return VNx64QImode;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
class svzero_za_impl : public write_za<function_base>
{
public:
rtx
- expand (function_expander &) const override
+ expand (function_expander &e) const override
{
+ if (e.args.length () == 1)
+ {
+ auto mode = zero_slices_mode (e.group_suffix_id);
+ return e.use_exact_insn (code_for_aarch64_sme_zero_za_slices (mode));
+ }
emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode)));
return const0_rtx;
}
@@ -546,6 +586,9 @@ FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS,
UNSPEC_SME_UMOPS,
FUNCTION (svread_za, svread_za_impl,)
FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR))
FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER))
+FUNCTION (svreadz_za, svreadz_za_impl,)
+FUNCTION (svreadz_hor_za, svreadz_za_tile_impl, (UNSPEC_SME_READZ_HOR))
+FUNCTION (svreadz_ver_za, svreadz_za_tile_impl, (UNSPEC_SME_READZ_VER))
FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR))
FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER))
FUNCTION (svstr_za, svstr_za_impl, )
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 115f011c967..dd6c4fb97cd 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -236,6 +236,17 @@ DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_bfloat,
za_m)
DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_bfloat, vg1x24, none)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2p1)
+DEF_SME_ZA_FUNCTION_GS (svreadz, read_za_slice, za_bhsd_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION (svreadz_hor, read_za, za_all_data, none)
+DEF_SME_ZA_FUNCTION_GS (svreadz_hor, read_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION (svreadz_ver, read_za, za_all_data, none)
+DEF_SME_ZA_FUNCTION_GS (svreadz_ver, read_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svzero, inherent_za_slice, d_za, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svzero, inherent_za_slice, d_za, vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svzero, inherent_za_slice, d_za, vg4, none)
+#undef REQUIRED_EXTENSIONS
+
#undef DEF_SME_ZA_FUNCTION
#undef DEF_SME_ZA_FUNCTION_GS
#undef DEF_SME_FUNCTION
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h
b/gcc/config/aarch64/aarch64-sve-builtins-sme.h
index 1ed8d980577..e320d88bb9d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h
@@ -53,6 +53,9 @@ namespace aarch64_sve
extern const function_base *const svread_za;
extern const function_base *const svread_hor_za;
extern const function_base *const svread_ver_za;
+ extern const function_base *const svreadz_za;
+ extern const function_base *const svreadz_hor_za;
+ extern const function_base *const svreadz_ver_za;
extern const function_base *const svst1_hor_za;
extern const function_base *const svst1_ver_za;
extern const function_base *const svstr_za;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 79f502a4757..e0b363fa4f3 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -366,6 +366,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
/* Same with streaming mode enabled. */
#define TARGET_STREAMING_SME2 (TARGET_STREAMING && TARGET_SME2)
+#define TARGET_STREAMING_SME2p1 (TARGET_STREAMING && AARCH64_HAVE_ISA (SME2p1))
+
#define TARGET_SME_B16B16 AARCH64_HAVE_ISA (SME_B16B16)
/* ARMv8.3-A features. */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 6a75a9cc2ba..cf1b430be18 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1151,6 +1151,9 @@ (define_c_enum "unspec"
UNSPEC_SME_READ
UNSPEC_SME_READ_HOR
UNSPEC_SME_READ_VER
+ UNSPEC_SME_READZ
+ UNSPEC_SME_READZ_HOR
+ UNSPEC_SME_READZ_VER
UNSPEC_SME_SDOT
UNSPEC_SME_SVDOT
UNSPEC_SME_SMLA
@@ -3672,6 +3675,7 @@ (define_int_iterator UNSPEC_REVD_ONLY [UNSPEC_REVD])
(define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER])
(define_int_iterator SME_READ_HV [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER])
+(define_int_iterator SME_READZ_HV [UNSPEC_SME_READZ_HOR UNSPEC_SME_READZ_VER])
(define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER])
(define_int_iterator SME_WRITE_HV [UNSPEC_SME_WRITE_HOR UNSPEC_SME_WRITE_VER])
@@ -3828,6 +3832,8 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
(UNSPEC_SME_LD1_VER "ld1_ver")
(UNSPEC_SME_READ_HOR "read_hor")
(UNSPEC_SME_READ_VER "read_ver")
+ (UNSPEC_SME_READZ_HOR "readz_hor")
+ (UNSPEC_SME_READZ_VER "readz_ver")
(UNSPEC_SME_SDOT "sdot")
(UNSPEC_SME_SVDOT "svdot")
(UNSPEC_SME_SMLA "smla")
@@ -4685,6 +4691,8 @@ (define_int_attr hv [(UNSPEC_SME_LD1_HOR "h")
(UNSPEC_SME_LD1_VER "v")
(UNSPEC_SME_READ_HOR "h")
(UNSPEC_SME_READ_VER "v")
+ (UNSPEC_SME_READZ_HOR "h")
+ (UNSPEC_SME_READZ_VER "v")
(UNSPEC_SME_ST1_HOR "h")
(UNSPEC_SME_ST1_VER "v")
(UNSPEC_SME_WRITE_HOR "h")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 756452bccba..8d01e5e95b3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21801,6 +21801,9 @@ and SVE_B16B16 instructions.
@item sme-f16f16
Enable the FEAT_SME_F16F16 extension to SME. This also enables SME2
instructions.
+@item sme2p1
+Enable the Scalable Matrix Extension version 2.1. This also enables SME2
+instructions.
@item lse128
Enable the LSE128 128-bit atomic instructions extension. This also
enables LSE instructions.
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index adad9627cd4..936167c5187 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -167,3 +167,14 @@
#ifndef __ARM_FEATURE_SME
#error Foo
#endif
+
+#pragma GCC target "+nothing+sme2p1"
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME2
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME2p1
+#error Foo
+#endif
diff --git a/gcc/testsuite/lib/target-supports.exp
b/gcc/testsuite/lib/target-supports.exp
index 492a7704bb8..cd692101b87 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12122,7 +12122,7 @@ proc check_effective_target_aarch64_tiny { } {
foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
"i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
"sme" "sme-i16i64" "sme2" "sve-b16b16"
- "sme-b16b16" "sme-f16f16" } {
+ "sme-b16b16" "sme-f16f16" "sme2p1" } {
eval [string map [list FUNC $aarch64_ext] {
proc check_effective_target_aarch64_asm_FUNC_ok { } {
if { [istarget aarch64*-*-*] } {
--
2.25.1
tests.diff.xz
Description: application/xz
