bryanpkc updated this revision to Diff 495592.
bryanpkc marked 4 inline comments as done.
bryanpkc added a comment.
Rebased on trunk and addressed review comments.
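
For reference, a minimal usage sketch distilled from the tests added in this diff (the wrapper name load_row is illustrative; the call, arguments, and attribute mirror acle_sme_ld1.c):

  #include <arm_sme.h>

  // Load one horizontal slice of the 8-bit ZA tile. The tile index (first
  // argument) and slice offset (third argument) must be constant integers;
  // Sema enforces their valid ranges via arm_sme_sema_rangechecks.inc.
  __attribute__((arm_streaming))
  void load_row(uint32_t slice_base, svbool_t pg, const void *ptr) {
    svld1_hor_za8(/*tile=*/0, slice_base, /*slice_offset=*/0, pg, ptr);
  }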
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D127910/new/
https://reviews.llvm.org/D127910
Files:
clang/include/clang/Basic/BuiltinsSME.def
clang/include/clang/Basic/CMakeLists.txt
clang/include/clang/Basic/TargetBuiltins.h
clang/include/clang/Basic/arm_sme.td
clang/include/clang/Basic/arm_sve.td
clang/include/clang/Basic/arm_sve_sme_incl.td
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Headers/CMakeLists.txt
clang/lib/Sema/SemaChecking.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
clang/utils/TableGen/SveEmitter.cpp
clang/utils/TableGen/TableGen.cpp
clang/utils/TableGen/TableGenBackends.h
Index: clang/utils/TableGen/TableGenBackends.h
===================================================================
--- clang/utils/TableGen/TableGenBackends.h
+++ clang/utils/TableGen/TableGenBackends.h
@@ -101,6 +101,11 @@
void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
Index: clang/utils/TableGen/TableGen.cpp
===================================================================
--- clang/utils/TableGen/TableGen.cpp
+++ clang/utils/TableGen/TableGen.cpp
@@ -81,6 +81,10 @@
GenArmSveBuiltinCG,
GenArmSveTypeFlags,
GenArmSveRangeChecks,
+ GenArmSmeHeader,
+ GenArmSmeBuiltins,
+ GenArmSmeBuiltinCG,
+ GenArmSmeRangeChecks,
GenArmCdeHeader,
GenArmCdeBuiltinDef,
GenArmCdeBuiltinSema,
@@ -219,6 +223,14 @@
"Generate arm_sve_typeflags.inc for clang"),
clEnumValN(GenArmSveRangeChecks, "gen-arm-sve-sema-rangechecks",
"Generate arm_sve_sema_rangechecks.inc for clang"),
+ clEnumValN(GenArmSmeHeader, "gen-arm-sme-header",
+ "Generate arm_sme.h for clang"),
+ clEnumValN(GenArmSmeBuiltins, "gen-arm-sme-builtins",
+ "Generate arm_sme_builtins.inc for clang"),
+ clEnumValN(GenArmSmeBuiltinCG, "gen-arm-sme-builtin-codegen",
+ "Generate arm_sme_builtin_cg_map.inc for clang"),
+ clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks",
+ "Generate arm_sme_sema_rangechecks.inc for clang"),
clEnumValN(GenArmMveHeader, "gen-arm-mve-header",
"Generate arm_mve.h for clang"),
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def",
@@ -438,6 +450,18 @@
case GenArmSveRangeChecks:
EmitSveRangeChecks(Records, OS);
break;
+ case GenArmSmeHeader:
+ EmitSmeHeader(Records, OS);
+ break;
+ case GenArmSmeBuiltins:
+ EmitSmeBuiltins(Records, OS);
+ break;
+ case GenArmSmeBuiltinCG:
+ EmitSmeBuiltinCG(Records, OS);
+ break;
+ case GenArmSmeRangeChecks:
+ EmitSmeRangeChecks(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -228,7 +228,7 @@
}
/// Emits the intrinsic declaration to the ostream.
- void emitIntrinsic(raw_ostream &OS) const;
+ void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const;
private:
std::string getMergeSuffix() const { return MergeSuffix; }
@@ -346,8 +346,21 @@
/// Create the SVETypeFlags used in CGBuiltins
void createTypeFlags(raw_ostream &o);
+ /// Emit arm_sme.h.
+ void createSMEHeader(raw_ostream &o);
+
+ /// Emit all the SME __builtin prototypes and code needed by Sema.
+ void createSMEBuiltins(raw_ostream &o);
+
+ /// Emit all the information needed to map builtin -> LLVM IR intrinsic.
+ void createSMECodeGenMap(raw_ostream &o);
+
+ /// Emit all the range checks for the immediates.
+ void createSMERangeChecks(raw_ostream &o);
+
/// Create intrinsic and add it to \p Out
- void createIntrinsic(Record *R, SmallVectorImpl<std::unique_ptr<Intrinsic>> &Out);
+ void createIntrinsic(Record *R,
+ SmallVectorImpl<std::unique_ptr<Intrinsic>> &Out);
};
} // end anonymous namespace
@@ -480,6 +493,9 @@
case 'l':
ElementBitwidth = 64;
break;
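+  // 128-bit element type, used by the quadword (_za128) SME forms.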
+ case 'q':
+ ElementBitwidth = 128;
+ break;
case 'h':
Float = true;
ElementBitwidth = 16;
@@ -757,6 +773,11 @@
NumVectors = 0;
Signed = true;
break;
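+  // '%' denotes a void pointer ("void *"), used for the base address
+  // argument of the SME load/store intrinsics.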
+ case '%':
+ Pointer = true;
+ Void = true;
+ NumVectors = 0;
+ break;
case 'A':
Pointer = true;
ElementBitwidth = Bitwidth = 8;
@@ -918,15 +939,29 @@
getMergeSuffix();
}
-void Intrinsic::emitIntrinsic(raw_ostream &OS) const {
+void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const {
bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1;
std::string FullName = mangleName(ClassS);
std::string ProtoName = mangleName(getClassKind());
+  std::string SMEAttrs;
+
+ if (Flags & Emitter.getEnumValueForFlag("IsStreaming"))
+ SMEAttrs += ", arm_streaming";
+ if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible"))
+ SMEAttrs += ", arm_streaming_compatible";
+ if (Flags & Emitter.getEnumValueForFlag("IsSharedZA"))
+ SMEAttrs += ", arm_shared_za";
+ if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA"))
+ SMEAttrs += ", arm_preserves_za";
OS << (IsOverloaded ? "__aio " : "__ai ")
<< "__attribute__((__clang_arm_builtin_alias("
- << "__builtin_sve_" << FullName << ")))\n";
+ << (SMEAttrs.empty() ? "__builtin_sve_" : "__builtin_sme_")
+ << FullName << ")";
+ if (!SMEAttrs.empty())
+ OS << SMEAttrs;
+ OS << "))\n";
OS << getTypes()[0].str() << " " << ProtoName << "(";
for (unsigned I = 0; I < getTypes().size() - 1; ++I) {
@@ -969,6 +1004,8 @@
return encodeEltType("EltTyBool32");
case 64:
return encodeEltType("EltTyBool64");
+ case 128:
+ return encodeEltType("EltTyBool128");
default:
llvm_unreachable("Unhandled predicate element bitwidth!");
}
@@ -983,6 +1020,8 @@
return encodeEltType("EltTyInt32");
case 64:
return encodeEltType("EltTyInt64");
+ case 128:
+ return encodeEltType("EltTyInt128");
default:
llvm_unreachable("Unhandled integer element bitwidth!");
}
@@ -1226,7 +1265,7 @@
// Actually emit the intrinsic declarations.
for (auto &I : Defs)
- I->emitIntrinsic(OS);
+ I->emitIntrinsic(OS, *this);
OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n";
OS << "#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m\n";
@@ -1375,6 +1414,164 @@
OS << "#endif\n\n";
}
+void SVEEmitter::createSMEHeader(raw_ostream &OS) {
+ OS << "/*===---- arm_sme.h - ARM SME intrinsics "
+ "-----------------------------------===\n"
+ " *\n"
+ " *\n"
+ " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+ "Exceptions.\n"
+ " * See https://llvm.org/LICENSE.txt for license information.\n"
+ " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ " *\n"
+ " *===-----------------------------------------------------------------"
+ "------===\n"
+ " */\n\n";
+
+ OS << "#ifndef __ARM_SME_H\n";
+ OS << "#define __ARM_SME_H\n\n";
+
+ OS << "#if !defined(__LITTLE_ENDIAN__)\n";
+ OS << "#error \"Big endian is currently not supported for arm_sme.h\"\n";
+ OS << "#endif\n";
+
+ OS << "#include <arm_sve.h> \n\n";
+
+ OS << "/* Function attributes */\n";
+ OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
+ "__nodebug__))\n\n";
+
+ OS << "#ifdef __cplusplus\n";
+ OS << "extern \"C\" {\n";
+ OS << "#endif\n\n";
+
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+  // Sort intrinsics in the header file by the following order/priority, similar to SVE:
+ // - Architectural guard
+ // - Class (is intrinsic overloaded or not)
+ // - Intrinsic name
+ std::stable_sort(Defs.begin(), Defs.end(),
+ [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ auto ToTuple = [](const std::unique_ptr<Intrinsic> &I) {
+ return std::make_tuple(I->getGuard(),
+ (unsigned)I->getClassKind(),
+ I->getName());
+ };
+ return ToTuple(A) < ToTuple(B);
+ });
+
+  // Actually emit the intrinsic declarations.
+  for (auto &I : Defs)
+    I->emitIntrinsic(OS, *this);
+
+ OS << "#ifdef __cplusplus\n";
+ OS << "} // extern \"C\"\n";
+ OS << "#endif\n\n";
+ OS << "#undef __ai\n\n";
+ OS << "#endif /* __ARM_SME_H */\n";
+}
+
+void SVEEmitter::createSMEBuiltins(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+  for (auto *R : RV)
+    createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_BUILTINS\n";
+ for (auto &Def : Defs) {
+ // Only create BUILTINs for non-overloaded intrinsics, as overloaded
+ // declarations only live in the header file.
+ if (Def->getClassKind() != ClassG)
+ OS << "BUILTIN(__builtin_sme_" << Def->getMangledName() << ", \""
+ << Def->getBuiltinTypeStr() << "\", \"n\")\n";
+ }
+
+ OS << "#endif\n\n";
+}
+
+void SVEEmitter::createSMECodeGenMap(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+  for (auto *R : RV)
+    createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_LLVM_INTRINSIC_MAP\n";
+ for (auto &Def : Defs) {
+ // Builtins only exist for non-overloaded intrinsics, overloaded
+ // declarations only live in the header file.
+ if (Def->getClassKind() == ClassG)
+ continue;
+
+ uint64_t Flags = Def->getFlags();
+ auto FlagString = std::to_string(Flags);
+
+ std::string LLVMName = Def->getLLVMName();
+ std::string Builtin = Def->getMangledName();
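+    // SMEMAP1 maps a builtin to a named LLVM IR intrinsic; SMEMAP2 marks
+    // builtins that are instead lowered by custom codegen, mirroring
+    // SVEMAP1/SVEMAP2 for SVE.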
+ if (!LLVMName.empty())
+ OS << "SMEMAP1(" << Builtin << ", " << LLVMName << ", " << FlagString
+ << "),\n";
+ else
+ OS << "SMEMAP2(" << Builtin << ", " << FlagString << "),\n";
+ }
+ OS << "#endif\n\n";
+}
+
+void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+  for (auto *R : RV)
+    createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_IMMEDIATE_CHECK\n";
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
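+  // Each case pushes (argument index, immediate check kind, element bitwidth)
+  // tuples that the immediate range checking in SemaChecking.cpp consumes.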
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
+ Def->getImmChecks().empty())
+ continue;
+
+ OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
+ for (auto &Check : Def->getImmChecks())
+ OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
+ << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+ OS << " break;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
namespace clang {
void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createHeader(OS);
@@ -1396,4 +1593,19 @@
SVEEmitter(Records).createTypeFlags(OS);
}
+void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMEHeader(OS);
+}
+
+void EmitSmeBuiltins(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMEBuiltins(OS);
+}
+
+void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMECodeGenMap(OS);
+}
+
+void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMERangeChecks(OS);
+}
} // End namespace clang
Index: clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
===================================================================
--- /dev/null
+++ clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
@@ -0,0 +1,81 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -fsyntax-only -verify -verify-ignore-unexpected=error %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sme.h>
+
+__attribute__((arm_streaming))
+void test_range_0_0(svbool_t pg, void *ptr) {
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, -1, 0, pg, ptr);
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svst1_ver_za8,,,)(-1, -1, 15, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svld1_hor_za128,,,)(0, -1, -1, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svst1_ver_za128,,,)(15, -1, -1, pg, ptr);
+}
+
+__attribute__((arm_streaming))
+void test_range_0_1(svbool_t pg, void *ptr) {
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, -1, 0, pg, ptr);
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svst1_ver_za16,,,)(-1, -1, 7, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svld1_hor_za64,,,)(0, -1, -1, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svst1_ver_za64,,,)(7, -1, -1, pg, ptr);
+}
+
+__attribute__((arm_streaming))
+void test_range_0_3(svbool_t pg, void *ptr) {
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, -1, 0, pg, ptr);
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svst1_ver_za32,,,)(-1, -1, 3, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svld1_hor_za32,,,)(0, -1, -1, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svst1_ver_za32,,,)(3, -1, -1, pg, ptr);
+}
+
+__attribute__((arm_streaming))
+void test_range_0_7(svbool_t pg, void *ptr) {
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, -1, 0, pg, ptr);
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svst1_ver_za64,,,)(-1, -1, 1, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svld1_hor_za16,,,)(0, -1, -1, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svst1_ver_za16,,,)(1, -1, -1, pg, ptr);
+}
+
+__attribute__((arm_streaming))
+void test_range_0_15(svbool_t pg, void *ptr) {
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, -1, 0, pg, ptr);
+ // expected-error-re@+1 {{argument value 0 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svst1_ver_za128,,,)(-1, -1, 0, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svld1_hor_za8,,,)(0, -1, -1, pg, ptr);
+ // expected-error-re@+1 {{argument value 2 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svst1_ver_za8,,,)(0, -1, -1, pg, ptr);
+}
+
+__attribute__((arm_streaming))
+void test_constant(uint64_t u64, svbool_t pg, void *ptr) {
+ SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}}
+ SVE_ACLE_FUNC(svld1_ver_za16,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svld1_ver_za16' must be a constant integer}}
+ SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}}
+ SVE_ACLE_FUNC(svst1_ver_za64,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svst1_ver_za64' must be a constant integer}}
+}
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
@@ -0,0 +1,172 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -no-opaque-pointers -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za16ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za32ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za64ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svst1_hor_vnum_za128ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svst1_ver_vnum_za8ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za16ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za32ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za64ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svst1_ver_vnum_za128ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
@@ -0,0 +1,142 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -no-opaque-pointers -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-C-LABEL: @test_svst1_hor_za8(
+// CHECK-CXX-LABEL: @_Z18test_svst1_hor_za8ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za8(0, slice_base, 0, pg, ptr);
+ svst1_hor_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za16(
+// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za16ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za16(0, slice_base, 0, pg, ptr);
+ svst1_hor_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za32(
+// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za32ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za32(0, slice_base, 0, pg, ptr);
+ svst1_hor_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za64(
+// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za64ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za64(0, slice_base, 0, pg, ptr);
+ svst1_hor_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za128(
+// CHECK-CXX-LABEL: @_Z20test_svst1_hor_za128ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za128(0, slice_base, 0, pg, ptr);
+ svst1_hor_za128(15, slice_base, 0, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za8(
+// CHECK-CXX-LABEL: @_Z18test_svst1_ver_za8ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za8(0, slice_base, 0, pg, ptr);
+ svst1_ver_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za16(
+// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za16ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za16(0, slice_base, 0, pg, ptr);
+ svst1_ver_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za32(
+// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za32ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za32(0, slice_base, 0, pg, ptr);
+ svst1_ver_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za64(
+// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za64ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za64(0, slice_base, 0, pg, ptr);
+ svst1_ver_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za128(
+// CHECK-CXX-LABEL: @_Z20test_svst1_ver_za128ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za128(0, slice_base, 0, pg, ptr);
+ svst1_ver_za128(15, slice_base, 0, pg, ptr);
+}
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
@@ -0,0 +1,172 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -no-opaque-pointers -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svld1_ver_vnum_za8ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
@@ -0,0 +1,142 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -no-opaque-pointers -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-C-LABEL: @test_svld1_hor_za8(
+// CHECK-CXX-LABEL: @_Z18test_svld1_hor_za8ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za8(0, slice_base, 0, pg, ptr);
+ svld1_hor_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za16(
+// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za16ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za16(0, slice_base, 0, pg, ptr);
+ svld1_hor_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za32(
+// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za32ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za32(0, slice_base, 0, pg, ptr);
+ svld1_hor_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za64(
+// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za64ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za64(0, slice_base, 0, pg, ptr);
+ svld1_hor_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za128(
+// CHECK-CXX-LABEL: @_Z20test_svld1_hor_za128ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za128(0, slice_base, 0, pg, ptr);
+ svld1_hor_za128(15, slice_base, 0, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za8(
+// CHECK-CXX-LABEL: @_Z18test_svld1_ver_za8ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za8(0, slice_base, 0, pg, ptr);
+ svld1_ver_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za16(
+// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za16ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za16(0, slice_base, 0, pg, ptr);
+ svld1_ver_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za32(
+// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za32ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za32(0, slice_base, 0, pg, ptr);
+ svld1_ver_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za64(
+// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za64ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za64(0, slice_base, 0, pg, ptr);
+ svld1_ver_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za128(
+// CHECK-CXX-LABEL: @_Z20test_svld1_ver_za128ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+__attribute__((arm_streaming)) void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za128(0, slice_base, 0, pg, ptr);
+ svld1_ver_za128(15, slice_base, 0, pg, ptr);
+}
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5696,6 +5696,14 @@
BuiltinID <= AArch64::LastSVEBuiltin;
}
+static bool ArmSmeAliasValid(ASTContext &Context, unsigned BuiltinID,
+ StringRef AliasName) {
+ if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+ BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
+ return BuiltinID >= AArch64::FirstSMEBuiltin &&
+ BuiltinID <= AArch64::LastSMEBuiltin;
+}
+
static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!AL.isArgIdent(0)) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
@@ -5708,7 +5716,8 @@
StringRef AliasName = cast<FunctionDecl>(D)->getIdentifier()->getName();
bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
- if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
+ if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName) &&
+ !ArmSmeAliasValid(S.Context, BuiltinID, AliasName)) ||
(!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2864,6 +2864,7 @@
return false;
#define GET_SVE_IMMEDIATE_CHECK
#include "clang/Basic/arm_sve_sema_rangechecks.inc"
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
#undef GET_SVE_IMMEDIATE_CHECK
}
@@ -2970,6 +2971,14 @@
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck0_0:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_15:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
+ HasError = true;
+ break;
}
}
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -328,6 +328,8 @@
clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
# Generate arm_sve.h
clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h)
+ # Generate arm_sme.h
+ clang_generate_header(-gen-arm-sme-header arm_sme.td arm_sme.h)
# Generate arm_bf16.h
clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h)
# Generate arm_mve.h
@@ -348,6 +350,7 @@
list(APPEND aarch64_only_generated_files
"${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h"
+ "${CMAKE_CURRENT_BINARY_DIR}/arm_sme.h"
"${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h"
)
endif()
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4234,6 +4234,7 @@
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
+ llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base);
llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
@@ -4248,6 +4249,11 @@
unsigned IntID);
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm::Triple::ArchType Arch);
llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -6683,11 +6683,29 @@
#undef SVEMAP1
#undef SVEMAP2
+#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ { \
+ #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier \
+ }
+
+#define SMEMAP2(NameBase, TypeModifier) \
+ { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
+static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
+#define GET_SME_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sme_builtin_cg.inc"
+#undef GET_SME_LLVM_INTRINSIC_MAP
+};
+
+#undef SMEMAP1
+#undef SMEMAP2
+
static bool NEONSIMDIntrinsicsProvenSorted = false;
static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
+static bool AArch64SMEIntrinsicsProvenSorted = false;
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
@@ -8836,6 +8854,8 @@
return Builder.getInt32Ty();
case SVETypeFlags::EltTyInt64:
return Builder.getInt64Ty();
+ case SVETypeFlags::EltTyInt128:
+ return Builder.getInt128Ty();
case SVETypeFlags::EltTyFloat16:
return Builder.getHalfTy();
@@ -8851,6 +8871,7 @@
case SVETypeFlags::EltTyBool16:
case SVETypeFlags::EltTyBool32:
case SVETypeFlags::EltTyBool64:
+ case SVETypeFlags::EltTyBool128:
return Builder.getInt1Ty();
}
}
@@ -8954,6 +8975,7 @@
switch (VTy->getMinNumElements()) {
default:
llvm_unreachable("unsupported element count!");
+ case 1:
case 2:
case 4:
case 8:
@@ -9315,6 +9337,41 @@
return Store;
}
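+// Compute a ZA tile slice index as Base plus Offset, casting the offset to
+// i32 to match the tile slice operand type of the SME intrinsics.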
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+ llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
+ return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
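+// Lower an SME ld1/st1 builtin call to the corresponding LLVM intrinsic,
+// reordering the builtin operands (tile, slice_base, slice_offset, pg, ptr
+// [, vnum]) into the intrinsic operand order (pg, ptr, tile, tileslice).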
+Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ Ops[3] = EmitSVEPredicateCast(
+ Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
+
+ SmallVector<Value *> NewOps;
+ NewOps.push_back(Ops[3]);
+
+ llvm::Value *BasePtr = Ops[4];
+
+ // If the intrinsic contains the vnum parameter, multiply it by the vector
+ // size in bytes.
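+ // (For example, the base pointer of svld1_hor_vnum_za8 becomes ptr plus
+ // vnum times the streaming vector length in bytes, obtained from the
+ // aarch64.sme.cntsb intrinsic below.)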
+ if (Ops.size() == 6) {
+ Function *StreamingVectorLength =
+ CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+ llvm::Value *StreamingVectorLengthCall =
+ Builder.CreateCall(StreamingVectorLength);
+ llvm::Value *Mulvl =
+ Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl");
+ // The type of the ptr parameter is void *, so use Int8Ty here.
+ BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl);
+ }
+ NewOps.push_back(BasePtr);
+ NewOps.push_back(Ops[0]);
+ NewOps.push_back(EmitTileslice(Ops[2], Ops[1]));
+ Function *F = CGM.getIntrinsic(IntID);
+ return Builder.CreateCall(F, NewOps);
+}
+
// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9738,6 +9795,43 @@
return nullptr;
}
+Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ // Find out if any arguments are required to be integer constant expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
+ llvm::SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ if ((ICEArguments & (1 << i)) == 0)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ else {
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(i)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+
+ // Immediates for SVE/SME llvm intrinsics are always 32-bit. We can safely
+ // truncate because the immediate has been range-checked and no valid
+ // immediate requires more than a handful of bits.
+ *Result = Result->extOrTrunc(32);
+ Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
+ }
+ }
+
+ auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
+ AArch64SMEIntrinsicsProvenSorted);
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ if (TypeFlags.isLoad() || TypeFlags.isStore())
+ return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+
+ // Should not happen; all SME builtins currently lower to loads or stores.
+ return nullptr;
+}
+
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
llvm::Triple::ArchType Arch) {
@@ -9745,6 +9839,10 @@
BuiltinID <= clang::AArch64::LastSVEBuiltin)
return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
+ if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
+ BuiltinID <= clang::AArch64::LastSMEBuiltin)
+ return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
+
unsigned HintID = static_cast<unsigned>(-1);
switch (BuiltinID) {
default: break;
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -37,6 +37,12 @@
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsSVE.def"
+#define BUILTIN(ID, TYPE, ATTRS) \
+ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
+ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#include "clang/Basic/BuiltinsSME.def"
+
#define BUILTIN(ID, TYPE, ATTRS) \
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \
Index: clang/include/clang/Basic/arm_sve_sme_incl.td
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -0,0 +1,274 @@
+//===--- arm_sve_sme_incl.td - ARM SVE/SME compiler interface -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common properties of the TableGen definitions used for
+// SVE and SME intrinsics.
+//
+// https://developer.arm.com/architectures/system-architectures/software-standards/acle
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions
+//===----------------------------------------------------------------------===//
+// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
+// a sequence of typespecs.
+//
+// The name is the base name of the intrinsic, for example "svld1". This is
+// then mangled by the tblgen backend to add type information ("svld1_s16").
+//
+// A typespec is a sequence of uppercase characters (modifiers) followed by one
+// lowercase character. A typespec encodes a particular "base type" of the
+// intrinsic.
+//
+// An example typespec is "Us" - unsigned short - svuint16_t. The available
+// typespec codes are given below.
+//
+// The string given to an Inst class is a sequence of typespecs. The intrinsic
+// is instantiated for every typespec in the sequence. For example "sdUsUd".
+//
+// The prototype is a string that defines the return type of the intrinsic
+// and the type of each argument. The return type and every argument gets a
+// "modifier" that can change in some way the "base type" of the intrinsic.
+//
+// The modifier 'd' means "default" and does not modify the base type in any
+// way. The available modifiers are given below.
+//
+// Typespecs
+// ---------
+// c: char
+// s: short
+// i: int
+// l: long
+// q: int128_t
+// f: float
+// h: half-float
+// d: double
+// b: bfloat
+
+// Typespec modifiers
+// ------------------
+// P: boolean
+// U: unsigned
+
+// Prototype modifiers
+// -------------------
+// prototype: return (arg, arg, ...)
+//
+// 2,3,4: array of default vectors
+// v: void
+// x: vector of signed integers
+// u: vector of unsigned integers
+// d: default
+// c: const pointer type
+// P: predicate type
+// s: scalar of element type
+// a: scalar of element type (splat to vector type)
+// R: scalar of 1/2 width element type (splat to vector type)
+// r: scalar of 1/4 width element type (splat to vector type)
+// @: unsigned scalar of 1/4 width element type (splat to vector type)
+// e: 1/2 width unsigned elements, 2x element count
+// b: 1/4 width unsigned elements, 4x element count
+// h: 1/2 width elements, 2x element count
+// q: 1/4 width elements, 4x element count
+// o: 4x width elements, 1/4 element count
+//
+// w: vector of element type promoted to 64bits, vector maintains
+// signedness of its element type.
+// f: element type promoted to uint64_t (splat to vector type)
+// j: element type promoted to 64bits (splat to vector type)
+// K: element type bitcast to a signed integer (splat to vector type)
+// L: element type bitcast to an unsigned integer (splat to vector type)
+//
+// i: constant uint64_t
+// k: int32_t
+// l: int64_t
+// m: uint32_t
+// n: uint64_t
+
+// t: svint32_t
+// z: svuint32_t
+// g: svuint64_t
+// O: svfloat16_t
+// M: svfloat32_t
+// N: svfloat64_t
+
+// J: Prefetch type (sv_prfop)
+
+// %: pointer to void
+
+// A: pointer to int8_t
+// B: pointer to int16_t
+// C: pointer to int32_t
+// D: pointer to int64_t
+
+// E: pointer to uint8_t
+// F: pointer to uint16_t
+// G: pointer to uint32_t
+// H: pointer to uint64_t
+
+// Q: const pointer to void
+
+// S: const pointer to int8_t
+// T: const pointer to int16_t
+// U: const pointer to int32_t
+// V: const pointer to int64_t
+//
+// W: const pointer to uint8_t
+// X: const pointer to uint16_t
+// Y: const pointer to uint32_t
+// Z: const pointer to uint64_t
+
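+// For example, the prototype "vimiPQ" used by the SME ld1 intrinsics in
+// arm_sme.td decodes as: void return ('v'), constant uint64_t tile index
+// ('i'), uint32_t slice base ('m'), constant uint64_t slice offset ('i'),
+// predicate ('P'), and const pointer to void ('Q').
+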
+class MergeType<int val, string suffix=""> {
+ int Value = val;
+ string Suffix = suffix;
+}
+def MergeNone : MergeType<0>;
+def MergeAny : MergeType<1, "_x">;
+def MergeOp1 : MergeType<2, "_m">;
+def MergeZero : MergeType<3, "_z">;
+def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit
+def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument.
+
+class EltType<int val> {
+ int Value = val;
+}
+def EltTyInvalid : EltType<0>;
+def EltTyInt8 : EltType<1>;
+def EltTyInt16 : EltType<2>;
+def EltTyInt32 : EltType<3>;
+def EltTyInt64 : EltType<4>;
+def EltTyInt128 : EltType<5>;
+def EltTyFloat16 : EltType<6>;
+def EltTyFloat32 : EltType<7>;
+def EltTyFloat64 : EltType<8>;
+def EltTyBool8 : EltType<9>;
+def EltTyBool16 : EltType<10>;
+def EltTyBool32 : EltType<11>;
+def EltTyBool64 : EltType<12>;
+def EltTyBool128 : EltType<13>;
+def EltTyBFloat16 : EltType<14>;
+
+class MemEltType<int val> {
+ int Value = val;
+}
+def MemEltTyDefault : MemEltType<0>;
+def MemEltTyInt8 : MemEltType<1>;
+def MemEltTyInt16 : MemEltType<2>;
+def MemEltTyInt32 : MemEltType<3>;
+def MemEltTyInt64 : MemEltType<4>;
+
+class FlagType<int val> {
+ int Value = val;
+}
+
+// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h
+// and include/clang/Basic/TargetBuiltins.h
+def NoFlags : FlagType<0x00000000>;
+def FirstEltType : FlagType<0x00000001>;
+// : :
+// : :
+def EltTypeMask : FlagType<0x0000000f>;
+def FirstMemEltType : FlagType<0x00000010>;
+// : :
+// : :
+def MemEltTypeMask : FlagType<0x00000070>;
+def FirstMergeTypeMask : FlagType<0x00000080>;
+// : :
+// : :
+def MergeTypeMask : FlagType<0x00000380>;
+def FirstSplatOperand : FlagType<0x00000400>;
+// : :
+// These flags are used to specify which scalar operand
+// needs to be duplicated/splatted into a vector.
+// : :
+def SplatOperandMask : FlagType<0x00001C00>;
+def IsLoad : FlagType<0x00002000>;
+def IsStore : FlagType<0x00004000>;
+def IsGatherLoad : FlagType<0x00008000>;
+def IsScatterStore : FlagType<0x00010000>;
+def IsStructLoad : FlagType<0x00020000>;
+def IsStructStore : FlagType<0x00040000>;
+def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extended by default
+def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
+def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
+def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
+def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
+def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
+def IsByteIndexed : FlagType<0x01000000>;
+def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
+def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
+def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
+def IsGatherPrefetch : FlagType<0x10000000>;
+def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
+def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
+def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
+def IsTupleCreate : FlagType<0x100000000>;
+def IsTupleGet : FlagType<0x200000000>;
+def IsTupleSet : FlagType<0x400000000>;
+def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
+def IsStreaming : FlagType<0x1000000000>;
+def IsStreamingCompatible : FlagType<0x2000000000>;
+def IsSharedZA : FlagType<0x4000000000>;
+def IsPreservesZA : FlagType<0x8000000000>;
+
+// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
+class ImmCheckType<int val> {
+ int Value = val;
+}
+def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns)
+def ImmCheck1_16 : ImmCheckType<1>; // 1..16
+def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1)
+def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt)
+def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2
+def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1)
+def ImmCheck0_7 : ImmCheckType<6>; // 0..7
+def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
+def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
+def ImmCheck0_13 : ImmCheckType<12>; // 0..13
+def ImmCheck0_1 : ImmCheckType<13>; // 0..1
+def ImmCheck0_2 : ImmCheckType<14>; // 0..2
+def ImmCheck0_3 : ImmCheckType<15>; // 0..3
+def ImmCheck0_0 : ImmCheckType<16>; // 0..0
+def ImmCheck0_15 : ImmCheckType<17>; // 0..15
+
+class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+ int Arg = arg;
+ int EltSizeArg = eltSizeArg;
+ ImmCheckType Kind = kind;
+}
+
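+// For example, ImmCheck<0, ImmCheck0_15> requires the builtin's first
+// argument to be an integer constant expression in the range [0, 15].
+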
+class Inst<string n, string p, string t, MergeType mt, string i,
+ list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
+ string Name = n;
+ string Prototype = p;
+ string Types = t;
+ string TargetGuard = "sve";
+ int Merge = mt.Value;
+ string MergeSuffix = mt.Suffix;
+ string LLVMIntrinsic = i;
+ list<FlagType> Flags = ft;
+ list<ImmCheck> ImmChecks = ch;
+ int MemEltType = met.Value;
+}
+
+// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8")
+class SInst<string n, string p, string t, MergeType mt, string i = "",
+ list<FlagType> ft = [], list<ImmCheck> ch = []>
+ : Inst<n, p, t, mt, i, ft, ch, MemEltTyDefault> {
+}
+
+// MInst: Instructions which access memory
+class MInst<string n, string p, string t, list<FlagType> f,
+ MemEltType met = MemEltTyDefault, string i = "",
+ list<ImmCheck> ch = []>
+ : Inst<n, p, t, MergeNone, i, f, ch, met> {
+}
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -13,252 +13,7 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Instruction definitions
-//===----------------------------------------------------------------------===//
-// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
-// a sequence of typespecs.
-//
-// The name is the base name of the intrinsic, for example "svld1". This is
-// then mangled by the tblgen backend to add type information ("svld1_s16").
-//
-// A typespec is a sequence of uppercase characters (modifiers) followed by one
-// lowercase character. A typespec encodes a particular "base type" of the
-// intrinsic.
-//
-// An example typespec is "Us" - unsigned short - svuint16_t. The available
-// typespec codes are given below.
-//
-// The string given to an Inst class is a sequence of typespecs. The intrinsic
-// is instantiated for every typespec in the sequence. For example "sdUsUd".
-//
-// The prototype is a string that defines the return type of the intrinsic
-// and the type of each argument. The return type and every argument gets a
-// "modifier" that can change in some way the "base type" of the intrinsic.
-//
-// The modifier 'd' means "default" and does not modify the base type in any
-// way. The available modifiers are given below.
-//
-// Typespecs
-// ---------
-// c: char
-// s: short
-// i: int
-// l: long
-// f: float
-// h: half-float
-// d: double
-// b: bfloat
-
-// Typespec modifiers
-// ------------------
-// P: boolean
-// U: unsigned
-
-// Prototype modifiers
-// -------------------
-// prototype: return (arg, arg, ...)
-//
-// 2,3,4: array of default vectors
-// v: void
-// x: vector of signed integers
-// u: vector of unsigned integers
-// d: default
-// c: const pointer type
-// P: predicate type
-// s: scalar of element type
-// a: scalar of element type (splat to vector type)
-// R: scalar of 1/2 width element type (splat to vector type)
-// r: scalar of 1/4 width element type (splat to vector type)
-// @: unsigned scalar of 1/4 width element type (splat to vector type)
-// e: 1/2 width unsigned elements, 2x element count
-// b: 1/4 width unsigned elements, 4x element count
-// h: 1/2 width elements, 2x element count
-// q: 1/4 width elements, 4x element count
-// o: 4x width elements, 1/4 element count
-//
-// w: vector of element type promoted to 64bits, vector maintains
-// signedness of its element type.
-// f: element type promoted to uint64_t (splat to vector type)
-// j: element type promoted to 64bits (splat to vector type)
-// K: element type bitcast to a signed integer (splat to vector type)
-// L: element type bitcast to an unsigned integer (splat to vector type)
-//
-// i: constant uint64_t
-// k: int32_t
-// l: int64_t
-// m: uint32_t
-// n: uint64_t
-
-// t: svint32_t
-// z: svuint32_t
-// g: svuint64_t
-// O: svfloat16_t
-// M: svfloat32_t
-// N: svfloat64_t
-
-// J: Prefetch type (sv_prfop)
-// A: pointer to int8_t
-// B: pointer to int16_t
-// C: pointer to int32_t
-// D: pointer to int64_t
-
-// E: pointer to uint8_t
-// F: pointer to uint16_t
-// G: pointer to uint32_t
-// H: pointer to uint64_t
-
-// Q: const pointer to void
-
-// S: const pointer to int8_t
-// T: const pointer to int16_t
-// U: const pointer to int32_t
-// V: const pointer to int64_t
-//
-// W: const pointer to uint8_t
-// X: const pointer to uint16_t
-// Y: const pointer to uint32_t
-// Z: const pointer to uint64_t
-
-class MergeType<int val, string suffix=""> {
- int Value = val;
- string Suffix = suffix;
-}
-def MergeNone : MergeType<0>;
-def MergeAny : MergeType<1, "_x">;
-def MergeOp1 : MergeType<2, "_m">;
-def MergeZero : MergeType<3, "_z">;
-def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit
-def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument.
-
-class EltType<int val> {
- int Value = val;
-}
-def EltTyInvalid : EltType<0>;
-def EltTyInt8 : EltType<1>;
-def EltTyInt16 : EltType<2>;
-def EltTyInt32 : EltType<3>;
-def EltTyInt64 : EltType<4>;
-def EltTyFloat16 : EltType<5>;
-def EltTyFloat32 : EltType<6>;
-def EltTyFloat64 : EltType<7>;
-def EltTyBool8 : EltType<8>;
-def EltTyBool16 : EltType<9>;
-def EltTyBool32 : EltType<10>;
-def EltTyBool64 : EltType<11>;
-def EltTyBFloat16 : EltType<12>;
-
-class MemEltType<int val> {
- int Value = val;
-}
-def MemEltTyDefault : MemEltType<0>;
-def MemEltTyInt8 : MemEltType<1>;
-def MemEltTyInt16 : MemEltType<2>;
-def MemEltTyInt32 : MemEltType<3>;
-def MemEltTyInt64 : MemEltType<4>;
-
-class FlagType<int val> {
- int Value = val;
-}
-
-// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h
-// and include/clang/Basic/TargetBuiltins.h
-def NoFlags : FlagType<0x00000000>;
-def FirstEltType : FlagType<0x00000001>;
-// : :
-// : :
-def EltTypeMask : FlagType<0x0000000f>;
-def FirstMemEltType : FlagType<0x00000010>;
-// : :
-// : :
-def MemEltTypeMask : FlagType<0x00000070>;
-def FirstMergeTypeMask : FlagType<0x00000080>;
-// : :
-// : :
-def MergeTypeMask : FlagType<0x00000380>;
-def FirstSplatOperand : FlagType<0x00000400>;
-// : :
-// These flags are used to specify which scalar operand
-// needs to be duplicated/splatted into a vector.
-// : :
-def SplatOperandMask : FlagType<0x00001C00>;
-def IsLoad : FlagType<0x00002000>;
-def IsStore : FlagType<0x00004000>;
-def IsGatherLoad : FlagType<0x00008000>;
-def IsScatterStore : FlagType<0x00010000>;
-def IsStructLoad : FlagType<0x00020000>;
-def IsStructStore : FlagType<0x00040000>;
-def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default
-def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
-def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
-def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
-def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
-def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
-def IsByteIndexed : FlagType<0x01000000>;
-def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
-def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
-def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
-def IsGatherPrefetch : FlagType<0x10000000>;
-def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
-def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
-def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
-def IsTupleCreate : FlagType<0x100000000>;
-def IsTupleGet : FlagType<0x200000000>;
-def IsTupleSet : FlagType<0x400000000>;
-def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
-
-// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
-class ImmCheckType<int val> {
- int Value = val;
-}
-def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns)
-def ImmCheck1_16 : ImmCheckType<1>; // 1..16
-def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1)
-def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt)
-def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2
-def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1)
-def ImmCheck0_7 : ImmCheckType<6>; // 0..7
-def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
-def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
-def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
-def ImmCheck0_13 : ImmCheckType<12>; // 0..13
-def ImmCheck0_1 : ImmCheckType<13>; // 0..1
-def ImmCheck0_2 : ImmCheckType<14>; // 0..2
-def ImmCheck0_3 : ImmCheckType<15>; // 0..3
-
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
- int Arg = arg;
- int EltSizeArg = eltSizeArg;
- ImmCheckType Kind = kind;
-}
-
-class Inst<string n, string p, string t, MergeType mt, string i,
- list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
- string Name = n;
- string Prototype = p;
- string Types = t;
- string TargetGuard = "sve";
- int Merge = mt.Value;
- string MergeSuffix = mt.Suffix;
- string LLVMIntrinsic = i;
- list<FlagType> Flags = ft;
- list<ImmCheck> ImmChecks = ch;
- int MemEltType = met.Value;
-}
-
-// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8")
-class SInst<string n, string p, string t, MergeType mt, string i = "",
- list<FlagType> ft = [], list<ImmCheck> ch = []>
- : Inst<n, p, t, mt, i, ft, ch, MemEltTyDefault> {
-}
-
-// MInst: Instructions which access memory
-class MInst<string n, string p, string t, list<FlagType> f,
- MemEltType met = MemEltTyDefault, string i = "">
- : Inst<n, p, t, MergeNone, i, f, [], met> {
-}
+include "arm_sve_sme_incl.td"
////////////////////////////////////////////////////////////////////////////////
// Loads
Index: clang/include/clang/Basic/arm_sme.td
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/arm_sme.td
@@ -0,0 +1,70 @@
+//===--- arm_sme.td - ARM SME compiler interface ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TableGen definitions from which the ARM SME header
+// file will be generated. See:
+//
+// https://developer.arm.com/architectures/system-architectures/software-standards/acle
+//
+//===----------------------------------------------------------------------===//
+
+include "arm_sve_sme_incl.td"
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads
+
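+// Per the prototype legend in arm_sve_sme_incl.td, "vimiPQ" yields
+// intrinsics of the form:
+//   void svld1_hor_za8(uint64_t tile, uint32_t slice_base,
+//                      uint64_t slice_offset, svbool_t pg, const void *ptr);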
+let TargetGuard = "sme" in {
+ def SVLD1_HOR_ZA8 : MInst<"svld1_hor_za8", "vimiPQ", "c", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1b_horiz", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLD1_HOR_ZA16 : MInst<"svld1_hor_za16", "vimiPQ", "s", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1h_horiz", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLD1_HOR_ZA32 : MInst<"svld1_hor_za32", "vimiPQ", "i", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1w_horiz", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLD1_HOR_ZA64 : MInst<"svld1_hor_za64", "vimiPQ", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_horiz", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVLD1_HOR_ZA128 : MInst<"svld1_hor_za128", "vimiPQ", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_horiz", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+ def SVLD1_VER_ZA8 : MInst<"svld1_ver_za8", "vimiPQ", "c", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1b_vert", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLD1_VER_ZA16 : MInst<"svld1_ver_za16", "vimiPQ", "s", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1h_vert", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLD1_VER_ZA32 : MInst<"svld1_ver_za32", "vimiPQ", "i", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1w_vert", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLD1_VER_ZA64 : MInst<"svld1_ver_za64", "vimiPQ", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_vert", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVLD1_VER_ZA128 : MInst<"svld1_ver_za128", "vimiPQ", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_vert", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+
+ def SVLD1_HOR_VNUM_ZA8 : MInst<"svld1_hor_vnum_za8", "vimiPQl", "c", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1b_horiz", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLD1_HOR_VNUM_ZA16 : MInst<"svld1_hor_vnum_za16", "vimiPQl", "s", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1h_horiz", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLD1_HOR_VNUM_ZA32 : MInst<"svld1_hor_vnum_za32", "vimiPQl", "i", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1w_horiz", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLD1_HOR_VNUM_ZA64 : MInst<"svld1_hor_vnum_za64", "vimiPQl", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_horiz", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVLD1_HOR_VNUM_ZA128 : MInst<"svld1_hor_vnum_za128", "vimiPQl", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_horiz", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+ def SVLD1_VER_VNUM_ZA8 : MInst<"svld1_ver_vnum_za8", "vimiPQl", "c", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1b_vert", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLD1_VER_VNUM_ZA16 : MInst<"svld1_ver_vnum_za16", "vimiPQl", "s", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1h_vert", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLD1_VER_VNUM_ZA32 : MInst<"svld1_ver_vnum_za32", "vimiPQl", "i", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1w_vert", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_vert", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_vert", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Stores
+
+let TargetGuard = "sme" in {
+ def SVST1_HOR_ZA8 : MInst<"svst1_hor_za8", "vimiP%", "c", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1b_horiz", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVST1_HOR_ZA16 : MInst<"svst1_hor_za16", "vimiP%", "s", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1h_horiz", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVST1_HOR_ZA32 : MInst<"svst1_hor_za32", "vimiP%", "i", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1w_horiz", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVST1_HOR_ZA64 : MInst<"svst1_hor_za64", "vimiP%", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_horiz", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVST1_HOR_ZA128 : MInst<"svst1_hor_za128", "vimiP%", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_horiz", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+ def SVST1_VER_ZA8 : MInst<"svst1_ver_za8", "vimiP%", "c", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1b_vert", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVST1_VER_ZA16 : MInst<"svst1_ver_za16", "vimiP%", "s", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1h_vert", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVST1_VER_ZA32 : MInst<"svst1_ver_za32", "vimiP%", "i", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1w_vert", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVST1_VER_ZA64 : MInst<"svst1_ver_za64", "vimiP%", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_vert", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVST1_VER_ZA128 : MInst<"svst1_ver_za128", "vimiP%", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_vert", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+
+ def SVST1_HOR_VNUM_ZA8 : MInst<"svst1_hor_vnum_za8", "vimiP%l", "c", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1b_horiz", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVST1_HOR_VNUM_ZA16 : MInst<"svst1_hor_vnum_za16", "vimiP%l", "s", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1h_horiz", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVST1_HOR_VNUM_ZA32 : MInst<"svst1_hor_vnum_za32", "vimiP%l", "i", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1w_horiz", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVST1_HOR_VNUM_ZA64 : MInst<"svst1_hor_vnum_za64", "vimiP%l", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_horiz", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVST1_HOR_VNUM_ZA128 : MInst<"svst1_hor_vnum_za128", "vimiP%l", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_horiz", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+ def SVST1_VER_VNUM_ZA8 : MInst<"svst1_ver_vnum_za8", "vimiP%l", "c", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1b_vert", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVST1_VER_VNUM_ZA16 : MInst<"svst1_ver_vnum_za16", "vimiP%l", "s", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1h_vert", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVST1_VER_VNUM_ZA32 : MInst<"svst1_ver_vnum_za32", "vimiP%l", "i", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1w_vert", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_vert", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_vert", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+}
Index: clang/include/clang/Basic/TargetBuiltins.h
===================================================================
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -53,6 +53,15 @@
};
}
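+ /// SME builtins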
+ namespace SME {
+ enum {
+ LastSVEBuiltin = SVE::FirstTSBuiltin - 1,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#include "clang/Basic/BuiltinsSME.def"
+ FirstTSBuiltin,
+ };
+ }
+
/// AArch64 builtins
namespace AArch64 {
enum {
@@ -60,6 +69,8 @@
LastNEONBuiltin = NEON::FirstTSBuiltin - 1,
FirstSVEBuiltin = NEON::FirstTSBuiltin,
LastSVEBuiltin = SVE::FirstTSBuiltin - 1,
+ FirstSMEBuiltin = SVE::FirstTSBuiltin,
+ LastSMEBuiltin = SME::FirstTSBuiltin - 1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "clang/Basic/BuiltinsAArch64.def"
LastTSBuiltin
Index: clang/include/clang/Basic/CMakeLists.txt
===================================================================
--- clang/include/clang/Basic/CMakeLists.txt
+++ clang/include/clang/Basic/CMakeLists.txt
@@ -72,6 +72,15 @@
clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks
SOURCE arm_sve.td
TARGET ClangARMSveSemaRangeChecks)
+clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltins)
+clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltinCG)
+clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeSemaRangeChecks)
clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
SOURCE arm_cde.td
TARGET ClangARMCdeBuiltinsDef)
Index: clang/include/clang/Basic/BuiltinsSME.def
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/BuiltinsSME.def
@@ -0,0 +1,20 @@
+//===--- BuiltinsSME.def - SME Builtin function database --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SME-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
+//
+//===----------------------------------------------------------------------===//
+
+// The format of this database matches clang/Basic/Builtins.def.
+
+#define GET_SME_BUILTINS
+#include "clang/Basic/arm_sme_builtins.inc"
+#undef GET_SME_BUILTINS
+
+#undef BUILTIN