sagarkulkarni19 created this revision.
sagarkulkarni19 added reviewers: sdesmalen, rsandifo-arm, david-arm.
sagarkulkarni19 added a project: clang.
Herald added a subscriber: kristof.beyls.
Herald added a project: All.
sagarkulkarni19 requested review of this revision.
Herald added a subscriber: cfe-commits.
This patch adds support for the following SME ACLE intrinsics:
  - svldr_vnum_za
  - svstr_vnum_za
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D134678
Files:
clang/include/clang/Basic/TargetBuiltins.h
clang/include/clang/Basic/arm_sve.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
clang/utils/TableGen/SveEmitter.cpp
Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -873,6 +873,10 @@
this->SMEAttributes = "arm_streaming, arm_shared_za";
else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEZero"))
this->SMEAttributes = "arm_streaming_compatible, arm_shared_za";
+ else if (this->Flags & Emitter.getEnumValueForFlag("IsSMELdr"))
+ this->SMEAttributes = "arm_streaming_compatible, arm_shared_za";
+ else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEStr"))
+ this->SMEAttributes = "arm_streaming_compatible, arm_shared_za, arm_preserves_za";
else
llvm_unreachable("Unknown SME instruction");
} else
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+
+// CHECK-LABEL: @test_svstr_vnum_za(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svstr_vnum_zajPv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
+ svstr_vnum_za(slice_base, 0, ptr);
+}
+
+// CHECK-LABEL: @test_svstr_vnum_za_1(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svstr_vnum_za_1jPv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
+ svstr_vnum_za(slice_base, 15, ptr);
+}
+
+// CHECK-LABEL: @test_svstr_vnum_za_2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svstr_vnum_za_2jPv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) {
+ svstr_vnum_za(slice_base, 16, ptr);
+}
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -0,0 +1,61 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-LABEL: @test_svldr_vnum_za(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svldr_vnum_zajPKv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
+ svldr_vnum_za(slice_base, 0, ptr);
+}
+
+// CHECK-LABEL: @test_svldr_vnum_za_1(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
+ svldr_vnum_za(slice_base, 15, ptr);
+}
+
+// CHECK-LABEL: @test_svldr_vnum_za_2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svldr_vnum_za_2jPKv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) {
+ svldr_vnum_za(slice_base, 16, ptr);
+}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4243,6 +4243,9 @@
llvm::Value *EmitSMEZero(SVETypeFlags TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
+ llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -9134,6 +9134,21 @@
return Builder.CreateCall(F, Ops);
}
+Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty);
+ llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale");
+ llvm::Value *MulVL = Builder.CreateMul(
+ VscaleCall,
+ Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
+ "mulvl");
+ Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
+ Ops.erase(&Ops[1]);
+ Function *F = CGM.getIntrinsic(IntID, {});
+ return Builder.CreateCall(F, Ops);
+}
+
// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9272,6 +9287,8 @@
return EmitSMEWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isSMEZero())
return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isSMELdr() || TypeFlags.isSMEStr())
+ return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isGatherLoad())
return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isScatterStore())
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -212,6 +212,8 @@
def IsSMERead : FlagType<0x4000000000>;
def IsSMEWrite : FlagType<0x8000000000>;
def IsSMEZero : FlagType<0x10000000000>;
+def IsSMELdr : FlagType<0x20000000000>;
+def IsSMEStr : FlagType<0x40000000000>;
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
class ImmCheckType<int val> {
@@ -571,6 +573,8 @@
def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1d_vert">;
def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "", [IsOverloadNone, IsSME, IsSMELd1], MemEltTyDefault, "aarch64_sme_ld1q_vert">;
+def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmiQ", "", [IsOverloadNone, IsSME, IsSMELdr], MemEltTyDefault, "aarch64_sme_ldr">;
+
////////////////////////////////////////////////////////////////////////////////
// Stores
@@ -715,6 +719,8 @@
def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1d_vert">;
def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "", [IsOverloadNone, IsSME, IsSMESt1], MemEltTyDefault, "aarch64_sme_st1q_vert">;
+def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vmi%", "", [IsOverloadNone, IsSME, IsSMEStr], MemEltTyDefault, "aarch64_sme_str">;
+
////////////////////////////////////////////////////////////////////////////////
// Prefetches
Index: clang/include/clang/Basic/TargetBuiltins.h
===================================================================
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -287,6 +287,8 @@
bool isSMERead() const { return Flags & IsSMERead; }
bool isSMEWrite() const { return Flags & IsSMEWrite; }
bool isSMEZero() const { return Flags & IsSMEZero; }
+ bool isSMELdr() const { return Flags & IsSMELdr; }
+ bool isSMEStr() const { return Flags & IsSMEStr; }
uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits