jhuber6 updated this revision to Diff 483640.
jhuber6 added a comment.
Changing to use the same CUDA global attributes. This requires a few extra
checks for whether or not we were in CUDA mode, since previously it just assumed
that any time we saw one of these globals we were in that mode. I added a different
spelling as well just for consistency.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D140226/new/
https://reviews.llvm.org/D140226
Files:
clang/include/clang/Basic/Attr.td
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGen/nvptx_attributes.c
Index: clang/test/CodeGen/nvptx_attributes.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,51 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@device
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+//
+int device() {return 1;};
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @device()
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT: store i32 [[CALL]], ptr [[TMP0]], align 4
+// CHECK-NEXT: ret void
+//
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+ *ret = device();
+}
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@bar
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @device()
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT: store i32 [[CALL]], ptr [[TMP0]], align 4
+// CHECK-NEXT: ret void
+//
+__attribute__((nvptx_kernel, nvptx_launch_bounds(1, 128))) void bar(int *ret) {
+ *ret = device();
+}
+
+
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+// CHECK: !1 = !{ptr @bar, !"kernel", i32 1}
+// CHECK: !2 = !{ptr @bar, !"maxntidx", i32 1}
+// CHECK: !3 = !{ptr @bar, !"minctasm", i32 128}
+// CHECK: !4 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !5 = !{!"clang version 16.0.0"}
+//.
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7328,32 +7328,29 @@
}
}
- // Perform special handling in CUDA mode.
- if (M.getLangOpts().CUDA) {
- // CUDA __global__ functions get a kernel metadata entry. Since
- // __global__ functions cannot be called from the device, we do not
- // need to set the noinline attribute.
- if (FD->hasAttr<CUDAGlobalAttr>()) {
- // Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
- }
- if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
- // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
- llvm::APSInt MaxThreads(32);
- MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
- if (MaxThreads > 0)
- addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
-
- // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
- // not specified in __launch_bounds__ or if the user specified a 0 value,
- // we don't have to add a PTX directive.
- if (Attr->getMinBlocks()) {
- llvm::APSInt MinBlocks(32);
- MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
- if (MinBlocks > 0)
- // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
- addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
- }
+ // CUDA __global__ functions get a kernel metadata entry. Since
+ // __global__ functions cannot be called from the device, we do not
+ // need to set the noinline attribute.
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ // Create !{<func-ref>, metadata !"kernel", i32 1} node
+ addNVVMMetadata(F, "kernel", 1);
+ }
+ if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
+ // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+ llvm::APSInt MaxThreads(32);
+ MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
+ if (MaxThreads > 0)
+ addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
+
+ // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
+ // not specified in __launch_bounds__ or if the user specified a 0 value,
+ // we don't have to add a PTX directive.
+ if (Attr->getMinBlocks()) {
+ llvm::APSInt MinBlocks(32);
+ MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
+ if (MinBlocks > 0)
+ // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+ addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
}
}
}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1411,7 +1411,7 @@
if (FD &&
FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
Out << "__regcall3__" << II->getName();
- } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
+ } else if (FD && CGM.getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
} else {
@@ -1547,6 +1547,7 @@
// device-mangling in host compilation could help catching certain ones.
assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice ||
+ !getLangOpts().CUDA ||
(getContext().getAuxTargetInfo() &&
(getContext().getAuxTargetInfo()->getCXXABI() !=
getContext().getTargetInfo().getCXXABI())) ||
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -408,6 +408,7 @@
def TargetX86 : TargetArch<["x86"]>;
def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
def TargetWindows : TargetSpec {
let OSes = ["Win32"];
}
@@ -1194,10 +1195,9 @@
def : MutualExclusions<[CUDADeviceBuiltinSurfaceType,
CUDADeviceBuiltinTextureType]>;
-def CUDAGlobal : InheritableAttr {
- let Spellings = [GNU<"global">, Declspec<"__global__">];
+def CUDAGlobal : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+ let Spellings = [GNU<"global">, Declspec<"__global__">, Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
- let LangOpts = [CUDA];
let Documentation = [Undocumented];
}
def : MutualExclusions<[CUDADevice, CUDAGlobal]>;
@@ -1225,10 +1225,9 @@
let Documentation = [InternalOnly];
}
-def CUDALaunchBounds : InheritableAttr {
- let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">];
+def CUDALaunchBounds : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+ let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">, Clang<"nvptx_launch_bounds">];
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
- let LangOpts = [CUDA];
let Subjects = SubjectList<[ObjCMethod, FunctionLike]>;
// An AST node is created for this attribute, but is not used by other parts
// of the compiler. However, this node needs to exist in the AST because
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits