cdevadas created this revision.
cdevadas added reviewers: b-sumner, yaxunl.
Herald added subscribers: cfe-commits, t-tye, Anastasia, tpr, dstuttard, wdng,
kzhuravl.
Herald added a project: clang.
Enable 48 bytes of implicit arguments for HIP as well. Previously this was
enabled only for OpenCL. This code is specific to the AMDGPU target.
Repository:
rC Clang
https://reviews.llvm.org/D62244
Files:
lib/CodeGen/TargetInfo.cpp
test/CodeGenHIP/Inputs/hip.h
test/CodeGenHIP/implicit-kernarg.cpp
Index: test/CodeGenHIP/implicit-kernarg.cpp
===================================================================
--- /dev/null
+++ test/CodeGenHIP/implicit-kernarg.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x hip -o - %s | FileCheck %s
+#include "Inputs/hip.h"
+
+__global__ void hip_kernel_temp() {
+}
+
+// CHECK-DAG: attributes #0 = { noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48"
Index: test/CodeGenHIP/Inputs/hip.h
===================================================================
--- /dev/null
+++ test/CodeGenHIP/Inputs/hip.h
@@ -0,0 +1,3 @@
+/* Minimal declarations for HIP support. Testing purposes only. */
+
+#define __global__ __attribute__((global))
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7853,7 +7853,8 @@
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
- if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() &&
+ if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
+ (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
Index: test/CodeGenHIP/implicit-kernarg.cpp
===================================================================
--- /dev/null
+++ test/CodeGenHIP/implicit-kernarg.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x hip -o - %s | FileCheck %s
+#include "Inputs/hip.h"
+
+__global__ void hip_kernel_temp() {
+}
+
+// CHECK-DAG: attributes #0 = { noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48"
Index: test/CodeGenHIP/Inputs/hip.h
===================================================================
--- /dev/null
+++ test/CodeGenHIP/Inputs/hip.h
@@ -0,0 +1,3 @@
+/* Minimal declarations for HIP support. Testing purposes only. */
+
+#define __global__ __attribute__((global))
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7853,7 +7853,8 @@
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
- if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() &&
+ if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
+ (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits