yaxunl updated this revision to Diff 272171.
yaxunl retitled this revision from "[HIP] Add -fhip-lambda-host-device" to
"[CUDA][HIP] Let non-caputuring lambda be host device".
yaxunl edited the summary of this revision.
yaxunl added a comment.
Revised by Richard's comments.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D78655/new/
https://reviews.llvm.org/D78655
Files:
clang/include/clang/Sema/Sema.h
clang/lib/Sema/SemaCUDA.cpp
clang/lib/Sema/SemaLambda.cpp
clang/test/CodeGenCUDA/lambda.cu
clang/test/SemaCUDA/Inputs/cuda.h
clang/test/SemaCUDA/lambda.cu
Index: clang/test/SemaCUDA/lambda.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/lambda.cu
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+// file-scope lambda is implicitly host device function.
+auto global_lambda = [] () { return 123; };
+
+template<class F>
+__global__ void kernel(F f) { f(); }
+// expected-error@-1 6{{no matching function for call to object of type}}
+
+constexpr __host__ __device__ void hd();
+
+int main(void) {
+ auto lambda_kernel = [&]__global__(){};
+ // expected-error@-1 {{kernel function 'operator()' must be a free function or static member function}}
+
+ int b;
+ kernel<<<1,1>>>(global_lambda);
+
+ kernel<<<1,1>>>([](){ hd(); });
+
+ kernel<<<1,1>>>([=](){ hd(); });
+ // expected-note@-1 {{in instantiation of function template specialization 'kernel<(lambda at}}
+ // expected-note@-2 {{candidate function not viable: call to __host__ function from __global__ function}}
+
+ kernel<<<1,1>>>([b](){ hd(); });
+ // expected-note@-1 {{in instantiation of function template specialization 'kernel<(lambda at}}
+ // expected-note@-2 {{candidate function not viable: call to __host__ function from __global__ function}}
+
+ kernel<<<1,1>>>([&]()constexpr{ hd(); });
+ // expected-note@-1 {{in instantiation of function template specialization 'kernel<(lambda at}}
+ // expected-note@-2 {{candidate function not viable: call to __host__ function from __global__ function}}
+
+ kernel<<<1,1>>>([&](){ hd(); });
+ // expected-note@-1 {{in instantiation of function template specialization 'kernel<(lambda at}}
+ // expected-note@-2 {{candidate function not viable: call to __host__ function from __global__ function}}
+
+ kernel<<<1,1>>>([=, &b](){ hd(); });
+ // expected-note@-1 {{in instantiation of function template specialization 'kernel<(lambda at}}
+ // expected-note@-2 {{candidate function not viable: call to __host__ function from __global__ function}}
+
+ kernel<<<1,1>>>([&, b](){ hd(); });
+ // expected-note@-1 {{in instantiation of function template specialization 'kernel<(lambda at}}
+ // expected-note@-2 {{candidate function not viable: call to __host__ function from __global__ function}}
+
+ kernel<<<1,1>>>([](){
+ auto f = [&]{ hd(); };
+ f();
+ });
+
+ return 0;
+}
Index: clang/test/SemaCUDA/Inputs/cuda.h
===================================================================
--- clang/test/SemaCUDA/Inputs/cuda.h
+++ clang/test/SemaCUDA/Inputs/cuda.h
@@ -17,6 +17,19 @@
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
+#ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;
@@ -29,6 +42,7 @@
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
dim3 blockDim, void **args,
size_t sharedMem, cudaStream_t stream);
+#endif
// Host- and device-side placement new overloads.
void *operator new(__SIZE_TYPE__, void *p) { return p; }
Index: clang/test/CodeGenCUDA/lambda.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/lambda.cu
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: -triple x86_64-linux-gnu \
+// RUN: | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: | FileCheck -check-prefix=DEV %s
+
+#include "Inputs/cuda.h"
+
+// Device side kernel name.
+// HOST: @[[KERN_CAPTURE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_capturevEUlvE_EvT_\00"
+// HOST: @[[KERN_RESOLVE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_resolvevEUlvE_EvT_\00"
+
+// Check functions emitted for test_capture in host compilation.
+// Check lambda is not emitted in host compilation.
+// HOST-LABEL: define void @_Z12test_capturev
+// HOST: call void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
+// HOST-LABEL: define internal void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
+// HOST: call void @_Z16__device_stub__gIZ12test_capturevEUlvE_EvT_
+// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv
+
+// Check functions emitted for test_resolve in host compilation.
+// Check host version of template function 'overloaded' is emitted and called
+// by the lambda function.
+// HOST-LABEL: define void @_Z12test_resolvev
+// HOST: call void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_()
+// HOST-LABEL: define internal void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_
+// HOST: call void @_Z16__device_stub__gIZ12test_resolvevEUlvE_EvT_
+// HOST: call void @_ZZ12test_resolvevENKUlvE_clEv
+// HOST-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
+// HOST: call i32 @_Z10overloadedIiET_v
+// HOST-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
+// HOST: ret i32 2
+
+// Check kernel is registered with correct device side kernel name.
+// HOST: @__hipRegisterFunction({{.*}}@[[KERN_CAPTURE]]
+// HOST: @__hipRegisterFunction({{.*}}@[[KERN_RESOLVE]]
+
+// DEV: @a = addrspace(1) externally_initialized global i32 0
+
+// Check functions emitted for test_capture in device compilation.
+// Check lambda is emitted in device compilation and accessing device variable.
+// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_
+// DEV: call void @_ZZ12test_capturevENKUlvE_clEv
+// DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv
+// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*)
+
+// Check functions emitted for test_resolve in device compilation.
+// Check device version of template function 'overloaded' is emitted and called
+// by the lambda function.
+// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_
+// DEV: call void @_ZZ12test_resolvevENKUlvE_clEv
+// DEV-LABE: define internal void @_ZZ12test_resolvevENKUlvE_clEv
+// DEV: call i32 @_Z10overloadedIiET_v
+// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
+// DEV: ret i32 1
+
+__device__ int a;
+
+template<class T>
+__device__ T overloaded() { return 1; }
+
+template<class T>
+__host__ T overloaded() { return 2; }
+
+template<class F>
+__global__ void g(F f) { f(); }
+
+template<class F>
+void test_capture_helper(F f) { g<<<1,1>>>(f); }
+
+template<class F>
+void test_resolve_helper(F f) { g<<<1,1>>>(f); f(); }
+
+// Test capture of device variable in lambda function.
+void test_capture(void) {
+ test_capture_helper([](){ a = 1;});
+}
+
+// Test resolving host/device function in lambda function.
+// Callee should resolve to correct host/device function based on where
+// the lambda function is called, not where it is defined.
+void test_resolve(void) {
+ test_resolve_helper([](){ overloaded<int>();});
+}
Index: clang/lib/Sema/SemaLambda.cpp
===================================================================
--- clang/lib/Sema/SemaLambda.cpp
+++ clang/lib/Sema/SemaLambda.cpp
@@ -993,7 +993,7 @@
// CUDA lambdas get implicit attributes based on the scope in which they're
// declared.
if (getLangOpts().CUDA)
- CUDASetLambdaAttrs(Method);
+ CUDASetLambdaAttrs(Method, Intro);
// Number the lambda for linkage purposes if necessary.
handleLambdaNumbering(Class, Method);
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -746,13 +746,16 @@
DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}
-void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
+void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method, LambdaIntroducer &LI) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
return;
FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
- if (!CurFn)
+ if (!CurFn || (LI.Default == LCD_None && LI.Captures.size() == 0)) {
+ Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
return;
+ }
CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
if (Target == CFT_Global || Target == CFT_Device) {
Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -11768,7 +11768,7 @@
/// CUDA lambdas declared inside __device__ or __global__ functions inherit
/// the __device__ attribute. Similarly, lambdas inside __host__ __device__
/// functions become __host__ __device__ themselves.
- void CUDASetLambdaAttrs(CXXMethodDecl *Method);
+ void CUDASetLambdaAttrs(CXXMethodDecl *Method, LambdaIntroducer &LI);
/// Finds a function in \p Matches with highest calling priority
/// from \p Caller context and erases all functions with lower
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits