yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall.
yaxunl edited the summary of this revision.
Currently clang fails to compile the following CUDA program in device
compilation:
__host__ int foo(int x) {
return 1;
}
template<class T>
__device__ __host__ int foo(T x) {
return 2;
}
__device__ __host__ int bar() {
return foo(1);
}
__global__ void test(int *a) {
*a = bar();
}
This happens because the call to foo is resolved to `__host__ foo` instead of
`__device__ __host__ foo`.
This seems to be a bug: `__device__ __host__ foo` is a viable callee for
foo, yet clang is unable to choose it.
nvcc has a similar issue:
https://cuda.godbolt.org/z/bGijLc
However, nvcc only emits a warning and does not fail to compile. Instead, it
emits a trap in the generated code, so the program fails at run time.
This patch fixes the issue in clang.
https://reviews.llvm.org/D77954
Files:
clang/lib/Sema/SemaOverload.cpp
clang/test/SemaCUDA/function-overload.cu
Index: clang/test/SemaCUDA/function-overload.cu
===================================================================
--- clang/test/SemaCUDA/function-overload.cu
+++ clang/test/SemaCUDA/function-overload.cu
@@ -331,9 +331,6 @@
// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
-#ifdef __CUDA_ARCH__
-//expected-note@-2 {{declared here}}
-#endif
{
return TemplateReturnTy();
}
@@ -342,11 +339,13 @@
}
__host__ __device__ void test_host_device_calls_hd_template() {
- HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
- TemplateReturnTy ret2 = template_vs_hd_function(1);
#ifdef __CUDA_ARCH__
- // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+ typedef HostDeviceReturnTy ExpectedReturnTy;
+#else
+ typedef TemplateReturnTy ExpectedReturnTy;
#endif
+ HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
+ ExpectedReturnTy ret2 = template_vs_hd_function(1);
}
__host__ void test_host_calls_hd_template() {
Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -9821,8 +9821,10 @@
llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
// Check viable function only.
return Cand->Viable && Cand->Function &&
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
- Sema::CFP_SameSide;
+ (S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_SameSide ||
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_HostDevice);
});
if (ContainsSameSideCandidate) {
auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
Index: clang/test/SemaCUDA/function-overload.cu
===================================================================
--- clang/test/SemaCUDA/function-overload.cu
+++ clang/test/SemaCUDA/function-overload.cu
@@ -331,9 +331,6 @@
// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
-#ifdef __CUDA_ARCH__
-//expected-note@-2 {{declared here}}
-#endif
{
return TemplateReturnTy();
}
@@ -342,11 +339,13 @@
}
__host__ __device__ void test_host_device_calls_hd_template() {
- HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
- TemplateReturnTy ret2 = template_vs_hd_function(1);
#ifdef __CUDA_ARCH__
- // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+ typedef HostDeviceReturnTy ExpectedReturnTy;
+#else
+ typedef TemplateReturnTy ExpectedReturnTy;
#endif
+ HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
+ ExpectedReturnTy ret2 = template_vs_hd_function(1);
}
__host__ void test_host_calls_hd_template() {
Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -9821,8 +9821,10 @@
llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
// Check viable function only.
return Cand->Viable && Cand->Function &&
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
- Sema::CFP_SameSide;
+ (S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_SameSide ||
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_HostDevice);
});
if (ContainsSameSideCandidate) {
auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits