This revision was automatically updated to reflect the committed changes.
Closed by commit rL260697: [CUDA] Tweak attribute-based overload resolution to
match nvcc behavior. (authored by tra).
Changed prior to commit:
http://reviews.llvm.org/D16870?vs=47753&id=47819#toc
Repository:
rL LLVM
http://reviews.llvm.org/D16870
Files:
cfe/trunk/include/clang/Sema/Sema.h
cfe/trunk/lib/Sema/SemaCUDA.cpp
cfe/trunk/lib/Sema/SemaOverload.cpp
cfe/trunk/test/CodeGenCUDA/function-overload.cu
cfe/trunk/test/SemaCUDA/function-overload.cu
Index: cfe/trunk/test/CodeGenCUDA/function-overload.cu
===================================================================
--- cfe/trunk/test/CodeGenCUDA/function-overload.cu
+++ cfe/trunk/test/CodeGenCUDA/function-overload.cu
@@ -7,7 +7,8 @@
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
-// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
+// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
+// RUN: -check-prefix=CHECK-DEVICE-STRICT %s
// Check target overloads handling with disabled call target checks.
// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
@@ -77,24 +78,120 @@
extern "C" __host__ __device__ int chd(void) {return 14;}
// CHECK-BOTH: ret i32 14
+// HD functions are sometimes allowed to call H or D functions -- this
+// is an artifact of the source-to-source splitting performed by nvcc
+// that we need to mimic. During device mode compilation in nvcc, host
+// functions aren't present at all, so don't participate in
+// overloading. But in clang, H and D functions are present in both
+// compilation modes. Clang normally uses the target attribute as a
+// tiebreaker between overloads with otherwise identical priority, but
+// in order to match nvcc's behavior, we sometimes need to wholly
+// discard overloads that would not be present during compilation
+// under nvcc.
+
+template <typename T> T template_vs_function(T arg) { return 15; }
+__device__ float template_vs_function(float arg) { return 16; }
+
+// Here we expect to call the templated function during host
+// compilation, even if -fcuda-disable-target-call-checks is passed,
+// and even though C++ overload rules prefer the non-templated
+// function.
+// CHECK-BOTH-LABEL: define void @_Z5hd_tfv()
+__host__ __device__ void hd_tf(void) {
+ template_vs_function(1.0f);
+ // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+ template_vs_function(2.0);
+ // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+}
+
+// Calls from __host__ and __device__ functions should always call the
+// overloaded function that matches their mode.
+// CHECK-HOST-LABEL: define void @_Z4h_tfv()
+__host__ void h_tf() {
+ template_vs_function(1.0f);
+ // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float
+ template_vs_function(2.0);
+ // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double
+}
+
+// CHECK-DEVICE-LABEL: define void @_Z4d_tfv()
+__device__ void d_tf() {
+ template_vs_function(1.0f);
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+ template_vs_function(2.0);
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+}
+
+// In case we have a mix of HD and H-only or D-only candidates in the
+// overload set, normal C++ overload resolution rules apply first.
+template <typename T> T template_vs_hd_function(T arg) { return 15; }
+__host__ __device__ float template_vs_hd_function(float arg) { return 16; }
+
+// CHECK-BOTH-LABEL: define void @_Z7hd_thdfv()
+__host__ __device__ void hd_thdf() {
+ template_vs_hd_function(1.0f);
+ // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float
+ // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
+ template_vs_hd_function(1);
+ // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32
+ // CHECK-DEVICE-STRICT: call float @_Z23template_vs_hd_functionf(float
+ // CHECK-DEVICE-NC: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32
+}
+
+// CHECK-HOST-LABEL: define void @_Z6h_thdfv()
+__host__ void h_thdf() {
+ template_vs_hd_function(1.0f);
+ // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float
+ template_vs_hd_function(1);
+ // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32
+}
+
+// CHECK-DEVICE-LABEL: define void @_Z6d_thdfv()
+__device__ void d_thdf() {
+ template_vs_hd_function(1.0f);
+ // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
+ template_vs_hd_function(1);
+ // Host-only function template is not callable with strict call checks,
+ // so for device side HD function will be the only choice.
+ // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
+}
+
+// Check that overloads still work the same way on both host and
+// device side when the overload set contains only functions from one
+// side of compilation.
+__device__ float device_only_function(int arg) { return 17; }
+__device__ float device_only_function(float arg) { return 18; }
+
+__host__ float host_only_function(int arg) { return 19; }
+__host__ float host_only_function(float arg) { return 20; }
+
+// CHECK-BOTH-LABEL: define void @_Z6hd_dofv()
+__host__ __device__ void hd_dof() {
+#ifdef NOCHECKS
+ device_only_function(1.0f);
+ // CHECK-BOTH-NC: call float @_Z20device_only_functionf(float
+ device_only_function(1);
+ // CHECK-BOTH-NC: call float @_Z20device_only_functioni(i32
+ host_only_function(1.0f);
+ // CHECK-BOTH-NC: call float @_Z18host_only_functionf(float
+ host_only_function(1);
+ // CHECK-BOTH-NC: call float @_Z18host_only_functioni(i32
+#endif
+}
+
+
// CHECK-HOST-LABEL: define void @_Z5hostfv()
__host__ void hostf(void) {
-#if defined (NOCHECKS)
- fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
- fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp,
-#endif
fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp,
fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp,
-#if defined (NOCHECKS)
- d(); // CHECK-HOST-NC: call i32 @_Z1dv()
- cd(); // CHECK-HOST-NC: call i32 @cd()
-#endif
h(); // CHECK-HOST: call i32 @_Z1hv()
ch(); // CHECK-HOST: call i32 @ch()
dh(); // CHECK-HOST: call i32 @_Z2dhv()
@@ -106,21 +203,13 @@
__device__ void devicef(void) {
fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp,
-#if defined (NOCHECKS)
- fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
- fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp,
-#endif
fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,
d(); // CHECK-DEVICE: call i32 @_Z1dv()
cd(); // CHECK-DEVICE: call i32 @cd()
-#if defined (NOCHECKS)
- h(); // CHECK-DEVICE-NC: call i32 @_Z1hv()
- ch(); // CHECK-DEVICE-NC: call i32 @ch()
-#endif
dh(); // CHECK-DEVICE: call i32 @_Z2dhv()
cdh(); // CHECK-DEVICE: call i32 @cdh()
}
Index: cfe/trunk/test/SemaCUDA/function-overload.cu
===================================================================
--- cfe/trunk/test/SemaCUDA/function-overload.cu
+++ cfe/trunk/test/SemaCUDA/function-overload.cu
@@ -70,27 +70,23 @@
__host__ void hostf(void) {
fp_t dp = d;
- fp_t cdp = cd;
-#if !defined(NOCHECKS)
- // expected-error@-3 {{reference to __device__ function 'd' in __host__ function}}
+ // expected-error@-1 {{reference to __device__ function 'd' in __host__ function}}
// expected-note@65 {{'d' declared here}}
- // expected-error@-4 {{reference to __device__ function 'cd' in __host__ function}}
+ fp_t cdp = cd;
+ // expected-error@-1 {{reference to __device__ function 'cd' in __host__ function}}
// expected-note@68 {{'cd' declared here}}
-#endif
fp_t hp = h;
fp_t chp = ch;
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;
d();
- cd();
-#if !defined(NOCHECKS)
- // expected-error@-3 {{no matching function for call to 'd'}}
+ // expected-error@-1 {{no matching function for call to 'd'}}
// expected-note@65 {{candidate function not viable: call to __device__ function from __host__ function}}
- // expected-error@-4 {{no matching function for call to 'cd'}}
+ cd();
+ // expected-error@-1 {{no matching function for call to 'cd'}}
// expected-note@68 {{candidate function not viable: call to __device__ function from __host__ function}}
-#endif
h();
ch();
dh();
@@ -104,28 +100,22 @@
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
- fp_t chp = ch;
-#if !defined(NOCHECKS)
- // expected-error@-3 {{reference to __host__ function 'h' in __device__ function}}
+ // expected-error@-1 {{reference to __host__ function 'h' in __device__ function}}
// expected-note@66 {{'h' declared here}}
- // expected-error@-4 {{reference to __host__ function 'ch' in __device__ function}}
+ fp_t chp = ch;
+ // expected-error@-1 {{reference to __host__ function 'ch' in __device__ function}}
// expected-note@69 {{'ch' declared here}}
-#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}}
// expected-note@67 {{'g' declared here}}
d();
cd();
- h();
- ch();
-#if !defined(NOCHECKS)
- // expected-error@-3 {{no matching function for call to 'h'}}
+ h(); // expected-error {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __device__ function}}
- // expected-error@-4 {{no matching function for call to 'ch'}}
+ ch(); // expected-error {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __device__ function}}
-#endif
dh();
cdh();
g(); // expected-error {{no matching function for call to 'g'}}
@@ -138,28 +128,25 @@
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
- fp_t chp = ch;
-#if !defined(NOCHECKS)
- // expected-error@-3 {{reference to __host__ function 'h' in __global__ function}}
+ // expected-error@-1 {{reference to __host__ function 'h' in __global__ function}}
// expected-note@66 {{'h' declared here}}
- // expected-error@-4 {{reference to __host__ function 'ch' in __global__ function}}
+ fp_t chp = ch;
+ // expected-error@-1 {{reference to __host__ function 'ch' in __global__ function}}
// expected-note@69 {{'ch' declared here}}
-#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
- gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}}
- // expected-note@67 {{'g' declared here}}
+ gp_t gp = g;
+ // expected-error@-1 {{reference to __global__ function 'g' in __global__ function}}
+ // expected-note@67 {{'g' declared here}}
d();
cd();
h();
- ch();
-#if !defined(NOCHECKS)
- // expected-error@-3 {{no matching function for call to 'h'}}
+ // expected-error@-1 {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __global__ function}}
- // expected-error@-4 {{no matching function for call to 'ch'}}
+ ch();
+ // expected-error@-1 {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __global__ function}}
-#endif
dh();
cdh();
g(); // expected-error {{no matching function for call to 'g'}}
Index: cfe/trunk/lib/Sema/SemaOverload.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaOverload.cpp
+++ cfe/trunk/lib/Sema/SemaOverload.cpp
@@ -8722,14 +8722,44 @@
OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
iterator &Best,
bool UserDefinedConversion) {
+ llvm::SmallVector<OverloadCandidate *, 16> Candidates;
+ std::transform(begin(), end(), std::back_inserter(Candidates),
+ [](OverloadCandidate &Cand) { return &Cand; });
+
+ // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA
+ // but accepted by both clang and NVCC. However during a particular
+ // compilation mode only one call variant is viable. We need to
+ // exclude non-viable overload candidates from consideration based
+ // only on their host/device attributes. Specifically, if one
+ // candidate call is WrongSide and the other is SameSide, we ignore
+ // the WrongSide candidate.
+ if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads) {
+ const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ bool ContainsSameSideCandidate =
+ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
+ return Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_SameSide;
+ });
+ if (ContainsSameSideCandidate) {
+ auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
+ return Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_WrongSide;
+ };
+ Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(),
+ IsWrongSideCandidate),
+ Candidates.end());
+ }
+ }
+
// Find the best viable function.
Best = end();
- for (iterator Cand = begin(); Cand != end(); ++Cand) {
+ for (auto *Cand : Candidates)
if (Cand->Viable)
if (Best == end() || isBetterOverloadCandidate(S, *Cand, *Best, Loc,
UserDefinedConversion))
Best = Cand;
- }
// If we didn't find any viable functions, abort.
if (Best == end())
@@ -8739,7 +8769,7 @@
// Make sure that this function is better than every other viable
// function. If not, we have an ambiguity.
- for (iterator Cand = begin(); Cand != end(); ++Cand) {
+ for (auto *Cand : Candidates) {
if (Cand->Viable &&
Cand != Best &&
!isBetterOverloadCandidate(S, *Best, *Cand, Loc,
Index: cfe/trunk/lib/Sema/SemaCUDA.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaCUDA.cpp
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp
@@ -68,26 +68,26 @@
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
-// Preferences: b-best, f-fallback, l-last resort, n-never.
+// Preferences: N:native, HD:host-device, SS:same side, WS:wrong side, --:never.
//
-// | F | T | Ph | Pd | H |
-// |----+----+----+----+-----+
-// | d | d | b | b | (b) |
-// | d | g | n | n | (a) |
-// | d | h | l | l | (e) |
-// | d | hd | f | f | (c) |
-// | g | d | b | b | (b) |
-// | g | g | n | n | (a) |
-// | g | h | l | l | (e) |
-// | g | hd | f | f | (c) |
-// | h | d | l | l | (e) |
-// | h | g | b | b | (b) |
-// | h | h | b | b | (b) |
-// | h | hd | f | f | (c) |
-// | hd | d | l | f | (d) |
-// | hd | g | f | n |(d/a)|
-// | hd | h | f | l | (d) |
-// | hd | hd | b | b | (b) |
+// | F | T | Ph | Pd | H |
+// |----+----+-----+-----+-----+
+// | d | d | N | N | (c) |
+// | d | g | -- | -- | (a) |
+// | d | h | -- | -- | (e) |
+// | d | hd | HD | HD | (b) |
+// | g | d | N | N | (c) |
+// | g | g | -- | -- | (a) |
+// | g | h | -- | -- | (e) |
+// | g | hd | HD | HD | (b) |
+// | h | d | -- | -- | (e) |
+// | h | g | N | N | (c) |
+// | h | h | N | N | (c) |
+// | h | hd | HD | HD | (b) |
+// | hd | d | WS | SS | (d) |
+// | hd | g | SS | -- |(d/a)|
+// | hd | h | SS | WS | (d) |
+// | hd | hd | HD | HD | (b) |
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
@@ -112,39 +112,38 @@
(CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
return CFP_Never;
- // (b) Best case scenarios
+ // (b) Calling HostDevice is OK for everyone.
+ if (CalleeTarget == CFT_HostDevice)
+ return CFP_HostDevice;
+
+ // (c) Best case scenarios
if (CalleeTarget == CallerTarget ||
(CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
- return CFP_Best;
-
- // (c) Calling HostDevice is OK as a fallback that works for everyone.
- if (CalleeTarget == CFT_HostDevice)
- return CFP_Fallback;
-
- // Figure out what should be returned 'last resort' cases. Normally
- // those would not be allowed, but we'll consider them if
- // CUDADisableTargetCallChecks is true.
- CUDAFunctionPreference QuestionableResult =
- getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
+ return CFP_Native;
// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
- // Calling a function that matches compilation mode is OK.
- // Calling a function from the other side is frowned upon.
- if (getLangOpts().CUDAIsDevice)
- return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
- else
- return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
- ? CFP_Fallback
- : QuestionableResult;
+ // It's OK to call a compilation-mode matching function from an HD one.
+ if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
+ (!getLangOpts().CUDAIsDevice &&
+ (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
+ return CFP_SameSide;
+
+ // We'll allow calls to non-mode-matching functions if target call
+ // checks are disabled. This is needed to avoid complaining about
+ // HD->H calls when we compile for device side and vice versa.
+ if (getLangOpts().CUDADisableTargetCallChecks)
+ return CFP_WrongSide;
+
+ return CFP_Never;
}
// (e) Calling across device/host boundary is not something you should do.
if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
(CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
- return QuestionableResult;
+ return CFP_Never;
llvm_unreachable("All cases should've been handled by now.");
}
Index: cfe/trunk/include/clang/Sema/Sema.h
===================================================================
--- cfe/trunk/include/clang/Sema/Sema.h
+++ cfe/trunk/include/clang/Sema/Sema.h
@@ -8794,12 +8794,18 @@
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
+ // CUDA function call preference. Must be ordered numerically from
+ // worst to best.
enum CUDAFunctionPreference {
CFP_Never, // Invalid caller/callee combination.
- CFP_LastResort, // Lowest priority. Only in effect if
+ CFP_WrongSide, // Calls from host-device to host or device
+ // function that do not match current compilation
+ // mode. Only in effect if
// LangOpts.CUDADisableTargetCallChecks is true.
- CFP_Fallback, // Low priority caller/callee combination
- CFP_Best, // Preferred caller/callee combination
+ CFP_HostDevice, // Any calls to host/device functions.
+ CFP_SameSide, // Calls from host-device to host or device
+ // function matching current compilation mode.
+ CFP_Native, // host-to-host or device-to-device calls.
};
/// Identifies relative preference of a given Caller/Callee
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits