tra updated this revision to Diff 46927.
tra marked an inline comment as done.
tra added a comment.
Addressed Jingyue's comments.
Fixed function-overload.cu tests to reflect stricter call target checks.
http://reviews.llvm.org/D16870
Files:
include/clang/Sema/Sema.h
lib/Sema/SemaCUDA.cpp
lib/Sema/SemaOverload.cpp
test/CodeGenCUDA/function-overload.cu
test/SemaCUDA/function-overload.cu
Index: test/SemaCUDA/function-overload.cu
===================================================================
--- test/SemaCUDA/function-overload.cu
+++ test/SemaCUDA/function-overload.cu
@@ -70,27 +70,23 @@
__host__ void hostf(void) {
fp_t dp = d;
- fp_t cdp = cd;
-#if !defined(NOCHECKS)
- // expected-error@-3 {{reference to __device__ function 'd' in __host__ function}}
+ // expected-error@-1 {{reference to __device__ function 'd' in __host__ function}}
// expected-note@65 {{'d' declared here}}
- // expected-error@-4 {{reference to __device__ function 'cd' in __host__ function}}
+ fp_t cdp = cd;
+ // expected-error@-1 {{reference to __device__ function 'cd' in __host__ function}}
// expected-note@68 {{'cd' declared here}}
-#endif
fp_t hp = h;
fp_t chp = ch;
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;
d();
- cd();
-#if !defined(NOCHECKS)
- // expected-error@-3 {{no matching function for call to 'd'}}
+ // expected-error@-1 {{no matching function for call to 'd'}}
// expected-note@65 {{candidate function not viable: call to __device__ function from __host__ function}}
- // expected-error@-4 {{no matching function for call to 'cd'}}
+ cd();
+ // expected-error@-1 {{no matching function for call to 'cd'}}
// expected-note@68 {{candidate function not viable: call to __device__ function from __host__ function}}
-#endif
h();
ch();
dh();
@@ -104,28 +100,22 @@
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
- fp_t chp = ch;
-#if !defined(NOCHECKS)
- // expected-error@-3 {{reference to __host__ function 'h' in __device__ function}}
+ // expected-error@-1 {{reference to __host__ function 'h' in __device__ function}}
// expected-note@66 {{'h' declared here}}
- // expected-error@-4 {{reference to __host__ function 'ch' in __device__ function}}
+ fp_t chp = ch;
+ // expected-error@-1 {{reference to __host__ function 'ch' in __device__ function}}
// expected-note@69 {{'ch' declared here}}
-#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}}
// expected-note@67 {{'g' declared here}}
d();
cd();
- h();
- ch();
-#if !defined(NOCHECKS)
- // expected-error@-3 {{no matching function for call to 'h'}}
+ h(); // expected-error {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __device__ function}}
- // expected-error@-4 {{no matching function for call to 'ch'}}
+ ch(); // expected-error {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __device__ function}}
-#endif
dh();
cdh();
g(); // expected-error {{no matching function for call to 'g'}}
@@ -138,28 +128,25 @@
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
- fp_t chp = ch;
-#if !defined(NOCHECKS)
- // expected-error@-3 {{reference to __host__ function 'h' in __global__ function}}
+ // expected-error@-1 {{reference to __host__ function 'h' in __global__ function}}
// expected-note@66 {{'h' declared here}}
- // expected-error@-4 {{reference to __host__ function 'ch' in __global__ function}}
+ fp_t chp = ch;
+ // expected-error@-1 {{reference to __host__ function 'ch' in __global__ function}}
// expected-note@69 {{'ch' declared here}}
-#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
- gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}}
- // expected-note@67 {{'g' declared here}}
+ gp_t gp = g;
+ // expected-error@-1 {{reference to __global__ function 'g' in __global__ function}}
+ // expected-note@67 {{'g' declared here}}
d();
cd();
h();
- ch();
-#if !defined(NOCHECKS)
- // expected-error@-3 {{no matching function for call to 'h'}}
+ // expected-error@-1 {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __global__ function}}
- // expected-error@-4 {{no matching function for call to 'ch'}}
+ ch();
+ // expected-error@-1 {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __global__ function}}
-#endif
dh();
cdh();
g(); // expected-error {{no matching function for call to 'g'}}
Index: test/CodeGenCUDA/function-overload.cu
===================================================================
--- test/CodeGenCUDA/function-overload.cu
+++ test/CodeGenCUDA/function-overload.cu
@@ -77,24 +77,98 @@
extern "C" __host__ __device__ int chd(void) {return 14;}
// CHECK-BOTH: ret i32 14
+// NOTE: this is an artefact of split-mode CUDA compilation that we
+// need to mimic. HD functions are sometimes allowed to call H or D
+// functions. Due to split compilation mode device-side compilation
+// will not see host-only function and thus they will not be
+// considered at all. For clang both H and D variants will become
+// function overloads. Normally target attribute is considered only if
+// C++ rules can not determine which function is better. However in
+// this case we need to discard functions that would not be present
+// during current compilation phase before we apply normal overload
+// resolution rules.
+
+// Large enough difference in calling preferences should have
+// precedence over standard C++ overloading rules.
+template <typename T> T template_vs_function(T arg) { return 15; }
+__device__ float template_vs_function(float arg) { return 16; }
+
+// In this case during host compilation we expect to cal function
+// template even if __device__ function may be available and allowed
+// by -fcuda-disable-target-call-checks and, according to C++ overload
+// resolution rules, would be prefered over function template.
+// CHECK-BOTH-LABEL: define void @_Z5hd_tfv()
+__host__ __device__ void hd_tf(void) {
+ template_vs_function(1.0f);
+ // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+ template_vs_function(2.0);
+ // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+}
+
+// Calls from __host__ and __device__ functions should always call
+// overloaded function that matches their mode.
+// CHECK-HOST-LABEL: define void @_Z4h_tfv()
+__host__ void h_tf() {
+ template_vs_function(1.0f);
+ // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float
+ template_vs_function(2.0);
+ // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double
+}
+
+// CHECK-DEVICE-LABEL: define void @_Z4d_tfv()
+__device__ void d_tf() {
+ template_vs_function(1.0f);
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+ template_vs_function(2.0);
+ // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
+}
+
+// In case of smaller difference between calling preferences
+// (HD->{HD,H} call), C++ rules take precedence. So, when we need to pick
+// between (host or device) function template and HD function, C++
+// rules will have precedence.
+
+template <typename T> T template_vs_hd_function(T arg) { return 15; }
+__host__ __device__ float template_vs_hd_function(float arg) { return 16; }
+
+// CHECK-BOTH-LABEL: define void @_Z7hd_thdfv()
+__host__ __device__ void hd_thdf() {
+ template_vs_hd_function(1.0f);
+ // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float
+ template_vs_hd_function(1.0);
+ // CHECK-HOST: call double @_Z23template_vs_hd_functionIdET_S0_(double
+}
+
+// CHECK-HOST-LABEL: define void @_Z6h_thdfv()
+__host__ void h_thdf() {
+ template_vs_hd_function(1.0f);
+ // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float
+ template_vs_hd_function(1.0);
+ // CHECK-HOST: call double @_Z23template_vs_hd_functionIdET_S0_(double
+}
+
+// CHECK-DEVICE-LABEL: define void @_Z6d_thdfv()
+__device__ void d_thdf() {
+ template_vs_hd_function(1.0f);
+ // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
+ template_vs_hd_function(1.0);
+ // Host-only function template is not callable with strict call checks,
+ // so for device side HD function will be the only choice.
+ // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
+}
+
// CHECK-HOST-LABEL: define void @_Z5hostfv()
__host__ void hostf(void) {
-#if defined (NOCHECKS)
- fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
- fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp,
-#endif
fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp,
fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp,
-#if defined (NOCHECKS)
- d(); // CHECK-HOST-NC: call i32 @_Z1dv()
- cd(); // CHECK-HOST-NC: call i32 @cd()
-#endif
h(); // CHECK-HOST: call i32 @_Z1hv()
ch(); // CHECK-HOST: call i32 @ch()
dh(); // CHECK-HOST: call i32 @_Z2dhv()
@@ -106,21 +180,13 @@
__device__ void devicef(void) {
fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp,
-#if defined (NOCHECKS)
- fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
- fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp,
-#endif
fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,
d(); // CHECK-DEVICE: call i32 @_Z1dv()
cd(); // CHECK-DEVICE: call i32 @cd()
-#if defined (NOCHECKS)
- h(); // CHECK-DEVICE-NC: call i32 @_Z1hv()
- ch(); // CHECK-DEVICE-NC: call i32 @ch()
-#endif
dh(); // CHECK-DEVICE: call i32 @_Z2dhv()
cdh(); // CHECK-DEVICE: call i32 @cdh()
}
Index: lib/Sema/SemaOverload.cpp
===================================================================
--- lib/Sema/SemaOverload.cpp
+++ lib/Sema/SemaOverload.cpp
@@ -8527,6 +8527,27 @@
else if (!Cand1.Viable)
return false;
+ // [CUDA] If HD function calls a function which has host-only and
+ // device-only variants, nvcc sees only host function during host
+ // compilation and device function only during device-side
+ // compilation. It appears to be a side effect of nvcc's splitting
+ // of host and device code into separate TUs. Alas we need to be
+ // compatible with existing code that relies on this. If we see such
+ // a case, return better variant right away.
+ if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+ Cand1.Function && Cand2.Function) {
+ const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ const Sema::CUDAFunctionPreference CFP1 =
+ S.IdentifyCUDAPreference(Caller, Cand1.Function);
+ const Sema::CUDAFunctionPreference CFP2 =
+ S.IdentifyCUDAPreference(Caller, Cand2.Function);
+ if (((CFP1 == Sema::CFP_SameSide || CFP1 == Sema::CFP_Native) &&
+ (CFP2 <= Sema::CFP_WrongSide)) ||
+ ((CFP1 <= Sema::CFP_WrongSide) &&
+ (CFP2 == Sema::CFP_SameSide || CFP2 == Sema::CFP_Native)))
+ return CFP1 > CFP2;
+ }
+
// C++ [over.match.best]p1:
//
// -- if F is a static member function, ICS1(F) is defined such
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -68,26 +68,26 @@
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
-// Preferences: b-best, f-fallback, l-last resort, n-never.
+// Preferences: +:native, *:host-device, o:same side, .:wrong side, -:never.
//
// | F | T | Ph | Pd | H |
// |----+----+----+----+-----+
-// | d | d | b | b | (b) |
-// | d | g | n | n | (a) |
-// | d | h | l | l | (e) |
-// | d | hd | f | f | (c) |
-// | g | d | b | b | (b) |
-// | g | g | n | n | (a) |
-// | g | h | l | l | (e) |
-// | g | hd | f | f | (c) |
-// | h | d | l | l | (e) |
-// | h | g | b | b | (b) |
-// | h | h | b | b | (b) |
-// | h | hd | f | f | (c) |
-// | hd | d | l | f | (d) |
-// | hd | g | f | n |(d/a)|
-// | hd | h | f | l | (d) |
-// | hd | hd | b | b | (b) |
+// | d | d | + | + | (c) |
+// | d | g | - | - | (a) |
+// | d | h | - | - | (e) |
+// | d | hd | * | * | (b) |
+// | g | d | + | + | (c) |
+// | g | g | - | - | (a) |
+// | g | h | - | - | (e) |
+// | g | hd | * | * | (b) |
+// | h | d | - | - | (e) |
+// | h | g | + | + | (c) |
+// | h | h | + | + | (c) |
+// | h | hd | * | * | (b) |
+// | hd | d | . | o | (d) |
+// | hd | g | o | - |(d/a)|
+// | hd | h | o | . | (d) |
+// | hd | hd | * | * | (b) |
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
@@ -112,39 +112,38 @@
(CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
return CFP_Never;
- // (b) Best case scenarios
+ // (b) Calling HostDevice is OK as a fallback that works for everyone.
+ if (CalleeTarget == CFT_HostDevice)
+ return CFP_HostDevice;
+
+ // (c) Best case scenarios
if (CalleeTarget == CallerTarget ||
(CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
- return CFP_Best;
-
- // (c) Calling HostDevice is OK as a fallback that works for everyone.
- if (CalleeTarget == CFT_HostDevice)
- return CFP_Fallback;
-
- // Figure out what should be returned 'last resort' cases. Normally
- // those would not be allowed, but we'll consider them if
- // CUDADisableTargetCallChecks is true.
- CUDAFunctionPreference QuestionableResult =
- getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
+ return CFP_Native;
// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
- // Calling a function that matches compilation mode is OK.
- // Calling a function from the other side is frowned upon.
- if (getLangOpts().CUDAIsDevice)
- return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
- else
- return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
- ? CFP_Fallback
- : QuestionableResult;
+ // It's OK to call mode-matching function from HD one.
+ if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
+ (!getLangOpts().CUDAIsDevice &&
+ (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
+ return CFP_SameSide;
+
+ // We'll allow calls to non-mode-matching functions if target call
+ // checks are disabled. This is needed to avoid complaining about
+ // HD->H calls when we compile for device side and vice versa.
+ if (getLangOpts().CUDADisableTargetCallChecks)
+ return CFP_WrongSide;
+
+ return CFP_Never;
}
// (e) Calling across device/host boundary is not something you should do.
if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
(CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
- return QuestionableResult;
+ return CFP_Never;
llvm_unreachable("All cases should've been handled by now.");
}
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -8792,12 +8792,18 @@
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
+ // CUDA function call preference. Must be ordered numerically from
+ // worst to best.
enum CUDAFunctionPreference {
CFP_Never, // Invalid caller/callee combination.
- CFP_LastResort, // Lowest priority. Only in effect if
+ CFP_WrongSide, // Calls from host-device to host or device
+ // function that do not match current compilation
+ // mode. Only in effect if
// LangOpts.CUDADisableTargetCallChecks is true.
- CFP_Fallback, // Low priority caller/callee combination
- CFP_Best, // Preferred caller/callee combination
+ CFP_SameSide, // Calls from host-device to host or device
+ // function matching current compilation mode.
+ CFP_HostDevice, // Any calls to host/device functions.
+ CFP_Native, // host-to-host or device-to-device calls.
};
/// Identifies relative preference of a given Caller/Callee
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits