hliao updated this revision to Diff 247675.
hliao added a comment.
Remove unnecessary condition checking.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D71227/new/
https://reviews.llvm.org/D71227
Files:
clang/include/clang/Sema/Sema.h
clang/lib/Parse/ParseDecl.cpp
clang/lib/Sema/SemaCUDA.cpp
clang/lib/Sema/SemaDeclCXX.cpp
clang/lib/Sema/SemaExprCXX.cpp
clang/lib/Sema/SemaOverload.cpp
clang/test/SemaCUDA/function-overload.cu
clang/test/SemaCUDA/global-initializers-host.cu
clang/test/SemaCUDA/hip-pinned-shadow.cu
Index: clang/test/SemaCUDA/hip-pinned-shadow.cu
===================================================================
--- clang/test/SemaCUDA/hip-pinned-shadow.cu
+++ clang/test/SemaCUDA/hip-pinned-shadow.cu
@@ -13,13 +13,19 @@
template <class T, int texType, int hipTextureReadMode>
struct texture : public textureReference {
-texture() { a = 1; }
+ // expected-note@-1{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}}
+ // expected-note@-2{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}}
+ // expected-note@-3{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}}
+ // expected-note@-4{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}}
+ texture() { a = 1; }
+ // expected-note@-1{{candidate constructor not viable: call to __host__ function from __device__ function}}
+ // expected-note@-2{{candidate constructor not viable: call to __host__ function from __device__ function}}
};
__hip_pinned_shadow__ texture<float, 2, 1> tex;
-__device__ __hip_pinned_shadow__ texture<float, 2, 1> tex2; // expected-error{{'hip_pinned_shadow' and 'device' attributes are not compatible}}
- // expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables}}
- // expected-note@-2{{conflicting attribute is here}}
+__device__ __hip_pinned_shadow__ texture<float, 2, 1> tex2; // expected-error{{'hip_pinned_shadow' and 'device' attributes are not compatible}}
+ // expected-note@-1{{conflicting attribute is here}}
+ // expected-error@-2{{no matching constructor for initialization of 'texture<float, 2, 1>'}}
__constant__ __hip_pinned_shadow__ texture<float, 2, 1> tex3; // expected-error{{'hip_pinned_shadow' and 'constant' attributes are not compatible}}
- // expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables}}
- // expected-note@-2{{conflicting attribute is here}}
+ // expected-note@-1{{conflicting attribute is here}}
+ // expected-error@-2{{no matching constructor for initialization of 'texture<float, 2, 1>'}}
Index: clang/test/SemaCUDA/global-initializers-host.cu
===================================================================
--- clang/test/SemaCUDA/global-initializers-host.cu
+++ clang/test/SemaCUDA/global-initializers-host.cu
@@ -6,12 +6,14 @@
// module initializer.
struct S {
+ // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}}
+ // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}}
__device__ S() {}
- // expected-note@-1 {{'S' declared here}}
+ // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}}
};
S s;
-// expected-error@-1 {{reference to __device__ function 'S' in global initializer}}
+// expected-error@-1 {{no matching constructor for initialization of 'S'}}
struct T {
__host__ __device__ T() {}
@@ -19,14 +21,17 @@
T t; // No error, this is OK.
struct U {
+ // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}}
+ // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}}
__host__ U() {}
+ // expected-note@-1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}}
__device__ U(int) {}
- // expected-note@-1 {{'U' declared here}}
+ // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}}
};
U u(42);
-// expected-error@-1 {{reference to __device__ function 'U' in global initializer}}
+// expected-error@-1 {{no matching constructor for initialization of 'U'}}
__device__ int device_fn() { return 42; }
-// expected-note@-1 {{'device_fn' declared here}}
+// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}}
int n = device_fn();
-// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}}
+// expected-error@-1 {{no matching function for call to 'device_fn'}}
Index: clang/test/SemaCUDA/function-overload.cu
===================================================================
--- clang/test/SemaCUDA/function-overload.cu
+++ clang/test/SemaCUDA/function-overload.cu
@@ -214,8 +214,10 @@
// Test for address of overloaded function resolution in the global context.
HostFnPtr fp_h = h;
HostFnPtr fp_ch = ch;
+#if !defined(__CUDA_ARCH__)
CurrentFnPtr fp_dh = dh;
CurrentFnPtr fp_cdh = cdh;
+#endif
GlobalFnPtr fp_g = g;
@@ -419,3 +421,28 @@
int test_constexpr_overload(C2 &x, C2 &y) {
return constexpr_overload(x, y);
}
+
+__device__ float fn(int);
+__host__ float fn(float);
+
+// Overload resolution in the global initialization should follow the same rule
+// as the one in other places. That is, we prefer a callable function over a
+// non-callable function with a better signature match. In this test case, even
+// though the device function has exactly matching with the integer argument,
+// it can't be executed.
+float gvar1 = fn(1);
+
+__device__ float dev_only_fn(int);
+// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}}
+
+float gvar2 = dev_only_fn(1); // expected-error {{no matching function for call to 'dev_only_fn'}}
+
+#ifdef __CUDA_ARCH__
+__device__ DeviceReturnTy gvar3 = template_vs_function(1.f);
+// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}}
+__device__ int gvar4 = template_overload(1);
+// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}}
+#else
+TemplateReturnTy gvar3 = template_vs_function(2.f);
+int gvar4 = template_overload(1);
+#endif
Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -6301,17 +6301,12 @@
}
// (CUDA B.1): Check for invalid calls between targets.
- if (getLangOpts().CUDA)
- if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext))
- // Skip the check for callers that are implicit members, because in this
- // case we may not yet know what the member's target is; the target is
- // inferred for the member automatically, based on the bases and fields of
- // the class.
- if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) {
- Candidate.Viable = false;
- Candidate.FailureKind = ovl_fail_bad_target;
- return;
- }
+ if (getLangOpts().CUDA &&
+ !isCUDACallAllowed(Function, Sema::SkipImplicitCaller)) {
+ Candidate.Viable = false;
+ Candidate.FailureKind = ovl_fail_bad_target;
+ return;
+ }
if (Function->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
@@ -6822,13 +6817,11 @@
}
// (CUDA B.1): Check for invalid calls between targets.
- if (getLangOpts().CUDA)
- if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext))
- if (!IsAllowedCUDACall(Caller, Method)) {
- Candidate.Viable = false;
- Candidate.FailureKind = ovl_fail_bad_target;
- return;
- }
+ if (getLangOpts().CUDA && !isCUDACallAllowed(Method)) {
+ Candidate.Viable = false;
+ Candidate.FailureKind = ovl_fail_bad_target;
+ return;
+ }
if (Method->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
@@ -9666,9 +9659,9 @@
}
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
- FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
- return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
- S.IdentifyCUDAPreference(Caller, Cand2.Function);
+ const Decl *ContextDecl = S.getCUDAContextDecl();
+ return S.IdentifyCUDAPreference(ContextDecl, Cand1.Function) >
+ S.IdentifyCUDAPreference(ContextDecl, Cand2.Function);
}
bool HasPS1 = Cand1.Function != nullptr &&
@@ -9772,19 +9765,19 @@
// candidate call is WrongSide and the other is SameSide, we ignore
// the WrongSide candidate.
if (S.getLangOpts().CUDA) {
- const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ const Decl *ContextDecl = S.getCUDAContextDecl();
bool ContainsSameSideCandidate =
llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
// Check viable function only.
return Cand->Viable && Cand->Function &&
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ S.IdentifyCUDAPreference(ContextDecl, Cand->Function) ==
Sema::CFP_SameSide;
});
if (ContainsSameSideCandidate) {
auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
// Check viable function only to avoid unnecessary data copying/moving.
return Cand->Viable && Cand->Function &&
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ S.IdentifyCUDAPreference(ContextDecl, Cand->Function) ==
Sema::CFP_WrongSide;
};
llvm::erase_if(Candidates, IsWrongSideCandidate);
@@ -10771,10 +10764,10 @@
/// CUDA: diagnose an invalid call across targets.
static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) {
- FunctionDecl *Caller = cast<FunctionDecl>(S.CurContext);
+ const Decl *ContextDecl = S.getCUDAContextDecl();
FunctionDecl *Callee = Cand->Function;
- Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(Caller),
+ Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(ContextDecl),
CalleeTarget = S.IdentifyCUDATarget(Callee);
std::string FnDesc;
@@ -11810,10 +11803,9 @@
return false;
if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) {
- if (S.getLangOpts().CUDA)
- if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext))
- if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl))
- return false;
+ if (S.getLangOpts().CUDA &&
+ !S.isCUDACallAllowed(FunDecl, Sema::SkipImplicitCaller))
+ return false;
if (FunDecl->isMultiVersion()) {
const auto *TA = FunDecl->getAttr<TargetAttr>();
if (TA && !TA->isDefaultVersion())
@@ -11927,9 +11919,7 @@
}
}
- void EliminateSuboptimalCudaMatches() {
- S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches);
- }
+ void EliminateSuboptimalCudaMatches() { S.EraseUnwantedCUDAMatches(Matches); }
public:
void ComplainNoMatchesFound() const {
Index: clang/lib/Sema/SemaExprCXX.cpp
===================================================================
--- clang/lib/Sema/SemaExprCXX.cpp
+++ clang/lib/Sema/SemaExprCXX.cpp
@@ -1513,9 +1513,9 @@
bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) {
// [CUDA] Ignore this function, if we can't call it.
- const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+ const Decl *ContextDecl = getCUDAContextDecl();
if (getLangOpts().CUDA &&
- IdentifyCUDAPreference(Caller, Method) <= CFP_WrongSide)
+ IdentifyCUDAPreference(ContextDecl, Method) <= CFP_WrongSide)
return false;
SmallVector<const FunctionDecl*, 4> PreventedBy;
@@ -1529,7 +1529,7 @@
return llvm::none_of(PreventedBy, [&](const FunctionDecl *FD) {
assert(FD->getNumParams() == 1 &&
"Only single-operand functions should be in PreventedBy");
- return IdentifyCUDAPreference(Caller, FD) >= CFP_HostDevice;
+ return IdentifyCUDAPreference(ContextDecl, FD) >= CFP_HostDevice;
});
}
@@ -1592,8 +1592,7 @@
// In CUDA, determine how much we'd like / dislike to call this.
if (S.getLangOpts().CUDA)
- if (auto *Caller = dyn_cast<FunctionDecl>(S.CurContext))
- CUDAPref = S.IdentifyCUDAPreference(Caller, FD);
+ CUDAPref = S.IdentifyCUDAPreference(S.getCUDAContextDecl(), FD);
}
explicit operator bool() const { return FD; }
@@ -2697,7 +2696,7 @@
}
if (getLangOpts().CUDA)
- EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
+ EraseUnwantedCUDAMatches(Matches);
} else {
// C++1y [expr.new]p22:
// For a non-placement allocation function, the normal deallocation
Index: clang/lib/Sema/SemaDeclCXX.cpp
===================================================================
--- clang/lib/Sema/SemaDeclCXX.cpp
+++ clang/lib/Sema/SemaDeclCXX.cpp
@@ -16691,6 +16691,20 @@
return false;
}
+void Sema::pushCUDANonLocalVariable(const Decl *D) {
+ if (!D || !isNonlocalVariable(D))
+ return;
+ CUDANonLocalVariableStack.push_back(D);
+}
+
+void Sema::popCUDANonLocalVariable(const Decl *D) {
+ if (!D || !isNonlocalVariable(D))
+ return;
+ assert(!CUDANonLocalVariableStack.empty() &&
+ CUDANonLocalVariableStack.back() == D);
+ CUDANonLocalVariableStack.pop_back();
+}
+
/// Invoked when we are about to parse an initializer for the declaration
/// 'Dcl'.
///
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -95,7 +95,7 @@
}
template <typename A>
-static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
return isa<A>(Attribute) &&
!(IgnoreImplicitAttr && Attribute->isImplicit());
@@ -130,6 +130,41 @@
return CFT_Host;
}
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const VarDecl *D,
+ bool IgnoreImplicitHDAttr) {
+ if (D == nullptr)
+ return CFT_Host;
+
+ assert(D->hasGlobalStorage() && "Only non-local variable needs identifying.");
+
+ if (D->hasAttr<CUDAInvalidTargetAttr>())
+ return CFT_InvalidTarget;
+
+ if (hasAttr<HIPPinnedShadowAttr>(D, IgnoreImplicitHDAttr))
+ return CFT_Host;
+
+ if (hasAttr<CUDAConstantAttr>(D, IgnoreImplicitHDAttr) ||
+ hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr) ||
+ hasAttr<CUDASharedAttr>(D, IgnoreImplicitHDAttr))
+ return CFT_Device;
+
+ return CFT_Host;
+}
+
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const Decl *D,
+ bool IgnoreImplicitHDAttr) {
+ if (D == nullptr)
+ return CFT_Host;
+
+ if (auto FD = dyn_cast<FunctionDecl>(D))
+ return IdentifyCUDATarget(FD, IgnoreImplicitHDAttr);
+
+ if (auto VD = dyn_cast<VarDecl>(D))
+ return IdentifyCUDATarget(VD, IgnoreImplicitHDAttr);
+
+ llvm_unreachable("Unexpected decl for CUDA target identification.");
+}
+
// * CUDA Call preference table
//
// F - from,
@@ -159,10 +194,10 @@
// | hd | hd | HD | HD | (b) |
Sema::CUDAFunctionPreference
-Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
+Sema::IdentifyCUDAPreference(const Decl *ContextDecl,
const FunctionDecl *Callee) {
assert(Callee && "Callee must be valid.");
- CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
+ CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(ContextDecl);
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
// If one of the targets is invalid, the check always fails, no matter what
@@ -211,16 +246,17 @@
}
void Sema::EraseUnwantedCUDAMatches(
- const FunctionDecl *Caller,
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
if (Matches.size() <= 1)
return;
using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
- // Gets the CUDA function preference for a call from Caller to Match.
+ const Decl *ContextDecl = getCUDAContextDecl();
+
+ // Gets the CUDA function preference for a call from call context to Match.
auto GetCFP = [&](const Pair &Match) {
- return IdentifyCUDAPreference(Caller, Match.second);
+ return IdentifyCUDAPreference(ContextDecl, Match.second);
};
// Find the best call preference among the functions in Matches.
Index: clang/lib/Parse/ParseDecl.cpp
===================================================================
--- clang/lib/Parse/ParseDecl.cpp
+++ clang/lib/Parse/ParseDecl.cpp
@@ -2342,6 +2342,8 @@
}
}
+ Actions.pushCUDANonLocalVariable(ThisDecl);
+
// Parse declarator '=' initializer.
// If a '==' or '+=' is found, suggest a fixit to '='.
if (isTokenEqualOrEqualTypo()) {
@@ -2474,6 +2476,8 @@
Actions.ActOnUninitializedDecl(ThisDecl);
}
+ Actions.popCUDANonLocalVariable(ThisDecl);
+
Actions.FinalizeDeclaration(ThisDecl);
return ThisDecl;
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -11396,9 +11396,14 @@
///
/// Use this rather than examining the function's attributes yourself -- you
/// will get it wrong. Returns CFT_Host if D is null.
+ CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
bool IgnoreImplicitHDAttr = false);
- CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
+ CUDAFunctionTarget IdentifyCUDATarget(const VarDecl *D,
+ bool IgnoreImplicitHDAttr = false);
+ // This routine is the top level dispatcher to more specific variants above.
+ CUDAFunctionTarget IdentifyCUDATarget(const Decl *D,
+ bool IgnoreImplicitHDAttr = false);
/// Gets the CUDA target for the current context.
CUDAFunctionTarget CurrentCUDATarget() {
@@ -11418,24 +11423,53 @@
CFP_Native, // host-to-host or device-to-device calls.
};
- /// Identifies relative preference of a given Caller/Callee
+ /// Identifies relative preference of a given callee and that call context
/// combination, based on their host/device attributes.
- /// \param Caller function which needs address of \p Callee.
- /// nullptr in case of global context.
- /// \param Callee target function
+ /// \param CallContextDecl The context decl which needs address of \p Callee.
+ /// Null in case of the global context.
+ /// \param Callee Target function.
///
/// \returns preference value for particular Caller/Callee combination.
- CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
+ CUDAFunctionPreference IdentifyCUDAPreference(const Decl *CallContextDecl,
const FunctionDecl *Callee);
+ SmallVector<const Decl *, 8> CUDANonLocalVariableStack;
+
+ void pushCUDANonLocalVariable(const Decl *D);
+ void popCUDANonLocalVariable(const Decl *D);
+
+ const Decl *getCUDACurrentNonLocalVariable() const {
+ return CUDANonLocalVariableStack.empty() ? nullptr
+ : CUDANonLocalVariableStack.back();
+ }
+
+ const Decl *getCUDAContextDecl() const {
+ if (CurContext->isFunctionOrMethod())
+ return cast<Decl>(CurContext);
+ if (!CurContext->isFileContext()) {
+ // TODO: There are cases where proper checking is required, such as the
+ // default member initializer.
+ return nullptr;
+ }
+ // Check the current variable being initialized in the global context.
+ return getCUDACurrentNonLocalVariable();
+ }
+
/// Determines whether Caller may invoke Callee, based on their CUDA
/// host/device attributes. Returns false if the call is not allowed.
///
/// Note: Will return true for CFP_WrongSide calls. These may appear in
/// semantically correct CUDA programs, but only if they're never codegen'ed.
- bool IsAllowedCUDACall(const FunctionDecl *Caller,
- const FunctionDecl *Callee) {
- return IdentifyCUDAPreference(Caller, Callee) != CFP_Never;
+ enum SkipCallerKind_t { SkipNoneCaller, SkipImplicitCaller };
+ bool isCUDACallAllowed(const FunctionDecl *Callee,
+ SkipCallerKind_t Kind = SkipNoneCaller) {
+ // Skip contexts where no real call could be performed.
+ if (!CurContext->isFileContext() && !CurContext->isFunctionOrMethod())
+ return true;
+ if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext))
+ if (Kind == SkipImplicitCaller && Caller->isImplicit())
+ return true;
+ return IdentifyCUDAPreference(getCUDAContextDecl(), Callee) != CFP_Never;
}
/// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
@@ -11469,10 +11503,9 @@
void CUDASetLambdaAttrs(CXXMethodDecl *Method);
/// Finds a function in \p Matches with highest calling priority
- /// from \p Caller context and erases all functions with lower
+ /// from the current context and erases all functions with lower
/// calling priority.
void EraseUnwantedCUDAMatches(
- const FunctionDecl *Caller,
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
/// Given a implicit special member, infer its CUDA target from the
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits