[PATCH] D77954: [CUDA][HIP] Fix host/device based overload resolution

Yaxun Liu via Phabricator via cfe-commits Sun, 12 Apr 2020 18:43:45 -0700

yaxunl updated this revision to Diff 256903.
yaxunl retitled this revision from "[CUDA][HIP] Fix overload resolution issue 
for device host functions" to "[CUDA][HIP] Fix host/device based overload 
resolution".
yaxunl added a comment.


Revised by John's comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77954/new/

https://reviews.llvm.org/D77954

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/function-overload.cu

Index: clang/test/SemaCUDA/function-overload.cu
===================================================================
--- clang/test/SemaCUDA/function-overload.cu
+++ clang/test/SemaCUDA/function-overload.cu
@@ -331,9 +331,6 @@
 // If we have a mix of HD and H-only or D-only candidates in the overload set,
 // normal C++ overload resolution rules apply first.
 template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
-#ifdef __CUDA_ARCH__
-//expected-note@-2 {{declared here}}
-#endif
 {
   return TemplateReturnTy();
 }
@@ -342,11 +339,13 @@
 }
 
 __host__ __device__ void test_host_device_calls_hd_template() {
-  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
-  TemplateReturnTy ret2 = template_vs_hd_function(1);
 #ifdef __CUDA_ARCH__
-  // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+  typedef HostDeviceReturnTy ExpectedReturnTy;
+#else
+  typedef TemplateReturnTy ExpectedReturnTy;
 #endif
+  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
+  ExpectedReturnTy ret2 = template_vs_hd_function(1);
 }
 
 __host__ void test_host_calls_hd_template() {
Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -9475,6 +9475,35 @@
   else if (!Cand1.Viable)
     return false;
 
+  // If Cand1 can be emitted and Cand2 cannot be emitted in the current context,
+  // Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2 can be
+  // emitted, Cand1 is not better than Cand2. This rule should have precedence
+  // over other rules.
+  //
+  // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
+  // other rules should be used to determine which is better.
+  //
+  // If other rules cannot determine which is better, CUDA preference will be
+  // used again to determine which is better.
+  //
+  // TODO: Currently IdentifyCUDAPreference does not return correct values
+  // for functions called in global variable initializers due to missing
+  // correct context about device/host. Therefore we can only enforce this
+  // rule when there is a caller. We should enforce this rule for functions
+  // in global variable initializers once proper context is added.
+  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
+    if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
+      auto Cand1Emittable = S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+                            Sema::CFP_WrongSide;
+      auto Cand2Emittable = S.IdentifyCUDAPreference(Caller, Cand2.Function) >
+                            Sema::CFP_WrongSide;
+      if (Cand1Emittable && !Cand2Emittable)
+        return true;
+      if (!Cand1Emittable && Cand2Emittable)
+        return false;
+    }
+  }
+
   // C++ [over.match.best]p1:
   //
   //   -- if F is a static member function, ICS1(F) is defined such
@@ -9709,12 +9738,6 @@
       return Cmp == Comparison::Better;
   }
 
-  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
-    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
-    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
-           S.IdentifyCUDAPreference(Caller, Cand2.Function);
-  }
-
   bool HasPS1 = Cand1.Function != nullptr &&
                 functionHasPassObjectSizeParams(Cand1.Function);
   bool HasPS2 = Cand2.Function != nullptr &&
@@ -9722,7 +9745,19 @@
   if (HasPS1 != HasPS2 && HasPS1)
     return true;
 
-  return isBetterMultiversionCandidate(Cand1, Cand2);
+  if (isBetterMultiversionCandidate(Cand1, Cand2))
+    return true;
+
+  // If other rules cannot determine which is better, CUDA preference is used
+  // to determine which is better.
+  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
+    if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
+      return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+             S.IdentifyCUDAPreference(Caller, Cand2.Function);
+    }
+  }
+
+  return false;
 }
 
 /// Determine whether two declarations are "equivalent" for the purposes of
@@ -9808,33 +9843,6 @@
   std::transform(begin(), end(), std::back_inserter(Candidates),
                  [](OverloadCandidate &Cand) { return &Cand; });
 
-  // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
-  // are accepted by both clang and NVCC. However, during a particular
-  // compilation mode only one call variant is viable. We need to
-  // exclude non-viable overload candidates from consideration based
-  // only on their host/device attributes. Specifically, if one
-  // candidate call is WrongSide and the other is SameSide, we ignore
-  // the WrongSide candidate.
-  if (S.getLangOpts().CUDA) {
-    const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
-    bool ContainsSameSideCandidate =
-        llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
-          // Check viable function only.
-          return Cand->Viable && Cand->Function &&
-                 S.IdentifyCUDAPreference(Caller, Cand->Function) ==
-                     Sema::CFP_SameSide;
-        });
-    if (ContainsSameSideCandidate) {
-      auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
-        // Check viable function only to avoid unnecessary data copying/moving.
-        return Cand->Viable && Cand->Function &&
-               S.IdentifyCUDAPreference(Caller, Cand->Function) ==
-                   Sema::CFP_WrongSide;
-      };
-      llvm::erase_if(Candidates, IsWrongSideCandidate);
-    }
-  }
-
   // Find the best viable function.
   Best = end();
   for (auto *Cand : Candidates) {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77954: [CUDA][HIP] Fix host/device based overload resolution

Reply via email to