https://github.com/steffenlarsen created 
https://github.com/llvm/llvm-project/pull/201049

This commit changes SemaOverload to resolve an otherwise diagnosed ambiguity 
between addresses of template specializations of functions that are overloaded 
for both device and host. Similar to how it works for non-templated function 
overloads, these changes prioritize the specializations that correspond to 
the target of the owning function, i.e. if compiling for host, the address of 
the host specialization takes precedence over the device specialization and 
vice versa.

Fixes https://github.com/llvm/llvm-project/issues/199299

>From b88265cd48b8e363d539bb5c1862634c7fb3e2af Mon Sep 17 00:00:00 2001
From: Steffen Holst Larsen <[email protected]>
Date: Tue, 2 Jun 2026 02:39:11 -0500
Subject: [PATCH] [clang][CUDA] Avoid ambiguity in host/device template
 specializations

This commit changes SemaOverload to resolve an otherwise diagnosed
ambiguity between addresses of template specializations of functions
that are overloaded for both device and host. Similar to how it works
for non-templated function overloads, these changes prioritize the
specializations that correspond to the target of the owning function,
i.e. if compiling for host, the address of the host specialization takes
precedence over the device specialization and vice versa.

Fixes https://github.com/llvm/llvm-project/issues/199299

Signed-off-by: Steffen Holst Larsen <[email protected]>
---
 clang/lib/Sema/SemaOverload.cpp               |  6 ++--
 clang/test/SemaCUDA/addr-of-overloaded-fn.cu  |  2 ++
 .../addr-of-overloaded-template-fn.cu         | 28 +++++++++++++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/SemaCUDA/addr-of-overloaded-template-fn.cu

diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index e11bbd7085798..ecda430c8424a 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -13742,6 +13742,9 @@ class AddressOfFunctionResolver {
       OvlExpr->copyTemplateArgumentsInto(OvlExplicitTemplateArgs);
 
     if (FindAllFunctionsThatMatchTargetTypeExactly()) {
+      if (Matches.size() > 1 && S.getLangOpts().CUDA)
+        EliminateSuboptimalCudaMatches();
+
       // C++ [over.over]p4:
       //   If more than one function is selected, [...]
       if (Matches.size() > 1 && !eliminiateSuboptimalOverloadCandidates()) {
@@ -13752,9 +13755,6 @@ class AddressOfFunctionResolver {
           EliminateAllExceptMostSpecializedTemplate();
       }
     }
-
-    if (S.getLangOpts().CUDA && Matches.size() > 1)
-      EliminateSuboptimalCudaMatches();
   }
 
   bool hasComplained() const { return HasComplained; }
diff --git a/clang/test/SemaCUDA/addr-of-overloaded-fn.cu b/clang/test/SemaCUDA/addr-of-overloaded-fn.cu
index 03c7f7c3bd5b7..d91ee8d80d006 100644
--- a/clang/test/SemaCUDA/addr-of-overloaded-fn.cu
+++ b/clang/test/SemaCUDA/addr-of-overloaded-fn.cu
@@ -2,6 +2,8 @@
 
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
 
 #include "Inputs/cuda.h"
 
diff --git a/clang/test/SemaCUDA/addr-of-overloaded-template-fn.cu b/clang/test/SemaCUDA/addr-of-overloaded-template-fn.cu
new file mode 100644
index 0000000000000..16df30fc0f375
--- /dev/null
+++ b/clang/test/SemaCUDA/addr-of-overloaded-template-fn.cu
@@ -0,0 +1,28 @@
+// expected-no-diagnostics
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -fcuda-is-device -verify %s
+
+// Tests that no ambiguities are diagnosed when resolving addresses of
+// specialized template functions with the same overloads on host and device.
+
+#include "Inputs/cuda.h"
+
+template <typename T> __host__ void overload(T) {}
+template <typename T> __device__ void overload(T) {}
+
+__host__ __device__ void test_hd() {
+  void (*x)(int) = overload<int>;
+  void (*y)(float) = overload<float>;
+}
+
+__host__ void test_host() {
+  void (*x)(int) = overload<int>;
+  void (*y)(float) = overload<float>;
+}
+__device__ void test_device() {
+  void (*x)(int) = overload<int>;
+  void (*y)(float) = overload<float>;
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to