llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (Ritanya-B-Bharadwaj)

<details>
<summary>Changes</summary>

Fixing the issue - [#<!-- -->101458 
](https://github.com/llvm/llvm-project/issues/101458)
As mentioned in the issue, the order of the functions in the asm output from 
clang is non-deterministic. Here is the reproducer:

```
#include "hip/hip_runtime.h"
 
#define CHECK(cmd)                                                              
                  \
   {                                                                            
                 \
       hipError_t error = cmd;                                                  
                 \
       if (error != hipSuccess) {                                               
                 \
           fprintf(stderr, "error: '%s'(%d) at %s:%d\n", 
hipGetErrorString(error), error,        \
                   __FILE__, __LINE__);                                         
                 \
           exit(EXIT_FAILURE);                                                  
                 \
       }                                                                        
                 \
   }
 
template&lt;int i&gt;
__global__ void kernel() {
 printf("Hello from kernel %d\n", i);
}
 
template __global__ void kernel&lt;1&gt;();
template __global__ void kernel&lt;2&gt;();
template __global__ void kernel&lt;3&gt;();
 
int main(int argc, char* argv[]) {
   hipLaunchKernelGGL(kernel&lt;1&gt;, dim3(1), dim3(1), 0, 0);
   CHECK(hipDeviceSynchronize());
   hipLaunchKernelGGL(kernel&lt;2&gt;, dim3(1), dim3(1), 0, 0);
   CHECK(hipDeviceSynchronize());
   hipLaunchKernelGGL(kernel&lt;3&gt;, dim3(1), dim3(1), 0, 0);
   CHECK(hipDeviceSynchronize());
}
```
```
for i in $(seq 5); do
 clang -x hip --offload-arch=gfx908 -save-temps -fgpu-rdc -Ofast test_hip.cpp
 md5sum test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
 llvm-dis test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
 cp test_hip-hip-amdgcn-amd-amdhsa-gfx908.ll 
test_hip-hip-amdgcn-amd-amdhsa-gfx908.$i.ll
done
75be8654e3a6c39e1e83f5c8b7dda364 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
bde823a75c56e9af933be309d8e433f3 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
e18bbc2e4768556c52864c716cba9c02 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
e18bbc2e4768556c52864c716cba9c02 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
75be8654e3a6c39e1e83f5c8b7dda364 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
```

The order of functions referenced in `__clang_gpu_used_external` changes each 
time:
 
```
diff --git a/test_hip-hip-amdgcn-amd-amdhsa-gfx908.1.ll 
b/test_hip-hip-amdgcn-amd-amdhsa-gfx908.2.ll
index 91c0453..abd2b01 100644
--- a/test_hip-hip-amdgcn-amd-amdhsa-gfx908.1.ll
+++ b/test_hip-hip-amdgcn-amd-amdhsa-gfx908.2.ll
@@ -17,7 +17,7 @@ $_Z6kernelILi2EEvv = comdat any
 $_Z6kernelILi3EEvv = comdat any
 
 @.str = private unnamed_addr addrspace(4) constant [22 x i8] c"Hello from 
kernel %d\0A\00", align 1
-@<!-- -->__clang_gpu_used_external = internal addrspace(1) global [3 x ptr] 
[ptr @<!-- -->_Z6kernelILi1EEvv, ptr @<!-- -->_Z6kernelILi2EEvv, ptr @<!-- 
-->_Z6kernelILi3EEvv]
+@<!-- -->__clang_gpu_used_external = internal addrspace(1) global [3 x ptr] 
[ptr @<!-- -->_Z6kernelILi2EEvv, ptr @<!-- -->_Z6kernelILi3EEvv, ptr @<!-- 
-->_Z6kernelILi1EEvv]
 @<!-- -->__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) 
constant i32 500
 @<!-- -->llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr 
addrspacecast (ptr addrspace(1) @<!-- -->__clang_gpu_used_external to ptr)], 
section "llvm.metadata"
```
 
The order is determined by the order the functions are stored in the `DenseSet` 
`CUDAExternalDeviceDeclODRUsedByHost `(which is non-deterministic). Hence 
changing `CUDAExternalDeviceDeclODRUsedByHost` from `llvm::DenseSet` to 
`llvm::SetVector` for a deterministic behaviour.

---
Full diff: https://github.com/llvm/llvm-project/pull/101627.diff


2 Files Affected:

- (modified) clang/include/clang/AST/ASTContext.h (+2-1) 
- (added) clang/test/CodeGenHIP/hip-checksum.cpp (+27) 


``````````diff
diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index ec8b32533eca8..9368a35818a92 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -34,6 +34,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -1193,7 +1194,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
 
   /// Keep track of CUDA/HIP external kernels or device variables ODR-used by
   /// host code.
-  llvm::DenseSet<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
+  llvm::SetVector<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
 
   /// Keep track of CUDA/HIP implicit host device functions used on device side
   /// in device compilation.
diff --git a/clang/test/CodeGenHIP/hip-checksum.cpp 
b/clang/test/CodeGenHIP/hip-checksum.cpp
new file mode 100644
index 0000000000000..e56bd6f33a97f
--- /dev/null
+++ b/clang/test/CodeGenHIP/hip-checksum.cpp
@@ -0,0 +1,27 @@
+// RUN: x=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s 
-o - | md5sum | awk '{ print $1 }') && echo $x > %t.md5
+// RUN: y1=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s 
-o - | md5sum | awk '{ print $1 }') && echo $y1 >> %t.md5
+// RUN: y2=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s 
-o - | md5sum | awk '{ print $1 }') && echo $y2 >> %t.md5
+// RUN: y3=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s 
-o - | md5sum | awk '{ print $1 }') && echo $y3 >> %t.md5
+// RUN: y4=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s 
-o - | md5sum | awk '{ print $1 }') && echo $y4 >> %t.md5
+// RUN: y5=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s 
-o - | md5sum | awk '{ print $1 }') && echo $y5 >> %t.md5
+// RUN: if grep -qv "$x" %t.md5; then echo "Test failed"; else echo "Test 
passed"; fi
+// CHECK: Test passed
+// CHECK-NOT: Test failed
+
+#include "hip/hip_runtime.h"
+
+template<int i>
+__attribute__((global)) void kernel() {
+  printf("Hello from kernel %d\n", i);
+}
+
+template __attribute__((global)) void kernel<1>();
+template __attribute__((global)) void kernel<2>();
+template __attribute__((global)) void kernel<3>();
+
+int main(int argc, char* argv[]) {
+    hipLaunchKernel(reinterpret_cast<void*>(kernel<1>), dim3(1), 
dim3(1),nullptr, 0, 0);
+    hipLaunchKernel(reinterpret_cast<void*>(kernel<2>), dim3(1), 
dim3(1),nullptr, 0, 0);
+    hipLaunchKernel(reinterpret_cast<void*>(kernel<3>), dim3(1), 
dim3(1),nullptr, 0, 0);
+}
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/101627
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to