[clang] Fixing Clang HIP inconsistent order for template functions (PR #101627)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 2 00:32:31 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (Ritanya-B-Bharadwaj)
<details>
<summary>Changes</summary>
Fixing the issue - [#<!-- -->101458 ](https://github.com/llvm/llvm-project/issues/101458)
As mentioned in the issue, the order of the functions in the asm output from clang is non-deterministic. Here is the reproducer:
```
#include "hip/hip_runtime.h"
#define CHECK(cmd) \
{ \
hipError_t error = cmd; \
if (error != hipSuccess) { \
fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \
__FILE__, __LINE__); \
exit(EXIT_FAILURE); \
} \
}
template<int i>
__global__ void kernel() {
printf("Hello from kernel %d\n", i);
}
template __global__ void kernel<1>();
template __global__ void kernel<2>();
template __global__ void kernel<3>();
int main(int argc, char* argv[]) {
hipLaunchKernelGGL(kernel<1>, dim3(1), dim3(1), 0, 0);
CHECK(hipDeviceSynchronize());
hipLaunchKernelGGL(kernel<2>, dim3(1), dim3(1), 0, 0);
CHECK(hipDeviceSynchronize());
hipLaunchKernelGGL(kernel<3>, dim3(1), dim3(1), 0, 0);
CHECK(hipDeviceSynchronize());
}
```
```
for i in $(seq 5); do
clang -x hip --offload-arch=gfx908 -save-temps -fgpu-rdc -Ofast test_hip.cpp
md5sum test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
llvm-dis test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
cp test_hip-hip-amdgcn-amd-amdhsa-gfx908.ll test_hip-hip-amdgcn-amd-amdhsa-gfx908.$i.ll
done
75be8654e3a6c39e1e83f5c8b7dda364 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
bde823a75c56e9af933be309d8e433f3 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
e18bbc2e4768556c52864c716cba9c02 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
e18bbc2e4768556c52864c716cba9c02 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
75be8654e3a6c39e1e83f5c8b7dda364 test_hip-hip-amdgcn-amd-amdhsa-gfx908.bc
```
The order of functions referenced in `__clang_gpu_used_external` changes each time:
```
diff --git a/test_hip-hip-amdgcn-amd-amdhsa-gfx908.1.ll b/test_hip-hip-amdgcn-amd-amdhsa-gfx908.2.ll
index 91c0453..abd2b01 100644
--- a/test_hip-hip-amdgcn-amd-amdhsa-gfx908.1.ll
+++ b/test_hip-hip-amdgcn-amd-amdhsa-gfx908.2.ll
@@ -17,7 +17,7 @@ $_Z6kernelILi2EEvv = comdat any
$_Z6kernelILi3EEvv = comdat any
@.str = private unnamed_addr addrspace(4) constant [22 x i8] c"Hello from kernel %d\0A\00", align 1
-@<!-- -->__clang_gpu_used_external = internal addrspace(1) global [3 x ptr] [ptr @<!-- -->_Z6kernelILi1EEvv, ptr @<!-- -->_Z6kernelILi2EEvv, ptr @<!-- -->_Z6kernelILi3EEvv]
+@<!-- -->__clang_gpu_used_external = internal addrspace(1) global [3 x ptr] [ptr @<!-- -->_Z6kernelILi2EEvv, ptr @<!-- -->_Z6kernelILi3EEvv, ptr @<!-- -->_Z6kernelILi1EEvv]
@<!-- -->__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
@<!-- -->llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @<!-- -->__clang_gpu_used_external to ptr)], section "llvm.metadata"
```
The order is determined by the order the functions are stored in the `DenseSet` `CUDAExternalDeviceDeclODRUsedByHost `(which is non-deterministic). Hence changing `CUDAExternalDeviceDeclODRUsedByHost` from `llvm::DenseSet` to `llvm::SetVector` for a deterministic behaviour.
---
Full diff: https://github.com/llvm/llvm-project/pull/101627.diff
2 Files Affected:
- (modified) clang/include/clang/AST/ASTContext.h (+2-1)
- (added) clang/test/CodeGenHIP/hip-checksum.cpp (+27)
``````````diff
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ec8b32533eca8..9368a35818a92 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -34,6 +34,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -1193,7 +1194,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// Keep track of CUDA/HIP external kernels or device variables ODR-used by
/// host code.
- llvm::DenseSet<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
+ llvm::SetVector<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
/// Keep track of CUDA/HIP implicit host device functions used on device side
/// in device compilation.
diff --git a/clang/test/CodeGenHIP/hip-checksum.cpp b/clang/test/CodeGenHIP/hip-checksum.cpp
new file mode 100644
index 0000000000000..e56bd6f33a97f
--- /dev/null
+++ b/clang/test/CodeGenHIP/hip-checksum.cpp
@@ -0,0 +1,27 @@
+// RUN: x=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s -o - | md5sum | awk '{ print $1 }') && echo $x > %t.md5
+// RUN: y1=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s -o - | md5sum | awk '{ print $1 }') && echo $y1 >> %t.md5
+// RUN: y2=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s -o - | md5sum | awk '{ print $1 }') && echo $y2 >> %t.md5
+// RUN: y3=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s -o - | md5sum | awk '{ print $1 }') && echo $y3 >> %t.md5
+// RUN: y4=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s -o - | md5sum | awk '{ print $1 }') && echo $y4 >> %t.md5
+// RUN: y5=$(%clangxx -x hip --offload-arch=gfx908 -S -emit-llvm -fgpu-rdc %s -o - | md5sum | awk '{ print $1 }') && echo $y5 >> %t.md5
+// RUN: if grep -qv "$x" %t.md5; then echo "Test failed"; else echo "Test passed"; fi
+// CHECK: Test passed
+// CHECK-NOT: Test failed
+
+#include "hip/hip_runtime.h"
+
+template<int i>
+__attribute__((global)) void kernel() {
+ printf("Hello from kernel %d\n", i);
+}
+
+template __attribute__((global)) void kernel<1>();
+template __attribute__((global)) void kernel<2>();
+template __attribute__((global)) void kernel<3>();
+
+int main(int argc, char* argv[]) {
+ hipLaunchKernel(reinterpret_cast<void*>(kernel<1>), dim3(1), dim3(1),nullptr, 0, 0);
+ hipLaunchKernel(reinterpret_cast<void*>(kernel<2>), dim3(1), dim3(1),nullptr, 0, 0);
+ hipLaunchKernel(reinterpret_cast<void*>(kernel<3>), dim3(1), dim3(1),nullptr, 0, 0);
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/101627
More information about the cfe-commits
mailing list