[clang] Fixing Clang HIP inconsistent order for template functions (PR #101627)

via cfe-commits cfe-commits at lists.llvm.org
Tue Aug 13 04:46:53 PDT 2024


https://github.com/Ritanya-B-Bharadwaj updated https://github.com/llvm/llvm-project/pull/101627

>From 5e5fa51a772ceae4f7a2d91965061302b4183864 Mon Sep 17 00:00:00 2001
From: Ritanya B Bharadwaj <ritanya.b.bharadwaj at gmail.com>
Date: Fri, 2 Aug 2024 02:14:03 -0500
Subject: [PATCH] Fixing Clang HIP inconsistent order for template functions

---
 clang/include/clang/AST/ASTContext.h   |  3 ++-
 clang/test/CodeGenHIP/hip-checksum.cpp | 27 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenHIP/hip-checksum.cpp

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ec8b32533eca89..9368a35818a926 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -34,6 +34,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -1193,7 +1194,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
 
   /// Keep track of CUDA/HIP external kernels or device variables ODR-used by
   /// host code.
-  llvm::DenseSet<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
+  llvm::SetVector<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
 
   /// Keep track of CUDA/HIP implicit host device functions used on device side
   /// in device compilation.
diff --git a/clang/test/CodeGenHIP/hip-checksum.cpp b/clang/test/CodeGenHIP/hip-checksum.cpp
new file mode 100644
index 00000000000000..a6db6ded4aab7d
--- /dev/null
+++ b/clang/test/CodeGenHIP/hip-checksum.cpp
@@ -0,0 +1,27 @@
+// RUN: x=$(%clang_cc1 -x hip -triple amdgcn -target-cpu gfx908 -emit-llvm -fcuda-is-device %s -o - | md5sum | awk '{ print $1 }') && echo $x
+// RUN: y1=$(%clang_cc1 -x hip -triple amdgcn -target-cpu gfx908 -emit-llvm -fcuda-is-device %s -o - | md5sum | awk '{ print $1 }') && echo $y1 > %t.md5
+// RUN: y2=$(%clang_cc1 -x hip -triple amdgcn -target-cpu gfx908 -emit-llvm -fcuda-is-device %s -o - | md5sum | awk '{ print $1 }') && echo $y2 >> %t.md5
+// RUN: y3=$(%clang_cc1 -x hip -triple amdgcn -target-cpu gfx908 -emit-llvm -fcuda-is-device %s -o - | md5sum | awk '{ print $1 }') && echo $y3 >> %t.md5
+// RUN: y4=$(%clang_cc1 -x hip -triple amdgcn -target-cpu gfx908 -emit-llvm -fcuda-is-device %s -o - | md5sum | awk '{ print $1 }') && echo $y4 >> %t.md5
+// RUN: y5=$(%clang_cc1 -x hip -triple amdgcn -target-cpu gfx908 -emit-llvm -fcuda-is-device %s -o - | md5sum | awk '{ print $1 }') && echo $y5 >> %t.md5
+// RUN: if grep -qv "$x" %t.md5; then echo "Test failed"; else echo "Test passed"; fi | FileCheck %s
+// CHECK: Test passed
+// CHECK-NOT: Test failed
+
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+template<int i> // Kernel template parameterized on the compile-time integer `i`.
+__attribute__((global)) void kernel() {
+  printf("Hello from kernel %d\n", i); // Prints the template argument `i`.
+}
+
+template __attribute__((global)) void kernel<1>(); // Explicit instantiations of kernel for i = 1, 2 and 3.
+template __attribute__((global)) void kernel<2>();
+template __attribute__((global)) void kernel<3>();
+
+int main(int argc, char* argv[]) { // argc and argv are not used in the body.
+    hipLaunchKernel(reinterpret_cast<void*>(kernel<1>), dim3(1), dim3(1),nullptr, 0, 0); // Passes kernel<1>, then kernel<2>, then kernel<3> to hipLaunchKernel.
+    hipLaunchKernel(reinterpret_cast<void*>(kernel<2>), dim3(1), dim3(1),nullptr, 0, 0);
+    hipLaunchKernel(reinterpret_cast<void*>(kernel<3>), dim3(1), dim3(1),nullptr, 0, 0);
+}
+



More information about the cfe-commits mailing list