[PATCH] D154822: Support '-fgpu-default-stream=per-thread' for NVIDIA CUDA
boxu.zhang via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 10 19:33:41 PDT 2023
boxu-zhang updated this revision to Diff 538894.
boxu-zhang added a comment.
Also define the 'CUDA_API_PER_THREAD_DEFAULT_STREAM' macro when '-fgpu-default-stream=per-thread' is in effect.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D154822/new/
https://reviews.llvm.org/D154822
Files:
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/Frontend/InitPreprocessor.cpp
clang/test/CodeGenCUDA/Inputs/cuda.h
clang/test/CodeGenCUDA/kernel-call.cu
Index: clang/test/CodeGenCUDA/kernel-call.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-call.cu
+++ clang/test/CodeGenCUDA/kernel-call.cu
@@ -2,6 +2,9 @@
// RUN: | FileCheck %s --check-prefixes=CUDA-OLD,CHECK
// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
+// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
+// RUN: -fgpu-default-stream=per-thread -DCUDA_API_PER_THREAD_DEFAULT_STREAM \
+// RUN: | FileCheck %s --check-prefixes=CUDA-PTH,CHECK
// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK
// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
@@ -25,6 +28,7 @@
// CUDA-OLD: call{{.*}}cudaLaunch
// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
// CUDA-NEW: call{{.*}}cudaLaunchKernel
+// CUDA-PTH: call{{.*}}cudaLaunchKernel_ptsz
__global__ void g1(int x) {}
// CHECK-LABEL: define{{.*}}main
Index: clang/test/CodeGenCUDA/Inputs/cuda.h
===================================================================
--- clang/test/CodeGenCUDA/Inputs/cuda.h
+++ clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -58,6 +58,10 @@
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
dim3 blockDim, void **args,
size_t sharedMem, cudaStream_t stream);
+extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+
#endif
extern "C" __device__ int printf(const char*, ...);
Index: clang/lib/Frontend/InitPreprocessor.cpp
===================================================================
--- clang/lib/Frontend/InitPreprocessor.cpp
+++ clang/lib/Frontend/InitPreprocessor.cpp
@@ -574,6 +574,9 @@
Builder.defineMacro("__CLANG_RDC__");
if (!LangOpts.HIP)
Builder.defineMacro("__CUDA__");
+ if (LangOpts.GPUDefaultStream ==
+ LangOptions::GPUDefaultStreamKind::PerThread)
+ Builder.defineMacro("CUDA_API_PER_THREAD_DEFAULT_STREAM");
}
if (LangOpts.HIP) {
Builder.defineMacro("__HIP__");
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -358,9 +358,13 @@
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
std::string KernelLaunchAPI = "LaunchKernel";
- if (CGF.getLangOpts().HIP && CGF.getLangOpts().GPUDefaultStream ==
- LangOptions::GPUDefaultStreamKind::PerThread)
- KernelLaunchAPI = KernelLaunchAPI + "_spt";
+ if (CGF.getLangOpts().GPUDefaultStream ==
+ LangOptions::GPUDefaultStreamKind::PerThread) {
+ if (CGF.getLangOpts().HIP)
+ KernelLaunchAPI = KernelLaunchAPI + "_spt";
+ else if (CGF.getLangOpts().CUDA)
+ KernelLaunchAPI = KernelLaunchAPI + "_ptsz";
+ }
auto LaunchKernelName = addPrefixToName(KernelLaunchAPI);
IdentifierInfo &cudaLaunchKernelII =
CGM.getContext().Idents.get(LaunchKernelName);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D154822.538894.patch
Type: text/x-patch
Size: 3334 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20230711/2a3108f8/attachment.bin>
More information about the cfe-commits mailing list