[clang] [Clang][CUDA][HIP] Externalize static global texture var (PR #115819)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 11 21:56:31 PST 2024
https://github.com/guopsh-sugon created https://github.com/llvm/llvm-project/pull/115819
Externalize static global texture variable in CUDA/HIP.
Reason:
CUDA/HIP runtime needs reference the texture symbol in device elf when program is running. If a texture var has internal linkage type a runtime error will occur when running. To sovle this problem, CUDA nvcc externalizes static global texture var. But clang doesn't do it right now.
>From 2219475fcafe81117521ad5d08ef64b9366548d0 Mon Sep 17 00:00:00 2001
From: guopsh <guopsh at sugon.com>
Date: Tue, 12 Nov 2024 13:36:42 +0800
Subject: [PATCH] externalize static global texture var
---
clang/lib/CodeGen/CGCUDANV.cpp | 14 ++++++++
clang/lib/CodeGen/CodeGenModule.cpp | 15 +++++++++
.../CodeGenCUDA/static-global-texture-var.cu | 33 +++++++++++++++++++
3 files changed, 62 insertions(+)
create mode 100644 clang/test/CodeGenCUDA/static-global-texture-var.cu
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..333b06a80e0cc7 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -306,6 +306,20 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
CGM.printPostfixForExternalizedDecl(Out, ND);
DeviceSideName = std::string(Out.str());
}
+
+ // Make unique name for static global tetxure variable for HIP/CUDA.
+ if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
+ if (VD->getType()->isCUDADeviceBuiltinTextureType() &&
+ VD->getStorageClass() == SC_Static && VD->hasGlobalStorage() &&
+ !VD->isStaticDataMember()) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << DeviceSideName;
+ CGM.printPostfixForExternalizedDecl(Out, ND);
+ DeviceSideName = std::string(Out.str());
+ }
+ }
+
return DeviceSideName;
}
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba376f9ecfacde..859f707741e23e 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1952,6 +1952,15 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
CGM.getLangOpts().CUDAIsDevice)
CGM.printPostfixForExternalizedDecl(Out, ND);
+ // Make unique name for static global tetxure variable for HIP/CUDA.
+ if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
+ if (VD->getType()->isCUDADeviceBuiltinTextureType() &&
+ VD->getStorageClass() == SC_Static && VD->hasGlobalStorage() &&
+ !VD->isStaticDataMember()) {
+ CGM.printPostfixForExternalizedDecl(Out, ND);
+ }
+ }
+
return std::string(Out.str());
}
@@ -5608,6 +5617,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// Set the llvm linkage type as appropriate.
llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D);
+ // Make static global texture variable externally visible.
+ if (D->getType()->isCUDADeviceBuiltinTextureType() &&
+ D->getStorageClass() == SC_Static && !D->isStaticDataMember()) {
+ Linkage = llvm::GlobalValue::ExternalLinkage;
+ }
+
// CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
// the device. [...]"
// CUDA B.2.2 "The __constant__ qualifier, optionally used together with
diff --git a/clang/test/CodeGenCUDA/static-global-texture-var.cu b/clang/test/CodeGenCUDA/static-global-texture-var.cu
new file mode 100644
index 00000000000000..86b5fa8d68548b
--- /dev/null
+++ b/clang/test/CodeGenCUDA/static-global-texture-var.cu
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CUDA-DEVICE %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=HIP-DEVICE %s
+
+struct textureReference {
+ int desc;
+};
+
+enum ReadMode {
+ ElementType = 0,
+ NormalizedFloat = 1
+};
+
+template <typename T, int dim = 1, enum ReadMode mode = ElementType>
+struct __attribute__((device_builtin_texture_type)) texture : public textureReference {
+};
+
+// Confirm static global texture is externally visible and has a unique name.
+static texture<float, 2, ElementType> texRef;
+//CUDA-DEVICE: @_ZL6texRef__static__{{.*}} = addrspace(1) externally_initialized global i64 undef, align 4
+//HIP-DEVICE: @_ZL6texRef.static.{{.*}} = addrspace(1) externally_initialized global %struct.texture undef, align 4
+
+struct v4f {
+ float x, y, z, w;
+};
+
+__attribute__((device)) v4f tex2d_ld(texture<float, 2, ElementType>, float, float) asm("llvm.nvvm.texRef.unified.2d.v4f32.f32");
+
+__attribute__((device)) float foo(float x, float y) {
+ return tex2d_ld(texRef, x, y).x;
+}
More information about the cfe-commits
mailing list