[clang] [CIR][CUDA][NFC] Add CIR-to-LLVM lowering checks for existing registration support (PR #195002)
David Rivera via cfe-commits
cfe-commits at lists.llvm.org
Mon May 4 09:18:46 PDT 2026
https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/195002
>From 2b812928a63ba29115b33e59f58c5117e8dfb3b3 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 30 Apr 2026 00:46:32 -0400
Subject: [PATCH 1/3] [CIR][CUDA][NFC] Add CIR-to-LLVM lowering checks for
existing registration support
---
clang/test/CIR/CodeGenCUDA/device-stub.cu | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 0f9d4d68d67ff..4b3b920a63b84 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -7,6 +7,10 @@
// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fclangir -emit-llvm %s -x cuda \
+// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \
// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
@@ -105,6 +109,25 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// OGCG: load ptr, ptr @__cuda_gpubin_handle
// OGCG: call void @__cudaUnregisterFatBinary
+// LLVM: constant [25 x i8] c"GPU binary would be here.", section ".nv_fatbin", align 8
+// LLVM: @__cuda_fatbin_wrapper = {{.*}}constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment"
+// LLVM: @__cuda_gpubin_handle = internal global ptr null
+// LLVM: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
+
+// LLVM: define internal void @__cuda_module_dtor
+// LLVM: load ptr, ptr @__cuda_gpubin_handle
+// LLVM: call void @__cudaUnregisterFatBinary
+
+// LLVM: define internal void @__cuda_register_globals
+// LLVM: call{{.*}}@__cudaRegisterFunction(ptr %{{.*}}, ptr @{{.*}}kernelfunc{{.*}}, ptr @{{.*}}, ptr @{{.*}}, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
+// LLVM: ret void
+
+// LLVM: define internal void @__cuda_module_ctor
+// LLVM: call{{.*}}@__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper)
+// LLVM: store ptr %{{.*}}, ptr @__cuda_gpubin_handle
+// LLVM-NEXT: call void @__cuda_register_globals
+// LLVM: call i32 @atexit(ptr @__cuda_module_dtor)
+
// No GPU binary — no registration infrastructure at all.
// NOGPUBIN-NOT: fatbin
// NOGPUBIN-NOT: gpubin
>From 6bd20b11be25b4ebc6b79681ec49905f85dfc41f Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Mon, 4 May 2026 12:14:51 -0400
Subject: [PATCH 2/3] Add type to StringAttr wrapped by ConstArray attributes.
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 4 ++--
clang/test/CIR/CodeGenCUDA/device-stub.cu | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index bd3c8bc0aa8d1..208c440a8c062 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1958,7 +1958,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
GlobalLinkageKind::PrivateLinkage);
fatbinStr.setAlignment(8);
fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
- fatbinType, builder.getStringAttr(gpuBinary->getBuffer())));
+ fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType)));
fatbinStr.setSection(fatbinConstName);
fatbinStr.setPrivate();
@@ -2199,7 +2199,7 @@ void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
// We must make the string zero-terminated.
tmpString.setInitialValueAttr(ConstArrayAttr::get(
- strType, StringAttr::get(&getContext(), str + "\0")));
+ strType, StringAttr::get(str + "\0", strType)));
tmpString.setPrivate();
return tmpString;
};
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 4b3b920a63b84..0e95c74324592 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -47,7 +47,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// CIR: cir.func private @__cudaRegisterFunction(!cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !s32i
// Check the device-side name string for kernelfunc (mangled, null-terminated).
-// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = #cir.const_array<"_Z10kernelfunciii", trailing_zeros> : !cir.array<!u8i x 18>
+// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = #cir.const_array<"_Z10kernelfunciii" : !cir.array<!u8i x 18>, trailing_zeros> : !cir.array<!u8i x 18>
// Check __cuda_register_globals body: one __cudaRegisterFunction call per kernel.
// CIR: cir.func internal private @__cuda_register_globals(%arg0: !cir.ptr<!cir.ptr<!void>>
@@ -60,7 +60,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// CIR-NEXT: cir.call @__cudaRegisterFunction(%{{.*}}, %[[HOST_FUNC]], %[[DEVICE_FUNC]], %[[DEVICE_FUNC]], %[[THREAD_LIMIT]], %[[NULL]], %[[NULL]], %[[NULL]], %[[NULL]], %[[NULL]])
// CIR-NEXT: cir.return
-// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
+// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here." : !cir.array<!u8i x 25>> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, with section.
// CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{
>From 984420a4ae7d4daa6c7c445644d4e946f766afa0 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Mon, 4 May 2026 12:18:21 -0400
Subject: [PATCH 3/3] fix fmt
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 208c440a8c062..dc56a2f21aced 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -2198,8 +2198,8 @@ void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
/*linkage=*/cir::GlobalLinkageKind::PrivateLinkage);
// We must make the string zero-terminated.
- tmpString.setInitialValueAttr(ConstArrayAttr::get(
- strType, StringAttr::get(str + "\0", strType)));
+ tmpString.setInitialValueAttr(
+ ConstArrayAttr::get(strType, StringAttr::get(str + "\0", strType)));
tmpString.setPrivate();
return tmpString;
};
More information about the cfe-commits
mailing list