[clang] [CIR][CUDA][NFC] Add CIR-to-LLVM lowering checks for existing registration support (PR #195002)

David Rivera via cfe-commits cfe-commits at lists.llvm.org
Mon May 4 09:18:46 PDT 2026


https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/195002

>From 2b812928a63ba29115b33e59f58c5117e8dfb3b3 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 30 Apr 2026 00:46:32 -0400
Subject: [PATCH 1/3] [CIR][CUDA][NFC] Add CIR-to-LLVM lowering checks for
 existing registration support

---
 clang/test/CIR/CodeGenCUDA/device-stub.cu | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 0f9d4d68d67ff..4b3b920a63b84 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -7,6 +7,10 @@
 // RUN:   -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir
 // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
 
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fclangir -emit-llvm %s -x cuda \
+// RUN:   -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \
 // RUN:   -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll
 // RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
@@ -105,6 +109,25 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // OGCG: load ptr, ptr @__cuda_gpubin_handle
 // OGCG: call void @__cudaUnregisterFatBinary
 
+// LLVM: constant [25 x i8] c"GPU binary would be here.", section ".nv_fatbin", align 8
+// LLVM: @__cuda_fatbin_wrapper = {{.*}}constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment"
+// LLVM: @__cuda_gpubin_handle = internal global ptr null
+// LLVM: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
+
+// LLVM: define internal void @__cuda_module_dtor
+// LLVM: load ptr, ptr @__cuda_gpubin_handle
+// LLVM: call void @__cudaUnregisterFatBinary
+
+// LLVM: define internal void @__cuda_register_globals
+// LLVM: call{{.*}}@__cudaRegisterFunction(ptr %{{.*}}, ptr @{{.*}}kernelfunc{{.*}}, ptr @{{.*}}, ptr @{{.*}}, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
+// LLVM: ret void
+
+// LLVM: define internal void @__cuda_module_ctor
+// LLVM: call{{.*}}@__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper)
+// LLVM: store ptr %{{.*}}, ptr @__cuda_gpubin_handle
+// LLVM-NEXT: call void @__cuda_register_globals
+// LLVM: call i32 @atexit(ptr @__cuda_module_dtor)
+
 // No GPU binary — no registration infrastructure at all.
 // NOGPUBIN-NOT: fatbin
 // NOGPUBIN-NOT: gpubin

>From 6bd20b11be25b4ebc6b79681ec49905f85dfc41f Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Mon, 4 May 2026 12:14:51 -0400
Subject: [PATCH 2/3] Add type to StringAttr wrapped by ConstArray attributes.

---
 clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 4 ++--
 clang/test/CIR/CodeGenCUDA/device-stub.cu            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index bd3c8bc0aa8d1..208c440a8c062 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1958,7 +1958,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
                                         GlobalLinkageKind::PrivateLinkage);
   fatbinStr.setAlignment(8);
   fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
-      fatbinType, builder.getStringAttr(gpuBinary->getBuffer())));
+      fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType)));
   fatbinStr.setSection(fatbinConstName);
   fatbinStr.setPrivate();
 
@@ -2199,7 +2199,7 @@ void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
 
     // We must make the string zero-terminated.
     tmpString.setInitialValueAttr(ConstArrayAttr::get(
-        strType, StringAttr::get(&getContext(), str + "\0")));
+        strType, StringAttr::get(str + "\0", strType)));
     tmpString.setPrivate();
     return tmpString;
   };
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 4b3b920a63b84..0e95c74324592 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -47,7 +47,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // CIR: cir.func private @__cudaRegisterFunction(!cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !s32i
 
 // Check the device-side name string for kernelfunc (mangled, null-terminated).
-// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = #cir.const_array<"_Z10kernelfunciii", trailing_zeros> : !cir.array<!u8i x 18>
+// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = #cir.const_array<"_Z10kernelfunciii" : !cir.array<!u8i x 18>, trailing_zeros> : !cir.array<!u8i x 18>
 
 // Check __cuda_register_globals body: one __cudaRegisterFunction call per kernel.
 // CIR: cir.func internal private @__cuda_register_globals(%arg0: !cir.ptr<!cir.ptr<!void>>
@@ -60,7 +60,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // CIR-NEXT: cir.call @__cudaRegisterFunction(%{{.*}}, %[[HOST_FUNC]], %[[DEVICE_FUNC]], %[[DEVICE_FUNC]], %[[THREAD_LIMIT]], %[[NULL]], %[[NULL]], %[[NULL]], %[[NULL]], %[[NULL]])
 // CIR-NEXT: cir.return
 
-// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
+// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here." : !cir.array<!u8i x 25>> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
 
 // Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, with section.
 // CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{

>From 984420a4ae7d4daa6c7c445644d4e946f766afa0 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Mon, 4 May 2026 12:18:21 -0400
Subject: [PATCH 3/3] fix fmt

---
 clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 208c440a8c062..dc56a2f21aced 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -2198,8 +2198,8 @@ void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
         /*linkage=*/cir::GlobalLinkageKind::PrivateLinkage);
 
     // We must make the string zero-terminated.
-    tmpString.setInitialValueAttr(ConstArrayAttr::get(
-        strType, StringAttr::get(str + "\0", strType)));
+    tmpString.setInitialValueAttr(
+        ConstArrayAttr::get(strType, StringAttr::get(str + "\0", strType)));
     tmpString.setPrivate();
     return tmpString;
   };



More information about the cfe-commits mailing list