[clang] [CIR][CUDA] Handle CUDA module constructor and destructor emission. (PR #188673)
David Rivera via cfe-commits
cfe-commits at lists.llvm.org
Mon Apr 13 20:55:24 PDT 2026
https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/188673
>From 325065d3ee9653399c9bdb5a028b3245e77b03d2 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Wed, 18 Mar 2026 21:10:59 -0400
Subject: [PATCH 01/15] [CIR][CUDA] Global emission for fatbin symbols
---
.../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 17 ++
.../clang/CIR/Dialect/IR/CIRDialect.td | 1 +
clang/include/clang/CIR/MissingFeatures.h | 2 +
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 10 ++
.../Dialect/Transforms/LoweringPrepare.cpp | 154 ++++++++++++++++++
clang/test/CIR/CodeGenCUDA/device-stub.cu | 50 ++++++
6 files changed, 234 insertions(+)
create mode 100644 clang/test/CIR/CodeGenCUDA/device-stub.cu
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index 5932db8323196..a5374f4ffd79b 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -50,5 +50,22 @@ def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized",
}];
let canHaveIllegalCXXABIType = 0;
}
+def CIR_CUDABinaryHandleAttr : CIR_Attr<
+ "CUDABinaryHandle", "cu.binary_handle"
+> {
+ let summary = "Fat binary handle for device code.";
+ let description =
+ [{
+ This attribute is attached to the ModuleOp and records the binary file
+ name passed to host.
+
+ CUDA first compiles device-side code into a fat binary file. The file
+ name is then passed into host-side code, which is used to create a handle
+ and then generate various registration functions.
+ }];
+
+ let parameters = (ins "std::string":$name);
+ let assemblyFormat = "`<` $name `>`";
+}
#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
index 13095464a3fd2..b57f874c34393 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
@@ -76,6 +76,7 @@ def CIR_Dialect : Dialect {
static llvm::StringRef getResAttrsAttrName() { return "res_attrs"; }
static llvm::StringRef getArgAttrsAttrName() { return "arg_attrs"; }
static llvm::StringRef getRecordLayoutsAttrName() { return "cir.record_layouts"; }
+ static llvm::StringRef getCUDABinaryHandleAttrName() { return "cir.cu.binary_handle"; }
static llvm::StringRef getAMDGPUCodeObjectVersionAttrName() { return "cir.amdhsa_code_object_version"; }
static llvm::StringRef getAMDGPUPrintfKindAttrName() { return "cir.amdgpu_printf_kind"; }
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index b9a6b83daa13c..ac02433fb504a 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -244,6 +244,8 @@ struct MissingFeatures {
static bool ctorConstLvalueToRvalueConversion() { return false; }
static bool ctorMemcpyizer() { return false; }
static bool cudaSupport() { return false; }
+ static bool hipModuleCtor() { return false; }
+ static bool globalRegistration() { return false; }
static bool dataLayoutTypeIsSized() { return false; }
static bool dataLayoutTypeAllocSize() { return false; }
static bool dataLayoutTypeStoreSize() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 46635ea1e2482..7fdd8e11688f1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -162,6 +162,16 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
/*line=*/0,
/*column=*/0));
}
+
+ // Set CUDA GPU binary handle.
+ if (langOpts.CUDA) {
+ std::string cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName;
+ if (!cudaBinaryName.empty()) {
+ theModule->setAttr(
+ cir::CIRDialect::getCUDABinaryHandleAttrName(),
+ cir::CUDABinaryHandleAttr::get(&mlirContext, cudaBinaryName));
+ }
+ }
}
CIRGenModule::~CIRGenModule() = default;
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 8a9bfbf81d453..f82f45646309a 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -10,6 +10,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/IRMapping.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attrs.inc"
#include "clang/AST/Mangle.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/Specifiers.h"
@@ -108,6 +109,17 @@ struct LoweringPreparePass
cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage,
cir::VisibilityKind visibility = cir::VisibilityKind::Default);
+
+ /// ------------
+ /// CUDA registration related
+ /// ------------
+
+ llvm::StringMap<FuncOp> cudaKernelMap;
+
+ /// Build the CUDA module constructor that registers the fat binary
+ /// with the CUDA runtime.
+ void buildCUDAModuleCtor();
+
/// Handle static local variable initialization with guard variables.
void handleStaticLocal(cir::GlobalOp globalOp, cir::GetGlobalOp getGlobalOp);
@@ -1712,11 +1724,150 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
globalCtorList.emplace_back(fnOp.getName(), globalCtor.value());
else if (auto globalDtor = fnOp.getGlobalDtorPriority())
globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
+
+ if (auto attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
+ auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
+ std::string kernelName = kernelNameAttr.getKernelName();
+ cudaKernelMap[kernelName] = fnOp;
+ }
} else if (auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) {
lowerThreeWayCmpOp(threeWayCmp);
}
}
+static std::string getCUDAPrefix(clang::ASTContext *astCtx) {
+ if (astCtx->getLangOpts().HIP)
+ return "hip";
+ return "cuda";
+}
+
+static std::string addUnderscoredPrefix(llvm::StringRef prefix,
+ llvm::StringRef name) {
+ return ("__" + prefix + name).str();
+}
+
+/// Creates a global constructor function for the module:
+///
+/// For CUDA:
+/// \code
+/// void __cuda_module_ctor() {
+/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
+/// __cuda_register_globals(Handle);
+/// }
+/// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_ctor() {
+/// if (__hip_gpubin_handle == 0) {
+/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
+/// __hip_register_globals(__hip_gpubin_handle);
+/// }
+/// }
+/// \endcode
+void LoweringPreparePass::buildCUDAModuleCtor() {
+ bool isHIP = astCtx->getLangOpts().HIP;
+
+ if (isHIP)
+ assert(!cir::MissingFeatures::hipModuleCtor());
+ if (astCtx->getLangOpts().GPURelocatableDeviceCode)
+ llvm_unreachable("GPU RDC NYI");
+
+ // For CUDA without -fgpu-rdc, it's safe to stop generating ctor
+ // if there's nothing to register.
+ if (cudaKernelMap.empty())
+ return;
+
+ // There's no device-side binary, so no need to proceed for CUDA.
+ // HIP has to create an external symbol in this case, which is NYI.
+ mlir::Attribute cudaBinaryHandleAttr =
+ mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName());
+ if (!cudaBinaryHandleAttr) {
+ if (astCtx->getLangOpts().HIP)
+ assert(!cir::MissingFeatures::hipModuleCtor());
+ return;
+ }
+
+ std::string cudaGPUBinaryName =
+ mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr).getName();
+
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
+ llvm::MemoryBuffer::getFile(cudaGPUBinaryName);
+ if (std::error_code ec = gpuBinaryOrErr.getError()) {
+ mlirModule->emitError("cannot open GPU binary file: " + cudaGPUBinaryName +
+ ": " + ec.message());
+ return;
+ }
+ std::unique_ptr<llvm::MemoryBuffer> gpuBinary =
+ std::move(gpuBinaryOrErr.get());
+
+ // Set up common types and builder.
+ std::string cudaPrefix = getCUDAPrefix(astCtx);
+ mlir::Location loc = mlirModule->getLoc();
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointToStart(mlirModule.getBody());
+
+ auto voidTy = builder.getVoidTy();
+ auto voidPtrTy = builder.getVoidPtrTy();
+ auto voidPtrPtrTy = builder.getPointerTo(voidPtrTy);
+ auto intTy = builder.getSIntNTy(32);
+ auto charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(),
+ /*isSigned=*/false);
+
+ // --- Create fatbin globals ---
+
+ // Create the fatbin string constant with GPU binary contents.
+ auto fatbinType =
+ ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
+ std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
+ GlobalOp fatbinStr =
+ GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
+ /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage);
+ fatbinStr.setAlignment(8);
+ fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
+ fatbinType, builder.getStringAttr(gpuBinary->getBuffer())));
+ assert(!cir::MissingFeatures::opGlobalSection());
+ fatbinStr.setPrivate();
+
+ // Create the fatbin wrapper struct:
+ // struct { int magic; int version; void *fatbin; void *unused; };
+ auto fatbinWrapperType = RecordType::get(
+ &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy},
+ /*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct);
+ std::string fatbinWrapperName =
+ addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper");
+ GlobalOp fatbinWrapper =
+ GlobalOp::create(builder, loc, fatbinWrapperName, fatbinWrapperType,
+ /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage);
+
+ constexpr unsigned cudaFatMagic = 0x466243b1;
+ constexpr unsigned hipFatMagic = 0x48495046;
+ unsigned fatMagic = isHIP ? hipFatMagic : cudaFatMagic;
+
+ auto magicInit = IntAttr::get(intTy, fatMagic);
+ auto versionInit = IntAttr::get(intTy, 1);
+ auto fatbinStrSymbol =
+ mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr());
+ auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol);
+ auto unusedInit = builder.getConstNullPtrAttr(voidPtrTy);
+ fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get(
+ fatbinWrapperType,
+ mlir::ArrayAttr::get(&getContext(),
+ {magicInit, versionInit, fatbinInit, unusedInit})));
+
+ // Create the GPU binary handle global variable.
+ std::string gpubinHandleName =
+ addUnderscoredPrefix(cudaPrefix, "_gpubin_handle");
+ GlobalOp gpuBinHandle = GlobalOp::create(
+ builder, loc, gpubinHandleName, voidPtrPtrTy,
+ /*isConstant=*/false, GlobalLinkageKind::InternalLinkage);
+ gpuBinHandle.setInitialValueAttr(builder.getConstNullPtrAttr(voidPtrPtrTy));
+ gpuBinHandle.setPrivate();
+
+ // TODO: ctor/dtor/register_globals
+ assert(!cir::MissingFeatures::globalRegistration());
+}
+
void LoweringPreparePass::runOnOperation() {
mlir::Operation *op = getOperation();
if (isa<::mlir::ModuleOp>(op))
@@ -1737,6 +1888,9 @@ void LoweringPreparePass::runOnOperation() {
runOnOp(o);
buildCXXGlobalInitFunc();
+ if (astCtx->getLangOpts().CUDA && !astCtx->getLangOpts().CUDAIsDevice)
+ buildCUDAModuleCtor();
+
buildGlobalCtorDtorList();
}
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
new file mode 100644
index 0000000000000..59bfd5b31d522
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -0,0 +1,50 @@
+// Based on clang/test/CodeGenCUDA/device-stub.cu (incubator).
+
+// Create a dummy GPU binary file for registration.
+// RUN: echo -n "GPU binary would be here." > %t
+
+// CIR output — check fatbin globals are created correctly.
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \
+// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+// OGCG output — check LLVM IR parity with original codegen.
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \
+// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// No GPU binary — nothing should be generated.
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \
+// RUN: -target-sdk-version=12.3 -o %t.nogpu.cir
+// RUN: FileCheck --input-file=%t.nogpu.cir %s --check-prefix=NOGPUBIN
+
+#include "Inputs/cuda.h"
+
+__global__ void kernelfunc(int i, int j, int k) {}
+
+void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
+
+// Check the fatbin string constant with GPU binary contents.
+// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64}
+
+// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }.
+// CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{
+// CIR-SAME: #cir.int<1180844977> : !s32i,
+// CIR-SAME: #cir.int<1> : !s32i,
+// CIR-SAME: #cir.global_view<@__cuda_fatbin_str> : !cir.ptr<!void>,
+// CIR-SAME: #cir.ptr<null> : !cir.ptr<!void>
+// CIR-SAME: }>
+
+// Check the GPU binary handle global.
+// CIR: cir.global "private" internal @__cuda_gpubin_handle = #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>>
+
+// OGCG: constant [25 x i8] c"GPU binary would be here.", section ".nv_fatbin", align 8
+// OGCG: @__cuda_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment"
+// OGCG: @__cuda_gpubin_handle = internal global ptr null
+
+// No GPU binary — no registration infrastructure at all.
+// NOGPUBIN-NOT: fatbin
+// NOGPUBIN-NOT: gpubin
+// NOGPUBIN-NOT: __cuda_register_globals
+// NOGPUBIN-NOT: __cuda_module_ctor
+// NOGPUBIN-NOT: __cuda_module_dtor
>From a133039e4e0528e08e66f805e9cf1075448201cf Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Mon, 23 Mar 2026 20:19:09 -0400
Subject: [PATCH 02/15] fix tests and remove unnecessary comments.
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 -
clang/test/CIR/CodeGenCUDA/device-stub.cu | 3 ---
2 files changed, 4 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index f82f45646309a..82f7f184f0747 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -109,7 +109,6 @@ struct LoweringPreparePass
cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage,
cir::VisibilityKind visibility = cir::VisibilityKind::Default);
-
/// ------------
/// CUDA registration related
/// ------------
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 59bfd5b31d522..b3e8baa17c7a4 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -3,17 +3,14 @@
// Create a dummy GPU binary file for registration.
// RUN: echo -n "GPU binary would be here." > %t
-// CIR output — check fatbin globals are created correctly.
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \
// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
-// OGCG output — check LLVM IR parity with original codegen.
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \
// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
-// No GPU binary — nothing should be generated.
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \
// RUN: -target-sdk-version=12.3 -o %t.nogpu.cir
// RUN: FileCheck --input-file=%t.nogpu.cir %s --check-prefix=NOGPUBIN
>From f6a89ee2751e5a4320a4f1ce103d53c723dfd235 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Mon, 23 Mar 2026 20:46:17 -0400
Subject: [PATCH 03/15] fix global builder ordering
---
.../CIR/Dialect/Transforms/LoweringPrepare.cpp | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 82f7f184f0747..5f23414d34241 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1819,9 +1819,9 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
auto fatbinType =
ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
- GlobalOp fatbinStr =
- GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
- /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage);
+ GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
+ /*isConstant=*/true, {},
+ GlobalLinkageKind::PrivateLinkage);
fatbinStr.setAlignment(8);
fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
fatbinType, builder.getStringAttr(gpuBinary->getBuffer())));
@@ -1835,9 +1835,9 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
/*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct);
std::string fatbinWrapperName =
addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper");
- GlobalOp fatbinWrapper =
- GlobalOp::create(builder, loc, fatbinWrapperName, fatbinWrapperType,
- /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage);
+ GlobalOp fatbinWrapper = GlobalOp::create(
+ builder, loc, fatbinWrapperName, fatbinWrapperType,
+ /*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage);
constexpr unsigned cudaFatMagic = 0x466243b1;
constexpr unsigned hipFatMagic = 0x48495046;
@@ -1857,9 +1857,10 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
// Create the GPU binary handle global variable.
std::string gpubinHandleName =
addUnderscoredPrefix(cudaPrefix, "_gpubin_handle");
+
GlobalOp gpuBinHandle = GlobalOp::create(
builder, loc, gpubinHandleName, voidPtrPtrTy,
- /*isConstant=*/false, GlobalLinkageKind::InternalLinkage);
+ /*isConstant=*/false, {}, cir::GlobalLinkageKind::InternalLinkage);
gpuBinHandle.setInitialValueAttr(builder.getConstNullPtrAttr(voidPtrPtrTy));
gpuBinHandle.setPrivate();
>From a65d48402aecccf2e47f421d6c0dc4f7299e377d Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Wed, 25 Mar 2026 23:14:58 -0400
Subject: [PATCH 04/15] Avoid copies from `std::string`
---
.../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 2 +-
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 11 +++--
.../Dialect/Transforms/LoweringPrepare.cpp | 40 +++++++++++--------
3 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index a5374f4ffd79b..d9aabd602a279 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -64,7 +64,7 @@ def CIR_CUDABinaryHandleAttr : CIR_Attr<
and then generate various registration functions.
}];
- let parameters = (ins "std::string":$name);
+ let parameters = (ins "mlir::StringAttr":$name);
let assemblyFormat = "`<` $name `>`";
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 7fdd8e11688f1..3836edbfb6bb2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -32,10 +32,12 @@
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/Interfaces/CIROpInterfaces.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/StringRef.h"
#include "CIRGenFunctionInfo.h"
#include "TargetInfo.h"
#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
+#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
@@ -165,11 +167,12 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
// Set CUDA GPU binary handle.
if (langOpts.CUDA) {
- std::string cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName;
+ llvm::StringRef cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName;
if (!cudaBinaryName.empty()) {
- theModule->setAttr(
- cir::CIRDialect::getCUDABinaryHandleAttrName(),
- cir::CUDABinaryHandleAttr::get(&mlirContext, cudaBinaryName));
+ theModule->setAttr(cir::CIRDialect::getCUDABinaryHandleAttrName(),
+ cir::CUDABinaryHandleAttr::get(
+ &mlirContext, mlir::StringAttr::get(
+ &mlirContext, cudaBinaryName)));
}
}
}
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 5f23414d34241..32fc2fea9f953 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -9,6 +9,7 @@
#include "PassDetail.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attrs.inc"
#include "clang/AST/Mangle.h"
@@ -21,9 +22,11 @@
#include "clang/CIR/Dialect/IR/CIRDataLayout.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/Dialect/Passes.h"
#include "clang/CIR/Interfaces/ASTAttrInterfaces.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Path.h"
@@ -1724,7 +1727,7 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
else if (auto globalDtor = fnOp.getGlobalDtorPriority())
globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
- if (auto attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
+ if (mlir::Attribute attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
std::string kernelName = kernelNameAttr.getKernelName();
cudaKernelMap[kernelName] = fnOp;
@@ -1734,15 +1737,15 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
}
}
-static std::string getCUDAPrefix(clang::ASTContext *astCtx) {
+static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx) {
if (astCtx->getLangOpts().HIP)
return "hip";
return "cuda";
}
-static std::string addUnderscoredPrefix(llvm::StringRef prefix,
+static llvm::StringRef addUnderscoredPrefix(llvm::StringRef prefix,
llvm::StringRef name) {
- return ("__" + prefix + name).str();
+ return ("__" + prefix + name).getSingleStringRef();
}
/// Creates a global constructor function for the module:
@@ -1787,8 +1790,10 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
return;
}
- std::string cudaGPUBinaryName =
- mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr).getName();
+ llvm::StringRef cudaGPUBinaryName =
+ mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr)
+ .getName()
+ .getValue();
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
llvm::MemoryBuffer::getFile(cudaGPUBinaryName);
@@ -1801,24 +1806,25 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
std::move(gpuBinaryOrErr.get());
// Set up common types and builder.
- std::string cudaPrefix = getCUDAPrefix(astCtx);
+ llvm::StringRef cudaPrefix = getCUDAPrefix(astCtx);
mlir::Location loc = mlirModule->getLoc();
CIRBaseBuilderTy builder(getContext());
builder.setInsertionPointToStart(mlirModule.getBody());
- auto voidTy = builder.getVoidTy();
- auto voidPtrTy = builder.getVoidPtrTy();
- auto voidPtrPtrTy = builder.getPointerTo(voidPtrTy);
- auto intTy = builder.getSIntNTy(32);
- auto charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(),
- /*isSigned=*/false);
+ VoidType voidTy = builder.getVoidTy();
+ PointerType voidPtrTy = builder.getVoidPtrTy();
+ PointerType voidPtrPtrTy = builder.getPointerTo(voidPtrTy);
+ IntType intTy = builder.getSIntNTy(32);
+ IntType charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(),
+ /*isSigned=*/false);
// --- Create fatbin globals ---
// Create the fatbin string constant with GPU binary contents.
auto fatbinType =
ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
- std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
+ llvm::StringRef fatbinStrName =
+ addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
/*isConstant=*/true, {},
GlobalLinkageKind::PrivateLinkage);
@@ -1833,7 +1839,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
auto fatbinWrapperType = RecordType::get(
&getContext(), {intTy, intTy, voidPtrTy, voidPtrTy},
/*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct);
- std::string fatbinWrapperName =
+ llvm::StringRef fatbinWrapperName =
addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper");
GlobalOp fatbinWrapper = GlobalOp::create(
builder, loc, fatbinWrapperName, fatbinWrapperType,
@@ -1848,14 +1854,14 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
auto fatbinStrSymbol =
mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr());
auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol);
- auto unusedInit = builder.getConstNullPtrAttr(voidPtrTy);
+ mlir::TypedAttr unusedInit = builder.getConstNullPtrAttr(voidPtrTy);
fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get(
fatbinWrapperType,
mlir::ArrayAttr::get(&getContext(),
{magicInit, versionInit, fatbinInit, unusedInit})));
// Create the GPU binary handle global variable.
- std::string gpubinHandleName =
+ llvm::StringRef gpubinHandleName =
addUnderscoredPrefix(cudaPrefix, "_gpubin_handle");
GlobalOp gpuBinHandle = GlobalOp::create(
>From 2c5efb942ba386ff995f096ef4b6acd2931599f0 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 26 Mar 2026 01:09:34 -0400
Subject: [PATCH 05/15] address more string copies stuff yo
---
clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 2 +-
clang/lib/CIR/CodeGen/CIRGenCall.cpp | 6 ++++--
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 7 ++++---
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index d9aabd602a279..8341819e84c62 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -32,7 +32,7 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", "cu.kernel_name"> {
respective function runtime registration on the host.
}];
- let parameters = (ins "std::string":$kernel_name);
+ let parameters = (ins "mlir::StringAttr":$kernel_name);
let assemblyFormat = "`<` $kernel_name `>`";
let canHaveIllegalCXXABIType = 0;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
index 876fef687b477..ccf49c6c76d66 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
@@ -16,6 +16,7 @@
#include "CIRGenFunction.h"
#include "CIRGenFunctionInfo.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Attributes.h"
#include "clang/CIR/ABIArgInfo.h"
#include "clang/CIR/MissingFeatures.h"
#include "llvm/Support/TypeSize.h"
@@ -421,8 +422,9 @@ void CIRGenModule::constructAttributeList(
GlobalDecl kernel(calleeInfo.getCalleeDecl());
llvm::StringRef kernelName = getMangledName(
kernel.getWithKernelReferenceKind(KernelReferenceKind::Kernel));
- auto attr =
- cir::CUDAKernelNameAttr::get(&getMLIRContext(), kernelName.str());
+ auto attr = cir::CUDAKernelNameAttr::get(
+ &getMLIRContext(),
+ mlir::StringAttr::get(&getMLIRContext(), kernelName));
attrs.set(attr.getMnemonic(), attr);
}
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 32fc2fea9f953..39ceb3fef58b6 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1727,9 +1727,10 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
else if (auto globalDtor = fnOp.getGlobalDtorPriority())
globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
- if (mlir::Attribute attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
+ if (mlir::Attribute attr =
+ fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
- std::string kernelName = kernelNameAttr.getKernelName();
+ llvm::StringRef kernelName = kernelNameAttr.getKernelName();
cudaKernelMap[kernelName] = fnOp;
}
} else if (auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) {
@@ -1785,7 +1786,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
mlir::Attribute cudaBinaryHandleAttr =
mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName());
if (!cudaBinaryHandleAttr) {
- if (astCtx->getLangOpts().HIP)
+ if (isHIP)
assert(!cir::MissingFeatures::hipModuleCtor());
return;
}
>From 25969b9d929b27a7170876603b917502b5db5724 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Fri, 27 Mar 2026 03:38:57 -0400
Subject: [PATCH 06/15] fix twine crashes
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 39ceb3fef58b6..fce228e759ba4 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1744,9 +1744,9 @@ static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx) {
return "cuda";
}
-static llvm::StringRef addUnderscoredPrefix(llvm::StringRef prefix,
+static std::string addUnderscoredPrefix(llvm::StringRef prefix,
llvm::StringRef name) {
- return ("__" + prefix + name).getSingleStringRef();
+ return ("__" + prefix + name).str();
}
/// Creates a global constructor function for the module:
@@ -1824,8 +1824,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
// Create the fatbin string constant with GPU binary contents.
auto fatbinType =
ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
- llvm::StringRef fatbinStrName =
- addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
+ std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
/*isConstant=*/true, {},
GlobalLinkageKind::PrivateLinkage);
@@ -1840,7 +1839,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
auto fatbinWrapperType = RecordType::get(
&getContext(), {intTy, intTy, voidPtrTy, voidPtrTy},
/*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct);
- llvm::StringRef fatbinWrapperName =
+ std::string fatbinWrapperName =
addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper");
GlobalOp fatbinWrapper = GlobalOp::create(
builder, loc, fatbinWrapperName, fatbinWrapperType,
@@ -1862,7 +1861,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
{magicInit, versionInit, fatbinInit, unusedInit})));
// Create the GPU binary handle global variable.
- llvm::StringRef gpubinHandleName =
+ std::string gpubinHandleName =
addUnderscoredPrefix(cudaPrefix, "_gpubin_handle");
GlobalOp gpuBinHandle = GlobalOp::create(
>From 39d03dcd0466233a681d4083175e0e3d64e725e8 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 13:45:13 -0400
Subject: [PATCH 07/15] fix fmt
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index fce228e759ba4..fa0d834b50a8c 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -8,8 +8,8 @@
#include "PassDetail.h"
#include "mlir/IR/Attributes.h"
-#include "mlir/IR/IRMapping.h"
#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/IRMapping.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attrs.inc"
#include "clang/AST/Mangle.h"
>From 3d709396d522904ecf95c540dc0d6c470e4b8041 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 15:58:55 -0400
Subject: [PATCH 08/15] Fix conflicts and add section to fatbin globals
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 10 +++++++++-
clang/test/CIR/CodeGenCUDA/device-stub.cu | 7 +++----
clang/test/CIR/CodeGenCUDA/kernel-call.cu | 4 ++--
clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu | 6 +++---
clang/test/CIR/CodeGenHIP/simple.cpp | 2 +-
5 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index fa0d834b50a8c..30b318879fc84 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1821,6 +1821,13 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
// --- Create fatbin globals ---
+ // The section names are different for MAC OS X.
+ llvm::StringRef fatbinConstName =
+ astCtx->getLangOpts().HIP ? ".hip_fatbin" : ".nv_fatbin";
+
+ llvm::StringRef fatbinSectionName =
+ astCtx->getLangOpts().HIP ? ".hipFatBinSegment" : ".nvFatBinSegment";
+
// Create the fatbin string constant with GPU binary contents.
auto fatbinType =
ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
@@ -1831,7 +1838,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
fatbinStr.setAlignment(8);
fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
fatbinType, builder.getStringAttr(gpuBinary->getBuffer())));
- assert(!cir::MissingFeatures::opGlobalSection());
+ fatbinStr.setSection(fatbinConstName);
fatbinStr.setPrivate();
// Create the fatbin wrapper struct:
@@ -1844,6 +1851,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
GlobalOp fatbinWrapper = GlobalOp::create(
builder, loc, fatbinWrapperName, fatbinWrapperType,
/*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage);
+ fatbinWrapper.setSection(fatbinSectionName);
constexpr unsigned cudaFatMagic = 0x466243b1;
constexpr unsigned hipFatMagic = 0x48495046;
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index b3e8baa17c7a4..2e9deaee9b225 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -21,16 +21,15 @@ __global__ void kernelfunc(int i, int j, int k) {}
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
-// Check the fatbin string constant with GPU binary contents.
-// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64}
+// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
-// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }.
+// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, with section.
// CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{
// CIR-SAME: #cir.int<1180844977> : !s32i,
// CIR-SAME: #cir.int<1> : !s32i,
// CIR-SAME: #cir.global_view<@__cuda_fatbin_str> : !cir.ptr<!void>,
// CIR-SAME: #cir.ptr<null> : !cir.ptr<!void>
-// CIR-SAME: }>
+// CIR-SAME: }> : !rec_anon_struct {section = ".nvFatBinSegment"}
// Check the GPU binary handle global.
// CIR: cir.global "private" internal @__cuda_gpubin_handle = #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>>
diff --git a/clang/test/CIR/CodeGenCUDA/kernel-call.cu b/clang/test/CIR/CodeGenCUDA/kernel-call.cu
index 230bcdfe6e22c..34719a2d3acb1 100644
--- a/clang/test/CIR/CodeGenCUDA/kernel-call.cu
+++ b/clang/test/CIR/CodeGenCUDA/kernel-call.cu
@@ -118,13 +118,13 @@ int main(void) {
// CUDA-NEW: } else {
// CUDA-NEW: cir.const #cir.int<42> : !s32i
// CUDA-NEW: cir.const #cir.fp<1.000000e+00> : !cir.float
- // CUDA-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<_Z6kernelif>} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> ()
+ // CUDA-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<"_Z6kernelif">} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> ()
// CUDA-NEW: }
// HIP-NEW: cir.if %{{.*}} {
// HIP-NEW: } else {
// HIP-NEW: cir.const #cir.int<42> : !s32i
// HIP-NEW: cir.const #cir.fp<1.000000e+00> : !cir.float
- // HIP-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<_Z6kernelif>} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> ()
+ // HIP-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<"_Z6kernelif">} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> ()
// HIP-NEW: }
kernel<<<1, 1>>>(42, 1.0f);
}
diff --git a/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu
index 1a874d9e9fada..42c8f10430b1f 100644
--- a/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu
+++ b/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu
@@ -6,17 +6,17 @@
#include "Inputs/cuda.h"
-// CHECK: cir.func {{.*}} @[[CSTUB:__device_stub__ckernel]]() attributes {cu.kernel_name = #cir.cu.kernel_name<ckernel>{{.*}}}
+// CHECK: cir.func {{.*}} @[[CSTUB:__device_stub__ckernel]]() attributes {cu.kernel_name = #cir.cu.kernel_name<"ckernel">{{.*}}}
// CHECK: cir.return
// CHECK-NEXT: }
extern "C" __global__ void ckernel() {}
-// CHECK: cir.func {{.*}} @_ZN2ns23__device_stub__nskernelEv() attributes {cu.kernel_name = #cir.cu.kernel_name<_ZN2ns8nskernelEv>{{.*}}}
+// CHECK: cir.func {{.*}} @_ZN2ns23__device_stub__nskernelEv() attributes {cu.kernel_name = #cir.cu.kernel_name<"_ZN2ns8nskernelEv">{{.*}}}
namespace ns {
__global__ void nskernel() {}
} // namespace ns
-// CHECK: cir.func {{.*}} @_Z25__device_stub__kernelfuncIiEvv() attributes {cu.kernel_name = #cir.cu.kernel_name<_Z10kernelfuncIiEvv>{{.*}}}
+// CHECK: cir.func {{.*}} @_Z25__device_stub__kernelfuncIiEvv() attributes {cu.kernel_name = #cir.cu.kernel_name<"_Z10kernelfuncIiEvv">{{.*}}}
template <class T>
__global__ void kernelfunc() {}
template __global__ void kernelfunc<int>();
diff --git a/clang/test/CIR/CodeGenHIP/simple.cpp b/clang/test/CIR/CodeGenHIP/simple.cpp
index 15240fd7a3038..b3df34aed6afb 100644
--- a/clang/test/CIR/CodeGenHIP/simple.cpp
+++ b/clang/test/CIR/CodeGenHIP/simple.cpp
@@ -42,7 +42,7 @@ __global__ void global_fn(int a) {}
// CIR-DEVICE: cir.func {{.*}}{{.*}} @_Z9global_fni
// OGCG-DEVICE: define protected amdgpu_kernel void @_Z9global_fni
-// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}attributes {cu.kernel_name = #cir.cu.kernel_name<_Z9global_fni>{{.*}}}
+// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}attributes {cu.kernel_name = #cir.cu.kernel_name<"_Z9global_fni">{{.*}}}
// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
// CIR-HOST: %[[#Decayed:]] = cir.cast array_to_ptrdecay %[[#CIRKernelArgs]]
// CIR-HOST: cir.call @__hipPopCallConfiguration
>From 31fd99be392b7bdb4bb9320f3c73455e79e1d042 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 17:19:29 -0400
Subject: [PATCH 09/15] remove accidental .inc include
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 30b318879fc84..c46427422f508 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -11,7 +11,6 @@
#include "mlir/IR/BuiltinAttributeInterfaces.h"
#include "mlir/IR/IRMapping.h"
#include "clang/AST/ASTContext.h"
-#include "clang/AST/Attrs.inc"
#include "clang/AST/Mangle.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/Specifiers.h"
>From 289c766eec5dedcc4c50371ed8d1ff2bc12b278c Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 17:31:27 -0400
Subject: [PATCH 10/15] Fix missing include for memoryBuffer on linux ci
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index c46427422f508..f831335642724 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -27,6 +27,7 @@
#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <memory>
>From 4ec77d8218ad0d974004a34d3f1efeb144a5f516 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 17:47:02 -0400
Subject: [PATCH 11/15] remove unused var
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index f831335642724..d6b3cdd5e42a2 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1812,7 +1812,6 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
CIRBaseBuilderTy builder(getContext());
builder.setInsertionPointToStart(mlirModule.getBody());
- VoidType voidTy = builder.getVoidTy();
PointerType voidPtrTy = builder.getVoidPtrTy();
PointerType voidPtrPtrTy = builder.getPointerTo(voidPtrTy);
IntType intTy = builder.getSIntNTy(32);
>From 502a9483f34c4a734301e703f4dc849ff7e837fd Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 18:49:30 -0400
Subject: [PATCH 12/15] Use vfs from ast context to get gpubinary
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index d6b3cdd5e42a2..c30c3ef4fa3af 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -13,6 +13,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Mangle.h"
#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetCXXABI.h"
#include "clang/Basic/TargetInfo.h"
@@ -27,8 +28,8 @@
#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <memory>
@@ -1796,8 +1797,10 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
.getName()
.getValue();
+ llvm::vfs::FileSystem &vfs =
+ astCtx->getSourceManager().getFileManager().getVirtualFileSystem();
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
- llvm::MemoryBuffer::getFile(cudaGPUBinaryName);
+ vfs.getBufferForFile(cudaGPUBinaryName);
if (std::error_code ec = gpuBinaryOrErr.getError()) {
mlirModule->emitError("cannot open GPU binary file: " + cudaGPUBinaryName +
": " + ec.message());
>From dda778a5493e142d2d55e4fdf6707c5ba068eb48 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Wed, 25 Mar 2026 22:29:47 -0400
Subject: [PATCH 13/15] [CIR][CUDA] Handle CUDA module constructor and
destructor emission.
---
.../Dialect/Transforms/LoweringPrepare.cpp | 124 +++++++++++++++++-
clang/test/CIR/CodeGenCUDA/device-stub.cu | 41 ++++++
2 files changed, 163 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index c30c3ef4fa3af..6bc1d8b6b76f1 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -10,8 +10,10 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributeInterfaces.h"
#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/Value.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Mangle.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
@@ -28,10 +30,13 @@
#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <memory>
+#include <optional>
using namespace mlir;
using namespace cir;
@@ -122,6 +127,7 @@ struct LoweringPreparePass
/// Build the CUDA module constructor that registers the fat binary
/// with the CUDA runtime.
void buildCUDAModuleCtor();
+ std::optional<FuncOp> buildCUDAModuleDtor();
/// Handle static local variable initialization with guard variables.
void handleStaticLocal(cir::GlobalOp globalOp, cir::GetGlobalOp getGlobalOp);
@@ -1880,8 +1886,122 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
gpuBinHandle.setInitialValueAttr(builder.getConstNullPtrAttr(voidPtrPtrTy));
gpuBinHandle.setPrivate();
- // TODO: ctor/dtor/register_globals
- assert(!cir::MissingFeatures::globalRegistration());
+ // Declare this function:
+ // void **__{cuda|hip}RegisterFatBinary(void *);
+
+ std::string regFuncName =
+ addUnderscoredPrefix(cudaPrefix, "RegisterFatBinary");
+ FuncType regFuncType = FuncType::get({voidPtrTy}, voidPtrPtrTy);
+ cir::FuncOp regFunc =
+ buildRuntimeFunction(builder, regFuncName, loc, regFuncType);
+
+ std::string moduleCtorName = addUnderscoredPrefix(cudaPrefix, "_module_ctor");
+ cir::FuncOp moduleCtor = buildRuntimeFunction(
+ builder, moduleCtorName, loc, FuncType::get({}, voidTy),
+ GlobalLinkageKind::InternalLinkage);
+
+ globalCtorList.emplace_back(moduleCtorName,
+ cir::GlobalCtorAttr::getDefaultPriority());
+ builder.setInsertionPointToStart(moduleCtor.addEntryBlock());
+ assert(!cir::MissingFeatures::opGlobalCtorPriority());
+ if (isHIP) {
+ llvm_unreachable("HIP Module Constructor Support");
+ } else if (!astCtx->getLangOpts().GPURelocatableDeviceCode) {
+
+ // --- Create CUDA CTOR-DTOR ---
+ // Register binary with CUDA runtime. This is substantially different in
+ // default mode vs. separate compilation.
+ // Corresponding code:
+ // gpuBinaryHandle = __cudaRegisterFatBinary(&fatbinWrapper);
+ mlir::Value wrapper = builder.createGetGlobal(fatbinWrapper);
+ mlir::Value fatbinVoidPtr = builder.createBitcast(wrapper, voidPtrTy);
+ cir::CallOp gpuBinaryHandleCall =
+ builder.createCallOp(loc, regFunc, fatbinVoidPtr);
+ mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
+ // Store the value back to the global `__cuda_gpubin_handle`.
+ mlir::Value gpuBinaryHandleGlobal = builder.createGetGlobal(gpuBinHandle);
+ builder.createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
+
+ // TODO: Generate __cuda_register_globals and emit a call.
+ assert(!cir::MissingFeatures::globalRegistration());
+
+ // From CUDA 10.1 onwards, we must call this function to end registration:
+ // void __cudaRegisterFatBinaryEnd(void **fatbinHandle);
+ // This is CUDA-specific, so no need to use `addUnderscoredPrefix`.
+ if (clang::CudaFeatureEnabled(
+ astCtx->getTargetInfo().getSDKVersion(),
+ clang::CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
+ cir::CIRBaseBuilderTy globalBuilder(getContext());
+ globalBuilder.setInsertionPointToStart(mlirModule.getBody());
+ FuncOp endFunc =
+ buildRuntimeFunction(globalBuilder, "__cudaRegisterFatBinaryEnd", loc,
+ FuncType::get({voidPtrPtrTy}, voidTy));
+ builder.createCallOp(loc, endFunc, gpuBinaryHandle);
+ }
+ }
+
+ // Create destructor and register it with atexit() the way NVCC does it. Doing
+ // it during regular destructor phase worked in CUDA before 9.2 but results in
+ // double-free in 9.2.
+ if (std::optional<FuncOp> dtor = buildCUDAModuleDtor()) {
+
+ // extern "C" int atexit(void (*f)(void));
+ cir::CIRBaseBuilderTy globalBuilder(getContext());
+ globalBuilder.setInsertionPointToStart(mlirModule.getBody());
+ FuncOp atexit = buildRuntimeFunction(
+ globalBuilder, "atexit", loc,
+ FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
+ mlir::Value dtorFunc = GetGlobalOp::create(
+ builder, loc, PointerType::get(dtor->getFunctionType()),
+ mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
+ builder.createCallOp(loc, atexit, dtorFunc);
+ }
+ cir::ReturnOp::create(builder, loc);
+}
+
+std::optional<FuncOp> LoweringPreparePass::buildCUDAModuleDtor() {
+ if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
+ return {};
+
+ llvm::StringRef prefix = getCUDAPrefix(astCtx);
+
+ VoidType voidTy = VoidType::get(&getContext());
+ PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
+
+ mlir::Location loc = mlirModule.getLoc();
+
+ cir::CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointToStart(mlirModule.getBody());
+
+ // define: void __cudaUnregisterFatBinary(void ** handle);
+ std::string unregisterFuncName =
+ addUnderscoredPrefix(prefix, "UnregisterFatBinary");
+ FuncOp unregisterFunc = buildRuntimeFunction(
+ builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
+
+ // void __cuda_module_dtor();
+ // Despite the name, OG doesn't treat it as a destructor, so it shouldn't be
+ // put into globalDtorList. If it were a real dtor, then it would cause
+ // double free above CUDA 9.2. The way to use it is to manually call
+ // atexit() at end of module ctor.
+ std::string dtorName = addUnderscoredPrefix(prefix, "_module_dtor");
+ FuncOp dtor =
+ buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
+ GlobalLinkageKind::InternalLinkage);
+
+ builder.setInsertionPointToStart(dtor.addEntryBlock());
+
+ // For dtor, we only need to call:
+ // __cudaUnregisterFatBinary(__cuda_gpubin_handle);
+
+ std::string gpubinName = addUnderscoredPrefix(prefix, "_gpubin_handle");
+ GlobalOp gpubinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
+ mlir::Value gpubinAddress = builder.createGetGlobal(gpubinGlobal);
+ mlir::Value gpubin = builder.createLoad(loc, gpubinAddress);
+ builder.createCallOp(loc, unregisterFunc, gpubin);
+ ReturnOp::create(builder, loc);
+
+ return dtor;
}
void LoweringPreparePass::runOnOperation() {
diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 2e9deaee9b225..4562bf1523141 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -21,6 +21,22 @@ __global__ void kernelfunc(int i, int j, int k) {}
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
+// Check module constructor is registered in module attributes.
+// CIR: cir.global_ctors = [#cir.global_ctor<"__cuda_module_ctor", 65535>]
+
+// Check runtime function declarations (appear before dtor in output).
+// CIR: cir.func private @atexit(!cir.ptr<!cir.func<()>>) -> !s32i
+// CIR: cir.func private @__cudaUnregisterFatBinary(!cir.ptr<!cir.ptr<!void>>)
+
+// Check the module destructor body: load handle and call UnregisterFatBinary.
+// CIR: cir.func internal private @__cuda_module_dtor()
+// CIR-NEXT: %[[HANDLE_ADDR:.*]] = cir.get_global @__cuda_gpubin_handle
+// CIR-NEXT: %[[HANDLE:.*]] = cir.load %[[HANDLE_ADDR]]
+// CIR-NEXT: cir.call @__cudaUnregisterFatBinary(%[[HANDLE]])
+// CIR-NEXT: cir.return
+
+// CIR: cir.func private @__cudaRegisterFatBinaryEnd(!cir.ptr<!cir.ptr<!void>>)
+
// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, with section.
@@ -34,9 +50,34 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// Check the GPU binary handle global.
// CIR: cir.global "private" internal @__cuda_gpubin_handle = #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>>
+// CIR: cir.func private @__cudaRegisterFatBinary(!cir.ptr<!void>) -> !cir.ptr<!cir.ptr<!void>>
+
+// Check the module constructor body: register fatbin, store handle,
+// call RegisterFatBinaryEnd (CUDA >= 10.1), then register dtor with atexit.
+// CIR: cir.func internal private @__cuda_module_ctor()
+// CIR-NEXT: %[[WRAPPER:.*]] = cir.get_global @__cuda_fatbin_wrapper
+// CIR-NEXT: %[[VOID_PTR:.*]] = cir.cast bitcast %[[WRAPPER]]
+// CIR-NEXT: %[[RET:.*]] = cir.call @__cudaRegisterFatBinary(%[[VOID_PTR]])
+// CIR-NEXT: %[[HANDLE_ADDR:.*]] = cir.get_global @__cuda_gpubin_handle
+// CIR-NEXT: cir.store %[[RET]], %[[HANDLE_ADDR]]
+// CIR-NEXT: cir.call @__cudaRegisterFatBinaryEnd(%[[RET]])
+// CIR-NEXT: %[[DTOR_PTR:.*]] = cir.get_global @__cuda_module_dtor
+// CIR-NEXT: {{.*}} = cir.call @atexit(%[[DTOR_PTR]])
+// CIR-NEXT: cir.return
+
// OGCG: constant [25 x i8] c"GPU binary would be here.", section ".nv_fatbin", align 8
// OGCG: @__cuda_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment"
// OGCG: @__cuda_gpubin_handle = internal global ptr null
+// OGCG: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
+
+// OGCG: define internal void @__cuda_module_ctor
+// OGCG: call{{.*}}__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper)
+// OGCG: store ptr %{{.*}}, ptr @__cuda_gpubin_handle
+// OGCG: call i32 @atexit(ptr @__cuda_module_dtor)
+
+// OGCG: define internal void @__cuda_module_dtor
+// OGCG: load ptr, ptr @__cuda_gpubin_handle
+// OGCG: call void @__cudaUnregisterFatBinary
// No GPU binary — no registration infrastructure at all.
// NOGPUBIN-NOT: fatbin
>From aa744fd173a68ed7bab450da44b523f49067dac9 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Sun, 29 Mar 2026 14:04:44 -0400
Subject: [PATCH 14/15] unreachable on RDC compilation
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 6bc1d8b6b76f1..1c68edb805dd0 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1938,7 +1938,8 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
FuncType::get({voidPtrPtrTy}, voidTy));
builder.createCallOp(loc, endFunc, gpuBinaryHandle);
}
- }
+ } else
+ llvm_unreachable("GPU RDC NYI");
// Create destructor and register it with atexit() the way NVCC does it. Doing
// it during regular destructor phase worked in CUDA before 9.2 but results in
>From 36f2505f96239ddb3060cf01b31b0f9e4b464998 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Thu, 2 Apr 2026 18:01:00 -0400
Subject: [PATCH 15/15] fix undefined void ty
---
clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 1c68edb805dd0..2b38f9cbecfba 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1821,6 +1821,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
CIRBaseBuilderTy builder(getContext());
builder.setInsertionPointToStart(mlirModule.getBody());
+ Type voidTy = builder.getVoidTy();
PointerType voidPtrTy = builder.getVoidPtrTy();
PointerType voidPtrPtrTy = builder.getPointerTo(voidPtrTy);
IntType intTy = builder.getSIntNTy(32);
More information about the cfe-commits
mailing list