[llvm-branch-commits] [clang] [CIR] Ext init for `__device__`, `__constant__` and internalize host shadows (PR #186568)
David Rivera via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Mar 14 00:31:25 PDT 2026
https://github.com/RiverDave created https://github.com/llvm/llvm-project/pull/186568
None
>From d471d96cc7c2f73242166b2a3ee1a2381c03b49c Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Sat, 14 Mar 2026 03:30:58 -0400
Subject: [PATCH] [CIR] Ext init for `__device__`, `__constant__` and
internalize host shadows
---
.../include/clang/CIR/Dialect/IR/CIRAttrs.td | 14 +++++++++++
clang/include/clang/CIR/MissingFeatures.h | 1 +
clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 23 +++++++++++++++++++
clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h | 4 ++++
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 23 +++++++++++++++++++
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 10 ++++++++
clang/test/CIR/CodeGenCUDA/address-spaces.cu | 16 ++++++-------
7 files changed, 83 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
index b1be1d5daf4e0..66a86a08c9439 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -1393,6 +1393,20 @@ class CIR_AST<string name, string prefix, list<Trait> traits = []>
}];
}
+def CIR_ExternallyInitializedAttr : CIR_Attr<
+ "ExternallyInitializedAttr", "cir.externally_initialized"
+> {
+ let summary = "The marked variable is externally initialized.";
+ let description =
+ [{
+ CUDA __device__ and __constant__ variables, along with surface and
+ textures, might be initialized by host, hence "externally initialized".
+ Therefore they must be emitted even if they are not referenced.
+
+ The attribute corresponds to the attribute on LLVM with the same name.
+ }];
+}
+
def CIR_ASTVarDeclAttr : CIR_AST<"VarDecl", "var.decl", [
ASTVarDeclInterface
]>;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index f688795a1616b..4486bee268b82 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -252,6 +252,7 @@ struct MissingFeatures {
static bool ctorConstLvalueToRvalueConversion() { return false; }
static bool ctorMemcpyizer() { return false; }
static bool cudaSupport() { return false; }
+ static bool offloadRegistration() { return false; }
static bool dataLayoutTypeIsSized() { return false; }
static bool dataLayoutTypeAllocSize() { return false; }
static bool dataLayoutTypeStoreSize() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
index 8b8e99023eceb..cd41d40cff595 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
@@ -64,6 +64,9 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime {
void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn,
FunctionArgList &args) override;
+
+ void internalizeDeviceSideVar(const VarDecl *vd,
+ cir::GlobalLinkageKind &linkage) override;
};
} // namespace
@@ -342,3 +345,23 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
return globalOp;
}
+
+void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
+ const VarDecl *d, cir::GlobalLinkageKind &linkage) {
+ if (cgm.getLangOpts().GPURelocatableDeviceCode)
+ cgm.errorNYI("GPU Relocatable Deviced Code (RDC)");
+
+ // __shared__ variables are odd. Shadows do get created, but
+ // they are not registered with the CUDA runtime, so they
+ // can't really be used to access their device-side
+ // counterparts. It's not clear yet whether it's nvcc's bug or
+ // a feature, but we've got to do the same for compatibility.
+ if (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
+ d->hasAttr<CUDASharedAttr>()) {
+ linkage = cir::GlobalLinkageKind::InternalLinkage;
+ }
+
+ if (d->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ d->getType()->isCUDADeviceBuiltinTextureType())
+ cgm.errorNYI("CUDA Surface/Texture support");
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
index ba33602511e3b..589321d66c2b1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
@@ -47,6 +47,10 @@ class CIRGenCUDARuntime {
virtual mlir::Operation *getKernelHandle(cir::FuncOp fn, GlobalDecl gd) = 0;
virtual mlir::Operation *getKernelStub(mlir::Operation *handle) = 0;
+
+ /// Adjust linkage of shadow variables in host compilation.
+ virtual void internalizeDeviceSideVar(const VarDecl *vd,
+ cir::GlobalLinkageKind &linkage) = 0;
};
CIRGenCUDARuntime *createNVCUDARuntime(CIRGenModule &cgm);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index b06d108d15f02..25181f272b19c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1031,6 +1031,29 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
cir::GlobalLinkageKind linkage =
getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
+ // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+ // the device. [...]"
+ // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+ // __device__, declares a variable that: [...]
+ // Is accessible from all the threads within the grid and from the host
+ // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+ // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+ if (langOpts.CUDA) {
+ if (langOpts.CUDAIsDevice) {
+ if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
+ (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>() ||
+ vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ vd->getType()->isCUDADeviceBuiltinTextureType())) {
+ gv->setAttr(cir::ExternallyInitializedAttrAttr::getMnemonic(),
+ cir::ExternallyInitializedAttrAttr::get(&getMLIRContext()));
+ } else {
+ getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
+ }
+ }
+
+ assert(!cir::MissingFeatures::offloadRegistration());
+ }
+
// Set CIR linkage and DLL storage class.
gv.setLinkage(linkage);
// FIXME(cir): setLinkage should likely set MLIR's visibility automatically.
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3a19cd5ecdb9e..15a74e5a55459 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2507,6 +2507,12 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp(
mlir::SymbolRefAttr comdatAttr = getComdatAttr(op, rewriter);
SmallVector<mlir::NamedAttribute> attributes;
+
+ if (auto extInit =
+ op->getAttr(cir::ExternallyInitializedAttrAttr::getMnemonic()))
+ attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+ rewriter.getUnitAttr()));
+
mlir::LLVM::GlobalOp newGlobalOp =
rewriter.replaceOpWithNewOp<mlir::LLVM::GlobalOp>(
op, llvmType, isConst, linkage, symbol, nullptr, alignment, addrSpace,
@@ -2568,6 +2574,10 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
const StringRef symbol = op.getSymName();
SmallVector<mlir::NamedAttribute> attributes;
+ if (op->getAttr(cir::ExternallyInitializedAttrAttr::getMnemonic()))
+ attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+ rewriter.getUnitAttr()));
+
if (init.has_value()) {
if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) {
GlobalInitAttrRewriter initRewriter(llvmType, rewriter);
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index 49dacf1200a2d..c3c19cbabd053 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -21,25 +21,25 @@
// CIR-PRE: cir.global external lang_address_space(offload_global) @i = #cir.int<0> : !s32i
// CIR-POST: cir.global external target_address_space(1) @i = #cir.int<0> : !s32i
-// CIR-LLVM-DAG: @i = addrspace(1) global i32 0, align 4
+// CIR-LLVM-DAG: @i = addrspace(1) externally_initialized global i32 0, align 4
// OGCG-DAG: @i = addrspace(1) externally_initialized global i32 0, align 4
__device__ int i;
// CIR-PRE: cir.global constant external lang_address_space(offload_constant) @j = #cir.int<0> : !s32i
// CIR-POST: cir.global constant external target_address_space(4) @j = #cir.int<0> : !s32i
-// CIR-LLVM-DAG: @j = addrspace(4) constant i32 0, align 4
+// CIR-LLVM-DAG: @j = addrspace(4) externally_initialized constant i32 0, align 4
// OGCG-DAG: @j = addrspace(4) externally_initialized constant i32 0, align 4
__constant__ int j;
-// CIR-PRE: cir.global external lang_address_space(offload_local) @k = #cir.poison : !s32i
-// CIR-POST: cir.global external target_address_space(3) @k = #cir.poison : !s32i
-// CIR-LLVM-DAG: @k = addrspace(3) global i32 poison, align 4
+// CIR-PRE: cir.global "private" internal dso_local lang_address_space(offload_local) @k = #cir.poison : !s32i
+// CIR-POST: cir.global "private" internal dso_local target_address_space(3) @k = #cir.poison : !s32i
+// CIR-LLVM-DAG: @k = internal addrspace(3) global i32 poison, align 4
// OGCG-DAG: @k = addrspace(3) global i32 undef, align 4
__shared__ int k;
-// CIR-PRE: cir.global external lang_address_space(offload_local) @b = #cir.poison : !cir.float
-// CIR-POST: cir.global external target_address_space(3) @b = #cir.poison : !cir.float
-// CIR-LLVM-DAG: @b = addrspace(3) global float poison, align 4
+// CIR-PRE: cir.global "private" internal dso_local lang_address_space(offload_local) @b = #cir.poison : !cir.float
+// CIR-POST: cir.global "private" internal dso_local target_address_space(3) @b = #cir.poison : !cir.float
+// CIR-LLVM-DAG: @b = internal addrspace(3) global float poison, align 4
// OGCG-DAG: @b = addrspace(3) global float undef, align 4
__shared__ float b;
More information about the llvm-branch-commits
mailing list