[llvm-branch-commits] [clang] [CIR] Ext init for `__device__`, `__constant__` and internalize host shadows (PR #186568)

David Rivera via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Mar 14 00:31:25 PDT 2026


https://github.com/RiverDave created https://github.com/llvm/llvm-project/pull/186568

None

>From d471d96cc7c2f73242166b2a3ee1a2381c03b49c Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Sat, 14 Mar 2026 03:30:58 -0400
Subject: [PATCH] [CIR] Ext init for `__device__`, `__constant__` and
 internalize host shadows

---
 .../include/clang/CIR/Dialect/IR/CIRAttrs.td  | 14 +++++++++++
 clang/include/clang/CIR/MissingFeatures.h     |  1 +
 clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp        | 23 +++++++++++++++++++
 clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h     |  4 ++++
 clang/lib/CIR/CodeGen/CIRGenModule.cpp        | 23 +++++++++++++++++++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 10 ++++++++
 clang/test/CIR/CodeGenCUDA/address-spaces.cu  | 16 ++++++-------
 7 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
index b1be1d5daf4e0..66a86a08c9439 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -1393,6 +1393,20 @@ class CIR_AST<string name, string prefix, list<Trait> traits = []>
   }];
 }
 
+def CIR_ExternallyInitializedAttr : CIR_Attr<
+  "ExternallyInitializedAttr", "cir.externally_initialized"
+> {
+  let summary = "The marked variable is externally initialized.";
+  let description =
+  [{
+    CUDA __device__ and __constant__ variables, along with surface and
+    textures, might be initialized by host, hence "externally initialized".
+    Therefore they must be emitted even if they are not referenced.
+
+    The attribute corresponds to the attribute on LLVM with the same name.
+  }];
+}
+
 def CIR_ASTVarDeclAttr : CIR_AST<"VarDecl", "var.decl", [
   ASTVarDeclInterface
 ]>;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index f688795a1616b..4486bee268b82 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -252,6 +252,7 @@ struct MissingFeatures {
   static bool ctorConstLvalueToRvalueConversion() { return false; }
   static bool ctorMemcpyizer() { return false; }
   static bool cudaSupport() { return false; }
+  static bool offloadRegistration() { return false; }
   static bool dataLayoutTypeIsSized() { return false; }
   static bool dataLayoutTypeAllocSize() { return false; }
   static bool dataLayoutTypeStoreSize() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
index 8b8e99023eceb..cd41d40cff595 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
@@ -64,6 +64,9 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime {
 
   void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn,
                       FunctionArgList &args) override;
+
+  void internalizeDeviceSideVar(const VarDecl *vd,
+                                cir::GlobalLinkageKind &linkage) override;
 };
 
 } // namespace
@@ -342,3 +345,23 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
 
   return globalOp;
 }
+
+void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
+    const VarDecl *d, cir::GlobalLinkageKind &linkage) {
+  if (cgm.getLangOpts().GPURelocatableDeviceCode)
+    cgm.errorNYI("GPU Relocatable Deviced Code (RDC)");
+
+  // __shared__ variables are odd. Shadows do get created, but
+  // they are not registered with the CUDA runtime, so they
+  // can't really be used to access their device-side
+  // counterparts. It's not clear yet whether it's nvcc's bug or
+  // a feature, but we've got to do the same for compatibility.
+  if (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
+      d->hasAttr<CUDASharedAttr>()) {
+    linkage = cir::GlobalLinkageKind::InternalLinkage;
+  }
+
+  if (d->getType()->isCUDADeviceBuiltinSurfaceType() ||
+      d->getType()->isCUDADeviceBuiltinTextureType())
+    cgm.errorNYI("CUDA Surface/Texture support");
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
index ba33602511e3b..589321d66c2b1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
@@ -47,6 +47,10 @@ class CIRGenCUDARuntime {
   virtual mlir::Operation *getKernelHandle(cir::FuncOp fn, GlobalDecl gd) = 0;
 
   virtual mlir::Operation *getKernelStub(mlir::Operation *handle) = 0;
+
+  /// Adjust linkage of shadow variables in host compilation.
+  virtual void internalizeDeviceSideVar(const VarDecl *vd,
+                                        cir::GlobalLinkageKind &linkage) = 0;
 };
 
 CIRGenCUDARuntime *createNVCUDARuntime(CIRGenModule &cgm);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index b06d108d15f02..25181f272b19c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1031,6 +1031,29 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
   cir::GlobalLinkageKind linkage =
       getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
 
+  // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+  // the device. [...]"
+  // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+  // __device__, declares a variable that: [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+  if (langOpts.CUDA) {
+    if (langOpts.CUDAIsDevice) {
+      if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
+          (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>() ||
+           vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+           vd->getType()->isCUDADeviceBuiltinTextureType())) {
+        gv->setAttr(cir::ExternallyInitializedAttrAttr::getMnemonic(),
+                    cir::ExternallyInitializedAttrAttr::get(&getMLIRContext()));
+      } else {
+        getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
+      }
+    }
+
+    assert(!cir::MissingFeatures::offloadRegistration());
+  }
+
   // Set CIR linkage and DLL storage class.
   gv.setLinkage(linkage);
   // FIXME(cir): setLinkage should likely set MLIR's visibility automatically.
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3a19cd5ecdb9e..15a74e5a55459 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2507,6 +2507,12 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp(
   mlir::SymbolRefAttr comdatAttr = getComdatAttr(op, rewriter);
 
   SmallVector<mlir::NamedAttribute> attributes;
+
+  if (auto extInit =
+          op->getAttr(cir::ExternallyInitializedAttrAttr::getMnemonic()))
+    attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+                                               rewriter.getUnitAttr()));
+
   mlir::LLVM::GlobalOp newGlobalOp =
       rewriter.replaceOpWithNewOp<mlir::LLVM::GlobalOp>(
           op, llvmType, isConst, linkage, symbol, nullptr, alignment, addrSpace,
@@ -2568,6 +2574,10 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
   const StringRef symbol = op.getSymName();
   SmallVector<mlir::NamedAttribute> attributes;
 
+  if (op->getAttr(cir::ExternallyInitializedAttrAttr::getMnemonic()))
+    attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+                                               rewriter.getUnitAttr()));
+
   if (init.has_value()) {
     if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) {
       GlobalInitAttrRewriter initRewriter(llvmType, rewriter);
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index 49dacf1200a2d..c3c19cbabd053 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -21,25 +21,25 @@
 
 // CIR-PRE: cir.global external  lang_address_space(offload_global) @i = #cir.int<0> : !s32i
 // CIR-POST: cir.global external  target_address_space(1) @i = #cir.int<0> : !s32i
-// CIR-LLVM-DAG: @i = addrspace(1) global i32 0, align 4
+// CIR-LLVM-DAG: @i = addrspace(1) externally_initialized global i32 0, align 4
 // OGCG-DAG: @i = addrspace(1) externally_initialized global i32 0, align 4
 __device__ int i;
 
 // CIR-PRE: cir.global constant external  lang_address_space(offload_constant) @j = #cir.int<0> : !s32i
 // CIR-POST: cir.global constant external  target_address_space(4) @j = #cir.int<0> : !s32i
-// CIR-LLVM-DAG: @j = addrspace(4) constant i32 0, align 4
+// CIR-LLVM-DAG: @j = addrspace(4) externally_initialized constant i32 0, align 4
 // OGCG-DAG: @j = addrspace(4) externally_initialized constant i32 0, align 4
 __constant__ int j;
 
-// CIR-PRE: cir.global external  lang_address_space(offload_local) @k = #cir.poison : !s32i
-// CIR-POST: cir.global external  target_address_space(3) @k = #cir.poison : !s32i
-// CIR-LLVM-DAG: @k = addrspace(3) global i32 poison, align 4
+// CIR-PRE: cir.global "private" internal dso_local  lang_address_space(offload_local) @k = #cir.poison : !s32i
+// CIR-POST: cir.global "private" internal dso_local  target_address_space(3) @k = #cir.poison : !s32i
+// CIR-LLVM-DAG: @k = internal addrspace(3) global i32 poison, align 4
 // OGCG-DAG: @k = addrspace(3) global i32 undef, align 4
 __shared__ int k;
 
-// CIR-PRE: cir.global external  lang_address_space(offload_local) @b = #cir.poison : !cir.float
-// CIR-POST: cir.global external  target_address_space(3) @b = #cir.poison : !cir.float
-// CIR-LLVM-DAG: @b = addrspace(3) global float poison, align 4
+// CIR-PRE: cir.global "private" internal dso_local  lang_address_space(offload_local) @b = #cir.poison : !cir.float
+// CIR-POST: cir.global "private" internal dso_local  target_address_space(3) @b = #cir.poison : !cir.float
+// CIR-LLVM-DAG: @b = internal addrspace(3) global float poison, align 4
 // OGCG-DAG: @b = addrspace(3) global float undef, align 4
 __shared__ float b;
 



More information about the llvm-branch-commits mailing list