[clang] [CIR][CUDA] Handle local, __device__, and __shared__ variables (PR #184248)
Zaky Hermawan via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 5 11:46:09 PST 2026
https://github.com/ZakyHermawan updated https://github.com/llvm/llvm-project/pull/184248
>From 809121a3d2fe82e146da5cc8177e134b862b2a01 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Tue, 3 Mar 2026 05:16:23 +0700
Subject: [PATCH 1/2] [CIR][CUDA] Handle __device__ and __shared__ variables
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 14 ++--
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 53 ++++++++++++-
clang/lib/CIR/CodeGen/CIRGenModule.h | 10 +++
clang/lib/CIR/CodeGen/TargetInfo.cpp | 9 +++
clang/lib/CIR/CodeGen/TargetInfo.h | 7 ++
clang/test/CIR/CodeGenCUDA/address-spaces.cu | 78 ++++++++++++++++++++
clang/test/CIR/CodeGenCUDA/global-vars.cu | 47 ++++++++++++
7 files changed, 206 insertions(+), 12 deletions(-)
create mode 100644 clang/test/CIR/CodeGenCUDA/address-spaces.cu
create mode 100644 clang/test/CIR/CodeGenCUDA/global-vars.cu
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index bb3117dfb2c98..b19e48d0f51d4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -433,12 +433,15 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &d,
mlir::Type lty = getTypes().convertTypeForMem(ty);
assert(!cir::MissingFeatures::addressSpace());
- if (d.hasAttr<LoaderUninitializedAttr>() || d.hasAttr<CUDASharedAttr>())
+ mlir::Attribute init = nullptr;
+ if (d.hasAttr<LoaderUninitializedAttr>())
errorNYI(d.getSourceRange(),
"getOrCreateStaticVarDecl: LoaderUninitializedAttr");
- assert(!cir::MissingFeatures::addressSpace());
+ else if (ty.getAddressSpace() != LangAS::opencl_local &&
+ !d.hasAttr<CUDASharedAttr>())
+ init = builder.getZeroInitAttr(convertType(ty));
- mlir::Attribute init = builder.getZeroInitAttr(convertType(ty));
+ assert(!cir::MissingFeatures::addressSpace());
cir::GlobalOp gv = builder.createVersionedGlobal(
getModule(), getLoc(d.getLocation()), name, lty, false, linkage);
@@ -665,11 +668,6 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
var.setAlignment(alignment.getAsAlign().value());
- // There are a lot of attributes that need to be handled here. Until
- // we start to support them, we just report an error if there are any.
- if (d.hasAttrs())
- cgm.errorNYI(d.getSourceRange(), "static var with attrs");
-
if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 223b53731359a..1517058af8782 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -797,6 +797,22 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
"external const declaration with initializer");
}
+ // TODO(cir): if this method is used to handle functions we must have
+ // something closer to GlobalValue::isDeclaration instead of checking for
+ // initializer.
+ if (gv.isDeclaration()) {
+ // TODO(cir): set target attributes
+
+ // External HIP managed variables needed to be recorded for transformation
+ // in both device and host compilations.
+ // External HIP managed variables needed to be recorded for transformation
+ // in both device and host compilations.
+ if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
+ d->hasExternalStorage())
+ llvm_unreachable("NYI");
+ }
+
+ // TODO(cir): address space cast when needed for DAddrSpace.
return gv;
}
@@ -947,10 +963,6 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
errorNYI(vd->getSourceRange(), "annotate global variable");
}
- if (langOpts.CUDA) {
- errorNYI(vd->getSourceRange(), "CUDA global variable");
- }
-
// Set initializer and finalize emission
CIRGenModule::setInitializer(gv, init);
if (emitter)
@@ -1563,6 +1575,39 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s,
return builder.getGlobalViewAttr(ptrTy, gv);
}
+LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) {
+ if (langOpts.OpenCL) {
+ LangAS as = d ? d->getType().getAddressSpace() : LangAS::opencl_global;
+ assert(as == LangAS::opencl_global || as == LangAS::opencl_global_device ||
+ as == LangAS::opencl_global_host || as == LangAS::opencl_constant ||
+ as == LangAS::opencl_local || as >= LangAS::FirstTargetAddressSpace);
+ return as;
+ }
+
+ if (langOpts.SYCLIsDevice &&
+ (!d || d->getType().getAddressSpace() == LangAS::Default))
+ llvm_unreachable("NYI");
+
+ if (langOpts.CUDA && langOpts.CUDAIsDevice) {
+ if (d) {
+ if (d->hasAttr<CUDAConstantAttr>())
+ return LangAS::cuda_constant;
+ if (d->hasAttr<CUDASharedAttr>())
+ return LangAS::cuda_shared;
+ if (d->hasAttr<CUDADeviceAttr>())
+ return LangAS::cuda_device;
+ if (d->getType().isConstQualified())
+ return LangAS::cuda_constant;
+ }
+ return LangAS::cuda_device;
+ }
+
+ if (langOpts.OpenMP)
+ llvm_unreachable("NYI");
+
+ return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
+}
+
// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen.
LangAS CIRGenModule::getLangTempAllocaAddressSpace() const {
if (getLangOpts().OpenCL)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 52464a8bc30c4..d9173234868ee 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -359,6 +359,16 @@ class CIRGenModule : public CIRGenTypeCache {
getAddrOfConstantStringFromLiteral(const StringLiteral *s,
llvm::StringRef name = ".str");
+ /// Return the AST address space of the underlying global variable for D, as
+ /// determined by its declaration. Normally this is the same as the address
+ /// space of D's type, but in CUDA, address spaces are associated with
+ /// declarations, not types. If D is nullptr, return the default address
+ /// space for global variable.
+ ///
+ /// For languages without explicit address spaces, if D has default address
+ /// space, target-specific global or constant address space may be returned.
+ LangAS getGlobalVarAddressSpace(const VarDecl *d);
+
/// Returns the address space for temporary allocations in the language. This
/// ensures that the allocated variable's address space matches the
/// expectations of the AST, rather than using the target's allocation address
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 2f3824d3d47a7..70ffb46050ea1 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -91,3 +91,12 @@ bool TargetCIRGenInfo::isNoProtoCallVariadic(
// For everything else, we just prefer false unless we opt out.
return false;
}
+
+clang::LangAS
+TargetCIRGenInfo::getGlobalVarAddressSpace(CIRGenModule &cgm,
+ const clang::VarDecl *d) const {
+ assert(!cgm.getLangOpts().OpenCL &&
+ !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+ return d ? d->getType().getAddressSpace() : LangAS::Default;
+}
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h
index f4792d5309e36..8db2cbbce5d23 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -49,6 +49,13 @@ class TargetCIRGenInfo {
/// Returns ABI info helper for the target.
const ABIInfo &getABIInfo() const { return *info; }
+ /// Get target favored AST address space of a global variable for languages
+ /// other than OpenCL and CUDA.
+ /// If \p d is nullptr, returns the default target favored address space
+ /// for global variable.
+ virtual clang::LangAS getGlobalVarAddressSpace(CIRGenModule &cgm,
+ const clang::VarDecl *d) const;
+
/// Get the address space for alloca.
virtual mlir::ptr::MemorySpaceAttrInterface getCIRAllocaAddressSpace() const {
return cir::LangAddressSpaceAttr::get(&info->cgt.getMLIRContext(),
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
new file mode 100644
index 0000000000000..68905a6616ca7
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -0,0 +1,78 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__global__ void fn() {
+ int i = 0;
+ __shared__ int j;
+ j = i;
+}
+
+// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j : !s32i
+// CIR-DEVICE: cir.func {{.*}}@_Z2fnv() {{.*}} {
+// CIR-DEVICE: %[[I:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
+// CIR-DEVICE: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-DEVICE: cir.store {{.*}}%[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE: %[[J:.*]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i>
+// CIR-DEVICE: %[[VAL:.*]] = cir.load {{.*}}%[[I]] : !cir.ptr<!s32i>, !s32i
+// CIR-DEVICE: cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE: cir.return
+
+// CIR-HOST: cir.func private dso_local @__cudaPopCallConfiguration
+// CIR-HOST: cir.func private dso_local @cudaLaunchKernel
+// CIR-HOST: cir.func {{.*}}@_Z17__device_stub__fnv()
+
+// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
+// LLVM-DEVICE: define dso_local void @_Z2fnv()
+// LLVM-DEVICE: %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
+// LLVM-DEVICE: store i32 0, ptr %[[ALLOCA]], align 4
+// LLVM-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4
+// LLVM-DEVICE: store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4
+// LLVM-DEVICE: ret void
+
+// LLVM-HOST: %struct.dim3 = type { i32, i32, i32 }
+// LLVM-HOST: declare {{.*}}i32 @__cudaPopCallConfiguration(ptr, ptr, ptr, ptr)
+// LLVM-HOST: declare {{.*}}i32 @cudaLaunchKernel(ptr, %struct.dim3, %struct.dim3, ptr, i64, ptr)
+// LLVM-HOST: define dso_local void @_Z17__device_stub__fnv()
+
+// OGCG-HOST: define dso_local void @_Z17__device_stub__fnv()
+// OGCG-HOST: entry:
+// OGCG-HOST: call i32 @__cudaPopCallConfiguration
+// OGCG-HOST: call {{.*}}i32 @cudaLaunchKernel
+
+// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
+// OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
+// OGCG-DEVICE: entry:
+// OGCG-DEVICE: %[[I:.*]] = alloca i32, align 4
+// OGCG-DEVICE: store i32 0, ptr %[[I]], align 4
+// OGCG-DEVICE: %[[VAL:.*]] = load i32, ptr %[[I]], align 4
+// OGCG-DEVICE: store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) @_ZZ2fnvE1j to ptr), align 4
+// OGCG-DEVICE: ret void
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu b/clang/test/CIR/CodeGenCUDA/global-vars.cu
new file mode 100644
index 0000000000000..f497d0e7f5f64
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu
@@ -0,0 +1,47 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN: -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+__shared__ int a;
+// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
+// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4
+// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4
+// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
+// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4
+
+__device__ int b;
+// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
+// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4
+// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4
+// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4
>From 512dd3acf7be7d06505243603fa2af9106a2b58b Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Fri, 6 Mar 2026 02:39:06 +0700
Subject: [PATCH 2/2] [CIR][CUDA] handle __constant__ variable Remove CIR-HOST
LLVM-HOST and OGCG-HOST from global-vars.cu because shadow variables did not
handled properly, yet Make few changes to handle __device__, __shared__, and
__constant__ global variables using reference from OGCG Create and call a
hook (setTargetAttributes) if the variable is global and declaration only.
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
.../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 14 +++-
clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 22 ++++++
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 68 ++++++++++++++-----
clang/lib/CIR/CodeGen/TargetInfo.h | 9 +++
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 7 ++
clang/test/CIR/CodeGenCUDA/global-vars.cu | 39 ++++-------
6 files changed, 115 insertions(+), 44 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index cf6635fc893fa..257cf396abce7 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -36,5 +36,17 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", "cu.kernel_name"> {
let assemblyFormat = "`<` $kernel_name `>`";
}
+def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized",
+ "cu.externally_initialized"> {
+ let summary = "The marked variable is externally initialized.";
+ let description =
+ [{
+ CUDA __device__ and __constant__ variables, along with surface and
+ textures, might be initialized by host, hence "externally initialized".
+ Therefore they must be emitted even if they are not referenced.
+
+ The attribute corresponds to the attribute on LLVM with the same name.
+ }];
+}
-#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
\ No newline at end of file
+#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index b19e48d0f51d4..a636c07876964 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -14,10 +14,12 @@
#include "CIRGenFunction.h"
#include "mlir/IR/Location.h"
#include "clang/AST/Attr.h"
+#include "clang/AST/Attrs.inc"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclOpenACC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/Basic/Cuda.h"
#include "clang/CIR/MissingFeatures.h"
using namespace clang;
@@ -668,6 +670,26 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
var.setAlignment(alignment.getAsAlign().value());
+ // There are a lot of attributes that need to be handled here. Until
+ // we start to support them, we just report an error if there are any.
+ if (d.hasAttr<AnnotateAttr>())
+ cgm.errorNYI(d.getSourceRange(), "Global annotations are NYI");
+ if (d.getAttr<PragmaClangBSSSectionAttr>())
+ cgm.errorNYI(d.getSourceRange(), "CIR global BSS section attribute is NYI");
+ if (d.getAttr<PragmaClangDataSectionAttr>())
+ cgm.errorNYI(d.getSourceRange(),
+ "CIR global Data section attribute is NYI");
+ if (d.getAttr<PragmaClangRodataSectionAttr>())
+ cgm.errorNYI(d.getSourceRange(),
+ "CIR global Rodata section attribute is NYI");
+ if (d.getAttr<PragmaClangRelroSectionAttr>())
+ cgm.errorNYI(d.getSourceRange(),
+ "CIR global Relro section attribute is NYI");
+
+ if (d.getAttr<SectionAttr>())
+ cgm.errorNYI(d.getSourceRange(),
+ "CIR global object file section attribute is NYI");
+
if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 1517058af8782..bd4d2d4e5c1a5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
+#include "clang/AST/Attrs.inc"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclOpenACC.h"
#include "clang/AST/GlobalDecl.h"
@@ -797,22 +798,19 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
"external const declaration with initializer");
}
- // TODO(cir): if this method is used to handle functions we must have
- // something closer to GlobalValue::isDeclaration instead of checking for
- // initializer.
- if (gv.isDeclaration()) {
+ if (d &&
+ d->isThisDeclarationADefinition(astContext) == VarDecl::DeclarationOnly) {
+ getTargetCIRGenInfo().setTargetAttributes(d, gv, *this);
// TODO(cir): set target attributes
-
- // External HIP managed variables needed to be recorded for transformation
- // in both device and host compilations.
// External HIP managed variables needed to be recorded for transformation
// in both device and host compilations.
if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
d->hasExternalStorage())
- llvm_unreachable("NYI");
+ errorNYI(d->getSourceRange(), "HIP managed attribute");
}
// TODO(cir): address space cast when needed for DAddrSpace.
+ assert(!cir::MissingFeatures::addressSpace());
return gv;
}
@@ -896,9 +894,18 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
assert(!cir::MissingFeatures::cudaSupport());
- if (vd->hasAttr<LoaderUninitializedAttr>()) {
+ // CUDA E.2.4.1 "__shared__ variables cannot have an initialization
+ // as part of their declaration." Sema has already checked for
+ // error cases, so we just need to set Init to UndefValue.
+ bool isCUDASharedVar =
+ getLangOpts().CUDAIsDevice && vd->hasAttr<CUDASharedAttr>();
+ // TODO(cir): implement isCUDAShadowVar and isCUDADeviceShadowVar, reference:
+ // OGCG
+
+ if (getLangOpts().CUDA && isCUDASharedVar) {
+ init = cir::UndefAttr::get(&getMLIRContext(), convertType(vd->getType()));
+ } else if (vd->hasAttr<LoaderUninitializedAttr>()) {
errorNYI(vd->getSourceRange(), "loader uninitialized attribute");
- return;
} else if (!initExpr) {
// This is a tentative definition; tentative definitions are
// implicitly initialized with { 0 }.
@@ -963,6 +970,39 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
errorNYI(vd->getSourceRange(), "annotate global variable");
}
+ // Set CIR's linkage type as appropriate.
+ cir::GlobalLinkageKind linkage =
+ getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
+
+ // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+ // the device. [...]"
+ // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+ // __device__, declares a variable that: [...]
+ // Is accessible from all the threads within the grid and from the host
+ // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+ // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+ if (langOpts.CUDA) {
+ if (langOpts.CUDAIsDevice) {
+ // __shared__ variables is not marked as externally initialized,
+ // because they must not be initialized.
+ if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
+ !vd->isConstexpr() && !vd->getType().isConstQualified() &&
+ (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>() ||
+ vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ vd->getType()->isCUDADeviceBuiltinTextureType())) {
+ gv->setAttr(cir::CUDAExternallyInitializedAttr::getMnemonic(),
+ cir::CUDAExternallyInitializedAttr::get(&getMLIRContext()));
+ }
+ } else {
+ // TODO(cir):
+ // Adjust linkage of shadow variables in host compilation
+ // getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
+ }
+ // TODO(cir):
+ // Handle variable registration
+ // getCUDARuntime().handleVarRegistration(vd, gv);
+ }
+
// Set initializer and finalize emission
CIRGenModule::setInitializer(gv, init);
if (emitter)
@@ -977,10 +1017,6 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
/*ExcludeDtor=*/true)));
assert(!cir::MissingFeatures::opGlobalSection());
- // Set CIR's linkage type as appropriate.
- cir::GlobalLinkageKind linkage =
- getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
-
// Set CIR linkage and DLL storage class.
gv.setLinkage(linkage);
// FIXME(cir): setLinkage should likely set MLIR's visibility automatically.
@@ -1586,7 +1622,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) {
if (langOpts.SYCLIsDevice &&
(!d || d->getType().getAddressSpace() == LangAS::Default))
- llvm_unreachable("NYI");
+ errorNYI(d->getSourceRange(), "global as for SYCL device");
if (langOpts.CUDA && langOpts.CUDAIsDevice) {
if (d) {
@@ -1603,7 +1639,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) {
}
if (langOpts.OpenMP)
- llvm_unreachable("NYI");
+ errorNYI(d->getSourceRange(), "global as for OpenMP");
return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
}
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h
index 8db2cbbce5d23..9ba155b220fbc 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -106,6 +106,15 @@ class TargetCIRGenInfo {
/// right thing when calling a function with no know signature.
virtual bool isNoProtoCallVariadic(const FunctionNoProtoType *fnType) const;
+ /// Provides a convenient hook to handle extra target-specific attributes
+ /// for the given global.
+ /// In OG, the function receives an llvm::GlobalValue. However, functions
+ /// and global variables are separate types in Clang IR, so we use a general
+ /// mlir::Operation*.
+ virtual void setTargetAttributes(const clang::Decl *decl,
+ mlir::Operation *global,
+ CIRGenModule &module) const {}
+
virtual bool isScalarizableAsmOperand(CIRGenFunction &cgf,
mlir::Type ty) const {
return false;
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 03085ad29ab78..eda07dab4d97b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2557,6 +2557,13 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
const StringRef symbol = op.getSymName();
SmallVector<mlir::NamedAttribute> attributes;
+ // Mark externally_initialized for __device__ and __constant__
+ if (auto extInit =
+ op->getAttr(CUDAExternallyInitializedAttr::getMnemonic())) {
+ attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+ rewriter.getUnitAttr()));
+ }
+
if (init.has_value()) {
if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) {
GlobalInitAttrRewriter initRewriter(llvmType, rewriter);
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu b/clang/test/CIR/CodeGenCUDA/global-vars.cu
index f497d0e7f5f64..4791f145d1bae 100644
--- a/clang/test/CIR/CodeGenCUDA/global-vars.cu
+++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu
@@ -5,43 +5,28 @@
// RUN: -I%S/Inputs/ %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
-// RUN: -I%S/Inputs/ %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
-
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
// RUN: -I%S/Inputs/ %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN: -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
-
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
// RUN: -I%S/Inputs/ %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
-// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN: -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
-__shared__ int a;
-// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
-// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
-// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4
-// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4
+__device__ int a;
+// CIR-DEVICE: cir.global external @[[DEV:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4
+
+__shared__ int b;
+// CIR-DEVICE: cir.global external @[[SHARED:.*]] = #cir.undef : !s32i {alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 undef, align 4
// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
-// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4
-__device__ int b;
-// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
-// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64}
-// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4
-// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4
-// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4
-// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4
+__constant__ int c;
+// CIR-DEVICE: cir.global constant external @[[CONST:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, align 4
+// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized constant i32 0, align 4
More information about the cfe-commits
mailing list