[clang] [CIR][CUDA][HIP] Add NVPTX target info and CUDA/HIP global emission filtering (PR #177827)
David Rivera via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 3 02:32:28 PST 2026
https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/177827
>From fed5831729bf024045e3ee3018ddbaa21a44657f Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Sat, 24 Jan 2026 23:56:28 -0500
Subject: [PATCH 1/7] [CIR][CUDA] Add NVPTX target info and CUDA/HIP global
emission filtering
---
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 66 +++++++++++++++++++++++
clang/lib/CIR/CodeGen/CIRGenModule.h | 4 ++
clang/lib/CIR/CodeGen/TargetInfo.cpp | 19 +++++++
clang/lib/CIR/CodeGen/TargetInfo.h | 2 +
clang/test/CIR/CodeGenCUDA/filter-decl.cu | 37 +++++++++++++
clang/test/CIR/CodeGenCUDA/nvptx-basic.cu | 30 +++++++++++
6 files changed, 158 insertions(+)
create mode 100644 clang/test/CIR/CodeGenCUDA/filter-decl.cu
create mode 100644 clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index fd99a47b8b445..1675c34b25c8a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclBase.h"
+#include "clang/AST/ASTLambda.h"
#include "clang/AST/DeclOpenACC.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/AST/RecordLayout.h"
@@ -29,6 +30,7 @@
#include "clang/CIR/MissingFeatures.h"
#include "CIRGenFunctionInfo.h"
+#include "TargetInfo.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
@@ -252,6 +254,10 @@ const TargetCIRGenInfo &CIRGenModule::getTargetCIRGenInfo() {
return *theTargetCIRGenInfo;
}
}
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ theTargetCIRGenInfo = createNVPTXTargetCIRGenInfo(genTypes);
+ return *theTargetCIRGenInfo;
}
}
@@ -368,6 +374,35 @@ void CIRGenModule::emitDeferred() {
}
}
+template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) {
+ if (!decl)
+ return false;
+ if (auto *attr = decl->getAttr<AttrT>())
+ return attr->isImplicit();
+ return decl->isImplicit();
+}
+
+// This function returns true if M is a specialization, a template,
+// or a non-generic lambda call operator.
+inline bool isLambdaCallOperator(const CXXMethodDecl *MD) {
+ const CXXRecordDecl *LambdaClass = MD->getParent();
+ if (!LambdaClass || !LambdaClass->isLambda()) return false;
+ return MD->getOverloadedOperator() == OO_Call;
+}
+
+bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
+ assert(langOpts.CUDA && "Should not be called by non-CUDA languages");
+ // We need to emit host-side 'shadows' for all global
+ // device-side variables because the CUDA runtime needs their
+ // size and host-side address in order to provide access to
+ // their device-side incarnations.
+ return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() ||
+ global->hasAttr<CUDAConstantAttr>() ||
+ global->hasAttr<CUDASharedAttr>() ||
+ global->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ global->getType()->isCUDADeviceBuiltinTextureType();
+}
+
void CIRGenModule::emitGlobal(clang::GlobalDecl gd) {
if (const auto *cd = dyn_cast<clang::OpenACCConstructDecl>(gd.getDecl())) {
emitGlobalOpenACCDecl(cd);
@@ -382,6 +417,35 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) {
const auto *global = cast<ValueDecl>(gd.getDecl());
+ // If this is CUDA, be selective about which declarations we emit.
+ // Non-constexpr non-lambda implicit host device functions are not emitted
+ // unless they are used on device side.
+ if (langOpts.CUDA) {
+ assert((isa<FunctionDecl>(global) || isa<VarDecl>(global)) &&
+ "Expected Variable or Function");
+ if (const auto *varDecl = dyn_cast<VarDecl>(global)) {
+ if (!shouldEmitCUDAGlobalVar(varDecl))
+ return;
+ } else if (langOpts.CUDAIsDevice) {
+ const auto *functionDecl = dyn_cast<FunctionDecl>(global);
+ if ((!global->hasAttr<CUDADeviceAttr>() ||
+ (langOpts.OffloadImplicitHostDeviceTemplates &&
+ hasImplicitAttr<CUDAHostAttr>(functionDecl) &&
+ hasImplicitAttr<CUDADeviceAttr>(functionDecl) &&
+ !functionDecl->isConstexpr() &&
+ !isLambdaCallOperator(functionDecl) &&
+ !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(
+ functionDecl))) &&
+ !global->hasAttr<CUDAGlobalAttr>() &&
+ !(langOpts.HIPStdPar && isa<FunctionDecl>(global) &&
+ !global->hasAttr<CUDAHostAttr>()))
+ return;
+ // Device-only functions are the only things we skip.
+ } else if (!global->hasAttr<CUDAHostAttr>() &&
+ global->hasAttr<CUDADeviceAttr>())
+ return;
+ }
+
if (const auto *fd = dyn_cast<FunctionDecl>(global)) {
// Update deferred annotations with the latest declaration if the function
// was already used or defined.
@@ -1997,6 +2061,8 @@ bool CIRGenModule::mayBeEmittedEagerly(const ValueDecl *global) {
return true;
}
+
+
static bool shouldAssumeDSOLocal(const CIRGenModule &cgm,
cir::CIRGlobalValueInterface gv) {
if (gv.hasLocalLinkage())
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 46ef0ad426ae1..88b66129a8348 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -567,6 +567,10 @@ class CIRGenModule : public CIRGenTypeCache {
static void setInitializer(cir::GlobalOp &op, mlir::Attribute value);
+ // Whether a global variable should be emitted by CUDA/HIP host/device
+ // related attributes.
+ bool shouldEmitCUDAGlobalVar(const VarDecl *global) const;
+
void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old,
cir::FuncOp newFn);
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 377c532e492d9..dc29dc0204c19 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -56,6 +56,25 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo {
} // namespace
+namespace {
+
+class NVPTXABIInfo : public ABIInfo {
+public:
+ NVPTXABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {}
+};
+
+class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
+public:
+ NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
+ : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}
+};
+} // namespace
+
+std::unique_ptr<TargetCIRGenInfo>
+clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) {
+ return std::make_unique<NVPTXTargetCIRGenInfo>(cgt);
+}
+
std::unique_ptr<TargetCIRGenInfo>
clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) {
return std::make_unique<X8664TargetCIRGenInfo>(cgt);
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h
index 9535ba94fb08b..bab838692e215 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -124,6 +124,8 @@ class TargetCIRGenInfo {
std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt);
+std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt);
+
} // namespace clang::CIRGen
#endif // LLVM_CLANG_LIB_CIR_TARGETINFO_H
diff --git a/clang/test/CIR/CodeGenCUDA/filter-decl.cu b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
new file mode 100644
index 0000000000000..ac1e7aeb4f1e1
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
@@ -0,0 +1,37 @@
+// Based on clang/test/CodeGenCUDA/filter-decl.cu tailored for CIR current capabilities.
+// Tests that host/device functions are emitted only on the appropriate side.
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda \
+// RUN: -I%S/../inputs -emit-cir %s -o %t.host.cir
+// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CHECK-HOST
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
+// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.device.cir
+// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CHECK-DEVICE
+
+#include "cuda.h"
+
+// Implicit host function (no attribute) — host only
+// CHECK-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv()
+// CHECK-DEVICE-NOT: @_Z20implicithostonlyfuncv
+void implicithostonlyfunc(void) {}
+
+// Explicit __host__ function — host only
+// CHECK-HOST: cir.func {{.*}} @_Z20explicithostonlyfuncv()
+// CHECK-DEVICE-NOT: @_Z20explicithostonlyfuncv
+__host__ void explicithostonlyfunc(void) {}
+
+// __device__ function — device only
+// CHECK-HOST-NOT: @_Z14deviceonlyfuncv
+// CHECK-DEVICE: cir.func {{.*}} @_Z14deviceonlyfuncv()
+__device__ void deviceonlyfunc(void) {}
+
+// __host__ __device__ function — both sides
+// CHECK-HOST: cir.func {{.*}} @_Z14hostdevicefuncv()
+// CHECK-DEVICE: cir.func {{.*}} @_Z14hostdevicefuncv()
+__host__ __device__ void hostdevicefunc(void) {}
+
+// __global__ kernel — both sides (stub on host, kernel on device)
+// CHECK-HOST: cir.func {{.*}} @__device_stub__globalfunc()
+// CHECK-DEVICE: cir.func {{.*}} @_Z10globalfuncv()
+__global__ void globalfunc(void) {}
diff --git a/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
new file mode 100644
index 0000000000000..fe2233de4d10c
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
@@ -0,0 +1,30 @@
+// Based on clang/test/CodeGenCUDA/ptx-kernels.cu tailored for CIR current capabilities.
+// Tests basic device-side compilation with NVPTX target.
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
+// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "cuda.h"
+
+// CHECK: cir.func {{.*}} @device_function()
+extern "C"
+__device__ void device_function() {}
+
+// CHECK: cir.func {{.*}} @global_function()
+// CHECK: cir.call @device_function()
+extern "C"
+__global__ void global_function() {
+ device_function();
+}
+
+// Template kernel with explicit instantiation
+template <typename T> __global__ void templated_kernel(T param) {}
+template __global__ void templated_kernel<int>(int);
+// CHECK: cir.func {{.*}} @_Z16templated_kernelIiEvT_
+
+// Anonymous namespace kernel
+namespace {
+__global__ void anonymous_ns_kernel() {}
+// CHECK: cir.func {{.*}} @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv
+}
>From ac0327a92a78249c7a0a3311c5929049355079e5 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Sun, 25 Jan 2026 00:08:46 -0500
Subject: [PATCH 2/7] le format monseiur
---
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 1675c34b25c8a..87ba3edadaff2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -17,8 +17,8 @@
#include "CIRGenFunction.h"
#include "clang/AST/ASTContext.h"
-#include "clang/AST/DeclBase.h"
#include "clang/AST/ASTLambda.h"
+#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclOpenACC.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/AST/RecordLayout.h"
@@ -386,7 +386,8 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) {
// or a non-generic lambda call operator.
inline bool isLambdaCallOperator(const CXXMethodDecl *MD) {
const CXXRecordDecl *LambdaClass = MD->getParent();
- if (!LambdaClass || !LambdaClass->isLambda()) return false;
+ if (!LambdaClass || !LambdaClass->isLambda())
+ return false;
return MD->getOverloadedOperator() == OO_Call;
}
@@ -2061,8 +2062,6 @@ bool CIRGenModule::mayBeEmittedEagerly(const ValueDecl *global) {
return true;
}
-
-
static bool shouldAssumeDSOLocal(const CIRGenModule &cgm,
cir::CIRGlobalValueInterface gv) {
if (gv.hasLocalLinkage())
>From da4338614a3b3fd242c2100d11bcdd0b38884f42 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Sun, 25 Jan 2026 00:46:49 -0500
Subject: [PATCH 3/7] fix nit test case
---
clang/test/CIR/CodeGenCUDA/filter-decl.cu | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CIR/CodeGenCUDA/filter-decl.cu b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
index ac1e7aeb4f1e1..c34117d6e9e71 100644
--- a/clang/test/CIR/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
@@ -32,6 +32,6 @@ __device__ void deviceonlyfunc(void) {}
__host__ __device__ void hostdevicefunc(void) {}
// __global__ kernel — both sides (stub on host, kernel on device)
-// CHECK-HOST: cir.func {{.*}} @__device_stub__globalfunc()
+// CHECK-HOST: cir.func {{.*}} @_Z25__device_stub__globalfuncv()
// CHECK-DEVICE: cir.func {{.*}} @_Z10globalfuncv()
__global__ void globalfunc(void) {}
>From d9fbd4aadbeaede3d1c6e88762ff2745be0a2732 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Fri, 30 Jan 2026 17:35:54 -0500
Subject: [PATCH 4/7] address comments
---
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 11 +-----
clang/test/CIR/CodeGenCUDA/filter-decl.cu | 46 ++++++++++++++++-------
2 files changed, 34 insertions(+), 23 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 87ba3edadaff2..09fd2103cf673 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -382,15 +382,7 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) {
return decl->isImplicit();
}
-// This function returns true if M is a specialization, a template,
-// or a non-generic lambda call operator.
-inline bool isLambdaCallOperator(const CXXMethodDecl *MD) {
- const CXXRecordDecl *LambdaClass = MD->getParent();
- if (!LambdaClass || !LambdaClass->isLambda())
- return false;
- return MD->getOverloadedOperator() == OO_Call;
-}
-
+// TODO(cir): This should be shared with OG Codegen.
bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
assert(langOpts.CUDA && "Should not be called by non-CUDA languages");
// We need to emit host-side 'shadows' for all global
@@ -427,6 +419,7 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) {
if (const auto *varDecl = dyn_cast<VarDecl>(global)) {
if (!shouldEmitCUDAGlobalVar(varDecl))
return;
+ // TODO(cir): This should be shared with OG Codegen.
} else if (langOpts.CUDAIsDevice) {
const auto *functionDecl = dyn_cast<FunctionDecl>(global);
if ((!global->hasAttr<CUDADeviceAttr>() ||
diff --git a/clang/test/CIR/CodeGenCUDA/filter-decl.cu b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
index c34117d6e9e71..b41893001c195 100644
--- a/clang/test/CIR/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
@@ -1,37 +1,55 @@
// Based on clang/test/CodeGenCUDA/filter-decl.cu tailored for CIR current capabilities.
// Tests that host/device functions are emitted only on the appropriate side.
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda \
-// RUN: -I%S/../inputs -emit-cir %s -o %t.host.cir
-// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CHECK-HOST
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
+// RUN: -x cuda -I%S/../inputs -emit-cir %s -o %t.host.cir
+// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CIR-HOST
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.device.cir
-// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CHECK-DEVICE
+// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CIR-DEVICE
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
+// RUN: -x cuda -I%S/../inputs -emit-llvm %s -o %t.host.ll
+// RUN: FileCheck --input-file=%t.host.ll %s --check-prefix=OGCG-HOST
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
+// RUN: -I%S/../inputs -fcuda-is-device -emit-llvm %s -o %t.device.ll
+// RUN: FileCheck --input-file=%t.device.ll %s --check-prefix=OGCG-DEVICE
#include "cuda.h"
// Implicit host function (no attribute) — host only
-// CHECK-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv()
-// CHECK-DEVICE-NOT: @_Z20implicithostonlyfuncv
+// CIR-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv()
+// CIR-DEVICE-NOT: @_Z20implicithostonlyfuncv
+// OGCG-HOST: define{{.*}} void @_Z20implicithostonlyfuncv()
+// OGCG-DEVICE-NOT: @_Z20implicithostonlyfuncv
void implicithostonlyfunc(void) {}
// Explicit __host__ function — host only
-// CHECK-HOST: cir.func {{.*}} @_Z20explicithostonlyfuncv()
-// CHECK-DEVICE-NOT: @_Z20explicithostonlyfuncv
+// CIR-HOST: cir.func {{.*}} @_Z20explicithostonlyfuncv()
+// CIR-DEVICE-NOT: @_Z20explicithostonlyfuncv
+// OGCG-HOST: define{{.*}} void @_Z20explicithostonlyfuncv()
+// OGCG-DEVICE-NOT: @_Z20explicithostonlyfuncv
__host__ void explicithostonlyfunc(void) {}
// __device__ function — device only
-// CHECK-HOST-NOT: @_Z14deviceonlyfuncv
-// CHECK-DEVICE: cir.func {{.*}} @_Z14deviceonlyfuncv()
+// CIR-HOST-NOT: @_Z14deviceonlyfuncv
+// CIR-DEVICE: cir.func {{.*}} @_Z14deviceonlyfuncv()
+// OGCG-HOST-NOT: @_Z14deviceonlyfuncv
+// OGCG-DEVICE: define{{.*}} void @_Z14deviceonlyfuncv()
__device__ void deviceonlyfunc(void) {}
// __host__ __device__ function — both sides
-// CHECK-HOST: cir.func {{.*}} @_Z14hostdevicefuncv()
-// CHECK-DEVICE: cir.func {{.*}} @_Z14hostdevicefuncv()
+// CIR-HOST: cir.func {{.*}} @_Z14hostdevicefuncv()
+// CIR-DEVICE: cir.func {{.*}} @_Z14hostdevicefuncv()
+// OGCG-HOST: define{{.*}} void @_Z14hostdevicefuncv()
+// OGCG-DEVICE: define{{.*}} void @_Z14hostdevicefuncv()
__host__ __device__ void hostdevicefunc(void) {}
// __global__ kernel — both sides (stub on host, kernel on device)
-// CHECK-HOST: cir.func {{.*}} @_Z25__device_stub__globalfuncv()
-// CHECK-DEVICE: cir.func {{.*}} @_Z10globalfuncv()
+// CIR-HOST: cir.func {{.*}} @_Z25__device_stub__globalfuncv()
+// CIR-DEVICE: cir.func {{.*}} @_Z10globalfuncv()
+// OGCG-HOST: define{{.*}} void @_Z25__device_stub__globalfuncv()
+// OGCG-DEVICE: define{{.*}} void @_Z10globalfuncv()
__global__ void globalfunc(void) {}
>From 503be9f5ecc861623580cea914f1134dd5a26437 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Fri, 30 Jan 2026 17:41:12 -0500
Subject: [PATCH 5/7] fmt yo
---
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 09fd2103cf673..508e941517c3d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -419,7 +419,7 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) {
if (const auto *varDecl = dyn_cast<VarDecl>(global)) {
if (!shouldEmitCUDAGlobalVar(varDecl))
return;
- // TODO(cir): This should be shared with OG Codegen.
+ // TODO(cir): This should be shared with OG Codegen.
} else if (langOpts.CUDAIsDevice) {
const auto *functionDecl = dyn_cast<FunctionDecl>(global);
if ((!global->hasAttr<CUDADeviceAttr>() ||
>From bb7054c1532087237f2f98ec1acd1a7b31a3415b Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Tue, 3 Feb 2026 02:50:24 -0500
Subject: [PATCH 6/7] nit: fix lit includes
---
clang/test/CIR/CodeGenCUDA/filter-decl.cu | 8 ++++----
clang/test/CIR/CodeGenCUDA/nvptx-basic.cu | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/clang/test/CIR/CodeGenCUDA/filter-decl.cu b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
index b41893001c195..7825691f57836 100644
--- a/clang/test/CIR/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
@@ -2,19 +2,19 @@
// Tests that host/device functions are emitted only on the appropriate side.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
-// RUN: -x cuda -I%S/../inputs -emit-cir %s -o %t.host.cir
+// RUN: -x cuda -I%S/../Inputs -emit-cir %s -o %t.host.cir
// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CIR-HOST
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
-// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.device.cir
+// RUN: -I%S/../Inputs -fcuda-is-device -emit-cir %s -o %t.device.cir
// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CIR-DEVICE
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
-// RUN: -x cuda -I%S/../inputs -emit-llvm %s -o %t.host.ll
+// RUN: -x cuda -I%S/../Inputs -emit-llvm %s -o %t.host.ll
// RUN: FileCheck --input-file=%t.host.ll %s --check-prefix=OGCG-HOST
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
-// RUN: -I%S/../inputs -fcuda-is-device -emit-llvm %s -o %t.device.ll
+// RUN: -I%S/../Inputs -fcuda-is-device -emit-llvm %s -o %t.device.ll
// RUN: FileCheck --input-file=%t.device.ll %s --check-prefix=OGCG-DEVICE
#include "cuda.h"
diff --git a/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
index fe2233de4d10c..02094cca0cedd 100644
--- a/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
+++ b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
@@ -2,7 +2,7 @@
// Tests basic device-side compilation with NVPTX target.
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
-// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: -I%S/../Inputs -fcuda-is-device -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s
#include "cuda.h"
>From b204f16e7ca0f1c7bbef428261d69156d05b4425 Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Tue, 3 Feb 2026 04:04:16 -0500
Subject: [PATCH 7/7] fix lit includes yet again
---
clang/test/CIR/CodeGenCUDA/filter-decl.cu | 10 +++++-----
clang/test/CIR/CodeGenCUDA/nvptx-basic.cu | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/test/CIR/CodeGenCUDA/filter-decl.cu b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
index 7825691f57836..a2a7e9ba9a1b0 100644
--- a/clang/test/CIR/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CIR/CodeGenCUDA/filter-decl.cu
@@ -2,22 +2,22 @@
// Tests that host/device functions are emitted only on the appropriate side.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
-// RUN: -x cuda -I%S/../Inputs -emit-cir %s -o %t.host.cir
+// RUN: -x cuda -emit-cir %s -o %t.host.cir
// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CIR-HOST
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
-// RUN: -I%S/../Inputs -fcuda-is-device -emit-cir %s -o %t.device.cir
+// RUN: -fcuda-is-device -emit-cir %s -o %t.device.cir
// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CIR-DEVICE
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
-// RUN: -x cuda -I%S/../Inputs -emit-llvm %s -o %t.host.ll
+// RUN: -x cuda -emit-llvm %s -o %t.host.ll
// RUN: FileCheck --input-file=%t.host.ll %s --check-prefix=OGCG-HOST
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
-// RUN: -I%S/../Inputs -fcuda-is-device -emit-llvm %s -o %t.device.ll
+// RUN: -fcuda-is-device -emit-llvm %s -o %t.device.ll
// RUN: FileCheck --input-file=%t.device.ll %s --check-prefix=OGCG-DEVICE
-#include "cuda.h"
+#include "Inputs/cuda.h"
// Implicit host function (no attribute) — host only
// CIR-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv()
diff --git a/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
index 02094cca0cedd..99f0164a18506 100644
--- a/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
+++ b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu
@@ -2,10 +2,10 @@
// Tests basic device-side compilation with NVPTX target.
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
-// RUN: -I%S/../Inputs -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: -fcuda-is-device -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s
-#include "cuda.h"
+#include "Inputs/cuda.h"
// CHECK: cir.func {{.*}} @device_function()
extern "C"
More information about the cfe-commits
mailing list