[clang] [clang][OpenMP][SPIR-V] Fix addrspace of pointer kernel arguments (PR #157172)
Nick Sarnie via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 9 07:53:56 PDT 2025
https://github.com/sarnex updated https://github.com/llvm/llvm-project/pull/157172
>From fe7a7617d383e367081085b8514b33abc076dc6c Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Fri, 5 Sep 2025 13:33:17 -0700
Subject: [PATCH 1/3] [OpenMP][SPIR-V] Fix addrspace of pointer kernel
arguments
Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
clang/lib/CodeGen/CGCall.cpp | 5 +--
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 +-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 41 ++++++++++++--------
clang/lib/CodeGen/CodeGenFunction.h | 5 ++-
clang/lib/CodeGen/CodeGenSYCL.cpp | 2 +-
clang/lib/CodeGen/CodeGenTypes.h | 6 +--
clang/lib/CodeGen/Targets/SPIR.cpp | 8 ++--
clang/test/OpenMP/spirv_kernel_addrspace.cpp | 24 ++++++++++++
8 files changed, 64 insertions(+), 31 deletions(-)
create mode 100644 clang/test/OpenMP/spirv_kernel_addrspace.cpp
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a94a7ed51521c..0b2fce4244fb6 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -752,9 +752,8 @@ const CGFunctionInfo &CodeGenTypes::arrangeBuiltinFunctionDeclaration(
RequiredArgs::All);
}
-const CGFunctionInfo &
-CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
- const FunctionArgList &args) {
+const CGFunctionInfo &CodeGenTypes::arrangeDeviceKernelCallerDeclaration(
+ QualType resultType, const FunctionArgList &args) {
CanQualTypeList argTypes = getArgTypesForDeclaration(Context, args);
return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b38eb54036e60..8d67fe21367ac 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1238,7 +1238,7 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
+ return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
@@ -6227,7 +6227,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
+ return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
};
cantFail(OMPBuilder.emitTargetRegionFunction(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 1360680cc6640..d72cd8fbfd608 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -471,12 +471,13 @@ struct FunctionOptions {
const StringRef FunctionName;
/// Location of the non-debug version of the outlined function.
SourceLocation Loc;
+ const bool IsDeviceKernel = false;
explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
bool RegisterCastedArgsOnly, StringRef FunctionName,
- SourceLocation Loc)
+ SourceLocation Loc, bool IsDeviceKernel)
: S(S), UIntPtrCastRequired(UIntPtrCastRequired),
RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
- FunctionName(FunctionName), Loc(Loc) {}
+ FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {}
};
} // namespace
@@ -570,7 +571,11 @@ static llvm::Function *emitOutlinedFunctionPrologue(
// Create the function declaration.
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
+ FO.IsDeviceKernel
+ ? CGM.getTypes().arrangeDeviceKernelCallerDeclaration(Ctx.VoidTy,
+ TargetArgs)
+ : CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy,
+ TargetArgs);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
auto *F =
@@ -664,9 +669,9 @@ static llvm::Function *emitOutlinedFunctionPrologue(
return F;
}
-llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
- SourceLocation Loc) {
+llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction(
+ const CapturedStmt &S, const OMPExecutableDirective &D) {
+ SourceLocation Loc = D.getBeginLoc();
assert(
CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
@@ -682,7 +687,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << CapturedStmtInfo->getHelperName();
-
+ OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
+ bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() &&
+ isOpenMPTargetExecutionDirective(EKind) &&
+ D.getCapturedStmt(OMPD_target) == &S;
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
llvm::Function *WrapperF = nullptr;
if (NeedWrapperFunction) {
@@ -690,7 +698,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
// OpenMPI-IR-Builder.
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
/*RegisterCastedArgsOnly=*/true,
- CapturedStmtInfo->getHelperName(), Loc);
+ CapturedStmtInfo->getHelperName(), Loc,
+ IsDeviceKernel);
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
WrapperF =
emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
@@ -698,7 +707,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
Out << "_debug__";
}
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
- Out.str(), Loc);
+ Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel);
llvm::Function *F = emitOutlinedFunctionPrologue(
*this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO);
CodeGenFunction::OMPPrivateScope LocalScope(*this);
@@ -6119,13 +6128,13 @@ void CodeGenFunction::EmitOMPDistributeDirective(
emitOMPDistributeDirective(S, *this, CGM);
}
-static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
- const CapturedStmt *S,
- SourceLocation Loc) {
+static llvm::Function *
+emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S,
+ const OMPExecutableDirective &D) {
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
CGF.CapturedStmtInfo = &CapStmtInfo;
- llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
+ llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, D);
Fn->setDoesNotRecurse();
return Fn;
}
@@ -6190,8 +6199,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
Builder, /*CreateBranch=*/false, ".ordered.after");
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
GenerateOpenMPCapturedVars(*CS, CapturedVars);
- llvm::Function *OutlinedFn =
- emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
+ llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS, S);
assert(S.getBeginLoc().isValid() &&
"Outlined function call location must be valid.");
ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
@@ -6233,8 +6241,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
if (C) {
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- llvm::Function *OutlinedFn =
- emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
+ llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS, S);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
OutlinedFn, CapturedVars);
} else {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 123cb4f51f828..727487b46054f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3694,8 +3694,9 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
- llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
- SourceLocation Loc);
+ llvm::Function *
+ GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ const OMPExecutableDirective &D);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
diff --git a/clang/lib/CodeGen/CodeGenSYCL.cpp b/clang/lib/CodeGen/CodeGenSYCL.cpp
index b9a96fe8ab838..7d66d96ad0a1b 100644
--- a/clang/lib/CodeGen/CodeGenSYCL.cpp
+++ b/clang/lib/CodeGen/CodeGenSYCL.cpp
@@ -49,7 +49,7 @@ void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
// Compute the function info and LLVM function type.
const CGFunctionInfo &FnInfo =
- getTypes().arrangeSYCLKernelCallerDeclaration(Ctx.VoidTy, Args);
+ getTypes().arrangeDeviceKernelCallerDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo);
// Retrieve the generated name for the SYCL kernel caller function.
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 29f6f1ec80bc3..9de7e0a83579d 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -229,12 +229,12 @@ class CodeGenTypes {
const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType,
const CallArgList &args);
- /// A SYCL kernel caller function is an offload device entry point function
+ /// A device kernel caller function is an offload device entry point function
/// with a target device dependent calling convention such as amdgpu_kernel,
/// ptx_kernel, or spir_kernel.
const CGFunctionInfo &
- arrangeSYCLKernelCallerDeclaration(QualType resultType,
- const FunctionArgList &args);
+ arrangeDeviceKernelCallerDeclaration(QualType resultType,
+ const FunctionArgList &args);
/// Objective-C methods are C functions with some implicit parameters.
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD);
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 53806249ded60..01c33d1470765 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -132,10 +132,12 @@ ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
}
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
- if (getContext().getLangOpts().CUDAIsDevice) {
+ if (getContext().getLangOpts().CUDAIsDevice ||
+ getContext().getLangOpts().OpenMPIsTargetDevice) {
// Coerce pointer arguments with default address space to CrossWorkGroup
- // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
- // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
+ // pointers for HIPSPV/CUDASPV/OMPSPV. When the language mode is
+ // HIP/CUDA/OMP, the SPIRTargetInfo maps cuda_device to SPIR-V's
+ // CrossWorkGroup address space.
llvm::Type *LTy = CGT.ConvertType(Ty);
auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
diff --git a/clang/test/OpenMP/spirv_kernel_addrspace.cpp b/clang/test/OpenMP/spirv_kernel_addrspace.cpp
new file mode 100644
index 0000000000000..cea7e9958c341
--- /dev/null
+++ b/clang/test/OpenMP/spirv_kernel_addrspace.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc -DTEAMS
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -DTEAMS -o - | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK: define weak_odr protected spir_kernel void @__omp_offloading_{{.*}}(ptr addrspace(1) noalias noundef %{{.*}}, ptr addrspace(1) noundef align 4 dereferenceable(128) %{{.*}})
+
+int main() {
+ int x[32] = {0};
+
+#ifdef TEAMS
+#pragma omp target teams
+#else
+#pragma omp target
+#endif
+ for(int i = 0; i < 32; i++) {
+ if(i > 0)
+ x[i] = x[i-1] + i;
+ }
+
+return x[31];
+}
+
>From 1bab6dd32f4071fb5a85e4271fe5bab1e3a58fc1 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Tue, 9 Sep 2025 07:44:36 -0700
Subject: [PATCH 2/3] do it for all target devices
Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
clang/lib/CodeGen/Targets/SPIR.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 01c33d1470765..37dd282cc3c5a 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -132,12 +132,10 @@ ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
}
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
- if (getContext().getLangOpts().CUDAIsDevice ||
- getContext().getLangOpts().OpenMPIsTargetDevice) {
+ if (getContext().getLangOpts().isTargetDevice()) {
// Coerce pointer arguments with default address space to CrossWorkGroup
- // pointers for HIPSPV/CUDASPV/OMPSPV. When the language mode is
- // HIP/CUDA/OMP, the SPIRTargetInfo maps cuda_device to SPIR-V's
- // CrossWorkGroup address space.
+ // pointers for target devices as default address space kernel arguments
+ // are not allowed.
llvm::Type *LTy = CGT.ConvertType(Ty);
auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
>From aafdc15ac4c3a8af025f9e92e801e07bb14c1e17 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Tue, 9 Sep 2025 07:53:30 -0700
Subject: [PATCH 3/3] nvm fix comment and issue it informed me about
Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
clang/lib/CodeGen/Targets/SPIR.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 37dd282cc3c5a..2e3fc53c58edc 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -135,10 +135,11 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
if (getContext().getLangOpts().isTargetDevice()) {
// Coerce pointer arguments with default address space to CrossWorkGroup
// pointers for target devices as default address space kernel arguments
- // are not allowed.
+ // are not allowed. We use the opencl_global language address space which
+ // always maps to CrossWorkGroup.
llvm::Type *LTy = CGT.ConvertType(Ty);
auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
- auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
+ auto GlobalAS = getContext().getTargetAddressSpace(LangAS::opencl_global);
auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
LTy = llvm::PointerType::get(PtrTy->getContext(), GlobalAS);
More information about the cfe-commits
mailing list