[clang] [Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel (PR #115821)
Aniket Lal via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 29 00:36:41 PST 2024
https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/115821
>From 107f8dbc6b2fa157ce3936a086093882012b9d62 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Mon, 25 Nov 2024 14:18:36 +0530
Subject: [PATCH 1/2] [Clang][OpenCL][AMDGPU] Allow a kernel to call another
kernel
This feature is currently not supported in the compiler.
To facilitate this we emit a stub version of each kernel
function body with different name mangling scheme, and
replaces the respective kernel call-sites appropriately.
Fixes https://github.com/llvm/llvm-project/issues/60313
D120566 was an earlier attempt made to upstream a solution
for this issue.
---
clang/include/clang/AST/GlobalDecl.h | 37 +++++++++----
clang/lib/AST/Expr.cpp | 3 +-
clang/lib/AST/ItaniumMangle.cpp | 15 ++++++
clang/lib/AST/Mangle.cpp | 2 +-
clang/lib/AST/MicrosoftMangle.cpp | 6 +++
clang/lib/CodeGen/CGBlocks.cpp | 16 +++---
clang/lib/CodeGen/CGCall.cpp | 11 +++-
clang/lib/CodeGen/CGExpr.cpp | 5 +-
clang/lib/CodeGen/CGOpenCLRuntime.cpp | 11 +++-
clang/lib/CodeGen/CGOpenCLRuntime.h | 4 +-
clang/lib/CodeGen/CodeGenModule.cpp | 7 +++
.../test/CodeGenOpenCL/opencl-kernel-call.cl | 43 +++++++++++++++
clang/test/CodeGenOpenCL/reflect.cl | 2 +-
clang/test/CodeGenOpenCL/spir-calling-conv.cl | 4 +-
clang/test/CodeGenOpenCL/visibility.cl | 53 ++++++++++++++-----
15 files changed, 178 insertions(+), 41 deletions(-)
create mode 100644 clang/test/CodeGenOpenCL/opencl-kernel-call.cl
diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 386693cabb1fbb..8a9f4b4c60e5e5 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -71,6 +71,10 @@ class GlobalDecl {
GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
: MultiVersionIndex(MVIndex) {
if (!D->hasAttr<CUDAGlobalAttr>()) {
+ if (D->hasAttr<OpenCLKernelAttr>()) {
+ Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
+ return;
+ }
Init(D);
return;
}
@@ -78,7 +82,8 @@ class GlobalDecl {
}
GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
: Value(D, unsigned(Kind)) {
- assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
+ assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
+ (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
}
GlobalDecl(const NamedDecl *D) { Init(D); }
GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,15 @@ class GlobalDecl {
}
KernelReferenceKind getKernelReferenceKind() const {
- assert(((isa<FunctionDecl>(getDecl()) &&
- cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
- (isa<FunctionTemplateDecl>(getDecl()) &&
- cast<FunctionTemplateDecl>(getDecl())
- ->getTemplatedDecl()
- ->hasAttr<CUDAGlobalAttr>())) &&
- "Decl is not a GPU kernel!");
+ assert((((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+ (isa<FunctionTemplateDecl>(getDecl()) &&
+ cast<FunctionTemplateDecl>(getDecl())
+ ->getTemplatedDecl()
+ ->hasAttr<CUDAGlobalAttr>())) &&
+ "Decl is not a GPU kernel!") ||
+ (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
+
return static_cast<KernelReferenceKind>(Value.getInt());
}
@@ -196,13 +203,21 @@ class GlobalDecl {
}
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
- assert(isa<FunctionDecl>(getDecl()) &&
- cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
- "Decl is not a GPU kernel!");
+ assert((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+ "Decl is not a GPU kernel!") ||
+ (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
GlobalDecl Result(*this);
Result.Value.setInt(unsigned(Kind));
return Result;
}
+
+ bool isDeclOpenCLKernel() const {
+ auto FD = dyn_cast<FunctionDecl>(getDecl());
+ if (FD)
+ return FD->hasAttr<OpenCLKernelAttr>();
+ return FD;
+ }
};
} // namespace clang
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index a4fb4d5a1f2ec4..ddd88ac6a21050 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -693,7 +693,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
GD = GlobalDecl(CD, Ctor_Base);
else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
GD = GlobalDecl(DD, Dtor_Base);
- else if (ND->hasAttr<CUDAGlobalAttr>())
+ else if (ND->hasAttr<CUDAGlobalAttr>() ||
+ ND->hasAttr<OpenCLKernelAttr>())
GD = GlobalDecl(cast<FunctionDecl>(ND));
else
GD = GlobalDecl(ND);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 27a993a631dae9..7e46d6c520fb69 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -526,6 +526,7 @@ class CXXNameMangler {
void mangleSourceName(const IdentifierInfo *II);
void mangleRegCallName(const IdentifierInfo *II);
void mangleDeviceStubName(const IdentifierInfo *II);
+ void mangleOCLDeviceStubName(const IdentifierInfo *II);
void mangleSourceNameWithAbiTags(
const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
void mangleLocalName(GlobalDecl GD,
@@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName(
bool IsDeviceStub =
FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+ bool IsOCLDeviceStub =
+ FD && FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleDeviceStubName(II);
+ else if (IsOCLDeviceStub)
+ mangleOCLDeviceStubName(II);
else if (IsRegCall)
mangleRegCallName(II);
else
@@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
<< II->getName();
}
+void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
+ // <source-name> ::= <positive length number> __clang_ocl_kern_imp_
+ // <identifier> <number> ::= [n] <non-negative decimal integer> <identifier>
+ // ::= <unqualified source code identifier>
+ StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_";
+ Out << II->getLength() + OCLDeviceStubNamePrefix.size() - 1
+ << OCLDeviceStubNamePrefix << II->getName();
+}
+
void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
// <source-name> ::= <positive length number> <identifier>
// <number> ::= [n] <non-negative decimal integer>
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 15be9c62bf8880..1d1c4dd0e39b7a 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -539,7 +539,7 @@ class ASTNameGenerator::Implementation {
GD = GlobalDecl(CtorD, Ctor_Complete);
else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
GD = GlobalDecl(DtorD, Dtor_Complete);
- else if (D->hasAttr<CUDAGlobalAttr>())
+ else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
GD = GlobalDecl(cast<FunctionDecl>(D));
else
GD = GlobalDecl(D);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 94a7ce6c1321d3..e439875a2538ba 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1161,9 +1161,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
->getTemplatedDecl()
->hasAttr<CUDAGlobalAttr>())) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+ bool IsOCLDeviceStub =
+ ND && (isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleSourceName(
(llvm::Twine("__device_stub__") + II->getName()).str());
+ else if (IsOCLDeviceStub)
+ mangleSourceName(
+ (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
else
mangleSourceName(II->getName());
break;
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7b..7f98a897c36907 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -48,7 +48,7 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
BlockByrefHelpers::~BlockByrefHelpers() {}
/// Build the given block as a global block.
-static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
+static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, GlobalDecl GD,
const CGBlockInfo &blockInfo,
llvm::Constant *blockFn);
@@ -1085,8 +1085,10 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
if (IsOpenCL) {
- CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
- result, blockInfo.StructureType);
+ CGM.getOpenCLRuntime().recordBlockInfo(
+ blockInfo.BlockExpression, InvokeFn, result, blockInfo.StructureType,
+ CurGD && CurGD.isDeclOpenCLKernel() &&
+ (CurGD.getKernelReferenceKind() == KernelReferenceKind::Kernel));
}
return result;
@@ -1285,7 +1287,7 @@ CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE,
return getAddrOfGlobalBlockIfEmitted(BE);
}
-static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
+static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, GlobalDecl GD,
const CGBlockInfo &blockInfo,
llvm::Constant *blockFn) {
assert(blockInfo.CanBeGlobal);
@@ -1378,7 +1380,9 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
CGM.getOpenCLRuntime().recordBlockInfo(
blockInfo.BlockExpression,
cast<llvm::Function>(blockFn->stripPointerCasts()), Result,
- literal->getValueType());
+ literal->getValueType(),
+ GD && GD.isDeclOpenCLKernel() &&
+ (GD.getKernelReferenceKind() == KernelReferenceKind::Kernel));
return Result;
}
@@ -1487,7 +1491,7 @@ llvm::Function *CodeGenFunction::GenerateBlockFunction(
auto GenVoidPtrTy = getContext().getLangOpts().OpenCL
? CGM.getOpenCLRuntime().getGenericVoidPointerType()
: VoidPtrTy;
- buildGlobalBlock(CGM, blockInfo,
+ buildGlobalBlock(CGM, CurGD, blockInfo,
llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy));
}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 20455dbb820914..215fe5bd9da8dd 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2343,6 +2343,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// Collect function IR attributes from the CC lowering.
// We'll collect the paramete and result attributes later.
CallingConv = FI.getEffectiveCallingConvention();
+ GlobalDecl GD = CalleeInfo.getCalleeDecl();
+ const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
+ if (TargetDecl) {
+ if (auto FD = dyn_cast<FunctionDecl>(TargetDecl)) {
+ if (FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub)
+ CallingConv = llvm::CallingConv::C;
+ }
+ }
if (FI.isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
if (FI.isCmseNSCall())
@@ -2352,8 +2361,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
- const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
-
// Attach assumption attributes to the declaration. If this is a call
// site, attach assumptions from the caller to the call as well.
AddAttributesFromOMPAssumes(FuncAttrs, TargetDecl);
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 229f0e29f02341..e2222a6393b9b1 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5692,7 +5692,10 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
// Resolve direct calls.
} else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
- return EmitDirectCallee(*this, FD);
+ auto CalleeDecl = FD->hasAttr<OpenCLKernelAttr>()
+ ? GlobalDecl(FD, KernelReferenceKind::Stub)
+ : FD;
+ return EmitDirectCallee(*this, CalleeDecl);
}
} else if (auto ME = dyn_cast<MemberExpr>(E)) {
if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 115b618056a445..a78d783831293e 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -126,14 +126,21 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
/// corresponding block expression.
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
llvm::Function *InvokeF,
- llvm::Value *Block, llvm::Type *BlockTy) {
- assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
+ llvm::Value *Block, llvm::Type *BlockTy,
+ bool isBlkExprInOCLKern) {
+
+ // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
+ // version), its constituent BlockExpr will also be emitted twice.
+ assert((!EnqueuedBlockMap.contains(E) ||
+ EnqueuedBlockMap[E].isBlkExprInOCLKern != isBlkExprInOCLKern) &&
+ "Block expression emitted twice");
assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
assert(Block->getType()->isPointerTy() && "Invalid block literal type");
EnqueuedBlockMap[E].InvokeFunc = InvokeF;
EnqueuedBlockMap[E].BlockArg = Block;
EnqueuedBlockMap[E].BlockTy = BlockTy;
EnqueuedBlockMap[E].KernelHandle = nullptr;
+ EnqueuedBlockMap[E].isBlkExprInOCLKern = isBlkExprInOCLKern;
}
llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h
index 34613c3516f374..78bb5980cd87dc 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.h
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.h
@@ -46,6 +46,7 @@ class CGOpenCLRuntime {
llvm::Value *KernelHandle; /// Enqueued block kernel reference.
llvm::Value *BlockArg; /// The first argument to enqueued block kernel.
llvm::Type *BlockTy; /// Type of the block argument.
+ bool isBlkExprInOCLKern; /// Does the BlockExpr reside in an OpenCL Kernel.
};
/// Maps block expression to block information.
llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
@@ -93,7 +94,8 @@ class CGOpenCLRuntime {
/// \param InvokeF invoke function emitted for the block expression.
/// \param Block block literal emitted for the block expression.
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
- llvm::Value *Block, llvm::Type *BlockTy);
+ llvm::Value *Block, llvm::Type *BlockTy,
+ bool isBlkExprInOCLKern);
/// \return LLVM block invoke function emitted for an expression derived from
/// the block expression.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 7189a4689e8156..162752241a45e6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1887,6 +1887,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
+ } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+ Out << "__clang_ocl_kern_imp_" << II->getName();
} else {
Out << II->getName();
}
@@ -3850,6 +3853,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Ignore declarations, they will be emitted on their first use.
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+
+ if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
+ addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
+
// Update deferred annotations with the latest declaration if the function
// function was already used or defined.
if (FD->hasAttr<AnnotateAttr>()) {
diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
new file mode 100644
index 00000000000000..f575728f237630
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define dso_local amdgpu_kernel void @callee_kern({{.*}})
+__attribute__((noinline)) kernel void callee_kern(global int *A){
+ *A = 1;
+}
+
+__attribute__((noinline)) kernel void ext_callee_kern(global int *A);
+
+// CHECK: define dso_local void @callee_func({{.*}})
+__attribute__((noinline)) void callee_func(global int *A){
+ *A = 2;
+}
+
+// CHECK: define dso_local amdgpu_kernel void @caller_kern({{.*}})
+kernel void caller_kern(global int* A){
+ callee_kern(A);
+ // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}})
+ ext_callee_kern(A);
+ // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+ callee_func(A);
+ // CHECK: tail call void @callee_func({{.*}})
+
+}
+
+// CHECK: define dso_local void @__clang_ocl_kern_imp_callee_kern({{.*}})
+
+// CHECK: declare void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+
+// CHECK: define dso_local void @caller_func({{.*}})
+void caller_func(global int* A){
+ callee_kern(A);
+ // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) #7
+ ext_callee_kern(A);
+ // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) #8
+ callee_func(A);
+ // CHECK: tail call void @callee_func({{.*}})
+}
+
+// CHECK: define dso_local void @__clang_ocl_kern_imp_caller_kern({{.*}})
+// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}})
+// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+// CHECK: tail call void @callee_func({{.*}})
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 9ae4a5f027d358..0e3e50be9745eb 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -13,7 +13,7 @@ bool device_function() {
}
// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
index 569ea0cbe1af60..6c8f20511b8bd6 100644
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -11,8 +11,8 @@ kernel void foo(global int *A)
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
A[id] = id;
bar(A);
- // CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A)
+ // CHECK: tail call void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)
}
// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4)
+// CHECK: declare void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4)
diff --git a/clang/test/CodeGenOpenCL/visibility.cl b/clang/test/CodeGenOpenCL/visibility.cl
index addfe33377f939..e5dc5b29c5140b 100644
--- a/clang/test/CodeGenOpenCL/visibility.cl
+++ b/clang/test/CodeGenOpenCL/visibility.cl
@@ -85,31 +85,42 @@ __attribute__((visibility("default"))) extern void ext_func_default();
void use() {
glob = ext + ext_hidden + ext_protected + ext_default;
ext_kern();
+ // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern()
+ // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern()
+ // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern()
ext_kern_hidden();
+ // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
+ // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
+ // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
ext_kern_protected();
+ // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
+ // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
+ // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
ext_kern_default();
+ // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default()
+ // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default()
+ // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default()
ext_func();
ext_func_hidden();
ext_func_protected();
ext_func_default();
}
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern()
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
-
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
-// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
-// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default()
+// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default()
+// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default()
// FVIS-DEFAULT: declare void @ext_func()
// FVIS-PROTECTED: declare protected void @ext_func()
@@ -126,3 +137,19 @@ void use() {
// FVIS-DEFAULT: declare void @ext_func_default()
// FVIS-PROTECTED: declare void @ext_func_default()
// FVIS-HIDDEN: declare void @ext_func_default()
+
+// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern()
+
+// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden()
+
+// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected()
+
+// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
+// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
+// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
>From 013801b1fc82ec6806b876d93da24d4f0f2ed098 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Fri, 29 Nov 2024 14:03:57 +0530
Subject: [PATCH 2/2] [Clang][OpenCL][AMDGPU] Allow a kernel to call another
kernel
Simplifying isDeclOpenCLKernel() and removing resolved comments
---
clang/include/clang/AST/GlobalDecl.h | 5 ++---
clang/lib/CodeGen/CGOpenCLRuntime.cpp | 2 --
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 8a9f4b4c60e5e5..342232dcf56418 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -213,10 +213,9 @@ class GlobalDecl {
}
bool isDeclOpenCLKernel() const {
- auto FD = dyn_cast<FunctionDecl>(getDecl());
- if (FD)
+ if (auto FD = dyn_cast<FunctionDecl>(getDecl()))
return FD->hasAttr<OpenCLKernelAttr>();
- return FD;
+ return false;
}
};
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index a78d783831293e..31d40a8774b7b1 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -129,8 +129,6 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
llvm::Value *Block, llvm::Type *BlockTy,
bool isBlkExprInOCLKern) {
- // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
- // version), its constituent BlockExpr will also be emitted twice.
assert((!EnqueuedBlockMap.contains(E) ||
EnqueuedBlockMap[E].isBlkExprInOCLKern != isBlkExprInOCLKern) &&
"Block expression emitted twice");
More information about the cfe-commits
mailing list