[clang] [Clang] Emit stub version of OpenCL Kernel (PR #115821)
Aniket Lal via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 11 22:09:20 PST 2024
https://github.com/lalaniket8 created https://github.com/llvm/llvm-project/pull/115821
OpenCL allows a kernel function to call another kernel function.
To facilitate this, we emit a stub version of each kernel function
with a different name mangling scheme, and replace kernel call sites
so that they call the stub instead of the kernel itself.
https://github.com/llvm/llvm-project/issues/60313
https://ontrack-internal.amd.com/browse/SWDEV-245936
>From b9f96f95cd014a44a4137b649aabbc8fe9ecf110 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Tue, 12 Nov 2024 11:30:45 +0530
Subject: [PATCH] [Clang] Emit stub version of OpenCL Kernel
OpenCL allows a kernel function to call another kernel function.
To facilitate this, we emit a stub version of each kernel function
with a different name mangling scheme, and replace kernel call sites
so that they call the stub instead of the kernel itself.
https://github.com/llvm/llvm-project/issues/60313
https://ontrack-internal.amd.com/browse/SWDEV-245936
---
clang/include/clang/AST/GlobalDecl.h | 41 ++++++++++++++++++-------
clang/include/clang/AST/Type.h | 5 ++++
clang/lib/AST/Expr.cpp | 3 +-
clang/lib/AST/ItaniumMangle.cpp | 14 +++++++++
clang/lib/AST/Mangle.cpp | 2 +-
clang/lib/AST/MicrosoftMangle.cpp | 11 +++++++
clang/lib/CodeGen/CGCall.cpp | 29 +++++++++++++++---
clang/lib/CodeGen/CGExpr.cpp | 43 +++++++++++++++++++++++++++
clang/lib/CodeGen/CGOpenCLRuntime.cpp | 5 +++-
clang/lib/CodeGen/CodeGenModule.cpp | 17 +++++++++--
clang/lib/CodeGen/CodeGenTypes.h | 4 ++-
clang/lib/CodeGen/TargetInfo.h | 2 ++
clang/lib/CodeGen/Targets/AMDGPU.cpp | 8 +++++
13 files changed, 163 insertions(+), 21 deletions(-)
diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 386693cabb1fbb..4d19d84773d597 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -71,6 +71,10 @@ class GlobalDecl {
GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
: MultiVersionIndex(MVIndex) {
if (!D->hasAttr<CUDAGlobalAttr>()) {
+ if (D->hasAttr<OpenCLKernelAttr>()) {
+ Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
+ return;
+ }
Init(D);
return;
}
@@ -78,7 +82,8 @@ class GlobalDecl {
}
GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
: Value(D, unsigned(Kind)) {
- assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
+ assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
+ (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
}
GlobalDecl(const NamedDecl *D) { Init(D); }
GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,20 @@ class GlobalDecl {
}
KernelReferenceKind getKernelReferenceKind() const {
- assert(((isa<FunctionDecl>(getDecl()) &&
- cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
- (isa<FunctionTemplateDecl>(getDecl()) &&
- cast<FunctionTemplateDecl>(getDecl())
- ->getTemplatedDecl()
- ->hasAttr<CUDAGlobalAttr>())) &&
- "Decl is not a GPU kernel!");
+ assert((((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+ (isa<FunctionTemplateDecl>(getDecl()) &&
+ cast<FunctionTemplateDecl>(getDecl())
+ ->getTemplatedDecl()
+ ->hasAttr<CUDAGlobalAttr>())) &&
+ "Decl is not a GPU kernel!") ||
+ (((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<OpenCLKernelAttr>()) ||
+ (isa<FunctionTemplateDecl>(getDecl()) &&
+ cast<FunctionTemplateDecl>(getDecl())
+ ->getTemplatedDecl()
+ ->hasAttr<OpenCLKernelAttr>())) &&
+ "Decl is not a OpenCL kernel!"));
return static_cast<KernelReferenceKind>(Value.getInt());
}
@@ -196,9 +208,16 @@ class GlobalDecl {
}
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
- assert(isa<FunctionDecl>(getDecl()) &&
- cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
- "Decl is not a GPU kernel!");
+ assert((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+ "Decl is not a GPU kernel!") ||
+ (((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<OpenCLKernelAttr>()) ||
+ (isa<FunctionTemplateDecl>(getDecl()) &&
+ cast<FunctionTemplateDecl>(getDecl())
+ ->getTemplatedDecl()
+ ->hasAttr<OpenCLKernelAttr>())) &&
+ "Decl is not a OpenCL kernel!"));
GlobalDecl Result(*this);
Result.Value.setInt(unsigned(Kind));
return Result;
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 8979129017163b..69372b99fd6280 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -4668,6 +4668,11 @@ class FunctionType : public Type {
CallingConv getCallConv() const { return getExtInfo().getCC(); }
ExtInfo getExtInfo() const { return ExtInfo(FunctionTypeBits.ExtInfo); }
+ void setCC(unsigned cc) {
+ FunctionTypeBits.ExtInfo =
+ (FunctionTypeBits.ExtInfo & ~ExtInfo::CallConvMask) | cc;
+ }
+
static_assert((~Qualifiers::FastMask & Qualifiers::CVRMask) == 0,
"Const, volatile and restrict are assumed to be a subset of "
"the fast qualifiers.");
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index bf2c1b92fa6b49..84e6eefc4a8a96 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -694,7 +694,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
GD = GlobalDecl(CD, Ctor_Base);
else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
GD = GlobalDecl(DD, Dtor_Base);
- else if (ND->hasAttr<CUDAGlobalAttr>())
+ else if (ND->hasAttr<CUDAGlobalAttr>() ||
+ ND->hasAttr<OpenCLKernelAttr>())
GD = GlobalDecl(cast<FunctionDecl>(ND));
else
GD = GlobalDecl(ND);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 14bc260d0245fb..9a57f7f4c42a48 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -528,6 +528,7 @@ class CXXNameMangler {
void mangleSourceName(const IdentifierInfo *II);
void mangleRegCallName(const IdentifierInfo *II);
void mangleDeviceStubName(const IdentifierInfo *II);
+ void mangleOCLDeviceStubName(const IdentifierInfo *II);
void mangleSourceNameWithAbiTags(
const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
void mangleLocalName(GlobalDecl GD,
@@ -1563,8 +1564,13 @@ void CXXNameMangler::mangleUnqualifiedName(
bool IsDeviceStub =
FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+ bool IsOCLDeviceStub =
+ FD && FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleDeviceStubName(II);
+ else if (IsOCLDeviceStub)
+ mangleOCLDeviceStubName(II);
else if (IsRegCall)
mangleRegCallName(II);
else
@@ -1782,6 +1788,14 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
<< II->getName();
}
+void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
+ // <source-name> ::= <positive length number> __clang_ocl_kern_imp_ <identifier>
+ // <number> ::= [n] <non-negative decimal integer>
+ // <identifier> ::= <unqualified source code identifier>
+ Out << II->getLength() + sizeof("__clang_ocl_kern_imp_") - 1
+ << "__clang_ocl_kern_imp_" << II->getName();
+}
+
void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
// <source-name> ::= <positive length number> <identifier>
// <number> ::= [n] <non-negative decimal integer>
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 4875e8537b3c11..2e3b2a684dd2d7 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -540,7 +540,7 @@ class ASTNameGenerator::Implementation {
GD = GlobalDecl(CtorD, Ctor_Complete);
else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
GD = GlobalDecl(DtorD, Dtor_Complete);
- else if (D->hasAttr<CUDAGlobalAttr>())
+ else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
GD = GlobalDecl(cast<FunctionDecl>(D));
else
GD = GlobalDecl(D);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index dbc161347025c0..6de8c531232e0d 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1163,9 +1163,20 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
->getTemplatedDecl()
->hasAttr<CUDAGlobalAttr>())) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+ bool IsOCLDeviceStub =
+ ND &&
+ ((isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) ||
+ (isa<FunctionTemplateDecl>(ND) &&
+ cast<FunctionTemplateDecl>(ND)
+ ->getTemplatedDecl()
+ ->hasAttr<OpenCLKernelAttr>())) &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleSourceName(
(llvm::Twine("__device_stub__") + II->getName()).str());
+ else if (IsOCLDeviceStub)
+ mangleSourceName(
+ (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
else
mangleSourceName(II->getName());
break;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..d5e09914e3b76b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -300,6 +300,16 @@ static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
}
}
+static void setOCLKernelStubCallingConvention(CanQualType &FTy,
+ CodeGenModule &CGM,
+ const FunctionDecl *FD) {
+ if (FD->hasAttr<OpenCLKernelAttr>()) {
+ const FunctionType *FT = FTy->getAs<FunctionType>();
+ CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
+ FTy = FT->getCanonicalTypeUnqualified();
+ }
+}
+
/// Arrange the argument and result information for a declaration or
/// definition of the given C++ non-static member function. The
/// member function must be an ordinary function, i.e. not a
@@ -460,15 +470,19 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
/// Arrange the argument and result information for the declaration or
/// definition of the given function.
const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
+CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD,
+ CanQualType *FTy_ptr /* = nullptr*/) {
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
if (MD->isImplicitObjectMemberFunction())
return arrangeCXXMethodDeclaration(MD);
- CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
+ CanQualType FTy = FTy_ptr == nullptr
+ ? FD->getType()->getCanonicalTypeUnqualified()
+ : *FTy_ptr;
assert(isa<FunctionType>(FTy));
- setCUDAKernelCallingConvention(FTy, CGM, FD);
+ if (!FD->getLangOpts().OpenCL)
+ setCUDAKernelCallingConvention(FTy, CGM, FD);
// When declaring a function without a prototype, always use a
// non-variadic type.
@@ -548,7 +562,14 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
isa<CXXDestructorDecl>(GD.getDecl()))
return arrangeCXXStructorDeclaration(GD);
- return arrangeFunctionDeclaration(FD);
+ CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
+ if (FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() ==
+ KernelReferenceKind::
+ Stub) { // OCLKernelReferenceKind::ClangOCLKernelImpl){
+ setOCLKernelStubCallingConvention(FTy, CGM, FD);
+ }
+ return arrangeFunctionDeclaration(FD, &FTy);
}
/// Arrange a thunk that takes 'this' as the first parameter followed by
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 096f4c4f550435..0001fa685ed20e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5607,6 +5607,43 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
return EmitCXXPseudoDestructorExpr(callee.getPseudoDestructorExpr());
}
+ // Set the calling convention of an OpenCL kernel callee to CC_C at the call site.
+
+ const Expr *calleeExpr = E->getCallee()->IgnoreParens();
+
+ while (auto ICE = dyn_cast<ImplicitCastExpr>(calleeExpr)) {
+ if (ICE->getCastKind() == CK_FunctionToPointerDecay ||
+ ICE->getCastKind() == CK_BuiltinFnToFnPtr)
+ calleeExpr = ICE->getSubExpr()->IgnoreParens();
+ else
+ break;
+ }
+
+ if (auto DRE = dyn_cast<DeclRefExpr>(calleeExpr)) {
+ if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
+ if (FD->hasAttr<OpenCLKernelAttr>() && !FD->getBuiltinID()) {
+ const FunctionType *ft =
+ cast<FunctionType>(cast<PointerType>(getContext().getCanonicalType(
+ E->getCallee()->getType()))
+ ->getPointeeType());
+ FunctionType *ftnc = const_cast<FunctionType *>(ft);
+ ftnc->setCC(CC_C);
+ }
+ }
+ }
+ if (auto ME = dyn_cast<MemberExpr>(calleeExpr)) {
+ if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
+ if (FD->hasAttr<OpenCLKernelAttr>() && !FD->getBuiltinID()) {
+ const FunctionType *ft =
+ cast<FunctionType>(cast<PointerType>(getContext().getCanonicalType(
+ E->getCallee()->getType()))
+ ->getPointeeType());
+ FunctionType *ftnc = const_cast<FunctionType *>(ft);
+ ftnc->setCC(CC_C);
+ }
+ }
+ }
+
return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue,
/*Chain=*/nullptr, CallOrInvoke);
}
@@ -5695,11 +5732,17 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
// Resolve direct calls.
} else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
+ if (FD->hasAttr<OpenCLKernelAttr>())
+ return EmitDirectCallee(*this,
+ GlobalDecl(FD, KernelReferenceKind::Stub));
return EmitDirectCallee(*this, FD);
}
} else if (auto ME = dyn_cast<MemberExpr>(E)) {
if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
EmitIgnoredExpr(ME->getBase());
+ if (FD->hasAttr<OpenCLKernelAttr>())
+ return EmitDirectCallee(*this,
+ GlobalDecl(FD, KernelReferenceKind::Stub));
return EmitDirectCallee(*this, FD);
}
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 115b618056a445..e5db0fa33c1868 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -127,7 +127,10 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
llvm::Function *InvokeF,
llvm::Value *Block, llvm::Type *BlockTy) {
- assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
+
+ // Since OpenCL kernels are emitted twice (kernel version and device version),
+ // their constituent BlockExprs will also be emitted twice.
+ // assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
assert(Block->getType()->isPointerTy() && "Invalid block literal type");
EnqueuedBlockMap[E].InvokeFunc = InvokeF;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba376f9ecfacde..3476b8301ee9b8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1888,6 +1888,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
+ } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+ Out << "__clang_ocl_kern_imp_" << II->getName();
} else {
Out << II->getName();
}
@@ -3283,8 +3286,14 @@ void CodeGenModule::EmitDeferred() {
if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D))
continue;
- // Otherwise, emit the definition and move on to the next one.
- EmitGlobalDefinition(D, GV);
+ // Otherwise, emit the definition and move on to the next one.
+ // Do not emit a definition for the device version of an OpenCL kernel that
+ // does not have a body.
+ if (!(isa<FunctionDecl>(D.getDecl()) &&
+ (cast<FunctionDecl>(D.getDecl()))->hasAttr<OpenCLKernelAttr>() &&
+ D.getKernelReferenceKind() == KernelReferenceKind::Stub &&
+ !((cast<FunctionDecl>(D.getDecl()))->doesThisDeclarationHaveABody())))
+ EmitGlobalDefinition(D, GV);
// If we found out that we need to emit more decls, do that recursively.
// This has the advantage that the decls are emitted in a DFS and related
@@ -3842,6 +3851,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Ignore declarations, they will be emitted on their first use.
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+
+ if (FD->hasAttr<OpenCLKernelAttr>())
+ addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
+
// Update deferred annotations with the latest declaration if the function
// function was already used or defined.
if (FD->hasAttr<AnnotateAttr>()) {
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 5aebf9a2122372..fd9f37de67f187 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -207,7 +207,9 @@ class CodeGenTypes {
/// Free functions are functions that are compatible with an ordinary
/// C function pointer type.
- const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD);
+ const CGFunctionInfo &
+ arrangeFunctionDeclaration(const FunctionDecl *FD,
+ CanQualType *FTy_ptr = nullptr);
const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
const FunctionType *Ty,
bool ChainCall);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 373f8b8a80fdb1..2673b1f7e32c28 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -382,6 +382,8 @@ class TargetCodeGenInfo {
virtual bool shouldEmitDWARFBitFieldSeparators() const { return false; }
virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
+ virtual void
+ setOCLKernelStubCallingConvention(const FunctionType *&FT) const {}
/// Return the device-side type for the CUDA device builtin surface type.
virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 56ad0503a11ab2..37e07b16193e5f 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -321,6 +321,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
bool shouldEmitStaticExternCAliases() const override;
bool shouldEmitDWARFBitFieldSeparators() const override;
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+ void
+ setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
};
}
@@ -598,6 +600,12 @@ void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}
+void AMDGPUTargetCodeGenInfo::setOCLKernelStubCallingConvention(
+ const FunctionType *&FT) const {
+ FT = getABIInfo().getContext().adjustFunctionType(
+ FT, FT->getExtInfo().withCallingConv(CC_C));
+}
+
/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
More information about the cfe-commits
mailing list