[clang] [Clang] Emit stub version of OpenCL Kernel (PR #115821)

Aniket Lal via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 11 22:09:20 PST 2024


https://github.com/lalaniket8 created https://github.com/llvm/llvm-project/pull/115821

OpenCL allows a kernel function to call another kernel function.
To facilitate this we emit a stub version of each kernel function
with different name mangling scheme, and replace the kernel
callsite appropriately.

https://github.com/llvm/llvm-project/issues/60313
https://ontrack-internal.amd.com/browse/SWDEV-245936

>From b9f96f95cd014a44a4137b649aabbc8fe9ecf110 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Tue, 12 Nov 2024 11:30:45 +0530
Subject: [PATCH] [Clang] Emit stub version of OpenCL Kernel

OpenCL allows a kernel function to call another kernel function.
To facilitate this we emit a stub version of each kernel function
with different name mangling scheme, and replace the kernel
callsite appropriately.

https://github.com/llvm/llvm-project/issues/60313
https://ontrack-internal.amd.com/browse/SWDEV-245936
---
 clang/include/clang/AST/GlobalDecl.h  | 41 ++++++++++++++++++-------
 clang/include/clang/AST/Type.h        |  5 ++++
 clang/lib/AST/Expr.cpp                |  3 +-
 clang/lib/AST/ItaniumMangle.cpp       | 14 +++++++++
 clang/lib/AST/Mangle.cpp              |  2 +-
 clang/lib/AST/MicrosoftMangle.cpp     | 11 +++++++
 clang/lib/CodeGen/CGCall.cpp          | 29 +++++++++++++++---
 clang/lib/CodeGen/CGExpr.cpp          | 43 +++++++++++++++++++++++++++
 clang/lib/CodeGen/CGOpenCLRuntime.cpp |  5 +++-
 clang/lib/CodeGen/CodeGenModule.cpp   | 17 +++++++++--
 clang/lib/CodeGen/CodeGenTypes.h      |  4 ++-
 clang/lib/CodeGen/TargetInfo.h        |  2 ++
 clang/lib/CodeGen/Targets/AMDGPU.cpp  |  8 +++++
 13 files changed, 163 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 386693cabb1fbb..4d19d84773d597 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -71,6 +71,10 @@ class GlobalDecl {
   GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
       : MultiVersionIndex(MVIndex) {
     if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (D->hasAttr<OpenCLKernelAttr>()) {
+        Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
+        return;
+      }
       Init(D);
       return;
     }
@@ -78,7 +82,8 @@ class GlobalDecl {
   }
   GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
       : Value(D, unsigned(Kind)) {
-    assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
+    assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
+           (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
   }
   GlobalDecl(const NamedDecl *D) { Init(D); }
   GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,20 @@ class GlobalDecl {
   }
 
   KernelReferenceKind getKernelReferenceKind() const {
-    assert(((isa<FunctionDecl>(getDecl()) &&
-             cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
-            (isa<FunctionTemplateDecl>(getDecl()) &&
-             cast<FunctionTemplateDecl>(getDecl())
-                 ->getTemplatedDecl()
-                 ->hasAttr<CUDAGlobalAttr>())) &&
-           "Decl is not a GPU kernel!");
+    assert((((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<CUDAGlobalAttr>())) &&
+            "Decl is not a GPU kernel!") ||
+           (((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<OpenCLKernelAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<OpenCLKernelAttr>())) &&
+            "Decl is not a OpenCL kernel!"));
     return static_cast<KernelReferenceKind>(Value.getInt());
   }
 
@@ -196,9 +208,16 @@ class GlobalDecl {
   }
 
   GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
-    assert(isa<FunctionDecl>(getDecl()) &&
-           cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
-           "Decl is not a GPU kernel!");
+    assert((isa<FunctionDecl>(getDecl()) &&
+            cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+            "Decl is not a GPU kernel!") ||
+           (((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<OpenCLKernelAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<OpenCLKernelAttr>())) &&
+            "Decl is not a OpenCL kernel!"));
     GlobalDecl Result(*this);
     Result.Value.setInt(unsigned(Kind));
     return Result;
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 8979129017163b..69372b99fd6280 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -4668,6 +4668,11 @@ class FunctionType : public Type {
   CallingConv getCallConv() const { return getExtInfo().getCC(); }
   ExtInfo getExtInfo() const { return ExtInfo(FunctionTypeBits.ExtInfo); }
 
+  void setCC(unsigned cc) {
+    FunctionTypeBits.ExtInfo =
+        (FunctionTypeBits.ExtInfo & ~ExtInfo::CallConvMask) | cc;
+  }
+
   static_assert((~Qualifiers::FastMask & Qualifiers::CVRMask) == 0,
                 "Const, volatile and restrict are assumed to be a subset of "
                 "the fast qualifiers.");
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index bf2c1b92fa6b49..84e6eefc4a8a96 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -694,7 +694,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
           GD = GlobalDecl(CD, Ctor_Base);
         else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
           GD = GlobalDecl(DD, Dtor_Base);
-        else if (ND->hasAttr<CUDAGlobalAttr>())
+        else if (ND->hasAttr<CUDAGlobalAttr>() ||
+                 ND->hasAttr<OpenCLKernelAttr>())
           GD = GlobalDecl(cast<FunctionDecl>(ND));
         else
           GD = GlobalDecl(ND);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 14bc260d0245fb..9a57f7f4c42a48 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -528,6 +528,7 @@ class CXXNameMangler {
   void mangleSourceName(const IdentifierInfo *II);
   void mangleRegCallName(const IdentifierInfo *II);
   void mangleDeviceStubName(const IdentifierInfo *II);
+  void mangleOCLDeviceStubName(const IdentifierInfo *II);
   void mangleSourceNameWithAbiTags(
       const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
   void mangleLocalName(GlobalDecl GD,
@@ -1563,8 +1564,13 @@ void CXXNameMangler::mangleUnqualifiedName(
       bool IsDeviceStub =
           FD && FD->hasAttr<CUDAGlobalAttr>() &&
           GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+      bool IsOCLDeviceStub =
+          FD && FD->hasAttr<OpenCLKernelAttr>() &&
+          GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
       if (IsDeviceStub)
         mangleDeviceStubName(II);
+      else if (IsOCLDeviceStub)
+        mangleOCLDeviceStubName(II);
       else if (IsRegCall)
         mangleRegCallName(II);
       else
@@ -1782,6 +1788,14 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
       << II->getName();
 }
 
+void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
+  // <source-name> ::= <positive length number> __clang_ocl_kern_imp_
+  // <identifier> <number> ::= [n] <non-negative decimal integer> <identifier>
+  // ::= <unqualified source code identifier>
+  Out << II->getLength() + sizeof("__clang_ocl_kern_imp_") - 1
+      << "__clang_ocl_kern_imp_" << II->getName();
+}
+
 void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
   // <source-name> ::= <positive length number> <identifier>
   // <number> ::= [n] <non-negative decimal integer>
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 4875e8537b3c11..2e3b2a684dd2d7 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -540,7 +540,7 @@ class ASTNameGenerator::Implementation {
         GD = GlobalDecl(CtorD, Ctor_Complete);
       else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
         GD = GlobalDecl(DtorD, Dtor_Complete);
-      else if (D->hasAttr<CUDAGlobalAttr>())
+      else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
         GD = GlobalDecl(cast<FunctionDecl>(D));
       else
         GD = GlobalDecl(D);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index dbc161347025c0..6de8c531232e0d 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1163,9 +1163,20 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
                   ->getTemplatedDecl()
                   ->hasAttr<CUDAGlobalAttr>())) &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+        bool IsOCLDeviceStub =
+            ND &&
+            ((isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) ||
+             (isa<FunctionTemplateDecl>(ND) &&
+              cast<FunctionTemplateDecl>(ND)
+                  ->getTemplatedDecl()
+                  ->hasAttr<OpenCLKernelAttr>())) &&
+            GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
         if (IsDeviceStub)
           mangleSourceName(
               (llvm::Twine("__device_stub__") + II->getName()).str());
+        else if (IsOCLDeviceStub)
+          mangleSourceName(
+              (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
         else
           mangleSourceName(II->getName());
         break;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..d5e09914e3b76b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -300,6 +300,16 @@ static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
   }
 }
 
+static void setOCLKernelStubCallingConvention(CanQualType &FTy,
+                                              CodeGenModule &CGM,
+                                              const FunctionDecl *FD) {
+  if (FD->hasAttr<OpenCLKernelAttr>()) {
+    const FunctionType *FT = FTy->getAs<FunctionType>();
+    CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
+    FTy = FT->getCanonicalTypeUnqualified();
+  }
+}
+
 /// Arrange the argument and result information for a declaration or
 /// definition of the given C++ non-static member function.  The
 /// member function must be an ordinary function, i.e. not a
@@ -460,15 +470,19 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
 /// Arrange the argument and result information for the declaration or
 /// definition of the given function.
 const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
+CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD,
+                                         CanQualType *FTy_ptr /* = nullptr*/) {
   if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
     if (MD->isImplicitObjectMemberFunction())
       return arrangeCXXMethodDeclaration(MD);
 
-  CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
+  CanQualType FTy = FTy_ptr == nullptr
+                        ? FD->getType()->getCanonicalTypeUnqualified()
+                        : *FTy_ptr;
 
   assert(isa<FunctionType>(FTy));
-  setCUDAKernelCallingConvention(FTy, CGM, FD);
+  if (!FD->getLangOpts().OpenCL)
+    setCUDAKernelCallingConvention(FTy, CGM, FD);
 
   // When declaring a function without a prototype, always use a
   // non-variadic type.
@@ -548,7 +562,14 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
       isa<CXXDestructorDecl>(GD.getDecl()))
     return arrangeCXXStructorDeclaration(GD);
 
-  return arrangeFunctionDeclaration(FD);
+  CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
+  if (FD->hasAttr<OpenCLKernelAttr>() &&
+      GD.getKernelReferenceKind() ==
+          KernelReferenceKind::
+              Stub) { // OCLKernelReferenceKind::ClangOCLKernelImpl){
+    setOCLKernelStubCallingConvention(FTy, CGM, FD);
+  }
+  return arrangeFunctionDeclaration(FD, &FTy);
 }
 
 /// Arrange a thunk that takes 'this' as the first parameter followed by
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 096f4c4f550435..0001fa685ed20e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5607,6 +5607,43 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
     return EmitCXXPseudoDestructorExpr(callee.getPseudoDestructorExpr());
   }
 
+  // Change calling convention of callee function at callsite
+
+  const Expr *calleeExpr = E->getCallee()->IgnoreParens();
+
+  while (auto ICE = dyn_cast<ImplicitCastExpr>(calleeExpr)) {
+    if (ICE->getCastKind() == CK_FunctionToPointerDecay ||
+        ICE->getCastKind() == CK_BuiltinFnToFnPtr)
+      calleeExpr = ICE->getSubExpr()->IgnoreParens();
+    else
+      break;
+  }
+
+  if (auto DRE = dyn_cast<DeclRefExpr>(calleeExpr)) {
+    if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
+      if (FD->hasAttr<OpenCLKernelAttr>() && !FD->getBuiltinID()) {
+        const FunctionType *ft =
+            cast<FunctionType>(cast<PointerType>(getContext().getCanonicalType(
+                                                     E->getCallee()->getType()))
+                                   ->getPointeeType());
+        FunctionType *ftnc = const_cast<FunctionType *>(ft);
+        ftnc->setCC(CC_C);
+      }
+    }
+  }
+  if (auto ME = dyn_cast<MemberExpr>(calleeExpr)) {
+    if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
+      if (FD->hasAttr<OpenCLKernelAttr>() && !FD->getBuiltinID()) {
+        const FunctionType *ft =
+            cast<FunctionType>(cast<PointerType>(getContext().getCanonicalType(
+                                                     E->getCallee()->getType()))
+                                   ->getPointeeType());
+        FunctionType *ftnc = const_cast<FunctionType *>(ft);
+        ftnc->setCC(CC_C);
+      }
+    }
+  }
+
   return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue,
                   /*Chain=*/nullptr, CallOrInvoke);
 }
@@ -5695,11 +5732,17 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
   // Resolve direct calls.
   } else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
+      if (FD->hasAttr<OpenCLKernelAttr>())
+        return EmitDirectCallee(*this,
+                                GlobalDecl(FD, KernelReferenceKind::Stub));
       return EmitDirectCallee(*this, FD);
     }
   } else if (auto ME = dyn_cast<MemberExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
       EmitIgnoredExpr(ME->getBase());
+      if (FD->hasAttr<OpenCLKernelAttr>())
+        return EmitDirectCallee(*this,
+                                GlobalDecl(FD, KernelReferenceKind::Stub));
       return EmitDirectCallee(*this, FD);
     }
 
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 115b618056a445..e5db0fa33c1868 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -127,7 +127,10 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
                                       llvm::Function *InvokeF,
                                       llvm::Value *Block, llvm::Type *BlockTy) {
-  assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
+
+  // Since OpenCL Kernels are emitted twice (kernel version and device version),
+  // its constituent BlockExpr will also be emitted twice
+  // assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
   assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
   assert(Block->getType()->isPointerTy() && "Invalid block literal type");
   EnqueuedBlockMap[E].InvokeFunc = InvokeF;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba376f9ecfacde..3476b8301ee9b8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1888,6 +1888,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
     } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
                GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       Out << "__device_stub__" << II->getName();
+    } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+               GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+      Out << "__clang_ocl_kern_imp_" << II->getName();
     } else {
       Out << II->getName();
     }
@@ -3283,8 +3286,14 @@ void CodeGenModule::EmitDeferred() {
     if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D))
       continue;
 
-    // Otherwise, emit the definition and move on to the next one.
-    EmitGlobalDefinition(D, GV);
+    // Otherwise, emit the definition and move on to the next one,
+    // Donot emit definition for a device version of OpenCL kernel that does not
+    // have a body
+    if (!(isa<FunctionDecl>(D.getDecl()) &&
+          (cast<FunctionDecl>(D.getDecl()))->hasAttr<OpenCLKernelAttr>() &&
+          D.getKernelReferenceKind() == KernelReferenceKind::Stub &&
+          !((cast<FunctionDecl>(D.getDecl()))->doesThisDeclarationHaveABody())))
+      EmitGlobalDefinition(D, GV);
 
     // If we found out that we need to emit more decls, do that recursively.
     // This has the advantage that the decls are emitted in a DFS and related
@@ -3842,6 +3851,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+
+    if (FD->hasAttr<OpenCLKernelAttr>())
+      addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
+
     // Update deferred annotations with the latest declaration if the function
     // function was already used or defined.
     if (FD->hasAttr<AnnotateAttr>()) {
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 5aebf9a2122372..fd9f37de67f187 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -207,7 +207,9 @@ class CodeGenTypes {
 
   /// Free functions are functions that are compatible with an ordinary
   /// C function pointer type.
-  const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD);
+  const CGFunctionInfo &
+  arrangeFunctionDeclaration(const FunctionDecl *FD,
+                             CanQualType *FTy_ptr = nullptr);
   const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
                                                 const FunctionType *Ty,
                                                 bool ChainCall);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 373f8b8a80fdb1..2673b1f7e32c28 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -382,6 +382,8 @@ class TargetCodeGenInfo {
   virtual bool shouldEmitDWARFBitFieldSeparators() const { return false; }
 
   virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
+  virtual void
+  setOCLKernelStubCallingConvention(const FunctionType *&FT) const {}
 
   /// Return the device-side type for the CUDA device builtin surface type.
   virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 56ad0503a11ab2..37e07b16193e5f 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -321,6 +321,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
   bool shouldEmitStaticExternCAliases() const override;
   bool shouldEmitDWARFBitFieldSeparators() const override;
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  void
+  setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
 };
 }
 
@@ -598,6 +600,12 @@ void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
       FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
 }
 
+void AMDGPUTargetCodeGenInfo::setOCLKernelStubCallingConvention(
+    const FunctionType *&FT) const {
+  FT = getABIInfo().getContext().adjustFunctionType(
+      FT, FT->getExtInfo().withCallingConv(CC_C));
+}
+
 /// Create an OpenCL kernel for an enqueued block.
 ///
 /// The type of the first argument (the block literal) is the struct type



More information about the cfe-commits mailing list