[clang] [Clang] Emit stub version of OpenCL Kernel (PR #115821)

Aniket Lal via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 13 02:13:25 PST 2024


https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/115821

>From 9449cea8e3dd822508c8933d38f15290306bc9ab Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Tue, 12 Nov 2024 11:30:45 +0530
Subject: [PATCH] [Clang][AMDGPU] Emit stub version of OpenCL Kernel

OpenCL allows a kernel function to call another kernel function.
To facilitate this we emit a stub version of each kernel function
with different name mangling scheme, and replace the kernel
callsite appropriately.

This commit fixes https://github.com/llvm/llvm-project/issues/60313
https://ontrack-internal.amd.com/browse/SWDEV-245936

D120566 was an earlier effort to upstream a fix for this issue.
---
 clang/include/clang/AST/GlobalDecl.h          | 37 ++++++++++++-----
 clang/include/clang/AST/Type.h                |  5 +++
 clang/lib/AST/Expr.cpp                        |  3 +-
 clang/lib/AST/ItaniumMangle.cpp               | 14 +++++++
 clang/lib/AST/Mangle.cpp                      |  2 +-
 clang/lib/AST/MicrosoftMangle.cpp             | 11 +++++
 clang/lib/CodeGen/CGCall.cpp                  | 27 +++++++++++--
 clang/lib/CodeGen/CGExpr.cpp                  | 40 +++++++++++++++++++
 clang/lib/CodeGen/CGOpenCLRuntime.cpp         |  5 ++-
 clang/lib/CodeGen/CodeGenModule.cpp           | 15 ++++++-
 clang/lib/CodeGen/CodeGenTypes.h              |  4 +-
 clang/lib/CodeGen/TargetInfo.h                |  2 +
 clang/lib/CodeGen/Targets/AMDGPU.cpp          |  8 ++++
 clang/test/CodeGenOpenCL/reflect.cl           |  2 +-
 clang/test/CodeGenOpenCL/spir-calling-conv.cl |  4 +-
 clang/test/CodeGenOpenCL/visibility.cl        |  8 ++--
 16 files changed, 160 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 386693cabb1fbb..39d779ab7aad31 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -71,6 +71,10 @@ class GlobalDecl {
   GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
       : MultiVersionIndex(MVIndex) {
     if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (D->hasAttr<OpenCLKernelAttr>()) {
+        Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
+        return;
+      }
       Init(D);
       return;
     }
@@ -78,7 +82,8 @@ class GlobalDecl {
   }
   GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
       : Value(D, unsigned(Kind)) {
-    assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
+    assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
+           (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
   }
   GlobalDecl(const NamedDecl *D) { Init(D); }
   GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,20 @@ class GlobalDecl {
   }
 
   KernelReferenceKind getKernelReferenceKind() const {
-    assert(((isa<FunctionDecl>(getDecl()) &&
-             cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
-            (isa<FunctionTemplateDecl>(getDecl()) &&
-             cast<FunctionTemplateDecl>(getDecl())
-                 ->getTemplatedDecl()
-                 ->hasAttr<CUDAGlobalAttr>())) &&
-           "Decl is not a GPU kernel!");
+    assert((((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<CUDAGlobalAttr>())) &&
+            "Decl is not a GPU kernel!") ||
+           (((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<OpenCLKernelAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<OpenCLKernelAttr>())) &&
+            "Decl is not a OpenCL kernel!"));
     return static_cast<KernelReferenceKind>(Value.getInt());
   }
 
@@ -196,9 +208,12 @@ class GlobalDecl {
   }
 
   GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
-    assert(isa<FunctionDecl>(getDecl()) &&
-           cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
-           "Decl is not a GPU kernel!");
+    assert((isa<FunctionDecl>(getDecl()) &&
+            cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+            "Decl is not a GPU kernel!") ||
+           (isa<FunctionDecl>(getDecl()) &&
+            cast<FunctionDecl>(getDecl())->hasAttr<OpenCLKernelAttr>() &&
+            "Decl is not a OpenCL kernel!"));
     GlobalDecl Result(*this);
     Result.Value.setInt(unsigned(Kind));
     return Result;
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 8979129017163b..69372b99fd6280 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -4668,6 +4668,11 @@ class FunctionType : public Type {
   CallingConv getCallConv() const { return getExtInfo().getCC(); }
   ExtInfo getExtInfo() const { return ExtInfo(FunctionTypeBits.ExtInfo); }
 
+  void setCC(unsigned cc) {
+    FunctionTypeBits.ExtInfo =
+        (FunctionTypeBits.ExtInfo & ~ExtInfo::CallConvMask) | cc;
+  }
+
   static_assert((~Qualifiers::FastMask & Qualifiers::CVRMask) == 0,
                 "Const, volatile and restrict are assumed to be a subset of "
                 "the fast qualifiers.");
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index bf2c1b92fa6b49..84e6eefc4a8a96 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -694,7 +694,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
           GD = GlobalDecl(CD, Ctor_Base);
         else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
           GD = GlobalDecl(DD, Dtor_Base);
-        else if (ND->hasAttr<CUDAGlobalAttr>())
+        else if (ND->hasAttr<CUDAGlobalAttr>() ||
+                 ND->hasAttr<OpenCLKernelAttr>())
           GD = GlobalDecl(cast<FunctionDecl>(ND));
         else
           GD = GlobalDecl(ND);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 14bc260d0245fb..9a57f7f4c42a48 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -528,6 +528,7 @@ class CXXNameMangler {
   void mangleSourceName(const IdentifierInfo *II);
   void mangleRegCallName(const IdentifierInfo *II);
   void mangleDeviceStubName(const IdentifierInfo *II);
+  void mangleOCLDeviceStubName(const IdentifierInfo *II);
   void mangleSourceNameWithAbiTags(
       const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
   void mangleLocalName(GlobalDecl GD,
@@ -1563,8 +1564,13 @@ void CXXNameMangler::mangleUnqualifiedName(
       bool IsDeviceStub =
           FD && FD->hasAttr<CUDAGlobalAttr>() &&
           GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+      bool IsOCLDeviceStub =
+          FD && FD->hasAttr<OpenCLKernelAttr>() &&
+          GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
       if (IsDeviceStub)
         mangleDeviceStubName(II);
+      else if (IsOCLDeviceStub)
+        mangleOCLDeviceStubName(II);
       else if (IsRegCall)
         mangleRegCallName(II);
       else
@@ -1782,6 +1788,14 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
       << II->getName();
 }
 
+void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
+  // <source-name> ::= <positive length number> __clang_ocl_kern_imp_
+  // <identifier> <number> ::= [n] <non-negative decimal integer> <identifier>
+  // ::= <unqualified source code identifier>
+  Out << II->getLength() + sizeof("__clang_ocl_kern_imp_") - 1
+      << "__clang_ocl_kern_imp_" << II->getName();
+}
+
 void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
   // <source-name> ::= <positive length number> <identifier>
   // <number> ::= [n] <non-negative decimal integer>
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 4875e8537b3c11..2e3b2a684dd2d7 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -540,7 +540,7 @@ class ASTNameGenerator::Implementation {
         GD = GlobalDecl(CtorD, Ctor_Complete);
       else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
         GD = GlobalDecl(DtorD, Dtor_Complete);
-      else if (D->hasAttr<CUDAGlobalAttr>())
+      else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
         GD = GlobalDecl(cast<FunctionDecl>(D));
       else
         GD = GlobalDecl(D);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index dbc161347025c0..6de8c531232e0d 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1163,9 +1163,20 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
                   ->getTemplatedDecl()
                   ->hasAttr<CUDAGlobalAttr>())) &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+        bool IsOCLDeviceStub =
+            ND &&
+            ((isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) ||
+             (isa<FunctionTemplateDecl>(ND) &&
+              cast<FunctionTemplateDecl>(ND)
+                  ->getTemplatedDecl()
+                  ->hasAttr<OpenCLKernelAttr>())) &&
+            GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
         if (IsDeviceStub)
           mangleSourceName(
               (llvm::Twine("__device_stub__") + II->getName()).str());
+        else if (IsOCLDeviceStub)
+          mangleSourceName(
+              (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
         else
           mangleSourceName(II->getName());
         break;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..59c467cc99d6f1 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -300,6 +300,16 @@ static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
   }
 }
 
+static void setOCLKernelStubCallingConvention(CanQualType &FTy,
+                                              CodeGenModule &CGM,
+                                              const FunctionDecl *FD) {
+  if (FD->hasAttr<OpenCLKernelAttr>()) {
+    const FunctionType *FT = FTy->getAs<FunctionType>();
+    CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
+    FTy = FT->getCanonicalTypeUnqualified();
+  }
+}
+
 /// Arrange the argument and result information for a declaration or
 /// definition of the given C++ non-static member function.  The
 /// member function must be an ordinary function, i.e. not a
@@ -460,15 +470,19 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
 /// Arrange the argument and result information for the declaration or
 /// definition of the given function.
 const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
+CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD,
+                                         CanQualType *FTy_ptr /* = nullptr*/) {
   if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
     if (MD->isImplicitObjectMemberFunction())
       return arrangeCXXMethodDeclaration(MD);
 
-  CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
+  CanQualType FTy = FTy_ptr == nullptr
+                        ? FD->getType()->getCanonicalTypeUnqualified()
+                        : *FTy_ptr;
 
   assert(isa<FunctionType>(FTy));
-  setCUDAKernelCallingConvention(FTy, CGM, FD);
+  if (!FD->getLangOpts().OpenCL)
+    setCUDAKernelCallingConvention(FTy, CGM, FD);
 
   // When declaring a function without a prototype, always use a
   // non-variadic type.
@@ -548,7 +562,12 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
       isa<CXXDestructorDecl>(GD.getDecl()))
     return arrangeCXXStructorDeclaration(GD);
 
-  return arrangeFunctionDeclaration(FD);
+  CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
+  if (FD->hasAttr<OpenCLKernelAttr>() &&
+      GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+    setOCLKernelStubCallingConvention(FTy, CGM, FD);
+  }
+  return arrangeFunctionDeclaration(FD, &FTy);
 }
 
 /// Arrange a thunk that takes 'this' as the first parameter followed by
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 096f4c4f550435..10c189c97e4cb6 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5607,6 +5607,40 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
     return EmitCXXPseudoDestructorExpr(callee.getPseudoDestructorExpr());
   }
 
+  // Change calling convention of callee function at callsite.
+  const Expr *CalleeExpr = E->getCallee()->IgnoreParens();
+  while (auto ICE = dyn_cast<ImplicitCastExpr>(CalleeExpr)) {
+    if (ICE->getCastKind() != CK_FunctionToPointerDecay &&
+        ICE->getCastKind() != CK_BuiltinFnToFnPtr)
+      break;
+    CalleeExpr = ICE->getSubExpr()->IgnoreParens();
+  }
+
+  if (auto DRE = dyn_cast<DeclRefExpr>(CalleeExpr)) {
+    if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
+      if (FD->hasAttr<OpenCLKernelAttr>() && !FD->getBuiltinID()) {
+        const FunctionType *FT =
+            cast<FunctionType>(cast<PointerType>(getContext().getCanonicalType(
+                                                     E->getCallee()->getType()))
+                                   ->getPointeeType());
+        FunctionType *FTNC = const_cast<FunctionType *>(FT);
+        FTNC->setCC(CC_C);
+      }
+    }
+  }
+  if (auto ME = dyn_cast<MemberExpr>(CalleeExpr)) {
+    if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
+      if (FD->hasAttr<OpenCLKernelAttr>() && !FD->getBuiltinID()) {
+        const FunctionType *FT =
+            cast<FunctionType>(cast<PointerType>(getContext().getCanonicalType(
+                                                     E->getCallee()->getType()))
+                                   ->getPointeeType());
+        FunctionType *FTNC = const_cast<FunctionType *>(FT);
+        FTNC->setCC(CC_C);
+      }
+    }
+  }
+
   return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue,
                   /*Chain=*/nullptr, CallOrInvoke);
 }
@@ -5695,11 +5729,17 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
   // Resolve direct calls.
   } else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
+      if (FD->hasAttr<OpenCLKernelAttr>())
+        return EmitDirectCallee(*this,
+                                GlobalDecl(FD, KernelReferenceKind::Stub));
       return EmitDirectCallee(*this, FD);
     }
   } else if (auto ME = dyn_cast<MemberExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
       EmitIgnoredExpr(ME->getBase());
+      if (FD->hasAttr<OpenCLKernelAttr>())
+        return EmitDirectCallee(*this,
+                                GlobalDecl(FD, KernelReferenceKind::Stub));
       return EmitDirectCallee(*this, FD);
     }
 
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 115b618056a445..c77de0701e0e6c 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -127,7 +127,10 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
                                       llvm::Function *InvokeF,
                                       llvm::Value *Block, llvm::Type *BlockTy) {
-  assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
+
+  // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
+  // version), its constituent BlockExpr will also be emitted twice.
+  // assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
   assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
   assert(Block->getType()->isPointerTy() && "Invalid block literal type");
   EnqueuedBlockMap[E].InvokeFunc = InvokeF;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba376f9ecfacde..4454bda69165ac 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1888,6 +1888,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
     } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
                GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       Out << "__device_stub__" << II->getName();
+    } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+               GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+      Out << "__clang_ocl_kern_imp_" << II->getName();
     } else {
       Out << II->getName();
     }
@@ -3284,7 +3287,13 @@ void CodeGenModule::EmitDeferred() {
       continue;
 
     // Otherwise, emit the definition and move on to the next one.
-    EmitGlobalDefinition(D, GV);
+    // Do not emit definition for a device version of OpenCL kernel that does
+    // not have a body.
+    if (!(isa<FunctionDecl>(D.getDecl()) &&
+          (cast<FunctionDecl>(D.getDecl()))->hasAttr<OpenCLKernelAttr>() &&
+          D.getKernelReferenceKind() == KernelReferenceKind::Stub &&
+          !((cast<FunctionDecl>(D.getDecl()))->doesThisDeclarationHaveABody())))
+      EmitGlobalDefinition(D, GV);
 
     // If we found out that we need to emit more decls, do that recursively.
     // This has the advantage that the decls are emitted in a DFS and related
@@ -3842,6 +3851,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+
+    if (FD->hasAttr<OpenCLKernelAttr>())
+      addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
+
     // Update deferred annotations with the latest declaration if the function
     // function was already used or defined.
     if (FD->hasAttr<AnnotateAttr>()) {
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 5aebf9a2122372..fd9f37de67f187 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -207,7 +207,9 @@ class CodeGenTypes {
 
   /// Free functions are functions that are compatible with an ordinary
   /// C function pointer type.
-  const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD);
+  const CGFunctionInfo &
+  arrangeFunctionDeclaration(const FunctionDecl *FD,
+                             CanQualType *FTy_ptr = nullptr);
   const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
                                                 const FunctionType *Ty,
                                                 bool ChainCall);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 373f8b8a80fdb1..2673b1f7e32c28 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -382,6 +382,8 @@ class TargetCodeGenInfo {
   virtual bool shouldEmitDWARFBitFieldSeparators() const { return false; }
 
   virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
+  virtual void
+  setOCLKernelStubCallingConvention(const FunctionType *&FT) const {}
 
   /// Return the device-side type for the CUDA device builtin surface type.
   virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 56ad0503a11ab2..37e07b16193e5f 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -321,6 +321,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
   bool shouldEmitStaticExternCAliases() const override;
   bool shouldEmitDWARFBitFieldSeparators() const override;
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  void
+  setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
 };
 }
 
@@ -598,6 +600,12 @@ void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
       FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
 }
 
+void AMDGPUTargetCodeGenInfo::setOCLKernelStubCallingConvention(
+    const FunctionType *&FT) const {
+  FT = getABIInfo().getContext().adjustFunctionType(
+      FT, FT->getExtInfo().withCallingConv(CC_C));
+}
+
 /// Create an OpenCL kernel for an enqueued block.
 ///
 /// The type of the first argument (the block literal) is the struct type
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 9ae4a5f027d358..0e3e50be9745eb 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -13,7 +13,7 @@ bool device_function() {
 }
 
 // CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // CHECK-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
index 569ea0cbe1af60..26b9a224479200 100644
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -11,8 +11,8 @@ kernel void foo(global int *A)
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A)
+  // CHECK: tail call void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4)
+// CHECK: declare spir_kernel void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4)
diff --git a/clang/test/CodeGenOpenCL/visibility.cl b/clang/test/CodeGenOpenCL/visibility.cl
index addfe33377f939..4af2a38aa4f77a 100644
--- a/clang/test/CodeGenOpenCL/visibility.cl
+++ b/clang/test/CodeGenOpenCL/visibility.cl
@@ -94,19 +94,19 @@ void use() {
     ext_func_default();
 }
 
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern()
 // FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
 // FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
 
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
 // FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
 // FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
 
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
 // FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
 // FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
 
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default()
 // FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
 // FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
 



More information about the cfe-commits mailing list