[clang] [Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel (PR #115821)

Aniket Lal via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 29 00:36:41 PST 2024


https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/115821

>From 107f8dbc6b2fa157ce3936a086093882012b9d62 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Mon, 25 Nov 2024 14:18:36 +0530
Subject: [PATCH 1/2] [Clang][OpenCL][AMDGPU] Allow a kernel to call another
 kernel

This feature is currently not supported in the compiler.
To facilitate this we emit a stub version of each kernel
function body with different name mangling scheme, and
replaces the respective kernel call-sites appropriately.

Fixes https://github.com/llvm/llvm-project/issues/60313

D120566 was an earlier attempt made to upstream a solution
for this issue.
---
 clang/include/clang/AST/GlobalDecl.h          | 37 +++++++++----
 clang/lib/AST/Expr.cpp                        |  3 +-
 clang/lib/AST/ItaniumMangle.cpp               | 15 ++++++
 clang/lib/AST/Mangle.cpp                      |  2 +-
 clang/lib/AST/MicrosoftMangle.cpp             |  6 +++
 clang/lib/CodeGen/CGBlocks.cpp                | 16 +++---
 clang/lib/CodeGen/CGCall.cpp                  | 11 +++-
 clang/lib/CodeGen/CGExpr.cpp                  |  5 +-
 clang/lib/CodeGen/CGOpenCLRuntime.cpp         | 11 +++-
 clang/lib/CodeGen/CGOpenCLRuntime.h           |  4 +-
 clang/lib/CodeGen/CodeGenModule.cpp           |  7 +++
 .../test/CodeGenOpenCL/opencl-kernel-call.cl  | 43 +++++++++++++++
 clang/test/CodeGenOpenCL/reflect.cl           |  2 +-
 clang/test/CodeGenOpenCL/spir-calling-conv.cl |  4 +-
 clang/test/CodeGenOpenCL/visibility.cl        | 53 ++++++++++++++-----
 15 files changed, 178 insertions(+), 41 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/opencl-kernel-call.cl

diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 386693cabb1fbb..8a9f4b4c60e5e5 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -71,6 +71,10 @@ class GlobalDecl {
   GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
       : MultiVersionIndex(MVIndex) {
     if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (D->hasAttr<OpenCLKernelAttr>()) {
+        Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
+        return;
+      }
       Init(D);
       return;
     }
@@ -78,7 +82,8 @@ class GlobalDecl {
   }
   GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
       : Value(D, unsigned(Kind)) {
-    assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
+    assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
+           (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
   }
   GlobalDecl(const NamedDecl *D) { Init(D); }
   GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,15 @@ class GlobalDecl {
   }
 
   KernelReferenceKind getKernelReferenceKind() const {
-    assert(((isa<FunctionDecl>(getDecl()) &&
-             cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
-            (isa<FunctionTemplateDecl>(getDecl()) &&
-             cast<FunctionTemplateDecl>(getDecl())
-                 ->getTemplatedDecl()
-                 ->hasAttr<CUDAGlobalAttr>())) &&
-           "Decl is not a GPU kernel!");
+    assert((((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<CUDAGlobalAttr>())) &&
+            "Decl is not a GPU kernel!") ||
+           (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
+
     return static_cast<KernelReferenceKind>(Value.getInt());
   }
 
@@ -196,13 +203,21 @@ class GlobalDecl {
   }
 
   GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
-    assert(isa<FunctionDecl>(getDecl()) &&
-           cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
-           "Decl is not a GPU kernel!");
+    assert((isa<FunctionDecl>(getDecl()) &&
+            cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+            "Decl is not a GPU kernel!") ||
+           (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
     GlobalDecl Result(*this);
     Result.Value.setInt(unsigned(Kind));
     return Result;
   }
+
+  bool isDeclOpenCLKernel() const {
+    auto FD = dyn_cast<FunctionDecl>(getDecl());
+    if (FD)
+      return FD->hasAttr<OpenCLKernelAttr>();
+    return FD;
+  }
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index a4fb4d5a1f2ec4..ddd88ac6a21050 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -693,7 +693,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
           GD = GlobalDecl(CD, Ctor_Base);
         else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
           GD = GlobalDecl(DD, Dtor_Base);
-        else if (ND->hasAttr<CUDAGlobalAttr>())
+        else if (ND->hasAttr<CUDAGlobalAttr>() ||
+                 ND->hasAttr<OpenCLKernelAttr>())
           GD = GlobalDecl(cast<FunctionDecl>(ND));
         else
           GD = GlobalDecl(ND);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 27a993a631dae9..7e46d6c520fb69 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -526,6 +526,7 @@ class CXXNameMangler {
   void mangleSourceName(const IdentifierInfo *II);
   void mangleRegCallName(const IdentifierInfo *II);
   void mangleDeviceStubName(const IdentifierInfo *II);
+  void mangleOCLDeviceStubName(const IdentifierInfo *II);
   void mangleSourceNameWithAbiTags(
       const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
   void mangleLocalName(GlobalDecl GD,
@@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName(
       bool IsDeviceStub =
           FD && FD->hasAttr<CUDAGlobalAttr>() &&
           GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+      bool IsOCLDeviceStub =
+          FD && FD->hasAttr<OpenCLKernelAttr>() &&
+          GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
       if (IsDeviceStub)
         mangleDeviceStubName(II);
+      else if (IsOCLDeviceStub)
+        mangleOCLDeviceStubName(II);
       else if (IsRegCall)
         mangleRegCallName(II);
       else
@@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
       << II->getName();
 }
 
+void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
+  // <source-name> ::= <positive length number> __clang_ocl_kern_imp_
+  // <identifier> <number> ::= [n] <non-negative decimal integer> <identifier>
+  // ::= <unqualified source code identifier>
+  StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_";
+  Out << II->getLength() + OCLDeviceStubNamePrefix.size() - 1
+      << OCLDeviceStubNamePrefix << II->getName();
+}
+
 void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
   // <source-name> ::= <positive length number> <identifier>
   // <number> ::= [n] <non-negative decimal integer>
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 15be9c62bf8880..1d1c4dd0e39b7a 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -539,7 +539,7 @@ class ASTNameGenerator::Implementation {
         GD = GlobalDecl(CtorD, Ctor_Complete);
       else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
         GD = GlobalDecl(DtorD, Dtor_Complete);
-      else if (D->hasAttr<CUDAGlobalAttr>())
+      else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
         GD = GlobalDecl(cast<FunctionDecl>(D));
       else
         GD = GlobalDecl(D);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 94a7ce6c1321d3..e439875a2538ba 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1161,9 +1161,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
                   ->getTemplatedDecl()
                   ->hasAttr<CUDAGlobalAttr>())) &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+        bool IsOCLDeviceStub =
+            ND && (isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) &&
+            GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
         if (IsDeviceStub)
           mangleSourceName(
               (llvm::Twine("__device_stub__") + II->getName()).str());
+        else if (IsOCLDeviceStub)
+          mangleSourceName(
+              (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
         else
           mangleSourceName(II->getName());
         break;
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7b..7f98a897c36907 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -48,7 +48,7 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
 BlockByrefHelpers::~BlockByrefHelpers() {}
 
 /// Build the given block as a global block.
-static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
+static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, GlobalDecl GD,
                                         const CGBlockInfo &blockInfo,
                                         llvm::Constant *blockFn);
 
@@ -1085,8 +1085,10 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
       blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
 
   if (IsOpenCL) {
-    CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
-                                           result, blockInfo.StructureType);
+    CGM.getOpenCLRuntime().recordBlockInfo(
+        blockInfo.BlockExpression, InvokeFn, result, blockInfo.StructureType,
+        CurGD && CurGD.isDeclOpenCLKernel() &&
+            (CurGD.getKernelReferenceKind() == KernelReferenceKind::Kernel));
   }
 
   return result;
@@ -1285,7 +1287,7 @@ CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE,
   return getAddrOfGlobalBlockIfEmitted(BE);
 }
 
-static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
+static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, GlobalDecl GD,
                                         const CGBlockInfo &blockInfo,
                                         llvm::Constant *blockFn) {
   assert(blockInfo.CanBeGlobal);
@@ -1378,7 +1380,9 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
     CGM.getOpenCLRuntime().recordBlockInfo(
         blockInfo.BlockExpression,
         cast<llvm::Function>(blockFn->stripPointerCasts()), Result,
-        literal->getValueType());
+        literal->getValueType(),
+        GD && GD.isDeclOpenCLKernel() &&
+            (GD.getKernelReferenceKind() == KernelReferenceKind::Kernel));
   return Result;
 }
 
@@ -1487,7 +1491,7 @@ llvm::Function *CodeGenFunction::GenerateBlockFunction(
     auto GenVoidPtrTy = getContext().getLangOpts().OpenCL
                             ? CGM.getOpenCLRuntime().getGenericVoidPointerType()
                             : VoidPtrTy;
-    buildGlobalBlock(CGM, blockInfo,
+    buildGlobalBlock(CGM, CurGD, blockInfo,
                      llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy));
   }
 
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 20455dbb820914..215fe5bd9da8dd 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2343,6 +2343,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
   // Collect function IR attributes from the CC lowering.
   // We'll collect the paramete and result attributes later.
   CallingConv = FI.getEffectiveCallingConvention();
+  GlobalDecl GD = CalleeInfo.getCalleeDecl();
+  const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
+  if (TargetDecl) {
+    if (auto FD = dyn_cast<FunctionDecl>(TargetDecl)) {
+      if (FD->hasAttr<OpenCLKernelAttr>() &&
+          GD.getKernelReferenceKind() == KernelReferenceKind::Stub)
+        CallingConv = llvm::CallingConv::C;
+    }
+  }
   if (FI.isNoReturn())
     FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
   if (FI.isCmseNSCall())
@@ -2352,8 +2361,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
   AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
                                      CalleeInfo.getCalleeFunctionProtoType());
 
-  const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
-
   // Attach assumption attributes to the declaration. If this is a call
   // site, attach assumptions from the caller to the call as well.
   AddAttributesFromOMPAssumes(FuncAttrs, TargetDecl);
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 229f0e29f02341..e2222a6393b9b1 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5692,7 +5692,10 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
   // Resolve direct calls.
   } else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
-      return EmitDirectCallee(*this, FD);
+      auto CalleeDecl = FD->hasAttr<OpenCLKernelAttr>()
+                            ? GlobalDecl(FD, KernelReferenceKind::Stub)
+                            : FD;
+      return EmitDirectCallee(*this, CalleeDecl);
     }
   } else if (auto ME = dyn_cast<MemberExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 115b618056a445..a78d783831293e 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -126,14 +126,21 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
 /// corresponding block expression.
 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
                                       llvm::Function *InvokeF,
-                                      llvm::Value *Block, llvm::Type *BlockTy) {
-  assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
+                                      llvm::Value *Block, llvm::Type *BlockTy,
+                                      bool isBlkExprInOCLKern) {
+
+  // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
+  // version), its constituent BlockExpr will also be emitted twice.
+  assert((!EnqueuedBlockMap.contains(E) ||
+          EnqueuedBlockMap[E].isBlkExprInOCLKern != isBlkExprInOCLKern) &&
+         "Block expression emitted twice");
   assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
   assert(Block->getType()->isPointerTy() && "Invalid block literal type");
   EnqueuedBlockMap[E].InvokeFunc = InvokeF;
   EnqueuedBlockMap[E].BlockArg = Block;
   EnqueuedBlockMap[E].BlockTy = BlockTy;
   EnqueuedBlockMap[E].KernelHandle = nullptr;
+  EnqueuedBlockMap[E].isBlkExprInOCLKern = isBlkExprInOCLKern;
 }
 
 llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h
index 34613c3516f374..78bb5980cd87dc 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.h
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.h
@@ -46,6 +46,7 @@ class CGOpenCLRuntime {
     llvm::Value *KernelHandle;  /// Enqueued block kernel reference.
     llvm::Value *BlockArg;      /// The first argument to enqueued block kernel.
     llvm::Type *BlockTy;        /// Type of the block argument.
+    bool isBlkExprInOCLKern; /// Does the BlockExpr reside in an OpenCL Kernel.
   };
   /// Maps block expression to block information.
   llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
@@ -93,7 +94,8 @@ class CGOpenCLRuntime {
   /// \param InvokeF invoke function emitted for the block expression.
   /// \param Block block literal emitted for the block expression.
   void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
-                       llvm::Value *Block, llvm::Type *BlockTy);
+                       llvm::Value *Block, llvm::Type *BlockTy,
+                       bool isBlkExprInOCLKern);
 
   /// \return LLVM block invoke function emitted for an expression derived from
   /// the block expression.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 7189a4689e8156..162752241a45e6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1887,6 +1887,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
     } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
                GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       Out << "__device_stub__" << II->getName();
+    } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+               GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+      Out << "__clang_ocl_kern_imp_" << II->getName();
     } else {
       Out << II->getName();
     }
@@ -3850,6 +3853,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+
+    if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
+      addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
+
     // Update deferred annotations with the latest declaration if the function
     // function was already used or defined.
     if (FD->hasAttr<AnnotateAttr>()) {
diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
new file mode 100644
index 00000000000000..f575728f237630
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define dso_local amdgpu_kernel void @callee_kern({{.*}})
+__attribute__((noinline)) kernel void callee_kern(global int *A){
+  *A = 1;
+}
+
+__attribute__((noinline)) kernel void ext_callee_kern(global int *A);
+
+// CHECK: define dso_local void @callee_func({{.*}})
+__attribute__((noinline)) void callee_func(global int *A){
+  *A = 2;
+}
+
+// CHECK: define dso_local amdgpu_kernel void @caller_kern({{.*}})
+kernel void caller_kern(global int* A){
+  callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}})
+  ext_callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+  callee_func(A);
+  // CHECK: tail call void @callee_func({{.*}})
+
+}
+
+// CHECK: define dso_local void @__clang_ocl_kern_imp_callee_kern({{.*}})
+
+// CHECK: declare void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+
+// CHECK: define dso_local void @caller_func({{.*}})
+void caller_func(global int* A){
+  callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) #7
+  ext_callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) #8
+  callee_func(A);
+  // CHECK: tail call void @callee_func({{.*}})
+}
+
+// CHECK: define dso_local void @__clang_ocl_kern_imp_caller_kern({{.*}}) 
+// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) 
+// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) 
+// CHECK: tail call void @callee_func({{.*}})
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 9ae4a5f027d358..0e3e50be9745eb 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -13,7 +13,7 @@ bool device_function() {
 }
 
 // CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // CHECK-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
index 569ea0cbe1af60..6c8f20511b8bd6 100644
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -11,8 +11,8 @@ kernel void foo(global int *A)
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A)
+  // CHECK: tail call void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4)
+// CHECK: declare void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4)
diff --git a/clang/test/CodeGenOpenCL/visibility.cl b/clang/test/CodeGenOpenCL/visibility.cl
index addfe33377f939..e5dc5b29c5140b 100644
--- a/clang/test/CodeGenOpenCL/visibility.cl
+++ b/clang/test/CodeGenOpenCL/visibility.cl
@@ -85,31 +85,42 @@ __attribute__((visibility("default"))) extern void ext_func_default();
 void use() {
     glob = ext + ext_hidden + ext_protected + ext_default;
     ext_kern();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern()
     ext_kern_hidden();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
     ext_kern_protected();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
     ext_kern_default();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default()
     ext_func();
     ext_func_hidden();
     ext_func_protected();
     ext_func_default();
 }
 
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern()
 
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
 
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
-
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
-// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
-// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
 
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default()
+// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default()
+// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default()
 
 // FVIS-DEFAULT: declare void @ext_func()
 // FVIS-PROTECTED: declare protected void @ext_func()
@@ -126,3 +137,19 @@ void use() {
 // FVIS-DEFAULT: declare void @ext_func_default()
 // FVIS-PROTECTED: declare void @ext_func_default()
 // FVIS-HIDDEN: declare void @ext_func_default()
+
+// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern()
+
+// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden()
+
+// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected()
+
+// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
+// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
+// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default()

>From 013801b1fc82ec6806b876d93da24d4f0f2ed098 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Fri, 29 Nov 2024 14:03:57 +0530
Subject: [PATCH 2/2] [Clang][OpenCL][AMDGPU] Allow a kernel to call another
 kernel

Simplifying isDeclOpenCLKernel() and removing resolved comments
---
 clang/include/clang/AST/GlobalDecl.h  | 5 ++---
 clang/lib/CodeGen/CGOpenCLRuntime.cpp | 2 --
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 8a9f4b4c60e5e5..342232dcf56418 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -213,10 +213,9 @@ class GlobalDecl {
   }
 
   bool isDeclOpenCLKernel() const {
-    auto FD = dyn_cast<FunctionDecl>(getDecl());
-    if (FD)
+    if (auto FD = dyn_cast<FunctionDecl>(getDecl()))
       return FD->hasAttr<OpenCLKernelAttr>();
-    return FD;
+    return false;
   }
 };
 
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index a78d783831293e..31d40a8774b7b1 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -129,8 +129,6 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
                                       llvm::Value *Block, llvm::Type *BlockTy,
                                       bool isBlkExprInOCLKern) {
 
-  // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
-  // version), its constituent BlockExpr will also be emitted twice.
   assert((!EnqueuedBlockMap.contains(E) ||
           EnqueuedBlockMap[E].isBlkExprInOCLKern != isBlkExprInOCLKern) &&
          "Block expression emitted twice");



More information about the cfe-commits mailing list