[clang] [Clang] Emit stub version of OpenCL Kernel (PR #115821)

Aniket Lal via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 25 01:07:32 PST 2024


https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/115821

>From 61c90047a2e82e8dd051363f9e56e52e9931c481 Mon Sep 17 00:00:00 2001
From: anikelal <anikelal at amd.com>
Date: Mon, 25 Nov 2024 14:18:36 +0530
Subject: [PATCH] OpenCL allows a kernel function to call another kernel
 function. To facilitate this, we emit a stub version of each kernel function
 with a different name-mangling scheme and redirect kernel call sites to
 that stub.

https://github.com/llvm/llvm-project/issues/60313
https://ontrack-internal.amd.com/browse/SWDEV-245936
---
 clang/include/clang/AST/GlobalDecl.h          | 37 +++++++++----
 clang/lib/AST/Expr.cpp                        |  3 +-
 clang/lib/AST/ItaniumMangle.cpp               | 15 ++++++
 clang/lib/AST/Mangle.cpp                      |  2 +-
 clang/lib/AST/MicrosoftMangle.cpp             |  6 +++
 clang/lib/CodeGen/CGCall.cpp                  | 11 +++-
 clang/lib/CodeGen/CGExpr.cpp                  |  5 +-
 clang/lib/CodeGen/CGOpenCLRuntime.cpp         |  5 +-
 clang/lib/CodeGen/CodeGenModule.cpp           |  7 +++
 .../test/CodeGenOpenCL/opencl-kernel-call.cl  | 43 +++++++++++++++
 clang/test/CodeGenOpenCL/reflect.cl           |  2 +-
 clang/test/CodeGenOpenCL/spir-calling-conv.cl |  4 +-
 clang/test/CodeGenOpenCL/visibility.cl        | 53 ++++++++++++++-----
 13 files changed, 160 insertions(+), 33 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/opencl-kernel-call.cl

diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 386693cabb1fbb..8a9f4b4c60e5e5 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -71,6 +71,10 @@ class GlobalDecl {
   GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
       : MultiVersionIndex(MVIndex) {
     if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (D->hasAttr<OpenCLKernelAttr>()) {
+        Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
+        return;
+      }
       Init(D);
       return;
     }
@@ -78,7 +82,8 @@ class GlobalDecl {
   }
   GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
       : Value(D, unsigned(Kind)) {
-    assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
+    assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
+           (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
   }
   GlobalDecl(const NamedDecl *D) { Init(D); }
   GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,15 @@ class GlobalDecl {
   }
 
   KernelReferenceKind getKernelReferenceKind() const {
-    assert(((isa<FunctionDecl>(getDecl()) &&
-             cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
-            (isa<FunctionTemplateDecl>(getDecl()) &&
-             cast<FunctionTemplateDecl>(getDecl())
-                 ->getTemplatedDecl()
-                 ->hasAttr<CUDAGlobalAttr>())) &&
-           "Decl is not a GPU kernel!");
+    assert((((isa<FunctionDecl>(getDecl()) &&
+              cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+             (isa<FunctionTemplateDecl>(getDecl()) &&
+              cast<FunctionTemplateDecl>(getDecl())
+                  ->getTemplatedDecl()
+                  ->hasAttr<CUDAGlobalAttr>())) &&
+            "Decl is not a GPU kernel!") ||
+           (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
+
     return static_cast<KernelReferenceKind>(Value.getInt());
   }
 
@@ -196,13 +203,21 @@ class GlobalDecl {
   }
 
   GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
-    assert(isa<FunctionDecl>(getDecl()) &&
-           cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
-           "Decl is not a GPU kernel!");
+    assert((isa<FunctionDecl>(getDecl()) &&
+            cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+            "Decl is not a GPU kernel!") ||
+           (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
     GlobalDecl Result(*this);
     Result.Value.setInt(unsigned(Kind));
     return Result;
   }
+
+  /// Whether the referenced declaration is a FunctionDecl that carries
+  /// OpenCLKernelAttr; every other kind of declaration yields false.
+  bool isDeclOpenCLKernel() const {
+    const auto *Fn = dyn_cast<FunctionDecl>(getDecl());
+    return Fn && Fn->hasAttr<OpenCLKernelAttr>();
+  }
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index a4fb4d5a1f2ec4..ddd88ac6a21050 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -693,7 +693,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
           GD = GlobalDecl(CD, Ctor_Base);
         else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
           GD = GlobalDecl(DD, Dtor_Base);
-        else if (ND->hasAttr<CUDAGlobalAttr>())
+        else if (ND->hasAttr<CUDAGlobalAttr>() ||
+                 ND->hasAttr<OpenCLKernelAttr>())
           GD = GlobalDecl(cast<FunctionDecl>(ND));
         else
           GD = GlobalDecl(ND);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 27a993a631dae9..7e46d6c520fb69 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -526,6 +526,7 @@ class CXXNameMangler {
   void mangleSourceName(const IdentifierInfo *II);
   void mangleRegCallName(const IdentifierInfo *II);
   void mangleDeviceStubName(const IdentifierInfo *II);
+  void mangleOCLDeviceStubName(const IdentifierInfo *II);
   void mangleSourceNameWithAbiTags(
       const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
   void mangleLocalName(GlobalDecl GD,
@@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName(
       bool IsDeviceStub =
           FD && FD->hasAttr<CUDAGlobalAttr>() &&
           GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+      bool IsOCLDeviceStub =
+          FD && FD->hasAttr<OpenCLKernelAttr>() &&
+          GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
       if (IsDeviceStub)
         mangleDeviceStubName(II);
+      else if (IsOCLDeviceStub)
+        mangleOCLDeviceStubName(II);
       else if (IsRegCall)
         mangleRegCallName(II);
       else
@@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
       << II->getName();
 }
 
+void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
+  // <source-name> ::= <positive length number> <identifier>, where the
+  // identifier is "__clang_ocl_kern_imp_" followed by the kernel's own name.
+  // StringRef::size() already excludes the NUL terminator, so no "- 1" here.
+  StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_";
+  Out << II->getLength() + OCLDeviceStubNamePrefix.size()
+      << OCLDeviceStubNamePrefix << II->getName();
+}
+
 void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
   // <source-name> ::= <positive length number> <identifier>
   // <number> ::= [n] <non-negative decimal integer>
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 15be9c62bf8880..1d1c4dd0e39b7a 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -539,7 +539,7 @@ class ASTNameGenerator::Implementation {
         GD = GlobalDecl(CtorD, Ctor_Complete);
       else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
         GD = GlobalDecl(DtorD, Dtor_Complete);
-      else if (D->hasAttr<CUDAGlobalAttr>())
+      else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
         GD = GlobalDecl(cast<FunctionDecl>(D));
       else
         GD = GlobalDecl(D);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 94a7ce6c1321d3..e439875a2538ba 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1161,9 +1161,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
                   ->getTemplatedDecl()
                   ->hasAttr<CUDAGlobalAttr>())) &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+        bool IsOCLDeviceStub =
+            ND && (isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) &&
+            GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
         if (IsDeviceStub)
           mangleSourceName(
               (llvm::Twine("__device_stub__") + II->getName()).str());
+        else if (IsOCLDeviceStub)
+          mangleSourceName(
+              (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
         else
           mangleSourceName(II->getName());
         break;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 35d495d4dfab82..ff5faa35bc7ede 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2343,6 +2343,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
   // Collect function IR attributes from the CC lowering.
   // We'll collect the paramete and result attributes later.
   CallingConv = FI.getEffectiveCallingConvention();
+  GlobalDecl GD = CalleeInfo.getCalleeDecl();
+  const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
+  if (TargetDecl) {
+    if (auto FD = dyn_cast<FunctionDecl>(TargetDecl)) {
+      if (FD->hasAttr<OpenCLKernelAttr>() &&
+          GD.getKernelReferenceKind() == KernelReferenceKind::Stub)
+        CallingConv = llvm::CallingConv::C;
+    }
+  }
   if (FI.isNoReturn())
     FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
   if (FI.isCmseNSCall())
@@ -2352,8 +2361,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
   AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
                                      CalleeInfo.getCalleeFunctionProtoType());
 
-  const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
-
   // Attach assumption attributes to the declaration. If this is a call
   // site, attach assumptions from the caller to the call as well.
   AddAttributesFromOMPAssumes(FuncAttrs, TargetDecl);
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d3f470d401b3d4..9bb99dd3335597 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5691,7 +5691,10 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
   // Resolve direct calls.
   } else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
-      return EmitDirectCallee(*this, FD);
+      auto CalleeDecl = FD->hasAttr<OpenCLKernelAttr>()
+                            ? GlobalDecl(FD, KernelReferenceKind::Stub)
+                            : FD;
+      return EmitDirectCallee(*this, CalleeDecl);
     }
   } else if (auto ME = dyn_cast<MemberExpr>(E)) {
     if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 115b618056a445..c77de0701e0e6c 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -127,7 +127,10 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
                                       llvm::Function *InvokeF,
                                       llvm::Value *Block, llvm::Type *BlockTy) {
-  assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
+
+  // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
+  // version), its constituent BlockExpr will also be emitted twice.
+  // assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
   assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
   assert(Block->getType()->isPointerTy() && "Invalid block literal type");
   EnqueuedBlockMap[E].InvokeFunc = InvokeF;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index b854eeb62a80ce..8193488a45c91c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1887,6 +1887,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
     } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
                GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       Out << "__device_stub__" << II->getName();
+    } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+               GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+      Out << "__clang_ocl_kern_imp_" << II->getName();
     } else {
       Out << II->getName();
     }
@@ -3841,6 +3844,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+
+    if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
+      addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
+
     // Update deferred annotations with the latest declaration if the function
     // function was already used or defined.
     if (FD->hasAttr<AnnotateAttr>()) {
diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
new file mode 100644
index 00000000000000..f575728f237630
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define dso_local amdgpu_kernel void @callee_kern({{.*}})
+__attribute__((noinline)) kernel void callee_kern(global int *A){
+  *A = 1;
+}
+
+__attribute__((noinline)) kernel void ext_callee_kern(global int *A);
+
+// CHECK: define dso_local void @callee_func({{.*}})
+__attribute__((noinline)) void callee_func(global int *A){
+  *A = 2;
+}
+
+// CHECK: define dso_local amdgpu_kernel void @caller_kern({{.*}})
+kernel void caller_kern(global int* A){
+  callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}})
+  ext_callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+  callee_func(A);
+  // CHECK: tail call void @callee_func({{.*}})
+
+}
+
+// CHECK: define dso_local void @__clang_ocl_kern_imp_callee_kern({{.*}})
+
+// CHECK: declare void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
+
+// CHECK: define dso_local void @caller_func({{.*}})
+void caller_func(global int* A){ // non-kernel caller; kernels reached via stubs
+  callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) #{{[0-9]+}}
+  ext_callee_kern(A);
+  // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) #{{[0-9]+}}
+  callee_func(A);
+  // CHECK: tail call void @callee_func({{.*}})
+}
+
+// CHECK: define dso_local void @__clang_ocl_kern_imp_caller_kern({{.*}}) 
+// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) 
+// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) 
+// CHECK: tail call void @callee_func({{.*}})
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 9ae4a5f027d358..0e3e50be9745eb 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -13,7 +13,7 @@ bool device_function() {
 }
 
 // CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
 // CHECK-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
index 569ea0cbe1af60..6c8f20511b8bd6 100644
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -11,8 +11,8 @@ kernel void foo(global int *A)
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A)
+  // CHECK: tail call void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4)
+// CHECK: declare void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4)
diff --git a/clang/test/CodeGenOpenCL/visibility.cl b/clang/test/CodeGenOpenCL/visibility.cl
index addfe33377f939..7be846605fd8fb 100644
--- a/clang/test/CodeGenOpenCL/visibility.cl
+++ b/clang/test/CodeGenOpenCL/visibility.cl
@@ -85,31 +85,42 @@ __attribute__((visibility("default"))) extern void ext_func_default();
 void use() {
     glob = ext + ext_hidden + ext_protected + ext_default;
     ext_kern();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern()
     ext_kern_hidden();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
     ext_kern_protected();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
     ext_kern_default();
+    // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default()
+    // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default()
+    // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default()
     ext_func();
     ext_func_hidden();
     ext_func_protected();
     ext_func_default();
 }
 
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern()
 
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
 
-// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
-// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
-// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
-
-// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
-// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
-// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
+// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
 
+// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default()
+// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default()
+// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default()
 
 // FVIS-DEFAULT: declare void @ext_func()
 // FVIS-PROTECTED: declare protected void @ext_func()
@@ -126,3 +137,19 @@ void use() {
 // FVIS-DEFAULT: declare void @ext_func_default()
 // FVIS-PROTECTED: declare void @ext_func_default()
 // FVIS-HIDDEN: declare void @ext_func_default()
+
+// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern()
+
+// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden()
+
+// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected()
+// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected()
+// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected()
+
+// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
+// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
+// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
\ No newline at end of file



More information about the cfe-commits mailing list