[llvm-branch-commits] [clang] 320af6b - Create SPIRABIInfo to enable SPIR_FUNC calling convention.

Melanie Blower via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Dec 12 05:53:25 PST 2020


Author: Melanie Blower
Date: 2020-12-12T05:48:20-08:00
New Revision: 320af6b138391d289fe70db39c51da92e8d3d9df

URL: https://github.com/llvm/llvm-project/commit/320af6b138391d289fe70db39c51da92e8d3d9df
DIFF: https://github.com/llvm/llvm-project/commit/320af6b138391d289fe70db39c51da92e8d3d9df.diff

LOG: Create SPIRABIInfo to enable SPIR_FUNC calling convention.

Background: Call to library arithmetic functions for div is emitted by the
compiler and it set wrong “C” calling convention for calls to these functions,
whereas library functions are declared with `spir_function` calling convention.
InstCombine optimization replaces such calls with “unreachable” instruction.
It looks like clang lacks SPIRABIInfo class which should specify default
calling conventions for “system” function calls. SPIR supports only
SPIR_FUNC and SPIR_KERNEL calling convention.

Reviewers: Erich Keane, Anastasia

Differential Revision: https://reviews.llvm.org/D92721

Added: 
    

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/TargetInfo.cpp
    clang/test/CodeGen/complex-math.c
    clang/test/CodeGenOpenCL/builtins.cl
    clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
    clang/test/CodeGenOpenCL/to_addr_builtin.cl
    clang/test/CodeGenOpenCLCXX/atexit.cl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index db7ae582b1d6..316a60c31fd4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4313,8 +4313,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
       return RValue::get(
-          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                             {Arg0, BCast, PacketSize, PacketAlign}));
+          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                          {Arg0, BCast, PacketSize, PacketAlign}));
     } else {
       assert(4 == E->getNumArgs() &&
              "Illegal number of parameters to pipe function");
@@ -4332,9 +4332,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       // it to i32.
       if (Arg2->getType() != Int32Ty)
         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
-      return RValue::get(Builder.CreateCall(
-          CGM.CreateRuntimeFunction(FTy, Name),
-          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
+      return RValue::get(
+          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
     }
   }
   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
@@ -4375,9 +4375,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     // it to i32.
     if (Arg1->getType() != Int32Ty)
       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
-    return RValue::get(
-        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                           {Arg0, Arg1, PacketSize, PacketAlign}));
+    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                                       {Arg0, Arg1, PacketSize, PacketAlign}));
   }
   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
   // functions
@@ -4413,9 +4412,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
 
-    return RValue::get(
-        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                           {Arg0, Arg1, PacketSize, PacketAlign}));
+    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                                       {Arg0, Arg1, PacketSize, PacketAlign}));
   }
   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
   case Builtin::BIget_pipe_num_packets:
@@ -4438,8 +4436,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::FunctionType *FTy = llvm::FunctionType::get(
         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
 
-    return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                                          {Arg0, PacketSize, PacketAlign}));
+    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                                       {Arg0, PacketSize, PacketAlign}));
   }
 
   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
@@ -4461,7 +4459,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
     auto NewCall =
-        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
+        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
       ConvertType(E->getType())));
   }
@@ -4504,8 +4502,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
 
       auto RTCall =
-          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
-                             {Queue, Flags, Range, Kernel, Block});
+          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
+                          {Queue, Flags, Range, Kernel, Block});
       RTCall->setAttributes(ByValAttrSet);
       return RValue::get(RTCall);
     }
@@ -4564,7 +4562,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
       llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
       auto Call = RValue::get(
-          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
+          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
       if (TmpSize)
         EmitLifetimeEnd(TmpSize, TmpPtr);
       return Call;
@@ -4622,8 +4620,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         llvm::FunctionType *FTy = llvm::FunctionType::get(
             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
         return RValue::get(
-            Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                               llvm::ArrayRef<llvm::Value *>(Args)));
+            EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                            llvm::ArrayRef<llvm::Value *>(Args)));
       }
       // Has event info and variadics
       // Pass the number of variadics to the runtime function too.
@@ -4639,8 +4637,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       llvm::FunctionType *FTy = llvm::FunctionType::get(
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
       auto Call =
-          RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
-                                         llvm::ArrayRef<llvm::Value *>(Args)));
+          RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+                                      llvm::ArrayRef<llvm::Value *>(Args)));
       if (TmpSize)
         EmitLifetimeEnd(TmpSize, TmpPtr);
       return Call;
@@ -4656,7 +4654,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
-    return RValue::get(Builder.CreateCall(
+    return RValue::get(EmitRuntimeCall(
         CGM.CreateRuntimeFunction(
             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
                                     false),
@@ -4670,7 +4668,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
-    return RValue::get(Builder.CreateCall(
+    return RValue::get(EmitRuntimeCall(
         CGM.CreateRuntimeFunction(
             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
                                     false),
@@ -4691,7 +4689,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
             : "__get_kernel_sub_group_count_for_ndrange_impl";
-    return RValue::get(Builder.CreateCall(
+    return RValue::get(EmitRuntimeCall(
         CGM.CreateRuntimeFunction(
             llvm::FunctionType::get(
                 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},

diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d4191b943ef5..8d3a343ca2aa 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9935,15 +9935,28 @@ void XCoreTargetCodeGenInfo::emitTargetMetadata(
 // SPIR ABI Implementation
 //===----------------------------------------------------------------------===//
 
+namespace {
+class SPIRABIInfo : public DefaultABIInfo {
+public:
+  SPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
+
+private:
+  void setCCs();
+};
+} // end anonymous namespace
 namespace {
 class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
-      : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
+      : TargetCodeGenInfo(std::make_unique<SPIRABIInfo>(CGT)) {}
   unsigned getOpenCLKernelCallingConv() const override;
 };
 
 } // End anonymous namespace.
+void SPIRABIInfo::setCCs() {
+  assert(getRuntimeCC() == llvm::CallingConv::C);
+  RuntimeCC = llvm::CallingConv::SPIR_FUNC;
+}
 
 namespace clang {
 namespace CodeGen {
@@ -11045,7 +11058,8 @@ TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
   llvm::SmallVector<llvm::Value *, 2> Args;
   for (auto &A : F->args())
     Args.push_back(&A);
-  Builder.CreateCall(Invoke, Args);
+  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
+  call->setCallingConv(Invoke->getCallingConv());
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
   return F;
@@ -11109,7 +11123,8 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
   Args.push_back(Cast);
   for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
     Args.push_back(I);
-  Builder.CreateCall(Invoke, Args);
+  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
+  call->setCallingConv(Invoke->getCallingConv());
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
 

diff  --git a/clang/test/CodeGen/complex-math.c b/clang/test/CodeGen/complex-math.c
index b527e38a3912..8d145b98bc27 100644
--- a/clang/test/CodeGen/complex-math.c
+++ b/clang/test/CodeGen/complex-math.c
@@ -6,6 +6,7 @@
 // RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
 // RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
 // RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple aarch64-unknown-unknown -ffast-math -ffp-contract=fast -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
+// RUN: %clang_cc1 %s -O0 -fno-experimental-new-pass-manager -emit-llvm -triple spir -o - | FileCheck %s --check-prefix=SPIR
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
@@ -107,6 +108,7 @@ float _Complex mul_float_cc(float _Complex a, float _Complex b) {
   // X86: fcmp uno float %[[RI]]
   // X86: call {{.*}} @__mulsc3(
   // X86: ret
+  // SPIR: call spir_func {{.*}} @__mulsc3(
   return a * b;
 }
 
@@ -131,6 +133,8 @@ float _Complex div_float_rc(float a, float _Complex b) {
   // X86: call {{.*}} @__divsc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divsc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_float_rc(float %a, [2 x float] %b.coerce)
   // A = a
@@ -158,6 +162,8 @@ float _Complex div_float_cc(float _Complex a, float _Complex b) {
   // X86: call {{.*}} @__divsc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divsc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_float_cc([2 x float] %a.coerce, [2 x float] %b.coerce)
   //
@@ -278,6 +284,8 @@ double _Complex mul_double_cc(double _Complex a, double _Complex b) {
   // X86: fcmp uno double %[[RI]]
   // X86: call {{.*}} @__muldc3(
   // X86: ret
+
+  // SPIR: call spir_func {{.*}} @__muldc3(
   return a * b;
 }
 
@@ -302,6 +310,8 @@ double _Complex div_double_rc(double a, double _Complex b) {
   // X86: call {{.*}} @__divdc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divdc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_double_rc(double %a, [2 x double] %b.coerce)
   // A = a
@@ -329,6 +339,8 @@ double _Complex div_double_cc(double _Complex a, double _Complex b) {
   // X86: call {{.*}} @__divdc3(
   // X86: ret
 
+  // SPIR: call spir_func {{.*}} @__divdc3(
+
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_double_cc([2 x double] %a.coerce, [2 x double] %b.coerce)
   //
@@ -463,6 +475,7 @@ long double _Complex mul_long_double_cc(long double _Complex a, long double _Com
   // PPC: fcmp uno ppc_fp128 %[[RI]]
   // PPC: call {{.*}} @__multc3(
   // PPC: ret
+  // SPIR: call spir_func {{.*}} @__muldc3(
   return a * b;
 }
 
@@ -490,6 +503,7 @@ long double _Complex div_long_double_rc(long double a, long double _Complex b) {
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+  // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_long_double_rc(fp128 %a, [2 x fp128] %b.coerce)
@@ -521,6 +535,7 @@ long double _Complex div_long_double_cc(long double _Complex a, long double _Com
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+  // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
   // AARCH64-FASTMATH-LABEL: @div_long_double_cc([2 x fp128] %a.coerce, [2 x fp128] %b.coerce)
@@ -603,6 +618,8 @@ _Complex double foo(_Complex double a, _Complex double b) {
   // ARM-LABEL: @foo(
   // ARM: call void @__muldc3
 
+  // SPIR: call spir_func void @__muldc3
+
   // ARMHF-LABEL: @foo(
   // ARMHF: call { double, double } @__muldc3
 

diff  --git a/clang/test/CodeGenOpenCL/builtins.cl b/clang/test/CodeGenOpenCL/builtins.cl
index 36a6fb6b664c..2ab6f12a9f4b 100644
--- a/clang/test/CodeGenOpenCL/builtins.cl
+++ b/clang/test/CodeGenOpenCL/builtins.cl
@@ -5,19 +5,19 @@ void testBranchingOnEnqueueKernel(queue_t default_queue, unsigned flags, ndrange
 
     if (enqueue_kernel(default_queue, flags, ndrange, ^(void) {}))
         (void)0;
-    // CHECK: [[P:%[0-9]+]] = call i32 @__enqueue_kernel
+    // CHECK: [[P:%[0-9]+]] = call spir_func i32 @__enqueue_kernel
     // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0
     // CHECK-NEXT: br i1 [[Q]]
 
     if (get_kernel_work_group_size(^(void) {}))
         (void)0;
-    // CHECK: [[P:%[0-9]+]] = call i32 @__get_kernel_work_group_size
+    // CHECK: [[P:%[0-9]+]] = call spir_func i32 @__get_kernel_work_group_size
     // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0
     // CHECK-NEXT: br i1 [[Q]]
 
     if (get_kernel_preferred_work_group_size_multiple(^(void) {}))
         (void)0;
-    // CHECK: [[P:%[0-9]+]] = call i32 @__get_kernel_preferred_work_group_size_multiple_impl
+    // CHECK: [[P:%[0-9]+]] = call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl
     // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0
     // CHECK-NEXT: br i1 [[Q]]
 }
@@ -27,32 +27,32 @@ void testBranchinOnPipeOperations(read_only pipe int r, write_only pipe int w, g
 
     if (read_pipe(r, ptr))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__read_pipe_2
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__read_pipe_2
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (write_pipe(w, ptr))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__write_pipe_2
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__write_pipe_2
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_num_packets(r))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_num_packets_ro
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_num_packets_ro
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_num_packets(w))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_num_packets_wo
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_num_packets_wo
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_max_packets(r))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_max_packets_ro
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_max_packets_ro
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 
     if (get_pipe_max_packets(w))
         (void)0;
-    // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_max_packets_wo
+    // CHECK: [[R:%[0-9]+]] = call spir_func i32 @__get_pipe_max_packets_wo
     // CHECK-NEXT: icmp ne i32 [[R]], 0
 }
 
@@ -61,21 +61,21 @@ void testBranchingOnAddressSpaceCast(generic long* ptr) {
 
     if (to_global(ptr))
         (void)0;
-    // CHECK:       [[P:%[0-9]+]] = call [[GLOBAL_VOID:i8 addrspace\(1\)\*]] @__to_global([[GENERIC_VOID:i8 addrspace\(4\)\*]] {{%[0-9]+}})
+    // CHECK:       [[P:%[0-9]+]] = call spir_func [[GLOBAL_VOID:i8 addrspace\(1\)\*]] @__to_global([[GENERIC_VOID:i8 addrspace\(4\)\*]] {{%[0-9]+}})
     // CHECK-NEXT:  [[Q:%[0-9]+]] = bitcast [[GLOBAL_VOID]] [[P]] to [[GLOBAL_i64:i64 addrspace\(1\)\*]]
     // CHECK-NEXT:  [[BOOL:%[a-z0-9]+]] = icmp ne [[GLOBAL_i64]] [[Q]], null
     // CHECK-NEXT:  br i1 [[BOOL]]
 
     if (to_local(ptr))
         (void)0;
-    // CHECK:       [[P:%[0-9]+]] = call [[LOCAL_VOID:i8 addrspace\(3\)\*]] @__to_local([[GENERIC_VOID]] {{%[0-9]+}})
+    // CHECK:       [[P:%[0-9]+]] = call spir_func [[LOCAL_VOID:i8 addrspace\(3\)\*]] @__to_local([[GENERIC_VOID]] {{%[0-9]+}})
     // CHECK-NEXT:  [[Q:%[0-9]+]] = bitcast [[LOCAL_VOID]] [[P]] to [[LOCAL_i64:i64 addrspace\(3\)\*]]
     // CHECK-NEXT:  [[BOOL:%[a-z0-9]+]] = icmp ne [[LOCAL_i64]] [[Q]], null
     // CHECK-NEXT:  br i1 [[BOOL]]
 
     if (to_private(ptr))
         (void)0;
-    // CHECK:       [[P:%[0-9]+]] = call [[PRIVATE_VOID:i8\*]] @__to_private([[GENERIC_VOID]] {{%[0-9]+}})
+    // CHECK:       [[P:%[0-9]+]] = call spir_func [[PRIVATE_VOID:i8\*]] @__to_private([[GENERIC_VOID]] {{%[0-9]+}})
     // CHECK-NEXT:  [[Q:%[0-9]+]] = bitcast [[PRIVATE_VOID]] [[P]] to [[PRIVATE_i64:i64\*]]
     // CHECK-NEXT:  [[BOOL:%[a-z0-9]+]] = icmp ne [[PRIVATE_i64]] [[Q]], null
     // CHECK-NEXT:  br i1 [[BOOL]]

diff  --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 73a146952d51..c4b7fb01a1de 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -80,7 +80,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
   // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
@@ -97,7 +97,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
   // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block4 to %struct.__opencl_block_literal_generic*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic_events
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
@@ -106,7 +106,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
                    a[i] = b[i];
                  });
 
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic_events
   // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* null, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
   enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
                  ^(void) {
@@ -119,13 +119,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES1]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES1]], i32 0, i32 0
   // B32: store i32 256, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i32 0, i32 0
   // B64: store i64 256, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -144,13 +144,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES2]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES2]], i32 0, i32 0
   // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i32 0, i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -171,13 +171,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES3]], i32 0, i32 0
   // B32: store i32 256, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i32 0, i32 0
   // B64: store i64 256, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -198,13 +198,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES4]], i32 0, i32 0
   // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i32 0, i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -223,13 +223,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES5]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES5]], i32 0, i32 0
   // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i32 0, i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -247,7 +247,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]* %[[BLOCK_SIZES6]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 0
   // B32: store i32 1, i32* %[[TMP]], align 4
@@ -261,7 +261,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8
   // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 2
   // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
@@ -279,13 +279,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES7]] to i8*
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i64 0, i64 0
-  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+  // CHECK-LIFETIMES-LABEL: call spir_func i32 @__enqueue_kernel_varargs(
   // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]])
   // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES7]], i32 0, i32 0
   // B32: store i32 0, i32* %[[TMP]], align 4
   // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i32 0, i32 0
   // B64: store i64 4294967296, i64* %[[TMP]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
@@ -318,14 +318,14 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   enqueue_kernel(default_queue, flags, ndrange, block_A);
 
   // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
-  // COMMON: call i32 @__get_kernel_work_group_size_impl(
+  // COMMON: call spir_func i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   unsigned size = get_kernel_work_group_size(block_A);
@@ -352,7 +352,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
   b2(0);
   // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
-  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON: call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(b2);
@@ -365,38 +365,38 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
-  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-LABEL: call spir_func i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, block_C);
 
   // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
-  // COMMON: call i32 @__get_kernel_work_group_size_impl(
+  // COMMON: call spir_func i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_work_group_size(block_B);
 
   // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal ind invoke functions are emitted.
-  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON: call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_A);
 
   // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
-  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON: call spir_func i32 @__get_kernel_preferred_work_group_size_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_G);
 
   // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
-  // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
+  // COMMON: call spir_func i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
 
   // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
-  // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
+  // COMMON: call spir_func i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
@@ -404,7 +404,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
 
 // COMMON: define internal spir_kernel void [[INVLK1]](i8 addrspace(4)* %0) #{{[0-9]+}} {
 // COMMON: entry:
-// COMMON:  call void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %0)
+// COMMON:  call spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %0)
 // COMMON:  ret void
 // COMMON: }
 // COMMON: define internal spir_kernel void [[INVLK2]](i8 addrspace(4)*{{.*}})
@@ -415,7 +415,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
 // COMMON: define internal spir_kernel void [[INVGK5]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK6]](i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3) #{{[0-9]+}} {
 // COMMON: entry:
-// COMMON:  call void @__device_side_enqueue_block_invoke_9(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3)
+// COMMON:  call spir_func void @__device_side_enqueue_block_invoke_9(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3)
 // COMMON:  ret void
 // COMMON: }
 // COMMON: define internal spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})

diff  --git a/clang/test/CodeGenOpenCL/to_addr_builtin.cl b/clang/test/CodeGenOpenCL/to_addr_builtin.cl
index 52dd72f18a34..b5b197079b74 100644
--- a/clang/test/CodeGenOpenCL/to_addr_builtin.cl
+++ b/clang/test/CodeGenOpenCL/to_addr_builtin.cl
@@ -14,74 +14,74 @@ void test(void) {
   generic int *gen;
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(glob);
   
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(loc);
  
   //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(priv);
  
   //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
   glob = to_global(gen);
   
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(glob);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(loc);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(priv);
 
   //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
   loc = to_local(gen);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(glob);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(loc);
 
   //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(priv);
 
   //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8* @__to_private(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
   priv = to_private(gen);
 
   //CHECK: %[[ARG:.*]] = addrspacecast %[[A]]* %{{.*}} to i8 addrspace(4)*
-  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %[[RET:.*]] = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
   //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to %[[A]] addrspace(1)*
   PA pA;
   GA gA = to_global(pA);
 
   //CHECK-NOT: addrspacecast
   //CHECK-NOT: bitcast
-  //CHECK: call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %{{.*}})
+  //CHECK: call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %{{.*}})
   //CHECK-NOT: addrspacecast
   //CHECK-NOT: bitcast
   generic void *gen_v;

diff  --git a/clang/test/CodeGenOpenCLCXX/atexit.cl b/clang/test/CodeGenOpenCLCXX/atexit.cl
index 7bcaede2c2a1..2b28aeaacf45 100644
--- a/clang/test/CodeGenOpenCLCXX/atexit.cl
+++ b/clang/test/CodeGenOpenCLCXX/atexit.cl
@@ -5,7 +5,7 @@ struct S {
 };
 S s;
 
-//CHECK-LABEL: define internal void @__cxx_global_var_init()
-//CHECK: call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S addrspace(4)*)* @_ZNU3AS41SD1Ev to void (i8*)*), i8* null, i8 addrspace(1)* @__dso_handle)
+//CHECK-LABEL: define internal spir_func void @__cxx_global_var_init()
+//CHECK: call spir_func i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S addrspace(4)*)* @_ZNU3AS41SD1Ev to void (i8*)*), i8* null, i8 addrspace(1)* @__dso_handle)
 
-//CHECK: declare i32 @__cxa_atexit(void (i8*)*, i8*, i8 addrspace(1)*)
+//CHECK: declare spir_func i32 @__cxa_atexit(void (i8*)*, i8*, i8 addrspace(1)*)


        


More information about the llvm-branch-commits mailing list