r326937 - [OpenCL] Remove block invoke function from emitted block literal struct

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 7 11:32:58 PST 2018


Author: yaxunl
Date: Wed Mar  7 11:32:58 2018
New Revision: 326937

URL: http://llvm.org/viewvc/llvm-project?rev=326937&view=rev
Log:
[OpenCL] Remove block invoke function from emitted block literal struct

OpenCL runtime tracks the invoke function emitted for
any block expression. Due to restrictions on blocks in
OpenCL (v2.0 s6.12.5), it is always possible to know the
block invoke function when emitting call of block expression
or __enqueue_kernel builtin functions. Since __enqueu_kernel
already has an argument for the invoke function, it is redundant
to have invoke function member in the llvm block literal structure.

This patch removes invoke function from the llvm block literal
structure. It also removes the bitcast of block invoke function
to the generic block literal type which is useless for OpenCL.

This will save some space for the kernel argument, and also
eliminate some store instructions.

Differential Revision: https://reviews.llvm.org/D43783

Modified:
    cfe/trunk/lib/CodeGen/CGBlocks.cpp
    cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
    cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
    cfe/trunk/test/CodeGenOpenCL/blocks.cl
    cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=326937&r1=326936&r2=326937&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Wed Mar  7 11:32:58 2018
@@ -307,25 +307,12 @@ static void initializeForBlockHeader(Cod
 
   assert(elementTypes.empty());
   if (CGM.getLangOpts().OpenCL) {
-    // The header is basically 'struct { int; int; generic void *;
+    // The header is basically 'struct { int; int;
     // custom_fields; }'. Assert that struct is packed.
-    auto GenericAS =
-        CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
-    auto GenPtrAlign =
-        CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
-    auto GenPtrSize =
-        CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
-    assert(CGM.getIntSize() <= GenPtrSize);
-    assert(CGM.getIntAlign() <= GenPtrAlign);
-    assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
     elementTypes.push_back(CGM.IntTy); /* total size */
     elementTypes.push_back(CGM.IntTy); /* align */
-    elementTypes.push_back(
-        CGM.getOpenCLRuntime()
-            .getGenericVoidPointerType()); /* invoke function */
-    unsigned Offset =
-        2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
-    unsigned BlockAlign = GenPtrAlign.getQuantity();
+    unsigned Offset = 2 * CGM.getIntSize().getQuantity();
+    unsigned BlockAlign = CGM.getIntAlign().getQuantity();
     if (auto *Helper =
             CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
       for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -771,20 +758,12 @@ llvm::Value *CodeGenFunction::EmitBlockL
 
 llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
-  auto GenVoidPtrTy =
-      IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
-  LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
-  auto GenVoidPtrSize = CharUnits::fromQuantity(
-      CGM.getTarget().getPointerWidth(
-          CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
-      8);
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
   CodeGenFunction BlockCGF{CGM, true};
   BlockCGF.SanOpts = SanOpts;
   auto *InvokeFn = BlockCGF.GenerateBlockFunction(
       CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
   if (blockInfo.CanBeGlobal)
@@ -853,11 +832,12 @@ llvm::Value *CodeGenFunction::EmitBlockL
           llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
           getIntSize(), "block.align");
     }
-    addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
-    if (!IsOpenCL)
+    if (!IsOpenCL) {
+      addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
+                     getPointerSize(), "block.invoke");
       addHeaderField(descriptor, getPointerSize(), "block.descriptor");
-    else if (auto *Helper =
-                 CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+    } else if (auto *Helper =
+                   CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
       for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
         addHeaderField(
             I.first,
@@ -1060,38 +1040,23 @@ llvm::Type *CodeGenModule::getBlockDescr
 }
 
 llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
+  assert(!getLangOpts().OpenCL && "OpenCL does not need this");
+
   if (GenericBlockLiteralType)
     return GenericBlockLiteralType;
 
   llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
 
-  if (getLangOpts().OpenCL) {
-    // struct __opencl_block_literal_generic {
-    //   int __size;
-    //   int __align;
-    //   __generic void *__invoke;
-    //   /* custom fields */
-    // };
-    SmallVector<llvm::Type *, 8> StructFields(
-        {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
-    if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
-      for (auto I : Helper->getCustomFieldTypes())
-        StructFields.push_back(I);
-    }
-    GenericBlockLiteralType = llvm::StructType::create(
-        StructFields, "struct.__opencl_block_literal_generic");
-  } else {
-    // struct __block_literal_generic {
-    //   void *__isa;
-    //   int __flags;
-    //   int __reserved;
-    //   void (*__invoke)(void *);
-    //   struct __block_descriptor *__descriptor;
-    // };
-    GenericBlockLiteralType =
-        llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
-                                 IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
-  }
+  // struct __block_literal_generic {
+  //   void *__isa;
+  //   int __flags;
+  //   int __reserved;
+  //   void (*__invoke)(void *);
+  //   struct __block_descriptor *__descriptor;
+  // };
+  GenericBlockLiteralType =
+      llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+                               IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
 
   return GenericBlockLiteralType;
 }
@@ -1102,27 +1067,21 @@ RValue CodeGenFunction::EmitBlockCallExp
     E->getCallee()->getType()->getAs<BlockPointerType>();
 
   llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
+  llvm::Value *FuncPtr;
 
-  // Get a pointer to the generic block literal.
-  // For OpenCL we generate generic AS void ptr to be able to reuse the same
-  // block definition for blocks with captures generated as private AS local
-  // variables and without captures generated as global AS program scope
-  // variables.
-  unsigned AddrSpace = 0;
-  if (getLangOpts().OpenCL)
-    AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
-
-  llvm::Type *BlockLiteralTy =
-      llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
-
-  // Bitcast the callee to a block literal.
-  BlockPtr =
-      Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
-
-  // Get the function pointer from the literal.
-  llvm::Value *FuncPtr =
-      Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
-                              CGM.getLangOpts().OpenCL ? 2 : 3);
+  if (!CGM.getLangOpts().OpenCL) {
+    // Get a pointer to the generic block literal.
+    llvm::Type *BlockLiteralTy =
+        llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
+
+    // Bitcast the callee to a block literal.
+    BlockPtr =
+        Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+
+    // Get the function pointer from the literal.
+    FuncPtr =
+        Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
+  }
 
   // Add the block literal.
   CallArgList Args;
@@ -1145,7 +1104,11 @@ RValue CodeGenFunction::EmitBlockCallExp
   EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
 
   // Load the function.
-  llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+  llvm::Value *Func;
+  if (CGM.getLangOpts().OpenCL)
+    Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+  else
+    Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
 
   const FunctionType *FuncTy = FnType->castAs<FunctionType>();
   const CGFunctionInfo &FnInfo =
@@ -1254,14 +1217,14 @@ static llvm::Constant *buildGlobalBlock(
 
     // Reserved
     fields.addInt(CGM.IntTy, 0);
+
+    // Function
+    fields.add(blockFn);
   } else {
     fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
     fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
   }
 
-  // Function
-  fields.add(blockFn);
-
   if (!IsOpenCL) {
     // Descriptor
     fields.add(buildBlockDescriptor(CGM, blockInfo));

Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp?rev=326937&r1=326936&r2=326937&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp Wed Mar  7 11:32:58 2018
@@ -112,6 +112,25 @@ llvm::PointerType *CGOpenCLRuntime::getG
       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
 }
 
+// Get the block literal from an expression derived from the block expression.
+// OpenCL v2.0 s6.12.5:
+// Block variable declarations are implicitly qualified with const. Therefore
+// all block variables must be initialized at declaration time and may not be
+// reassigned.
+static const BlockExpr *getBlockExpr(const Expr *E) {
+  if (auto Cast = dyn_cast<CastExpr>(E)) {
+    E = Cast->getSubExpr();
+  }
+  if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+    E = cast<VarDecl>(DR->getDecl())->getInit();
+  }
+  E = E->IgnoreImplicit();
+  if (auto Cast = dyn_cast<CastExpr>(E)) {
+    E = Cast->getSubExpr();
+  }
+  return cast<BlockExpr>(E);
+}
+
 /// Record emitted llvm invoke function and llvm block literal for the
 /// corresponding block expression.
 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -126,21 +145,15 @@ void CGOpenCLRuntime::recordBlockInfo(co
   EnqueuedBlockMap[E].Kernel = nullptr;
 }
 
+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
+  return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
+}
+
 CGOpenCLRuntime::EnqueuedBlockInfo
 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
   CGF.EmitScalarExpr(E);
 
-  // The block literal may be assigned to a const variable. Chasing down
-  // to get the block literal.
-  if (auto DR = dyn_cast<DeclRefExpr>(E)) {
-    E = cast<VarDecl>(DR->getDecl())->getInit();
-  }
-  E = E->IgnoreImplicit();
-  if (auto Cast = dyn_cast<CastExpr>(E)) {
-    E = Cast->getSubExpr();
-  }
-  auto *Block = cast<BlockExpr>(E);
-
+  const BlockExpr *Block = getBlockExpr(E);
   assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
          "Block expression not emitted");
 

Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h?rev=326937&r1=326936&r2=326937&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h Wed Mar  7 11:32:58 2018
@@ -87,6 +87,10 @@ public:
   /// \param Block block literal emitted for the block expression.
   void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
                        llvm::Value *Block);
+
+  /// \return LLVM block invoke function emitted for an expression derived from
+  /// the block expression.
+  llvm::Function *getInvokeFunction(const Expr *E);
 };
 
 }

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl?rev=326937&r1=326936&r2=326937&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Wed Mar  7 11:32:58 2018
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s
 
 typedef struct {int a;} ndrange_t;
 
@@ -36,23 +36,23 @@ kernel void test(global char *a, char b,
   enqueue_kernel(default_queue, flags, ndrange, block);
 }
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }>)
 // CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 // CHECK: entry:
-// CHECK:  %1 = alloca <{ i32, i32, i8*, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
-// CHECK:  store <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8
-// CHECK:  %2 = addrspacecast <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8*
+// CHECK:  %1 = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
+// CHECK:  store <{ i32, i32, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8
+// CHECK:  %2 = addrspacecast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8*
 // CHECK:  call void @__test_block_invoke(i8* %2)
 // CHECK:  ret void
 // CHECK:}
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i64, i64 addrspace(1)* }>)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
 // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }

Modified: cfe/trunk/test/CodeGenOpenCL/blocks.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/blocks.cl?rev=326937&r1=326936&r2=326937&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/blocks.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/blocks.cl Wed Mar  7 11:32:58 2018
@@ -1,10 +1,7 @@
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s
 
-// SPIR: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
-// AMDGCN: %struct.__opencl_block_literal_generic = type { i32, i32, i8* }
-// SPIR: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
-// AMDGCN: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8* } { i32 16, i32 8, i8* bitcast (void (i8*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) }
+// COMMON: @__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }
 // COMMON-NOT: .str
 
 // SPIR-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
@@ -20,44 +17,32 @@ void foo(){
   // COMMON-NOT: %block.flags
   // COMMON-NOT: %block.reserved
   // COMMON-NOT: %block.descriptor
-  // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0
-  // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 0
-  // SPIR: store i32 16, i32* %[[block_size]]
-  // AMDGCN: store i32 20, i32 addrspace(5)* %[[block_size]]
-  // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1
-  // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 1
+  // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block:.*]], i32 0, i32 0
+  // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 0
+  // SPIR: store i32 12, i32* %[[block_size]]
+  // AMDGCN: store i32 12, i32 addrspace(5)* %[[block_size]]
+  // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 1
+  // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 1
   // SPIR: store i32 4, i32* %[[block_align]]
-  // AMDGCN: store i32 8, i32 addrspace(5)* %[[block_align]]
-  // SPIR: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2
-  // SPIR: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]]
-  // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
+  // AMDGCN: store i32 4, i32 addrspace(5)* %[[block_align]]
+  // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2
   // SPIR: %[[i_value:.*]] = load i32, i32* %i
   // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
-  // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
+  // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }>* %[[block]] to i32 ()*
   // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
   // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
-  // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
-  // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
-  // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
-  // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
-  // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
-  // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
-  // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
-  // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
-  // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
-  // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
+  // SPIR: %[[block_literal:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
+  // SPIR: %[[blk_gen_ptr:.*]] = bitcast i32 () addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
+  // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
+  // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 2
   // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
   // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
-  // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
+  // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
   // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
   // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
-  // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
-  // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
-  // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
-  // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
-  // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
-  // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
-  // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
+  // AMDGCN: %[[block_literal:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
+  // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast i32 ()* %[[block_literal]] to i8*
+  // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
 
   int (^ block_B)(void) = ^{
     return i;
@@ -66,12 +51,12 @@ void foo(){
 }
 
 // SPIR-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
-// SPIR:  %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)*
-// SPIR:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3
+// SPIR:  %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 }> addrspace(4)*
+// SPIR:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(4)* %[[block]], i32 0, i32 2
 // SPIR:  %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
 // AMDGCN-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8* %.block_descriptor)
-// AMDGCN:  %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i8*, i32 }>*
-// AMDGCN:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }>* %[[block]], i32 0, i32 3
+// AMDGCN:  %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i32 }>*
+// AMDGCN:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2
 // AMDGCN:  %[[block_capture:.*]] = load i32, i32* %[[block_capture_addr]]
 
 // COMMON-NOT: define{{.*}}@__foo_block_invoke_kernel

Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=326937&r1=326936&r2=326937&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Wed Mar  7 11:32:58 2018
@@ -6,27 +6,25 @@
 typedef void (^bl_t)(local void *);
 typedef struct {int a;} ndrange_t;
 
-// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
-
 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
-// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: @block_G =  addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: @block_G =  addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
 
 // For anonymous blocks without captures, emit block literals as global variable.
-// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVG8:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INVG9:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
+// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
 
 // Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G
-// COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
+// COMMON: define internal spir_func void [[INV_G:.*]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
 const bl_t block_G = (bl_t) ^ (local void *a) {};
 
 void callee(int id, __global int *out) {
@@ -52,9 +50,8 @@ kernel void device_side_enqueue(global i
   // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
-  // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
-  // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
+  // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
+  // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
@@ -70,8 +67,7 @@ kernel void device_side_enqueue(global i
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
-  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
-  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
+  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
@@ -95,7 +91,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -116,7 +112,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -140,7 +136,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_events_vaargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
@@ -164,7 +160,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_events_vaargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
@@ -186,7 +182,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -215,7 +211,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -236,7 +232,7 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
   // B32-SAME: i32* %[[TMP1]])
   // B64-SAME: i64* %[[TMP1]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -248,21 +244,19 @@ kernel void device_side_enqueue(global i
   // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
   // The full type of these expressions are long (and repeated elsewhere), so we
   // capture it as part of the regex for convenience and clarity.
-  // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+  // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
   void (^const block_A)(void) = ^{
     return;
   };
 
   // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
-  // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+  // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
   void (^const block_B)(local void *) = ^(local void *a) {
     return;
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void [[INVG8:.*]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -271,19 +265,17 @@ kernel void device_side_enqueue(global i
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   enqueue_kernel(default_queue, flags, ndrange, block_A);
 
   // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
   // COMMON: call i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   unsigned size = get_kernel_work_group_size(block_A);
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void [[INVG8]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   void (^block_C)(void) = ^{
@@ -291,7 +283,6 @@ kernel void device_side_enqueue(global i
   };
 
   // Emits block literal on stack and block kernel [[INVLK3]].
-  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
@@ -301,34 +292,34 @@ kernel void device_side_enqueue(global i
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, block_C);
 
-  // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
+  // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]].
   // COMMON: call i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_work_group_size(block_B);
 
   // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal ind invoke functions are emitted.
   // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_A);
 
   // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
   // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_G);
 
   // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
   // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
 
   // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
   // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
-  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
 }
 
@@ -350,7 +341,7 @@ kernel void device_side_enqueue(global i
 // COMMON: }
 // COMMON: define internal spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
-// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
+// COMMON: define internal spir_func void [[INVG9:.*]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})




More information about the cfe-commits mailing list