r325264 - [OpenCL] Fix __enqueue_block for block with captures

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 15 08:39:19 PST 2018


Author: yaxunl
Date: Thu Feb 15 08:39:19 2018
New Revision: 325264

URL: http://llvm.org/viewvc/llvm-project?rev=325264&view=rev
Log:
[OpenCL] Fix __enqueue_block for block with captures

The following test case causes issue with codegen of __enqueue_block

void (^block)(void) = ^{ callee(id, out); };

enqueue_kernel(queue, 0, ndrange, block);
Clang first does codegen for block expression in the first line and deletes its block info.
Clang then tries to do codegen for the same block expression again for the second line,
and fails because the block info is gone.

The fix is to do normal codegen for both lines. Introduce an API to OpenCL runtime to
record llvm block invoke function and llvm block literal emitted for each AST block
expression, and use the recorded information for generating the wrapper kernel.

The EmitBlockLiteral APIs are cleaned up to minimize changes to the normal codegen
of blocks.

Another minor issue is that some clean up AST expression is generated for block
with captures, which can be stripped by IgnoreImplicit.

Differential Revision: https://reviews.llvm.org/D43240

Modified:
    cfe/trunk/lib/CodeGen/CGBlocks.cpp
    cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
    cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=325264&r1=325263&r2=325264&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Thu Feb 15 08:39:19 2018
@@ -740,27 +740,19 @@ void CodeGenFunction::destroyBlockInfos(
 }
 
 /// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
-                                               llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
     // The block literal is emitted as a global variable, and the block invoke
     // function has to be extracted from its initializer.
     if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
-      if (InvokeF) {
-        auto *GV = cast<llvm::GlobalVariable>(
-            cast<llvm::Constant>(Block)->stripPointerCasts());
-        auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
-        *InvokeF = cast<llvm::Function>(
-            BlockInit->getAggregateElement(2)->stripPointerCasts());
-      }
       return Block;
     }
     CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
     computeBlockInfo(CGM, this, blockInfo);
     blockInfo.BlockExpression = blockExpr;
-    return EmitBlockLiteral(blockInfo, InvokeF);
+    return EmitBlockLiteral(blockInfo);
   }
 
   // Find the block info for this block and take ownership of it.
@@ -769,11 +761,10 @@ llvm::Value *CodeGenFunction::EmitBlockL
                                          blockExpr->getBlockDecl()));
 
   blockInfo->BlockExpression = blockExpr;
-  return EmitBlockLiteral(*blockInfo, InvokeF);
+  return EmitBlockLiteral(*blockInfo);
 }
 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
-                                               llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
   auto GenVoidPtrTy =
       IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
@@ -788,8 +779,6 @@ llvm::Value *CodeGenFunction::EmitBlockL
   BlockCGF.SanOpts = SanOpts;
   auto *InvokeFn = BlockCGF.GenerateBlockFunction(
       CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  if (InvokeF)
-    *InvokeF = InvokeFn;
   auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
@@ -1024,6 +1013,11 @@ llvm::Value *CodeGenFunction::EmitBlockL
   llvm::Value *result = Builder.CreatePointerCast(
       blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
 
+  if (IsOpenCL) {
+    CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+                                           result);
+  }
+
   return result;
 }
 
@@ -1287,6 +1281,10 @@ static llvm::Constant *buildGlobalBlock(
   llvm::Constant *Result =
       llvm::ConstantExpr::getPointerCast(literal, RequiredType);
   CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+  if (CGM.getContext().getLangOpts().OpenCL)
+    CGM.getOpenCLRuntime().recordBlockInfo(
+        blockInfo.BlockExpression,
+        cast<llvm::Function>(blockFn->stripPointerCasts()), Result);
   return Result;
 }
 

Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp?rev=325264&r1=325263&r2=325264&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp Thu Feb 15 08:39:19 2018
@@ -112,37 +112,51 @@ llvm::PointerType *CGOpenCLRuntime::getG
       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
 }
 
+/// Record emitted llvm invoke function and llvm block literal for the
+/// corresponding block expression.
+void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
+                                      llvm::Function *InvokeF,
+                                      llvm::Value *Block) {
+  assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() &&
+         "Block expression emitted twice");
+  assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
+  assert(Block->getType()->isPointerTy() && "Invalid block literal type");
+  EnqueuedBlockMap[E].InvokeFunc = InvokeF;
+  EnqueuedBlockMap[E].BlockArg = Block;
+  EnqueuedBlockMap[E].Kernel = nullptr;
+}
+
 CGOpenCLRuntime::EnqueuedBlockInfo
 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
+  CGF.EmitScalarExpr(E);
+
   // The block literal may be assigned to a const variable. Chasing down
   // to get the block literal.
   if (auto DR = dyn_cast<DeclRefExpr>(E)) {
     E = cast<VarDecl>(DR->getDecl())->getInit();
   }
+  E = E->IgnoreImplicit();
   if (auto Cast = dyn_cast<CastExpr>(E)) {
     E = Cast->getSubExpr();
   }
   auto *Block = cast<BlockExpr>(E);
 
-  // The same block literal may be enqueued multiple times. Cache it if
-  // possible.
-  auto Loc = EnqueuedBlockMap.find(Block);
-  if (Loc != EnqueuedBlockMap.end()) {
-    return Loc->second;
+  assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
+         "Block expression not emitted");
+
+  // Do not emit the block wrapper again if it has been emitted.
+  if (EnqueuedBlockMap[Block].Kernel) {
+    return EnqueuedBlockMap[Block];
   }
 
-  // Emit block literal as a common block expression and get the block invoke
-  // function.
-  llvm::Function *Invoke;
-  auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke);
   auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel(
-      CGF, Invoke, V->stripPointerCasts());
+      CGF, EnqueuedBlockMap[Block].InvokeFunc,
+      EnqueuedBlockMap[Block].BlockArg->stripPointerCasts());
 
   // The common part of the post-processing of the kernel goes here.
   F->addFnAttr(llvm::Attribute::NoUnwind);
   F->setCallingConv(
       CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
-  EnqueuedBlockInfo Info{F, V};
-  EnqueuedBlockMap[Block] = Info;
-  return Info;
+  EnqueuedBlockMap[Block].Kernel = F;
+  return EnqueuedBlockMap[Block];
 }

Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h?rev=325264&r1=325263&r2=325264&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h Thu Feb 15 08:39:19 2018
@@ -23,6 +23,7 @@
 
 namespace clang {
 
+class BlockExpr;
 class Expr;
 class VarDecl;
 
@@ -39,8 +40,9 @@ protected:
 
   /// Structure for enqueued block information.
   struct EnqueuedBlockInfo {
-    llvm::Function *Kernel; /// Enqueued block kernel.
-    llvm::Value *BlockArg;  /// The first argument to enqueued block kernel.
+    llvm::Function *InvokeFunc; /// Block invoke function.
+    llvm::Function *Kernel;     /// Enqueued block kernel.
+    llvm::Value *BlockArg;      /// The first argument to enqueued block kernel.
   };
   /// Maps block expression to block information.
   llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
@@ -76,6 +78,15 @@ public:
   /// \return enqueued block information for enqueued block.
   EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
                                             const Expr *E);
+
+  /// \brief Record invoke function and block literal emitted during normal
+  /// codegen for a block expression. The information is used by
+  /// emitOpenCLEnqueuedBlock to emit wrapper kernel.
+  ///
+  /// \param InvokeF invoke function emitted for the block expression.
+  /// \param Block block literal emitted for the block expression.
+  void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
+                       llvm::Value *Block);
 };
 
 }

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=325264&r1=325263&r2=325264&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Thu Feb 15 08:39:19 2018
@@ -1583,10 +1583,7 @@ public:
   /// \return an LLVM value which is a pointer to a struct which contains
   /// information about the block, including the block invoke function, the
   /// captured variables, etc.
-  /// \param InvokeF will contain the block invoke function if it is not
-  /// nullptr.
-  llvm::Value *EmitBlockLiteral(const BlockExpr *,
-                                llvm::Function **InvokeF = nullptr);
+  llvm::Value *EmitBlockLiteral(const BlockExpr *);
   static void destroyBlockInfos(CGBlockInfo *info);
 
   llvm::Function *GenerateBlockFunction(GlobalDecl GD,
@@ -3010,11 +3007,8 @@ public:
   LValue EmitOMPSharedLValue(const Expr *E);
 
 private:
-  /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
-  /// nullptr. It should be called without \p InvokeF if the caller does not
-  /// need invoke function to be returned.
-  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
-                                llvm::Function **InvokeF = nullptr);
+  /// Helpers for blocks.
+  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
 
   /// struct with the values to be passed to the OpenMP loop-related functions
   struct OMPLoopArguments {

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl?rev=325264&r1=325263&r2=325264&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Thu Feb 15 08:39:19 2018
@@ -2,6 +2,10 @@
 
 typedef struct {int a;} ndrange_t;
 
+void callee(long id, global long *out) {
+  out[id] = id;
+}
+
 // CHECK-LABEL: define amdgpu_kernel void @test
 kernel void test(global char *a, char b, global long *c, long d) {
   queue_t default_queue;
@@ -24,6 +28,12 @@ kernel void test(global char *a, char b,
                  c[0] = d;
                  ((local int*)lp)[0] = 1;
                  }, 100);
+
+  void (^block)(void) = ^{
+    callee(d, c);
+  };
+
+  enqueue_kernel(default_queue, flags, ndrange, block);
 }
 
 // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>)
@@ -42,4 +52,7 @@ kernel void test(global char *a, char b,
 // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
 // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }

Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=325264&r1=325263&r2=325264&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Thu Feb 15 08:39:19 2018
@@ -29,6 +29,10 @@ typedef struct {int a;} ndrange_t;
 // COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
 const bl_t block_G = (bl_t) ^ (local void *a) {};
 
+void callee(int id, __global int *out) {
+  out[id] = id;
+}
+
 // COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
@@ -282,6 +286,21 @@ kernel void device_side_enqueue(global i
   // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
+  void (^block_C)(void) = ^{
+    callee(i, a);
+  };
+
+  // Emits block literal on stack and block kernel [[INVLK3]].
+  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+  enqueue_kernel(default_queue, flags, ndrange, block_C);
+
   // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
   // COMMON: call i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -333,6 +352,7 @@ kernel void device_side_enqueue(global i
 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})




More information about the cfe-commits mailing list