r354568 - [OpenCL] Simplify LLVM IR generated for OpenCL blocks
Andrew Savonichev via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 21 03:02:11 PST 2019
Author: asavonic
Date: Thu Feb 21 03:02:10 2019
New Revision: 354568
URL: http://llvm.org/viewvc/llvm-project?rev=354568&view=rev
Log:
[OpenCL] Simplify LLVM IR generated for OpenCL blocks
Summary:
Emit direct call of block invoke functions when possible, i.e. in case the
block is not passed as a function argument.
Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
Reviewers: Anastasia, yaxunl, svenvh
Reviewed By: Anastasia
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D58388
Modified:
cfe/trunk/lib/CodeGen/CGBlocks.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
cfe/trunk/test/CodeGenOpenCL/blocks.cl
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=354568&r1=354567&r2=354568&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Thu Feb 21 03:02:10 2019
@@ -1253,52 +1253,49 @@ RValue CodeGenFunction::EmitBlockCallExp
ReturnValueSlot ReturnValue) {
const BlockPointerType *BPT =
E->getCallee()->getType()->getAs<BlockPointerType>();
-
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
-
- // Get a pointer to the generic block literal.
- // For OpenCL we generate generic AS void ptr to be able to reuse the same
- // block definition for blocks with captures generated as private AS local
- // variables and without captures generated as global AS program scope
- // variables.
- unsigned AddrSpace = 0;
- if (getLangOpts().OpenCL)
- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
-
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
-
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
-
- // Get the function pointer from the literal.
- llvm::Value *FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
- CGM.getLangOpts().OpenCL ? 2 : 3);
-
- // Add the block literal.
+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
+ llvm::Value *Func = nullptr;
+ QualType FnType = BPT->getPointeeType();
+ ASTContext &Ctx = getContext();
CallArgList Args;
- QualType VoidPtrQualTy = getContext().VoidPtrTy;
- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
if (getLangOpts().OpenCL) {
- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
- VoidPtrQualTy =
- getContext().getPointerType(getContext().getAddrSpaceQualType(
- getContext().VoidTy, LangAS::opencl_generic));
- }
+ // For OpenCL, BlockPtr is already casted to generic block literal.
- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
+ // First argument of a block call is a generic block literal casted to
+ // generic void pointer, i.e. i8 addrspace(4)*
+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
+ QualType VoidPtrQualTy = Ctx.getPointerType(
+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
+ // And the rest of the arguments.
+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
+
+ // We *can* call the block directly unless it is a function argument.
+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+ else {
+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ }
+ } else {
+ // Bitcast the block literal to a generic block literal.
+ BlockPtr = Builder.CreatePointerCast(
+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
+ // Get pointer to the block invoke function
+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
+
+ // First argument is a block literal casted to a void pointer
+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
+ // And the rest of the arguments.
+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
- QualType FnType = BPT->getPointeeType();
-
- // And the rest of the arguments.
- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
-
- // Load the function.
- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ // Load the function.
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ }
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp?rev=354568&r1=354567&r2=354568&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp Thu Feb 21 03:02:10 2019
@@ -122,6 +122,23 @@ llvm::PointerType *CGOpenCLRuntime::getG
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
+// Get the block literal from an expression derived from the block expression.
+// OpenCL v2.0 s6.12.5:
+// Block variable declarations are implicitly qualified with const. Therefore
+// all block variables must be initialized at declaration time and may not be
+// reassigned.
+static const BlockExpr *getBlockExpr(const Expr *E) {
+ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop.
+ while(!isa<BlockExpr>(E) && E != Prev) {
+ Prev = E;
+ E = E->IgnoreCasts();
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+ E = cast<VarDecl>(DR->getDecl())->getInit();
+ }
+ }
+ return cast<BlockExpr>(E);
+}
+
/// Record emitted llvm invoke function and llvm block literal for the
/// corresponding block expression.
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -136,20 +153,17 @@ void CGOpenCLRuntime::recordBlockInfo(co
EnqueuedBlockMap[E].Kernel = nullptr;
}
+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
+}
+
CGOpenCLRuntime::EnqueuedBlockInfo
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
CGF.EmitScalarExpr(E);
// The block literal may be assigned to a const variable. Chasing down
// to get the block literal.
- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
- E = cast<VarDecl>(DR->getDecl())->getInit();
- }
- E = E->IgnoreImplicit();
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- auto *Block = cast<BlockExpr>(E);
+ const BlockExpr *Block = getBlockExpr(E);
assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
"Block expression not emitted");
Modified: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h?rev=354568&r1=354567&r2=354568&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h Thu Feb 21 03:02:10 2019
@@ -91,6 +91,10 @@ public:
/// \param Block block literal emitted for the block expression.
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
llvm::Value *Block);
+
+ /// \return LLVM block invoke function emitted for an expression derived from
+ /// the block expression.
+ llvm::Function *getInvokeFunction(const Expr *E);
};
}
Modified: cfe/trunk/test/CodeGenOpenCL/blocks.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/blocks.cl?rev=354568&r1=354567&r2=354568&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/blocks.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/blocks.cl Thu Feb 21 03:02:10 2019
@@ -39,11 +39,8 @@ void foo(){
// SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
// SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
// SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
// AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
// AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
@@ -53,11 +50,8 @@ void foo(){
// AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
// AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
// AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
int (^ block_B)(void) = ^{
return i;
Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=354568&r1=354567&r2=354568&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Thu Feb 21 03:02:10 2019
@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global i
};
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global i
unsigned size = get_kernel_work_group_size(block_A);
// Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
+ bl_t b1 = block_G;
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
+ bl_t b2 = b1;
+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
+ b2(0);
+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ size = get_kernel_preferred_work_group_size_multiple(b2);
+
void (^block_C)(void) = ^{
callee(i, a);
};
-
// Emits block literal on stack and block kernel [[INVLK3]].
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global i
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
More information about the cfe-commits
mailing list