r338899 - [OpenCL] Always emit alloca in entry block for enqueue_kernel builtin

Vlad Tsyrklevich via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 3 10:48:44 PDT 2018


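This change is causing ASan failures on the sanitizer bots:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21898/steps/check-clang%20asan/logs/stdio

The likely cause is in the CreateArrayForSizeVar lambda quoted below: it
returns std::tie(ElemPtr, TmpSize, TmpPtr) with a deduced return type, i.e. a
tuple of references to the lambda's own locals, which dangle as soon as the
lambda returns and are then read by the caller.

I've reverted it in r338904.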

On Fri, Aug 3, 2018 at 8:51 AM Scott Linder via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Author: scott.linder
> Date: Fri Aug  3 08:50:52 2018
> New Revision: 338899
>
> URL: http://llvm.org/viewvc/llvm-project?rev=338899&view=rev
> Log:
> [OpenCL] Always emit alloca in entry block for enqueue_kernel builtin
>
> Ensures the statically sized alloca is not converted to DYNAMIC_STACKALLOC
> later because it is not in the entry block.
>
> Differential Revision: https://reviews.llvm.org/D50104
>
>
> Added:
>     cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
> Modified:
>     cfe/trunk/lib/CodeGen/CGBuiltin.cpp
>     cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
>
> Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=338899&r1=338898&r2=338899&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug  3 08:50:52 2018
> @@ -3338,23 +3338,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(
>      // Create a temporary array to hold the sizes of local pointer arguments
>      // for the block. \p First is the position of the first size argument.
>      auto CreateArrayForSizeVar = [=](unsigned First) {
> -      auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
> -      auto *Arr = Builder.CreateAlloca(AT);
> -      llvm::Value *Ptr;
> +      llvm::APInt ArraySize(32, NumArgs - First);
> +      QualType SizeArrayTy = getContext().getConstantArrayType(
> +          getContext().getSizeType(), ArraySize, ArrayType::Normal,
> +          /*IndexTypeQuals=*/0);
> +      auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
> +      llvm::Value *TmpPtr = Tmp.getPointer();
> +      llvm::Value *TmpSize = EmitLifetimeStart(
> +          CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
> +      llvm::Value *ElemPtr;
>        // Each of the following arguments specifies the size of the corresponding
>        // argument passed to the enqueued block.
>        auto *Zero = llvm::ConstantInt::get(IntTy, 0);
>        for (unsigned I = First; I < NumArgs; ++I) {
>          auto *Index = llvm::ConstantInt::get(IntTy, I - First);
> -        auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
> +        auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
>          if (I == First)
> -          Ptr = GEP;
> +          ElemPtr = GEP;
>          auto *V =
>              Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
>          Builder.CreateAlignedStore(
>              V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
>        }
> -      return Ptr;
> +      return std::tie(ElemPtr, TmpSize, TmpPtr);
>      };
>
>      // Could have events and/or varargs.
> @@ -3366,24 +3372,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(
>        llvm::Value *Kernel =
>            Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
>        auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
> -      auto *PtrToSizeArray = CreateArrayForSizeVar(4);
> +      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
> +      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
>
>        // Create a vector of the arguments, as well as a constant value to
>        // express to the runtime the number of variadic arguments.
>        std::vector<llvm::Value *> Args = {
>            Queue,  Flags, Range,
>            Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
> -          PtrToSizeArray};
> +          ElemPtr};
>        std::vector<llvm::Type *> ArgTys = {
> -          QueueTy,          IntTy,            RangeTy,
> -          GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
> -          PtrToSizeArray->getType()};
> +          QueueTy,          IntTy, RangeTy,           GenericVoidPtrTy,
> +          GenericVoidPtrTy, IntTy, ElemPtr->getType()};
>
>        llvm::FunctionType *FTy = llvm::FunctionType::get(
>            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
> -      return RValue::get(
> -          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
> -                             llvm::ArrayRef<llvm::Value *>(Args)));
> +      auto Call =
> +          RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
> +                                         llvm::ArrayRef<llvm::Value *>(Args)));
> +      if (TmpSize)
> +        EmitLifetimeEnd(TmpSize, TmpPtr);
> +      return Call;
>      }
>      // Any calls now have event arguments passed.
>      if (NumArgs >= 7) {
> @@ -3430,15 +3439,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(
>        ArgTys.push_back(Int32Ty);
>        Name = "__enqueue_kernel_events_varargs";
>
> -      auto *PtrToSizeArray = CreateArrayForSizeVar(7);
> -      Args.push_back(PtrToSizeArray);
> -      ArgTys.push_back(PtrToSizeArray->getType());
> +      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
> +      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
> +      Args.push_back(ElemPtr);
> +      ArgTys.push_back(ElemPtr->getType());
>
>        llvm::FunctionType *FTy = llvm::FunctionType::get(
>            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
> -      return RValue::get(
> -          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
> -                             llvm::ArrayRef<llvm::Value *>(Args)));
> +      auto Call =
> +          RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
> +                                         llvm::ArrayRef<llvm::Value *>(Args)));
> +      if (TmpSize)
> +        EmitLifetimeEnd(TmpSize, TmpPtr);
> +      return Call;
>      }
>      LLVM_FALLTHROUGH;
>    }
>
> Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=338899&r1=338898&r2=338899&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original)
> +++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Fri Aug  3
> 08:50:52 2018
> @@ -1,5 +1,6 @@
>  // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0
> -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s
> --check-prefix=COMMON --check-prefix=B32
>  // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0
> -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s
> --check-prefix=COMMON --check-prefix=B64
> +// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O1
> -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s
> --check-prefix=CHECK-LIFETIMES
>
>  #pragma OPENCL EXTENSION cl_khr_subgroups : enable
>
> @@ -46,8 +47,31 @@ kernel void device_side_enqueue(global i
>    // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]
>    clk_event_t event_wait_list2[] = {clk_event};
>
> -  // Emits block literal on stack and block kernel [[INVLK1]].
>    // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
> +
> +  // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32]
> +  // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
> +  // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32]
> +  // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
> +  // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32]
> +  // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
> +  // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32]
> +  // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
> +  // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32]
> +  // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
> +  // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32]
> +  // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
> +  // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32]
> +  // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
> +  // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
> +
> +  // Emits block literal on stack and block kernel [[INVLK1]].
>    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
> %opencl.queue_t{{.*}}** %default_queue
>    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
>    // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32,
> i32 addrspace(1)* }>* %block to void ()*
> @@ -73,7 +97,6 @@ kernel void device_side_enqueue(global i
>    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)*
> [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
> -
>    enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list,
> &clk_event,
>                   ^(void) {
>                     a[i] = b[i];
> @@ -82,39 +105,46 @@ kernel void device_side_enqueue(global i
>    // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].
>    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
> %opencl.queue_t{{.*}}** %default_queue
>    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
> -  // B32: %[[TMP:.*]] = alloca [1 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 256, i32* %[[TMP1]], align 4
> -  // B64: %[[TMP:.*]] = alloca [1 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 256, i64* %[[TMP1]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
> %[[BLOCK_SIZES1]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES1]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
> %[[BLOCK_SIZES1]], i32 0, i32 0
> +  // B32: store i32 256, i32* %[[TMP]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES1]], i32 0, i32 0
> +  // B64: store i64 256, i64* %[[TMP]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
>    // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 1,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange,
>                   ^(local void *p) {
>                     return;
>                   },
>                   256);
> +
>    char c;
>    // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].
>    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
> %opencl.queue_t{{.*}}** %default_queue
>    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
> -  // B32: %[[TMP:.*]] = alloca [1 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
> -  // B64: %[[TMP:.*]] = alloca [1 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
> %[[BLOCK_SIZES2]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES2]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
> %[[BLOCK_SIZES2]], i32 0, i32 0
> +  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES2]], i32 0, i32 0
> +  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
>    // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 1,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange,
>                   ^(local void *p) {
>                     return;
> @@ -127,18 +157,21 @@ kernel void device_side_enqueue(global i
>    // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x
> %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32
> 0, i32 0
>    // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast
> %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}*
> addrspace(4)*
>    // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast
> %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}*
> addrspace(4)*
> -  // B32: %[[TMP:.*]] = alloca [1 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 256, i32* %[[TMP1]], align 4
> -  // B64: %[[TMP:.*]] = alloca [1 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 256, i64* %[[TMP1]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
> %[[BLOCK_SIZES3]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES3]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
> %[[BLOCK_SIZES3]], i32 0, i32 0
> +  // B32: store i32 256, i32* %[[TMP]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES3]], i32 0, i32 0
> +  // B64: store i64 256, i64* %[[TMP]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
>    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]],
> %opencl.clk_event_t{{.*}} [[EVNT]],
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 1,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2,
> &clk_event,
>                   ^(local void *p) {
>                     return;
> @@ -151,18 +184,21 @@ kernel void device_side_enqueue(global i
>    // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x
> %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32
> 0, i32 0
>    // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast
> %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}*
> addrspace(4)*
>    // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast
> %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}*
> addrspace(4)*
> -  // B32: %[[TMP:.*]] = alloca [1 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
> -  // B64: %[[TMP:.*]] = alloca [1 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
> %[[BLOCK_SIZES4]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES4]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
> %[[BLOCK_SIZES4]], i32 0, i32 0
> +  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES4]], i32 0, i32 0
> +  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
>    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)*
> [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 1,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2,
> &clk_event,
>                   ^(local void *p) {
>                     return;
> @@ -173,18 +209,21 @@ kernel void device_side_enqueue(global i
>    // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].
>    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
> %opencl.queue_t{{.*}}** %default_queue
>    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
> -  // B32: %[[TMP:.*]] = alloca [1 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
> -  // B64: %[[TMP:.*]] = alloca [1 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
> %[[BLOCK_SIZES5]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES5]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
> %[[BLOCK_SIZES5]], i32 0, i32 0
> +  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES5]], i32 0, i32 0
> +  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
>    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 1,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange,
>                   ^(local void *p) {
>                     return;
> @@ -194,26 +233,29 @@ kernel void device_side_enqueue(global i
>    // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].
>    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
> %opencl.queue_t{{.*}}** %default_queue
>    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
> -  // B32: %[[TMP:.*]] = alloca [3 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 1, i32* %[[TMP1]], align 4
> -  // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]],
> i32 0, i32 1
> -  // B32: store i32 2, i32* %[[TMP2]], align 4
> -  // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]],
> i32 0, i32 2
> -  // B32: store i32 4, i32* %[[TMP3]], align 4
> -  // B64: %[[TMP:.*]] = alloca [3 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 1, i64* %[[TMP1]], align 8
> -  // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]],
> i32 0, i32 1
> -  // B64: store i64 2, i64* %[[TMP2]], align 8
> -  // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]],
> i32 0, i32 2
> -  // B64: store i64 4, i64* %[[TMP3]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]*
> %[[BLOCK_SIZES6]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 24,
> i8* nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x i64]*
> %[[BLOCK_SIZES6]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]*
> %[[BLOCK_SIZES6]], i32 0, i32 0
> +  // B32: store i32 1, i32* %[[TMP]], align 4
> +  // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x i32]*
> %[[BLOCK_SIZES6]], i32 0, i32 1
> +  // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4
> +  // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x i32]*
> %[[BLOCK_SIZES6]], i32 0, i32 2
> +  // B32: store i32 4, i32* %[[BLOCK_SIZES63]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]*
> %[[BLOCK_SIZES6]], i32 0, i32 0
> +  // B64: store i64 1, i64* %[[TMP]], align 8
> +  // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x i64]*
> %[[BLOCK_SIZES6]], i32 0, i32 1
> +  // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8
> +  // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]*
> %[[BLOCK_SIZES6]], i32 0, i32 2
> +  // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
>    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 3,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange,
>                   ^(local void *p1, local void *p2, local void *p3) {
>                     return;
> @@ -223,18 +265,21 @@ kernel void device_side_enqueue(global i
>    // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].
>    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t**
> %default_queue
>    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
> -  // B32: %[[TMP:.*]] = alloca [1 x i32]
> -  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]],
> i32 0, i32 0
> -  // B32: store i32 0, i32* %[[TMP1]], align 4
> -  // B64: %[[TMP:.*]] = alloca [1 x i64]
> -  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]],
> i32 0, i32 0
> -  // B64: store i64 4294967296, i64* %[[TMP1]], align 8
> +  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
> %[[BLOCK_SIZES7]] to i8*
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES7]], i64 0, i64 0
> +  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
> +  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8*
> nonnull [[LIFETIME_PTR]])
> +  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
> %[[BLOCK_SIZES7]], i32 0, i32 0
> +  // B32: store i32 0, i32* %[[TMP]], align 4
> +  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
> %[[BLOCK_SIZES7]], i32 0, i32 0
> +  // B64: store i64 4294967296, i64* %[[TMP]], align 8
>    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
>    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}}
> [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
>    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
> bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8
> addrspace(4)*), i32 1,
> -  // B32-SAME: i32* %[[TMP1]])
> -  // B64-SAME: i64* %[[TMP1]])
> +  // B32-SAME: i32* %[[TMP]])
> +  // B64-SAME: i64* %[[TMP]])
>    enqueue_kernel(default_queue, flags, ndrange,
>                   ^(local void *p) {
>                     return;
>
> Added: cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl?rev=338899&view=auto
>
> ==============================================================================
> --- cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (added)
> +++ cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl Fri
> Aug  3 08:50:52 2018
> @@ -0,0 +1,31 @@
> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn < %s
> | FileCheck %s --check-prefixes=COMMON,AMDGPU
> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple
> "spir-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR32
> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple
> "spir64-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR64
> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited -emit-llvm
> -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=CHECK-DEBUG
> +
> +// Check that the enqueue_kernel array temporary is in the entry block to
> avoid
> +// a dynamic alloca
> +
> +typedef struct {int a;} ndrange_t;
> +
> +kernel void test(int i) {
> +// COMMON-LABEL: define {{.*}} void @test
> +// COMMON-LABEL: entry:
> +// AMDGPU: %block_sizes = alloca [1 x i64]
> +// SPIR32: %block_sizes = alloca [1 x i32]
> +// SPIR64: %block_sizes = alloca [1 x i64]
> +// COMMON-LABEL: if.then:
> +// COMMON-NOT: alloca
> +// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg !34
> +// COMMON-LABEL: if.end
> +  queue_t default_queue;
> +  unsigned flags = 0;
> +  ndrange_t ndrange;
> +  if (i)
> +    enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) { },
> 32);
> +}
> +
> +// Check that the temporary is scoped to the `if`
> +
> +// CHECK-DEBUG: !32 = distinct !DILexicalBlock(scope: !7, file: !1, line:
> 24)
> +// CHECK-DEBUG: !34 = !DILocation(line: 25, scope: !32)
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180803/6e90786f/attachment-0001.html>


More information about the cfe-commits mailing list