This change (r338899) is causing ASan failures on the sanitizer bots: http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21898/steps/check-clang%20asan/logs/stdio
I've reverted it in r338904. On Fri, Aug 3, 2018 at 8:51 AM Scott Linder via cfe-commits < cfe-commits@lists.llvm.org> wrote: > Author: scott.linder > Date: Fri Aug 3 08:50:52 2018 > New Revision: 338899 > > URL: http://llvm.org/viewvc/llvm-project?rev=338899&view=rev > Log: > [OpenCL] Always emit alloca in entry block for enqueue_kernel builtin > > Ensures the statically sized alloca is not converted to DYNAMIC_STACKALLOC > later because it is not in the entry block. > > Differential Revision: https://reviews.llvm.org/D50104 > > > Added: > cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl > Modified: > cfe/trunk/lib/CodeGen/CGBuiltin.cpp > cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl > > Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=338899&r1=338898&r2=338899&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) > +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 3 08:50:52 2018 > @@ -3338,23 +3338,29 @@ RValue CodeGenFunction::EmitBuiltinExpr( > // Create a temporary array to hold the sizes of local pointer > arguments > // for the block. \p First is the position of the first size argument. 
> auto CreateArrayForSizeVar = [=](unsigned First) { > - auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); > - auto *Arr = Builder.CreateAlloca(AT); > - llvm::Value *Ptr; > + llvm::APInt ArraySize(32, NumArgs - First); > + QualType SizeArrayTy = getContext().getConstantArrayType( > + getContext().getSizeType(), ArraySize, ArrayType::Normal, > + /*IndexTypeQuals=*/0); > + auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); > + llvm::Value *TmpPtr = Tmp.getPointer(); > + llvm::Value *TmpSize = EmitLifetimeStart( > + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), > TmpPtr); > + llvm::Value *ElemPtr; > // Each of the following arguments specifies the size of the > corresponding > // argument passed to the enqueued block. > auto *Zero = llvm::ConstantInt::get(IntTy, 0); > for (unsigned I = First; I < NumArgs; ++I) { > auto *Index = llvm::ConstantInt::get(IntTy, I - First); > - auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); > + auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index}); > if (I == First) > - Ptr = GEP; > + ElemPtr = GEP; > auto *V = > Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), > SizeTy); > Builder.CreateAlignedStore( > V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); > } > - return Ptr; > + return std::tie(ElemPtr, TmpSize, TmpPtr); > }; > > // Could have events and/or varargs. > @@ -3366,24 +3372,27 @@ RValue CodeGenFunction::EmitBuiltinExpr( > llvm::Value *Kernel = > Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); > auto *Block = Builder.CreatePointerCast(Info.BlockArg, > GenericVoidPtrTy); > - auto *PtrToSizeArray = CreateArrayForSizeVar(4); > + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; > + std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); > > // Create a vector of the arguments, as well as a constant value to > // express to the runtime the number of variadic arguments. 
> std::vector<llvm::Value *> Args = { > Queue, Flags, Range, > Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), > - PtrToSizeArray}; > + ElemPtr}; > std::vector<llvm::Type *> ArgTys = { > - QueueTy, IntTy, RangeTy, > - GenericVoidPtrTy, GenericVoidPtrTy, IntTy, > - PtrToSizeArray->getType()}; > + QueueTy, IntTy, RangeTy, GenericVoidPtrTy, > + GenericVoidPtrTy, IntTy, ElemPtr->getType()}; > > llvm::FunctionType *FTy = llvm::FunctionType::get( > Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); > - return RValue::get( > - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), > - llvm::ArrayRef<llvm::Value *>(Args))); > + auto Call = > + RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, > Name), > + llvm::ArrayRef<llvm::Value > *>(Args))); > + if (TmpSize) > + EmitLifetimeEnd(TmpSize, TmpPtr); > + return Call; > } > // Any calls now have event arguments passed. > if (NumArgs >= 7) { > @@ -3430,15 +3439,19 @@ RValue CodeGenFunction::EmitBuiltinExpr( > ArgTys.push_back(Int32Ty); > Name = "__enqueue_kernel_events_varargs"; > > - auto *PtrToSizeArray = CreateArrayForSizeVar(7); > - Args.push_back(PtrToSizeArray); > - ArgTys.push_back(PtrToSizeArray->getType()); > + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; > + std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); > + Args.push_back(ElemPtr); > + ArgTys.push_back(ElemPtr->getType()); > > llvm::FunctionType *FTy = llvm::FunctionType::get( > Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); > - return RValue::get( > - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), > - llvm::ArrayRef<llvm::Value *>(Args))); > + auto Call = > + RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, > Name), > + llvm::ArrayRef<llvm::Value > *>(Args))); > + if (TmpSize) > + EmitLifetimeEnd(TmpSize, TmpPtr); > + return Call; > } > LLVM_FALLTHROUGH; > } > > Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl > URL: > 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=338899&r1=338898&r2=338899&view=diff > > ============================================================================== > --- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original) > +++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Fri Aug 3 > 08:50:52 2018 > @@ -1,5 +1,6 @@ > // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 > -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s > --check-prefix=COMMON --check-prefix=B32 > // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 > -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s > --check-prefix=COMMON --check-prefix=B64 > +// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O1 > -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s > --check-prefix=CHECK-LIFETIMES > > #pragma OPENCL EXTENSION cl_khr_subgroups : enable > > @@ -46,8 +47,31 @@ kernel void device_side_enqueue(global i > // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*] > clk_event_t event_wait_list2[] = {clk_event}; > > - // Emits block literal on stack and block kernel [[INVLK1]]. 
> // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4 > + > + // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32] > + // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] > + // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32] > + // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] > + // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32] > + // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] > + // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32] > + // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] > + // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32] > + // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] > + // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32] > + // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] > + // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32] > + // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] > + // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] > + > + // Emits block literal on stack and block kernel [[INVLK1]]. 
> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > %opencl.queue_t{{.*}}** %default_queue > // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, > i32 addrspace(1)* }>* %block to void ()* > @@ -73,7 +97,6 @@ kernel void device_side_enqueue(global i > // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* > [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) > - > enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, > &clk_event, > ^(void) { > a[i] = b[i]; > @@ -82,39 +105,46 @@ kernel void device_side_enqueue(global i > // Emits global block literal [[BLG1]] and block kernel [[INVGK1]]. > // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > %opencl.queue_t{{.*}}** %default_queue > // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > - // B32: %[[TMP:.*]] = alloca [1 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 256, i32* %[[TMP1]], align 4 > - // B64: %[[TMP:.*]] = alloca [1 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 256, i64* %[[TMP1]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > %[[BLOCK_SIZES1]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* > %[[BLOCK_SIZES1]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: %[[TMP:.*]] = 
getelementptr [1 x i32], [1 x i32]* > %[[BLOCK_SIZES1]], i32 0, i32 0 > + // B32: store i32 256, i32* %[[TMP]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > %[[BLOCK_SIZES1]], i32 0, i32 0 > + // B64: store i64 256, i64* %[[TMP]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( > // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 1, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, > ^(local void *p) { > return; > }, > 256); > + > char c; > // Emits global block literal [[BLG2]] and block kernel [[INVGK2]]. 
> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > %opencl.queue_t{{.*}}** %default_queue > // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > - // B32: %[[TMP:.*]] = alloca [1 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 > - // B64: %[[TMP:.*]] = alloca [1 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > %[[BLOCK_SIZES2]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* > %[[BLOCK_SIZES2]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > %[[BLOCK_SIZES2]], i32 0, i32 0 > + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > %[[BLOCK_SIZES2]], i32 0, i32 0 > + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( > // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 1, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, > ^(local void *p) { > return; > @@ -127,18 +157,21 @@ 
kernel void device_side_enqueue(global i > // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x > %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 > 0, i32 0 > // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast > %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* > addrspace(4)* > // COMMON: [[EVNT:%[0-9]+]] = addrspacecast > %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* > addrspace(4)* > - // B32: %[[TMP:.*]] = alloca [1 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 256, i32* %[[TMP1]], align 4 > - // B64: %[[TMP:.*]] = alloca [1 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 256, i64* %[[TMP1]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > %[[BLOCK_SIZES3]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* > %[[BLOCK_SIZES3]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > %[[BLOCK_SIZES3]], i32 0, i32 0 > + // B32: store i32 256, i32* %[[TMP]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > %[[BLOCK_SIZES3]], i32 0, i32 0 > + // B64: store i64 256, i64* %[[TMP]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs > // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], > %opencl.clk_event_t{{.*}} [[EVNT]], > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 1, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, > &clk_event, > ^(local void *p) { > return; > @@ -151,18 +184,21 @@ kernel void device_side_enqueue(global i > // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x > %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 > 0, i32 0 > // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast > %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* > addrspace(4)* > // COMMON: [[EVNT:%[0-9]+]] = addrspacecast > %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* > addrspace(4)* > - // B32: %[[TMP:.*]] = alloca [1 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 > - // B64: %[[TMP:.*]] = alloca [1 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > %[[BLOCK_SIZES4]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* > %[[BLOCK_SIZES4]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > %[[BLOCK_SIZES4]], i32 0, i32 0 > + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > %[[BLOCK_SIZES4]], i32 0, i32 0 > + // B64: store i64 
%{{.*}}, i64* %[[TMP]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs > // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* > [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 1, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, > &clk_event, > ^(local void *p) { > return; > @@ -173,18 +209,21 @@ kernel void device_side_enqueue(global i > // Emits global block literal [[BLG5]] and block kernel [[INVGK5]]. > // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > %opencl.queue_t{{.*}}** %default_queue > // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > - // B32: %[[TMP:.*]] = alloca [1 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 > - // B64: %[[TMP:.*]] = alloca [1 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > %[[BLOCK_SIZES5]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* > %[[BLOCK_SIZES5]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: 
%[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > %[[BLOCK_SIZES5]], i32 0, i32 0 > + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > %[[BLOCK_SIZES5]], i32 0, i32 0 > + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_varargs > // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 1, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, > ^(local void *p) { > return; > @@ -194,26 +233,29 @@ kernel void device_side_enqueue(global i > // Emits global block literal [[BLG6]] and block kernel [[INVGK6]]. 
> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > %opencl.queue_t{{.*}}** %default_queue > // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > - // B32: %[[TMP:.*]] = alloca [3 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 1, i32* %[[TMP1]], align 4 > - // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], > i32 0, i32 1 > - // B32: store i32 2, i32* %[[TMP2]], align 4 > - // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], > i32 0, i32 2 > - // B32: store i32 4, i32* %[[TMP3]], align 4 > - // B64: %[[TMP:.*]] = alloca [3 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 1, i64* %[[TMP1]], align 8 > - // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], > i32 0, i32 1 > - // B64: store i64 2, i64* %[[TMP2]], align 8 > - // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], > i32 0, i32 2 > - // B64: store i64 4, i64* %[[TMP3]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]* > %[[BLOCK_SIZES6]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, > i8* nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x i64]* > %[[BLOCK_SIZES6]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* > %[[BLOCK_SIZES6]], i32 0, i32 0 > + // B32: store i32 1, i32* %[[TMP]], align 4 > + // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x i32]* > %[[BLOCK_SIZES6]], i32 0, i32 1 > + // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4 > + // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x i32]* > %[[BLOCK_SIZES6]], i32 0, i32 2 > + // B32: store i32 4, i32* 
%[[BLOCK_SIZES63]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]* > %[[BLOCK_SIZES6]], i32 0, i32 0 > + // B64: store i64 1, i64* %[[TMP]], align 8 > + // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x i64]* > %[[BLOCK_SIZES6]], i32 0, i32 1 > + // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8 > + // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]* > %[[BLOCK_SIZES6]], i32 0, i32 2 > + // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_varargs > // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 3, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, > ^(local void *p1, local void *p2, local void *p3) { > return; > @@ -223,18 +265,21 @@ kernel void device_side_enqueue(global i > // Emits global block literal [[BLG7]] and block kernel [[INVGK7]]. 
> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** > %default_queue > // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > - // B32: %[[TMP:.*]] = alloca [1 x i32] > - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], > i32 0, i32 0 > - // B32: store i32 0, i32* %[[TMP1]], align 4 > - // B64: %[[TMP:.*]] = alloca [1 x i64] > - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], > i32 0, i32 0 > - // B64: store i64 4294967296, i64* %[[TMP1]], align 8 > + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > %[[BLOCK_SIZES7]] to i8* > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* > %[[BLOCK_SIZES7]], i64 0, i64 0 > + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* > nonnull [[LIFETIME_PTR]]) > + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > %[[BLOCK_SIZES7]], i32 0, i32 0 > + // B32: store i32 0, i32* %[[TMP]], align 4 > + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > %[[BLOCK_SIZES7]], i32 0, i32 0 > + // B64: store i64 4294967296, i64* %[[TMP]], align 8 > // COMMON-LABEL: call i32 @__enqueue_kernel_varargs > // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} > [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 > addrspace(4)*), i32 1, > - // B32-SAME: i32* %[[TMP1]]) > - // B64-SAME: i64* %[[TMP1]]) > + // B32-SAME: i32* %[[TMP]]) > + // B64-SAME: i64* %[[TMP]]) > enqueue_kernel(default_queue, flags, ndrange, > ^(local void *p) { > return; > > Added: 
cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl?rev=338899&view=auto > > ============================================================================== > --- cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (added) > +++ cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl Fri > Aug 3 08:50:52 2018 > @@ -0,0 +1,31 @@ > +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn < %s > | FileCheck %s --check-prefixes=COMMON,AMDGPU > +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple > "spir-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR32 > +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple > "spir64-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR64 > +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited -emit-llvm > -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=CHECK-DEBUG > + > +// Check that the enqueue_kernel array temporary is in the entry block to > avoid > +// a dynamic alloca > + > +typedef struct {int a;} ndrange_t; > + > +kernel void test(int i) { > +// COMMON-LABEL: define {{.*}} void @test > +// COMMON-LABEL: entry: > +// AMDGPU: %block_sizes = alloca [1 x i64] > +// SPIR32: %block_sizes = alloca [1 x i32] > +// SPIR64: %block_sizes = alloca [1 x i64] > +// COMMON-LABEL: if.then: > +// COMMON-NOT: alloca > +// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg !34 > +// COMMON-LABEL: if.end > + queue_t default_queue; > + unsigned flags = 0; > + ndrange_t ndrange; > + if (i) > + enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) { }, > 32); > +} > + > +// Check that the temporary is scoped to the `if` > + > +// CHECK-DEBUG: !32 = distinct !DILexicalBlock(scope: !7, file: !1, line: > 24) > +// CHECK-DEBUG: !34 = !DILocation(line: 25, scope: !32) > > > _______________________________________________ > 
cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits