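Have you tried setting detect_stack_use_after_return=1 in ASAN_OPTIONS? If the failure is a use of the lambda's locals after it returns (which is what returning std::tie(...) with a deduced return type would produce), ASan will presumably only report it when that check is enabled. The ASan buildbot sets the following ASAN_OPTIONS prior to running tests: export ASAN_OPTIONS="check_initialization_order=true:detect_stack_use_after_return=1:detect_leaks=1"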
On Mon, Aug 6, 2018 at 7:34 AM <sc...@scottlinder.com> wrote: > I can't seem to reproduce the ASan failure locally, even after building > a clang with the latest compiler-rt, and then rebuilding my patch with > LLVM_USE_SANITIZER=Address > > I am pretty confident the problem should be fixed with a one-line change > to my patch: > > - auto CreateArrayForSizeVar = [=](unsigned First) { > + auto CreateArrayForSizeVar = [=](unsigned First) > + -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { > > I don't want to commit something and then immediately have to revert, > though. Can you think of anything I might be missing locally to > reproduce the ASan failure? > > Thanks, > Scott > > On 2018-08-03 13:48, Vlad Tsyrklevich wrote: > > This change is causing ASan failures on the sanitizer bots: > > > http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21898/steps/check-clang%20asan/logs/stdio > > [9] > > > > I've reverted it in r338904. > > > > On Fri, Aug 3, 2018 at 8:51 AM Scott Linder via cfe-commits > > <cfe-commits@lists.llvm.org> wrote: > > > >> Author: scott.linder > >> Date: Fri Aug 3 08:50:52 2018 > >> New Revision: 338899 > >> > >> URL: http://llvm.org/viewvc/llvm-project?rev=338899&view=rev [1] > >> Log: > >> [OpenCL] Always emit alloca in entry block for enqueue_kernel > >> builtin > >> > >> Ensures the statically sized alloca is not converted to > >> DYNAMIC_STACKALLOC > >> later because it is not in the entry block. 
> >> > >> Differential Revision: https://reviews.llvm.org/D50104 [2] > >> > >> Added: > >> cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl > >> [3] > >> Modified: > >> cfe/trunk/lib/CodeGen/CGBuiltin.cpp > >> cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl [4] > >> > >> Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp > >> URL: > >> > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=338899&r1=338898&r2=338899&view=diff > >> [5] > >> > > > ============================================================================== > >> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) > >> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 3 08:50:52 2018 > >> @@ -3338,23 +3338,29 @@ RValue CodeGenFunction::EmitBuiltinExpr( > >> // Create a temporary array to hold the sizes of local pointer > >> arguments > >> // for the block. \p First is the position of the first size > >> argument. > >> auto CreateArrayForSizeVar = [=](unsigned First) { > >> - auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); > >> - auto *Arr = Builder.CreateAlloca(AT); > >> - llvm::Value *Ptr; > >> + llvm::APInt ArraySize(32, NumArgs - First); > >> + QualType SizeArrayTy = getContext().getConstantArrayType( > >> + getContext().getSizeType(), ArraySize, ArrayType::Normal, > >> + /*IndexTypeQuals=*/0); > >> + auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); > >> + llvm::Value *TmpPtr = Tmp.getPointer(); > >> + llvm::Value *TmpSize = EmitLifetimeStart( > >> + > >> CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); > >> + llvm::Value *ElemPtr; > >> // Each of the following arguments specifies the size of the > >> corresponding > >> // argument passed to the enqueued block. 
> >> auto *Zero = llvm::ConstantInt::get(IntTy, 0); > >> for (unsigned I = First; I < NumArgs; ++I) { > >> auto *Index = llvm::ConstantInt::get(IntTy, I - First); > >> - auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); > >> + auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index}); > >> if (I == First) > >> - Ptr = GEP; > >> + ElemPtr = GEP; > >> auto *V = > >> Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), > >> SizeTy); > >> Builder.CreateAlignedStore( > >> V, GEP, > >> CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); > >> } > >> - return Ptr; > >> + return std::tie(ElemPtr, TmpSize, TmpPtr); > >> }; > >> > >> // Could have events and/or varargs. > >> @@ -3366,24 +3372,27 @@ RValue CodeGenFunction::EmitBuiltinExpr( > >> llvm::Value *Kernel = > >> Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); > >> auto *Block = Builder.CreatePointerCast(Info.BlockArg, > >> GenericVoidPtrTy); > >> - auto *PtrToSizeArray = CreateArrayForSizeVar(4); > >> + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; > >> + std::tie(ElemPtr, TmpSize, TmpPtr) = > >> CreateArrayForSizeVar(4); > >> > >> // Create a vector of the arguments, as well as a constant > >> value to > >> // express to the runtime the number of variadic arguments. 
> >> std::vector<llvm::Value *> Args = { > >> Queue, Flags, Range, > >> Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), > >> - PtrToSizeArray}; > >> + ElemPtr}; > >> std::vector<llvm::Type *> ArgTys = { > >> - QueueTy, IntTy, RangeTy, > >> - GenericVoidPtrTy, GenericVoidPtrTy, IntTy, > >> - PtrToSizeArray->getType()}; > >> + QueueTy, IntTy, RangeTy, > >> GenericVoidPtrTy, > >> + GenericVoidPtrTy, IntTy, ElemPtr->getType()}; > >> > >> llvm::FunctionType *FTy = llvm::FunctionType::get( > >> Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); > >> - return RValue::get( > >> - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), > >> - llvm::ArrayRef<llvm::Value *>(Args))); > >> + auto Call = > >> + > >> RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), > >> + llvm::ArrayRef<llvm::Value > >> *>(Args))); > >> + if (TmpSize) > >> + EmitLifetimeEnd(TmpSize, TmpPtr); > >> + return Call; > >> } > >> // Any calls now have event arguments passed. > >> if (NumArgs >= 7) { > >> @@ -3430,15 +3439,19 @@ RValue CodeGenFunction::EmitBuiltinExpr( > >> ArgTys.push_back(Int32Ty); > >> Name = "__enqueue_kernel_events_varargs"; > >> > >> - auto *PtrToSizeArray = CreateArrayForSizeVar(7); > >> - Args.push_back(PtrToSizeArray); > >> - ArgTys.push_back(PtrToSizeArray->getType()); > >> + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; > >> + std::tie(ElemPtr, TmpSize, TmpPtr) = > >> CreateArrayForSizeVar(7); > >> + Args.push_back(ElemPtr); > >> + ArgTys.push_back(ElemPtr->getType()); > >> > >> llvm::FunctionType *FTy = llvm::FunctionType::get( > >> Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); > >> - return RValue::get( > >> - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), > >> - llvm::ArrayRef<llvm::Value *>(Args))); > >> + auto Call = > >> + > >> RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), > >> + llvm::ArrayRef<llvm::Value > >> *>(Args))); > >> + if (TmpSize) > >> + EmitLifetimeEnd(TmpSize, TmpPtr); > >> + return Call; > 
>> } > >> LLVM_FALLTHROUGH; > >> } > >> > >> Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl > >> [4] > >> URL: > >> > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=338899&r1=338898&r2=338899&view=diff > >> [6] > >> > > > ============================================================================== > >> --- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl [4] > >> (original) > >> +++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl [4] Fri > >> Aug 3 08:50:52 2018 > >> @@ -1,5 +1,6 @@ > >> // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 > >> -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s > >> --check-prefix=COMMON --check-prefix=B32 > >> // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 > >> -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s > >> --check-prefix=COMMON --check-prefix=B64 > >> +// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O1 > >> -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s > >> --check-prefix=CHECK-LIFETIMES > >> > >> #pragma OPENCL EXTENSION cl_khr_subgroups : enable > >> > >> @@ -46,8 +47,31 @@ kernel void device_side_enqueue(global i > >> // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*] > >> clk_event_t event_wait_list2[] = {clk_event}; > >> > >> - // Emits block literal on stack and block kernel [[INVLK1]]. 
> >> // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4 > >> + > >> + // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32] > >> + // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] > >> + // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32] > >> + // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] > >> + // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32] > >> + // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] > >> + // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32] > >> + // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] > >> + // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32] > >> + // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] > >> + // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32] > >> + // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] > >> + // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32] > >> + // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] > >> + // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] > >> + > >> + // Emits block literal on stack and block kernel [[INVLK1]]. 
> >> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > >> %opencl.queue_t{{.*}}** %default_queue > >> // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > >> // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, > >> i32, i32 addrspace(1)* }>* %block to void ()* > >> @@ -73,7 +97,6 @@ kernel void device_side_enqueue(global i > >> // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* > >> addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* > >> addrspace(4)* [[EVNT]], > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) > >> - > >> enqueue_kernel(default_queue, flags, ndrange, 2, > >> &event_wait_list, &clk_event, > >> ^(void) { > >> a[i] = b[i]; > >> @@ -82,39 +105,46 @@ kernel void device_side_enqueue(global i > >> // Emits global block literal [[BLG1]] and block kernel > >> [[INVGK1]]. 
> >> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > >> %opencl.queue_t{{.*}}** %default_queue > >> // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > >> - // B32: %[[TMP:.*]] = alloca [1 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 256, i32* %[[TMP1]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [1 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 256, i64* %[[TMP1]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > >> %[[BLOCK_SIZES1]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 8, i8* nonnull [[LIFETIME_PTR]]) > >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x > >> i64]* %[[BLOCK_SIZES1]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, > >> i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[BLOCK_SIZES1]], i32 0, i32 0 > >> + // B32: store i32 256, i32* %[[TMP]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[BLOCK_SIZES1]], i32 0, i32 0 > >> + // B64: store i64 256, i64* %[[TMP]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( > >> // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 1, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* %[[TMP]]) > >> 
enqueue_kernel(default_queue, flags, ndrange, > >> ^(local void *p) { > >> return; > >> }, > >> 256); > >> + > >> char c; > >> // Emits global block literal [[BLG2]] and block kernel > >> [[INVGK2]]. > >> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > >> %opencl.queue_t{{.*}}** %default_queue > >> // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > >> - // B32: %[[TMP:.*]] = alloca [1 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [1 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > >> %[[BLOCK_SIZES2]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 8, i8* nonnull [[LIFETIME_PTR]]) > >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x > >> i64]* %[[BLOCK_SIZES2]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, > >> i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[BLOCK_SIZES2]], i32 0, i32 0 > >> + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[BLOCK_SIZES2]], i32 0, i32 0 > >> + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( > >> // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } 
addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 1, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* %[[TMP]]) > >> enqueue_kernel(default_queue, flags, ndrange, > >> ^(local void *p) { > >> return; > >> @@ -127,18 +157,21 @@ kernel void device_side_enqueue(global i > >> // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x > >> %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* > >> %event_wait_list2, i32 0, i32 0 > >> // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast > >> %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* > >> addrspace(4)* > >> // COMMON: [[EVNT:%[0-9]+]] = addrspacecast > >> %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* > >> addrspace(4)* > >> - // B32: %[[TMP:.*]] = alloca [1 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 256, i32* %[[TMP1]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [1 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 256, i64* %[[TMP1]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > >> %[[BLOCK_SIZES3]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 8, i8* nonnull [[LIFETIME_PTR]]) > >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x > >> i64]* %[[BLOCK_SIZES3]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 > >> @__enqueue_kernel_events_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, > >> i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[BLOCK_SIZES3]], i32 0, i32 0 > >> + // B32: store i32 256, i32* %[[TMP]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[BLOCK_SIZES3]], i32 0, i32 0 > >> + // 
B64: store i64 256, i64* %[[TMP]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs > >> // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} > >> [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 1, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* %[[TMP]]) > >> enqueue_kernel(default_queue, flags, ndrange, 2, > >> event_wait_list2, &clk_event, > >> ^(local void *p) { > >> return; > >> @@ -151,18 +184,21 @@ kernel void device_side_enqueue(global i > >> // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x > >> %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* > >> %event_wait_list2, i32 0, i32 0 > >> // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast > >> %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* > >> addrspace(4)* > >> // COMMON: [[EVNT:%[0-9]+]] = addrspacecast > >> %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* > >> addrspace(4)* > >> - // B32: %[[TMP:.*]] = alloca [1 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [1 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > >> %[[BLOCK_SIZES4]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 8, i8* nonnull [[LIFETIME_PTR]]) 
> >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x > >> i64]* %[[BLOCK_SIZES4]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 > >> @__enqueue_kernel_events_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, > >> i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[BLOCK_SIZES4]], i32 0, i32 0 > >> + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[BLOCK_SIZES4]], i32 0, i32 0 > >> + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs > >> // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* > >> addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* > >> addrspace(4)* [[EVNT]], > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 1, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* %[[TMP]]) > >> enqueue_kernel(default_queue, flags, ndrange, 2, > >> event_wait_list2, &clk_event, > >> ^(local void *p) { > >> return; > >> @@ -173,18 +209,21 @@ kernel void device_side_enqueue(global i > >> // Emits global block literal [[BLG5]] and block kernel > >> [[INVGK5]]. 
> >> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > >> %opencl.queue_t{{.*}}** %default_queue > >> // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > >> - // B32: %[[TMP:.*]] = alloca [1 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [1 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > >> %[[BLOCK_SIZES5]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 8, i8* nonnull [[LIFETIME_PTR]]) > >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x > >> i64]* %[[BLOCK_SIZES5]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, > >> i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[BLOCK_SIZES5]], i32 0, i32 0 > >> + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[BLOCK_SIZES5]], i32 0, i32 0 > >> + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_varargs > >> // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 1, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* 
%[[TMP]]) > >> enqueue_kernel(default_queue, flags, ndrange, > >> ^(local void *p) { > >> return; > >> @@ -194,26 +233,29 @@ kernel void device_side_enqueue(global i > >> // Emits global block literal [[BLG6]] and block kernel > >> [[INVGK6]]. > >> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, > >> %opencl.queue_t{{.*}}** %default_queue > >> // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > >> - // B32: %[[TMP:.*]] = alloca [3 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 1, i32* %[[TMP1]], align 4 > >> - // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* > >> %[[TMP]], i32 0, i32 1 > >> - // B32: store i32 2, i32* %[[TMP2]], align 4 > >> - // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* > >> %[[TMP]], i32 0, i32 2 > >> - // B32: store i32 4, i32* %[[TMP3]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [3 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 1, i64* %[[TMP1]], align 8 > >> - // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* > >> %[[TMP]], i32 0, i32 1 > >> - // B64: store i64 2, i64* %[[TMP2]], align 8 > >> - // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* > >> %[[TMP]], i32 0, i32 2 > >> - // B64: store i64 4, i64* %[[TMP3]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]* > >> %[[BLOCK_SIZES6]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 24, i8* nonnull [[LIFETIME_PTR]]) > >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x > >> i64]* %[[BLOCK_SIZES6]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 > >> 24, i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* > >> %[[BLOCK_SIZES6]], i32 0, i32 0 > >> + // 
B32: store i32 1, i32* %[[TMP]], align 4 > >> + // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x > >> i32]* %[[BLOCK_SIZES6]], i32 0, i32 1 > >> + // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4 > >> + // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x > >> i32]* %[[BLOCK_SIZES6]], i32 0, i32 2 > >> + // B32: store i32 4, i32* %[[BLOCK_SIZES63]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]* > >> %[[BLOCK_SIZES6]], i32 0, i32 0 > >> + // B64: store i64 1, i64* %[[TMP]], align 8 > >> + // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x > >> i64]* %[[BLOCK_SIZES6]], i32 0, i32 1 > >> + // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8 > >> + // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x > >> i64]* %[[BLOCK_SIZES6]], i32 0, i32 2 > >> + // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_varargs > >> // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 3, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* %[[TMP]]) > >> enqueue_kernel(default_queue, flags, ndrange, > >> ^(local void *p1, local void *p2, local void *p3) > >> { > >> return; > >> @@ -223,18 +265,21 @@ kernel void device_side_enqueue(global i > >> // Emits global block literal [[BLG7]] and block kernel > >> [[INVGK7]]. 
> >> // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, > >> %opencl.queue_t** %default_queue > >> // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags > >> - // B32: %[[TMP:.*]] = alloca [1 x i32] > >> - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[TMP]], i32 0, i32 0 > >> - // B32: store i32 0, i32* %[[TMP1]], align 4 > >> - // B64: %[[TMP:.*]] = alloca [1 x i64] > >> - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[TMP]], i32 0, i32 0 > >> - // B64: store i64 4294967296, i64* %[[TMP1]], align 8 > >> + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* > >> %[[BLOCK_SIZES7]] to i8* > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 > >> 8, i8* nonnull [[LIFETIME_PTR]]) > >> + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x > >> i64]* %[[BLOCK_SIZES7]], i64 0, i64 0 > >> + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( > >> + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, > >> i8* nonnull [[LIFETIME_PTR]]) > >> + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* > >> %[[BLOCK_SIZES7]], i32 0, i32 0 > >> + // B32: store i32 0, i32* %[[TMP]], align 4 > >> + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* > >> %[[BLOCK_SIZES7]], i32 0, i32 0 > >> + // B64: store i64 4294967296, i64* %[[TMP]], align 8 > >> // COMMON-LABEL: call i32 @__enqueue_kernel_varargs > >> // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], > >> %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast > >> ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), > >> // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* > >> bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to > >> i8 addrspace(4)*), i32 1, > >> - // B32-SAME: i32* %[[TMP1]]) > >> - // B64-SAME: i64* %[[TMP1]]) > >> + // B32-SAME: i32* %[[TMP]]) > >> + // B64-SAME: i64* %[[TMP]]) > >> 
enqueue_kernel(default_queue, flags, ndrange, > >> ^(local void *p) { > >> return; > >> > >> Added: > >> cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl [3] > >> URL: > >> > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl?rev=338899&view=auto > >> [7] > >> > > > ============================================================================== > >> --- cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl > >> [3] (added) > >> +++ cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl > >> [3] Fri Aug 3 08:50:52 2018 > >> @@ -0,0 +1,31 @@ > >> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn > >> < %s | FileCheck %s --check-prefixes=COMMON,AMDGPU > >> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple > >> "spir-unknown-unknown" < %s | FileCheck %s > >> --check-prefixes=COMMON,SPIR32 > >> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple > >> "spir64-unknown-unknown" < %s | FileCheck %s > >> --check-prefixes=COMMON,SPIR64 > >> +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited > >> -emit-llvm -o - -triple amdgcn < %s | FileCheck %s > >> --check-prefixes=CHECK-DEBUG > >> + > >> +// Check that the enqueue_kernel array temporary is in the entry > >> block to avoid > >> +// a dynamic alloca > >> + > >> +typedef struct {int a;} ndrange_t; > >> + > >> +kernel void test(int i) { > >> +// COMMON-LABEL: define {{.*}} void @test > >> +// COMMON-LABEL: entry: > >> +// AMDGPU: %block_sizes = alloca [1 x i64] > >> +// SPIR32: %block_sizes = alloca [1 x i32] > >> +// SPIR64: %block_sizes = alloca [1 x i64] > >> +// COMMON-LABEL: if.then: > >> +// COMMON-NOT: alloca > >> +// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg !34 > >> +// COMMON-LABEL: if.end > >> + queue_t default_queue; > >> + unsigned flags = 0; > >> + ndrange_t ndrange; > >> + if (i) > >> + enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) > >> { }, 
32); > >> +} > >> + > >> +// Check that the temporary is scoped to the `if` > >> + > >> +// CHECK-DEBUG: !32 = distinct !DILexicalBlock(scope: !7, file: !1, > >> line: 24) > >> +// CHECK-DEBUG: !34 = !DILocation(line: 25, scope: !32) > >> > >> _______________________________________________ > >> cfe-commits mailing list > >> cfe-commits@lists.llvm.org > >> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits [8] > > > > > > Links: > > ------ > > [1] http://llvm.org/viewvc/llvm-project?rev=338899&view=rev > > [2] https://reviews.llvm.org/D50104 > > [3] http://enqueue-kernel-non-entry-block.cl > > [4] http://cl20-device-side-enqueue.cl > > [5] > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=338899&r1=338898&r2=338899&view=diff > > [6] > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=338899&r1=338898&r2=338899&view=diff > > [7] > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl?rev=338899&view=auto > > [8] http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits > > [9] > > > http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21898/steps/check-clang%20asan/logs/stdio >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits