https://github.com/Leporacanthicus updated https://github.com/llvm/llvm-project/pull/92430
>From 629f5785738fdc52d4dc8d193aa43b3d011b1039 Mon Sep 17 00:00:00 2001 From: Mats Petersson <mats.peters...@arm.com> Date: Fri, 19 Apr 2024 18:00:58 +0100 Subject: [PATCH 1/6] Fix for changed code at the end of AllocaIP. Some of the OpenMP code can change the instruction pointed at by the insertion point. This leads to an assertion failure in the compiler, apparently because BB->getParent() and IP->getParent() do not match. The fix is to rebuild the insertion point from the block, rather than use builder.restoreIP. Also, move some of the alloca generation, rather than skipping back and forth between insert points (and ensure all the allocas are done before their users are created). A simple test, mainly to ensure the minimal reproducer doesn't fail to compile in the future, is also added. --- .../OpenMP/parallel-reduction-allocate.f90 | 23 +++++++++++++++++++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 ++++++--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 22 +++++++++++------- 3 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 diff --git a/flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 b/flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 new file mode 100644 index 0000000000000..fddce25ae22cc --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 @@ -0,0 +1,23 @@ +!! The main point of this test is to check that the code compiles at all, so the +!! checking is not very detailed. Not hitting an assert, crashing or otherwise failing +!! to compile is the key point. Also, emitting llvm is required for this to happen. +! 
RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 | FileCheck %s +subroutine proc + implicit none + real(8),allocatable :: F(:) + real(8),allocatable :: A(:) + +!$omp parallel private(A) reduction(+:F) + allocate(A(10)) +!$omp end parallel +end subroutine proc + +!CHECK-LABEL: define void @proc_() +!CHECK: call void +!CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) + +!CHECK: define internal void @[[OMP_PAR]] +!CHECK: omp.par.region8: +!CHECK-NEXT: call ptr @malloc +!CHECK-SAME: i64 10 + diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 2c4b45255d059..d423b545a1faf 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1391,7 +1391,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // Change the location to the outer alloca insertion point to create and // initialize the allocas we pass into the parallel region. - Builder.restoreIP(OuterAllocaIP); + InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->begin()); + Builder.restoreIP(NewOuter); AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); AllocaInst *ZeroAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); @@ -2155,7 +2156,8 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, // values. unsigned NumReductions = ReductionInfos.size(); Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions); - Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); + // Builder.restoreIP(AllocaIP); Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array"); Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); @@ -2556,7 +2558,10 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); // Allocate space for computed loop bounds as expected by the "init" function. 
- Builder.restoreIP(AllocaIP); + + // Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); + Type *I32Type = Type::getInt32Ty(M.getContext()); Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6ec4c120c11ea..47b07248ba84d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1154,6 +1154,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, MutableArrayRef<BlockArgument> reductionArgs = opInst.getRegion().getArguments().take_back( opInst.getNumReductionVars()); + + SmallVector<llvm::Value *> byRefVars; + if (isByRef) { + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { + // Allocate reduction variable (which is a pointer to the real reduciton + // variable allocated in the inlined region) + byRefVars.push_back(builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType()))); + } + } + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector<llvm::Value *> phis; @@ -1166,18 +1177,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); - builder.restoreIP(allocaIP); - if (isByRef[i]) { - // Allocate reduction variable (which is a pointer to the real reduciton - // variable allocated in the inlined region) - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); + if (isByRef) { // Store the result of the inlined region to the allocated reduction var // ptr - builder.CreateStore(phis[0], var); + 
builder.CreateStore(phis[0], byRefVars[i]); - privateReductionVariables.push_back(var); + privateReductionVariables.push_back(byRefVars[i]); moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]); } else { >From 5cd1a43f7ce7a0fe4e3271fecde1226f1e6f8758 Mon Sep 17 00:00:00 2001 From: Mats Petersson <mats.peters...@arm.com> Date: Wed, 29 May 2024 17:41:10 +0100 Subject: [PATCH 2/6] Make it work for both MLIR and Flang tests --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 5 +---- .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 9 ++++++++- .../Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir | 4 ++-- mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir | 8 +++++--- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index d423b545a1faf..60470d7f4b09a 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2157,10 +2157,9 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, unsigned NumReductions = ReductionInfos.size(); Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions); Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); - // Builder.restoreIP(AllocaIP); Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array"); - Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); + Builder.SetInsertPoint(InsertBlock, InsertBlock->begin()); for (auto En : enumerate(ReductionInfos)) { unsigned Index = En.index(); @@ -2558,8 +2557,6 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); // Allocate space for computed loop bounds as expected by the "init" function. 
- - // Builder.restoreIP(AllocaIP); Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); Type *I32Type = Type::getInt32Ty(M.getContext()); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 47b07248ba84d..5617f9f48f315 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1155,6 +1155,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.getRegion().getArguments().take_back( opInst.getNumReductionVars()); + llvm::BasicBlock *initBlock = nullptr; SmallVector<llvm::Value *> byRefVars; if (isByRef) { for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { @@ -1163,6 +1164,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, byRefVars.push_back(builder.CreateAlloca( moduleTranslation.convertType(reductionDecls[i].getType()))); } + + initBlock = splitBB(builder, true, "omp.reduction.init"); + allocaIP = InsertPointTy(allocaIP.getBlock(), allocaIP.getBlock()->end()); } for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { @@ -1177,7 +1181,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); - + if (initBlock) + builder.SetInsertPoint(initBlock->getTerminator()); + else + builder.restoreIP(allocaIP); if (isByRef) { // Store the result of the inlined region to the allocated reduction var // ptr diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir index 84a487cb8c98f..8afa89f1d8368 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir @@ -55,11 +55,11 @@ // Private 
reduction variable and its initialization. // CHECK: %tid.addr.local = alloca i32 -// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4) // CHECK: %[[PRIV_PTR_I:.+]] = alloca ptr +// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr +// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4) // CHECK: store ptr %[[MALLOC_I]], ptr %[[PRIV_PTR_I]] // CHECK: %[[MALLOC_J:.+]] = call ptr @malloc(i64 4) -// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr // CHECK: store ptr %[[MALLOC_J]], ptr %[[PRIV_PTR_J]] // Call to the reduction function. diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir index f4b77cbf413d4..361905f7cddeb 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir @@ -59,13 +59,15 @@ module { // CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4 // CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4 // CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4 -// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8 // CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 +// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8 +// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8 +// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8 +// CHECK: br label %[[INIT_LABEL:.*]] +// CHECK: [[INIT_LABEL]]: // CHECK: store ptr %[[VAL_13]], ptr %[[VAL_21]], align 8 // CHECK: %[[VAL_22:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_15]], align 8 -// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8 // CHECK: store ptr %[[VAL_15]], ptr %[[VAL_23]], align 8 -// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8 // CHECK: br label %[[VAL_25:.*]] // CHECK: omp.par.region: ; preds = %[[VAL_26:.*]] // CHECK: br label %[[VAL_27:.*]] >From 8bd75966ed61ca66cde09b7d5a932737de195a5a Mon Sep 17 00:00:00 2001 From: Mats Petersson 
<mats.peters...@arm.com> Date: Thu, 30 May 2024 13:54:13 +0100 Subject: [PATCH 3/6] Review fixes --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 ++-- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 60470d7f4b09a..e279ebad0ac1f 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2159,7 +2159,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array"); - Builder.SetInsertPoint(InsertBlock, InsertBlock->begin()); + Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); for (auto En : enumerate(ReductionInfos)) { unsigned Index = En.index(); @@ -3120,7 +3120,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this); // Allocate space for computed loop bounds as expected by the "init" function. 
- Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); Type *I32Type = Type::getInt32Ty(M.getContext()); Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 5617f9f48f315..a740b81503ba6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -770,7 +770,7 @@ static void allocByValReductionVars( DenseMap<Value, llvm::Value *> &reductionVariableMap, llvm::ArrayRef<bool> isByRefs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); - builder.restoreIP(allocaIP); + builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); auto args = loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); @@ -1155,7 +1155,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.getRegion().getArguments().take_back( opInst.getNumReductionVars()); - llvm::BasicBlock *initBlock = nullptr; + llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init"); + allocaIP = + InsertPointTy(allocaIP.getBlock(), + allocaIP.getBlock()->getTerminator()->getIterator()); SmallVector<llvm::Value *> byRefVars; if (isByRef) { for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { @@ -1165,8 +1168,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, moduleTranslation.convertType(reductionDecls[i].getType()))); } - initBlock = splitBB(builder, true, "omp.reduction.init"); - allocaIP = InsertPointTy(allocaIP.getBlock(), allocaIP.getBlock()->end()); } for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { @@ -1181,10 +1182,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, assert(phis.size() == 
1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); - if (initBlock) - builder.SetInsertPoint(initBlock->getTerminator()); - else - builder.restoreIP(allocaIP); + builder.SetInsertPoint(initBlock->getTerminator()); + if (isByRef) { // Store the result of the inlined region to the allocated reduction var // ptr >From a07d09a48efd8ff4ec6eb93a3c5666e971443b82 Mon Sep 17 00:00:00 2001 From: Mats Petersson <mats.peters...@arm.com> Date: Tue, 4 Jun 2024 15:26:54 +0100 Subject: [PATCH 4/6] Fix bugs --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a740b81503ba6..8927d11f02c5b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -780,7 +780,7 @@ static void allocByValReductionVars( llvm::Value *var = builder.CreateAlloca( moduleTranslation.convertType(reductionDecls[i].getType())); moduleTranslation.mapValue(args[i], var); - privateReductionVariables.push_back(var); + privateReductionVariables[i] = var; reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); } } @@ -911,7 +911,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); - SmallVector<llvm::Value *> privateReductionVariables; + SmallVector<llvm::Value *> privateReductionVariables( + wsloopOp.getNumReductionVars()); DenseMap<Value, llvm::Value *> reductionVariableMap; allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, privateReductionVariables, @@ -942,7 +943,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // ptr builder.CreateStore(phis[0], var); - 
privateReductionVariables.push_back(var); + privateReductionVariables[i] = var; moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]); } else { @@ -1140,7 +1141,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Collect reduction declarations SmallVector<omp::DeclareReductionOp> reductionDecls; collectReductionDecls(opInst, reductionDecls); - SmallVector<llvm::Value *> privateReductionVariables; + SmallVector<llvm::Value *> privateReductionVariables( + opInst.getNumReductionVars()); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars @@ -1159,15 +1161,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, allocaIP = InsertPointTy(allocaIP.getBlock(), allocaIP.getBlock()->getTerminator()->getIterator()); - SmallVector<llvm::Value *> byRefVars; - if (isByRef) { - for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { + SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars()); + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { + if (isByRef[i]) { // Allocate reduction variable (which is a pointer to the real reduciton // variable allocated in the inlined region) - byRefVars.push_back(builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType()))); + byRefVars[i] = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); } - } for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { @@ -1184,12 +1185,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, "reduction neutral element declaration region"); builder.SetInsertPoint(initBlock->getTerminator()); - if (isByRef) { + if (isByRef[i]) { // Store the result of the inlined region to the allocated reduction var // ptr builder.CreateStore(phis[0], byRefVars[i]); - privateReductionVariables.push_back(byRefVars[i]); + 
privateReductionVariables[i] = byRefVars[i]; moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]); } else { >From 8663c66fbd83b05354e8ac7fb711b05e1316a97d Mon Sep 17 00:00:00 2001 From: Mats Petersson <mats.peters...@arm.com> Date: Wed, 5 Jun 2024 19:48:24 +0100 Subject: [PATCH 5/6] Rename and improve test Add a second reduction of a by_val type, so that we can test for the most recent bug found. Also, since the test is now different, rename it. This test goes all the way to LLVM-IR to ensure the whole code generation works here. --- .../OpenMP/parallel-reduction-allocate.f90 | 23 --------- .../Lower/OpenMP/parallel-reduction-mixed.f90 | 48 +++++++++++++++++++ 2 files changed, 48 insertions(+), 23 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 diff --git a/flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 b/flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 deleted file mode 100644 index fddce25ae22cc..0000000000000 --- a/flang/test/Lower/OpenMP/parallel-reduction-allocate.f90 +++ /dev/null @@ -1,23 +0,0 @@ -!! The main point of this test is to check that the code compiles at all, so the -!! checking is not very detailed. Not hitting an assert, crashing or otherwise failing -!! to compile is the key point. Also, emitting llvm is required for this to happen. -! 
RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 | FileCheck %s -subroutine proc - implicit none - real(8),allocatable :: F(:) - real(8),allocatable :: A(:) - -!$omp parallel private(A) reduction(+:F) - allocate(A(10)) -!$omp end parallel -end subroutine proc - -!CHECK-LABEL: define void @proc_() -!CHECK: call void -!CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) - -!CHECK: define internal void @[[OMP_PAR]] -!CHECK: omp.par.region8: -!CHECK-NEXT: call ptr @malloc -!CHECK-SAME: i64 10 - diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 new file mode 100644 index 0000000000000..ea04d3d1dfa69 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -0,0 +1,48 @@ +!! Make sure that mixture of by-ref and by-val reductions work all the way +!! to LLVM-IR code. +! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 | FileCheck %s +subroutine proc + implicit none + real(8),allocatable :: F(:) + real(8),allocatable :: A(:) + + integer :: I + +!$omp parallel private(A) reduction(+:F,I) + allocate(A(10)) +!$omp end parallel +end subroutine proc + +!CHECK-LABEL: define void @proc_() +!CHECK: call void +!CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) + +!CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias +!CHECK: %[[TID_LOCAL:.*]] = alloca i32 +!CHECK: %[[TID:.*]] = load i32, ptr %[[TID_ADDR]] +!CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]] +!CHECK: %[[I_priv:.*]] = alloca i32 +!CHECK: %[[F_priv:.*]] = alloca ptr + +!CHECK: omp.reduction.init: +!CHECK: store ptr %{{.*}}, ptr %[[F_priv]] +!CHECK: store i32 0, ptr %[[I_priv]] + +!CHECK: omp.par.region8: +!CHECK-NEXT: call ptr @malloc +!CHECK-SAME: i64 10 + +!CHECK: %[[RED_ARR_0:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 0 +!CHECK: store ptr %[[F_priv]], ptr %[[RED_ARR_0:.*]] +!CHECK: %[[RED_ARR_1:.*]] = getelementptr inbounds [2 
x ptr], ptr %red.array, i64 0, i64 1 +!CHECK: store ptr %[[I_priv]], ptr %[[RED_ARR_1]] + +!CHECK: omp.par.pre_finalize: ; preds = %reduce.finalize +!CHECK: %{{.*}} = load ptr, ptr %[[F_priv]] +!CHECK: br label %omp.reduction.cleanup + +!CHECK: omp.reduction.cleanup: +!CHECK: br i1 %{{.*}}, label %[[OMP_FREE:.*]], label %{{.*}} + +!CHECK: [[OMP_FREE]]: +!CHECK: call void @free >From 243e5fa19528b0f582461ea47b721b3c8049dace Mon Sep 17 00:00:00 2001 From: Mats Petersson <mats.peters...@arm.com> Date: Thu, 6 Jun 2024 16:59:29 +0100 Subject: [PATCH 6/6] Select different place for allocas Also fix some tests, as this moves some allocas to an earlier location. --- .../OpenMP/irbuilder_nested_parallel_for.c | 144 +++++++++--------- clang/test/OpenMP/nested_loop_codegen.cpp | 32 ++-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 +- 3 files changed, 90 insertions(+), 90 deletions(-) diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index d737ab33e9ca4..a73eb963f710f 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -88,6 +88,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_0v..omp_par // CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: omp.par.entry: +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -96,10 +100,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: 
[[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -286,6 +286,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL8:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR2]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL8]], align 4 @@ -294,10 +298,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK: omp.par.region5: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -508,6 +508,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr 
[[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -516,18 +524,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I160:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -658,6 
+658,18 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL12]], align 4 @@ -666,26 +678,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I75:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 -// 
CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I135:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK: omp.par.region9: // CHECK-NEXT: store i32 0, ptr [[I16]], align 4 @@ -875,6 +875,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -883,10 +887,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 -// 
CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK: omp.par.region103: // CHECK-NEXT: store i32 0, ptr [[I110]], align 4 @@ -954,6 +954,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -962,10 +966,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK: omp.par.region44: // CHECK-NEXT: store i32 0, ptr [[I51]], align 4 @@ -1521,6 +1521,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_0v..omp_par // CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG19:![0-9]+]] { // 
CHECK-DEBUG-NEXT: omp.par.entry: +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -1529,10 +1533,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] @@ -1731,6 +1731,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL8:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TID_ADDR2]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL8]], align 4 @@ -1739,10 +1743,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100:![0-9]+]] @@ -1966,6 +1966,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -1974,18 +1982,10 @@ void 
parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[I160:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] @@ -2118,6 +2118,18 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: 
[[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL12]], align 4 @@ -2126,26 +2138,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[I75:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[I135:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], 
align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] @@ -2338,6 +2338,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -2346,10 +2350,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // 
CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK-DEBUG: omp.par.region103: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I110]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] @@ -2418,6 +2418,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -2426,10 +2430,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK-DEBUG: omp.par.region44: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I51]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]] diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp index 
0eb76bc2e1c69..797f40114fcb2 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -545,6 +545,10 @@ int inline_decl() { // CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK3-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK3-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -552,10 +556,6 @@ int inline_decl() { // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: // CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4 @@ -713,6 +713,10 @@ int inline_decl() { // CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK3-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK3-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = 
alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -721,10 +725,6 @@ int inline_decl() { // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: // CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4 @@ -884,6 +884,10 @@ int inline_decl() { // CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK4-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -891,10 +895,6 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label 
[[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]] @@ -1062,6 +1062,10 @@ int inline_decl() { // CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK4-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -1070,10 +1074,6 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG87:![0-9]+]] diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index e279ebad0ac1f..db734d41232bd 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2557,7 +2557,7 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); // Allocate space for computed loop bounds as expected by the "init" function. 
- Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); + Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca()); Type *I32Type = Type::getInt32Ty(M.getContext()); Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); @@ -3120,7 +3120,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this); // Allocate space for computed loop bounds as expected by the "init" function. - Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); + Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca()); Type *I32Type = Type::getInt32Ty(M.getContext()); Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits