https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/106066
This patch moves the creation of `DataSharingProcessor` instances for loop constructs out of `genOMPDispatch()` and into their corresponding codegen functions. This is a necessary first step to enable proper handling of privatization on composite constructs. Some tests are updated due to a change of order between clause processing and privatization. From 23983c7dff8f61877943a8a8d2332b485e879549 Mon Sep 17 00:00:00 2001 From: Sergio Afonso <safon...@amd.com> Date: Mon, 26 Aug 2024 13:29:52 +0100 Subject: [PATCH] [Flang][OpenMP] Move loop privatization out of dispatch This patch moves the creation of `DataSharingProcessor` instances for loop constructs out of `genOMPDispatch()` and into their corresponding codegen functions. This is a necessary first step to enable a proper handling of privatization on composite constructs. Some tests are updated due to a change of order between clause processing and privatization. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 142 +++++++++++------- .../test/Lower/OpenMP/parallel-reduction3.f90 | 14 +- .../wsloop-reduction-array-assumed-shape.f90 | 14 +- .../Lower/OpenMP/wsloop-reduction-array.f90 | 18 +-- .../Lower/OpenMP/wsloop-reduction-array2.f90 | 18 +-- .../wsloop-reduction-multiple-clauses.f90 | 22 +-- 6 files changed, 127 insertions(+), 101 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..307cf47247b743 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1044,7 +1044,6 @@ static void genDistributeClauses(lower::AbstractConverter &converter, cp.processAllocate(clauseOps); cp.processDistSchedule(stmtCtx, clauseOps); cp.processOrder(clauseOps); - // TODO Support delayed privatization. 
} static void genFlushClauses(lower::AbstractConverter &converter, @@ -1128,7 +1127,6 @@ static void genSimdClauses(lower::AbstractConverter &converter, cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); - // TODO Support delayed privatization. cp.processTODO<clause::Linear, clause::Nontemporal>( loc, llvm::omp::Directive::OMPD_simd); } @@ -1299,7 +1297,6 @@ static void genWsloopClauses( cp.processOrdered(clauseOps); cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); cp.processSchedule(stmtCtx, clauseOps); - // TODO Support delayed privatization. cp.processTODO<clause::Allocate, clause::Linear>( loc, llvm::omp::Directive::OMPD_do); @@ -1918,17 +1915,25 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, // also be a leaf of a composite construct //===----------------------------------------------------------------------===// -static void genStandaloneDistribute( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genStandaloneDistribute(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::DistributeOperands distributeClauseOps; genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, distributeClauseOps); + // TODO: Support delayed privatization. 
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector<const semantics::Symbol *> iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, @@ -1949,8 +1954,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, - DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::WsloopOperands wsloopClauseOps; @@ -1959,6 +1963,12 @@ static void genStandaloneDo(lower::AbstractConverter &converter, genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, wsloopClauseOps, reductionTypes, reductionSyms); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector<const semantics::Symbol *> iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, @@ -1998,11 +2008,16 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, - DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); + // TODO: Support delayed privatization. 
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector<const semantics::Symbol *> iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, @@ -2018,11 +2033,13 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, llvm::omp::Directive::OMPD_simd, dsp); } -static void genStandaloneTaskloop( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genStandaloneTaskloop(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { TODO(loc, "Taskloop construct"); } @@ -2034,7 +2051,7 @@ static void genCompositeDistributeParallelDo( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); TODO(loc, "Composite DISTRIBUTE PARALLEL DO"); } @@ -2043,16 +2060,18 @@ static void genCompositeDistributeParallelDoSimd( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); TODO(loc, 
"Composite DISTRIBUTE PARALLEL DO SIMD"); } -static void genCompositeDistributeSimd( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genCompositeDistributeSimd(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); @@ -2067,6 +2086,12 @@ static void genCompositeDistributeSimd( mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2103,8 +2128,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, - DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); @@ -2121,6 +2145,12 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps); + // TODO: Support delayed privatization. 
+ DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2151,11 +2181,13 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, llvm::omp::Directive::OMPD_do_simd, dsp); } -static void genCompositeTaskloopSimd( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genCompositeTaskloopSimd(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } @@ -2164,30 +2196,35 @@ static void genCompositeTaskloopSimd( // Dispatch //===----------------------------------------------------------------------===// -static bool genOMPCompositeDispatch( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static bool genOMPCompositeDispatch(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { using llvm::omp::Directive; using lower::omp::matchLeafSequence; + // TODO: Privatization for composite constructs is 
currently only done based + // on the clauses for their last leaf construct, which may not always be + // correct. Consider per-leaf privatization of composite constructs once + // delayed privatization is supported by all participating ops. if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do)) genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, - queue, item, dsp); + queue, item); else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do_simd)) genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, - loc, queue, item, dsp); + loc, queue, item); else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd)) genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, - item, dsp); + item); else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd)) - genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, - dsp); + genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item); else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd)) genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, - item, dsp); + item); else return false; @@ -2202,20 +2239,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter, ConstructQueue::const_iterator item) { assert(item != queue.end()); - std::optional<DataSharingProcessor> loopDsp; bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) == llvm::omp::Association::Loop; if (loopLeaf) { symTable.pushScope(); - // TODO: Use one DataSharingProcessor for each leaf of a composite - // construct. 
- loopDsp.emplace(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); - loopDsp->processStep1(); - if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp)) { + item)) { symTable.popScope(); return; } @@ -2227,11 +2256,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_distribute: genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); + item); break; case llvm::omp::Directive::OMPD_do: - genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); + genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_loop: TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir)); @@ -2260,8 +2288,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // in genBodyOfOp break; case llvm::omp::Directive::OMPD_simd: - genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); + genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); @@ -2291,8 +2318,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskloop: - genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); + genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskwait: genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item); diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90 index 441dff34553d4f..591f41cb946602 100644 --- 
a/flang/test/Lower/OpenMP/parallel-reduction3.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90 @@ -69,19 +69,19 @@ ! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32 ! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box<!fir.array<?xi32>> ! CHECK: omp.parallel { -! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.array<?xi32>> -! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref<!fir.box<!fir.array<?xi32>>> +! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>> +! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>> +! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} +! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) { +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) { ! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) { ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>) -! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref<i32> +! CHECK: fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32> ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>> -! 
CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index) ! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1> diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 index c984ab61bedb3b..d881ff8c1a026a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 @@ -79,18 +79,18 @@ subroutine reduce(r) ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>) ! CHECK: omp.parallel { -! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf64>> -! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf64>>> +! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.array<?xf64>> +! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_4]] : !fir.ref<!fir.box<!fir.array<?xf64>>> +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! 
CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_6]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) { +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_4]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) { ! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> (!fir.ref<!fir.box<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.array<?xf64>>>) -! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref<i32> -! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32> +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32> +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f64 ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<!fir.box<!fir.array<?xf64>>> ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 index 43e4c86b6bade2..afdd486c7be2a4 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 @@ -73,24 +73,24 @@ program reduce ! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>) ! CHECK: omp.parallel { -! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! 
CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> -! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<2xi32>> -! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<2xi32>>> +! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> +! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>> +! CHECK: fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>> +! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) { +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) { ! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>) -! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32> -! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32> +! CHECK: fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32> +! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>> ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : index ! 
CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32> ! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32> -! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_21:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] : i32 ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>> diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 index be5273ea36c99f..ee77332e9b412a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 @@ -73,23 +73,23 @@ program reduce ! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>) ! CHECK: omp.parallel { -! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> -! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<2xi32>> -! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<2xi32>>> +! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> +! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>> +! CHECK: fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>> +! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} +! 
CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) { +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) { ! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>) -! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32> +! CHECK: fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32> ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>> ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : index ! CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_17]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32> ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32> -! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_20]] : i32 ! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>> ! CHECK: %[[VAL_23:.*]] = arith.constant 1 : index @@ -99,7 +99,7 @@ program reduce ! CHECK: %[[VAL_26:.*]] = arith.constant 2 : index ! CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_25]] (%[[VAL_26]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32> ! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32> -! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32> +! 
CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_28]], %[[VAL_29]] : i32 ! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>> ! CHECK: %[[VAL_32:.*]] = arith.constant 2 : index diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 index db4b4d33da7579..3e93e915bcd89c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 @@ -109,23 +109,23 @@ program main ! CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f64 ! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_4]]#0 : f64, !fir.ref<!fir.array<3x3xf64>> ! CHECK: omp.parallel { -! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>> -! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>> -! CHECK: fir.store %[[VAL_13]] to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>> +! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>> +! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>> +! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>> +! CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref<f64>, byref @add_reduction_byref_box_3x3xf64 %[[VAL_14]] -> %[[VAL_19:.*]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>) { +! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref<f64>, byref @add_reduction_byref_box_3x3xf64 %[[VAL_12]] -> %[[VAL_19:.*]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>) { ! CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) { ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEscalar"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>) ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFEarray"} : (!fir.ref<!fir.box<!fir.array<3x3xf64>>>) -> (!fir.ref<!fir.box<!fir.array<3x3xf64>>>, !fir.ref<!fir.box<!fir.array<3x3xf64>>>) -! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]]#1 : !fir.ref<i32> -! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32> +! CHECK: fir.store %[[VAL_20]] to %[[VAL_14]]#1 : !fir.ref<i32> +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i32) -> f64 ! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_21]]#0 : f64, !fir.ref<f64> -! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_26:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_25]], %[[VAL_26]] : i32 ! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> f64 @@ -134,7 +134,7 @@ program main ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : index ! CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_30]], %[[VAL_31]]) : (!fir.box<!fir.array<3x3xf64>>, index, index) -> !fir.ref<f64> ! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_32]] : f64, !fir.ref<f64> -! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32> +! 
CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_34:.*]] = arith.constant 2 : i32 ! CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_33]], %[[VAL_34]] : i32 ! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (i32) -> f64 @@ -143,7 +143,7 @@ program main ! CHECK: %[[VAL_39:.*]] = arith.constant 2 : index ! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_37]] (%[[VAL_38]], %[[VAL_39]]) : (!fir.box<!fir.array<3x3xf64>>, index, index) -> !fir.ref<f64> ! CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_40]] : f64, !fir.ref<f64> -! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_42:.*]] = arith.constant 3 : i32 ! CHECK: %[[VAL_43:.*]] = arith.addi %[[VAL_41]], %[[VAL_42]] : i32 ! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> f64 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits