https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748
>From 4257950e7df8d7eaf92a1a7b02f89422007ffe6a Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Sat, 19 Oct 2024 23:32:27 +0900 Subject: [PATCH 1/7] Do not emit empty omp.single's --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 50 ++++++++++++------- .../OpenMP/lower-workshare-no-single.mlir | 20 ++++++++ 2 files changed, 52 insertions(+), 18 deletions(-) create mode 100644 flang/test/Transforms/OpenMP/lower-workshare-no-single.mlir diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index aa4371b3af6f7d..225c585a02d913 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -239,11 +239,12 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, return alloc; }; - auto moveToSingle = [&](SingleRegion sr, OpBuilder allocaBuilder, - OpBuilder singleBuilder, - OpBuilder parallelBuilder) -> SmallVector<Value> { + auto moveToSingle = + [&](SingleRegion sr, OpBuilder allocaBuilder, OpBuilder singleBuilder, + OpBuilder parallelBuilder) -> std::pair<bool, SmallVector<Value>> { IRMapping singleMapping = rootMapping; SmallVector<Value> copyPrivate; + bool allParallelized = true; for (Operation &op : llvm::make_range(sr.begin, sr.end)) { if (isSafeToParallelize(&op)) { @@ -267,6 +268,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, assert(llvm::all_of(op.getResults(), [&](Value v) { return !isTransitivelyUsedOutside(v, sr); })); + allParallelized = false; } } else if (auto alloca = dyn_cast<fir::AllocaOp>(&op)) { auto hoisted = @@ -274,6 +276,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, rootMapping.map(&*alloca, &*hoisted); rootMapping.map(alloca.getResult(), hoisted.getResult()); copyPrivate.push_back(hoisted); + allParallelized = false; } else { singleBuilder.clone(op, singleMapping); // Prepare reloaded values for results of operations that cannot be @@ -286,10 +289,11 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, copyPrivate.push_back(alloc); } } + allParallelized = false; } } singleBuilder.create<omp::TerminatorOp>(loc); - return copyPrivate; + return {allParallelized, copyPrivate}; }; for (Block &block : sourceRegion) { @@ -343,25 +347,35 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, Block *parallelBlock = new Block(); parallelBuilder.setInsertionPointToStart(parallelBlock); - omp::SingleOperands singleOperands; - if (isLast) - singleOperands.nowait = rootBuilder.getUnitAttr(); - singleOperands.copyprivateVars = + auto [allParallelized, copyprivateVars] = moveToSingle(std::get<SingleRegion>(opOrSingle), allocaBuilder, singleBuilder, parallelBuilder); - cleanupBlock(singleBlock); - for (auto var : singleOperands.copyprivateVars) { - mlir::func::FuncOp funcOp = - createCopyFunc(loc, var.getType(), firCopyFuncBuilder); - singleOperands.copyprivateSyms.push_back(SymbolRefAttr::get(funcOp)); + if (allParallelized) { + // The single region was not required as all operations were safe to + // parallelize + assert(copyprivateVars.empty()); + assert(allocaBlock->empty()); + delete singleBlock; + } else { + omp::SingleOperands singleOperands; + if (isLast) + singleOperands.nowait = rootBuilder.getUnitAttr(); + singleOperands.copyprivateVars = copyprivateVars; + cleanupBlock(singleBlock); + for (auto var : singleOperands.copyprivateVars) { + mlir::func::FuncOp funcOp = + createCopyFunc(loc, var.getType(), firCopyFuncBuilder); + singleOperands.copyprivateSyms.push_back( + SymbolRefAttr::get(funcOp)); + } + omp::SingleOp singleOp = + rootBuilder.create<omp::SingleOp>(loc, singleOperands); + singleOp.getRegion().push_back(singleBlock); + targetRegion.front().getOperations().splice( + singleOp->getIterator(), allocaBlock->getOperations()); } - omp::SingleOp singleOp = - rootBuilder.create<omp::SingleOp>(loc, singleOperands); - singleOp.getRegion().push_back(singleBlock); rootBuilder.getInsertionBlock()->getOperations().splice( rootBuilder.getInsertionPoint(), parallelBlock->getOperations()); - targetRegion.front().getOperations().splice( - singleOp->getIterator(), allocaBlock->getOperations()); delete allocaBlock; delete parallelBlock; } else { diff --git a/flang/test/Transforms/OpenMP/lower-workshare-no-single.mlir b/flang/test/Transforms/OpenMP/lower-workshare-no-single.mlir new file mode 100644 index 00000000000000..3e73816e63ace3 --- /dev/null +++ b/flang/test/Transforms/OpenMP/lower-workshare-no-single.mlir @@ -0,0 +1,20 @@ +// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s + +// Check that we do not emit an omp.single for the constant operation + +func.func @foo() { + omp.workshare { + %c1 = arith.constant 1 : index + omp.workshare.loop_wrapper { + omp.loop_nest (%arg1) : index = (%c1) to (%c1) inclusive step (%c1) { + "test.test0"() : () -> () + omp.yield + } + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-NOT: omp.single >From 28f0a6f94f73bacaf28070dbbb9004d17cba4774 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 2/7] Add workshare loop wrapper lowerings Bufferize test Bufferize test Bufferize test Add test for should use workshare lowering --- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +- .../Transforms/OptimizedBufferization.cpp | 10 +- flang/test/HLFIR/bufferize-workshare.fir | 58 ++++++++ .../OpenMP/should-use-workshare-lowering.mlir | 140 ++++++++++++++++++ 4 files changed, 208 insertions(+), 4 deletions(-) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index 07794828fce267..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,6 +26,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 3a0a98dc594463..f014724861e333 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector<mlir::Value> extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); @@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( llvm::SmallVector<mlir::Value> extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 00000000000000..9b7341ae43398a --- /dev/null +++ b/flang/test/HLFIR/bufferize-workshare.fir @@ -0,0 +1,58 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// CHECK-LABEL: func.func @simple( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) { +// CHECK: omp.parallel { +// CHECK: omp.workshare { +// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) +// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""} +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) +// CHECK: %[[VAL_7:.*]] = arith.constant true +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: omp.workshare.loop_wrapper { +// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) { +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32> +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32> +// CHECK: omp.yield +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>> +// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>> +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } +func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) { + omp.parallel { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> + %val = fir.load %ref : !fir.ref<i32> + %sub = arith.subi %val, %c1_i32 : i32 + hlfir.yield_element %sub : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir new file mode 100644 index 00000000000000..229fe592a02b9b --- /dev/null +++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir @@ -0,0 +1,140 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// Checks that we correctly identify when to use the lowering to +// omp.workshare.loop_wrapper + +// CHECK-LABEL: @should_parallelize_0 +// CHECK: omp.workshare.loop_wrapper +func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + return +} + +// CHECK-LABEL: @should_parallelize_1 +// CHECK: omp.workshare.loop_wrapper +func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.parallel { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + + +// CHECK-LABEL: @should_not_parallelize_0 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.single { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_1 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.critical { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_2 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.parallel { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_3 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.parallel { + omp.workshare { + omp.parallel { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + omp.terminator + } + omp.terminator + } + return +} >From 088d3a7a335acb4478dd13c9674d1a185947bcd7 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Mon, 23 Sep 2024 12:56:11 +0900 Subject: [PATCH 3/7] Add integration test for workshare --- flang/test/Integration/OpenMP/workshare.f90 | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 flang/test/Integration/OpenMP/workshare.f90 diff --git a/flang/test/Integration/OpenMP/workshare.f90 b/flang/test/Integration/OpenMP/workshare.f90 new file mode 100644 index 00000000000000..0c4524f8552906 --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare.f90 @@ -0,0 +1,57 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR + +subroutine sb1(a, x, y, z) + integer :: a + integer :: x(:) + integer :: y(:) + integer :: z(:) + !$omp parallel workshare + z = a * x + y + !$omp end parallel workshare +end subroutine + +! HLFIR: func.func @_QPsb1 +! HLFIR: omp.parallel { +! HLFIR: omp.workshare { +! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> { +! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> { +! HLFIR: hlfir.assign +! HLFIR: hlfir.destroy +! HLFIR: hlfir.destroy +! HLFIR-NOT: omp.barrier +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR-NOT: omp.barrier +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR: return +! HLFIR: } +! HLFIR:} + + +! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>> +! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32> + +! FIR: func.func @_QPsb1 +! FIR: omp.parallel { +! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) { +! FIR: fir.allocmem +! FIR: omp.wsloop { +! FIR: omp.loop_nest +! FIR: omp.single nowait { +! FIR: fir.call @_FortranAAssign +! FIR: fir.freemem +! FIR: omp.terminator +! FIR: } +! FIR: omp.barrier +! FIR: omp.terminator +! FIR: } >From 2718382db02f7d4cfb3f0b356dee99a027a6dbd8 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 4 Oct 2024 15:02:54 +0900 Subject: [PATCH 4/7] One more integration test --- .../OpenMP/workshare-scalar-array-mul.f90 | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 new file mode 100644 index 00000000000000..2fb9a029bf93a5 --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 @@ -0,0 +1,67 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3 +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3 + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0 +!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0 + +program test + real :: arr_01(10) + !$omp parallel workshare + arr_01 = arr_01*2 + !$omp end parallel workshare +end program + +! HLFIR-O3: omp.parallel { +! HLFIR-O3: omp.workshare { +! HLFIR-O3: hlfir.elemental +! HLFIR-O3: hlfir.assign +! HLFIR-O3: hlfir.destroy +! HLFIR-O3: omp.terminator +! HLFIR-O3: omp.terminator + +! FIR-O3: omp.parallel { +! FIR-O3: omp.wsloop nowait { +! FIR-O3: omp.loop_nest +! FIR-O3: omp.terminator +! FIR-O3: omp.barrier +! FIR-O3: omp.terminator + +! HLFIR-O0: omp.parallel { +! HLFIR-O0: omp.workshare { +! HLFIR-O0: hlfir.elemental +! HLFIR-O0: hlfir.assign +! HLFIR-O0: hlfir.destroy +! HLFIR-O0: omp.terminator +! HLFIR-O0: omp.terminator + +! Check the copyprivate copy function +! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}}) +! FIR-O0: fir.load %[[SRC]] +! FIR-O0: fir.store {{.*}} to %[[DST]] + +! Check that we properly handle the temporary array +! FIR-O0: omp.parallel { +! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>> +! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_ +! FIR-O0: fir.allocmem +! FIR-O0: fir.store +! FIR-O0: omp.terminator +! FIR-O0: fir.load %[[CP]] +! FIR-O0: omp.wsloop { +! FIR-O0: omp.loop_nest +! FIR-O0: omp.yield +! FIR-O0: omp.terminator +! FIR-O0: omp.single nowait { +! FIR-O0: fir.call @_FortranAAssign +! FIR-O0: fir.freemem +! FIR-O0: omp.terminator +! FIR-O0: omp.barrier +! FIR-O0: omp.terminator >From 2fd6e88a960886a0b38ba0e64a6f7a65d744777d Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 4 Oct 2024 15:12:43 +0900 Subject: [PATCH 5/7] Add test for cfg workshare bufferization --- .../should-use-workshare-lowering-cfg.mlir | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir new file mode 100644 index 00000000000000..8b6d8097caad87 --- /dev/null +++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir @@ -0,0 +1,22 @@ +// RUN: fir-opt --bufferize-hlfir %s 2>&1 | FileCheck %s + +// CHECK: warning: omp workshare with unstructured control flow currently unsupported. +func.func @warn_cfg(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + ^bb1: + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + cf.br ^bb2 + ^bb2: + omp.terminator + } + return +} >From 6b1623e5d9ab1c669c183a730467bfb2db6849da Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 4 Oct 2024 15:24:46 +0900 Subject: [PATCH 6/7] Fix tests --- .../should-use-workshare-lowering-cfg.mlir | 22 ------------------- .../OpenMP/should-use-workshare-lowering.mlir | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) delete mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir deleted file mode 100644 index 8b6d8097caad87..00000000000000 --- a/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: fir-opt --bufferize-hlfir %s 2>&1 | FileCheck %s - -// CHECK: warning: omp workshare with unstructured control flow currently unsupported. -func.func @warn_cfg(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { - omp.workshare { - ^bb1: - %c42 = arith.constant 42 : index - %c1_i32 = arith.constant 1 : i32 - %shape = fir.shape %c42 : (index) -> !fir.shape<1> - %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) - %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { - ^bb0(%i: index): - hlfir.yield_element %c1_i32 : i32 - } - hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> - hlfir.destroy %elemental : !hlfir.expr<42xi32> - cf.br ^bb2 - ^bb2: - omp.terminator - } - return -} diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir index 229fe592a02b9b..91b08123cce422 100644 --- a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir +++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir @@ -138,3 +138,25 @@ func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : i } return } + +// CHECK-LABEL: @should_not_parallelize_4 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_4(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + ^bb1: + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + cf.br ^bb2 + ^bb2: + omp.terminator + } + return +} >From 3a1ff2651ff74516bd16814bb5726976a79eb2a0 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Sat, 19 Oct 2024 23:30:42 +0900 Subject: [PATCH 7/7] Test coverage for all changes --- .../OpenMP/workshare-array-array-assign.f90 | 35 ++++++++++++++ .../{workshare.f90 => workshare-axpy.f90} | 0 .../OpenMP/workshare-scalar-array-assign.f90 | 46 +++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 flang/test/Integration/OpenMP/workshare-array-array-assign.f90 rename flang/test/Integration/OpenMP/{workshare.f90 => workshare-axpy.f90} (100%) create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 diff --git a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 new file mode 100644 index 00000000000000..065f72d5d72d88 --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 @@ -0,0 +1,35 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR + +subroutine sb1(x, y) + integer :: x(:) + integer :: y(:) + !$omp parallel workshare + x = y + !$omp end parallel workshare +end subroutine + +! HLFIR: omp.parallel { +! HLFIR: omp.workshare { +! HLFIR: hlfir.assign +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR: omp.terminator +! HLFIR: } + +! FIR: omp.parallel { +! FIR: omp.wsloop nowait { +! FIR: omp.loop_nest +! FIR: omp.terminator +! FIR: } +! FIR: omp.barrier +! FIR: omp.terminator +! FIR: } diff --git a/flang/test/Integration/OpenMP/workshare.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90 similarity index 100% rename from flang/test/Integration/OpenMP/workshare.f90 rename to flang/test/Integration/OpenMP/workshare-axpy.f90 diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 new file mode 100644 index 00000000000000..fad1af110792bb --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 @@ -0,0 +1,46 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR + +subroutine sb1(a, x) + integer :: a + integer :: x(:) + !$omp parallel workshare + x = a + !$omp end parallel workshare +end subroutine + +! HLFIR: omp.parallel { +! HLFIR: omp.workshare { +! HLFIR: %[[SCALAR:.*]] = fir.load %1#0 : !fir.ref<i32> +! HLFIR: hlfir.assign %[[SCALAR]] to +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR: omp.terminator +! HLFIR: } + +! FIR: omp.parallel { +! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32 +! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) { +! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32> +! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32> +! FIR: omp.terminator +! FIR: } +! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32> +! FIR: %6:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index) +! FIR: omp.wsloop nowait { +! FIR: omp.loop_nest (%arg2) : index = (%c1) to (%6#1) inclusive step (%c1) { +! FIR: fir.store %[[SCALAR_RELOAD]] +! FIR: omp.yield +! FIR: } +! FIR: omp.terminator +! FIR: } +! FIR: omp.barrier +! FIR: omp.terminator _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits