[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/101444 2/4 >From 8068d6036fe84f6c0f22f2c877366eef184292e3 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/101445 3/4 >From c2cbd7779dc118c6bad507b6babeb7200262caff Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++--- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 43 +-- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 ++--- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 61 insertions(+), 39 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..5406e5076d55c 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,44 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/101446 4/4 There are two points which need some discussion in this PR: 1. We need to make a value computed in an omp.single accessible in all threads of the omp.parallel region. This is achieved by allocating temporary memory outside the omp.parallel and storing that in the omp.single and then reloading it from all threads. However, from reading the standard I don't think we are guaranteed that the workshare is nested in the omp.parallel so there could be an omp.parallel { func.call @contains_workshare }, then we would not be able to access the omp.parallel. So I think adding support in the runtime to be able to yield a value from an omp.single could be the fix to this. 2. For the temporary allocations above not all types are supported by fir.alloca, so I need to use llvm.alloca and unrealized_cast to be able to allocate a temporary for a fir.ref type. This too can be fixed by introducing yielding from omp.single. >From 003568d028b9d7f0323f31f8717527fc52c93c6f Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:44:31 +0900 Subject: [PATCH] [flang] Lower omp.workshare to other omp constructs --- flang/include/flang/Optimizer/CMakeLists.txt | 1 + .../flang/Optimizer/OpenMP/CMakeLists.txt | 4 + flang/include/flang/Optimizer/OpenMP/Passes.h | 30 ++ .../include/flang/Optimizer/OpenMP/Passes.td | 18 ++ flang/include/flang/Tools/CLOptions.inc | 2 + flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Optimizer/Builder/HLFIRTools.cpp| 21 +- flang/lib/Optimizer/CMakeLists.txt| 1 + .../HLFIR/Transforms/BufferizeHLFIR.cpp | 6 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 26 ++ flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 260 ++ flang/test/HLFIR/bufferize-workshare.fir | 58 .../Transforms/OpenMP/lower-workshare.mlir| 81 ++ flang/tools/bbc/CMakeLists.txt| 1 + flang/tools/fir-opt/CMakeLists.txt| 1 + flang/tools/fir-opt/fir-opt.cpp | 2 + flang/tools/tco/CMakeLists.txt| 1 + 17 
files changed, 505 insertions(+), 9 deletions(-) create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt index 89e43a9ee8d62..3336ac935e101 100644 --- a/flang/include/flang/Optimizer/CMakeLists.txt +++ b/flang/include/flang/Optimizer/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt new file mode 100644 index 0..d59573f0f7fd9 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP) + +add_public_tablegen_target(FlangOpenMPPassesIncGen) diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h new file mode 100644 index 0..95a05b3005073 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -0,0 +1,30 @@ +//===- Passes.h - OpenMP pass entry points --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This header declares OpenMP pass entry points. 
+// +//===--===// + +#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H +#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include + +namespace flangomp { +#define GEN_PASS_DECL +#define GEN_PASS_REGISTRATION +#include "flang/Optimizer/OpenMP/Passes.h.inc" + +bool shouldUseWorkshareLowering(mlir::Operation *op); + +} // namespace flangomp + +#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td new file mode 100644 index 0..6f636ec1df616 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passe
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
ivanradanov wrote: Should we have a `-use-experimental-workshare` or similar flag to facilitate some temporary in-tree development, as this may require more moving pieces? https://github.com/llvm/llvm-project/pull/101444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
ivanradanov wrote: I am debating introducing a new operation workshare_loop_container which exists only to "contain" an omp.loop_nest between lowering an elemental and lowering the omp.workshare it is contained in, so we would have this state: ``` omp.workshare { omp.workshare_loop_container { omp.loop_nest {} } } ``` instead of this one: ``` omp.workshare { omp.wsloop { omp.loop_nest {} } } ``` which may have come from a different lowering/codegen and we are not sure what the semantics of that code would be. This new operation can later be reused for the `workdistribute` lowering as well. https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From 9a51b404ab47c5dd0b27c3f957ee6cefd1470c25 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:44:31 +0900 Subject: [PATCH] [flang] Lower omp.workshare to other omp constructs --- flang/include/flang/Optimizer/CMakeLists.txt | 1 + .../flang/Optimizer/OpenMP/CMakeLists.txt | 4 + flang/include/flang/Optimizer/OpenMP/Passes.h | 30 ++ .../include/flang/Optimizer/OpenMP/Passes.td | 18 ++ flang/include/flang/Tools/CLOptions.inc | 2 + flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Optimizer/CMakeLists.txt| 1 + .../HLFIR/Transforms/BufferizeHLFIR.cpp | 6 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 26 ++ flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 260 ++ flang/test/HLFIR/bufferize-workshare.fir | 58 .../Transforms/OpenMP/lower-workshare.mlir| 81 ++ flang/tools/bbc/CMakeLists.txt| 1 + flang/tools/fir-opt/CMakeLists.txt| 1 + flang/tools/fir-opt/fir-opt.cpp | 2 + flang/tools/tco/CMakeLists.txt| 1 + 16 files changed, 491 insertions(+), 2 deletions(-) create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt index 89e43a9ee8d62..3336ac935e101 100644 --- a/flang/include/flang/Optimizer/CMakeLists.txt +++ b/flang/include/flang/Optimizer/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt 
b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt new file mode 100644 index 0..d59573f0f7fd9 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP) + +add_public_tablegen_target(FlangOpenMPPassesIncGen) diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h new file mode 100644 index 0..95a05b3005073 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -0,0 +1,30 @@ +//===- Passes.h - OpenMP pass entry points --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This header declares OpenMP pass entry points. +// +//===--===// + +#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H +#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include + +namespace flangomp { +#define GEN_PASS_DECL +#define GEN_PASS_REGISTRATION +#include "flang/Optimizer/OpenMP/Passes.h.inc" + +bool shouldUseWorkshareLowering(mlir::Operation *op); + +} // namespace flangomp + +#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td new file mode 100644 index 0..6f636ec1df616 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -0,0 +1,18 @@ +//===-- Passes.td - HLFIR pass definition file -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_DIALECT_OPENMP_PASSES +#define FORTRAN_DIALECT_OPENMP_PASSES + +include "mlir/Pass/PassBase.td" + +def LowerWorkshare : Pass<"lower-workshare"> { + let summary = "Lower workshare construct"; +} + +#endif //FORTRAN_DIALECT_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 7df5044949463..594369fc2ffe5 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -17,6 +17,7 @@ #include "mlir/Transforms/Passes.h" #include "flang/Optimizer/CodeGen/CodeGen.h"
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 4da93bb2a99ac1d59d4924c518503c94ec81c659 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must have at
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From 26d0051179dec85eb6aee2b48db54964bf042a87 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:44:31 +0900 Subject: [PATCH] [flang] Lower omp.workshare to other omp constructs --- flang/include/flang/Optimizer/CMakeLists.txt | 1 + .../flang/Optimizer/OpenMP/CMakeLists.txt | 4 + flang/include/flang/Optimizer/OpenMP/Passes.h | 30 ++ .../include/flang/Optimizer/OpenMP/Passes.td | 18 ++ flang/include/flang/Tools/CLOptions.inc | 2 + flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Optimizer/CMakeLists.txt| 1 + .../HLFIR/Transforms/BufferizeHLFIR.cpp | 6 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 26 ++ flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 259 ++ flang/test/HLFIR/bufferize-workshare.fir | 58 .../Transforms/OpenMP/lower-workshare.mlir| 81 ++ flang/tools/bbc/CMakeLists.txt| 1 + flang/tools/fir-opt/CMakeLists.txt| 1 + flang/tools/fir-opt/fir-opt.cpp | 2 + flang/tools/tco/CMakeLists.txt| 1 + 16 files changed, 490 insertions(+), 2 deletions(-) create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt index 89e43a9ee8d62..3336ac935e101 100644 --- a/flang/include/flang/Optimizer/CMakeLists.txt +++ b/flang/include/flang/Optimizer/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt 
b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt new file mode 100644 index 0..d59573f0f7fd9 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP) + +add_public_tablegen_target(FlangOpenMPPassesIncGen) diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h new file mode 100644 index 0..95a05b3005073 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -0,0 +1,30 @@ +//===- Passes.h - OpenMP pass entry points --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This header declares OpenMP pass entry points. +// +//===--===// + +#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H +#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include + +namespace flangomp { +#define GEN_PASS_DECL +#define GEN_PASS_REGISTRATION +#include "flang/Optimizer/OpenMP/Passes.h.inc" + +bool shouldUseWorkshareLowering(mlir::Operation *op); + +} // namespace flangomp + +#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td new file mode 100644 index 0..6f636ec1df616 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -0,0 +1,18 @@ +//===-- Passes.td - HLFIR pass definition file -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_DIALECT_OPENMP_PASSES +#define FORTRAN_DIALECT_OPENMP_PASSES + +include "mlir/Pass/PassBase.td" + +def LowerWorkshare : Pass<"lower-workshare"> { + let summary = "Lower workshare construct"; +} + +#endif //FORTRAN_DIALECT_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 7df5044949463..594369fc2ffe5 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -17,6 +17,7 @@ #include "mlir/Transforms/Passes.h" #include "flang/Optimizer/CodeGen/CodeGen.h"
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
ivanradanov wrote: > Thank you for your work so far. This is a great start. > > What is the plan for transforming do loops generated by lowering (e.g. that > do not become hlfir.elemental operations and are not generated by hlfir > bufferization)? I am looking at [this](https://www.openmp.org/spec-html/5.0/openmpsu39.html) for the standard. I intend to go through the various constructs that need to be separated into units of work and provide an alternative lowering for them so that they will get parallelized when we lower the workdistribute operation. To accurately keep track of constructs that need to be parallelized for workdistribute I am debating adding a new loop_nest wrapper for that as discussed [here](https://github.com/llvm/llvm-project/pull/101445#issuecomment-2261837847) https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) ivanradanov wrote: Thank you, I missed those passes, I guess I will make a separate PR for creating the OpenMP pass directory and moving those there. I will link that here once done. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); ivanradanov wrote: According to the standard: > For array expressions within each statement, including transformational array > intrinsic functions that compute scalar values from arrays: >Evaluation of each element of the array expression, including any > references to ELEMENTAL functions, is a unit of work. >Evaluation of transformational array intrinsic functions may be freely > subdivided into any number of units of work. I was under the impression that ELEMENTAL functions must be PURE, however now I see that they can be marked IMPURE. The standard says to divide these into units of work, and > It is unspecified how the units of work are assigned to the threads executing > a workshare region. So just from reading this it looks like we should lower those to wsloops and the user is responsible for any race conditions. Or perhaps this part > An implementation of the workshare construct must insert any synchronization > that is required to maintain standard Fortran semantics. Can be taken to mean that we need to schedule these properly. Perhaps we can introduce a non-standard clause to the workdistribute to control this? https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); ivanradanov wrote: I was misreading the standard that we are only parallelizing statements closely nested in a workdistribute. I will fix this to handle nested control regions as well. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. +void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType(); + if (!parallelOp) { +wsOp.emitWarning("cannot handle workshare, converting to single"); +Operation *terminator = wsOp.getRegion().front().getTerminator(); +wsOp->getBlock()->getOperations().splice( +wsOp->getIterator(), wsOp.getRegion().front().getOperations()); +terminator->erase(); +return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { +if (auto reloaded = rootMapping.lookupOrNull(v)) + return; +Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); +Type ty = v.getType(); +Value alloc, reloaded; +if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create(loc, ty); + singleBuilder.create(loc, singleMapping.lookup(v), alloc); + reloaded = rootBuilder.create(loc, ty, alloc); +} else { ivanradanov wrote: I think building a fir.alloca for a ReferenceType fails and I encountered that somewhere. I will check again to see if I can get an example. 
https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. +void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType(); + if (!parallelOp) { +wsOp.emitWarning("cannot handle workshare, converting to single"); +Operation *terminator = wsOp.getRegion().front().getTerminator(); +wsOp->getBlock()->getOperations().splice( +wsOp->getIterator(), wsOp.getRegion().front().getOperations()); +terminator->erase(); +return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { +if (auto reloaded = rootMapping.lookupOrNull(v)) + return; +Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); +Type ty = v.getType(); +Value alloc, reloaded; +if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create(loc, ty); + singleBuilder.create(loc, singleMapping.lookup(v), alloc); ivanradanov wrote: I am sorry, this is probably due to my inexperience with flang/fortran, are there any types in flang that get automatically freed on scope exit? 
Because if I make a shallow copy of an allocatable array, the operation that frees it will be put in an omp.single, thus freeing it only once since the free operation would not be `isSafeToParallelize`. e.g. ``` %a = fir.allocmem use(%a) fir.freemem %a -> omp.single{ %a = fir.allocmem} use(%a) omp.single{ fir.freemem %a } ``` https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From 62057f90e1e6e9e89df1bb666a3676421e2e52ac Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:10:25 +0900 Subject: [PATCH 1/9] Add custom omp loop wrapper --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++ 1 file changed, 11 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5199ff50abb95..76f0c472cfdb1 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [ let hasVerifier = 1; } +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ +DeclareOpInterfaceMethods, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; +} + //===--===// // Loop Nest //===--===// >From d882f2b7413a9ad306334cc69691671b498985fc Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:08:58 +0900 Subject: [PATCH 2/9] Add recursive memory effects trait to workshare --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 76f0c472cfdb1..7d1c80333855e 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [ // 2.8.3 Workshare Construct //===--===// -def WorkshareOp : OpenMP_Op<"workshare", clauses = [ +def WorkshareOp : OpenMP_Op<"workshare", traits = [ +RecursiveMemoryEffects, + ], clauses = [ OpenMP_NowaitClause, ], singleRegion = true> { let summary = "workshare directive"; >From 
14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 3/9] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, 
lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. F
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 16f7146a45ee9b31c00d9d54be4859df312dcb1b Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/2] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must ha
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. +void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType(); + if (!parallelOp) { +wsOp.emitWarning("cannot handle workshare, converting to single"); +Operation *terminator = wsOp.getRegion().front().getTerminator(); +wsOp->getBlock()->getOperations().splice( +wsOp->getIterator(), wsOp.getRegion().front().getOperations()); +terminator->erase(); +return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { +if (auto reloaded = rootMapping.lookupOrNull(v)) + return; +Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); +Type ty = v.getType(); +Value alloc, reloaded; +if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create(loc, ty); + singleBuilder.create(loc, singleMapping.lookup(v), alloc); ivanradanov wrote: I suppose it will be a problem if we try to do this to a fir.alloca that goes out of scope; we would need to firstprivate those for the omp.single regions. 
Thank you for the comment https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 7422985144525359db3d95c58b2b477872c73d54 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/2] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must ha
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From e3460a07d658c09a6e0e225ec390983551e23181 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f..8e771952f5b6d 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From b26c03dab1104a13d47b3a5124a31f4a83dbd320 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/2] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must ha
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 97cd498c0819f1a15464d742915534499739a0b4 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 3b8dbba80f93d863a25179750d9fb652cedf66b4 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f..8e771952f5b6d 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From b26c03dab1104a13d47b3a5124a31f4a83dbd320 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must ha
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. +void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType(); + if (!parallelOp) { +wsOp.emitWarning("cannot handle workshare, converting to single"); +Operation *terminator = wsOp.getRegion().front().getTerminator(); +wsOp->getBlock()->getOperations().splice( +wsOp->getIterator(), wsOp.getRegion().front().getOperations()); +terminator->erase(); +return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { +if (auto reloaded = rootMapping.lookupOrNull(v)) + return; +Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); +Type ty = v.getType(); +Value alloc, reloaded; +if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create(loc, ty); + singleBuilder.create(loc, singleMapping.lookup(v), alloc); + reloaded = rootBuilder.create(loc, ty, alloc); +} else { + auto one = allocBuilder.create( + loc, allocBuilder.getI32Type(), 1); + alloc = + allocBuilder.create(loc, llvmPtrTy, llvmPtrTy, one); + Value toStore = 
singleBuilder + .create( + loc, llvmPtrTy, singleMapping.lookup(v)) + .getResult(0); + singleBuilder.create(loc, toStore, alloc); + reloaded = rootBuilder.create(loc, llvmPtrTy, alloc); + reloaded = + rootBuilder.create(loc, ty, reloaded) + .getResult(0); +} +rootMapping.map(v, reloaded); + }; + + auto moveToSingle = [&](SingleRegion sr, OpBuilder singleBuilder) { +IRMapping singleMapping = rootMapping; + +for (Operation &op : llvm::make_range(sr.begin, sr.end)) { + singleBuilder.clone(op, singleMapping); + if (i
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. +void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType(); + if (!parallelOp) { +wsOp.emitWarning("cannot handle workshare, converting to single"); +Operation *terminator = wsOp.getRegion().front().getTerminator(); +wsOp->getBlock()->getOperations().splice( +wsOp->getIterator(), wsOp.getRegion().front().getOperations()); +terminator->erase(); +return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { +if (auto reloaded = rootMapping.lookupOrNull(v)) + return; +Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); +Type ty = v.getType(); +Value alloc, reloaded; +if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create(loc, ty); + singleBuilder.create(loc, singleMapping.lookup(v), alloc); + reloaded = rootBuilder.create(loc, ty, alloc); +} else { + auto one = allocBuilder.create( + loc, allocBuilder.getI32Type(), 1); + alloc = + allocBuilder.create(loc, llvmPtrTy, llvmPtrTy, one); + Value toStore = 
singleBuilder + .create( + loc, llvmPtrTy, singleMapping.lookup(v)) + .getResult(0); + singleBuilder.create(loc, toStore, alloc); + reloaded = rootBuilder.create(loc, llvmPtrTy, alloc); + reloaded = + rootBuilder.create(loc, ty, reloaded) + .getResult(0); +} +rootMapping.map(v, reloaded); + }; + + auto moveToSingle = [&](SingleRegion sr, OpBuilder singleBuilder) { +IRMapping singleMapping = rootMapping; + +for (Operation &op : llvm::make_range(sr.begin, sr.end)) { + singleBuilder.clone(op, singleMapping); + if (i
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); ivanradanov wrote: I have opted to only parallelize loops which are marked as unordered for now. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. ivanradanov wrote: Done, thank you for the suggestion. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// Lower omp workshare construct. +//===--===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast(op->getParentOp()); + if (!workshare) +return false; + return workshare->getParentOfType(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa(op)) +return true; + + llvm::SmallVector effects; + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) { +return false; + } + interface.getEffects(effects); + if (effects.empty()) +return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = 
fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. +void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType(); + if (!parallelOp) { +wsOp.emitWarning("cannot handle workshare, converting to single"); +Operation *terminator = wsOp.getRegion().front().getTerminator(); +wsOp->getBlock()->getOperations().splice( +wsOp->getIterator(), wsOp.getRegion().front().getOperations()); +terminator->erase(); +return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { +if (auto reloaded = rootMapping.lookupOrNull(v)) + return; +Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); +Type ty = v.getType(); +Value alloc, reloaded; +if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create(loc, ty); + singleBuilder.create(loc, singleMapping.lookup(v), alloc); ivanradanov wrote: I have opted to hoist fir.alloca ops to the parallel region and scatter them using omp.single's copyprivate. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list 
llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
ivanradanov wrote: @kiranchandramohan @tblah I think this warrants another look if you have some time. I have iterated a bit and opted to have an omp loop nest wrapper op which signals to the workshare lowering which specific loops need to be parallelized (i.e. converted to wsloop { loop_nest }). This will allow us to emit this in the frontend if it is needed and be more precise about the exact loops that need to be parallelized. So the LowerWorksharePass that I have implemented here is tasked with parallelizing the loops nested in workshare_loop_wrapper and both the Fortran->mlir frontend and the hlfir lowering passes would be responsible for emitting the workshare_loop_wrapper ops where appropriate. For that I have started with some of the obvious lowerings in the hlfir bufferizations, but perhaps that can be done gradually and not everything needs to be covered by this PR. Let me know what you think. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 1ecd832151fab1cd9b977f0e4b960294cfdc2d12 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 38322dc92e37423f528fdba5535feb2f0a1ce113 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f..8e771952f5b6d 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 0287ff3626883d09e364210e83a652a6328835dd Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must ha
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From aea7da1492e9ad122b3b17f07b8f91b7c6eac777 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 9046df2cebff7e06e803a7db6df506dc67c6edce Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f..8e771952f5b6d 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From cca8588b2b2d1c4275b9eaf22c7bd017942f06d9 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must ha
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 2b23c8b52d3b5b680bbcf090fd8c08de6d86fe62 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 0eae392538510f736b630d93b14d46644d042e4f Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4f..f7bc565ea8cbc1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 10b7a392916a1dd184c434e41b7be0738c811a38 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 69e85558ec78c7a74be9168b4227bb262f67225e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4f..f7bc565ea8cbc1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From d17c55249b5d7065721225e62f10fef5e3da4f51 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101443 >From e453738aaa1bd74e2462025075ea4ac12868caac Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:09:09 +0900 Subject: [PATCH 1/7] [MLIR][omp] Add omp.workshare op --- .../Dialect/OpenMP/OpenMPClauseOperands.h | 3 +++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 22 +++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 13 +++ 3 files changed, 38 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..d14e5e17afbb08 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -17,6 +17,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "llvm/ADT/SmallVector.h" +#include #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" @@ -316,6 +317,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; + + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 11780f84697b15..9a189eb2059e01 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1683,6 +1683,19 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, 
OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +LogicalResult WorkshareOp::verify() { + return (*this)->getRegion(0).getBlocks().size() == 1 ? success() : failure(); +} + //===--===// // WsloopOp //===--===// >From 13f5fa2b7628e78b308a3312fd045d87dd89d458 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:10:25 +0900 Subject: [PATCH 2/7] Add custom omp loop wrapper --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++ 1 file changed, 11 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5199ff50abb959..76f0c472cfdb14 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [ let hasVerifier = 1; } +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ +DeclareOpInterfaceMethods, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; +} + //===--===// // Loop Nest //===--===// >From f0915f426f03e3e867953de36e68fd0ccaf5b0ed Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:08:58 +0900 Subject: [PATCH 3/7] Add recursive memory effects trait to workshare --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 76f0c472cfdb14..7d1c80333855e7 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [ // 2.8.3 Workshare Construct //===--===// -def WorkshareOp : OpenMP_Op<"workshare", clauses 
= [ +def WorkshareOp : OpenMP_Op<"workshare", traits = [ +RecursiveMemoryEffects, + ], clauses = [ OpenMP_NowaitClause, ], singleRegion = true> { let summar
[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 38c581c8defc81105160a69bb46a9e489b56f10e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 38c581c8defc81105160a69bb46a9e489b56f10e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/104748 WIP: I will be adding unit tests, and I am considering whether we should have integration tests for the entire omp.workshare pipeline. >From 793ae50dd00c4347bea78ca6ecd33783c69de354 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 1/5] Add workshare loop wrapper lowerings --- .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp | 6 -- .../HLFIR/Transforms/OptimizedBufferization.cpp| 10 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index b608677c526310..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,12 +26,13 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/ADT/TypeSwitch.h" namespace hlfir { @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. 
hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index c4aed6b79df923..150e3e91197241 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto 
arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); @@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); >From d7ba8a1598f517a5a3c8401d22b81b50114112f1 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Mon, 19 Aug 2024 15:01:31 +0900 Subject: [PATCH 2/5] Bufferize test --- flang/test/HLFIR/bufferize-workshare.fir | 58 1 file changed, 58 insertions(+) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 00..86a2f031478dd7 --- /dev
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/104748 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) ivanradanov wrote: The PR for this is up here: https://github.com/llvm/llvm-project/pull/104732 https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 5e470922405b735d63b4aded76450cc52e94e003 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 63d49e4dcd128b470ee77006c594673203dd2df2 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4f..f7bc565ea8cbc1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 621b01775171a4718fa405f201b58c3dca005e5a Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101443 >From 604b0293e0574e9d697d4071c2b853a5a27af1e1 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:09:09 +0900 Subject: [PATCH 1/7] [MLIR][omp] Add omp.workshare op --- .../Dialect/OpenMP/OpenMPClauseOperands.h | 3 +++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 22 +++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 13 +++ 3 files changed, 38 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..d14e5e17afbb08 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -17,6 +17,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "llvm/ADT/SmallVector.h" +#include #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" @@ -316,6 +317,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; + + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 11780f84697b15..9a189eb2059e01 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1683,6 +1683,19 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, 
OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +LogicalResult WorkshareOp::verify() { + return (*this)->getRegion(0).getBlocks().size() == 1 ? success() : failure(); +} + //===--===// // WsloopOp //===--===// >From f2fd4f278c23ec99dae3ac44e1c05fcb629f707d Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:10:25 +0900 Subject: [PATCH 2/7] Add custom omp loop wrapper --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++ 1 file changed, 11 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5199ff50abb959..76f0c472cfdb14 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [ let hasVerifier = 1; } +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ +DeclareOpInterfaceMethods, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; +} + //===--===// // Loop Nest //===--===// >From 22c66e6db3997e38254d9848661a38627cd7bb19 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:08:58 +0900 Subject: [PATCH 3/7] Add recursive memory effects trait to workshare --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 76f0c472cfdb14..7d1c80333855e7 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [ // 2.8.3 Workshare Construct //===--===// -def WorkshareOp : OpenMP_Op<"workshare", clauses 
= [ +def WorkshareOp : OpenMP_Op<"workshare", traits = [ +RecursiveMemoryEffects, + ], clauses = [ OpenMP_NowaitClause, ], singleRegion = true> { let summar
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -344,6 +345,7 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + pm.addPass(flangomp::createLowerWorkshare()); ivanradanov wrote: I opted to keep the rest of the OpenMP passes as they are, and have added a bool argument to control whether to run the lower-workshare pass. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From bf0e09f9cfc3159517b1ebec9d39e1143fa935b9 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Tue, 20 Aug 2024 09:28:15 +0900 Subject: [PATCH 1/7] Iterate backwards to find all trivially dead ops --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 3 +- .../Transforms/OpenMP/lower-workshare4.mlir | 56 ++- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 9557dd200cacee..bfb9708af70923 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -200,7 +200,8 @@ static bool isTransitivelyUsedOutside(Value v, SingleRegion sr) { /// We clone pure operations in both the parallel and single blocks. this /// functions cleans them up if they end up with no uses static void cleanupBlock(Block *block) { - for (Operation &op : llvm::make_early_inc_range(*block)) + for (Operation &op : llvm::make_early_inc_range( + llvm::make_range(block->rbegin(), block->rend( if (isOpTriviallyDead(&op)) op.erase(); } diff --git a/flang/test/Transforms/OpenMP/lower-workshare4.mlir b/flang/test/Transforms/OpenMP/lower-workshare4.mlir index 44f68cd2ca3654..81bc20cb34b65d 100644 --- a/flang/test/Transforms/OpenMP/lower-workshare4.mlir +++ b/flang/test/Transforms/OpenMP/lower-workshare4.mlir @@ -1,8 +1,33 @@ // RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s -// Check that we cleanup unused pure operations from either the parallel or -// single regions +// Check that we cleanup unused pure operations from the parallel and single +// regions +// CHECK-LABEL: func.func @wsfunc() { +// CHECK: %[[VAL_0:.*]] = fir.alloca i32 +// CHECK: omp.parallel { +// CHECK: omp.single { +// CHECK: %[[VAL_1:.*]] = "test.test1"() : () -> i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : index +// CHECK: 
%[[VAL_3:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_2]], %[[VAL_3]] : index +// CHECK: "test.test3"(%[[VAL_4]]) : (index) -> () +// CHECK: omp.terminator +// CHECK: } +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 42 : index +// CHECK: omp.wsloop nowait { +// CHECK: omp.loop_nest (%[[VAL_7:.*]]) : index = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_5]]) { +// CHECK: "test.test2"() : () -> () +// CHECK: omp.yield +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.barrier +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } func.func @wsfunc() { %a = fir.alloca i32 omp.parallel { @@ -13,7 +38,9 @@ func.func @wsfunc() { %c42 = arith.constant 42 : index %c2 = arith.constant 2 : index - "test.test3"(%c2) : (index) -> () + %c3 = arith.constant 3 : index + %add = arith.addi %c2, %c3 : index + "test.test3"(%add) : (index) -> () omp.workshare_loop_wrapper { omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) { @@ -29,27 +56,4 @@ func.func @wsfunc() { return } -// CHECK-LABEL: func.func @wsfunc() { -// CHECK: %[[VAL_0:.*]] = fir.alloca i32 -// CHECK: omp.parallel { -// CHECK: omp.single { -// CHECK: %[[VAL_1:.*]] = "test.test1"() : () -> i32 -// CHECK: %[[VAL_2:.*]] = arith.constant 2 : index -// CHECK: "test.test3"(%[[VAL_2]]) : (index) -> () -// CHECK: omp.terminator -// CHECK: } -// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_4:.*]] = arith.constant 42 : index -// CHECK: omp.wsloop nowait { -// CHECK: omp.loop_nest (%[[VAL_5:.*]]) : index = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_3]]) { -// CHECK: "test.test2"() : () -> () -// CHECK: omp.yield -// CHECK: } -// CHECK: omp.terminator -// CHECK: } -// CHECK: omp.barrier -// CHECK: omp.terminator -// CHECK: } -// CHECK: return -// CHECK: } >From 90cd77c1c7ffdadfe4f088b358c0ee9ee1958872 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Tue, 20 Aug 
2024 12:17:45 +0900 Subject: [PATCH 2/7] Add explanation comment for createCopyFun --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cp
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From a45ef32ecf6483bdb65954c4283ea493494cea77 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Tue, 20 Aug 2024 16:57:25 +0900 Subject: [PATCH 1/6] Update test --- .../Transforms/OpenMP/lower-workshare.mlir| 42 +++ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/flang/test/Transforms/OpenMP/lower-workshare.mlir b/flang/test/Transforms/OpenMP/lower-workshare.mlir index 9347863dc4a609..c189e54aaeb0d4 100644 --- a/flang/test/Transforms/OpenMP/lower-workshare.mlir +++ b/flang/test/Transforms/OpenMP/lower-workshare.mlir @@ -103,28 +103,23 @@ func.func @wsfunc(%arg0: !fir.ref>) { // CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_9]]) {uniq_name = "array"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) // CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_1]] : !fir.ref>> // CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_9]]) {uniq_name = ".tmp.array"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) -// CHECK: %[[VAL_13:.*]] = arith.constant true -// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_13:.*]] = arith.constant 1 : index // CHECK: omp.wsloop { -// CHECK: omp.loop_nest (%[[VAL_15:.*]]) : index = (%[[VAL_14]]) to (%[[VAL_7]]) inclusive step (%[[VAL_14]]) { -// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_15]]) : (!fir.ref>, index) -> !fir.ref -// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref -// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_8]] : i32 -// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_15]]) : (!fir.heap>, index) -> !fir.ref -// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_19]] temporary_lhs : i32, !fir.ref +// CHECK: omp.loop_nest (%[[VAL_14:.*]]) : index = (%[[VAL_13]]) to (%[[VAL_7]]) inclusive step (%[[VAL_13]]) { +// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_14]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: 
%[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref +// CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_16]], %[[VAL_8]] : i32 +// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_14]]) : (!fir.heap>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_18]] temporary_lhs : i32, !fir.ref // CHECK: omp.yield // CHECK: } // CHECK: omp.terminator // CHECK: } // CHECK: omp.single nowait { -// CHECK: %[[VAL_20:.*]] = fir.undefined tuple>, i1> -// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_20]], %[[VAL_13]], [1 : index] : (tuple>, i1>, i1) -> tuple>, i1> // CHECK: hlfir.assign %[[VAL_12]]#0 to %[[VAL_10]]#0 : !fir.heap>, !fir.ref> // CHECK: fir.freemem %[[VAL_12]]#0 : !fir.heap> // CHECK: omp.terminator // CHECK: } -// CHECK: %[[VAL_22:.*]] = fir.undefined tuple>, i1> -// CHECK: %[[VAL_23:.*]] = fir.insert_value %[[VAL_22]], %[[VAL_13]], [1 : index] : (tuple>, i1>, i1) -> tuple>, i1> // CHECK: omp.barrier // CHECK: omp.terminator // CHECK: } @@ -168,31 +163,26 @@ func.func @wsfunc(%arg0: !fir.ref>) { // CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_11]]) {uniq_name = "array"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) // CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_2]] : !fir.ref>> // CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]](%[[VAL_11]]) {uniq_name = ".tmp.array"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) -// CHECK: %[[VAL_15:.*]] = arith.constant true -// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]] = arith.constant 1 : index // CHECK: omp.wsloop { -// CHECK: omp.loop_nest (%[[VAL_17:.*]]) : index = (%[[VAL_16]]) to (%[[VAL_10]]) inclusive step (%[[VAL_16]]) { -// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_17]]) : (!fir.ref>, index) -> !fir.ref -// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref -// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_1]] : !fir.ref -// CHECK: %[[VAL_21:.*]] = arith.subi %[[VAL_19]], %[[VAL_20]] : i32 -// CHECK: %[[VAL_22:.*]] 
= arith.subi %[[VAL_21]], %[[VAL_9]] : i32 -// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_14]]#0 (%[[VAL_17]]) : (!fir.heap>, index) -> !fir.ref -// CHECK: hlfir.assign %[[VAL_22]] to %[[VAL_23]] temporary_lhs : i32, !fir.ref +// CHECK: omp.loop_n
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov closed https://github.com/llvm/llvm-project/pull/101443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov reopened https://github.com/llvm/llvm-project/pull/101443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From 4b1c15bf4dcd753e35ec5c1118b107ea058c58df Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 1/5] Add workshare loop wrapper lowerings --- .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp | 6 -- .../HLFIR/Transforms/OptimizedBufferization.cpp| 10 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index b608677c526310..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,12 +26,13 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/ADT/TypeSwitch.h" namespace hlfir { @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. 
hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 3a0a98dc594463..f014724861e333 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto 
arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); @@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); >From a79d7c8cee84295ef7281b0b6aabf2ea5ed50b9e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Mon, 19 Aug 2024 15:01:31 +0900 Subject: [PATCH 2/5] Bufferize test --- flang/test/HLFIR/bufferize-workshare.fir | 58 1 file changed, 58 insertions(+) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 00..86a2f031478dd7 --- /dev/null +++ b/flang/test/HLFIR/bufferize-workshare.fir @@ -0,0 +1,58 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// CH
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 3d1258582adc0ec506a23dc3efdba371c29612ca Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..83c90374afa5e3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence >From 5e01e41362f11f2309dea217ada9026aa437433d Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 451a9d2f26cfd8cb770d1ae35d834c63fce56b79 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 error: too big or took too long to generate ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From e5789180a3dd1fd8c46a5d7dfc446921110642ca Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..83c90374afa5e3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence >From 70daa016c0c39861926b1b82e31b96db005cfba1 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 81606df746e9862c330681ed8ae9113a43e577a2 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From d343f3a86f56864757ccdf889fd6897d9d9507e9 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:09:09 +0900 Subject: [PATCH 1/6] [MLIR][omp] Add omp.workshare op Add custom omp loop wrapper Add recursive memory effects trait to workshare Remove stray include Remove omp.workshare verifier Add assembly format for wrapper and add test Add verification and descriptions --- .../Dialect/OpenMP/OpenMPClauseOperands.h | 2 + mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 43 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 23 +++ mlir/test/Dialect/OpenMP/invalid.mlir | 42 +++ mlir/test/Dialect/OpenMP/ops.mlir | 69 +++ 5 files changed, 179 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..896ca9581c3fc8 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -316,6 +316,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + +This operation is used for the intermediate representation of the workshare +block before the work gets divided between the threads. See the flang +LowerWorkshare pass for details. 
+ }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; +} + +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ +DeclareOpInterfaceMethods<LoopWrapperInterface>, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + let description = [{ +This operation wraps a loop nest that is marked for dividing into units of +work by an encompassing omp.workshare operation. + }]; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; + let assemblyFormat = "$region attr-dict"; + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 4c943ebbe3144f..f4acbd97ca6d1a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1689,6 +1689,29 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +//===--===// +// WorkshareLoopWrapperOp +//===--===// + +LogicalResult WorkshareLoopWrapperOp::verify() { + if (!isWrapper()) +return emitOpError() << "must be a loop wrapper"; + if (getNestedWrapper()) +return emitError() << "nested wrappers not supported"; + if (!(*this)->getParentOfType<WorkshareOp>()) +return emitError() << "must be nested in an omp.workshare"; + return success(); +} + //===--===// // WsloopOp //===--===// diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index c76b07ec94a597..c330f4c8d0f76a 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -2545,3 +2545,45 @@ func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index) } 
{omp.composite} return } + +// - +func.func @nested_wrapper(%idx : index) { + omp.workshare { +// expected-error @below {{nested wrappers not supported}} +omp.workshare.loop_wrapper { + omp.simd { +omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) { + omp.yield +} +omp.terminator + } + omp.terminator +} +omp.terminator + } + return +} + +// - +func.func @not_wrapper() { + omp.workshare { +//
[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From d343f3a86f56864757ccdf889fd6897d9d9507e9 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:09:09 +0900 Subject: [PATCH 1/5] [MLIR][omp] Add omp.workshare op Add custom omp loop wrapper Add recursive memory effects trait to workshare Remove stray include Remove omp.workshare verifier Add assembly format for wrapper and add test Add verification and descriptions --- .../Dialect/OpenMP/OpenMPClauseOperands.h | 2 + mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 43 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 23 +++ mlir/test/Dialect/OpenMP/invalid.mlir | 42 +++ mlir/test/Dialect/OpenMP/ops.mlir | 69 +++ 5 files changed, 179 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..896ca9581c3fc8 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -316,6 +316,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + +This operation is used for the intermediate representation of the workshare +block before the work gets divided between the threads. See the flang +LowerWorkshare pass for details. 
+ }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; +} + +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ +DeclareOpInterfaceMethods<LoopWrapperInterface>, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + let description = [{ +This operation wraps a loop nest that is marked for dividing into units of +work by an encompassing omp.workshare operation. + }]; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; + let assemblyFormat = "$region attr-dict"; + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 4c943ebbe3144f..f4acbd97ca6d1a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1689,6 +1689,29 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +//===--===// +// WorkshareLoopWrapperOp +//===--===// + +LogicalResult WorkshareLoopWrapperOp::verify() { + if (!isWrapper()) +return emitOpError() << "must be a loop wrapper"; + if (getNestedWrapper()) +return emitError() << "nested wrappers not supported"; + if (!(*this)->getParentOfType<WorkshareOp>()) +return emitError() << "must be nested in an omp.workshare"; + return success(); +} + //===--===// // WsloopOp //===--===// diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index c76b07ec94a597..c330f4c8d0f76a 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -2545,3 +2545,45 @@ func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index) } 
{omp.composite} return } + +// - +func.func @nested_wrapper(%idx : index) { + omp.workshare { +// expected-error @below {{nested wrappers not supported}} +omp.workshare.loop_wrapper { + omp.simd { +omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) { + omp.yield +} +omp.terminator + } + omp.terminator +} +omp.terminator + } + return +} + +// - +func.func @not_wrapper() { + omp.workshare { +//
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From d4310a06639c6cd1565aac2d2bbfebffcf9e175d Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend Fix lower test for workshare --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++ flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..83c90374afa5e3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + 
queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare >From fb06794ba6259fc3bcc3b9c73108a03e77a0b42d Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 22 Aug 2024 17:01:43 +0900 Subject: [PATCH 2/2] Fix function signature --- flang/lib/Lower/OpenMP/OpenMP.cpp | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp index 83c90374afa5e3..086dd7aaeaab88 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1908,12 +1908,14 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, static mlir::omp::WorkshareOp genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From c3ec4f1bd4a51139a10b1450e3e194a9270b7362 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering Emit loop nests in a custom wrapper Only emit unordered loops as omp loops Fix uninitialized memory bug in genLoopNest --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..f073f494b3fb21 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp = nullptr; + mlir::Block *body = nullptr; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWorkshareLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWorkshareLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWorkshareLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, 
shapeShift, /*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..31378841ed 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + m
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From 45a5069b5b783e0e1cd5fa0ba8f8098a980eb94e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 22 Aug 2024 18:05:31 +0900 Subject: [PATCH 1/6] wrong replace --- mlir/test/Dialect/OpenMP/ops.mlir | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 0e1f5ebb1a3739..4c6843bf89ad2f 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -2845,8 +2845,8 @@ func.func @omp_workshare_multiple_blocks() { return } -// CHECK-LABEL: func @omp_workshare.loop_wrapper -func.func @omp_workshare.loop_wrapper(%idx : index) { +// CHECK-LABEL: func @omp_workshare_loop_wrapper +func.func @omp_workshare_loop_wrapper(%idx : index) { // CHECK-NEXT: omp.workshare { omp.workshare { // CHECK-NEXT: omp.workshare.loop_wrapper @@ -2862,8 +2862,8 @@ func.func @omp_workshare.loop_wrapper(%idx : index) { return } -// CHECK-LABEL: func @omp_workshare.loop_wrapper_attrs -func.func @omp_workshare.loop_wrapper_attrs(%idx : index) { +// CHECK-LABEL: func @omp_workshare_loop_wrapper_attrs +func.func @omp_workshare_loop_wrapper_attrs(%idx : index) { // CHECK-NEXT: omp.workshare { omp.workshare { // CHECK-NEXT: omp.workshare.loop_wrapper { >From 0984407c261496c9dc53fdd2d4d5c2431dd90359 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 2/6] [flang][omp] Emit omp.workshare in frontend Fix lower test for workshare --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++ flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..83c90374afa5e3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1272,6 +1272,15 @@ static void 
genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. 
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutin
[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From 45a5069b5b783e0e1cd5fa0ba8f8098a980eb94e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 22 Aug 2024 18:05:31 +0900 Subject: [PATCH 1/5] wrong replace --- mlir/test/Dialect/OpenMP/ops.mlir | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 0e1f5ebb1a3739..4c6843bf89ad2f 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -2845,8 +2845,8 @@ func.func @omp_workshare_multiple_blocks() { return } -// CHECK-LABEL: func @omp_workshare.loop_wrapper -func.func @omp_workshare.loop_wrapper(%idx : index) { +// CHECK-LABEL: func @omp_workshare_loop_wrapper +func.func @omp_workshare_loop_wrapper(%idx : index) { // CHECK-NEXT: omp.workshare { omp.workshare { // CHECK-NEXT: omp.workshare.loop_wrapper @@ -2862,8 +2862,8 @@ func.func @omp_workshare.loop_wrapper(%idx : index) { return } -// CHECK-LABEL: func @omp_workshare.loop_wrapper_attrs -func.func @omp_workshare.loop_wrapper_attrs(%idx : index) { +// CHECK-LABEL: func @omp_workshare_loop_wrapper_attrs +func.func @omp_workshare_loop_wrapper_attrs(%idx : index) { // CHECK-NEXT: omp.workshare { omp.workshare { // CHECK-NEXT: omp.workshare.loop_wrapper { >From 0984407c261496c9dc53fdd2d4d5c2431dd90359 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 2/5] [flang][omp] Emit omp.workshare in frontend Fix lower test for workshare --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++ flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..83c90374afa5e3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1272,6 +1272,15 @@ static void 
genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. 
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutin
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From d5fbe9c7482b87be295be03aafd5917dd7c17859 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 22 Aug 2024 18:07:05 +0900 Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering Emit loop nests in a custom wrapper Only emit unordered loops as omp loops Fix uninitialized memory bug in genLoopNest --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 33 ++-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 43 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..f073f494b3fb21 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp = nullptr; + mlir::Block *body = nullptr; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWorkshareLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWorkshareLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWorkshareLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, 
shapeShift, /*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..31378841ed 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { +
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 0984407c261496c9dc53fdd2d4d5c2431dd90359 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend Fix lower test for workshare --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++ flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..83c90374afa5e3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + 
queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare >From de32599eae894520fa383537f3cedacab14e6a87 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 22 Aug 2024 17:01:43 +0900 Subject: [PATCH 2/2] Fix function signature --- flang/lib/Lower/OpenMP/OpenMP.cpp | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp index 83c90374afa5e3..086dd7aaeaab88 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1908,12 +1908,14 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, static mlir::omp::WorkshareOp genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
ivanradanov wrote: @skatrak I considered that as well, however, I did not like it because the semantics of the block that the operation is contained in are different. wsloop expects its parent block to be a parallel block which all threads will execute and all of those threads will share the work of the nested loop nest. Whereas the workshare.loop_nest op is semantically executed by a single thread (because the workshare directive acts like it preserves the semantics of single-threaded Fortran execution). I don't think it is currently a problem, however, if in the future someone adds some optimization or transformation that assumes that it is nested in a parallel it may break wsloops which happen to be nested in a workshare instead. (As you said it is possible to check whether it is supposed to be a workshare or parallel wsloop but an op changing its semantics or transformations depending on what it's nested in feels more error prone). So that is why I opted for this approach. https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,446 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file implements the lowering of omp.workshare to other omp constructs. +// +// This pass is tasked with parallelizing the loops nested in +// workshare.loop_wrapper while both the Fortran to mlir lowering and the hlfir +// to fir lowering pipelines are responsible for emitting the +// workshare.loop_wrapper ops where appropriate according to the +// `shouldUseWorkshareLowering` function. +// +//===--===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { + +// Checks for nesting pattern below as we need to avoid sharing the work of +// statements which are nested in some constructs such as omp.critical or +// another omp.parallel. +// +// omp.workshare { // `wsOp` +// ... +// omp.T { // `parent` +// ... +// `op` +// +template +static bool isNestedIn(omp::WorkshareOp wsOp, Operation *op) { + T parent = op->getParentOfType(); + if (!parent) +return false; + return wsOp->isProperAncestor(parent); +} + +bool shouldUseWorkshareLowering(Operation *op) { + auto parentWorkshare = op->getParentOfType(); + + if (!parentWorkshare) +return false; + + if (isNestedIn(parentWorkshare, op)) +return false; + + // 2.8.3 workshare Construct + // For a parallel construct, the construct is a unit of work with respect to + // the workshare construct. 
The statements contained in the parallel construct + // are executed by a new thread team. + if (isNestedIn(parentWorkshare, op)) +return false; + + // 2.8.2 single Construct + // Binding The binding thread set for a single region is the current team. A + // single region binds to the innermost enclosing parallel region. + // Description Only one of the encountering threads will execute the + // structured block associated with the single construct. + if (isNestedIn(parentWorkshare, op)) +return false; + + return true; +} + +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool mustParallelizeOp(Operation *op) { + return op + ->walk([&](Operation *nested) { +// We need to be careful not to pick up workshare.loop_wrapper in nested +// omp.parallel{omp.workshare} regions, i.e. make sure that `nested` +// binds to the workshare region we are currently handling. +// +// For example: +// +// omp.parallel { +// omp.workshare { // currently handling this +// omp.parallel { +// omp.workshare { // nested workshare +// omp.workshare.loop_wrapper {} +// +// Therefore, we skip if we encounter a nested omp.workshare. 
+if (isa(op)) + return WalkResult::skip(); +if (isa(op)) + return WalkResult::interrupt(); +return WalkResult::advance(); + }) + .wasInterrupted(); +} + +static bool isSafeToParallelize(Operation *op) { + return isa(op) || isa(op) || + isMemoryEffectFree(op); +} + +/// Simple shallow copies suffice for our purposes in this pass, so we implement +/// this simpler alternative to the full fledged `createCopyFunc` in the +/// frontend +static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType, + fir::FirOpBuilder builder) { + mlir::ModuleOp module = builder.getModule(); + auto rt = cast(varType); + mlir::Type eleTy = rt.getEleTy(); + std::string copyFuncName = + fir::getTypeAsString(eleTy, builder.getKindMap(), "_workshare_copy"); + + if (auto decl = module.lookupSymbol(copyFuncName)) +return decl; + // create function + mlir::OpBuilder::InsertionGuard guard(builder); + mlir::OpBuilder modBuilder(module.getBodyRegion()); + llvm::SmallVector argsTy = {varType, varType}; + auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {}); + mlir::func::FuncOp funcOp = + modBuilder.create(loc, copyFuncName, funcType); + funcOp.setVisibility(mlir::SymbolTable::Visibility::Private); + builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy, +
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -0,0 +1,446 @@ +//===- LowerWorkshare.cpp - special cases for bufferization ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file implements the lowering of omp.workshare to other omp constructs. +// +// This pass is tasked with parallelizing the loops nested in +// workshare.loop_wrapper while both the Fortran to mlir lowering and the hlfir +// to fir lowering pipelines are responsible for emitting the +// workshare.loop_wrapper ops where appropriate according to the +// `shouldUseWorkshareLowering` function. +// +//===--===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { + +// Checks for nesting pattern below as we need to avoid sharing the work of +// statements which are nested in some constructs such as omp.critical or +// another omp.parallel. +// +// omp.workshare { // `wsOp` +// ... +// omp.T { // `parent` +// ... +// `op` +// +template +static bool isNestedIn(omp::WorkshareOp wsOp, Operation *op) { + T parent = op->getParentOfType(); + if (!parent) +return false; + return wsOp->isProperAncestor(parent); +} + +bool shouldUseWorkshareLowering(Operation *op) { + auto parentWorkshare = op->getParentOfType(); + + if (!parentWorkshare) +return false; + + if (isNestedIn(parentWorkshare, op)) +return false; + + // 2.8.3 workshare Construct + // For a parallel construct, the construct is a unit of work with respect to + // the workshare construct. 
The statements contained in the parallel construct + // are executed by a new thread team. + if (isNestedIn(parentWorkshare, op)) +return false; + + // 2.8.2 single Construct + // Binding The binding thread set for a single region is the current team. A + // single region binds to the innermost enclosing parallel region. + // Description Only one of the encountering threads will execute the + // structured block associated with the single construct. + if (isNestedIn(parentWorkshare, op)) +return false; + + return true; +} + +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool mustParallelizeOp(Operation *op) { + return op + ->walk([&](Operation *nested) { +// We need to be careful not to pick up workshare.loop_wrapper in nested +// omp.parallel{omp.workshare} regions, i.e. make sure that `nested` +// binds to the workshare region we are currently handling. +// +// For example: +// +// omp.parallel { +// omp.workshare { // currently handling this +// omp.parallel { +// omp.workshare { // nested workshare +// omp.workshare.loop_wrapper {} +// +// Therefore, we skip if we encounter a nested omp.workshare. 
+if (isa(op)) + return WalkResult::skip(); +if (isa(op)) + return WalkResult::interrupt(); +return WalkResult::advance(); + }) + .wasInterrupted(); +} + +static bool isSafeToParallelize(Operation *op) { + return isa(op) || isa(op) || + isMemoryEffectFree(op); +} + +/// Simple shallow copies suffice for our purposes in this pass, so we implement +/// this simpler alternative to the full fledged `createCopyFunc` in the +/// frontend +static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType, + fir::FirOpBuilder builder) { + mlir::ModuleOp module = builder.getModule(); + auto rt = cast(varType); + mlir::Type eleTy = rt.getEleTy(); + std::string copyFuncName = + fir::getTypeAsString(eleTy, builder.getKindMap(), "_workshare_copy"); + + if (auto decl = module.lookupSymbol(copyFuncName)) +return decl; + // create function + mlir::OpBuilder::InsertionGuard guard(builder); + mlir::OpBuilder modBuilder(module.getBodyRegion()); + llvm::SmallVector argsTy = {varType, varType}; + auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {}); + mlir::func::FuncOp funcOp = + modBuilder.create(loc, copyFuncName, funcType); + funcOp.setVisibility(mlir::SymbolTable::Visibility::Private); + builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy, +
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
ivanradanov wrote: > Maybe support for this operation could be just based on changes to how the > MLIR representation is built in the first place, what do you think? This is partly what this implementation aims to do. In fact, after the pass that lowers the omp.workshare operation we are left with IR very close to the one you showed in your example. The approach taken here is similar to the omp.workdistribute implementation, in that the purpose of the omp.workshare and omp.workshare.loop_wrapper ops is to preserve the high-level optimizations available when using HLFIR; after we are done with the LowerWorkshare pass, both omp.workshare and omp.workshare.loop_wrapper disappear. The sole purpose of the omp.workshare.loop_wrapper op is to be able to more explicitly mark loops that need to be "parallelized" by the workshare construct and preserve that information through the pipeline. Its lifetime is from the frontend (Fortran->{HLFIR,FIR}) up to the LowerWorkshare pass, which runs after we are done with HLFIR optimizations (after HLFIR->FIR lowering); the same holds for omp.workshare. https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
ivanradanov wrote: No, you are right, sorry for the back and forth, as you said, since a wsloop can only be nested in an omp.parallel it is immediately obvious that it binds to the omp.parallel threads so that makes sense. My only concern was that at some point some transformation (perhaps in the future, because I don't think anything transforms `wsloop`s currently) could make the assumption that all (or none) of the threads of the team an `omp.parallel` launches will execute the parent block of a `wsloop` that binds to that team. I thought this was a fair assumption for an optimization/transformation to make because if for example only one of the threads executes a wsloop it would not produce the intended result. (For example, it adds an operation immediately before the wsloop which is supposed to be executed by all threads in the omp.parallel. That operation would then be erroneously wrapped in an omp.single in LowerWorkshare.) So the intention was to guard against a potential error like that. Let me know if I am wrong here since I am sure people here have more experience than me on this. I can see that if no transformation can make that assumption, then it is perfectly safe to use `omp.wsloop` instead of `workshare.loop_wrapper`. I am fine with both ways and can make that change if you think it is better. (In fact that is what the initial version of this PR did. I decided to introduce the workshare.loop_wrapper later because I was concerned about a potential issue like above) https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
ivanradanov wrote: > ... However, they would work if they ran after the pass lowering > `omp.workshare` to a set of `omp.single` for the code in between > `omp.wsloop`s. That way we would not have to introduce a new loop wrapper and > also we could create passes assuming the parent of region of an `omp.wsloop` > is executed by all threads in the team. I don't think that should be an > issue, since in principle it makes sense to me that the `omp.workshare` > transformation would run immediately after PFT to MLIR lowering. What do you > think about that alternative? Ideally, the `omp.workshare` lowering will run after the HLFIR to FIR lowering, because missing the high level optimizations that HLFIR provides can result in very bad performance (unneeded temporary arrays, unnecessary copies, non-fused array computation, etc). The workshare lowering transforms the `omp.workshare.loop_wrapper`s into `omp.wsloop`s so they are gone after that. Another factor is that there may not be PFT->loop lowerings for many constructs that need to be divided into units of work. So we may need to first generate HLFIR and alter the lowerings from HLFIR to FIR to get the `omp.wsloop` (or `omp.workshare.loop_wrapper`), which means that there will be portions of the pipeline (from PFT->HLFIR until HLFIR->FIR) where an `omp.wsloop` nested in an `omp.workshare` will be the wrong representation. Are there any concerns with adding `omp.workshare.loop_wrapper`? I do not see that big of an overhead (maintenance or compile time) resulting from its addition, while it makes things clearer and more robust in my opinion. https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
ivanradanov wrote: > Can you share a case where this would happen? I agree that we wouldn't want > to produce some IR that doesn't keep consistent semantics for a given > operation across the pipeline. In that case, adding another operation might > indeed be the right solution. I was under the impression that direct PFT to FIR lowering is deprecated, so things like array notation (e.g. z = x + y where x,y,z are arrays) always go through hlfir.elemental and then to fir loops. Not sure if the PFT->FIR lowering for that exists, but if PFT->FIR is deprecated then we should probably use the HLFIR lowering for this. > My main concern is from the dialect design perspective. It would be confusing > to have two separate "worksharing loop" operations with one being used on its > own and the other one in conjunction with the `omp.workshare` operation, but > both basically representing the same thing (splitting loop iterations across > threads in the team). That's why I'm trying to explore other options that may > result in a better representation before committing to it. I think the operations describe very different things. The similarity in naming is an unfortunate consequence of the `workshare` construct having the same name as a `workshare loop` (I am open to more descriptive name suggestions). How I read it is: `omp.wsloop` is "each thread from the team that encounters it executes its share of the loop nest" whereas `omp.workshare.loop_wrapper` is "this loop nest is marked for dividing into units of work by the encompassing `omp.workshare`" (as per the standard). Semantically, it is just a loop nest that is executed by a single thread and only when the workshare lowering transforms it into an `omp.wsloop` does it turn into a worksharing loop. https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
ivanradanov wrote: @kiranchandramohan @tblah @skatrak I have a question for people more familiar with Fortran and the entire Flang pipeline - is it possible that we would have CFG (multiple blocks) in the IR generated in the workshare statement at this point in the pipeline (immediately after lowering HLFIR to FIR)? The transformation I implemented can work with CFG but the transformation _inlines_ the region contained in the `omp.workshare` region in its parent op (while transforming it), which means that the parent op would need to support multiple blocks, which is not a given (e.g. `fir.if`). Is there an operation like `scf.execute` which can be used here to inline the contents of the `omp.workshare` or should we not support CFG in this transformation? https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
ivanradanov wrote: Ah yes, I meant `scf.execute_region`. But when I tried creating that, it was not registered, so I thought it was a deliberate decision to not pull in the scf dialect and I opted not to go for that lowering. I was wondering if there is some op that is like `scf.execute_region` but already used in flang. I will go for emitting an error in case there are multiple blocks for this first iteration then. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From c5b5369be3d0db31d9ded0eeeb8e28e03d25bd9e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 4 Oct 2024 22:45:09 +0900 Subject: [PATCH 1/6] Fix bug and add better clarification comments --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 28 --- .../lower-workshare-correct-parallelize.mlir | 16 +++ 2 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 4d8e2a9a067141..84cf5e82167987 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -188,14 +189,19 @@ static bool isTransitivelyUsedOutside(Value v, SingleRegion sr) { if (isUserOutsideSR(user, parentOp, sr)) return true; -// Results of nested users cannot be used outside of the SR +// Now we know user is inside `sr`. + +// Results of nested users cannot be used outside of `sr`. if (user->getBlock() != srBlock) continue; -// A non-safe to parallelize operation will be handled separately +// A non-safe to parallelize operation will be checked for uses outside +// separately. if (!isSafeToParallelize(user)) continue; +// For safe to parallelize operations, we need to check if there is a +// transitive use of `v` through them. 
for (auto res : user->getResults()) if (isTransitivelyUsedOutside(res, sr)) return true; @@ -242,7 +248,21 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, for (Operation &op : llvm::make_range(sr.begin, sr.end)) { if (isSafeToParallelize(&op)) { singleBuilder.clone(op, singleMapping); -parallelBuilder.clone(op, rootMapping); +if (llvm::all_of(op.getOperands(), [&](Value opr) { + return rootMapping.contains(opr); +})) { + // Safe to parallelize operations which have all operands available in + // the root parallel block can be executed there. + parallelBuilder.clone(op, rootMapping); +} else { + // If any operand was not available, it means that there was no + // transitive use of a non-safe-to-parallelize operation outside `sr`. + // This means that there should be no transitive uses outside `sr` of + // `op`. + assert(llvm::all_of(op.getResults(), [&](Value v) { +return !isTransitivelyUsedOutside(v, sr); + })); +} } else if (auto alloca = dyn_cast(&op)) { auto hoisted = cast(allocaBuilder.clone(*alloca, singleMapping)); @@ -252,7 +272,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, } else { singleBuilder.clone(op, singleMapping); // Prepare reloaded values for results of operations that cannot be -// safely parallelized and which are used after the region `sr` +// safely parallelized and which are used after the region `sr`. 
for (auto res : op.getResults()) { if (isTransitivelyUsedOutside(res, sr)) { auto alloc = mapReloadedValue(res, allocaBuilder, singleBuilder, diff --git a/flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir b/flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir new file mode 100644 index 00..99ca4fe5a0e212 --- /dev/null +++ b/flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir @@ -0,0 +1,16 @@ +// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s | FileCheck %s + +// Check that the safe to parallelize `fir.declare` op will not be parallelized +// due to its operand %alloc not being reloaded outside the omp.single. + +func.func @foo() { + %c0 = arith.constant 0 : index + omp.workshare { +%alloc = fir.allocmem !fir.array, %c0 {bindc_name = ".tmp.forall", uniq_name = ""} +%shape = fir.shape %c0 : (index) -> !fir.shape<1> +%declare = fir.declare %alloc(%shape) {uniq_name = ".tmp.forall"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> +fir.freemem %alloc : !fir.heap> +omp.terminator + } + return +} >From 33d6674ca8dfc1adf3b02f45317a7f068a7f7cb3 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 2/6] Add workshare loop wrapper lowerings Bufferize test Bufferize test Bufferize test Add test for should use workshare lowering --- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +- .../Transforms/OptimizedBufferization.cpp | 10 +- flang/test/HLFIR/bufferize-workshare.fir | 58 .../OpenMP/should-use-workshare-lowering.mlir | 140 +++
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 22:06:55 +0900 Subject: [PATCH 01/10] [flang] Lower omp.workshare to other omp constructs Change to workshare loop wrapper op Move single op declaration Schedule pass properly Correctly handle nested nested loop nests to be parallelized by workshare Leave comments for shouldUseWorkshareLowering Use copyprivate to scatter val from omp.single TODO still need to implement copy function TODO transitive check for usage outside of omp.single not imiplemented yet Transitively check for users outisde of single op TODO need to implement copy func TODO need to hoist allocas outside of single regions Add tests Hoist allocas More tests Emit body for copy func Test the tmp storing logic Clean up trivially dead ops Only handle single-block regions for now Fix tests for custom assembly for loop wrapper Only run the lower workshare pass if openmp is enabled Implement some missing functionality Fix tests Fix test Iterate backwards to find all trivially dead ops Add expalanation comment for createCopyFun Update test --- flang/include/flang/Optimizer/OpenMP/Passes.h | 5 + .../include/flang/Optimizer/OpenMP/Passes.td | 5 + flang/include/flang/Tools/CLOptions.inc | 6 +- flang/include/flang/Tools/CrossToolHelpers.h | 1 + flang/lib/Frontend/FrontendActions.cpp| 10 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 + flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++ flang/test/Fir/basic-program.fir | 1 + .../Transforms/OpenMP/lower-workshare.mlir| 189 .../Transforms/OpenMP/lower-workshare2.mlir | 23 + .../Transforms/OpenMP/lower-workshare3.mlir | 74 +++ .../Transforms/OpenMP/lower-workshare4.mlir | 59 +++ .../Transforms/OpenMP/lower-workshare5.mlir | 42 ++ .../Transforms/OpenMP/lower-workshare6.mlir | 51 ++ flang/tools/bbc/bbc.cpp | 5 +- flang/tools/tco/tco.cpp | 1 + 16 files 
changed, 915 insertions(+), 4 deletions(-) create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h index 403d79667bf448..feb395f1a12dbd 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.h +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -25,6 +25,11 @@ namespace flangomp { #define GEN_PASS_REGISTRATION #include "flang/Optimizer/OpenMP/Passes.h.inc" +/// Impelements the logic specified in the 2.8.3 workshare Construct section of +/// the OpenMP standard which specifies what statements or constructs shall be +/// divided into units of work. 
+bool shouldUseWorkshareLowering(mlir::Operation *op); + } // namespace flangomp #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 395178e26a5762..041240cad12eb3 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> { ]; } +// Needs to be scheduled on Module as we create functions in it +def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 1881e23b00045a..bb00e079008a0b 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline( /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline inline void createHLFIRToFIRPassPipeline( -mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { if (optLevel.isOptimizingForSpeed()) { addCanonicalizerPassWithoutRegionSimplification(pm); addNestedPassToAllTopLevelOperations( @@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + if (enableOpenMP) +pm.a
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
ivanradanov wrote: @Thirumalai-Shaktivel Thank you very much. Fixed. `forall` is actually a case which we do not handle yet. You can give it a shot if you would like. https://github.com/llvm/llvm-project/pull/104748 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 22:06:55 +0900 Subject: [PATCH 01/11] [flang] Lower omp.workshare to other omp constructs Change to workshare loop wrapper op Move single op declaration Schedule pass properly Correctly handle nested nested loop nests to be parallelized by workshare Leave comments for shouldUseWorkshareLowering Use copyprivate to scatter val from omp.single TODO still need to implement copy function TODO transitive check for usage outside of omp.single not imiplemented yet Transitively check for users outisde of single op TODO need to implement copy func TODO need to hoist allocas outside of single regions Add tests Hoist allocas More tests Emit body for copy func Test the tmp storing logic Clean up trivially dead ops Only handle single-block regions for now Fix tests for custom assembly for loop wrapper Only run the lower workshare pass if openmp is enabled Implement some missing functionality Fix tests Fix test Iterate backwards to find all trivially dead ops Add expalanation comment for createCopyFun Update test --- flang/include/flang/Optimizer/OpenMP/Passes.h | 5 + .../include/flang/Optimizer/OpenMP/Passes.td | 5 + flang/include/flang/Tools/CLOptions.inc | 6 +- flang/include/flang/Tools/CrossToolHelpers.h | 1 + flang/lib/Frontend/FrontendActions.cpp| 10 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 + flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++ flang/test/Fir/basic-program.fir | 1 + .../Transforms/OpenMP/lower-workshare.mlir| 189 .../Transforms/OpenMP/lower-workshare2.mlir | 23 + .../Transforms/OpenMP/lower-workshare3.mlir | 74 +++ .../Transforms/OpenMP/lower-workshare4.mlir | 59 +++ .../Transforms/OpenMP/lower-workshare5.mlir | 42 ++ .../Transforms/OpenMP/lower-workshare6.mlir | 51 ++ flang/tools/bbc/bbc.cpp | 5 +- flang/tools/tco/tco.cpp | 1 + 16 files 
changed, 915 insertions(+), 4 deletions(-) create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h index 403d79667bf448..feb395f1a12dbd 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.h +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -25,6 +25,11 @@ namespace flangomp { #define GEN_PASS_REGISTRATION #include "flang/Optimizer/OpenMP/Passes.h.inc" +/// Impelements the logic specified in the 2.8.3 workshare Construct section of +/// the OpenMP standard which specifies what statements or constructs shall be +/// divided into units of work. 
+bool shouldUseWorkshareLowering(mlir::Operation *op); + } // namespace flangomp #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 395178e26a5762..041240cad12eb3 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> { ]; } +// Needs to be scheduled on Module as we create functions in it +def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 1881e23b00045a..bb00e079008a0b 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline( /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline inline void createHLFIRToFIRPassPipeline( -mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { if (optLevel.isOptimizingForSpeed()) { addCanonicalizerPassWithoutRegionSimplification(pm); addNestedPassToAllTopLevelOperations( @@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + if (enableOpenMP) +pm.a
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From 8d0651ff644fa6821e0d0fbc4c47fee36802a15c Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 4 Oct 2024 22:48:42 +0900 Subject: [PATCH 1/6] Fix message --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 84cf5e82167987..a91f64f04a30aa 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -466,8 +466,9 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { } else { // Otherwise just change the operation to an omp.single. -wsOp->emitWarning("omp workshare with unstructured control flow currently " - "unsupported and will be serialized."); +wsOp->emitWarning( +"omp workshare with unstructured control flow is currently " +"unsupported and will be serialized."); // `shouldUseWorkshareLowering` should have guaranteed that there are no // omp.workshare_loop_wrapper's that bind to this omp.workshare. 
>From 881067963fea3ce7fa912692e0cca46a68288e85 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 2/6] Add workshare loop wrapper lowerings Bufferize test Bufferize test Bufferize test Add test for should use workshare lowering --- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +- .../Transforms/OptimizedBufferization.cpp | 10 +- flang/test/HLFIR/bufferize-workshare.fir | 58 .../OpenMP/should-use-workshare-lowering.mlir | 140 ++ 4 files changed, 208 insertions(+), 4 deletions(-) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index 07794828fce267..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,6 +26,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. 
hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 3a0a98dc594463..f014724861e333 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto 
arrayElement =
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
ivanradanov wrote: @Thirumalai-Shaktivel Fixed, it was a very stupid mistake with the argument order of the copyprivate copy function. Thank you. https://github.com/llvm/llvm-project/pull/104748 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From 07a9eb3581f480c47ce4de3de00c7cef15df3cdc Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 4 Oct 2024 14:21:14 +0900 Subject: [PATCH 1/7] Fix dst src in copy function --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index cf1867311cc236..baf8346e7608a9 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -162,8 +162,8 @@ static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType, {loc, loc}); builder.setInsertionPointToStart(&funcOp.getRegion().back()); - Value loaded = builder.create(loc, funcOp.getArgument(0)); - builder.create(loc, loaded, funcOp.getArgument(1)); + Value loaded = builder.create(loc, funcOp.getArgument(1)); + builder.create(loc, loaded, funcOp.getArgument(0)); builder.create(loc); return funcOp; >From c3ff901b31806c73228e4f47a47f420c2d2465ed Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 4 Oct 2024 14:38:48 +0900 Subject: [PATCH 2/7] Use omp.single to handle CFG cases --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 77 +-- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index baf8346e7608a9..34399abbcd20ea 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -16,7 +16,6 @@ // //===--===// -#include "flang/Optimizer/Builder/Todo.h" #include #include #include @@ -39,7 +38,6 @@ #include #include #include -#include #include @@ -96,6 +94,12 @@ bool shouldUseWorkshareLowering(Operation *op) { if (isNestedIn(parentWorkshare, op)) return false; + if (parentWorkshare.getRegion().getBlocks().size() != 1) { 
+parentWorkshare->emitWarning( +"omp workshare with unstructured control flow currently unsupported."); +return false; + } + return true; } @@ -408,15 +412,6 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { OpBuilder rootBuilder(wsOp); - // This operation is just a placeholder which will be erased later. We need it - // because our `parallelizeRegion` function works on regions and not blocks. - omp::WorkshareOp newOp = - rootBuilder.create(loc, omp::WorkshareOperands()); - if (!wsOp.getNowait()) -rootBuilder.create(loc); - - parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc, di); - // FIXME Currently, we only support workshare constructs with structured // control flow. The transformation itself supports CFG, however, once we // transform the MLIR region in the omp.workshare, we need to inline that @@ -427,19 +422,53 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { // time when fir ops get lowered to CFG. However, SCF is not registered in // flang so we cannot use it. Remove this requirement once we have // scf.execute_region or an alternative operation available. - if (wsOp.getRegion().getBlocks().size() != 1) -TODO(wsOp->getLoc(), "omp workshare with unstructured control flow"); - - // Inline the contents of the placeholder workshare op into its parent block. - Block *theBlock = &newOp.getRegion().front(); - Operation *term = theBlock->getTerminator(); - Block *parentBlock = wsOp->getBlock(); - parentBlock->getOperations().splice(newOp->getIterator(), - theBlock->getOperations()); - assert(term->getNumOperands() == 0); - term->erase(); - newOp->erase(); - wsOp->erase(); + if (wsOp.getRegion().getBlocks().size() == 1) { +// This operation is just a placeholder which will be erased later. We need +// it because our `parallelizeRegion` function works on regions and not +// blocks. 
+omp::WorkshareOp newOp = +rootBuilder.create(loc, omp::WorkshareOperands()); +if (!wsOp.getNowait()) + rootBuilder.create(loc); + +parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc, + di); + +// Inline the contents of the placeholder workshare op into its parent +// block. +Block *theBlock = &newOp.getRegion().front(); +Operation *term = theBlock->getTerminator(); +Block *parentBlock = wsOp->getBlock(); +parentBlock->getOperations().splice(newOp->getIterator(), +theBlock->getOperations()); +assert(term->getNumOperands() == 0); +term->erase(); +newOp->erase(); +wsOp->erase(); + } else { +// Otherwise just change the operation to an omp.single. + +//
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 22:06:55 +0900 Subject: [PATCH 1/8] [flang] Lower omp.workshare to other omp constructs Change to workshare loop wrapper op Move single op declaration Schedule pass properly Correctly handle nested nested loop nests to be parallelized by workshare Leave comments for shouldUseWorkshareLowering Use copyprivate to scatter val from omp.single TODO still need to implement copy function TODO transitive check for usage outside of omp.single not imiplemented yet Transitively check for users outisde of single op TODO need to implement copy func TODO need to hoist allocas outside of single regions Add tests Hoist allocas More tests Emit body for copy func Test the tmp storing logic Clean up trivially dead ops Only handle single-block regions for now Fix tests for custom assembly for loop wrapper Only run the lower workshare pass if openmp is enabled Implement some missing functionality Fix tests Fix test Iterate backwards to find all trivially dead ops Add expalanation comment for createCopyFun Update test --- flang/include/flang/Optimizer/OpenMP/Passes.h | 5 + .../include/flang/Optimizer/OpenMP/Passes.td | 5 + flang/include/flang/Tools/CLOptions.inc | 6 +- flang/include/flang/Tools/CrossToolHelpers.h | 1 + flang/lib/Frontend/FrontendActions.cpp| 10 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 + flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++ flang/test/Fir/basic-program.fir | 1 + .../Transforms/OpenMP/lower-workshare.mlir| 189 .../Transforms/OpenMP/lower-workshare2.mlir | 23 + .../Transforms/OpenMP/lower-workshare3.mlir | 74 +++ .../Transforms/OpenMP/lower-workshare4.mlir | 59 +++ .../Transforms/OpenMP/lower-workshare5.mlir | 42 ++ .../Transforms/OpenMP/lower-workshare6.mlir | 51 ++ flang/tools/bbc/bbc.cpp | 5 +- flang/tools/tco/tco.cpp | 1 + 16 files 
changed, 915 insertions(+), 4 deletions(-) create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h index 403d79667bf448..feb395f1a12dbd 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.h +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -25,6 +25,11 @@ namespace flangomp { #define GEN_PASS_REGISTRATION #include "flang/Optimizer/OpenMP/Passes.h.inc" +/// Impelements the logic specified in the 2.8.3 workshare Construct section of +/// the OpenMP standard which specifies what statements or constructs shall be +/// divided into units of work. 
+bool shouldUseWorkshareLowering(mlir::Operation *op); + } // namespace flangomp #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 395178e26a5762..041240cad12eb3 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> { ]; } +// Needs to be scheduled on Module as we create functions in it +def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 1881e23b00045a..bb00e079008a0b 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline( /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline inline void createHLFIRToFIRPassPipeline( -mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { if (optLevel.isOptimizingForSpeed()) { addCanonicalizerPassWithoutRegionSimplification(pm); addNestedPassToAllTopLevelOperations( @@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + if (enableOpenMP) +pm.add
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 22:06:55 +0900 Subject: [PATCH 1/9] [flang] Lower omp.workshare to other omp constructs Change to workshare loop wrapper op Move single op declaration Schedule pass properly Correctly handle nested nested loop nests to be parallelized by workshare Leave comments for shouldUseWorkshareLowering Use copyprivate to scatter val from omp.single TODO still need to implement copy function TODO transitive check for usage outside of omp.single not implemented yet Transitively check for users outside of single op TODO need to implement copy func TODO need to hoist allocas outside of single regions Add tests Hoist allocas More tests Emit body for copy func Test the tmp storing logic Clean up trivially dead ops Only handle single-block regions for now Fix tests for custom assembly for loop wrapper Only run the lower workshare pass if openmp is enabled Implement some missing functionality Fix tests Fix test Iterate backwards to find all trivially dead ops Add explanation comment for createCopyFun Update test --- flang/include/flang/Optimizer/OpenMP/Passes.h | 5 + .../include/flang/Optimizer/OpenMP/Passes.td | 5 + flang/include/flang/Tools/CLOptions.inc | 6 +- flang/include/flang/Tools/CrossToolHelpers.h | 1 + flang/lib/Frontend/FrontendActions.cpp| 10 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 + flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++ flang/test/Fir/basic-program.fir | 1 + .../Transforms/OpenMP/lower-workshare.mlir| 189 .../Transforms/OpenMP/lower-workshare2.mlir | 23 + .../Transforms/OpenMP/lower-workshare3.mlir | 74 +++ .../Transforms/OpenMP/lower-workshare4.mlir | 59 +++ .../Transforms/OpenMP/lower-workshare5.mlir | 42 ++ .../Transforms/OpenMP/lower-workshare6.mlir | 51 ++ flang/tools/bbc/bbc.cpp | 5 +- flang/tools/tco/tco.cpp | 1 + 16 files 
changed, 915 insertions(+), 4 deletions(-) create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h index 403d79667bf448..feb395f1a12dbd 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.h +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -25,6 +25,11 @@ namespace flangomp { #define GEN_PASS_REGISTRATION #include "flang/Optimizer/OpenMP/Passes.h.inc" +/// Implements the logic specified in the 2.8.3 workshare Construct section of +/// the OpenMP standard which specifies what statements or constructs shall be +/// divided into units of work. 
+bool shouldUseWorkshareLowering(mlir::Operation *op); + } // namespace flangomp #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 395178e26a5762..041240cad12eb3 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> { ]; } +// Needs to be scheduled on Module as we create functions in it +def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 1881e23b00045a..bb00e079008a0b 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline( /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline inline void createHLFIRToFIRPassPipeline( -mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { if (optLevel.isOptimizingForSpeed()) { addCanonicalizerPassWithoutRegionSimplification(pm); addNestedPassToAllTopLevelOperations( @@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + if (enableOpenMP) +pm.add
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
ivanradanov wrote: > My concern with the TODO message is that some code that previously compiled > using the lowering of WORKSHARE as SINGLE will now hit this TODO. This is > okay with me so long as it is fixed soon (before LLVM 20). Otherwise, could > these cases continue to be lowered as SINGLE for now? I have updated it to lower to omp.single and emit a warning in CFG cases. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits