from:"Ivan R. Ivanov via llvm\-branch\-commits"

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov created 
https://github.com/llvm/llvm-project/pull/101444

2/4

>From 8068d6036fe84f6c0f22f2c877366eef184292e3 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov created 
https://github.com/llvm/llvm-project/pull/101445

3/4

>From c2cbd7779dc118c6bad507b6babeb7200262caff Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++---
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 43 +--
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 ++---
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 61 insertions(+), 39 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..5406e5076d55c 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,44 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov created 
https://github.com/llvm/llvm-project/pull/101446

4/4 

There are two points which need some discussion in this PR:

1. We need to make a value computed in an omp.single accessible in all threads 
of the omp.parallel region. This is achieved by allocating temporary memory 
outside the omp.parallel, storing the value there in the omp.single, and then 
reloading it from all threads. However, from reading the standard, I don't think 
we are guaranteed that the workshare is nested in the omp.parallel, so there 
could be an omp.parallel { func.call @contains_workshare }, and then we would 
not be able to access the omp.parallel. So I think adding support in the 
runtime to be able to yield a value from an omp.single could be the fix for this.

2. For the temporary allocations above, not all types are supported by 
fir.alloca, so I need to use llvm.alloca and unrealized_cast to be able to 
allocate a temporary for a fir.ref type. This too can be fixed by introducing 
yielding from omp.single.

>From 003568d028b9d7f0323f31f8717527fc52c93c6f Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:44:31 +0900
Subject: [PATCH] [flang] Lower omp.workshare to other omp constructs

---
 flang/include/flang/Optimizer/CMakeLists.txt  |   1 +
 .../flang/Optimizer/OpenMP/CMakeLists.txt |   4 +
 flang/include/flang/Optimizer/OpenMP/Passes.h |  30 ++
 .../include/flang/Optimizer/OpenMP/Passes.td  |  18 ++
 flang/include/flang/Tools/CLOptions.inc   |   2 +
 flang/lib/Frontend/CMakeLists.txt |   1 +
 flang/lib/Optimizer/Builder/HLFIRTools.cpp|  21 +-
 flang/lib/Optimizer/CMakeLists.txt|   1 +
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   6 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |  26 ++
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 260 ++
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../Transforms/OpenMP/lower-workshare.mlir|  81 ++
 flang/tools/bbc/CMakeLists.txt|   1 +
 flang/tools/fir-opt/CMakeLists.txt|   1 +
 flang/tools/fir-opt/fir-opt.cpp   |   2 +
 flang/tools/tco/CMakeLists.txt|   1 +
 17 files changed, 505 insertions(+), 9 deletions(-)
 create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
 create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h
 create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td
 create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir

diff --git a/flang/include/flang/Optimizer/CMakeLists.txt 
b/flang/include/flang/Optimizer/CMakeLists.txt
index 89e43a9ee8d62..3336ac935e101 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt 
b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
new file mode 100644
index 0..d59573f0f7fd9
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP)
+
+add_public_tablegen_target(FlangOpenMPPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
new file mode 100644
index 0..95a05b3005073
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -0,0 +1,30 @@
+//===- Passes.h - OpenMP pass entry points --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This header declares OpenMP pass entry points.
+//
+//===--===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
+} // namespace flangomp
+
+#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
new file mode 100644
index 0..6f636ec1df616
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passe

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

Should we have a `-use-experimental-workshare` or similar flag to facilitate 
some temporary in-tree development, as this may require more moving pieces?

https://github.com/llvm/llvm-project/pull/101444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

I am debating introducing a new operation, workshare_loop_container, which 
exists only to "contain" an omp.loop_nest between lowering an elemental and 
lowering the omp.workshare it is contained in.

So we would have this state:

```
omp.workshare {
  omp.workshare_loop_container {
    omp.loop_nest {}
  }
}
```

instead of this one:

```
omp.workshare {
  omp.wsloop {
    omp.loop_nest {}
  }
}
```

which may have come from a different lowering/codegen, so we would not be sure 
what the semantics of that code would be.

This new operation can later be reused for the `workdistribute` lowering as 
well.

https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From 9a51b404ab47c5dd0b27c3f957ee6cefd1470c25 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:44:31 +0900
Subject: [PATCH] [flang] Lower omp.workshare to other omp constructs

---
 flang/include/flang/Optimizer/CMakeLists.txt  |   1 +
 .../flang/Optimizer/OpenMP/CMakeLists.txt |   4 +
 flang/include/flang/Optimizer/OpenMP/Passes.h |  30 ++
 .../include/flang/Optimizer/OpenMP/Passes.td  |  18 ++
 flang/include/flang/Tools/CLOptions.inc   |   2 +
 flang/lib/Frontend/CMakeLists.txt |   1 +
 flang/lib/Optimizer/CMakeLists.txt|   1 +
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   6 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |  26 ++
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 260 ++
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../Transforms/OpenMP/lower-workshare.mlir|  81 ++
 flang/tools/bbc/CMakeLists.txt|   1 +
 flang/tools/fir-opt/CMakeLists.txt|   1 +
 flang/tools/fir-opt/fir-opt.cpp   |   2 +
 flang/tools/tco/CMakeLists.txt|   1 +
 16 files changed, 491 insertions(+), 2 deletions(-)
 create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
 create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h
 create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td
 create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir

diff --git a/flang/include/flang/Optimizer/CMakeLists.txt 
b/flang/include/flang/Optimizer/CMakeLists.txt
index 89e43a9ee8d62..3336ac935e101 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt 
b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
new file mode 100644
index 0..d59573f0f7fd9
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP)
+
+add_public_tablegen_target(FlangOpenMPPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
new file mode 100644
index 0..95a05b3005073
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -0,0 +1,30 @@
+//===- Passes.h - OpenMP pass entry points --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This header declares OpenMP pass entry points.
+//
+//===--===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
+} // namespace flangomp
+
+#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
new file mode 100644
index 0..6f636ec1df616
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -0,0 +1,18 @@
+//===-- Passes.td - HLFIR pass definition file -*- tablegen 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_DIALECT_OPENMP_PASSES
+#define FORTRAN_DIALECT_OPENMP_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def LowerWorkshare : Pass<"lower-workshare"> {
+  let summary = "Lower workshare construct";
+}
+
+#endif //FORTRAN_DIALECT_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 7df5044949463..594369fc2ffe5 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -17,6 +17,7 @@
 #include "mlir/Transforms/Passes.h"
 #include "flang/Optimizer/CodeGen/CodeGen.h"

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 4da93bb2a99ac1d59d4924c518503c94ec81c659 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must have at

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-07-31 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From 26d0051179dec85eb6aee2b48db54964bf042a87 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:44:31 +0900
Subject: [PATCH] [flang] Lower omp.workshare to other omp constructs

---
 flang/include/flang/Optimizer/CMakeLists.txt  |   1 +
 .../flang/Optimizer/OpenMP/CMakeLists.txt |   4 +
 flang/include/flang/Optimizer/OpenMP/Passes.h |  30 ++
 .../include/flang/Optimizer/OpenMP/Passes.td  |  18 ++
 flang/include/flang/Tools/CLOptions.inc   |   2 +
 flang/lib/Frontend/CMakeLists.txt |   1 +
 flang/lib/Optimizer/CMakeLists.txt|   1 +
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   6 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |  26 ++
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 259 ++
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../Transforms/OpenMP/lower-workshare.mlir|  81 ++
 flang/tools/bbc/CMakeLists.txt|   1 +
 flang/tools/fir-opt/CMakeLists.txt|   1 +
 flang/tools/fir-opt/fir-opt.cpp   |   2 +
 flang/tools/tco/CMakeLists.txt|   1 +
 16 files changed, 490 insertions(+), 2 deletions(-)
 create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
 create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h
 create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td
 create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir

diff --git a/flang/include/flang/Optimizer/CMakeLists.txt 
b/flang/include/flang/Optimizer/CMakeLists.txt
index 89e43a9ee8d62..3336ac935e101 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt 
b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
new file mode 100644
index 0..d59573f0f7fd9
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP)
+
+add_public_tablegen_target(FlangOpenMPPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
new file mode 100644
index 0..95a05b3005073
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -0,0 +1,30 @@
+//===- Passes.h - OpenMP pass entry points --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This header declares OpenMP pass entry points.
+//
+//===--===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
+} // namespace flangomp
+
+#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
new file mode 100644
index 0..6f636ec1df616
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -0,0 +1,18 @@
+//===-- Passes.td - HLFIR pass definition file -*- tablegen 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_DIALECT_OPENMP_PASSES
+#define FORTRAN_DIALECT_OPENMP_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def LowerWorkshare : Pass<"lower-workshare"> {
+  let summary = "Lower workshare construct";
+}
+
+#endif //FORTRAN_DIALECT_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 7df5044949463..594369fc2ffe5 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -17,6 +17,7 @@
 #include "mlir/Transforms/Passes.h"
 #include "flang/Optimizer/CodeGen/CodeGen.h"

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-01 Thread Ivan R. Ivanov via llvm-branch-commits

ivanradanov wrote:

> Thank you for your work so far. This is a great start.
> 
> What is the plan for transforming do loops generated by lowering (e.g. that 
> do not become hlfir.elemental operations and are not generated by hlfir 
> bufferization)?

I am looking at [this](https://www.openmp.org/spec-html/5.0/openmpsu39.html) 
for the standard.

I intend to go through the various constructs that are required to be separated 
into units of work and provide an alternative lowering for them so that they 
will get parallelized when we lower the workdistribute operation.

To accurately keep track of constructs that need to be parallelized for 
workdistribute, I am debating adding a new loop_nest wrapper for that, as 
discussed 
[here](https://github.com/llvm/llvm-project/pull/101445#issuecomment-2261837847).

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-01 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenMP)

ivanradanov wrote:

Thank you, I missed those passes. I guess I will make a separate PR for 
creating the OpenMP pass directory and moving those there. I will link that 
here once done.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-01 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));

ivanradanov wrote:

According to the standard:

> For array expressions within each statement, including transformational array 
> intrinsic functions that compute scalar values from arrays:
>Evaluation of each element of the array expression, including any 
> references to ELEMENTAL functions, is a unit of work.
>Evaluation of transformational array intrinsic functions may be freely 
> subdivided into any number of units of work.

I was under the impression that ELEMENTAL functions must be PURE, however now I 
see that they can be marked IMPURE. The standard says to divide these into 
units of work, and 

> It is unspecified how the units of work are assigned to the threads executing 
> a workshare region. 

So just from reading this it looks like we should lower those to wsloops and 
the user is responsible for any race conditions.

Or perhaps this part

> An implementation of the workshare construct must insert any synchronization 
> that is required to maintain standard Fortran semantics.

can be taken to mean that we need to schedule these properly.

Perhaps we can introduce a non-standard clause to the workdistribute to control 
this?

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-01 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());

ivanradanov wrote:

I was misreading the standard as saying that we only parallelize statements 
closely nested in a workdistribute. I will fix this to handle nested control 
regions as well.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+  assert(wsOp.getRegion().getBlocks().size() == 1);
+
+  Location loc = wsOp->getLoc();
+
+  omp::ParallelOp parallelOp = wsOp->getParentOfType();
+  if (!parallelOp) {
+wsOp.emitWarning("cannot handle workshare, converting to single");
+Operation *terminator = wsOp.getRegion().front().getTerminator();
+wsOp->getBlock()->getOperations().splice(
+wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+terminator->erase();
+return;
+  }
+
+  OpBuilder allocBuilder(parallelOp);
+  OpBuilder rootBuilder(wsOp);
+  IRMapping rootMapping;
+
+  omp::SingleOp singleOp = nullptr;
+
+  auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+  IRMapping singleMapping) {
+if (auto reloaded = rootMapping.lookupOrNull(v))
+  return;
+Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+Type ty = v.getType();
+Value alloc, reloaded;
+if (isSupportedByFirAlloca(ty)) {
+  alloc = allocBuilder.create(loc, ty);
+  singleBuilder.create(loc, singleMapping.lookup(v), alloc);
+  reloaded = rootBuilder.create(loc, ty, alloc);
+} else {

ivanradanov wrote:

I think building a fir.alloca for a ReferenceType fails and I encountered that 
somewhere. I will check again to see if I can get an example.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+  assert(wsOp.getRegion().getBlocks().size() == 1);
+
+  Location loc = wsOp->getLoc();
+
+  omp::ParallelOp parallelOp = wsOp->getParentOfType();
+  if (!parallelOp) {
+wsOp.emitWarning("cannot handle workshare, converting to single");
+Operation *terminator = wsOp.getRegion().front().getTerminator();
+wsOp->getBlock()->getOperations().splice(
+wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+terminator->erase();
+return;
+  }
+
+  OpBuilder allocBuilder(parallelOp);
+  OpBuilder rootBuilder(wsOp);
+  IRMapping rootMapping;
+
+  omp::SingleOp singleOp = nullptr;
+
+  auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+  IRMapping singleMapping) {
+if (auto reloaded = rootMapping.lookupOrNull(v))
+  return;
+Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+Type ty = v.getType();
+Value alloc, reloaded;
+if (isSupportedByFirAlloca(ty)) {
+  alloc = allocBuilder.create(loc, ty);
+  singleBuilder.create(loc, singleMapping.lookup(v), alloc);

ivanradanov wrote:

I am sorry, this is probably due to my inexperience with flang/fortran: are 
there any types in flang that get automatically freed on scope exit? I ask 
because if I make a shallow copy of an allocatable array, the operation that 
frees it will be put in an omp.single, thus freeing it only once, since the 
free operation would not be `IsSafeToParallelize`.

e.g. 

```
%a = fir.allocmem
use(%a)
fir.freemem %a

->

omp.single{ %a = fir.allocmem}
use(%a)
omp.single{ fir.freemem %a }
```



https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From 62057f90e1e6e9e89df1bb666a3676421e2e52ac Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:10:25 +0900
Subject: [PATCH 1/9] Add custom omp loop wrapper

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5199ff50abb95..76f0c472cfdb1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [
   let hasVerifier = 1;
 }
 
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//

>From d882f2b7413a9ad306334cc69691671b498985fc Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:08:58 +0900
Subject: [PATCH 2/9] Add recursive memory effects trait to workshare

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 76f0c472cfdb1..7d1c80333855e 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [
 // 2.8.3 Workshare Construct
 
//===--===//
 
-def WorkshareOp : OpenMP_Op<"workshare", clauses = [
+def WorkshareOp : OpenMP_Op<"workshare", traits = [
+RecursiveMemoryEffects,
+  ], clauses = [
 OpenMP_NowaitClause,
   ], singleRegion = true> {
   let summary = "workshare directive";

>From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 3/9] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. F

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 16f7146a45ee9b31c00d9d54be4859df312dcb1b Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/2] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must ha

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-02 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+  assert(wsOp.getRegion().getBlocks().size() == 1);
+
+  Location loc = wsOp->getLoc();
+
+  omp::ParallelOp parallelOp = wsOp->getParentOfType();
+  if (!parallelOp) {
+wsOp.emitWarning("cannot handle workshare, converting to single");
+Operation *terminator = wsOp.getRegion().front().getTerminator();
+wsOp->getBlock()->getOperations().splice(
+wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+terminator->erase();
+return;
+  }
+
+  OpBuilder allocBuilder(parallelOp);
+  OpBuilder rootBuilder(wsOp);
+  IRMapping rootMapping;
+
+  omp::SingleOp singleOp = nullptr;
+
+  auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+  IRMapping singleMapping) {
+if (auto reloaded = rootMapping.lookupOrNull(v))
+  return;
+Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+Type ty = v.getType();
+Value alloc, reloaded;
+if (isSupportedByFirAlloca(ty)) {
+  alloc = allocBuilder.create(loc, ty);
+  singleBuilder.create(loc, singleMapping.lookup(v), alloc);

ivanradanov wrote:

I suppose it will be a problem if we try to do this to a fir.alloca which 
goes out of scope; we would need to firstprivate those for the omp.single 
regions. Thank you for the comment.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 7422985144525359db3d95c58b2b477872c73d54 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/2] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must ha

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From e3460a07d658c09a6e0e225ec390983551e23181 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f..8e771952f5b6d 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From b26c03dab1104a13d47b3a5124a31f4a83dbd320 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/2] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must ha

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 97cd498c0819f1a15464d742915534499739a0b4 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From 3b8dbba80f93d863a25179750d9fb652cedf66b4 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f..8e771952f5b6d 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From b26c03dab1104a13d47b3a5124a31f4a83dbd320 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must ha

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+  assert(wsOp.getRegion().getBlocks().size() == 1);
+
+  Location loc = wsOp->getLoc();
+
+  omp::ParallelOp parallelOp = wsOp->getParentOfType();
+  if (!parallelOp) {
+wsOp.emitWarning("cannot handle workshare, converting to single");
+Operation *terminator = wsOp.getRegion().front().getTerminator();
+wsOp->getBlock()->getOperations().splice(
+wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+terminator->erase();
+return;
+  }
+
+  OpBuilder allocBuilder(parallelOp);
+  OpBuilder rootBuilder(wsOp);
+  IRMapping rootMapping;
+
+  omp::SingleOp singleOp = nullptr;
+
+  auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+  IRMapping singleMapping) {
+if (auto reloaded = rootMapping.lookupOrNull(v))
+  return;
+Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+Type ty = v.getType();
+Value alloc, reloaded;
+if (isSupportedByFirAlloca(ty)) {
+  alloc = allocBuilder.create(loc, ty);
+  singleBuilder.create(loc, singleMapping.lookup(v), alloc);
+  reloaded = rootBuilder.create(loc, ty, alloc);
+} else {
+  auto one = allocBuilder.create(
+  loc, allocBuilder.getI32Type(), 1);
+  alloc =
+  allocBuilder.create(loc, llvmPtrTy, llvmPtrTy, one);
+  Value toStore = singleBuilder
+  .create(
+  loc, llvmPtrTy, singleMapping.lookup(v))
+  .getResult(0);
+  singleBuilder.create(loc, toStore, alloc);
+  reloaded = rootBuilder.create(loc, llvmPtrTy, alloc);
+  reloaded =
+  rootBuilder.create(loc, ty, reloaded)
+  .getResult(0);
+}
+rootMapping.map(v, reloaded);
+  };
+
+  auto moveToSingle = [&](SingleRegion sr, OpBuilder singleBuilder) {
+IRMapping singleMapping = rootMapping;
+
+for (Operation &op : llvm::make_range(sr.begin, sr.end)) {
+  singleBuilder.clone(op, singleMapping);
+  if (i

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+  assert(wsOp.getRegion().getBlocks().size() == 1);
+
+  Location loc = wsOp->getLoc();
+
+  omp::ParallelOp parallelOp = wsOp->getParentOfType();
+  if (!parallelOp) {
+wsOp.emitWarning("cannot handle workshare, converting to single");
+Operation *terminator = wsOp.getRegion().front().getTerminator();
+wsOp->getBlock()->getOperations().splice(
+wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+terminator->erase();
+return;
+  }
+
+  OpBuilder allocBuilder(parallelOp);
+  OpBuilder rootBuilder(wsOp);
+  IRMapping rootMapping;
+
+  omp::SingleOp singleOp = nullptr;
+
+  auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+  IRMapping singleMapping) {
+if (auto reloaded = rootMapping.lookupOrNull(v))
+  return;
+Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+Type ty = v.getType();
+Value alloc, reloaded;
+if (isSupportedByFirAlloca(ty)) {
+  alloc = allocBuilder.create(loc, ty);
+  singleBuilder.create(loc, singleMapping.lookup(v), alloc);
+  reloaded = rootBuilder.create(loc, ty, alloc);
+} else {
+  auto one = allocBuilder.create(
+  loc, allocBuilder.getI32Type(), 1);
+  alloc =
+  allocBuilder.create(loc, llvmPtrTy, llvmPtrTy, one);
+  Value toStore = singleBuilder
+  .create(
+  loc, llvmPtrTy, singleMapping.lookup(v))
+  .getResult(0);
+  singleBuilder.create(loc, toStore, alloc);
+  reloaded = rootBuilder.create(loc, llvmPtrTy, alloc);
+  reloaded =
+  rootBuilder.create(loc, ty, reloaded)
+  .getResult(0);
+}
+rootMapping.map(v, reloaded);
+  };
+
+  auto moveToSingle = [&](SingleRegion sr, OpBuilder singleBuilder) {
+IRMapping singleMapping = rootMapping;
+
+for (Operation &op : llvm::make_range(sr.begin, sr.end)) {
+  singleBuilder.clone(op, singleMapping);
+  if (i

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));

ivanradanov wrote:

I have opted to only parallelize loops which are marked as unordered for now.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.

ivanradanov wrote:

Done, thank you for the suggestion.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+// Lower omp workshare construct.
+//===--===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto workshare = dyn_cast(op->getParentOp());
+  if (!workshare)
+return false;
+  return workshare->getParentOfType();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+  return !isa(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  if (isa(op))
+return true;
+
+  llvm::SmallVector effects;
+  MemoryEffectOpInterface interface = dyn_cast(op);
+  if (!interface) {
+return false;
+  }
+  interface.getEffects(effects);
+  if (effects.empty())
+return true;
+
+  return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+///   %a = fir.allocmem
+///   omp.wsloop {}
+///   fir.call Assign %b %a
+///   fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+///   %a = fir.allocmem
+///   fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+///   fir.call Assign %b %a_reloaded
+///   fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which 
need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel 
so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+  assert(wsOp.getRegion().getBlocks().size() == 1);
+
+  Location loc = wsOp->getLoc();
+
+  omp::ParallelOp parallelOp = wsOp->getParentOfType();
+  if (!parallelOp) {
+wsOp.emitWarning("cannot handle workshare, converting to single");
+Operation *terminator = wsOp.getRegion().front().getTerminator();
+wsOp->getBlock()->getOperations().splice(
+wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+terminator->erase();
+return;
+  }
+
+  OpBuilder allocBuilder(parallelOp);
+  OpBuilder rootBuilder(wsOp);
+  IRMapping rootMapping;
+
+  omp::SingleOp singleOp = nullptr;
+
+  auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+  IRMapping singleMapping) {
+if (auto reloaded = rootMapping.lookupOrNull(v))
+  return;
+Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+Type ty = v.getType();
+Value alloc, reloaded;
+if (isSupportedByFirAlloca(ty)) {
+  alloc = allocBuilder.create(loc, ty);
+  singleBuilder.create(loc, singleMapping.lookup(v), alloc);

ivanradanov wrote:

I have opted to hoist fir.alloca ops to the parallel region and scatter them 
using omp.single's copyprivate.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

@kiranchandramohan @tblah 
I think this warrants another look if you have some time. 

I have iterated on this a bit and opted to have an omp loop nest wrapper op 
which signals to the workshare lowering which specific loops need to be 
parallelized (i.e. converted to wsloop { loop_nest }).

This will allow us to emit this in the frontend if it is needed and be more 
precise about the exact loops that need to be parallelized.


So the LowerWorksharePass that I have implemented here is tasked with 
parallelizing the loops nested in workshare_loop_wrapper, while both the 
Fortran->mlir frontend and the hlfir lowering passes would be responsible for 
emitting the workshare_loop_wrapper ops where appropriate. For that, I have 
started with some of the obvious lowerings in the hlfir bufferizations, but 
perhaps that can be done gradually and not everything needs to be covered by 
this PR. Let me know what you think.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 1ecd832151fab1cd9b977f0e4b960294cfdc2d12 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From 38322dc92e37423f528fdba5535feb2f0a1ce113 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f..8e771952f5b6d 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 0287ff3626883d09e364210e83a652a6328835dd Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must ha

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-05 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From aea7da1492e9ad122b3b17f07b8f91b7c6eac777 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From 9046df2cebff7e06e803a7db6df506dc67c6edce Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f..8e771952f5b6d 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-05 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From cca8588b2b2d1c4275b9eaf22c7bd017942f06d9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must ha

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-18 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 2b23c8b52d3b5b680bbcf090fd8c08de6d86fe62 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() &&

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-18 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 0eae392538510f736b630d93b14d46644d042e4f Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4f..f7bc565ea8cbc1 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From 10b7a392916a1dd184c434e41b7be0738c811a38 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-18 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-18 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 69e85558ec78c7a74be9168b4227bb262f67225e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4f..f7bc565ea8cbc1 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From d17c55249b5d7065721225e62f10fef5e3da4f51 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-18 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101443

>From e453738aaa1bd74e2462025075ea4ac12868caac Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:09:09 +0900
Subject: [PATCH 1/7] [MLIR][omp] Add omp.workshare op

---
 .../Dialect/OpenMP/OpenMPClauseOperands.h |  3 +++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 22 +++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 13 +++
 3 files changed, 38 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..d14e5e17afbb08 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -17,6 +17,7 @@
 
 #include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/ADT/SmallVector.h"
+#include 
 
 #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc"
 
@@ -316,6 +317,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..9a189eb2059e01 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1683,6 +1683,19 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+LogicalResult WorkshareOp::verify() {
+  return (*this)->getRegion(0).getBlocks().size() == 1 ? success() : failure();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//

>From 13f5fa2b7628e78b308a3312fd045d87dd89d458 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:10:25 +0900
Subject: [PATCH 2/7] Add custom omp loop wrapper

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5199ff50abb959..76f0c472cfdb14 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [
   let hasVerifier = 1;
 }
 
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//

>From f0915f426f03e3e867953de36e68fd0ccaf5b0ed Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:08:58 +0900
Subject: [PATCH 3/7] Add recursive memory effects trait to workshare

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 76f0c472cfdb14..7d1c80333855e7 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [
 // 2.8.3 Workshare Construct
 
//===--===//
 
-def WorkshareOp : OpenMP_Op<"workshare", clauses = [
+def WorkshareOp : OpenMP_Op<"workshare", traits = [
+RecursiveMemoryEffects,
+  ], clauses = [
 OpenMP_NowaitClause,
   ], singleRegion = true> {
   let summar

[llvm-branch-commits] [flang] [flang] Introduce ws loop nest generation for HLFIR lowering (PR #101445)

2024-08-18 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 38c581c8defc81105160a69bb46a9e489b56f10e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/3] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() &&

[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 38c581c8defc81105160a69bb46a9e489b56f10e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() &&

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov created 
https://github.com/llvm/llvm-project/pull/104748

WIP: I will be adding unit tests, and I am considering whether we should have 
integration tests for the entire omp.workshare pipeline.

>From 793ae50dd00c4347bea78ca6ecd33783c69de354 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 1/5] Add workshare loop wrapper lowerings

---
 .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp  |  6 --
 .../HLFIR/Transforms/OptimizedBufferization.cpp| 10 +++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index b608677c526310..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,12 +26,13 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 namespace hlfir {
@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));
 auto insPt = builder.saveInsertionPoint();
 builder.setInsertionPointToStart(loopNest.body);
 auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c4aed6b79df923..150e3e91197241 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult 
ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
 loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult 
BroadcastAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto arrayElement =
   hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -648,7 +651,8 @@ llvm::LogicalResult 
VariableAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto rhsArrayElement =
   hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);

>From d7ba8a1598f517a5a3c8401d22b81b50114112f1 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Mon, 19 Aug 2024 15:01:31 +0900
Subject: [PATCH 2/5] Bufferize test

---
 flang/test/HLFIR/bufferize-workshare.fir | 58 
 1 file changed, 58 insertions(+)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir

diff --git a/flang/test/HLFIR/bufferize-workshare.fir 
b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 00..86a2f031478dd7
--- /dev

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/104748
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenMP)

ivanradanov wrote:

A PR for this is up here: https://github.com/llvm/llvm-project/pull/104732

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 5e470922405b735d63b4aded76450cc52e94e003 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() &&

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 63d49e4dcd128b470ee77006c594673203dd2df2 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4f..f7bc565ea8cbc1 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From 621b01775171a4718fa405f201b58c3dca005e5a Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101443

>From 604b0293e0574e9d697d4071c2b853a5a27af1e1 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:09:09 +0900
Subject: [PATCH 1/7] [MLIR][omp] Add omp.workshare op

---
 .../Dialect/OpenMP/OpenMPClauseOperands.h |  3 +++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 22 +++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 13 +++
 3 files changed, 38 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..d14e5e17afbb08 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -17,6 +17,7 @@
 
 #include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/ADT/SmallVector.h"
+#include 
 
 #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc"
 
@@ -316,6 +317,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..9a189eb2059e01 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1683,6 +1683,19 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+LogicalResult WorkshareOp::verify() {
+  return (*this)->getRegion(0).getBlocks().size() == 1 ? success() : failure();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//

>From f2fd4f278c23ec99dae3ac44e1c05fcb629f707d Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:10:25 +0900
Subject: [PATCH 2/7] Add custom omp loop wrapper

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5199ff50abb959..76f0c472cfdb14 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [
   let hasVerifier = 1;
 }
 
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//

>From 22c66e6db3997e38254d9848661a38627cd7bb19 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:08:58 +0900
Subject: [PATCH 3/7] Add recursive memory effects trait to workshare

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 76f0c472cfdb14..7d1c80333855e7 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [
 // 2.8.3 Workshare Construct
 
//===--===//
 
-def WorkshareOp : OpenMP_Op<"workshare", clauses = [
+def WorkshareOp : OpenMP_Op<"workshare", traits = [
+RecursiveMemoryEffects,
+  ], clauses = [
 OpenMP_NowaitClause,
   ], singleRegion = true> {
   let summar

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -344,6 +345,7 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  pm.addPass(flangomp::createLowerWorkshare());

ivanradanov wrote:

I opted to keep the rest of the OpenMP passes as they are, and have added a bool 
argument to control whether to run the lower-workshare pass.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From bf0e09f9cfc3159517b1ebec9d39e1143fa935b9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Tue, 20 Aug 2024 09:28:15 +0900
Subject: [PATCH 1/7] Iterate backwards to find all trivially dead ops

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp |  3 +-
 .../Transforms/OpenMP/lower-workshare4.mlir   | 56 ++-
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp 
b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index 9557dd200cacee..bfb9708af70923 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -200,7 +200,8 @@ static bool isTransitivelyUsedOutside(Value v, SingleRegion 
sr) {
 /// We clone pure operations in both the parallel and single blocks. this
 /// functions cleans them up if they end up with no uses
 static void cleanupBlock(Block *block) {
-  for (Operation &op : llvm::make_early_inc_range(*block))
+  for (Operation &op : llvm::make_early_inc_range(
+   llvm::make_range(block->rbegin(), block->rend(
 if (isOpTriviallyDead(&op))
   op.erase();
 }
diff --git a/flang/test/Transforms/OpenMP/lower-workshare4.mlir 
b/flang/test/Transforms/OpenMP/lower-workshare4.mlir
index 44f68cd2ca3654..81bc20cb34b65d 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare4.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare4.mlir
@@ -1,8 +1,33 @@
 // RUN: fir-opt --split-input-file --lower-workshare 
--allow-unregistered-dialect %s | FileCheck %s
 
-// Check that we cleanup unused pure operations from either the parallel or
-// single regions
+// Check that we cleanup unused pure operations from the parallel and single
+// regions
 
+// CHECK-LABEL:   func.func @wsfunc() {
+// CHECK:   %[[VAL_0:.*]] = fir.alloca i32
+// CHECK:   omp.parallel {
+// CHECK: omp.single {
+// CHECK:   %[[VAL_1:.*]] = "test.test1"() : () -> i32
+// CHECK:   %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:   %[[VAL_3:.*]] = arith.constant 3 : index
+// CHECK:   %[[VAL_4:.*]] = arith.addi %[[VAL_2]], %[[VAL_3]] : 
index
+// CHECK:   "test.test3"(%[[VAL_4]]) : (index) -> ()
+// CHECK:   omp.terminator
+// CHECK: }
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_6:.*]] = arith.constant 42 : index
+// CHECK: omp.wsloop nowait {
+// CHECK:   omp.loop_nest (%[[VAL_7:.*]]) : index = (%[[VAL_5]]) 
to (%[[VAL_6]]) inclusive step (%[[VAL_5]]) {
+// CHECK: "test.test2"() : () -> ()
+// CHECK: omp.yield
+// CHECK:   }
+// CHECK:   omp.terminator
+// CHECK: }
+// CHECK: omp.barrier
+// CHECK: omp.terminator
+// CHECK:   }
+// CHECK:   return
+// CHECK: }
 func.func @wsfunc() {
   %a = fir.alloca i32
   omp.parallel {
@@ -13,7 +38,9 @@ func.func @wsfunc() {
   %c42 = arith.constant 42 : index
 
   %c2 = arith.constant 2 : index
-  "test.test3"(%c2) : (index) -> ()
+  %c3 = arith.constant 3 : index
+  %add = arith.addi %c2, %c3 : index
+  "test.test3"(%add) : (index) -> ()
 
   omp.workshare_loop_wrapper {
 omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
@@ -29,27 +56,4 @@ func.func @wsfunc() {
   return
 }
 
-// CHECK-LABEL:   func.func @wsfunc() {
-// CHECK:   %[[VAL_0:.*]] = fir.alloca i32
-// CHECK:   omp.parallel {
-// CHECK: omp.single {
-// CHECK:   %[[VAL_1:.*]] = "test.test1"() : () -> i32
-// CHECK:   %[[VAL_2:.*]] = arith.constant 2 : index
-// CHECK:   "test.test3"(%[[VAL_2]]) : (index) -> ()
-// CHECK:   omp.terminator
-// CHECK: }
-// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_4:.*]] = arith.constant 42 : index
-// CHECK: omp.wsloop nowait {
-// CHECK:   omp.loop_nest (%[[VAL_5:.*]]) : index = (%[[VAL_3]]) 
to (%[[VAL_4]]) inclusive step (%[[VAL_3]]) {
-// CHECK: "test.test2"() : () -> ()
-// CHECK: omp.yield
-// CHECK:   }
-// CHECK:   omp.terminator
-// CHECK: }
-// CHECK: omp.barrier
-// CHECK: omp.terminator
-// CHECK:   }
-// CHECK:   return
-// CHECK: }
 

>From 90cd77c1c7ffdadfe4f088b358c0ee9ee1958872 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Tue, 20 Aug 2024 12:17:45 +0900
Subject: [PATCH 2/7] Add expalanation comment for createCopyFun

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp 
b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cp

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-20 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From a45ef32ecf6483bdb65954c4283ea493494cea77 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Tue, 20 Aug 2024 16:57:25 +0900
Subject: [PATCH 1/6] Update test

---
 .../Transforms/OpenMP/lower-workshare.mlir| 42 +++
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/flang/test/Transforms/OpenMP/lower-workshare.mlir 
b/flang/test/Transforms/OpenMP/lower-workshare.mlir
index 9347863dc4a609..c189e54aaeb0d4 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare.mlir
@@ -103,28 +103,23 @@ func.func @wsfunc(%arg0: !fir.ref>) {
 // CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_9]]) 
{uniq_name = "array"} : (!fir.ref>, !fir.shape<1>) -> 
(!fir.ref>, !fir.ref>)
 // CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_1]] : 
!fir.ref>>
 // CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_9]]) 
{uniq_name = ".tmp.array"} : (!fir.heap>, !fir.shape<1>) -> 
(!fir.heap>, !fir.heap>)
-// CHECK: %[[VAL_13:.*]] = arith.constant true
-// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_13:.*]] = arith.constant 1 : index
 // CHECK: omp.wsloop {
-// CHECK:   omp.loop_nest (%[[VAL_15:.*]]) : index = (%[[VAL_14]]) 
to (%[[VAL_7]]) inclusive step (%[[VAL_14]]) {
-// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10]]#0 
(%[[VAL_15]])  : (!fir.ref>, index) -> !fir.ref
-// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref
-// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_8]] 
: i32
-// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_12]]#0 
(%[[VAL_15]])  : (!fir.heap>, index) -> !fir.ref
-// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_19]] 
temporary_lhs : i32, !fir.ref
+// CHECK:   omp.loop_nest (%[[VAL_14:.*]]) : index = (%[[VAL_13]]) 
to (%[[VAL_7]]) inclusive step (%[[VAL_13]]) {
+// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_10]]#0 
(%[[VAL_14]])  : (!fir.ref>, index) -> !fir.ref
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref
+// CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_16]], %[[VAL_8]] 
: i32
+// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_12]]#0 
(%[[VAL_14]])  : (!fir.heap>, index) -> !fir.ref
+// CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_18]] 
temporary_lhs : i32, !fir.ref
 // CHECK: omp.yield
 // CHECK:   }
 // CHECK:   omp.terminator
 // CHECK: }
 // CHECK: omp.single nowait {
-// CHECK:   %[[VAL_20:.*]] = fir.undefined 
tuple>, i1>
-// CHECK:   %[[VAL_21:.*]] = fir.insert_value %[[VAL_20]], 
%[[VAL_13]], [1 : index] : (tuple>, i1>, i1) -> 
tuple>, i1>
 // CHECK:   hlfir.assign %[[VAL_12]]#0 to %[[VAL_10]]#0 : 
!fir.heap>, !fir.ref>
 // CHECK:   fir.freemem %[[VAL_12]]#0 : 
!fir.heap>
 // CHECK:   omp.terminator
 // CHECK: }
-// CHECK: %[[VAL_22:.*]] = fir.undefined 
tuple>, i1>
-// CHECK: %[[VAL_23:.*]] = fir.insert_value %[[VAL_22]], 
%[[VAL_13]], [1 : index] : (tuple>, i1>, i1) -> 
tuple>, i1>
 // CHECK: omp.barrier
 // CHECK: omp.terminator
 // CHECK:   }
@@ -168,31 +163,26 @@ func.func @wsfunc(%arg0: !fir.ref>) {
 // CHECK:   %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_11]]) 
{uniq_name = "array"} : (!fir.ref>, !fir.shape<1>) -> 
(!fir.ref>, !fir.ref>)
 // CHECK:   %[[VAL_13:.*]] = fir.load %[[VAL_2]] : 
!fir.ref>>
 // CHECK:   %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]](%[[VAL_11]]) 
{uniq_name = ".tmp.array"} : (!fir.heap>, !fir.shape<1>) -> 
(!fir.heap>, !fir.heap>)
-// CHECK:   %[[VAL_15:.*]] = arith.constant true
-// CHECK:   %[[VAL_16:.*]] = arith.constant 1 : index
+// CHECK:   %[[VAL_15:.*]] = arith.constant 1 : index
 // CHECK:   omp.wsloop {
-// CHECK: omp.loop_nest (%[[VAL_17:.*]]) : index = (%[[VAL_16]]) 
to (%[[VAL_10]]) inclusive step (%[[VAL_16]]) {
-// CHECK:   %[[VAL_18:.*]] = hlfir.designate %[[VAL_12]]#0 
(%[[VAL_17]])  : (!fir.ref>, index) -> !fir.ref
-// CHECK:   %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref
-// CHECK:   %[[VAL_20:.*]] = fir.load %[[VAL_1]] : !fir.ref
-// CHECK:   %[[VAL_21:.*]] = arith.subi %[[VAL_19]], %[[VAL_20]] : 
i32
-// CHECK:   %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_9]] : 
i32
-// CHECK:   %[[VAL_23:.*]] = hlfir.designate %[[VAL_14]]#0 
(%[[VAL_17]])  : (!fir.heap>, index) -> !fir.ref
-// CHECK:   hlfir.assign %[[VAL_22]] to %[[VAL_23]] temporary_lhs 
: i32, !fir.ref
+// CHECK: omp.loop_n

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov closed 
https://github.com/llvm/llvm-project/pull/101443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov reopened 
https://github.com/llvm/llvm-project/pull/101443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From 4b1c15bf4dcd753e35ec5c1118b107ea058c58df Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 1/5] Add workshare loop wrapper lowerings

---
 .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp  |  6 --
 .../HLFIR/Transforms/OptimizedBufferization.cpp| 10 +++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index b608677c526310..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,12 +26,13 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 namespace hlfir {
@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));
 auto insPt = builder.saveInsertionPoint();
 builder.setInsertionPointToStart(loopNest.body);
 auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 3a0a98dc594463..f014724861e333 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult 
ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
 loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult 
BroadcastAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto arrayElement =
   hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -648,7 +651,8 @@ llvm::LogicalResult 
VariableAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto rhsArrayElement =
   hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);

>From a79d7c8cee84295ef7281b0b6aabf2ea5ed50b9e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Mon, 19 Aug 2024 15:01:31 +0900
Subject: [PATCH 2/5] Bufferize test

---
 flang/test/HLFIR/bufferize-workshare.fir | 58 
 1 file changed, 58 insertions(+)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir

diff --git a/flang/test/HLFIR/bufferize-workshare.fir 
b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 00..86a2f031478dd7
--- /dev/null
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -0,0 +1,58 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// CH

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 3d1258582adc0ec506a23dc3efdba371c29612ca Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d614db8b68ef65..83c90374afa5e3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
   default:
 // Combined and composite constructs should have been split into a sequence

>From 5e01e41362f11f2309dea217ada9026aa437433d Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 451a9d2f26cfd8cb770d1ae35d834c63fce56b79 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() &&

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

error: too big or took too long to generate
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From e5789180a3dd1fd8c46a5d7dfc446921110642ca Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d614db8b68ef65..83c90374afa5e3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
   default:
 // Combined and composite constructs should have been split into a sequence

>From 70daa016c0c39861926b1b82e31b96db005cfba1 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-21 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 81606df746e9862c330681ed8ae9113a43e577a2 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() &&

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From d343f3a86f56864757ccdf889fd6897d9d9507e9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:09:09 +0900
Subject: [PATCH 1/6] [MLIR][omp] Add omp.workshare op

Add custom omp loop wrapper

Add recursive memory effects trait to workshare

Remove stray include

Remove omp.workshare verifier

Add assembly format for wrapper and add test

Add verification and descriptions
---
 .../Dialect/OpenMP/OpenMPClauseOperands.h |  2 +
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 43 
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 23 +++
 mlir/test/Dialect/OpenMP/invalid.mlir | 42 +++
 mlir/test/Dialect/OpenMP/ops.mlir | 69 +++
 5 files changed, 179 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..896ca9581c3fc8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -316,6 +316,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task.
+
+This operation is used for the intermediate representation of the workshare
+block before the work gets divided between the threads. See the flang
+LowerWorkshare pass for details.
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+}
+
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+  let description = [{
+This operation wraps a loop nest that is marked for dividing into units of
+work by an encompassing omp.workshare operation.
+  }];
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+  let assemblyFormat = "$region attr-dict";
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 4c943ebbe3144f..f4acbd97ca6d1a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1689,6 +1689,29 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+//===--===//
+// WorkshareLoopWrapperOp
+//===--===//
+
+LogicalResult WorkshareLoopWrapperOp::verify() {
+  if (!isWrapper())
+return emitOpError() << "must be a loop wrapper";
+  if (getNestedWrapper())
+return emitError() << "nested wrappers not supported";
+  if (!(*this)->getParentOfType())
+return emitError() << "must be nested in an omp.workshare";
+  return success();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir 
b/mlir/test/Dialect/OpenMP/invalid.mlir
index c76b07ec94a597..c330f4c8d0f76a 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2545,3 +2545,45 @@ func.func @omp_taskloop_invalid_composite(%lb: index, 
%ub: index, %step: index)
   } {omp.composite}
   return
 }
+
+// -
+func.func @nested_wrapper(%idx : index) {
+  omp.workshare {
+// expected-error @below {{nested wrappers not supported}}
+omp.workshare.loop_wrapper {
+  omp.simd {
+omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
+  omp.yield
+}
+omp.terminator
+  }
+  omp.terminator
+}
+omp.terminator
+  }
+  return
+}
+
+// -
+func.func @not_wrapper() {
+  omp.workshare {
+//

[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From d343f3a86f56864757ccdf889fd6897d9d9507e9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:09:09 +0900
Subject: [PATCH 1/5] [MLIR][omp] Add omp.workshare op

Add custom omp loop wrapper

Add recursive memory effects trait to workshare

Remove stray include

Remove omp.workshare verifier

Add assembly format for wrapper and add test

Add verification and descriptions
---
 .../Dialect/OpenMP/OpenMPClauseOperands.h |  2 +
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 43 
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 23 +++
 mlir/test/Dialect/OpenMP/invalid.mlir | 42 +++
 mlir/test/Dialect/OpenMP/ops.mlir | 69 +++
 5 files changed, 179 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..896ca9581c3fc8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -316,6 +316,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task.
+
+This operation is used for the intermediate representation of the workshare
+block before the work gets divided between the threads. See the flang
+LowerWorkshare pass for details.
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+}
+
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+  let description = [{
+This operation wraps a loop nest that is marked for dividing into units of
+work by an encompassing omp.workshare operation.
+  }];
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+  let assemblyFormat = "$region attr-dict";
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 4c943ebbe3144f..f4acbd97ca6d1a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1689,6 +1689,29 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+//===--===//
+// WorkshareLoopWrapperOp
+//===--===//
+
+LogicalResult WorkshareLoopWrapperOp::verify() {
+  if (!isWrapper())
+return emitOpError() << "must be a loop wrapper";
+  if (getNestedWrapper())
+return emitError() << "nested wrappers not supported";
+  if (!(*this)->getParentOfType())
+return emitError() << "must be nested in an omp.workshare";
+  return success();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir 
b/mlir/test/Dialect/OpenMP/invalid.mlir
index c76b07ec94a597..c330f4c8d0f76a 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2545,3 +2545,45 @@ func.func @omp_taskloop_invalid_composite(%lb: index, 
%ub: index, %step: index)
   } {omp.composite}
   return
 }
+
+// -
+func.func @nested_wrapper(%idx : index) {
+  omp.workshare {
+// expected-error @below {{nested wrappers not supported}}
+omp.workshare.loop_wrapper {
+  omp.simd {
+omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
+  omp.yield
+}
+omp.terminator
+  }
+  omp.terminator
+}
+omp.terminator
+  }
+  return
+}
+
+// -
+func.func @not_wrapper() {
+  omp.workshare {
+//

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From d4310a06639c6cd1565aac2d2bbfebffcf9e175d Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

Fix lower test for workshare
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++
 flang/test/Lower/OpenMP/workshare.f90 |  6 +++---
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d614db8b68ef65..83c90374afa5e3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
   default:
 // Combined and composite constructs should have been split into a sequence
diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

>From fb06794ba6259fc3bcc3b9c73108a03e77a0b42d Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Thu, 22 Aug 2024 17:01:43 +0900
Subject: [PATCH 2/2] Fix function signature

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 83c90374afa5e3..086dd7aaeaab88 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1908,12 +1908,14 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
 
 static mlir::omp::WorkshareOp
 genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
-   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-   mlir::Location loc, const ConstructQueue &queue,
-   ConstructQueue::

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From c3ec4f1bd4a51139a10b1450e3e194a9270b7362 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering

Emit loop nests in a custom wrapper

Only emit unordered loops as omp loops

Fix uninitialized memory bug in genLoopNest
---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..f073f494b3fb21 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp = nullptr;
+  mlir::Block *body = nullptr;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWorkshareLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWorkshareLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWorkshareLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..31378841ed 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   m

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From 45a5069b5b783e0e1cd5fa0ba8f8098a980eb94e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Thu, 22 Aug 2024 18:05:31 +0900
Subject: [PATCH 1/6] wrong replace

---
 mlir/test/Dialect/OpenMP/ops.mlir | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/test/Dialect/OpenMP/ops.mlir 
b/mlir/test/Dialect/OpenMP/ops.mlir
index 0e1f5ebb1a3739..4c6843bf89ad2f 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2845,8 +2845,8 @@ func.func @omp_workshare_multiple_blocks() {
   return
 }
 
-// CHECK-LABEL: func @omp_workshare.loop_wrapper
-func.func @omp_workshare.loop_wrapper(%idx : index) {
+// CHECK-LABEL: func @omp_workshare_loop_wrapper
+func.func @omp_workshare_loop_wrapper(%idx : index) {
   // CHECK-NEXT: omp.workshare {
   omp.workshare {
 // CHECK-NEXT: omp.workshare.loop_wrapper
@@ -2862,8 +2862,8 @@ func.func @omp_workshare.loop_wrapper(%idx : index) {
   return
 }
 
-// CHECK-LABEL: func @omp_workshare.loop_wrapper_attrs
-func.func @omp_workshare.loop_wrapper_attrs(%idx : index) {
+// CHECK-LABEL: func @omp_workshare_loop_wrapper_attrs
+func.func @omp_workshare_loop_wrapper_attrs(%idx : index) {
   // CHECK-NEXT: omp.workshare {
   omp.workshare {
 // CHECK-NEXT: omp.workshare.loop_wrapper {

>From 0984407c261496c9dc53fdd2d4d5c2431dd90359 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 2/6] [flang][omp] Emit omp.workshare in frontend

Fix lower test for workshare
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++
 flang/test/Lower/OpenMP/workshare.f90 |  6 +++---
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d614db8b68ef65..83c90374afa5e3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
   default:
 // Combined and composite constructs should have been split into a sequence
diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutin

[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From 45a5069b5b783e0e1cd5fa0ba8f8098a980eb94e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Thu, 22 Aug 2024 18:05:31 +0900
Subject: [PATCH 1/5] wrong replace

---
 mlir/test/Dialect/OpenMP/ops.mlir | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/test/Dialect/OpenMP/ops.mlir 
b/mlir/test/Dialect/OpenMP/ops.mlir
index 0e1f5ebb1a3739..4c6843bf89ad2f 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2845,8 +2845,8 @@ func.func @omp_workshare_multiple_blocks() {
   return
 }
 
-// CHECK-LABEL: func @omp_workshare.loop_wrapper
-func.func @omp_workshare.loop_wrapper(%idx : index) {
+// CHECK-LABEL: func @omp_workshare_loop_wrapper
+func.func @omp_workshare_loop_wrapper(%idx : index) {
   // CHECK-NEXT: omp.workshare {
   omp.workshare {
 // CHECK-NEXT: omp.workshare.loop_wrapper
@@ -2862,8 +2862,8 @@ func.func @omp_workshare.loop_wrapper(%idx : index) {
   return
 }
 
-// CHECK-LABEL: func @omp_workshare.loop_wrapper_attrs
-func.func @omp_workshare.loop_wrapper_attrs(%idx : index) {
+// CHECK-LABEL: func @omp_workshare_loop_wrapper_attrs
+func.func @omp_workshare_loop_wrapper_attrs(%idx : index) {
   // CHECK-NEXT: omp.workshare {
   omp.workshare {
 // CHECK-NEXT: omp.workshare.loop_wrapper {

>From 0984407c261496c9dc53fdd2d4d5c2431dd90359 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 2/5] [flang][omp] Emit omp.workshare in frontend

Fix lower test for workshare
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++
 flang/test/Lower/OpenMP/workshare.f90 |  6 +++---
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d614db8b68ef65..83c90374afa5e3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
   default:
 // Combined and composite constructs should have been split into a sequence
diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutin

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From d5fbe9c7482b87be295be03aafd5917dd7c17859 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Thu, 22 Aug 2024 18:07:05 +0900
Subject: [PATCH] [flang] Introduce ws loop nest generation for HLFIR lowering

Emit loop nests in a custom wrapper

Only emit unordered loops as omp loops

Fix uninitialized memory bug in genLoopNest
---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 33 ++--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..f073f494b3fb21 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp = nullptr;
+  mlir::Block *body = nullptr;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWorkshareLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWorkshareLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWorkshareLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..31378841ed 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 0984407c261496c9dc53fdd2d4d5c2431dd90359 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

Fix lower test for workshare
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 +++
 flang/test/Lower/OpenMP/workshare.f90 |  6 +++---
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d614db8b68ef65..83c90374afa5e3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1272,6 +1272,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1897,6 +1906,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2309,10 +2334,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
   default:
 // Combined and composite constructs should have been split into a sequence
diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

>From de32599eae894520fa383537f3cedacab14e6a87 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Thu, 22 Aug 2024 17:01:43 +0900
Subject: [PATCH 2/2] Fix function signature

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 83c90374afa5e3..086dd7aaeaab88 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1908,12 +1908,14 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
 
 static mlir::omp::WorkshareOp
 genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
-   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-   mlir::Location loc, const ConstructQueue &queue,
-   ConstructQueue::

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

@skatrak 
I considered that as well, however, I did not like it because the semantics of 
the block that the operation is contained in is different. wsloop expects its 
parent block to be a parallel block which all threads will execute and all of 
those threads will share the work of the nested loop nest. Whereas the 
workshare.loop_nest op is semantically executed by a single thread (because the 
workshare directive acts as if it preserves the semantics of single-threaded 
Fortran execution).

I don't think it is currently a problem, however, if in the future someone adds 
some optimization or transformation that assumes that it is nested in a 
parallel it may break wsloops which happen to be nested in a workshare instead. 
(As you said it is possible to check whether it is supposed to be a workshare 
or parallel wsloop but an op changing its semantics or transformations 
depending on what it's nested in feels more error-prone).

So that is why I opted for this approach.

https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,446 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements the lowering of omp.workshare to other omp constructs.
+//
+// This pass is tasked with parallelizing the loops nested in
+// workshare.loop_wrapper while both the Fortran to mlir lowering and the hlfir
+// to fir lowering pipelines are responsible for emitting the
+// workshare.loop_wrapper ops where appropriate according to the
+// `shouldUseWorkshareLowering` function.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+
+// Checks for nesting pattern below as we need to avoid sharing the work of
+// statements which are nested in some constructs such as omp.critical or
+// another omp.parallel.
+//
+// omp.workshare { // `wsOp`
+//   ...
+// omp.T { // `parent`
+//   ...
+// `op`
+//
+template 
+static bool isNestedIn(omp::WorkshareOp wsOp, Operation *op) {
+  T parent = op->getParentOfType();
+  if (!parent)
+return false;
+  return wsOp->isProperAncestor(parent);
+}
+
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto parentWorkshare = op->getParentOfType();
+
+  if (!parentWorkshare)
+return false;
+
+  if (isNestedIn(parentWorkshare, op))
+return false;
+
+  // 2.8.3  workshare Construct
+  // For a parallel construct, the construct is a unit of work with respect to
+  // the workshare construct. The statements contained in the parallel 
construct
+  // are executed by a new thread team.
+  if (isNestedIn(parentWorkshare, op))
+return false;
+
+  // 2.8.2  single Construct
+  // Binding The binding thread set for a single region is the current team. A
+  // single region binds to the innermost enclosing parallel region.
+  // Description Only one of the encountering threads will execute the
+  // structured block associated with the single construct.
+  if (isNestedIn(parentWorkshare, op))
+return false;
+
+  return true;
+}
+
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool mustParallelizeOp(Operation *op) {
+  return op
+  ->walk([&](Operation *nested) {
+// We need to be careful not to pick up workshare.loop_wrapper in 
nested
+// omp.parallel{omp.workshare} regions, i.e. make sure that `nested`
+// binds to the workshare region we are currently handling.
+//
+// For example:
+//
+// omp.parallel {
+//   omp.workshare { // currently handling this
+// omp.parallel {
+//   omp.workshare { // nested workshare
+// omp.workshare.loop_wrapper {}
+//
+// Therefore, we skip if we encounter a nested omp.workshare.
+if (isa(op))
+  return WalkResult::skip();
+if (isa(op))
+  return WalkResult::interrupt();
+return WalkResult::advance();
+  })
+  .wasInterrupted();
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  return isa(op) || isa(op) ||
+ isMemoryEffectFree(op);
+}
+
+/// Simple shallow copies suffice for our purposes in this pass, so we 
implement
+/// this simpler alternative to the full fledged `createCopyFunc` in the
+/// frontend
+static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type 
varType,
+ fir::FirOpBuilder builder) {
+  mlir::ModuleOp module = builder.getModule();
+  auto rt = cast(varType);
+  mlir::Type eleTy = rt.getEleTy();
+  std::string copyFuncName =
+  fir::getTypeAsString(eleTy, builder.getKindMap(), "_workshare_copy");
+
+  if (auto decl = module.lookupSymbol(copyFuncName))
+return decl;
+  // create function
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  mlir::OpBuilder modBuilder(module.getBodyRegion());
+  llvm::SmallVector argsTy = {varType, varType};
+  auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {});
+  mlir::func::FuncOp funcOp =
+  modBuilder.create(loc, copyFuncName, funcType);
+  funcOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+  builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy,
+

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-22 Thread Ivan R. Ivanov via llvm-branch-commits



@@ -0,0 +1,446 @@
+//===- LowerWorkshare.cpp - special cases for bufferization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements the lowering of omp.workshare to other omp constructs.
+//
+// This pass is tasked with parallelizing the loops nested in
+// workshare.loop_wrapper while both the Fortran to mlir lowering and the hlfir
+// to fir lowering pipelines are responsible for emitting the
+// workshare.loop_wrapper ops where appropriate according to the
+// `shouldUseWorkshareLowering` function.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+
+// Checks for nesting pattern below as we need to avoid sharing the work of
+// statements which are nested in some constructs such as omp.critical or
+// another omp.parallel.
+//
+// omp.workshare { // `wsOp`
+//   ...
+// omp.T { // `parent`
+//   ...
+// `op`
+//
+template 
+static bool isNestedIn(omp::WorkshareOp wsOp, Operation *op) {
+  T parent = op->getParentOfType();
+  if (!parent)
+return false;
+  return wsOp->isProperAncestor(parent);
+}
+
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto parentWorkshare = op->getParentOfType();
+
+  if (!parentWorkshare)
+return false;
+
+  if (isNestedIn(parentWorkshare, op))
+return false;
+
+  // 2.8.3  workshare Construct
+  // For a parallel construct, the construct is a unit of work with respect to
+  // the workshare construct. The statements contained in the parallel 
construct
+  // are executed by a new thread team.
+  if (isNestedIn(parentWorkshare, op))
+return false;
+
+  // 2.8.2  single Construct
+  // Binding The binding thread set for a single region is the current team. A
+  // single region binds to the innermost enclosing parallel region.
+  // Description Only one of the encountering threads will execute the
+  // structured block associated with the single construct.
+  if (isNestedIn(parentWorkshare, op))
+return false;
+
+  return true;
+}
+
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool mustParallelizeOp(Operation *op) {
+  return op
+  ->walk([&](Operation *nested) {
+// We need to be careful not to pick up workshare.loop_wrapper in 
nested
+// omp.parallel{omp.workshare} regions, i.e. make sure that `nested`
+// binds to the workshare region we are currently handling.
+//
+// For example:
+//
+// omp.parallel {
+//   omp.workshare { // currently handling this
+// omp.parallel {
+//   omp.workshare { // nested workshare
+// omp.workshare.loop_wrapper {}
+//
+// Therefore, we skip if we encounter a nested omp.workshare.
+if (isa(op))
+  return WalkResult::skip();
+if (isa(op))
+  return WalkResult::interrupt();
+return WalkResult::advance();
+  })
+  .wasInterrupted();
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  return isa(op) || isa(op) ||
+ isMemoryEffectFree(op);
+}
+
+/// Simple shallow copies suffice for our purposes in this pass, so we 
implement
+/// this simpler alternative to the full fledged `createCopyFunc` in the
+/// frontend
+static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type 
varType,
+ fir::FirOpBuilder builder) {
+  mlir::ModuleOp module = builder.getModule();
+  auto rt = cast(varType);
+  mlir::Type eleTy = rt.getEleTy();
+  std::string copyFuncName =
+  fir::getTypeAsString(eleTy, builder.getKindMap(), "_workshare_copy");
+
+  if (auto decl = module.lookupSymbol(copyFuncName))
+return decl;
+  // create function
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  mlir::OpBuilder modBuilder(module.getBodyRegion());
+  llvm::SmallVector argsTy = {varType, varType};
+  auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {});
+  mlir::func::FuncOp funcOp =
+  modBuilder.create(loc, copyFuncName, funcType);
+  funcOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+  builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy,
+

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-23 Thread Ivan R. Ivanov via llvm-branch-commits

ivanradanov wrote:

> Maybe support for this operation could be just based on changes to how the 
> MLIR representation is built in the first place, what do you think?

This is partly what this implementation aims to do. In fact, after the pass 
that lowers the omp.workshare operation we are left with IR very close to the 
one you showed in your example.

The approach taken here is similar to the omp.workdistribute implementation, in 
that the purpose of the omp.workshare and omp.workshare.loop_wrapper ops is to 
preserve the high-level optimizations available when using HLFIR. After we are 
done with the LowerWorkshare pass, both omp.workshare and 
omp.workshare.loop_wrapper disappear.

The sole purpose of the omp.workshare.loop_wrapper op is to be able to 
more explicitly mark loops that need to be "parallelized" by the workshare 
construct and preserve that information through the pipeline. Its lifetime is 
from the frontend (Fortran->{HLFIR,FIR}) up to the LowerWorkshare pass, 
which runs after we are done with HLFIR optimizations (after HLFIR->FIR 
lowering); the same holds for omp.workshare.

https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-23 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

No, you are right, sorry for the back and forth. As you said, since a wsloop can 
only be nested in an omp.parallel, it is immediately obvious that it binds to the 
omp.parallel threads, so that makes sense.

My only concern was that at some point some transformation (perhaps in the 
future, because I don't think anything transforms `wsloop`s currently) could 
make the assumption that all (or none) of the threads of the team an 
`omp.parallel` launches will execute the parent block of a `wsloop` that binds 
to that team.

I thought this was a fair assumption for an optimization/transformation to make, 
because if, for example, only one of the threads executes a wsloop, it would not 
produce the intended result. (For example, a transformation adds an operation 
immediately before the wsloop which is supposed to be executed by all threads in 
the omp.parallel. That operation would then be erroneously wrapped in an 
omp.single in LowerWorkshare.) So the intention was to guard against a potential 
error like that. Let me know if I am wrong here, since I am sure people here have 
more experience than me on this. 

I can see that if no transformation can make that assumption, then it is 
perfectly safe to use `omp.wsloop` instead of `omp.workshare.loop_wrapper`. I 
am fine with both ways and can make that change if you think it is better. (In 
fact, that is what the initial version of this PR did. I decided to introduce 
the `omp.workshare.loop_wrapper` later because I was concerned about a potential 
issue like the one above.)

https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-28 Thread Ivan R. Ivanov via llvm-branch-commits

ivanradanov wrote:

> ... However, they would work if they ran after the pass lowering 
> `omp.workshare` to a set of `omp.single` for the code in between 
> `omp.wsloop`s. That way we would not have to introduce a new loop wrapper and 
> also we could create passes assuming the parent of region of an `omp.wsloop` 
> is executed by all threads in the team. I don't think that should be an 
> issue, since in principle it makes sense to me that the `omp.workshare` 
> transformation would run immediately after PFT to MLIR lowering. What do you 
> think about that alternative?

Ideally, the `omp.workshare` lowering will run after the HLFIR to FIR lowering, 
because missing the high-level optimizations that HLFIR provides can result in 
very bad performance (unneeded temporary arrays, unnecessary copies, non-fused 
array computation, etc). The workshare lowering transforms the 
`omp.workshare.loop_wrapper`s into `omp.wsloop`s, so they are gone after that.

Another factor is that there may not be PFT->loop lowerings for many constructs 
that need to be divided into units of work, so we may need to first generate 
HLFIR and alter the lowerings from HLFIR to FIR to get the `omp.wsloop` (or 
`omp.workshare.loop_wrapper`). This means that there will be portions of the 
pipeline (from PFT->HLFIR until HLFIR->FIR) where an `omp.wsloop` nested in an 
`omp.workshare` will be the wrong representation.

Are there any concerns with adding `omp.workshare.loop_wrapper`? I do not see 
that big of an overhead (maintenance or compile time) resulting from its 
addition, while it makes things clearer and more robust in my opinion.

https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-29 Thread Ivan R. Ivanov via llvm-branch-commits

ivanradanov wrote:

> Can you share a case where this would happen? I agree that we wouldn't want 
> to produce some IR that doesn't keep consistent semantics for a given 
> operation across the pipeline. In that case, adding another operation might 
> indeed be the right solution.

I was under the impression that direct PFT to FIR lowering is deprecated, so 
things like array notation (e.g. z = x + y where x,y,z are arrays) always go 
through hlfir.elemental and then to fir loops. Not sure if the PFT->FIR 
lowering for that exists, but if PFT->FIR is deprecated then we should probably 
use the HLFIR lowering for this.

> My main concern is from the dialect design perspective. It would be confusing 
> to have two separate "worksharing loop" operations with one being used on its 
> own and the other one in conjunction with the `omp.workshare` operation, but 
> both basically representing the same thing (splitting loop iterations across 
> threads in the team). That's why I'm trying to explore other options that may 
> result in a better representation before committing to it.

I think the operations describe very different things. The similarity in naming 
is an unfortunate consequence of the `workshare` construct having the same name 
as a `workshare loop` (I am open to more descriptive name suggestions). How I 
read it is: `omp.wsloop` is "each thread from the team that encounters it 
executes its share of the loop nest", whereas `omp.workshare.loop_wrapper` 
is "this loop nest is marked for dividing into units of work by the 
encompassing `omp.workshare`" (as per the standard). Semantically, it is just a 
loop nest that is executed by a single thread, and only when the workshare 
lowering transforms it into an `omp.wsloop` does it turn into a worksharing 
loop.

https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-09-10 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

@kiranchandramohan @tblah @skatrak I have a question for people more familiar 
with Fortran and the entire Flang pipeline: is it possible that we would have 
CFG (multiple blocks) in the IR generated in the workshare statement at this 
point in the pipeline (immediately after lowering HLFIR to FIR)?

The transformation I implemented can work with CFG, but the transformation 
_inlines_ the region contained in the `omp.workshare` op into its parent op 
(while transforming it), which means that the parent op would need to support 
multiple blocks, which is not a given (e.g. `fir.if`). Is there an operation 
like `scf.execute` which can be used here to inline the contents of the 
`omp.workshare`, or should we not support CFG in this transformation?

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-09-10 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

Ah yes, I meant `scf.execute_region`. But when I tried creating that, it was 
not registered, so I thought it was a deliberate decision to not pull in the scf 
dialect, and I opted not to go for that lowering. I was wondering if there is 
some op that is like `scf.execute_region` but already used in flang.

I will go for emitting an error in case there are multiple blocks for this 
first iteration then. 

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-10-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From c5b5369be3d0db31d9ded0eeeb8e28e03d25bd9e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 4 Oct 2024 22:45:09 +0900
Subject: [PATCH 1/6] Fix bug and add better clarification comments

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 28 ---
 .../lower-workshare-correct-parallelize.mlir  | 16 +++
 2 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 
flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp 
b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index 4d8e2a9a067141..84cf5e82167987 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -188,14 +189,19 @@ static bool isTransitivelyUsedOutside(Value v, 
SingleRegion sr) {
 if (isUserOutsideSR(user, parentOp, sr))
   return true;
 
-// Results of nested users cannot be used outside of the SR
+// Now we know user is inside `sr`.
+
+// Results of nested users cannot be used outside of `sr`.
 if (user->getBlock() != srBlock)
   continue;
 
-// A non-safe to parallelize operation will be handled separately
+// A non-safe to parallelize operation will be checked for uses outside
+// separately.
 if (!isSafeToParallelize(user))
   continue;
 
+// For safe to parallelize operations, we need to check if there is a
+// transitive use of `v` through them.
 for (auto res : user->getResults())
   if (isTransitivelyUsedOutside(res, sr))
 return true;
@@ -242,7 +248,21 @@ static void parallelizeRegion(Region &sourceRegion, Region 
&targetRegion,
 for (Operation &op : llvm::make_range(sr.begin, sr.end)) {
   if (isSafeToParallelize(&op)) {
 singleBuilder.clone(op, singleMapping);
-parallelBuilder.clone(op, rootMapping);
+if (llvm::all_of(op.getOperands(), [&](Value opr) {
+  return rootMapping.contains(opr);
+})) {
+  // Safe to parallelize operations which have all operands available 
in
+  // the root parallel block can be executed there.
+  parallelBuilder.clone(op, rootMapping);
+} else {
+  // If any operand was not available, it means that there was no
+  // transitive use of a non-safe-to-parallelize operation outside 
`sr`.
+  // This means that there should be no transitive uses outside `sr` of
+  // `op`.
+  assert(llvm::all_of(op.getResults(), [&](Value v) {
+return !isTransitivelyUsedOutside(v, sr);
+  }));
+}
   } else if (auto alloca = dyn_cast(&op)) {
 auto hoisted =
 cast(allocaBuilder.clone(*alloca, singleMapping));
@@ -252,7 +272,7 @@ static void parallelizeRegion(Region &sourceRegion, Region 
&targetRegion,
   } else {
 singleBuilder.clone(op, singleMapping);
 // Prepare reloaded values for results of operations that cannot be
-// safely parallelized and which are used after the region `sr`
+// safely parallelized and which are used after the region `sr`.
 for (auto res : op.getResults()) {
   if (isTransitivelyUsedOutside(res, sr)) {
 auto alloc = mapReloadedValue(res, allocaBuilder, singleBuilder,
diff --git 
a/flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir 
b/flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir
new file mode 100644
index 00..99ca4fe5a0e212
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/lower-workshare-correct-parallelize.mlir
@@ -0,0 +1,16 @@
+// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s | FileCheck 
%s
+
+// Check that the safe to parallelize `fir.declare` op will not be parallelized
+// due to its operand %alloc not being reloaded outside the omp.single.
+
+func.func @foo() {
+  %c0 = arith.constant 0 : index
+  omp.workshare {
+%alloc = fir.allocmem !fir.array, %c0 {bindc_name = ".tmp.forall", 
uniq_name = ""}
+%shape = fir.shape %c0 : (index) -> !fir.shape<1>
+%declare = fir.declare %alloc(%shape) {uniq_name = ".tmp.forall"} : 
(!fir.heap>, !fir.shape<1>) -> !fir.heap>
+fir.freemem %alloc : !fir.heap>
+omp.terminator
+  }
+  return
+}

>From 33d6674ca8dfc1adf3b02f45317a7f068a7f7cb3 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 2/6] Add workshare loop wrapper lowerings

Bufferize test

Bufferize test

Bufferize test

Add test for should use workshare lowering
---
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   4 +-
 .../Transforms/OptimizedBufferization.cpp |  10 +-
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../OpenMP/should-use-workshare-lowering.mlir | 140 +++

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-10-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 22:06:55 +0900
Subject: [PATCH 01/10] [flang] Lower omp.workshare to other omp constructs

Change to workshare loop wrapper op

Move single op declaration

Schedule pass properly

Correctly handle nested nested loop nests to be parallelized by workshare

Leave comments for shouldUseWorkshareLowering

Use copyprivate to scatter val from omp.single

TODO still need to implement copy function
TODO transitive check for usage outside of omp.single not imiplemented yet

Transitively check for users outisde of single op

TODO need to implement copy func
TODO need to hoist allocas outside of single regions

Add tests

Hoist allocas

More tests

Emit body for copy func

Test the tmp storing logic

Clean up trivially dead ops

Only handle single-block regions for now

Fix tests for custom assembly for loop wrapper

Only run the lower workshare pass if openmp is enabled

Implement some missing functionality

Fix tests

Fix test

Iterate backwards to find all trivially dead ops

Add expalanation comment for createCopyFun

Update test
---
 flang/include/flang/Optimizer/OpenMP/Passes.h |   5 +
 .../include/flang/Optimizer/OpenMP/Passes.td  |   5 +
 flang/include/flang/Tools/CLOptions.inc   |   6 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/FrontendActions.cpp|  10 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |   1 +
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++
 flang/test/Fir/basic-program.fir  |   1 +
 .../Transforms/OpenMP/lower-workshare.mlir| 189 
 .../Transforms/OpenMP/lower-workshare2.mlir   |  23 +
 .../Transforms/OpenMP/lower-workshare3.mlir   |  74 +++
 .../Transforms/OpenMP/lower-workshare4.mlir   |  59 +++
 .../Transforms/OpenMP/lower-workshare5.mlir   |  42 ++
 .../Transforms/OpenMP/lower-workshare6.mlir   |  51 ++
 flang/tools/bbc/bbc.cpp   |   5 +-
 flang/tools/tco/tco.cpp   |   1 +
 16 files changed, 915 insertions(+), 4 deletions(-)
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir

diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
index 403d79667bf448..feb395f1a12dbd 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.h
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -25,6 +25,11 @@ namespace flangomp {
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
 
+/// Impelements the logic specified in the 2.8.3  workshare Construct section 
of
+/// the OpenMP standard which specifies what statements or constructs shall be
+/// divided into units of work.
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
 } // namespace flangomp
 
 #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 395178e26a5762..041240cad12eb3 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+// Needs to be scheduled on Module as we create functions in it
+def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> {
+  let summary = "Lower workshare construct";
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 1881e23b00045a..bb00e079008a0b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) 
{
+mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(
@@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  if (enableOpenMP)
+pm.a

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-10-04 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

@Thirumalai-Shaktivel Thank you very much. Fixed. 

`forall` is actually a case which we do not handle yet. You can give it a shot 
if you would like.

https://github.com/llvm/llvm-project/pull/104748
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-10-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 22:06:55 +0900
Subject: [PATCH 01/11] [flang] Lower omp.workshare to other omp constructs

Change to workshare loop wrapper op

Move single op declaration

Schedule pass properly

Correctly handle nested nested loop nests to be parallelized by workshare

Leave comments for shouldUseWorkshareLowering

Use copyprivate to scatter val from omp.single

TODO still need to implement copy function
TODO transitive check for usage outside of omp.single not imiplemented yet

Transitively check for users outisde of single op

TODO need to implement copy func
TODO need to hoist allocas outside of single regions

Add tests

Hoist allocas

More tests

Emit body for copy func

Test the tmp storing logic

Clean up trivially dead ops

Only handle single-block regions for now

Fix tests for custom assembly for loop wrapper

Only run the lower workshare pass if openmp is enabled

Implement some missing functionality

Fix tests

Fix test

Iterate backwards to find all trivially dead ops

Add expalanation comment for createCopyFun

Update test
---
 flang/include/flang/Optimizer/OpenMP/Passes.h |   5 +
 .../include/flang/Optimizer/OpenMP/Passes.td  |   5 +
 flang/include/flang/Tools/CLOptions.inc   |   6 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/FrontendActions.cpp|  10 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |   1 +
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++
 flang/test/Fir/basic-program.fir  |   1 +
 .../Transforms/OpenMP/lower-workshare.mlir| 189 
 .../Transforms/OpenMP/lower-workshare2.mlir   |  23 +
 .../Transforms/OpenMP/lower-workshare3.mlir   |  74 +++
 .../Transforms/OpenMP/lower-workshare4.mlir   |  59 +++
 .../Transforms/OpenMP/lower-workshare5.mlir   |  42 ++
 .../Transforms/OpenMP/lower-workshare6.mlir   |  51 ++
 flang/tools/bbc/bbc.cpp   |   5 +-
 flang/tools/tco/tco.cpp   |   1 +
 16 files changed, 915 insertions(+), 4 deletions(-)
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir

diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
index 403d79667bf448..feb395f1a12dbd 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.h
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -25,6 +25,11 @@ namespace flangomp {
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
 
+/// Impelements the logic specified in the 2.8.3  workshare Construct section 
of
+/// the OpenMP standard which specifies what statements or constructs shall be
+/// divided into units of work.
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
 } // namespace flangomp
 
 #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 395178e26a5762..041240cad12eb3 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+// Needs to be scheduled on Module as we create functions in it
+def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> {
+  let summary = "Lower workshare construct";
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 1881e23b00045a..bb00e079008a0b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) 
{
+mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(
@@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  if (enableOpenMP)
+pm.a

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-10-04 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From 8d0651ff644fa6821e0d0fbc4c47fee36802a15c Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 4 Oct 2024 22:48:42 +0900
Subject: [PATCH 1/6] Fix message

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp 
b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index 84cf5e82167987..a91f64f04a30aa 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -466,8 +466,9 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, 
DominanceInfo &di) {
   } else {
 // Otherwise just change the operation to an omp.single.
 
-wsOp->emitWarning("omp workshare with unstructured control flow currently "
-  "unsupported and will be serialized.");
+wsOp->emitWarning(
+"omp workshare with unstructured control flow is currently "
+"unsupported and will be serialized.");
 
 // `shouldUseWorkshareLowering` should have guaranteed that there are no
 // omp.workshare_loop_wrapper's that bind to this omp.workshare.

>From 881067963fea3ce7fa912692e0cca46a68288e85 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 2/6] Add workshare loop wrapper lowerings

Bufferize test

Bufferize test

Bufferize test

Add test for should use workshare lowering
---
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   4 +-
 .../Transforms/OptimizedBufferization.cpp |  10 +-
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../OpenMP/should-use-workshare-lowering.mlir | 140 ++
 4 files changed, 208 insertions(+), 4 deletions(-)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
 create mode 100644 
flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 07794828fce267..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,6 +26,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));
 auto insPt = builder.saveInsertionPoint();
 builder.setInsertionPointToStart(loopNest.body);
 auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 3a0a98dc594463..f014724861e333 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult 
ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
 loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult 
BroadcastAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto arrayElement =

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-10-03 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

@Thirumalai-Shaktivel Fixed, it was a very stupid mistake with the argument 
order of the copyprivate copy function. Thank you.

https://github.com/llvm/llvm-project/pull/104748
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-10-03 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From 07a9eb3581f480c47ce4de3de00c7cef15df3cdc Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 4 Oct 2024 14:21:14 +0900
Subject: [PATCH 1/7] Fix dst src in copy function

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp 
b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index cf1867311cc236..baf8346e7608a9 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -162,8 +162,8 @@ static mlir::func::FuncOp createCopyFunc(mlir::Location 
loc, mlir::Type varType,
   {loc, loc});
   builder.setInsertionPointToStart(&funcOp.getRegion().back());
 
-  Value loaded = builder.create(loc, funcOp.getArgument(0));
-  builder.create(loc, loaded, funcOp.getArgument(1));
+  Value loaded = builder.create(loc, funcOp.getArgument(1));
+  builder.create(loc, loaded, funcOp.getArgument(0));
 
   builder.create(loc);
   return funcOp;

>From c3ff901b31806c73228e4f47a47f420c2d2465ed Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 4 Oct 2024 14:38:48 +0900
Subject: [PATCH 2/7] Use omp.single to handle CFG cases

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 77 +--
 1 file changed, 53 insertions(+), 24 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp 
b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index baf8346e7608a9..34399abbcd20ea 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -16,7 +16,6 @@
 //
 
//===--===//
 
-#include "flang/Optimizer/Builder/Todo.h"
 #include 
 #include 
 #include 
@@ -39,7 +38,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 
@@ -96,6 +94,12 @@ bool shouldUseWorkshareLowering(Operation *op) {
   if (isNestedIn(parentWorkshare, op))
 return false;
 
+  if (parentWorkshare.getRegion().getBlocks().size() != 1) {
+parentWorkshare->emitWarning(
+"omp workshare with unstructured control flow currently unsupported.");
+return false;
+  }
+
   return true;
 }
 
@@ -408,15 +412,6 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, 
DominanceInfo &di) {
 
   OpBuilder rootBuilder(wsOp);
 
-  // This operation is just a placeholder which will be erased later. We need 
it
-  // because our `parallelizeRegion` function works on regions and not blocks.
-  omp::WorkshareOp newOp =
-  rootBuilder.create(loc, omp::WorkshareOperands());
-  if (!wsOp.getNowait())
-rootBuilder.create(loc);
-
-  parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc, di);
-
   // FIXME Currently, we only support workshare constructs with structured
   // control flow. The transformation itself supports CFG, however, once we
   // transform the MLIR region in the omp.workshare, we need to inline that
@@ -427,19 +422,53 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, 
DominanceInfo &di) {
   // time when fir ops get lowered to CFG. However, SCF is not registered in
   // flang so we cannot use it. Remove this requirement once we have
   // scf.execute_region or an alternative operation available.
-  if (wsOp.getRegion().getBlocks().size() != 1)
-TODO(wsOp->getLoc(), "omp workshare with unstructured control flow");
-
-  // Inline the contents of the placeholder workshare op into its parent block.
-  Block *theBlock = &newOp.getRegion().front();
-  Operation *term = theBlock->getTerminator();
-  Block *parentBlock = wsOp->getBlock();
-  parentBlock->getOperations().splice(newOp->getIterator(),
-  theBlock->getOperations());
-  assert(term->getNumOperands() == 0);
-  term->erase();
-  newOp->erase();
-  wsOp->erase();
+  if (wsOp.getRegion().getBlocks().size() == 1) {
+// This operation is just a placeholder which will be erased later. We need
+// it because our `parallelizeRegion` function works on regions and not
+// blocks.
+omp::WorkshareOp newOp =
+rootBuilder.create(loc, omp::WorkshareOperands());
+if (!wsOp.getNowait())
+  rootBuilder.create(loc);
+
+parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc,
+  di);
+
+// Inline the contents of the placeholder workshare op into its parent
+// block.
+Block *theBlock = &newOp.getRegion().front();
+Operation *term = theBlock->getTerminator();
+Block *parentBlock = wsOp->getBlock();
+parentBlock->getOperations().splice(newOp->getIterator(),
+theBlock->getOperations());
+assert(term->getNumOperands() == 0);
+term->erase();
+newOp->erase();
+wsOp->erase();
+  } else {
+// Otherwise just change the operation to an omp.single.
+
+//

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-10-03 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 22:06:55 +0900
Subject: [PATCH 1/8] [flang] Lower omp.workshare to other omp constructs

Change to workshare loop wrapper op

Move single op declaration

Schedule pass properly

Correctly handle nested loop nests to be parallelized by workshare

Leave comments for shouldUseWorkshareLowering

Use copyprivate to scatter val from omp.single

TODO still need to implement copy function
TODO transitive check for usage outside of omp.single not implemented yet

Transitively check for users outside of single op

TODO need to implement copy func
TODO need to hoist allocas outside of single regions

Add tests

Hoist allocas

More tests

Emit body for copy func

Test the tmp storing logic

Clean up trivially dead ops

Only handle single-block regions for now

Fix tests for custom assembly for loop wrapper

Only run the lower workshare pass if openmp is enabled

Implement some missing functionality

Fix tests

Fix test

Iterate backwards to find all trivially dead ops

Add explanation comment for createCopyFun

Update test
---
 flang/include/flang/Optimizer/OpenMP/Passes.h |   5 +
 .../include/flang/Optimizer/OpenMP/Passes.td  |   5 +
 flang/include/flang/Tools/CLOptions.inc   |   6 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/FrontendActions.cpp|  10 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |   1 +
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++
 flang/test/Fir/basic-program.fir  |   1 +
 .../Transforms/OpenMP/lower-workshare.mlir| 189 
 .../Transforms/OpenMP/lower-workshare2.mlir   |  23 +
 .../Transforms/OpenMP/lower-workshare3.mlir   |  74 +++
 .../Transforms/OpenMP/lower-workshare4.mlir   |  59 +++
 .../Transforms/OpenMP/lower-workshare5.mlir   |  42 ++
 .../Transforms/OpenMP/lower-workshare6.mlir   |  51 ++
 flang/tools/bbc/bbc.cpp   |   5 +-
 flang/tools/tco/tco.cpp   |   1 +
 16 files changed, 915 insertions(+), 4 deletions(-)
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir

diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
index 403d79667bf448..feb395f1a12dbd 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.h
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -25,6 +25,11 @@ namespace flangomp {
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
 
+/// Implements the logic specified in the 2.8.3 workshare Construct section 
of
+/// the OpenMP standard which specifies what statements or constructs shall be
+/// divided into units of work.
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
 } // namespace flangomp
 
 #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 395178e26a5762..041240cad12eb3 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+// Needs to be scheduled on Module as we create functions in it
+def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> {
+  let summary = "Lower workshare construct";
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 1881e23b00045a..bb00e079008a0b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) 
{
+mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(
@@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  if (enableOpenMP)
+pm.add

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-10-03 Thread Ivan R. Ivanov via llvm-branch-commits


https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 22:06:55 +0900
Subject: [PATCH 1/9] [flang] Lower omp.workshare to other omp constructs

Change to workshare loop wrapper op

Move single op declaration

Schedule pass properly

Correctly handle nested loop nests to be parallelized by workshare

Leave comments for shouldUseWorkshareLowering

Use copyprivate to scatter val from omp.single

TODO still need to implement copy function
TODO transitive check for usage outside of omp.single not implemented yet

Transitively check for users outside of single op

TODO need to implement copy func
TODO need to hoist allocas outside of single regions

Add tests

Hoist allocas

More tests

Emit body for copy func

Test the tmp storing logic

Clean up trivially dead ops

Only handle single-block regions for now

Fix tests for custom assembly for loop wrapper

Only run the lower workshare pass if openmp is enabled

Implement some missing functionality

Fix tests

Fix test

Iterate backwards to find all trivially dead ops

Add explanation comment for createCopyFun

Update test
---
 flang/include/flang/Optimizer/OpenMP/Passes.h |   5 +
 .../include/flang/Optimizer/OpenMP/Passes.td  |   5 +
 flang/include/flang/Tools/CLOptions.inc   |   6 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/FrontendActions.cpp|  10 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |   1 +
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++
 flang/test/Fir/basic-program.fir  |   1 +
 .../Transforms/OpenMP/lower-workshare.mlir| 189 
 .../Transforms/OpenMP/lower-workshare2.mlir   |  23 +
 .../Transforms/OpenMP/lower-workshare3.mlir   |  74 +++
 .../Transforms/OpenMP/lower-workshare4.mlir   |  59 +++
 .../Transforms/OpenMP/lower-workshare5.mlir   |  42 ++
 .../Transforms/OpenMP/lower-workshare6.mlir   |  51 ++
 flang/tools/bbc/bbc.cpp   |   5 +-
 flang/tools/tco/tco.cpp   |   1 +
 16 files changed, 915 insertions(+), 4 deletions(-)
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir

diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
index 403d79667bf448..feb395f1a12dbd 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.h
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -25,6 +25,11 @@ namespace flangomp {
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
 
+/// Implements the logic specified in the 2.8.3 workshare Construct section 
of
+/// the OpenMP standard which specifies what statements or constructs shall be
+/// divided into units of work.
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
 } // namespace flangomp
 
 #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 395178e26a5762..041240cad12eb3 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+// Needs to be scheduled on Module as we create functions in it
+def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> {
+  let summary = "Lower workshare construct";
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 1881e23b00045a..bb00e079008a0b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) 
{
+mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(
@@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  if (enableOpenMP)
+pm.add

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-10-03 Thread Ivan R. Ivanov via llvm-branch-commits


ivanradanov wrote:

> My concern with the TODO message is that some code that previously compiled 
> using the lowering of WORKSHARE as SINGLE will now hit this TODO. This is 
> okay with me so long as it is fixed soon (before LLVM 20). Otherwise, could 
> these cases continue to be lowered as SINGLE for now?

I have updated it to lower to omp.single and emit a warning in CFG cases.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

1 2 >

1 - 100 of 133 matches

Mail list logo