[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)
@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const { static_cast<const NVPTXTargetMachine &>(MF.getTarget()); return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32; } + +void NVPTXRegisterInfo::clearDebugRegisterMap() const { + debugRegisterMap.clear(); +} + +static uint64_t encodeRegisterForDwarf(std::string registerName) { + if (registerName.length() > 8) { +// The name is more than 8 characters long, and so won't fit into 64 bits. +return 0; + } + + // Encode the name string into a DWARF register number using cuda-gdb's + // encoding. See cuda_check_dwarf2_reg_ptx_virtual_register in cuda-tdep.c, + // https://github.com/NVIDIA/cuda-gdb/blob/e5cf3bddae520ffb326f95b4d98ce5c7474b828b/gdb/cuda/cuda-tdep.c#L353 + // IE the bytes of the string are concatenated in reverse into a single + // number, which is stored in ULEB128, but in practice must be no more than 8 + // bytes (excluding null terminator, which is not included). + uint64_t result = 0; + for (int i = 0; i < registerName.length(); ++i) { +result = result << 8; +char c = registerName[i]; +result += c; walter-erquinigo wrote: Use |= for clarity because you are doing a bitwise operation https://github.com/llvm/llvm-project/pull/109495 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)
@@ -1773,6 +1774,26 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( OutStreamer->emitRawText(O.str()); } +/// Translate virtual register numbers in DebugInfo locations to their printed +/// encodings, as used by CUDA-GDB. +void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers( +const MachineFunction &MF) { + const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>(); + const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo(); + + // Clear the old mapping, and add the new one. This mapping is used after the + // printing of the current function is complete, but before the next function + // is printed. + registerInfo->clearDebugRegisterMap(); + + for (auto classMap : VRegMapping) { +for (auto registerMapping : classMap.getSecond()) { walter-erquinigo wrote: could you make these iterators references? https://github.com/llvm/llvm-project/pull/109495 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)
@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const { static_cast<const NVPTXTargetMachine &>(MF.getTarget()); return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32; } + +void NVPTXRegisterInfo::clearDebugRegisterMap() const { + debugRegisterMap.clear(); +} + +static uint64_t encodeRegisterForDwarf(std::string registerName) { + if (registerName.length() > 8) { +// The name is more than 8 characters long, and so won't fit into 64 bits. +return 0; + } walter-erquinigo wrote: could this validly happen or would it be an error if it happens? https://github.com/llvm/llvm-project/pull/109495 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)
@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const { static_cast<const NVPTXTargetMachine &>(MF.getTarget()); return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32; } + +void NVPTXRegisterInfo::clearDebugRegisterMap() const { + debugRegisterMap.clear(); +} + +static uint64_t encodeRegisterForDwarf(std::string registerName) { + if (registerName.length() > 8) { +// The name is more than 8 characters long, and so won't fit into 64 bits. +return 0; + } + + // Encode the name string into a DWARF register number using cuda-gdb's + // encoding. See cuda_check_dwarf2_reg_ptx_virtual_register in cuda-tdep.c, + // https://github.com/NVIDIA/cuda-gdb/blob/e5cf3bddae520ffb326f95b4d98ce5c7474b828b/gdb/cuda/cuda-tdep.c#L353 + // IE the bytes of the string are concatenated in reverse into a single + // number, which is stored in ULEB128, but in practice must be no more than 8 + // bytes (excluding null terminator, which is not included). + uint64_t result = 0; + for (int i = 0; i < registerName.length(); ++i) { +result = result << 8; +char c = registerName[i]; +result += c; + } + return result; +} + +void NVPTXRegisterInfo::addToDebugRegisterMap( +uint64_t preEncodedVirtualRegister, std::string registerName) const { + uint64_t mapped = encodeRegisterForDwarf(registerName); + if (mapped == 0) +return; + debugRegisterMap.insert({preEncodedVirtualRegister, mapped}); +} + +int64_t NVPTXRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { + if (Register::isPhysicalRegister(RegNum)) { +std::string name = NVPTXInstPrinter::getRegisterName(RegNum.id()); +return encodeRegisterForDwarf(name); + } + auto lookup = debugRegisterMap.lookup(RegNum.id()); walter-erquinigo wrote: don't use auto here https://github.com/llvm/llvm-project/pull/109495 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)
@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const { static_cast<const NVPTXTargetMachine &>(MF.getTarget()); return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32; } + +void NVPTXRegisterInfo::clearDebugRegisterMap() const { + debugRegisterMap.clear(); +} + +static uint64_t encodeRegisterForDwarf(std::string registerName) { + if (registerName.length() > 8) { +// The name is more than 8 characters long, and so won't fit into 64 bits. +return 0; + } + + // Encode the name string into a DWARF register number using cuda-gdb's + // encoding. See cuda_check_dwarf2_reg_ptx_virtual_register in cuda-tdep.c, + // https://github.com/NVIDIA/cuda-gdb/blob/e5cf3bddae520ffb326f95b4d98ce5c7474b828b/gdb/cuda/cuda-tdep.c#L353 + // IE the bytes of the string are concatenated in reverse into a single + // number, which is stored in ULEB128, but in practice must be no more than 8 + // bytes (excluding null terminator, which is not included). + uint64_t result = 0; + for (int i = 0; i < registerName.length(); ++i) { +result = result << 8; +char c = registerName[i]; walter-erquinigo wrote: shouldn't this be unsigned char? https://github.com/llvm/llvm-project/pull/109495 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From e0ef194ecf8bf0e9c450ee11c244eb4450548aef Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 1/2] Add workshare loop wrapper lowerings Bufferize test Bufferize test Bufferize test Add test for should use workshare lowering --- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +- .../Transforms/OptimizedBufferization.cpp | 10 +- flang/test/HLFIR/bufferize-workshare.fir | 58 .../OpenMP/should-use-workshare-lowering.mlir | 140 ++ 4 files changed, 208 insertions(+), 4 deletions(-) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index 07794828fce267..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,6 +26,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. 
hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 3a0a98dc594463..f014724861e333 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto 
arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); @@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 00..9b7341ae43398a --- /dev/null +++ b/flang/test/HLFIR/bufferize-workshare.fir @@ -0,0 +1,58 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// CHECK-LABEL: func.func @simple( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { +// CHECK: omp.parallel { +// CHECK: omp.workshare { +// CHECK: %[[VAL_1:.*]] = arith.c
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff d5fbe9c7482b87be295be03aafd5917dd7c17859 79ac7998609480d18be4ea3bc61b6c1c77089f70 --extensions h,cpp,inc -- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp flang/include/flang/Optimizer/OpenMP/Passes.h flang/include/flang/Tools/CLOptions.inc flang/include/flang/Tools/CrossToolHelpers.h flang/lib/Frontend/FrontendActions.cpp flang/tools/bbc/bbc.cpp flang/tools/tco/tco.cpp `` View the diff from clang-format here. ``diff diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index bb00e07900..81ce69b4ec 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -336,8 +336,8 @@ inline void createDefaultFIROptimizerPassPipeline( /// \param pm - MLIR pass manager that will hold the pipeline definition /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline -inline void createHLFIRToFIRPassPipeline( -mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { +inline void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, +bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { if (optLevel.isOptimizingForSpeed()) { addCanonicalizerPassWithoutRegionSimplification(pm); addNestedPassToAllTopLevelOperations( `` https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748 >From 975a0d74c5ae81c69844b8bd089832ed53278477 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Mon, 23 Sep 2024 15:07:48 +0900 Subject: [PATCH 1/4] Emit a proper error message for CFG in workshare --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 13 +- .../OpenMP/lower-workshare-todo-cfg-dom.mlir | 23 ++ .../OpenMP/lower-workshare-todo-cfg.mlir | 20 + .../Transforms/OpenMP/lower-workshare5.mlir | 42 --- 4 files changed, 55 insertions(+), 43 deletions(-) create mode 100644 flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir delete mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 6e5538b54ba5e0..cf1867311cc236 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -16,6 +16,7 @@ // //===--===// +#include "flang/Optimizer/Builder/Todo.h" #include #include #include @@ -416,8 +417,18 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc, di); + // FIXME Currently, we only support workshare constructs with structured + // control flow. The transformation itself supports CFG, however, once we + // transform the MLIR region in the omp.workshare, we need to inline that + // region in the parent block. We have no guarantees at this point of the + // pipeline that the parent op supports CFG (e.g. fir.if), thus this is not + // generally possible. The alternative is to put the lowered region in an + // operation akin to scf.execute_region, which will get lowered at the same + // time when fir ops get lowered to CFG. However, SCF is not registered in + // flang so we cannot use it. 
Remove this requirement once we have + // scf.execute_region or an alternative operation available. if (wsOp.getRegion().getBlocks().size() != 1) -return failure(); +TODO(wsOp->getLoc(), "omp workshare with unstructured control flow"); // Inline the contents of the placeholder workshare op into its parent block. Block *theBlock = &newOp.getRegion().front(); diff --git a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir new file mode 100644 index 00..1c47d448f597d9 --- /dev/null +++ b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir @@ -0,0 +1,23 @@ +// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s + +// CHECK: not yet implemented: omp workshare with unstructured control flow + +// Check that the definition of %r dominates its use post-transform +func.func @wsfunc() { + %a = fir.alloca i32 + omp.parallel { +omp.workshare { +^bb1: + %c1 = arith.constant 1 : i32 + cf.br ^bb3(%c1: i32) +^bb2: + "test.test2"(%r) : (i32) -> () + omp.terminator +^bb3(%arg1: i32): + %r = "test.test2"(%arg1) : (i32) -> i32 + cf.br ^bb2 +} +omp.terminator + } + return +} diff --git a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir new file mode 100644 index 00..bf6c196a05b4a3 --- /dev/null +++ b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir @@ -0,0 +1,20 @@ +// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s + +// CHECK: not yet implemented: omp workshare with unstructured control flow + +// Check transforming a simple CFG +func.func @wsfunc() { + %a = fir.alloca i32 + omp.parallel { +omp.workshare { +^bb1: + %c1 = arith.constant 1 : i32 + cf.br ^bb3(%c1: i32) +^bb3(%arg1: i32): + "test.test2"(%arg1) : (i32) -> () + omp.terminator +} +omp.terminator + } + return +} diff --git a/flang/test/Transforms/OpenMP/lower-workshare5.mlir 
b/flang/test/Transforms/OpenMP/lower-workshare5.mlir deleted file mode 100644 index 177f8aa8f86c7c..00 --- a/flang/test/Transforms/OpenMP/lower-workshare5.mlir +++ /dev/null @@ -1,42 +0,0 @@ -// XFAIL: * -// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s - -// TODO we can lower these but we have no guarantee that the parent of -// omp.workshare supports multi-block regions, thus we fail for now. - -func.func @wsfunc() { - %a = fir.alloca i32 - omp.parallel { -omp.workshare { -^bb1: - %c1 = arith.constant 1 : i32 - cf.br ^bb3(%c1: i32) -^bb3(%arg1: i32): - "test.test2"(%arg1) : (i32) -> () - omp.terminator -} -omp.terminator - } -
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446 >From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 22:06:55 +0900 Subject: [PATCH 1/3] [flang] Lower omp.workshare to other omp constructs Change to workshare loop wrapper op Move single op declaration Schedule pass properly Correctly handle nested nested loop nests to be parallelized by workshare Leave comments for shouldUseWorkshareLowering Use copyprivate to scatter val from omp.single TODO still need to implement copy function TODO transitive check for usage outside of omp.single not implemented yet Transitively check for users outside of single op TODO need to implement copy func TODO need to hoist allocas outside of single regions Add tests Hoist allocas More tests Emit body for copy func Test the tmp storing logic Clean up trivially dead ops Only handle single-block regions for now Fix tests for custom assembly for loop wrapper Only run the lower workshare pass if openmp is enabled Implement some missing functionality Fix tests Fix test Iterate backwards to find all trivially dead ops Add explanation comment for createCopyFun Update test --- flang/include/flang/Optimizer/OpenMP/Passes.h | 5 + .../include/flang/Optimizer/OpenMP/Passes.td | 5 + flang/include/flang/Tools/CLOptions.inc | 6 +- flang/include/flang/Tools/CrossToolHelpers.h | 1 + flang/lib/Frontend/FrontendActions.cpp| 10 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 + flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++ flang/test/Fir/basic-program.fir | 1 + .../Transforms/OpenMP/lower-workshare.mlir| 189 .../Transforms/OpenMP/lower-workshare2.mlir | 23 + .../Transforms/OpenMP/lower-workshare3.mlir | 74 +++ .../Transforms/OpenMP/lower-workshare4.mlir | 59 +++ .../Transforms/OpenMP/lower-workshare5.mlir | 42 ++ .../Transforms/OpenMP/lower-workshare6.mlir | 51 ++ flang/tools/bbc/bbc.cpp | 5 +- flang/tools/tco/tco.cpp | 1 + 16 files 
changed, 915 insertions(+), 4 deletions(-) create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h index 403d79667bf448..feb395f1a12dbd 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.h +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -25,6 +25,11 @@ namespace flangomp { #define GEN_PASS_REGISTRATION #include "flang/Optimizer/OpenMP/Passes.h.inc" +/// Implements the logic specified in the 2.8.3 workshare Construct section of +/// the OpenMP standard which specifies what statements or constructs shall be +/// divided into units of work. 
+bool shouldUseWorkshareLowering(mlir::Operation *op); + } // namespace flangomp #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 395178e26a5762..041240cad12eb3 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> { ]; } +// Needs to be scheduled on Module as we create functions in it +def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> { + let summary = "Lower workshare construct"; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 1881e23b00045a..bb00e079008a0b 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline( /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline inline void createHLFIRToFIRPassPipeline( -mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) { if (optLevel.isOptimizingForSpeed()) { addCanonicalizerPassWithoutRegionSimplification(pm); addNestedPassToAllTopLevelOperations( @@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + if (enableOpenMP) +pm.add