[llvm-branch-commits] [mlir] [mlir][Transforms][NFC] Make `rewriterImpl` private in `IRRewrite` (PR #84865)
https://github.com/nicolasvasilache approved this pull request. https://github.com/llvm/llvm-project/pull/84865 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 3747eb9 - [mlir][Linalg] Add a padding option to Linalg tiling
Author: Nicolas Vasilache Date: 2021-01-25T09:17:30Z New Revision: 3747eb9c85b3393aa00ad12e9e7ef31ffec8bd4c URL: https://github.com/llvm/llvm-project/commit/3747eb9c85b3393aa00ad12e9e7ef31ffec8bd4c DIFF: https://github.com/llvm/llvm-project/commit/3747eb9c85b3393aa00ad12e9e7ef31ffec8bd4c.diff LOG: [mlir][Linalg] Add a padding option to Linalg tiling This revision allows the base Linalg tiling pattern to optionally require padding to a constant bounding shape. When requested, a simple analysis is performed, similar to buffer promotion. A temporary `linalg.simple_pad` op is added to model padding for the purpose of connecting the dots. This will be replaced by a more fleshed out `linalg.pad_tensor` op when it is available. In the meantime, this temporary op serves the purpose of exhibiting the necessary properties required from a more fleshed out pad op, to compose with transformations properly. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D95149 Added: mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir Modified: mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h mlir/include/mlir/Interfaces/ViewLikeInterface.h mlir/include/mlir/Interfaces/ViewLikeInterface.td mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp mlir/lib/Dialect/StandardOps/IR/Ops.cpp mlir/test/Dialect/Linalg/roundtrip.mlir mlir/test/lib/Transforms/TestLinalgTransforms.cpp Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index ae9f81d043f5..9ea1bc5a3587 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -475,6 +475,38 @@ def Linalg_SliceOp : Linalg_Op<"slice", [ let hasFolder = 1; } +def Linalg_SimplePadOp : Linalg_Op<"simple_pad", [NoSideEffect]> { + let summary = "TODO: replace with pad_tensors when ready."; + + let description = [{ +`linalg.simple_pad` is a tmp placeholder for padding 
and packing on tensors. +Its semantics are to pad a partially dynamic tensor to a fully static tensor +where the static sizes are assumed to be greater than the dynamic sizes. The +op performs "high" padding (i.e. it adds trailing padding values until the +desired size is met). + }]; + + let arguments = (ins AnyRankedTensor:$tensor, AnyType:$padding); + let results = (outs AnyRankedTensor:$result); + + // TODO: verify all static result, some dynamic input, static shapes match, + // element types match, ranks match etc. Use pad_tensors when ready but for + // now just let it be fully specified by traits. + let verifier = ?; + + let extraClassDeclaration = [{ +RankedTensorType getSourceType() { + return tensor().getType().cast(); } +RankedTensorType getResultType() { + return getResult().getType().cast(); } + }]; + + let assemblyFormat = [{ +$tensor `pad` $padding attr-dict `:` + type($tensor) `to` type($result) `pad` type($padding) + }]; +} + def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>, Arguments<(ins Variadic:$values)> { let summary = "Linalg yield operation"; diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 611ab6867372..f359992e5ff1 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -345,6 +345,9 @@ enum class LinalgTilingLoopType { using TileSizeComputationFunction = std::function(OpBuilder &, Operation *)>; +using PaddingValueComputationFunction = +std::function; + struct LinalgTilingOptions { /// Computation function that returns the tile sizes for each operation. /// Delayed construction of constant tile sizes should occur to interoperate @@ -393,6 +396,18 @@ struct LinalgTilingOptions { distribution = std::move(distributionOptions); return *this; } + + /// Computation function that returns a padding value to use when padding to + /// force static sizes. 
When `paddingValueComputationFunction` is set, padding + /// operations are introduced, that guarantee the underlying op is statically + /// shaped and can thus be vectorized. + PaddingValueComputationFunction paddingValueComputationFunction = nullptr; + + LinalgTilingOptions & + setPaddingValueComputationFunction(PaddingValueComputationFunction fun) { +paddingValueComputationFunction = std::move(fun); +return *this; + } }; /// Canonicalization patterns relevant to apply after tiling patterns. These are @@ -403,6 +418,11 @@ getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx); void populateLinalgTilingCanonicalizationPatterns( OwningRewritePatternList &patterns, MLIRContext *ctx); +/// Base patt
[llvm-branch-commits] [mlir] dbf9bed - [mlir][Linalg] Add a hoistPaddingOnTensors transformation
Author: Nicolas Vasilache Date: 2021-01-25T12:41:18Z New Revision: dbf9bedf40792cf8c5492a27b61809737793b9c7 URL: https://github.com/llvm/llvm-project/commit/dbf9bedf40792cf8c5492a27b61809737793b9c7 DIFF: https://github.com/llvm/llvm-project/commit/dbf9bedf40792cf8c5492a27b61809737793b9c7.diff LOG: [mlir][Linalg] Add a hoistPaddingOnTensors transformation This transformation anchors on a padding op whose result is only used as an input to a Linalg op and pulls it out of a given number of loops. The result is a packing of padded tiles of ops that is amortized just before the outermost loop from which the pad operation is hoisted. Differential revision: https://reviews.llvm.org/D95243 Added: mlir/test/Dialect/Linalg/hoist-padding.mlir Modified: mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h mlir/include/mlir/Dialect/StandardOps/IR/Ops.td mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp mlir/lib/Dialect/StandardOps/IR/Ops.cpp mlir/test/lib/Transforms/TestLinalgTransforms.cpp Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h index ed585d1f5cf5..4d44b3717991 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h @@ -11,8 +11,10 @@ namespace mlir { class FuncOp; +struct LogicalResult; namespace linalg { +class SimplePadOp; /// Hoist alloc/dealloc pairs and alloca op out of immediately enclosing /// scf::ForOp if both conditions are true: @@ -40,6 +42,44 @@ void hoistRedundantVectorTransfers(FuncOp func); /// instead of buffers. void hoistRedundantVectorTransfersOnTensor(FuncOp func); +/// Mechanically hoist padding operations on tensors by `nLoops` into a new, +/// generally larger tensor. This achieves packing of multiple padding ops into +/// a larger tensor. 
On success, `simplePadOp` is replaced by the cloned version +/// in the packing loop so the caller can continue reasoning about the padding +/// operation. +/// +/// Example in pseudo-mlir: +/// === +/// +/// If hoistPaddingOnTensors is called with `nLoops` = 2 on the following IR. +/// ``` +///scf.for (%i, %j, %k) +/// %st0 = subtensor f(%i, %k) : ... to tensor +/// %0 = linalg.simple_pad %st0 pad %pad : +/// tensor to tensor<4x8xf32> +/// compute(%0) +/// ``` +/// +/// IR resembling the following is produced: +/// +/// ``` +///scf.for (%i) { +/// %packed_init = linalg.init_tensor range(%j) : tensor +/// %packed = scf.for (%k) iter_args(%p : %packed_init) +///%st0 = subtensor f(%i, %k) : ... to tensor +///%0 = linalg.simple_pad %st0 pad %pad : +/// tensor to tensor<4x8xf32> +///scf.yield %1: tensor +/// } -> tensor +/// scf.for (%j, %k) { +///%st0 = subtensor %packed [%k, 0, 0][1, 4, 8][1, 1, 1] : +/// tensor to tensor<4x8xf32> +///compute(%st0) +/// } +///} +/// ``` +LogicalResult hoistPaddingOnTensors(SimplePadOp &simplePadOp, unsigned nLoops); + } // namespace linalg } // namespace mlir diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index ce1907cb6435..1c21b1639b7e 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3058,6 +3058,17 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides< // Build a SubTensorOp with all dynamic entries and custom result type. OpBuilderDAG<(ins "RankedTensorType":$resultType, "Value":$source, "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, +// Build a SubTensorOp with mixed static and dynamic entries and inferred +// result type. +OpBuilderDAG<(ins "Value":$source, "ArrayRef":$offsets, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, +// Build a SubTensorOp with mixed static and dynamic entries and custom +// result type. 
If the type passed is nullptr, it is inferred. +OpBuilderDAG<(ins "RankedTensorType":$resultType, "Value":$source, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)> ]; @@ -3154,6 +3165,11 @@ def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides< // Build a SubTensorInsertOp with all dynamic entries. OpBuilderDAG<(ins "Value":$source, "Value":$dest, "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, +// Build a SubTensorInsertOp with mixed static and dynamic entries. +OpBuilderDAG<(ins "Value":$source, "Value":$dest, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>
[llvm-branch-commits] [mlir] 68eee55 - [mlir][Linalg] Address missed review item
Author: Nicolas Vasilache Date: 2021-01-25T13:47:44Z New Revision: 68eee55ce6a41bb294d63886679b599883e96c3a URL: https://github.com/llvm/llvm-project/commit/68eee55ce6a41bb294d63886679b599883e96c3a DIFF: https://github.com/llvm/llvm-project/commit/68eee55ce6a41bb294d63886679b599883e96c3a.diff LOG: [mlir][Linalg] Address missed review item This revision addresses a remaining comment that was overlooked in https://reviews.llvm.org/D95243: the pad hoisting transformation is made to additionally bail out on side effecting ops other than LoopLikeOps. Added: Modified: mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir Removed: diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 7f1ead8ca386..9ca1f6da43f6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -342,6 +342,8 @@ void mlir::linalg::hoistRedundantVectorTransfers(FuncOp func) { /// 3. There exists an op with a region that is dominated by /// `outermostEnclosingForOp` and that isn't a LoopLikeInterface or a ///LinalgOp. +/// 3. There exists an op with side effects that is dominated by +///`outermostEnclosingForOp` and that isn't a LoopLikeInterface. /// /// While ensuring prerequisites: /// 1. Fill the `backwardSlice` to contain the topologically sorted ops @@ -383,6 +385,21 @@ hoistPaddingOnTensorsPrerequisites(linalg::SimplePadOp simplePadOp, int nLevels, return domInfo.dominates(outermostEnclosingForOp, op); }); + #if 0 + + // Bail on any op with a region that is not a LoopLikeInterface or a LinalgOp. + // Bail on any op with side effects that is not a LoopLikeInterface. 
+ if (llvm::any_of(backwardSlice, [](Operation *op) { +if (isa(op)) + return false; +if (!MemoryEffectOpInterface::hasNoEffect(op)) + return true; +return op->getNumRegions() > 0 && !isa(op); + })) +return failure(); + + #else + // Bail on any op with a region that is not a LoopLikeInterface or a LinalgOp. if (llvm::any_of(backwardSlice, [](Operation *op) { return op->getNumRegions() > 0 && !isa(op) && @@ -390,6 +407,8 @@ hoistPaddingOnTensorsPrerequisites(linalg::SimplePadOp simplePadOp, int nLevels, })) return failure(); + #endif + // Filter out the loops whose induction variable is not used to compute the // padded result. As a first approximation, just look for IVs that have no use // in the backwardSlice. diff --git a/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir index e4121083e240..1291b5c990df 100644 --- a/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-pad-pattern -canonicalize | FileCheck %s +// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-pad-pattern -canonicalize +//| FileCheck %s // CHECK-LABEL: func @matmul_tensors( // CHECK-SAME:%[[TA:[0-9a-z]+]]: tensor ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 52e2552 - [mlir][Linalg] Fix incorrect erase order
Author: Nicolas Vasilache Date: 2021-01-25T14:04:06Z New Revision: 52e25523a98f1f6c0afeba7f29308b02761d8017 URL: https://github.com/llvm/llvm-project/commit/52e25523a98f1f6c0afeba7f29308b02761d8017 DIFF: https://github.com/llvm/llvm-project/commit/52e25523a98f1f6c0afeba7f29308b02761d8017.diff LOG: [mlir][Linalg] Fix incorrect erase order Added: Modified: mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp Removed: diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 9ca1f6da43f6..5c67c8e61829 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -595,11 +595,14 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(SimplePadOp &simplePadOp, b.create(loc, simplePadOp.getResultType(), packedTensor, offsets, sizes, strides) ->getResult(0)); - simplePadOp.erase(); + + Operation *toErase = simplePadOp; // Make the newly cloned `simplePadOp` available to the caller. simplePadOp = cast(bvm.lookup(simplePadOp.result()).getDefiningOp()); + toErase->erase(); + return success(); } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 05d5125 - [mlir] Generalize OpFoldResult usage in ops with offsets, sizes and operands.
Author: Nicolas Vasilache Date: 2021-01-25T14:17:03Z New Revision: 05d5125d8a9ffa458ea2deff90eb73473db0047e URL: https://github.com/llvm/llvm-project/commit/05d5125d8a9ffa458ea2deff90eb73473db0047e DIFF: https://github.com/llvm/llvm-project/commit/05d5125d8a9ffa458ea2deff90eb73473db0047e.diff LOG: [mlir] Generalize OpFoldResult usage in ops with offsets, sizes and operands. This revision starts evolving the APIs to manipulate ops with offsets, sizes and operands towards a ValueOrAttr abstraction that is already used in folding under the name OpFoldResult. The objective, in the future, is to allow such manipulations all the way to the level of ODS to avoid all the genuflexions involved in distinguishing between values and attributes for generic constant foldings. Once this evolution is accepted, the next step will be a mechanical OpFoldResult -> ValueOrAttr. Differential Revision: https://reviews.llvm.org/D95310 Added: Modified: mlir/include/mlir/Dialect/StandardOps/IR/Ops.td mlir/include/mlir/IR/OpDefinition.h mlir/include/mlir/Interfaces/ViewLikeInterface.td mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp mlir/lib/Dialect/StandardOps/IR/Ops.cpp mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp mlir/lib/Dialect/Vector/VectorTransforms.cpp mlir/test/Dialect/Linalg/fusion-sequence.mlir mlir/test/Dialect/Linalg/promote.mlir mlir/test/Dialect/Linalg/transform-patterns.mlir mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir Removed: diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 1c21b1639b7e..08f2174886f7 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1959,14 +1959,19 @@ def MemRefReinterpretCastOp: let builders = [ // Build 
a ReinterpretCastOp with mixed static and dynamic entries. OpBuilderDAG<(ins "MemRefType":$resultType, "Value":$source, - "int64_t":$staticOffset, "ArrayRef":$staticSizes, - "ArrayRef":$staticStrides, "ValueRange":$offset, - "ValueRange":$sizes, "ValueRange":$strides, + "OpFoldResult":$offset, "ArrayRef":$sizes, + "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, -// Build a ReinterpretCastOp with all dynamic entries. +// Build a ReinterpretCastOp with static entries. OpBuilderDAG<(ins "MemRefType":$resultType, "Value":$source, - "Value":$offset, "ValueRange":$sizes, "ValueRange":$strides, + "int64_t":$offset, "ArrayRef":$sizes, + "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, +// Build a ReinterpretCastOp with dynamic entries. +OpBuilderDAG<(ins "MemRefType":$resultType, "Value":$source, + "Value":$offset, "ValueRange":$sizes, + "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)> ]; let extraClassDeclaration = extraBaseClassDeclaration # [{ @@ -2927,23 +2932,33 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< let results = (outs AnyMemRef:$result); let builders = [ -// Build a SubViewOp with mixed static and dynamic entries. -OpBuilderDAG<(ins "Value":$source, "ArrayRef":$staticOffsets, - "ArrayRef":$staticSizes, "ArrayRef":$staticStrides, - "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, +// Build a SubViewOp with mixed static and dynamic entries and custom +// result type. If the type passed is nullptr, it is inferred. +OpBuilderDAG<(ins "Value":$source, "ArrayRef":$offsets, + "ArrayRef":$sizes, "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, -// Build a SubViewOp with all dynamic entries. -OpBuilderDAG<(ins "Value":$source, "ValueRange":$offsets, - "ValueRange":$sizes, "ValueRange":$strides, +// Build a SubViewOp with mixed static and dynamic entries and inferred +// result type. 
+OpBuilderDAG<(ins "MemRefType":$resultType, "Value":$source, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, -// Build a SubViewOp with mixed static and dynamic entries -// and custom result type. +// Build a SubViewOp with static entries and custom result type. If the +// type passed is nullptr, it is inferred. +OpBuilderDAG<(ins "Value":$source, "ArrayRef":$offsets, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, +// Build a SubViewOp with static entries and inferred result type. OpBuilderDAG<(ins "MemRefType":$resultType, "Value":$source, - "ArrayRef":$staticOffsets, "ArrayRef":$staticSizes, - "ArrayRef":$staticStrides, "
[llvm-branch-commits] [mlir] 93a873d - [mlir][Affine] Revisit and simplify composeAffineMapAndOperands.
Author: Nicolas Vasilache Date: 2021-01-19T13:52:07Z New Revision: 93a873dfc9ee7e8b4386dea87e43c5f238eeef06 URL: https://github.com/llvm/llvm-project/commit/93a873dfc9ee7e8b4386dea87e43c5f238eeef06 DIFF: https://github.com/llvm/llvm-project/commit/93a873dfc9ee7e8b4386dea87e43c5f238eeef06.diff LOG: [mlir][Affine] Revisit and simplify composeAffineMapAndOperands. In prehistorical times, AffineApplyOp was allowed to produce multiple values. This allowed the creation of intricate SSA use-def chains. AffineApplyNormalizer was originally introduced as a means of reusing the AffineMap::compose method to write SSA use-def chains. Unfortunately, symbols that were produced by an AffineApplyOp needed to be promoted to dims and reordered for the mathematical composition to be valid. Since then, single result AffineApplyOp became the law of the land but the original assumptions were not revisited. This revision revisits these assumptions and retires AffineApplyNormalizer. Differential Revision: https://reviews.llvm.org/D94920 Added: Modified: mlir/include/mlir/Dialect/Affine/IR/AffineOps.h mlir/include/mlir/IR/AffineExpr.h mlir/include/mlir/IR/AffineMap.h mlir/lib/Dialect/Affine/IR/AffineOps.cpp mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp mlir/lib/IR/AffineExpr.cpp mlir/lib/IR/AffineMap.cpp mlir/test/Dialect/Affine/affine-data-copy.mlir mlir/test/Dialect/Affine/canonicalize.mlir mlir/test/Dialect/Linalg/reshape_fusion.mlir mlir/test/EDSC/builder-api-test.cpp mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp Removed: mlir/test/Dialect/Affine/SuperVectorize/normalize_maps.mlir diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h index b097f18e8cea..9b30c9b160b7 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -439,81 +439,6 @@ class AffineBound { friend class AffineForOp; }; -/// An `AffineApplyNormalizer` is a helper class that supports 
renumbering -/// operands of AffineApplyOp. This acts as a reindexing map of Value to -/// positional dims or symbols and allows simplifications such as: -/// -/// ```mlir -///%1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0) -/// ``` -/// -/// into: -/// -/// ```mlir -///%1 = affine.apply () -> (0) -/// ``` -struct AffineApplyNormalizer { - AffineApplyNormalizer(AffineMap map, ArrayRef operands); - - /// Returns the AffineMap resulting from normalization. - AffineMap getAffineMap() { return affineMap; } - - SmallVector getOperands() { -SmallVector res(reorderedDims); -res.append(concatenatedSymbols.begin(), concatenatedSymbols.end()); -return res; - } - - unsigned getNumSymbols() { return concatenatedSymbols.size(); } - unsigned getNumDims() { return reorderedDims.size(); } - - /// Normalizes 'otherMap' and its operands 'otherOperands' to map to this - /// normalizer's coordinate space. - void normalize(AffineMap *otherMap, SmallVectorImpl *otherOperands); - -private: - /// Helper function to insert `v` into the coordinate system of the current - /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding - /// renumbered position. - AffineDimExpr renumberOneDim(Value v); - - /// Given an `other` normalizer, this rewrites `other.affineMap` in the - /// coordinate system of the current AffineApplyNormalizer. - /// Returns the rewritten AffineMap and updates the dims and symbols of - /// `this`. - AffineMap renumber(const AffineApplyNormalizer &other); - - /// Maps of Value to position in `affineMap`. - DenseMap dimValueToPosition; - - /// Ordered dims and symbols matching positional dims and symbols in - /// `affineMap`. - SmallVector reorderedDims; - SmallVector concatenatedSymbols; - - /// The number of symbols in concatenated symbols that belong to the original - /// map as opposed to those concatendated during map composition. 
- unsigned numProperSymbols; - - AffineMap affineMap; - - /// Used with RAII to control the depth at which AffineApply are composed - /// recursively. Only accepts depth 1 for now to allow a behavior where a - /// newly composed AffineApplyOp does not increase the length of the chain of - /// AffineApplyOps. Full composition is implemented iteratively on top of - /// this behavior. - static unsigned &affineApplyDepth() { -static thread_local unsigned depth = 0; -return depth; - } - static constexpr unsigned kMaxAffineApplyDepth = 1; - - AffineApplyNormalizer() : numProperSymbols(0) { affineApplyDepth()++; } - -public: - ~AffineApplyNormalizer() { affineApplyDepth()--; } -}; - } // end namespace mlir #endif diff --git a/mlir/include/mlir/IR/AffineExpr.h b/mlir/include/mlir/IR/AffineExpr.h index d4f7de501a95..3e4e1c014b58 100644 --- a/mlir/include/mlir/IR/AffineExpr.h +++ b/mlir/i
[llvm-branch-commits] [mlir] f5d8eb0 - [mlir][Linalg] NFC - getAssumedNonShapedOperands now returns OperandRange
Author: Nicolas Vasilache Date: 2021-01-20T19:23:26Z New Revision: f5d8eb085af97c6d873edf3ca16d85b8a97c67e6 URL: https://github.com/llvm/llvm-project/commit/f5d8eb085af97c6d873edf3ca16d85b8a97c67e6 DIFF: https://github.com/llvm/llvm-project/commit/f5d8eb085af97c6d873edf3ca16d85b8a97c67e6.diff LOG: [mlir][Linalg] NFC - getAssumedNonShapedOperands now returns OperandRange Also adds a isInput interface method. Added: Modified: mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 85133604cda0..b8009a818aa0 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -609,6 +609,22 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { return payloadUsesValueFromOpOperand(&getOutputOpOperands()[index]); }] >, +InterfaceMethod< + /*desc=*/[{ +Return true if `opOperand` is an input tensor. + }], + /*retTy=*/"bool", + /*methodName=*/"isInputTensor", + /*args=*/(ins "OpOperand *":$opOperand), + /*methodBody=*/"", + /*defaultImplementation=*/[{ +if (!opOperand->get().getType().template isa()) + return false; +if (opOperand->getOperandNumber() < $_op.getNumInputs()) + return true; +return false; + }] +>, InterfaceMethod< /*desc=*/[{ Return true if `opOperand` is an init tensor. This is true when it is @@ -1063,18 +1079,13 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /// init_tensors operands. Asserts that these operands are value types to /// allow transformations like tiling to just use the values when cloning /// `linalgOp`. 
-SmallVector getAssumedNonShapedOperands() { - unsigned numShapedOperands = getNumShapedOperands(); - unsigned nExtraOperands = -getOperation()->getNumOperands() - numShapedOperands; - SmallVector res; - res.reserve(nExtraOperands); - for (unsigned i = 0; i < nExtraOperands; ++i) { -res.push_back(getOperation()->getOperand(numShapedOperands + i)); -assert((res.back().getType().isSignlessIntOrIndexOrFloat() -|| res.back().getType().template isa()) && - "expected scalar or vector type"); - } +Operation::operand_range getAssumedNonShapedOperands() { + Operation::operand_range res{ +getOperation()->getOperands().begin() + getNumShapedOperands(), +getOperation()->getOperands().end()}; + for (Type t : TypeRange{res}) +assert((t.isSignlessIntOrIndexOrFloat() || t.template isa()) + &&"expected scalar or vector type"); return res; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] c075572 - [mlir][Linalg] NFC - Expose getSmallestBoundingIndex as an utility function
Author: Nicolas Vasilache Date: 2021-01-20T19:53:09Z New Revision: c075572646a9bd71ac675e20f3d75101ae7dd090 URL: https://github.com/llvm/llvm-project/commit/c075572646a9bd71ac675e20f3d75101ae7dd090 DIFF: https://github.com/llvm/llvm-project/commit/c075572646a9bd71ac675e20f3d75101ae7dd090.diff LOG: [mlir][Linalg] NFC - Expose getSmallestBoundingIndex as an utility function Added: Modified: mlir/include/mlir/Dialect/Linalg/Utils/Utils.h mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp mlir/lib/Dialect/Linalg/Utils/Utils.cpp Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index f194209f1910..1a7dc939435e 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -142,6 +142,12 @@ void applyPermutationToVector(SmallVector &inVec, inVec = auxVec; } +/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp +/// is a constant then return a new value set to the smallest such constant. +/// If `size` comes from a ConstantOp, return the constant. +/// Otherwise return nullptr. +IntegerAttr getSmallestBoundingIndex(Value size); + /// Scheme used to distribute loops to processors. enum class DistributionMethod { /// Cyclic distribution where no assumption is made about the dynamic diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 329cc88bd2ae..4d314c32657a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -44,27 +44,6 @@ using folded_std_view = FoldedValueBuilder; #define DEBUG_TYPE "linalg-promotion" -/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp -/// is a constant then return a new value set to the smallest such constant. -/// Otherwise return size. 
-static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc, - Value size) { - Optional boundingConst = {}; - if (auto affineMinOp = size.getDefiningOp()) { -for (auto e : affineMinOp.getAffineMap().getResults()) - if (auto cst = e.dyn_cast()) -boundingConst = boundingConst -? std::min(boundingConst.getValue(), cst.getValue()) -: cst.getValue(); - } else if (auto constIndexOp = size.getDefiningOp()) { -if (constIndexOp.getType().isa()) - boundingConst = constIndexOp.value().cast().getInt(); - } - return boundingConst && *boundingConst >= 0 - ? b.create(loc, *boundingConst) - : size; -} - /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly /// the size needed, otherwise try to allocate a static bounding box. static Value allocBuffer(const LinalgPromotionOptions &options, @@ -242,7 +221,9 @@ Optional mlir::linalg::promoteSubviewAsNewBuffer( auto rangeValue = en.value(); // Try to extract a tight constant. LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); -Value size = extractSmallestConstantBoundingSize(b, loc, rangeValue.size); +IntegerAttr sizeAttr = getSmallestBoundingIndex(rangeValue.size); +Value size = +(!sizeAttr) ? rangeValue.size : b.create(loc, sizeAttr); LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); fullSizes.push_back(size); partialSizes.push_back(folded_std_dim(folder, subView, en.index())); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 81bfbc6ecf52..32b32be066cb 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -115,6 +115,31 @@ Optional> getStaticLoopRanges(LinalgOp linalgOp) { return invertedMap.compose(viewSizes); } +/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp +/// is a constant then return a new value set to the smallest such constant. +/// Otherwise return nullptr. 
+IntegerAttr getSmallestBoundingIndex(Value size) { + Optional boundingConst = {}; + if (auto affineMinOp = size.getDefiningOp()) { +for (auto e : affineMinOp.getAffineMap().getResults()) + if (auto cst = e.dyn_cast()) +boundingConst = boundingConst +? std::min(boundingConst.getValue(), cst.getValue()) +: cst.getValue(); + } else if (auto constIndexOp = size.getDefiningOp()) { +if (constIndexOp.getType().isa()) + boundingConst = constIndexOp.value().cast().getInt(); + } else if (auto affineApplyOp = size.getDefiningOp()) { +if (auto cExpr = affineApplyOp.getAffineMap() + .getResult(0) + .dyn_cast()) + boundingConst = cExpr.getValue();
[llvm-branch-commits] [mlir] 866cb26 - [mlir] Fix SubTensorInsertOp semantics
Author: Nicolas Vasilache Date: 2021-01-20T20:16:01Z New Revision: 866cb26039043581d5ab8b30d5a999a7c273f361 URL: https://github.com/llvm/llvm-project/commit/866cb26039043581d5ab8b30d5a999a7c273f361 DIFF: https://github.com/llvm/llvm-project/commit/866cb26039043581d5ab8b30d5a999a7c273f361.diff LOG: [mlir] Fix SubTensorInsertOp semantics Like SubView, SubTensor/SubTensorInsertOp are allowed to have rank-reducing/expanding semantics. In the case of SubTensorInsertOp , the rank of offsets/sizes/strides should be the rank of the destination tensor. Also, add a builder flavor for SubTensorOp to return a rank-reduced tensor. Differential Revision: https://reviews.llvm.org/D95076 Added: Modified: mlir/include/mlir/Dialect/StandardOps/IR/Ops.td mlir/lib/Dialect/StandardOps/IR/Ops.cpp mlir/test/IR/core-ops.mlir Removed: diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 770e68f6da835..6dbb24a4358f8 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3244,6 +3244,17 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides< // Build a SubTensorOp with all dynamic entries. OpBuilderDAG<(ins "Value":$source, "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, +// Build a SubTensorOp with mixed static and dynamic entries +// and custom result type. +OpBuilderDAG<(ins "RankedTensorType":$resultType, "Value":$source, + "ArrayRef":$staticOffsets, "ArrayRef":$staticSizes, + "ArrayRef":$staticStrides, "ValueRange":$offsets, + "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, +// Build a SubTensorOp with all dynamic entries and custom result type. 
+OpBuilderDAG<(ins "RankedTensorType":$resultType, "Value":$source, + "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, CArg<"ArrayRef", "{}">:$attrs)> ]; @@ -3349,7 +3360,7 @@ def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides< return source().getType().cast(); } -/// The result of a subtensor is always a tensor. +/// The result of a subtensor_insert is always a tensor. RankedTensorType getType() { return getResult().getType().cast(); } @@ -3357,7 +3368,7 @@ def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides< /// Return the expected rank of each of the`static_offsets`, `static_sizes` /// and `static_strides` attributes. std::array getArrayAttrRanks() { - unsigned rank = getSourceType().getRank(); + unsigned rank = getType().getRank(); return {rank, rank, rank}; } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 1718ab14d5d12..428006e20d9f9 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2892,12 +2892,11 @@ void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, ArrayRef attrs) { auto sourceMemRefType = source.getType().cast(); unsigned rank = sourceMemRefType.getRank(); - SmallVector staticOffsetsVector; - staticOffsetsVector.assign(rank, ShapedType::kDynamicStrideOrOffset); - SmallVector staticSizesVector; - staticSizesVector.assign(rank, ShapedType::kDynamicSize); - SmallVector staticStridesVector; - staticStridesVector.assign(rank, ShapedType::kDynamicStrideOrOffset); + SmallVector staticOffsetsVector( + rank, ShapedType::kDynamicStrideOrOffset); + SmallVector staticSizesVector(rank, ShapedType::kDynamicSize); + SmallVector staticStridesVector( + rank, ShapedType::kDynamicStrideOrOffset); build(b, result, resultType, source, staticOffsetsVector, staticSizesVector, staticStridesVector, offsets, sizes, strides, attrs); } @@ -3444,6 +3443,38 @@ void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, 
staticStridesVector, offsets, sizes, strides, attrs); } +/// Build a SubTensorOp as above but with custom result type. +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + RankedTensorType resultType, Value source, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides, + ValueRange offsets, ValueRange sizes, + ValueRange strides, + ArrayRef attrs) { + build(b, result, resultType, source, offsets, sizes, strides, +b.getI64ArrayAttr(staticOffsets), b.getI64ArrayAttr(staticSizes), +b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +/// Build a SubTensorOp as above but with custom result type. +void ml
[llvm-branch-commits] [mlir] 8dd58a5 - [mlir][Linalg] NFC - Fully compose map and operands when creating AffineMin in tiling.
Author: Nicolas Vasilache Date: 2021-01-20T20:36:18Z New Revision: 8dd58a509cc8b93a211c9b07b12e1548dc187fc3 URL: https://github.com/llvm/llvm-project/commit/8dd58a509cc8b93a211c9b07b12e1548dc187fc3 DIFF: https://github.com/llvm/llvm-project/commit/8dd58a509cc8b93a211c9b07b12e1548dc187fc3.diff LOG: [mlir][Linalg] NFC - Fully compose map and operands when creating AffineMin in tiling. This may simplify the composition of patterns but is otherwise NFC. Added: Modified: mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp Removed: diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index eb8c9bb6a6fc..62a5d325ddcf 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -34,7 +34,6 @@ using namespace mlir::edsc::intrinsics; using namespace mlir::linalg; using namespace mlir::scf; -using folded_affine_min = FoldedValueBuilder; #define DEBUG_TYPE "linalg-tiling" @@ -292,8 +291,9 @@ makeTiledShapes(OpBuilder &b, Location loc, LinalgOp linalgOp, getAffineDimExpr(/*position=*/2, b.getContext())}, b.getContext()); auto d = std_dim(shapedOp, r); -size = -affine_min(b.getIndexType(), minMap, ValueRange{size, d, offset}); +SmallVector operands{size, d, offset}; +fullyComposeAffineMapAndOperands(&minMap, &operands); +size = affine_min(b.getIndexType(), minMap, operands); } sizes.push_back(size); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 555a395 - [mlir] NFC - Fix unused variable in non-debug mode
Author: Nicolas Vasilache Date: 2021-01-20T22:20:38Z New Revision: 555a395f2ccd531159538668fa36c7dc63ecff60 URL: https://github.com/llvm/llvm-project/commit/555a395f2ccd531159538668fa36c7dc63ecff60 DIFF: https://github.com/llvm/llvm-project/commit/555a395f2ccd531159538668fa36c7dc63ecff60.diff LOG: [mlir] NFC - Fix unused variable in non-debug mode Added: Modified: mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index b8009a818aa0..7f3839a02b2f 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -1083,9 +1083,11 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { Operation::operand_range res{ getOperation()->getOperands().begin() + getNumShapedOperands(), getOperation()->getOperands().end()}; - for (Type t : TypeRange{res}) + for (Type t : TypeRange{res}) { +(void)t; assert((t.isSignlessIntOrIndexOrFloat() || t.template isa()) &&"expected scalar or vector type"); + } return res; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] ed507bc - [mlir] NFC - Fix SubViewOp printing
Author: Nicolas Vasilache Date: 2020-12-30T16:34:37Z New Revision: ed507bc4d5eee48560d28089ab6e31d91ef3fd88 URL: https://github.com/llvm/llvm-project/commit/ed507bc4d5eee48560d28089ab6e31d91ef3fd88 DIFF: https://github.com/llvm/llvm-project/commit/ed507bc4d5eee48560d28089ab6e31d91ef3fd88.diff LOG: [mlir] NFC - Fix SubViewOp printing Avoid casting the source operand type allows better debugging when conversion patterns fail to produce a proper MemRefType. Added: Modified: mlir/lib/Dialect/StandardOps/IR/Ops.cpp Removed: diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 30bf546807c4..c73a9a41719c 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -3053,7 +3053,7 @@ static void print(OpAsmPrinter &p, SubViewOp op) { p << op->getName().getStringRef().drop_front(stdDotLen) << ' '; p << op.source(); printOffsetsSizesAndStrides(p, op); - p << " : " << op.getSourceType() << " to " << op.getType(); + p << " : " << op.source().getType() << " to " << op.getType(); } /// Parse a subview op of the form: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 9b5a3d6 - [mlir] Fix indexing of first offset operand in ops that implement OffsetSizeAndStrideOpInterface
Author: Nicolas Vasilache Date: 2020-12-30T16:44:26Z New Revision: 9b5a3d67b496ce92729c61b7b2a99e8dc8d39ca3 URL: https://github.com/llvm/llvm-project/commit/9b5a3d67b496ce92729c61b7b2a99e8dc8d39ca3 DIFF: https://github.com/llvm/llvm-project/commit/9b5a3d67b496ce92729c61b7b2a99e8dc8d39ca3.diff LOG: [mlir] Fix indexing of first offset operand in ops that implement OffsetSizeAndStrideOpInterface OffsetSizeAndStrideOpInterface ops may have a varying number of operands before the first offset operand. This revision adds a method that such ops much implement to properly specify the position of the first offset operand. Differential Revision: https://reviews.llvm.org/D93947 Added: Modified: mlir/include/mlir/Dialect/StandardOps/IR/Ops.td mlir/include/mlir/Interfaces/ViewLikeInterface.td Removed: diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index ba78db68214f..1f7b888167cd 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2177,6 +2177,10 @@ def MemRefReinterpretCastOp: unsigned resultRank = getResult().getType().cast().getRank(); return {1, resultRank, resultRank}; } + +/// Return the number of leading operands before the `offsets`, `sizes` and +/// and `strides` operands. 
+static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } }]; } @@ -3031,7 +3035,8 @@ def SubIOp : IntArithmeticOp<"subi"> { //===--===// def SubViewOp : BaseOpWithOffsetSizesAndStrides< -"subview", [DeclareOpInterfaceMethods, OffsetSizeAndStrideOpInterface] > { +"subview", [DeclareOpInterfaceMethods, +OffsetSizeAndStrideOpInterface] > { let summary = "memref subview operation"; let description = [{ The "subview" operation converts a memref type to another memref type @@ -3217,6 +3222,10 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< unsigned rank = getSourceType().getRank(); return {rank, rank, rank}; } + +/// Return the number of leading operands before the `offsets`, `sizes` and +/// and `strides` operands. +static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } }]; let hasCanonicalizer = 1; @@ -3227,7 +3236,8 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< // SubTensorOp //===--===// -def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor", [OffsetSizeAndStrideOpInterface]> { +def SubTensorOp : BaseOpWithOffsetSizesAndStrides< +"subtensor", [OffsetSizeAndStrideOpInterface]> { let summary = "subtensor operation"; let description = [{ The "subtensor" operation extract a tensor from another tensor as @@ -3279,12 +3289,12 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor", [OffsetSizeAndStr let results = (outs AnyRankedTensor:$result); let builders = [ -// Build a SubViewOp with mixed static and dynamic entries. +// Build a SubTensorOp with mixed static and dynamic entries. OpBuilderDAG<(ins "Value":$source, "ArrayRef":$staticOffsets, "ArrayRef":$staticSizes, "ArrayRef":$staticStrides, "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, CArg<"ArrayRef", "{}">:$attrs)>, -// Build a SubViewOp with all dynamic entries. +// Build a SubTensorOp with all dynamic entries. 
OpBuilderDAG<(ins "Value":$source, "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, CArg<"ArrayRef", "{}">:$attrs)> @@ -3315,6 +3325,10 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor", [OffsetSizeAndStr unsigned rank = getSourceType().getRank(); return {rank, rank, rank}; } + +/// Return the number of leading operands before the `offsets`, `sizes` and +/// and `strides` operands. +static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } }]; let hasCanonicalizer = 1; @@ -3324,7 +3338,8 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor", [OffsetSizeAndStr // SubTensorInsertOp //===--===// -def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides<"subtensor_insert", [OffsetSizeAndStrideOpInterface]> { +def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides< +"subtensor_insert", [OffsetSizeAndStrideOpInterface]> { let summary = "subtensor_insert operation"; let description = [{ The "subtensor_insert" operation insert a tensor `source` into another @@ -3369,13 +3384,13 @@ def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides<"subtensor_insert", [Off let results = (outs AnyRankedTensor:$result); let builders = [ -// Build
[llvm-branch-commits] [mlir] a922486 - [mlir] NFC - Drop spurious assertion on symbols during `promoteComposedSymbolsAsDims`
Author: Nicolas Vasilache Date: 2021-01-10T14:02:16Z New Revision: a92248600ec4acba00db566a54c8ce53de807e3c URL: https://github.com/llvm/llvm-project/commit/a92248600ec4acba00db566a54c8ce53de807e3c DIFF: https://github.com/llvm/llvm-project/commit/a92248600ec4acba00db566a54c8ce53de807e3c.diff LOG: [mlir] NFC - Drop spurious assertion on symbols during `promoteComposedSymbolsAsDims` This assertion is an old remnant from earlier days when only affine functions existed. It is not the place of affine map composition to check whether orthogonal considerations on what is allowed to be a symbol under the AffineScope trait. Added: Modified: mlir/lib/Dialect/Affine/IR/AffineOps.cpp Removed: diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 3fd1b62a5d2d..2e75cd07ed83 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -635,12 +635,6 @@ static AffineMap promoteComposedSymbolsAsDims(AffineMap map, return map; } - // Sanity check on symbols. - for (auto sym : symbols) { -assert(isValidSymbol(sym) && "Expected only valid symbols"); -(void)sym; - } - // Extract the symbol positions that come from an AffineApplyOp and // needs to be rewritten as dims. auto symPositions = indicesFromAffineApplyOp(symbols); @@ -2401,8 +2395,7 @@ LogicalResult AffineStoreOp::fold(ArrayRef cstOperands, // AffineMinMaxOpBase //===--===// -template -static LogicalResult verifyAffineMinMaxOp(T op) { +template static LogicalResult verifyAffineMinMaxOp(T op) { // Verify that operand count matches affine map dimension and symbol count. 
if (op.getNumOperands() != op.map().getNumDims() + op.map().getNumSymbols()) return op.emitOpError( @@ -2410,8 +2403,7 @@ static LogicalResult verifyAffineMinMaxOp(T op) { return success(); } -template -static void printAffineMinMaxOp(OpAsmPrinter &p, T op) { +template static void printAffineMinMaxOp(OpAsmPrinter &p, T op) { p << op.getOperationName() << ' ' << op->getAttr(T::getMapAttrName()); auto operands = op.getOperands(); unsigned numDims = op.map().getNumDims(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 80f0785 - [mlir][Linalg] NFC - Refactor fusion APIs
Author: Nicolas Vasilache Date: 2021-01-12T14:27:15Z New Revision: 80f078548868d0dd3d74ab8a1deb8aa46870cdf3 URL: https://github.com/llvm/llvm-project/commit/80f078548868d0dd3d74ab8a1deb8aa46870cdf3 DIFF: https://github.com/llvm/llvm-project/commit/80f078548868d0dd3d74ab8a1deb8aa46870cdf3.diff LOG: [mlir][Linalg] NFC - Refactor fusion APIs This revision uniformizes fusion APIs to allow passing OpOperand, OpResult and adds a finer level of control fusion. Differential Revision: https://reviews.llvm.org/D94493 Added: Modified: mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h mlir/include/mlir/Dialect/Linalg/Utils/Utils.h mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 3fc3fa4a5556..f3b7181d71a5 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -726,6 +726,18 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { getNumShapedOperands()); }] >, +InterfaceMethod< + /*desc=*/[{ +Return the OpOperands for all the shaped operands. + }], + /*retTy=*/" OpOperand&", + /*methodName=*/"getShapedOpOperand", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ +return *(this->getShapedOpOperands().begin() + i); + }] +>, InterfaceMethod< /*desc=*/[{ Return the range over input and output operands. 
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index d816414ef8b4..de1658f96a87 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -35,6 +35,7 @@ struct TiledLinalgOp { LinalgOp op; SmallVector loops; SmallVector tensorResults; + TiledLinalgOp &operator=(const TiledLinalgOp &) = default; }; /// Populates patterns for vectorization of all ConvN-D ops. @@ -412,9 +413,8 @@ struct LinalgBaseTilingPattern : public RewritePattern { LinalgTilingOptions options, LinalgMarker marker = LinalgMarker(), PatternBenefit benefit = 1); - LogicalResult - matchAndRewriteBase(Operation *op, PatternRewriter &rewriter, - SmallVectorImpl &tensorResults) const; + LogicalResult matchAndRewriteBase(Operation *op, PatternRewriter &rewriter, +TiledLinalgOp &result) const; private: /// LinalgTransformMarker handles special attribute manipulations. @@ -432,14 +432,14 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern { marker, benefit) {} LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { -SmallVector tensorResults; +TiledLinalgOp tiledLinalgOp; if (failed(LinalgBaseTilingPattern::matchAndRewriteBase(op, rewriter, -tensorResults))) +tiledLinalgOp))) return failure(); -if (tensorResults.empty()) +if (tiledLinalgOp.tensorResults.empty()) rewriter.eraseOp(op); else - rewriter.replaceOp(op, tensorResults); + rewriter.replaceOp(op, tiledLinalgOp.tensorResults); return success(); } }; diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 2ef32cfe378b..f194209f1910 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -92,26 +92,31 @@ findAllFusableDependences(ArrayRef ops, /// Fuses producer into consumer if the producer is structurally feasible and /// the fusion 
would not violate dependencies. -/// Implements the fusion part of the "tileAndFuse on buffers" -/// transformation and thus requires the `consumerdIdx`^th operand of `consumer` -/// to be a `subview` op (generally obtained by applying the tiling -/// transformation). -Optional fuseProducerOfBuffer(OpBuilder &b, LinalgOp consumer, - unsigned consumerIdx, +/// Implements the fusion part of the "tileAndFuse on buffers" transformation +/// and thus requires the `consumerOpOperand` to be a `subview` op (generally +/// obtained by applying the ti
[llvm-branch-commits] [mlir] b88ed4e - [mlir][Linalg] Reflow Linalg.md - NFC
Author: Nicolas Vasilache Date: 2020-12-18T16:15:58Z New Revision: b88ed4ec8e7d35f786a59de527989316ba9c5f48 URL: https://github.com/llvm/llvm-project/commit/b88ed4ec8e7d35f786a59de527989316ba9c5f48 DIFF: https://github.com/llvm/llvm-project/commit/b88ed4ec8e7d35f786a59de527989316ba9c5f48.diff LOG: [mlir][Linlag] Reflow Linalg.md - NFC Markdown formatting seems to now be available, reflowing the doc without changing any content. Added: Modified: mlir/docs/Dialects/Linalg.md Removed: diff --git a/mlir/docs/Dialects/Linalg.md b/mlir/docs/Dialects/Linalg.md index c6681a93e53e..02508a81b63a 100644 --- a/mlir/docs/Dialects/Linalg.md +++ b/mlir/docs/Dialects/Linalg.md @@ -6,12 +6,12 @@ https://user-images.githubusercontent.com/10148468/73613629-c5586580-45c5-11ea-94b7-074aeea94c7b.png";> -Linalg is designed to solve the High-level Hierarchical Optimization -(HHO box) in MLIR and to interoperate nicely within a -*Mixture Of Expert Compilers* environment (i.e. the *CGSel* box). +Linalg is designed to solve the High-level Hierarchical Optimization (HHO box) +in MLIR and to interoperate nicely within a *Mixture Of Expert Compilers* +environment (i.e. the *CGSel* box). -The [Rationale Document](../Rationale/RationaleLinalgDialect.md) -goes into significantly more design and architectural decision details. +The [Rationale Document](../Rationale/RationaleLinalgDialect.md) goes into +significantly more design and architectural decision details. ## Set of Key Transformations @@ -20,51 +20,56 @@ Linalg. They are all implemented in terms of the properties of the `linalg.generic` OpInterface and avoid the pitfall of relying on hardcoded one-off op knowledge. -The textual form description of these transformations is left for future -work. Still, it is useful to at least the key transformations that are -performed on the Linalg IR and that have influenced its design: -1. Progressive Buffer Allocation. -1. Parametric Tiling. -1. Promotion to Temporary Buffer in Fast Memory. -1. 
Tiled Producer-Consumer Fusion with Parametric Tile-And-Fuse. -1. Map to Parallel and Reduction Loops and Hardware. -1. Vectorization: Rewrite in Vector Form. -1. Lower to Loops (Affine, Generic, and Parallel). -1. Lower to Library Calls or Special Instructions, Intrinsics or ISA. -1. Partially Lower to Iterations Over a Finer-Grained Linalg Op. +The textual form description of these transformations is left for future work. +Still, it is useful to at least the key transformations that are performed on +the Linalg IR and that have influenced its design: + +1. Progressive Buffer Allocation. +1. Parametric Tiling. +1. Promotion to Temporary Buffer in Fast Memory. +1. Tiled Producer-Consumer Fusion with Parametric Tile-And-Fuse. +1. Map to Parallel and Reduction Loops and Hardware. +1. Vectorization: Rewrite in Vector Form. +1. Lower to Loops (Affine, Generic, and Parallel). +1. Lower to Library Calls or Special Instructions, Intrinsics or ISA. +1. Partially Lower to Iterations Over a Finer-Grained Linalg Op. ## High-Level Description of Linalg Ops -Linalg takes at least some inspiration from all previously [listed prior -art](#prior_art). The design enables the definition of ***CustomOps*** with -generic properties that enable [key transformations](#key_transformations), -including lowering to scalar load/store and other operations or to external -library calls and intrinsics. + +Linalg takes at least some inspiration from all previously +[listed prior art](#prior_art). The design enables the definition of +***CustomOps*** with generic properties that enable +[key transformations](#key_transformations), including lowering to scalar +load/store and other operations or to external library calls and intrinsics. These ops can have ***either tensor or buffer operands***, subject to [conventions and limitations](#tensors_and_buffers). 
### Payload-Carrying Ops -Linalg defines two payload carrying operations that implement the [structured ops]( -https://docs.google.com/presentation/d/1P-j1GrH6Q5gLBjao0afQ-GfvcAeF-QU4GXXeSy0eJ9I/edit#slide=id.p -) abstraction on tensors and buffers. This is architected as two generic operations -`linalg.generic` (resp. `linalg.indexed_generic`) that can express custom -operations with *index-free semantics* (resp. *indexing semantics*). -The properties of these generic ops are the result of applying the -guiding principles described in the [Rationale Document](../Rationale/RationaleLinalgDialect.md). -They are listed next, with a brief example and discussion for each. + +Linalg defines two payload carrying operations that implement the +[structured ops](https://docs.google.com/presentation/d/1P-j1GrH6Q5gLBjao0afQ-GfvcAeF-QU4GXXeSy0eJ9I/edit#slide=id.p) +abstraction on tensors and buffers. This is architected as two generic +operations `linalg.generic` (resp. `linalg.indexed_generic`)
[llvm-branch-commits] [mlir] 01c4418 - [mlir][Linalg] NFC - Factor out Linalg functionality for shape and loop bounds computation
Author: Nicolas Vasilache Date: 2020-11-23T10:17:18Z New Revision: 01c4418544b7934f8216a6616562bbaf34dc6979 URL: https://github.com/llvm/llvm-project/commit/01c4418544b7934f8216a6616562bbaf34dc6979 DIFF: https://github.com/llvm/llvm-project/commit/01c4418544b7934f8216a6616562bbaf34dc6979.diff LOG: [mlir][Linalg] NFC - Factor out Linalg functionality for shape and loop bounds computation This revision refactors code used in various Linalg transformations and makes it a first class citizen to the LinalgStructureOpInterface. This is in preparation to allowing more advanced Linalg behavior but is otherwise NFC. Differential revision: https://reviews.llvm.org/D91863 Added: Modified: mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h mlir/include/mlir/Dialect/Linalg/Utils/Utils.h mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp mlir/lib/Dialect/Linalg/Transforms/Loops.cpp mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp mlir/lib/Dialect/Linalg/Utils/Utils.cpp mlir/test/Dialect/Linalg/invalid.mlir Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h index 713fb192f073..f8002279132f 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h @@ -11,12 +11,13 @@ #include "mlir/Dialect/Linalg/IR/LinalgTraits.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinDialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/StandardTypes.h" #include "mlir/IR/TypeUtilities.h" @@ -32,10 +33,29 @@ namespace mlir { 
namespace linalg { class ConvOp; +class LinalgOp; class PoolingMaxOp; class PoolingMinOp; class PoolingSumOp; +// TOFO: allow an extra ValueRange to specify an indexing and allow +// non-hyperrectangular shapes. +using LoopRangeBuilder = +std::function(OpBuilder &, Location)>; + +/// Returns the values obtained by applying `map` to the list of values. +SmallVector applyMapToValues(OpBuilder &b, Location loc, + AffineMap map, ValueRange values); + +/// Provide a very simple inference procedure to build the loop ranges from the +/// op and its operands. This only works with permutation affine maps and +/// patterns of the form `(m, n)[s] -> (m + n - s floordiv 2)`. +/// A more advanced Tensor-Comprehension like inference is possible but has +/// proven to be ambiguous in unfavorable case. +/// As a consequence, we relax the default behavior very conservatively and +/// provide an op-specified hook so that Linalg ops may override the behavior. +LoopRangeBuilder defaultLoopRangesBuilder(LinalgOp op); + using ReassociationIndices = SmallVector; using ReassociationExprs = SmallVector; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 0373bf3f6adf..6c7da083d7af 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -765,6 +765,59 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }] >, + //===--===// +// Linalg generalization hooks. + //===--===// +InterfaceMethod< + /*desc=*/[{ +Hook to provide a custom AffineMap used to compute all the operand +subshapes given loop bounds. This is used to answer the question: "given +an iteration space over the codomain, what are the subshapes of the +operands involved in the computation". +The default behavior is to just concatenate all the indexing maps. 
+A custom AffineMap allows providing a map that can be used to +compute subshapes even in cases where the concatenation of indexing maps +(i.e. the data traversal order) is not a simple permutation of the loop +traversal order. It is then possible to define ops with skewed data +traversal order for which we can still easily compute hyperrectangular +loop bounds and subviews. + }], + /*retTy=*/"AffineMap", + /*methodName=*/"getLoopsToShapesMap", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ +auto r = $_op.indexing_maps().template
[llvm-branch-commits] [mlir] 9ac0b31 - [mlir][Linalg] Drop symbol_source abstraction which does not pay for itself.
Author: Nicolas Vasilache Date: 2020-11-23T12:43:02Z New Revision: 9ac0b314a431405aa6a681124f83138f7544807e URL: https://github.com/llvm/llvm-project/commit/9ac0b314a431405aa6a681124f83138f7544807e DIFF: https://github.com/llvm/llvm-project/commit/9ac0b314a431405aa6a681124f83138f7544807e.diff LOG: [mlir][Linalg] Drop symbol_source abstraction which does not pay for itself. Differential Revision: https://reviews.llvm.org/D91956 Added: Modified: mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td mlir/include/mlir/Dialect/Linalg/Passes.td mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h mlir/lib/Dialect/Linalg/EDSC/Builders.cpp mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp mlir/lib/Dialect/Linalg/Transforms/Loops.cpp mlir/test/Dialect/Linalg/invalid.mlir mlir/test/Dialect/Linalg/loops.mlir Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index aba4f9d61fe0..66f39104d7e7 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -503,9 +503,7 @@ class GenericOpBase : LinalgStructuredBase_Op:$doc, OptionalAttr:$library_call, // ArrayAttr of StrArrayAttr: - OptionalAttr:$sparse, - Confined, [IntMinValue<0>]> - :$symbol_source); + OptionalAttr:$sparse); let results = (outs Variadic:$result_tensors); let regions = (region AnyRegion:$region); let extraClassDeclaration = [{ @@ -513,18 +511,13 @@ class GenericOpBase : LinalgStructuredBase_Op{ getDocAttrName(), getIndexingMapsAttrName(), getLibraryCallAttrName(), -getIteratorTypesAttrName(), getSymbolSourceAttrName() +getIteratorTypesAttrName(), }; } std::string getLibraryCallName() { return library_call().hasValue() ? 
library_call()->str() : "op_has_no_registered_library_name"; } -llvm::Optional getSymbolSource() { - auto ss = symbol_source(); - return ss.hasValue() ? -llvm::Optional(ss.getValue()) : llvm::None; -} static std::function getRegionBuilder() { return nullptr; @@ -566,10 +559,6 @@ def GenericOp : GenericOpBase<"generic"> { parallel, reduction, window - sparse: an optional list with per-dimension sparsity annotations (either "D" for dense or "S" for sparse) for each input and output view. - - symbol_source: index of the operand whose dimensions will be propagated -as symbols to the indexing maps. When specified the number of symbols -in each of the indexing maps has to be either 0 or the rank of the -specified operand. Example: Defining a #matmul_trait attribute in MLIR can be done as follows: @@ -646,50 +635,17 @@ def GenericOp : GenericOpBase<"generic"> { Tensor values must be legalized by a buffer allocation pass before most transformations can be applied. Such legalizations move tensor return values into output buffer operands and updates the region arguments accordingly. - -The `symbol_source` attribute allows selecting a particular operand and -introducing symbols for each operand dimension. Such symbols can then be -used in the indexing maps. - -Example of 1D convolution with symbols: -```mlir -#conv_1d_accesses = [ - affine_map<(m, n)[dimN] -> (m + n - dimN floordiv 2)>, // in - affine_map<(m, n)[dimN] -> (n)>, // filter - affine_map<(m, n)[dimN] -> (m)> // out -] - -#conv_1d_trait = { - doc = "O(m) += I(m + n - size(n) floordiv 2) * K(n)", - indexing_maps = #conv_1d_accesses, - library_call = "linalg_conv_1d", - iterator_types = ["parallel", "parallel"], - symbol_source = 1 -} - -linalg.generic #conv_1d_trait - ins(%in, %filter : memref, memref) - outs(%out : memref) { - ^bb0(%a: f32, %b: f32, %c: f32) : -%d = mulf %a, %b : f32 -%e = addf %c, %d : f32 -linalg.yield %e : f32 -} -``` -where symbol s0 will be substituted with `dim %filter, %c0` i.e. 
the first -and only dimension of the second operand as specified by the symbol_source -attribute. }]; let builders = [ OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, "ValueRange":$outputBuffers, "ValueRange":$initTensors, "ArrayRef":$indexingMaps, "ArrayRef":$iteratorTypes, - "StringRef":$doc, "StringRef":$libraryCall, "IntegerAttr":$symbolSource, + "StringRef":$doc, "StringRef":$libraryCall, CArg<"function_ref", "nullptr">)>, Op
[llvm-branch-commits] [mlir] 5073e7e - [mlir] Add missing dependency
Author: Nicolas Vasilache Date: 2020-11-23T20:36:50Z New Revision: 5073e7edb64b61d130f2a0eac4731a0585bad28f URL: https://github.com/llvm/llvm-project/commit/5073e7edb64b61d130f2a0eac4731a0585bad28f DIFF: https://github.com/llvm/llvm-project/commit/5073e7edb64b61d130f2a0eac4731a0585bad28f.diff LOG: [mlir] Add mising dependency Added: Modified: mlir/lib/Dialect/Linalg/IR/CMakeLists.txt Removed: diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt index 963260adad66..15e29d749e65 100644 --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRLinalg MLIRLinalgStructuredOpsInterfaceIncGen LINK_LIBS PUBLIC + MLIRAffine MLIRIR MLIRSideEffectInterfaces MLIRViewLikeInterface ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 5dd5a08 - [mlir] Let ModuleTranslate propagate LLVM triple
Author: Nicolas Vasilache Date: 2020-11-27T08:01:44Z New Revision: 5dd5a083630c797c958b02acd381b9de2bf02c86 URL: https://github.com/llvm/llvm-project/commit/5dd5a083630c797c958b02acd381b9de2bf02c86 DIFF: https://github.com/llvm/llvm-project/commit/5dd5a083630c797c958b02acd381b9de2bf02c86.diff LOG: [mlir] Let ModuleTranslate propagate LLVM triple This adds LLVM triple propagation and updates the test that did not check it properly. Differential Revision: https://reviews.llvm.org/D92182 Added: Modified: mlir/lib/Target/LLVMIR/ModuleTranslation.cpp mlir/test/Target/llvmir.mlir Removed: diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index d333c2ff8390..4f21eac5965f 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -946,6 +946,9 @@ std::unique_ptr ModuleTranslation::prepareLLVMModule( if (auto dataLayoutAttr = m->getAttr(LLVM::LLVMDialect::getDataLayoutAttrName())) llvmModule->setDataLayout(dataLayoutAttr.cast().getValue()); + if (auto targetTripleAttr = + m->getAttr(LLVM::LLVMDialect::getTargetTripleAttrName())) + llvmModule->setTargetTriple(targetTripleAttr.cast().getValue()); // Inject declarations for `malloc` and `free` functions that can be used in // memref allocation/deallocation coming from standard ops lowering. diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir index 8491e67fdfb5..9dc2fbfda028 100644 --- a/mlir/test/Target/llvmir.mlir +++ b/mlir/test/Target/llvmir.mlir @@ -1319,6 +1319,7 @@ module attributes {llvm.target_triple = "x86_64-pc-windows-msvc"} {} // - // CHECK-NOT: "CodeView", i32 1 +// CHECK: aarch64-linux-android module attributes {llvm.target_triple = "aarch64-linux-android"} {} // - ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 047400e - [mlir][LLVMIR] Add support for InlineAsmOp
Author: Nicolas Vasilache Date: 2020-11-30T08:32:02Z New Revision: 047400ed8204ebcc0b361ca9285b34ea91479b69 URL: https://github.com/llvm/llvm-project/commit/047400ed8204ebcc0b361ca9285b34ea91479b69 DIFF: https://github.com/llvm/llvm-project/commit/047400ed8204ebcc0b361ca9285b34ea91479b69.diff LOG: [mlir][LLVMIR] Add support for InlineAsmOp The InlineAsmOp mirrors the underlying LLVM semantics with a notable exception: the embedded `asm_string` is not allowed to define or reference any symbol or any global variable: only the operands of the op may be read, written, or referenced. Attempting to define or reference any symbol or any global behavior is considered undefined behavior at this time. The asm dialect syntax is currently specified with an integer (0 [default] for the "att dialect", 1 for the intel dialect) to circumvent the ODS limitation on string enums. Translation to LLVM is provided and raises the fact that the asm constraints string must be well-formed with respect to in/out operands. No check is performed on the asm_string. An InlineAsm instruction in LLVM is a special call operation to a function that is constructed on the fly. It does not fit the current model of MLIR calls with symbols. As a consequence, the current implementation constructs the function type in ModuleTranslation.cpp. This should be refactored in the future. The mlir-cpu-runner is augmented with the global initialization of the X86 asm parser to allow proper execution in JIT mode. Previously, only the X86 asm printer was initialized. 
Differential revision: https://reviews.llvm.org/D92166 Added: mlir/integration_test/Dialect/LLVMIR/CPU/X86/lit.local.cfg mlir/integration_test/Dialect/LLVMIR/CPU/X86/test-inline-asm.mlir Modified: mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp mlir/lib/Target/LLVMIR/ModuleTranslation.cpp mlir/test/Dialect/LLVMIR/roundtrip.mlir mlir/test/Target/llvmir.mlir mlir/tools/mlir-cpu-runner/CMakeLists.txt mlir/tools/mlir-cpu-runner/mlir-cpu-runner.cpp Removed: diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 542afaa83f5f..e4dee53560bd 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1191,4 +1191,48 @@ def LLVM_FenceOp : LLVM_ZeroResultOp<"fence", []> { let printer = [{ printFenceOp(p, *this); }]; let verifier = "return ::verify(*this);"; } + +def AsmATT : LLVM_EnumAttrCase< + /*string cppSym=*/"AD_ATT", /*string irSym=*/"att", + /*string llvmSym=*/"AD_ATT", /*int val=*/0>; +def AsmIntel : LLVM_EnumAttrCase< + /*string cppSym=*/"AD_Intel", /*string irSym=*/"intel", + /*string llvmSym=*/"AD_Intel", /*int val=*/1>; +def AsmATTOrIntel : LLVM_EnumAttr< + /*string name=*/"AsmDialect", + /*string llvmName=*/"::llvm::InlineAsm::AsmDialect", + /*string description=*/"ATT (0) or Intel (1) asm dialect", + /*list cases=*/[AsmATT, AsmIntel]> { + let cppNamespace = "::mlir::LLVM"; +} + +def LLVM_InlineAsmOp : LLVM_Op<"inline_asm", []> { + let description = [{ +The InlineAsmOp mirrors the underlying LLVM semantics with a notable +exception: the embedded `asm_string` is not allowed to define or reference +any symbol or any global variable: only the operands of the op may be read, +written, or referenced. +Attempting to define or reference any symbol or any global behavior is +considered undefined behavior at this time. 
+ }]; + let arguments = ( +ins Variadic:$operands, +StrAttr:$asm_string, +StrAttr:$constraints, +UnitAttr:$has_side_effects, +UnitAttr:$is_align_stack, +OptionalAttr< + DefaultValuedAttr>:$asm_dialect); + + let results = (outs Optional:$res); + + let assemblyFormat = [{ +(`has_side_effects` $has_side_effects^)? +(`is_align_stack` $is_align_stack^)? +(`asm_dialect` `=` $asm_dialect^)? +attr-dict +$asm_string `,` $constraints +operands `:` functional-type(operands, results) + }]; +} #endif // LLVMIR_OPS diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/X86/lit.local.cfg b/mlir/integration_test/Dialect/LLVMIR/CPU/X86/lit.local.cfg new file mode 100644 index ..84776f850fcb --- /dev/null +++ b/mlir/integration_test/Dialect/LLVMIR/CPU/X86/lit.local.cfg @@ -0,0 +1,8 @@ +import platform + +if platform.machine() != 'x86_64': +config.unsupported = True + +# No JIT on win32. +if sys.platform == 'win32': +config.unsupported = True diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/X86/test-inline-asm.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/X86/test-inline-asm.mlir new file mode 100644 index ..a4c0efb7beed --- /dev/null +++ b/mlir/integration_test/Dialect/LLVMIR/CPU/X86/test-inline-asm.mlir @@ -0,0 +1,16 @@ +// RUN: mlir-cpu-runner %s
[llvm-branch-commits] [mlir] 78c7118 - [mlir] Make mlir-cpu-runner depend on native instead of X86
Author: Nicolas Vasilache Date: 2020-11-30T15:11:34Z New Revision: 78c71187465a8e877d2e07d462b45a19363fb782 URL: https://github.com/llvm/llvm-project/commit/78c71187465a8e877d2e07d462b45a19363fb782 DIFF: https://github.com/llvm/llvm-project/commit/78c71187465a8e877d2e07d462b45a19363fb782.diff LOG: [mlir] Make mlir-cpu-runner depend on native instead of X86 Added: Modified: mlir/tools/mlir-cpu-runner/CMakeLists.txt Removed: diff --git a/mlir/tools/mlir-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-cpu-runner/CMakeLists.txt index 539f9914a91a..c749b8c40b09 100644 --- a/mlir/tools/mlir-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cpu-runner/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core Support nativecodegen + native ) add_llvm_tool(mlir-cpu-runner @@ -11,8 +12,6 @@ llvm_update_compile_flags(mlir-cpu-runner) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) target_link_libraries(mlir-cpu-runner PRIVATE ${dialect_libs} - LLVMAsmParser - LLVMX86AsmParser MLIRAnalysis MLIREDSC MLIRExecutionEngine ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] a1cd559 - [mlir][Linalg] Properly use distribution options.
Author: Nicolas Vasilache Date: 2020-12-04T14:00:54Z New Revision: a1cd559ce500d18eb15750ac776e7e73b3819832 URL: https://github.com/llvm/llvm-project/commit/a1cd559ce500d18eb15750ac776e7e73b3819832 DIFF: https://github.com/llvm/llvm-project/commit/a1cd559ce500d18eb15750ac776e7e73b3819832.diff LOG: [mlir][Linalg] Properly use distribution options. Let tiling to scf.for actually use the distribution method. For now only Cyclic is supported. Differential Revision: https://reviews.llvm.org/D92653 Added: Modified: mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp mlir/lib/Dialect/Linalg/Utils/Utils.cpp mlir/test/lib/Transforms/TestLinalgTransforms.cpp Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index b37a14f0eb7a..90c6a0374e94 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -389,6 +389,11 @@ OwningRewritePatternList getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx); struct LinalgBaseTilingPattern : public RewritePattern { + // Entry point to match any LinalgOp OpInterface. + LinalgBaseTilingPattern(LinalgTilingOptions options, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1); + // Entry point to match a specific Linalg op. 
LinalgBaseTilingPattern(StringRef opName, MLIRContext *context, LinalgTilingOptions options, LinalgMarker marker = LinalgMarker(), diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 97c3dafe57a8..804ae6681f8c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -111,6 +111,11 @@ mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( : RewritePattern(opName, {}, benefit, context), marker(marker), options(options) {} +mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( +LinalgTilingOptions options, LinalgMarker marker, PatternBenefit benefit) +: RewritePattern(benefit, MatchAnyOpTypeTag()), marker(marker), + options(options) {} + LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( Operation *op, PatternRewriter &rewriter, SmallVectorImpl &tensorResults) const { diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 8e60312bf4fd..f44bb6769e61 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -128,12 +128,12 @@ void GenerateLoopNest::doit( ArrayRef iteratorTypes, function_ref bodyBuilderFn, Optional distributionOptions) { - // Create procInfo so it dominate loops, if appropriate. + // Create procInfo so it dominates loops, if appropriate. OpBuilder &builder = edsc::ScopedContext::getBuilderRef(); Location loc = edsc::ScopedContext::getLocation(); SmallVector procInfo; if (distributionOptions.hasValue()) -procInfo = distributionOptions->procInfo(builder, loc, ArrayRef{}); +procInfo = distributionOptions->procInfo(builder, loc, loopRanges); SmallVector lbs, ubs, steps; unpackRanges(loopRanges, lbs, ubs, steps); @@ -143,11 +143,12 @@ void GenerateLoopNest::doit( if (!distributionOptions.hasValue() || loopNest.loops.empty()) return; - // TODO: support distributionMethod, which is currently ignored. 
+ // Only supports cyclic distribution for now. for (auto it : llvm::zip(loopNest.loops, procInfo, distributionOptions->distributionMethod)) -mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId, - std::get<1>(it).nprocs); +if (std::get<2>(it) == DistributionMethod::Cyclic) + mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId, +std::get<1>(it).nprocs); } /// Specialization to build affine "for" nest. diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index 9e3efcf41664..c2b4c7b9c821 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -415,8 +415,8 @@ static void fillTileAndDistributePatterns(MLIRContext *context, { LinalgLoopDistributionOptions cyclicNprocsEqNiters; -cyclicNprocsEqNiters.distributionMethod.resize( -2, DistributionMethod::CyclicNumProcsEqNumIters); +cyclicNprocsEqNiters.distributionMethod.resize(2, + DistributionMethod::Cyclic); cyclicNprocsEqNiters.procInfo = getGpuProcIds;
[llvm-branch-commits] [mlir] 2c66b6e - [mlir][Linalg] NFC - Expose tiling canonicalization patterns through a populate method
Author: Nicolas Vasilache Date: 2020-12-04T14:57:29Z New Revision: 2c66b6ec09ddf30b49563ff8fd961e5edbfc0b88 URL: https://github.com/llvm/llvm-project/commit/2c66b6ec09ddf30b49563ff8fd961e5edbfc0b88 DIFF: https://github.com/llvm/llvm-project/commit/2c66b6ec09ddf30b49563ff8fd961e5edbfc0b88.diff LOG: [mlir][Linalg] NFC - Expose tiling canonicalization patterns through a populate method Added: Modified: mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp Removed: diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 90c6a0374e94..dc82569aac38 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -387,6 +387,8 @@ struct LinalgTilingOptions { /// when tiling is called programmatically. OwningRewritePatternList getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx); +void populateLinalgTilingCanonicalizationPatterns( +OwningRewritePatternList &patterns, MLIRContext *ctx); struct LinalgBaseTilingPattern : public RewritePattern { // Entry point to match any LinalgOp OpInterface. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 2fd7597e9b10..50a18d4fb01c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -554,6 +554,12 @@ class RewritePatternList { OwningRewritePatternList mlir::linalg::getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx) { OwningRewritePatternList patterns; + populateLinalgTilingCanonicalizationPatterns(patterns, ctx); + return patterns; +} + +void mlir::linalg::populateLinalgTilingCanonicalizationPatterns( +OwningRewritePatternList &patterns, MLIRContext *ctx) { AffineApplyOp::getCanonicalizationPatterns(patterns, ctx); AffineForOp::getCanonicalizationPatterns(patterns, ctx); AffineMinOp::getCanonicalizationPatterns(patterns, ctx); @@ -569,7 +575,6 @@ mlir::linalg::getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx) { #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >::insert(patterns, ctx); - return patterns; } /// Populate the given list with patterns that apply Linalg tiling. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 7310501 - [mlir][ArmNeon][RFC] Add a Neon dialect
Author: Nicolas Vasilache Date: 2020-12-11T13:49:40Z New Revision: 7310501f74037e2845529da7affd8710d058bd04 URL: https://github.com/llvm/llvm-project/commit/7310501f74037e2845529da7affd8710d058bd04 DIFF: https://github.com/llvm/llvm-project/commit/7310501f74037e2845529da7affd8710d058bd04.diff LOG: [mlir][ArmNeon][RFC] Add a Neon dialect This revision starts an Arm-specific ArmNeon dialect discussed in the [discourse RFC thread](https://llvm.discourse.group/t/rfc-vector-dialects-neon-and-sve/2284). Differential Revision: https://reviews.llvm.org/D92171 Added: mlir/include/mlir/Conversion/ArmNeonToLLVM/ArmNeonToLLVM.h mlir/include/mlir/Dialect/ArmNeon/ArmNeon.td mlir/include/mlir/Dialect/ArmNeon/ArmNeonDialect.h mlir/include/mlir/Dialect/ArmNeon/CMakeLists.txt mlir/include/mlir/Dialect/LLVMIR/LLVMArmNeon.td mlir/include/mlir/Dialect/LLVMIR/LLVMArmNeonDialect.h mlir/lib/Conversion/ArmNeonToLLVM/ArmNeonToLLVM.cpp mlir/lib/Conversion/ArmNeonToLLVM/CMakeLists.txt mlir/lib/Dialect/ArmNeon/CMakeLists.txt mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp mlir/lib/Dialect/LLVMIR/IR/LLVMArmNeonDialect.cpp mlir/lib/Target/LLVMIR/LLVMArmNeonIntr.cpp mlir/test/Conversion/ArmNeonToLLVM/convert-to-llvm.mlir mlir/test/Dialect/ArmNeon/roundtrip.mlir mlir/test/Target/arm-neon.mlir Modified: mlir/include/mlir/Conversion/Passes.td mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h mlir/include/mlir/Dialect/CMakeLists.txt mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt mlir/include/mlir/IR/BuiltinTypes.h mlir/include/mlir/InitAllDialects.h mlir/include/mlir/InitAllTranslations.h mlir/lib/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.cpp mlir/lib/Conversion/CMakeLists.txt mlir/lib/Conversion/PassDetail.h mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp mlir/lib/Dialect/CMakeLists.txt mlir/lib/Dialect/LLVMIR/CMakeLists.txt mlir/lib/IR/BuiltinTypes.cpp mlir/lib/Target/CMakeLists.txt Removed: diff --git 
a/mlir/include/mlir/Conversion/ArmNeonToLLVM/ArmNeonToLLVM.h b/mlir/include/mlir/Conversion/ArmNeonToLLVM/ArmNeonToLLVM.h new file mode 100644 index ..41342c50d5ea --- /dev/null +++ b/mlir/include/mlir/Conversion/ArmNeonToLLVM/ArmNeonToLLVM.h @@ -0,0 +1,23 @@ +//===- ArmNeonToLLVM.h - Conversion Patterns from ArmNeon to LLVM -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef MLIR_CONVERSION_ARMNEONTOLLVM_ARMNEONTOLLVM_H_ +#define MLIR_CONVERSION_ARMNEONTOLLVM_ARMNEONTOLLVM_H_ + +namespace mlir { + +class LLVMTypeConverter; +class OwningRewritePatternList; + +/// Collect a set of patterns to convert from theArmNeon dialect to LLVM. +void populateArmNeonToLLVMConversionPatterns( +LLVMTypeConverter &converter, OwningRewritePatternList &patterns); + +} // namespace mlir + +#endif // MLIR_CONVERSION_ARMNEONTOLLVM_ARMNEONTOLLVM_H_ diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 53158afa0530..56169d90c849 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -396,12 +396,13 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> { operations. The lowering pass provides several options to control the kinds of optimizations that are allowed. It also provides options that enable the use of one or more architectural-specific dialects -(AVX512, Neon, SVE, etc.) in combination with the architectural-neutral +(AVX512, ArmNeon, SVE, etc.) in combination with the architectural-neutral vector dialect lowering. }]; let constructor = "mlir::createConvertVectorToLLVMPass()"; - let dependentDialects = ["LLVM::LLVMDialect", "LLVM::LLVMAVX512Dialect"]; + // Override explicitly in C++ to allow conditional dialect dependence. 
+ // let dependentDialects; let options = [ Option<"reassociateFPReductions", "reassociate-fp-reductions", "bool", /*default=*/"false", @@ -413,6 +414,10 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> { Option<"enableAVX512", "enable-avx512", "bool", /*default=*/"false", "Enables the use of AVX512 dialect while lowering the vector " + "dialect.">, +Option<"enableArmNeon", "enable-arm-neon", + "bool", /*default=*/"false", + "Enables the use of ArmNeon dialect while lowering the vector " "dialect."> ]; } diff --git a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h b/mlir/i
[llvm-branch-commits] [mlir] [mlir][transform] Drop redundant padding_dimensions spec from pad_tiling_interface (PR #145257)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/145257 >From 2f1558ae8c1c90a6091dbc821fd5438f5136b8ae Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Wed, 18 Jun 2025 19:14:31 +0200 Subject: [PATCH 1/2] [mlir][transform] Plumb a simplified form of AffineMin folding into transform.pad-tiling-interface This revision introduces a simple variant of AffineMin folding in makeComposedFoldedAffineApply and makes use of it in transform.pad-tiling-interface. Since this version explicitly call ValueBoundsInterface, it may be too expensive and is only activate behind a flag. It results in better foldings when mixing tiling and padding, including with dynamic shapes. This should be further composed with #145068 to provide full simplification and address the remaining TODO in the test. --- .../mlir/Dialect/Affine/IR/AffineOps.h| 18 ++- .../mlir/Interfaces/ValueBoundsOpInterface.h | 2 +- mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 134 ++ .../Linalg/Transforms/PadTilingInterface.cpp | 5 +- .../lib/Interfaces/ValueBoundsOpInterface.cpp | 2 +- ...m-op-pad-tiling-interface-multiple-of.mlir | 131 + 6 files changed, 251 insertions(+), 41 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h index 6fdb72c370e6d..2091faa6b0b02 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -410,9 +410,11 @@ void canonicalizeSetAndOperands(IntegerSet *set, /// other AffineApplyOps supplying those operands. The operands of the resulting /// AffineApplyOp do not change the length of AffineApplyOp chains. 
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef operands); + ArrayRef operands, + bool composeAffineMin = false); AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e, - ArrayRef operands); + ArrayRef operands, + bool composeAffineMin = false); /// Constructs an AffineApplyOp that applies `map` to `operands` after composing /// the map with the maps of any other AffineApplyOp supplying the operands, @@ -421,16 +423,19 @@ AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e, /// map. OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef operands); + ArrayRef operands, + bool composeAffineMin = false); /// Variant of `makeComposedFoldedAffineApply` that applies to an expression. OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineExpr expr, - ArrayRef operands); + ArrayRef operands, + bool composeAffineMin = false); /// Variant of `makeComposedFoldedAffineApply` suitable for multi-result maps. /// Note that this may create as many affine.apply operations as the map has /// results given that affine.apply must be single-result. SmallVector makeComposedFoldedMultiResultAffineApply( -OpBuilder &b, Location loc, AffineMap map, ArrayRef operands); +OpBuilder &b, Location loc, AffineMap map, ArrayRef operands, +bool composeAffineMin = false); /// Returns an AffineMinOp obtained by composing `map` and `operands` with /// AffineApplyOps supplying those operands. @@ -459,7 +464,8 @@ OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, /// terminal symbol, i.e., a symbol defined at the top level or a block/function /// argument. 
void fullyComposeAffineMapAndOperands(AffineMap *map, - SmallVectorImpl *operands); + SmallVectorImpl *operands, + bool composeAffineMin = false); } // namespace affine } // namespace mlir diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index 337314143c80c..523df173093fa 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -135,7 +135,7 @@ class ValueBoundsConstraintSet /// Construct a variable for a map and its operands. Variable(AffineMap map, ArrayRef mapOperands); -Variable(AffineMap map, ArrayRef mapOperands); +Variable(AffineMap map, ValueRange mapOperands); MLIRContext *getContext() const { return map.getContext();
[llvm-branch-commits] [mlir] [mlir] NFC - refactor id builder and avoid leaking impl details (PR #146922)
https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/146922 None >From c88aee740d5d944364e79600bf3c01493a1c3fee Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 18:32:59 +0200 Subject: [PATCH] [mlir] NFC - refactor id builder and avoid leaking impl details --- .../mlir/Dialect/GPU/TransformOps/Utils.h | 31 ++- .../GPU/TransformOps/GPUTransformOps.cpp | 33 +--- mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 176 +++--- 3 files changed, 127 insertions(+), 113 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index 111c67638efc8..de512ded59fec 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -28,27 +28,24 @@ namespace transform { namespace gpu { /// Helper type for functions that generate ids for the mapping of a scf.forall. -/// Operates on both 1) an "original" basis that represents the individual -/// thread and block ids and 2) a "scaled" basis that represents grouped ids -/// (e.g. block clusters, warpgroups and warps). -/// The mapping of ids is done in the "scaled" basis (i.e. when mapping to warps -/// a division by 32 occurs). -/// The predication is in the "original" basis using the "active" quantities -/// (`activeMappingSizes`, `availableMappingSizes` and `activeIdOps`). struct IdBuilderResult { - // Ops used to replace the forall induction variables. + /// Error message, if not empty then building the ids failed. + std::string errorMsg; + /// Values used to replace the forall induction variables. SmallVector mappingIdOps; - // Available mapping sizes used to predicate the forall body when they are - // larger than the predicate mapping sizes. - SmallVector availableMappingSizes; - // Actual mapping sizes used to predicate the forall body when they are - // smaller than the available mapping sizes. 
- SmallVector activeMappingSizes; - // Ops used to predicate the forall body when activeMappingSizes is smaller - // than the available mapping sizes. - SmallVector activeIdOps; + /// Values used to predicate the forall body when activeMappingSizes is + /// smaller than the available mapping sizes. + SmallVector predicateOps; }; +inline raw_ostream &operator<<(raw_ostream &os, const IdBuilderResult &res) { + llvm::interleaveComma(res.mappingIdOps, os << "mappingIdOps: "); + os << "\n"; + llvm::interleaveComma(res.predicateOps, os << "predicateOps: "); + os << "\n"; + return os; +} + /// Common gpu id builder type, allows the configuration of lowering for various /// mapping schemes. Takes: /// - A rewriter with insertion point set before the forall op to rewrite. diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index 20d1c94409238..63f87d9b5877e 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -491,6 +491,10 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( IdBuilderResult builderResult = gpuIdBuilder.idBuilder(rewriter, loc, forallMappingSizes, originalBasis); + if (!builderResult.errorMsg.empty()) +return definiteFailureHelper(transformOp, forallOp, builderResult.errorMsg); + + LLVM_DEBUG(DBGS() << builderResult); // Step 4. Map the induction variables to the mappingIdOps, this may involve // a permutation. @@ -501,7 +505,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( forallMappingAttrs.getArrayRef().take_front(forallOp.getRank( { auto mappingAttr = cast(dim); Value peIdOp = mappingIdOps[mappingAttr.getRelativeIndex()]; -LDBG("map: " << iv << " to" << peIdOp); +LDBG("map: " << iv << " to " << peIdOp); bvm.map(iv, peIdOp); } @@ -510,32 +514,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( // originalBasis and no predication occurs. 
Value predicate; if (originalBasisWasProvided) { -SmallVector activeMappingSizes = builderResult.activeMappingSizes; -SmallVector availableMappingSizes = -builderResult.availableMappingSizes; -SmallVector activeIdOps = builderResult.activeIdOps; -LDBG("activeMappingSizes: " << llvm::interleaved(activeMappingSizes)); -LDBG("availableMappingSizes: " - << llvm::interleaved(availableMappingSizes)); -LDBG("activeIdOps: " << llvm::interleaved(activeIdOps)); -for (auto [activeId, activeMappingSize, availableMappingSize] : - llvm::zip_equal(activeIdOps, activeMappingSizes, - availableMappingSizes)) { - if (activeMappingSize > availableMappingSize) { -return definiteFailureHelper( -transformOp, forallOp, -"Trying to map to fewer GPU threads than loop iterations but " -
[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)
https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/146962 …rm dialect Authored-by: Son Tuan Vu >From d8730eb667660782ec1dce6e9cdea020c5821300 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 23:09:00 +0200 Subject: [PATCH] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transform dialect Authored-by: Son Tuan Vu --- .../GPU/TransformOps/GPUTransformOps.td | 14 +++ .../Dialect/GPU/TransformOps/CMakeLists.txt | 1 + .../GPU/TransformOps/GPUTransformOps.cpp | 38 +++ .../llvm-project-overlay/mlir/BUILD.bazel | 2 + 4 files changed, 55 insertions(+) diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td index 36b579485fc04..87423c639945f 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td @@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : Op]> { + let description = [{ +Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These +patterns require an "LLVMTypeConverter". 
+ }]; + let arguments = (ins StrAttr:$chipset); + let assemblyFormat = [{ +`chipset` `=` $chipset attr-dict + }]; +} + //===--===// // Apply...PatternsOp //===--===// diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt index b26788f675ce5..e5cc0254f1ffe 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt @@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps # ConversionPatterns MLIRNVGPUToNVVM MLIRGPUToNVVMTransforms + MLIRGPUToROCDLTransforms ) diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index a86fc47947130..b764a72529f8f 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -10,6 +10,7 @@ #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" +#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -42,6 +43,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/LogicalResult.h" #include using namespace mlir; @@ -129,6 +131,42 @@ LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp:: return success(); } +void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns( +TypeConverter &typeConverter, RewritePatternSet &patterns) { + auto &llvmTypeConverter = static_cast(typeConverter); + populateGpuMemorySpaceAttributeConversions( + llvmTypeConverter, [](AddressSpace space) { +switch (space) { +case AddressSpace::Global: + return 1; +case AddressSpace::Workgroup: + return 3; +case AddressSpace::Private: + return 5; +} +llvm_unreachable("unknown address space enum 
value"); +return 0; + }); + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + assert(llvm::succeeded(maybeChipset) && "expected valid chipset"); + populateGpuToROCDLConversionPatterns( + llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, *maybeChipset); +} + +LogicalResult +transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter( +transform::TypeConverterBuilderOpInterface builder) { + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + if (failed(maybeChipset)) { +return emitOpError("Invalid chipset name: " + getChipset()); + } + if (builder.getTypeConverterType() != "LLVMTypeConverter") +return emitOpError("expected LLVMTypeConverter"); + return success(); +} + //===--===// // Apply...PatternsOp //===--===//s diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index cc266c2fe3a77..79f2cd5ea71db 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5502,6 +5502,7 @@ cc_library( ":GPUDialect", ":GPUToGPURuntimeTransforms", ":GPUToNVVMTransforms", +":GPUToROCDLTransforms", ":GPUTransformOpsIncGen", ":GPUTransforms", ":IR", @@ -5509,6 +5510,7 @@ cc_libra
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From e2fc2f4d78809d5196719b546fd2a6a06058837f Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 58 ++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 441 insertions(+), 58 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From ad456bbf3da7ca290c521a945e950fd1cbf3ca81 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 62 +++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 444 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache edited https://github.com/llvm/llvm-project/pull/146943 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From 403e4ba3929516ac27d51baf306dda2a043fd305 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 62 +++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 444 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From 85aa5f8c72801f5a75142a663d6e89e83e63decc Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 62 +++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 444 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/146943 …lOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. >From 02e425b30966f4781fe07d8cf595a1e2b0d41aa3 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. 
Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 58 ++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 100 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 439 insertions(+), 58 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. 
+ }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. stru