[flang] [clang-tools-extra] [llvm] [libcxx] [libc] [lld] [lldb] [clang] [compiler-rt] [libunwind] Fix clang to recognize new C23 modifiers %w and %wf when printing (PR #71771)

2023-12-01 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/71771

Sorry, this diff is unavailable.


[mlir] [clang] [llvm] Fix unsigned typos (PR #76670)

2024-01-01 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer approved this pull request.


https://github.com/llvm/llvm-project/pull/76670


[libc] [compiler-rt] [lld] [clang-tools-extra] [libcxx] [llvm] [mlir] [lldb] [clang] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer edited 
https://github.com/llvm/llvm-project/pull/71555


[compiler-rt] [lld] [lldb] [libc] [llvm] [clang-tools-extra] [libcxx] [clang] [mlir] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer commented:

Sorry, I dropped the ball on this review. Here are a few more small comments; I'm going to do another, more thorough review.

https://github.com/llvm/llvm-project/pull/71555


[lld] [llvm] [clang-tools-extra] [lldb] [clang] [mlir] [libcxx] [libc] [compiler-rt] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,165 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+mapping.map(arg, operand.get());
+  }
+  b.setInsertionPoint(forOp);
+  auto cond =
+  b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+  forOp.getLowerBound(), forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true);
+  // then branch
+  SmallVector<Value> bbArgReplacements;
+  bbArgReplacements.push_back(forOp.getLowerBound());
+  llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+  b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+  ifOp.thenBlock()->begin(), bbArgReplacements);
+  // else branch
+  b.setInsertionPointToStart(ifOp.elseBlock());
+  if (!forOp->getResultTypes().empty()) {
+b.create<scf::YieldOp>(loc, forOp.getInits());
+  }
+  b.replaceOp(forOp, ifOp->getResults());
+  return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+   ForOp &partialIteration,
+   bool convertSingleIterLoopsToIf) {
+
+  scf::ForOp currentLoop;
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // Step size must be a known positive constant greater than 1.
+  if (!stepInt || stepInt <= static_cast<int64_t>(1))
+return failure();
+
+  Value initialUb = forOp.getUpperBound();
+  Value initialStep = forOp.getStep();
+  uint64_t loopStep = *stepInt;
+  currentLoop = forOp;
+  AffineExpr sym0, sym1, sym2;
+  bindSymbols(b.getContext(), sym0, sym1, sym2);
+  AffineMap defaultSplitMap =
+  AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
+  AffineMap powerSplitMap = AffineMap::get(0, 3, {sym1 - (sym1 % sym2)});
+  bool usePowerSplit = (lbInt.has_value()) &&
+   (*lbInt % *stepInt == static_cast<int64_t>(0)) &&
+   (loopStep == llvm::bit_floor(loopStep));
+  AffineMap splitMap = usePowerSplit ? powerSplitMap : defaultSplitMap;
+  SmallVector<scf::ForOp> loops;
+  while (loopStep) {
+b.setInsertionPoint(currentLoop);
+auto constStepOp =
+b.create<arith::ConstantIndexOp>(currentLoop.getLoc(), loopStep);
+currentLoop.getStepMutable().assign(constStepOp);

matthias-springer wrote:

This should be wrapped in `updateRootInPlace` because existing operations may 
be modified.
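
For reference, a sketch of the suggested wrapping (the later revision of the patch, quoted further down in this thread, does exactly this):

```cpp
// Notify the rewriter that an existing op is modified in place.
b.updateRootInPlace(currentLoop, [&]() {
  currentLoop.getStepMutable().assign(constStepOp);
});
```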


https://github.com/llvm/llvm-project/pull/71555


[lld] [libc] [compiler-rt] [libcxx] [llvm] [clang] [mlir] [clang-tools-extra] [lldb] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits


@@ -147,6 +147,43 @@ def LoopPeelOp : Op {
+  let description = [{
+Transforms the loop into a chain of loops, with step sizes that are
+powers of two and decrease exponetially across subsequent loops.

matthias-springer wrote:

typo

https://github.com/llvm/llvm-project/pull/71555


[lld] [llvm] [clang] [lldb] [mlir] [compiler-rt] [clang-tools-extra] [libcxx] [libc] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits


@@ -206,6 +206,34 @@ 
transform::LoopPeelOp::applyToOne(transform::TransformRewriter &rewriter,
   return DiagnosedSilenceableFailure::success();
 }
 
+//===----------------------------------------------------------------------===//
+// LoopContinuousPeelOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure transform::LoopContinuousPeelOp::applyToOne(
+transform::TransformRewriter &rewriter, Operation *target,
+transform::ApplyToEachResultList &results,
+transform::TransformState &state) {
+  scf::ForOp loop, result;
+  loop = dyn_cast<scf::ForOp>(target);
+  bool convertSingleIterLoopsToIf = false;
+
+  if (getConvertSingleIterLoopsToIf())
+convertSingleIterLoopsToIf = true;
+
+  LogicalResult status = scf::continuousPeelForLoopAndSimplifyBounds(
+  rewriter, loop, result, convertSingleIterLoopsToIf);

matthias-springer wrote:

You can use `getConvertSingleIterLoopsToIf()` directly here.
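
I.e., a sketch of the simplification being suggested (assuming the generated getter returns `bool`), dropping the local flag entirely:

```cpp
LogicalResult status = scf::continuousPeelForLoopAndSimplifyBounds(
    rewriter, loop, result,
    /*convertSingleIterLoopsToIf=*/getConvertSingleIterLoopsToIf());
```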

https://github.com/llvm/llvm-project/pull/71555


[mlir] [libcxx] [lldb] [libc] [clang] [clang-tools-extra] [llvm] [lld] [compiler-rt] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits


@@ -81,6 +81,47 @@ void naivelyFuseParallelOps(Region &region);
 LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp,
scf::ForOp &partialIteration);
 
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops.
+///
+/// E.g., assuming a lower bound of 0, the following loop
+/// ```
+/// scf.for %iv = %c0 to %ub step %c8 {
+///   (loop body)
+/// }
+/// ```
+/// is rewritten into the following pseudo IR:
+/// ```
+/// %newUb = %ub - (%ub mod %c8)
+/// scf.for %iv = %c0 to %newUb step %c8 {
+///   (loop body)
+/// }
+/// %newUb2 = %ub - (%ub mod %c4)
+/// scf.for %iv2 = %newUb to %newUb2 {

matthias-springer wrote:

Step size is missing.
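
Presumably the second loop in the pseudo IR should read something like:

```mlir
%newUb2 = %ub - (%ub mod %c4)
scf.for %iv2 = %newUb to %newUb2 step %c4 {
  (loop body)
}
```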

https://github.com/llvm/llvm-project/pull/71555


[clang] [lld] [libcxx] [lldb] [mlir] [clang-tools-extra] [compiler-rt] [libc] [llvm] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-05 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,165 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {

matthias-springer wrote:

nit: `zip_equal`
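
For reference, the suggested form (`llvm::zip_equal` asserts that both ranges have the same length; the revision quoted further down already uses it):

```cpp
for (auto [arg, operand] :
     llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable()))
  mapping.map(arg, operand.get());
```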

https://github.com/llvm/llvm-project/pull/71555


[clang-tools-extra] [libc] [mlir] [clang] [lld] [lldb] [compiler-rt] [libcxx] [llvm] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+mapping.map(arg, operand.get());
+  }
+  b.setInsertionPoint(forOp);
+  auto cond =
+  b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+  forOp.getLowerBound(), forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true);

matthias-springer wrote:

`/*withElseRegion=*/true`
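
I.e., spelling out what the boolean argument means at the call site:

```cpp
auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond,
                                /*withElseRegion=*/true);
```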

https://github.com/llvm/llvm-project/pull/71555


[libcxx] [mlir] [lldb] [llvm] [compiler-rt] [clang] [libc] [clang-tools-extra] [lld] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {

matthias-springer wrote:

Do not pass `splitBound` as reference. Otherwise it looks like a return value.
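
I.e., something along these lines, so that only `partialIteration` reads as an out-parameter (the later revision quoted below does take `splitBound` by value):

```cpp
static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
                                     scf::ForOp &partialIteration,
                                     Value splitBound);
```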

https://github.com/llvm/llvm-project/pull/71555


[mlir] [llvm] [libc] [libcxx] [clang-tools-extra] [clang] [compiler-rt] [lldb] [lld] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -0,0 +1,46 @@
+// RUN: mlir-opt %s -scf-for-loop-continuous-peeling=convert-single-iter-loops-to-if=true -split-input-file | FileCheck %s
+
+#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+func.func @foo(%ub: index) -> index {
+  %c0 = arith.constant 0 : index
+  %step = arith.constant 8 : index
+  %0 = scf.for %iv = %c0 to %ub step %step iter_args(%arg = %c0) -> (index) {
+%1 = affine.min #map(%ub, %iv)[%step]
+%2 = index.add %1, %arg
+scf.yield %2 : index
+  }
+  return %0 : index
+}
+
+// CHECK: #[[MAP:.*]] = affine_map<()[s0, s1, s2] -> (s1 - s1 mod s2)>
+// CHECK: func.func @foo(%[[UB:.*]]: index) -> index {
+// CHECK: %[[STEP8:.*]] = arith.constant 8 : index
+// CHECK: %[[STEP4:.*]] = arith.constant 4 : index
+// CHECK: %[[STEP2:.*]] = arith.constant 2 : index
+// CHECK: %[[STEP1:.*]] = arith.constant 1 : index
+// CHECK: %[[LB:.*]] = arith.constant 0 : index
+// CHECK: %[[I0:.*]] = affine.apply #[[MAP]]()[%[[LB]], %[[UB]], %[[STEP8]]]
// CHECK: %[[I1:.*]] = scf.for %{{.*}} = %[[LB]] to %[[I0]] step %[[STEP8]] iter_args(%[[ALB:.*]] = %[[LB]]) -> (index) {
+// CHECK: %[[SUM:.*]] = index.add %[[ALB]], %[[STEP8]]
+// CHECK: scf.yield %[[SUM]] : index
+// CHECK: %[[I2:.*]] = affine.apply #[[MAP]]()[%[[I0]], %[[UB]], %[[STEP4]]]

matthias-springer wrote:

Please indent for better readability.
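
E.g., one possible layout; FileCheck canonicalizes horizontal whitespace by default, so the extra indentation is purely for readability:

```mlir
// CHECK: %[[I1:.*]] = scf.for %{{.*}} = %[[LB]] to %[[I0]] step %[[STEP8]] iter_args(%[[ALB:.*]] = %[[LB]]) -> (index) {
// CHECK:   %[[SUM:.*]] = index.add %[[ALB]], %[[STEP8]]
// CHECK:   scf.yield %[[SUM]] : index
```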


https://github.com/llvm/llvm-project/pull/71555


[mlir] [clang-tools-extra] [libcxx] [lld] [libc] [clang] [lldb] [llvm] [compiler-rt] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+mapping.map(arg, operand.get());
+  }
+  b.setInsertionPoint(forOp);
+  auto cond =
+  b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+  forOp.getLowerBound(), forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true);
+  // then branch
+  SmallVector<Value> bbArgReplacements;
+  bbArgReplacements.push_back(forOp.getLowerBound());
+  llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+  b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+  ifOp.thenBlock()->begin(), bbArgReplacements);
+  // else branch
+  b.setInsertionPointToStart(ifOp.elseBlock());
+  if (!forOp->getResultTypes().empty()) {
+b.create<scf::YieldOp>(loc, forOp.getInits());
+  }
+  b.replaceOp(forOp, ifOp->getResults());
+  return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+   ForOp &partialIteration,
+   bool convertSingleIterLoopsToIf) {
+
+  scf::ForOp currentLoop;
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // Step size must be a known positive constant greater than 1.
+  if (!stepInt || stepInt <= static_cast<int64_t>(1))
+return failure();
+
+  Value initialUb = forOp.getUpperBound();
+  Value initialStep = forOp.getStep();
+  uint64_t loopStep = *stepInt;
+  currentLoop = forOp;
+  AffineExpr sym0, sym1, sym2;
+  bindSymbols(b.getContext(), sym0, sym1, sym2);
+  AffineMap defaultSplitMap =
+  AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
+  AffineMap powerSplitMap = AffineMap::get(0, 3, {sym1 - (sym1 % sym2)});
+  bool usePowerSplit = (lbInt.has_value()) &&
+   (*lbInt % *stepInt == static_cast<int64_t>(0)) &&
+   (loopStep == llvm::bit_floor(loopStep));
+  AffineMap splitMap = usePowerSplit ? powerSplitMap : defaultSplitMap;
+  SmallVector<scf::ForOp> loops;
+  while (loopStep) {
+b.setInsertionPoint(currentLoop);
+auto constStepOp =
+b.create<arith::ConstantIndexOp>(currentLoop.getLoc(), loopStep);
+b.updateRootInPlace(currentLoop, [&]() {
+  currentLoop.getStepMutable().assign(constStepOp);
+});
+b.setInsertionPoint(currentLoop);
+Value splitBound = b.createOrFold<affine::AffineApplyOp>(
+currentLoop.getLoc(), splitMap,
+ValueRange{currentLoop.getLowerBound(), currentLoop.getUpperBound(),
+   currentLoop.getStep()});
+LogicalResult status =
+spl

[llvm] [libc] [lldb] [mlir] [libcxx] [clang] [clang-tools-extra] [compiler-rt] [lld] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.

matthias-springer wrote:

Explain the meaning of the return value.
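
E.g., along the lines of what the next revision (quoted further down) adds:

```cpp
/// ... Returns failure if the loop cannot be split (step size 1, or the step
/// already divides the iteration space evenly) and no new partialIteration is
/// created.
```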

https://github.com/llvm/llvm-project/pull/71555


[mlir] [libc] [clang-tools-extra] [libcxx] [clang] [llvm] [lld] [compiler-rt] [lldb] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+mapping.map(arg, operand.get());
+  }
+  b.setInsertionPoint(forOp);
+  auto cond =
+  b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+  forOp.getLowerBound(), forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true);
+  // then branch
+  SmallVector<Value> bbArgReplacements;
+  bbArgReplacements.push_back(forOp.getLowerBound());
+  llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+  b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+  ifOp.thenBlock()->begin(), bbArgReplacements);
+  // else branch
+  b.setInsertionPointToStart(ifOp.elseBlock());
+  if (!forOp->getResultTypes().empty()) {
+b.create<scf::YieldOp>(loc, forOp.getInits());
+  }
+  b.replaceOp(forOp, ifOp->getResults());
+  return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+   ForOp &partialIteration,
+   bool convertSingleIterLoopsToIf) {
+
+  scf::ForOp currentLoop;
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // Step size must be a known positive constant greater than 1.
+  if (!stepInt || stepInt <= static_cast<int64_t>(1))
+return failure();
+
+  Value initialUb = forOp.getUpperBound();
+  Value initialStep = forOp.getStep();
+  uint64_t loopStep = *stepInt;
+  currentLoop = forOp;
+  AffineExpr sym0, sym1, sym2;
+  bindSymbols(b.getContext(), sym0, sym1, sym2);
+  AffineMap defaultSplitMap =

matthias-springer wrote:

Add comment to explain why you have `defaultSplitMap` and `powerSplitMap`.
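
A possible comment, describing what the code above actually selects (a sketch; the exact wording is up to the author):

```cpp
// Both maps compute the bound at which the iteration space is split:
//   defaultSplitMap: ub - ((ub - lb) mod step)  -- general case
//   powerSplitMap:   ub - (ub mod step)         -- used when lb is a constant
//     multiple of step and step is a power of two; in that case the two
//     expressions agree but the latter is simpler.
```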

https://github.com/llvm/llvm-project/pull/71555


[clang] [lld] [compiler-rt] [lldb] [libcxx] [clang-tools-extra] [mlir] [libc] [llvm] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -0,0 +1,46 @@
+// RUN: mlir-opt %s -scf-for-loop-continuous-peeling=convert-single-iter-loops-to-if=true -split-input-file | FileCheck %s
+
+#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+func.func @foo(%ub: index) -> index {
+  %c0 = arith.constant 0 : index
+  %step = arith.constant 8 : index
+  %0 = scf.for %iv = %c0 to %ub step %step iter_args(%arg = %c0) -> (index) {

matthias-springer wrote:

Add another test case so that both `usePowerSplit = false` and `usePowerSplit = true` are exercised.
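
A hypothetical second test (made-up name, CHECK lines omitted) that would take the `usePowerSplit = false` path, e.g. by using a step that is not a power of two:

```mlir
func.func @foo_non_pow2_step(%ub: index) -> index {
  %c0 = arith.constant 0 : index
  %step = arith.constant 7 : index
  %0 = scf.for %iv = %c0 to %ub step %step iter_args(%arg = %c0) -> (index) {
    %1 = index.add %iv, %arg
    scf.yield %1 : index
  }
  return %0 : index
}
```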

https://github.com/llvm/llvm-project/pull/71555


[mlir] [clang-tools-extra] [lld] [lldb] [clang] [libcxx] [llvm] [compiler-rt] [libc] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -189,16 +188,17 @@ LogicalResult 
scf::canonicalizeMinMaxOpInLoop(RewriterBase &rewriter,
 /// * Inside the peeled loop: min(step, ub - iv) == step
 /// * Inside the partial iteration: min(step, ub - iv) == ub - iv
 ///
-/// Returns `success` if the given operation was replaced by a new operation;
+/// Returns the new Affine op if the operation was replaced by a new operation;

matthias-springer wrote:

`new AffineApplyOp`

https://github.com/llvm/llvm-project/pull/71555


[libcxx] [libc] [lldb] [clang-tools-extra] [llvm] [mlir] [compiler-rt] [lld] [clang] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+mapping.map(arg, operand.get());
+  }
+  b.setInsertionPoint(forOp);
+  auto cond =
+  b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+  forOp.getLowerBound(), forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true);
+  // then branch
+  SmallVector<Value> bbArgReplacements;
+  bbArgReplacements.push_back(forOp.getLowerBound());
+  llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+  b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+  ifOp.thenBlock()->begin(), bbArgReplacements);
+  // else branch
+  b.setInsertionPointToStart(ifOp.elseBlock());
+  if (!forOp->getResultTypes().empty()) {
+b.create<scf::YieldOp>(loc, forOp.getInits());
+  }
+  b.replaceOp(forOp, ifOp->getResults());
+  return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+   ForOp &partialIteration,
+   bool convertSingleIterLoopsToIf) {
+
+  scf::ForOp currentLoop;
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // Step size must be a known positive constant greater than 1.
+  if (!stepInt || stepInt <= static_cast<int64_t>(1))
+return failure();
+
+  Value initialUb = forOp.getUpperBound();
+  Value initialStep = forOp.getStep();
+  uint64_t loopStep = *stepInt;
+  currentLoop = forOp;
+  AffineExpr sym0, sym1, sym2;
+  bindSymbols(b.getContext(), sym0, sym1, sym2);
+  AffineMap defaultSplitMap =
+  AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
+  AffineMap powerSplitMap = AffineMap::get(0, 3, {sym1 - (sym1 % sym2)});
+  bool usePowerSplit = (lbInt.has_value()) &&
+   (*lbInt % *stepInt == static_cast<int64_t>(0)) &&
+   (loopStep == llvm::bit_floor(loopStep));
+  AffineMap splitMap = usePowerSplit ? powerSplitMap : defaultSplitMap;
+  SmallVector<scf::ForOp> loops;
+  while (loopStep) {
+b.setInsertionPoint(currentLoop);
+auto constStepOp =
+b.create<arith::ConstantIndexOp>(currentLoop.getLoc(), loopStep);
+b.updateRootInPlace(currentLoop, [&]() {
+  currentLoop.getStepMutable().assign(constStepOp);
+});
+b.setInsertionPoint(currentLoop);
+Value splitBound = b.createOrFold<affine::AffineApplyOp>(
+currentLoop.getLoc(), splitMap,
+ValueRange{currentLoop.getLowerBound(), currentLoop.getUpperBound(),
+   currentLoop.getStep()});
+LogicalResult status =
+spl

[clang-tools-extra] [lldb] [libcxx] [libc] [compiler-rt] [clang] [llvm] [mlir] [lld] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;

matthias-springer wrote:

`mapping` is not used, can be deleted.

https://github.com/llvm/llvm-project/pull/71555


[lldb] [clang-tools-extra] [compiler-rt] [mlir] [libcxx] [clang] [libc] [lld] [llvm] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)

matthias-springer wrote:

typo: partiaIteration

https://github.com/llvm/llvm-project/pull/71555


[llvm] [lld] [lldb] [mlir] [clang] [libc] [clang-tools-extra] [compiler-rt] [libcxx] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)

2024-01-08 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partiaIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+  partialIteration.getLowerBoundMutable().assign(splitBound);
+  forOp.replaceAllUsesWith(partialIteration->getResults());
+  partialIteration.getInitArgsMutable().assign(forOp->getResults());
+
+  // Set new upper loop bound.
+  b.updateRootInPlace(
+  forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+  return success();
+}
+
+/// Convert single-iteration for loop to if-else block.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  IRMapping mapping;
+  mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+  for (auto [arg, operand] :
+   llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+mapping.map(arg, operand.get());
+  }
+  b.setInsertionPoint(forOp);
+  auto cond =
+  b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+  forOp.getLowerBound(), forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true);
+  // then branch
+  SmallVector<Value> bbArgReplacements;
+  bbArgReplacements.push_back(forOp.getLowerBound());
+  llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+  b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+  ifOp.thenBlock()->begin(), bbArgReplacements);
+  // else branch
+  b.setInsertionPointToStart(ifOp.elseBlock());
+  if (!forOp->getResultTypes().empty()) {
+b.create<scf::YieldOp>(loc, forOp.getInits());
+  }
+  b.replaceOp(forOp, ifOp->getResults());
+  return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+   ForOp &partialIteration,
+   bool convertSingleIterLoopsToIf) {
+
+  scf::ForOp currentLoop;
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // Step size must be a known positive constant greater than 1.
+  if (!stepInt || stepInt <= static_cast<int64_t>(1))
+return failure();
+
+  Value initialUb = forOp.getUpperBound();
+  Value initialStep = forOp.getStep();
+  uint64_t loopStep = *stepInt;
+  currentLoop = forOp;
+  AffineExpr sym0, sym1, sym2;
+  bindSymbols(b.getContext(), sym0, sym1, sym2);
+  AffineMap defaultSplitMap =
+  AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
+  AffineMap powerSplitMap = AffineMap::get(0, 3, {sym1 - (sym1 % sym2)});
+  bool usePowerSplit = (lbInt.has_value()) &&
+   (*lbInt % *stepInt == static_cast<int64_t>(0)) &&
+   (loopStep == llvm::bit_floor(loopStep));
+  AffineMap splitMap = usePowerSplit ? powerSplitMap : defaultSplitMap;
+  SmallVector<scf::ForOp> loops;
+  while (loopStep) {
+b.setInsertionPoint(currentLoop);
+auto constStepOp =
+b.create<arith::ConstantIndexOp>(currentLoop.getLoc(), loopStep);
+b.updateRootInPlace(currentLoop, [&]() {
+  currentLoop.getStepMutable().assign(constStepOp);
+});
+b.setInsertionPoint(currentLoop);
+Value splitBound = b.createOrFold<affine::AffineApplyOp>(
+currentLoop.getLoc(), splitMap,
+ValueRange{currentLoop.getLowerBound(), currentLoop.getUpperBound(),
+   currentLoop.getStep()});
+LogicalResult status =
+spl

[mlir] [flang] [llvm] [clang] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #77328)

2024-01-10 Thread Matthias Springer via cfe-commits

matthias-springer wrote:

Can you re-open the old PR and force-push the contents of this PR to the old 
PR? Ideally, we'd keep using the old PR, so that we don't lose the review
comments.


https://github.com/llvm/llvm-project/pull/77328


[mlir] [flang] [llvm] [clang] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #77328)

2024-01-10 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,161 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partialIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up. Returns failure if the loop can not
+/// be split and no new partialIteration is created.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  IRMapping map;
+  auto constStepOp =
+  b.create<arith::ConstantIndexOp>(forOp.getLoc(), *stepInt / 2);
+  // The new for loop for the remaining iterations has half the step size
+  // as continuous peeling requires the step size to diminish exponentially
+  // across subsequent loops.
+  map.map(forOp.getStep(), constStepOp);

matthias-springer wrote:

I think this won't work. The SSA value of `forOp.getStep()` could be used in 
different ways inside of the loop and you don't want to change that.

E.g.:
```mlir
scf.for ... step %c16 {
  // This op should not be changed as part of loop peeling.
  "test.foo"(%c16) : () -> ()
}
```

What's the purpose of this `map.map`? Is it meant to canonicalize 
`affine.min/max` ops, taking into account the fact that the loop was peeled?


https://github.com/llvm/llvm-project/pull/77328


[flang] [mlir] [llvm] [clang] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #77328)

2024-01-10 Thread Matthias Springer via cfe-commits


@@ -105,6 +106,161 @@ static void specializeForLoopForUnrolling(ForOp op) {
   op.erase();
 }
 
+/// Create a new for loop for the remaining iterations (partialIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up. Returns failure if the loop can not
+/// be split and no new partialIteration is created.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value splitBound) {
+  RewriterBase::InsertionGuard guard(b);
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto ubInt = getConstantIntValue(forOp.getUpperBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // No specialization necessary if step already divides upper bound evenly.
+  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+return failure();
+  // No specialization necessary if step size is 1.
+  if (stepInt == static_cast<int64_t>(1))
+return failure();
+
+  // Create ForOp for partial iteration.
+  b.setInsertionPointAfter(forOp);
+  IRMapping map;
+  auto constStepOp =
+  b.create<arith::ConstantIndexOp>(forOp.getLoc(), *stepInt / 2);
+  // The new for loop for the remaining iterations has half the step size
+  // as continuous peeling requires the step size to diminish exponentially
+  // across subsequent loops.
+  map.map(forOp.getStep(), constStepOp);

matthias-springer wrote:

Generally speaking, for whatever modification you make to the loop body, you 
have to be sure that the loop body is still computing the same thing as before. 
Just blanket replacing all occurrences of the old step size (even if it's just 
in `affine.min/max` ops) with the new step size may change the semantics of the 
loop.

The only safe way of canonicalizing the loop body that we have at the moment is 
`rewritePeeledMinMaxOp`. This function will look for `affine.min/max` ops and 
select one of the provided options if it can prove that doing so would always 
be correct in the peeled or partial loop.



https://github.com/llvm/llvm-project/pull/77328


[mlir] [compiler-rt] [clang-tools-extra] [clang] [llvm] [mlir][bufferization] Fix SimplifyClones with dealloc before cloneOp (PR #79098)

2024-01-25 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer closed 
https://github.com/llvm/llvm-project/pull/79098


[llvm] [mlir] [clang] [flang] [mlir][complex] Prevent underflow in complex.abs (PR #79786)

2024-01-29 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer approved this pull request.


https://github.com/llvm/llvm-project/pull/79786


[clang-tools-extra] [clang] [flang] [mlir] [libc] [llvm] [compiler-rt] [MLIR] Setting MemorySpace During Bufferization + Fixes (PR #78484)

2024-01-22 Thread Matthias Springer via cfe-commits


@@ -351,6 +354,16 @@ struct BufferizationOptions {
   /// used.
   UnknownTypeConverterFn unknownTypeConverterFn = nullptr;
 
+  // Used during type conversion to determine the memory space for memref based
+  // on the original tensor type.
+  GetMemorySpaceFn getMemorySpaceFn = nullptr;

matthias-springer wrote:

I think we don't need both `defaultMemorySpace` and `getMemorySpaceFn`. Can you 
rename this field to `defaultMemorySpaceFn` and give it a default lambda of 
`return Attribute();`? Also clarify in the documentation of the field that this 
lambda is only used when the memory space cannot be inferred.
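
A sketch of what that could look like inside `BufferizationOptions` (the exact callback signature here is an assumption, not the final API):

```cpp
/// Callback that returns the memory space to use for a memref when it cannot
/// be inferred from the original tensor type. Returning a null Attribute
/// means "no memory space".
using DefaultMemorySpaceFn = std::function<Attribute(TensorType t)>;
DefaultMemorySpaceFn defaultMemorySpaceFn = [](TensorType t) {
  return Attribute();
};
```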


https://github.com/llvm/llvm-project/pull/78484


[clang-tools-extra] [MLIR][scf.parallel] Don't allow a tile size of 0 (PR #68762)

2023-10-22 Thread Matthias Springer via cfe-commits


@@ -0,0 +1,8 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=0,0}))' -split-input-file -verify-diagnostics
+
+// XFAIL: *

matthias-springer wrote:

I didn't know that. Sounds good!

https://github.com/llvm/llvm-project/pull/68762


[clang] [mlir][sparse] refine sparse fusion with empty tensors materialization (PR #66563)

2023-09-18 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer approved this pull request.


https://github.com/llvm/llvm-project/pull/66563


[clang] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852)

2023-10-06 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/65852

>From d9d8bcbb98e8f5aecb9733329389d61a489bd731 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Sat, 9 Sep 2023 23:07:29 +0800
Subject: [PATCH 01/10] [InstCombine] Simplify the pattern `a ne/eq (zext (a
 ne/eq c))`

---
 .../InstCombine/InstCombineCompares.cpp   |  62 ++
 .../test/Transforms/InstCombine/icmp-range.ll | 181 ++
 2 files changed, 243 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9fdc46fec631679..837b8e6d2619989 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6309,7 +6309,69 @@ Instruction 
*InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
   Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
 return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
 
+  ICmpInst::Predicate Pred1, Pred2;
   const APInt *C;
+  // icmp eq/ne X, (zext (icmp eq/ne X, C))
+  if (match(&I, m_c_ICmp(Pred1, m_Value(X),
+ m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) &&
+  ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) {
+if (C->isZero()) {
+  if (Pred2 == ICmpInst::ICMP_EQ) {
+// icmp eq X, (zext (icmp eq X, 0)) --> false
+// icmp ne X, (zext (icmp eq X, 0)) --> true
+return replaceInstUsesWith(
+I,
+Constant::getIntegerValue(
+I.getType(),
+APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE;
+  } else {
+// icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2
+// icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1
+return ICmpInst::Create(
+Instruction::ICmp,
+Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
+   : ICmpInst::ICMP_ULT,
+X,
+Constant::getIntegerValue(
+X->getType(), APInt(X->getType()->getScalarSizeInBits(),
+Pred1 == ICmpInst::ICMP_NE ? 1 : 2)));
+  }
+} else if (C->isOne()) {
+  if (Pred2 == ICmpInst::ICMP_NE) {
+// icmp eq X, (zext (icmp ne X, 1)) --> false
+// icmp ne X, (zext (icmp ne X, 1)) --> true
+return replaceInstUsesWith(
+I,
+Constant::getIntegerValue(
+I.getType(),
+APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE;
+  } else {
+// icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2
+// icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1
+return ICmpInst::Create(
+Instruction::ICmp,
+Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
+   : ICmpInst::ICMP_ULT,
+X,
+Constant::getIntegerValue(
+X->getType(), APInt(X->getType()->getScalarSizeInBits(),
+Pred1 == ICmpInst::ICMP_NE ? 1 : 2)));
+  }
+} else {
+  // C != 0 && C != 1
+  // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0
+  // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1
+  // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0
+  // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1
+  return ICmpInst::Create(
+  Instruction::ICmp, Pred1, X,
+  Constant::getIntegerValue(
+  X->getType(),
+  APInt(X->getType()->getScalarSizeInBits(),
+static_cast(Pred2 == ICmpInst::ICMP_NE;
+}
+  }
+
   if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
   match(I.getOperand(1), m_APInt(C)) &&
   X->getType()->isIntOrIntVectorTy(1) &&
diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll 
b/llvm/test/Transforms/InstCombine/icmp-range.ll
index 4281e09cb0309c8..15424fce33fdeea 100644
--- a/llvm/test/Transforms/InstCombine/icmp-range.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -1034,6 +1034,187 @@ define i1 @icmp_ne_bool_1(ptr %ptr) {
   ret i1 %cmp
 }
 
+define i1 @icmp_ne_zext_eq_zero(i32 %a) {
+; CHECK-LABEL: @icmp_ne_zext_eq_zero(
+; CHECK-NEXT:ret i1 true
+;
+  %cmp = icmp eq i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  %cmp1 = icmp ne i32 %conv, %a
+  ret i1 %cmp1
+}
+
+define i1 @icmp_ne_zext_ne_zero(i32 %a) {
+; CHECK-LABEL: @icmp_ne_zext_ne_zero(
+; CHECK-NEXT:[[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1
+; CHECK-NEXT:ret i1 [[CMP1]]
+;
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  %cmp1 = icmp ne i32 %conv, %a
+  ret i1 %cmp1
+}
+
+define i1 @icmp_eq_zext_eq_zero(i32 %a) {
+; CHECK-LABEL: @icmp_eq_zext_eq_zero(
+; CHECK-NEXT:ret i1 false
+;
+  %cmp = icmp eq i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  %cmp1 = icmp eq i32 %conv, %a
+  ret i1 %cmp1
+}
+
+define i1 @icmp_eq_zext_n

[clang-tools-extra] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852)

2023-10-06 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/65852

>From d9d8bcbb98e8f5aecb9733329389d61a489bd731 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Sat, 9 Sep 2023 23:07:29 +0800
Subject: [PATCH 01/10] [InstCombine] Simplify the pattern `a ne/eq (zext (a
 ne/eq c))`

---
 .../InstCombine/InstCombineCompares.cpp   |  62 ++
 .../test/Transforms/InstCombine/icmp-range.ll | 181 ++
 2 files changed, 243 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9fdc46fec631679..837b8e6d2619989 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6309,7 +6309,69 @@ Instruction 
*InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
   Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
 return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
 
+  ICmpInst::Predicate Pred1, Pred2;
   const APInt *C;
+  // icmp eq/ne X, (zext (icmp eq/ne X, C))
+  if (match(&I, m_c_ICmp(Pred1, m_Value(X),
+ m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) &&
+  ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) {
+if (C->isZero()) {
+  if (Pred2 == ICmpInst::ICMP_EQ) {
+// icmp eq X, (zext (icmp eq X, 0)) --> false
+// icmp ne X, (zext (icmp eq X, 0)) --> true
+return replaceInstUsesWith(
+I,
+Constant::getIntegerValue(
+I.getType(),
+APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE;
+  } else {
+// icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2
+// icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1
+return ICmpInst::Create(
+Instruction::ICmp,
+Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
+   : ICmpInst::ICMP_ULT,
+X,
+Constant::getIntegerValue(
+X->getType(), APInt(X->getType()->getScalarSizeInBits(),
+Pred1 == ICmpInst::ICMP_NE ? 1 : 2)));
+  }
+} else if (C->isOne()) {
+  if (Pred2 == ICmpInst::ICMP_NE) {
+// icmp eq X, (zext (icmp ne X, 1)) --> false
+// icmp ne X, (zext (icmp ne X, 1)) --> true
+return replaceInstUsesWith(
+I,
+Constant::getIntegerValue(
+I.getType(),
+APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE;
+  } else {
+// icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2
+// icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1
+return ICmpInst::Create(
+Instruction::ICmp,
+Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
+   : ICmpInst::ICMP_ULT,
+X,
+Constant::getIntegerValue(
+X->getType(), APInt(X->getType()->getScalarSizeInBits(),
+Pred1 == ICmpInst::ICMP_NE ? 1 : 2)));
+  }
+} else {
+  // C != 0 && C != 1
+  // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0
+  // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1
+  // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0
+  // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1
+  return ICmpInst::Create(
+  Instruction::ICmp, Pred1, X,
+  Constant::getIntegerValue(
+  X->getType(),
+  APInt(X->getType()->getScalarSizeInBits(),
+static_cast(Pred2 == ICmpInst::ICMP_NE;
+}
+  }
+
   if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
   match(I.getOperand(1), m_APInt(C)) &&
   X->getType()->isIntOrIntVectorTy(1) &&
diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll 
b/llvm/test/Transforms/InstCombine/icmp-range.ll
index 4281e09cb0309c8..15424fce33fdeea 100644
--- a/llvm/test/Transforms/InstCombine/icmp-range.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -1034,6 +1034,187 @@ define i1 @icmp_ne_bool_1(ptr %ptr) {
   ret i1 %cmp
 }
 
+define i1 @icmp_ne_zext_eq_zero(i32 %a) {
+; CHECK-LABEL: @icmp_ne_zext_eq_zero(
+; CHECK-NEXT:ret i1 true
+;
+  %cmp = icmp eq i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  %cmp1 = icmp ne i32 %conv, %a
+  ret i1 %cmp1
+}
+
+define i1 @icmp_ne_zext_ne_zero(i32 %a) {
+; CHECK-LABEL: @icmp_ne_zext_ne_zero(
+; CHECK-NEXT:[[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1
+; CHECK-NEXT:ret i1 [[CMP1]]
+;
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  %cmp1 = icmp ne i32 %conv, %a
+  ret i1 %cmp1
+}
+
+define i1 @icmp_eq_zext_eq_zero(i32 %a) {
+; CHECK-LABEL: @icmp_eq_zext_eq_zero(
+; CHECK-NEXT:ret i1 false
+;
+  %cmp = icmp eq i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  %cmp1 = icmp eq i32 %conv, %a
+  ret i1 %cmp1
+}
+
+define i1 @icmp_eq_zext_n

[clang] [llvm] [mlir] [TableGen] Add const variants of accessors for backend (PR #106658)

2024-09-05 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer approved this pull request.


https://github.com/llvm/llvm-project/pull/106658
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [mlir] [TableGen] Add const variants of accessors for backend (PR #106658)

2024-09-02 Thread Matthias Springer via cfe-commits


@@ -189,7 +189,7 @@ static StringRef NormalizeGNUAttrSpelling(StringRef 
AttrSpelling) {
 
 typedef std::vector> ParsedAttrMap;
 
-static ParsedAttrMap getParsedAttrList(const RecordKeeper &Records,

matthias-springer wrote:

I expected to see more `const`, not less. Why is this one (and the one below) 
passed as non-const?


https://github.com/llvm/llvm-project/pull/106658
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -16,7 +16,7 @@ like Toy to get the information they need.
 
 MLIR provides a set of always available-hooks for certain core transformations,
 as seen in the [previous chapter](Ch-3.md), where we registered some
-canonicalizations via a hook on our operations (`getCanonicalizationPatterns`).

matthias-springer wrote:

Not sure if `canonicalization` has a plural. In practice we often say 
`canonicalizations` though.

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -121,7 +121,7 @@ struct EmulateWideIntPass final
 [&typeConverter](Operation *op) { return typeConverter.isLegal(op); });
 
 RewritePatternSet patterns(ctx);
-// Add common pattenrs to support contants, functions, etc.

matthias-springer wrote:

I think this should be `constants`. Also, there's a typo in `patterns`.

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -14,7 +14,7 @@
   "name": "punctuation.definition.string.begin.cpp"
 },
 "1": {
-  "name": "mlir.delimeter.raw.string.cpp"

matthias-springer wrote:

I'm not sure if this is safe to change...

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -316,7 +316,7 @@ static omp::DeclareReductionOp 
declareReduction(PatternRewriter &builder,
   reduction, {arith::CmpFPredicate::OLT, arith::CmpFPredicate::OLE},
   {arith::CmpFPredicate::OGT, arith::CmpFPredicate::OGE}, isMin) ||
   matchSelectReduction(
-  reduction, {LLVM::FCmpPredicate::olt, LLVM::FCmpPredicate::ole},

matthias-springer wrote:

Same here and in other places.

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -239,7 +239,7 @@ std::optional 
mlir::detail::getDefaultIndexBitwidth(
 
 // Returns the endianness if specified in the given entry. If the entry is 
empty
 // the default endianness represented by an empty attribute is returned.
-Attribute mlir::detail::getDefaultEndianness(DataLayoutEntryInterface entry) {
+Attribute mlir::detail::getDefaultEndiannesss(DataLayoutEntryInterface entry) {

matthias-springer wrote:

Extra `s`?

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -335,7 +335,7 @@ SPIR-V Dialect op| LLVM Dialect op
 `spirv.FOrdEqual`  | `llvm.fcmp "oeq"`
 `spirv.FOrdGreaterThan`| `llvm.fcmp "ogt"`
 `spirv.FOrdGreaterThanEqual`   | `llvm.fcmp "oge"`
-`spirv.FOrdLessThan`   | `llvm.fcmp "olt"`

matthias-springer wrote:

Not a typo.

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [mlir] [NFC][MLIR] Fix some typos (PR #108355)

2024-09-12 Thread Matthias Springer via cfe-commits


@@ -41,7 +41,7 @@ def Arith_CmpFPredicateAttr : I64EnumAttr<
   I64EnumAttrCase<"OEQ", 1, "oeq">,
   I64EnumAttrCase<"OGT", 2, "ogt">,
   I64EnumAttrCase<"OGE", 3, "oge">,
-  I64EnumAttrCase<"OLT", 4, "olt">,

matthias-springer wrote:

This is not a typo

https://github.com/llvm/llvm-project/pull/108355
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [llvm][NFC] `APFloat`: Add missing semantics to enum (PR #117291)

2024-11-21 Thread Matthias Springer via cfe-commits

matthias-springer wrote:

There are various places in `APFloat.h` and `APFloat.cpp` that contain 
switch-case / if-check sequences for checking the type of semantics. I'm trying 
to reduce these. What I had in mind:

* Remove `APFloatBase::IEEEhalf()`, `APFloatBase::BFloat()`, ..., and use a 
single `APFloatBase::getSemantics` instead. (But there are a lot of places 
that call `IEEEhalf()` etc.)
* Add function pointers `APInt (*convertToAPInt)(const APFloat &)` and `APFloat 
(*initFromAPInt)(const APInt &)` to `fltSemantics`. Each semantics can specify 
how to convert between APFloat and APInt. No more switch case statements in 
`IEEEFloat::bitcastToAPInt()` etc.
* Longer term: Turn `fltSemantics` into a public class that can be used to 
define custom floating-point types in downstream projects (without having to 
modify LLVM).

Any thoughts?
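
A rough sketch of the second bullet, just to make the idea concrete. The hook 
names (`convertToAPInt`, `initFromAPInt`) and the struct layout below are 
placeholders for illustration, not existing API:

```cpp
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Hypothetical sketch: each semantics carries its own conversion hooks, so
// generic code dispatches through them instead of switching over every known
// semantics. Names and layout here are assumptions only.
using ConvertToAPIntFn = APInt (*)(const APFloat &);
using InitFromAPIntFn = APFloat (*)(const APInt &);

struct SemanticsHooks {
  ConvertToAPIntFn convertToAPInt; // how this format serializes to bits
  InitFromAPIntFn initFromAPInt;   // how this format is rebuilt from bits
};

// A caller like IEEEFloat::bitcastToAPInt() would then reduce to one dispatch:
APInt bitcastViaHooks(const SemanticsHooks &Hooks, const APFloat &F) {
  return Hooks.convertToAPInt(F);
}
```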

https://github.com/llvm/llvm-project/pull/117291
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [llvm][NFC] Add missing semantics to enum (PR #117291)

2024-11-21 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer created 
https://github.com/llvm/llvm-project/pull/117291

* Add missing semantics to the `Semantics` enum.
* Move all documentation of the different semantics to the header file.
* Rename `EnumToSemantics` to `getSemantics`.
* Store the enum value in `fltSemantics` so that there's one fewer place that 
must be updated when adding a new semantics.
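
A minimal sketch of the last bullet, assuming a hypothetical field name 
(`SemEnum`) and struct layout that may not match the actual patch:

```cpp
#include "llvm/ADT/APFloat.h"
using namespace llvm;

// Sketch only: if each semantics records its own enum tag, mapping a
// semantics back to the enum becomes a field read instead of a chain of
// pointer comparisons or a switch. All names here are assumptions.
struct FltSemanticsSketch {
  APFloatBase::Semantics SemEnum; // e.g. APFloatBase::S_IEEEsingle
  int MaxExponent;
  int MinExponent;
  unsigned Precision;
  unsigned SizeInBits;
};

APFloatBase::Semantics toEnum(const FltSemanticsSketch &Sem) {
  return Sem.SemEnum; // one fewer place to update when adding a semantics
}
```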


>From 35848f59ef8a680e0ed264c5fb03edb7d4a6d6ff Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Fri, 22 Nov 2024 07:03:17 +0100
Subject: [PATCH] [llvm][NFC] Add missing semantics to enum

Add missing semantics to the `Semantics` enum. Move all documentation to the 
header file. Rename `EnumToSemantics` to `getSemantics`.
---
 clang/include/clang/AST/Expr.h|   2 +-
 clang/include/clang/AST/PropertiesBase.td |   4 +-
 llvm/include/llvm/ADT/APFloat.h   |  43 ++-
 llvm/lib/Support/APFloat.cpp  | 144 +++---
 llvm/unittests/ADT/APFloatTest.cpp|  34 ++---
 5 files changed, 105 insertions(+), 122 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 708c8656decbe0..22ce7bcbe181e3 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -1672,7 +1672,7 @@ class FloatingLiteral : public Expr, private 
APFloatStorage {
 
   /// Return the APFloat semantics this literal uses.
   const llvm::fltSemantics &getSemantics() const {
-return llvm::APFloatBase::EnumToSemantics(
+return llvm::APFloatBase::getSemantics(
 static_cast(
 FloatingLiteralBits.Semantics));
   }
diff --git a/clang/include/clang/AST/PropertiesBase.td 
b/clang/include/clang/AST/PropertiesBase.td
index 42883b6419261c..7e417c7ef8fb24 100644
--- a/clang/include/clang/AST/PropertiesBase.td
+++ b/clang/include/clang/AST/PropertiesBase.td
@@ -282,7 +282,7 @@ let Class = PropertyTypeCase in {
 let Read = [{ node.getFloat().bitcastToAPInt() }];
   }
   def : Creator<[{
-const llvm::fltSemantics &floatSema = llvm::APFloatBase::EnumToSemantics(
+const llvm::fltSemantics &floatSema = llvm::APFloatBase::getSemantics(
 static_cast(semantics));
 return APValue(llvm::APFloat(floatSema, value));
   }]>;
@@ -324,7 +324,7 @@ let Class = PropertyTypeCase in {
 let Read = [{ node.getComplexFloatImag().bitcastToAPInt() }];
   }
   def : Creator<[{
-const llvm::fltSemantics &sema = llvm::APFloatBase::EnumToSemantics(
+const llvm::fltSemantics &sema = llvm::APFloatBase::getSemantics(
 static_cast(semantics));
 return APValue(llvm::APFloat(sema, real),
llvm::APFloat(sema, imag));
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 4ca928bf4f49e3..3c90feeb16ae51 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -155,6 +155,15 @@ struct APFloatBase {
 S_IEEEsingle,
 S_IEEEdouble,
 S_IEEEquad,
+// The IBM double-double semantics. Such a number consists of a pair of
+// IEEE 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
+// (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
+// Therefore it has two 53-bit mantissa parts that aren't necessarily
+// adjacent to each other, and two 11-bit exponents.
+//
+// Note: we need to make the value different from semBogus as otherwise
+// an unsafe optimization may collapse both values to a single address,
+// and we heavily rely on them having distinct addresses.
 S_PPCDoubleDouble,
 // 8-bit floating point number following IEEE-754 conventions with bit
 // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
@@ -214,13 +223,41 @@ struct APFloatBase {
 // types, there are no infinity or NaN values. The format is detailed in
 // 
https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
 S_Float4E2M1FN,
-
+// TODO: Documentation is missing.
 S_x87DoubleExtended,
-S_MaxSemantics = S_x87DoubleExtended,
+// These are legacy semantics for the fallback, inaccrurate implementation
+// of IBM double-double, if the accurate semPPCDoubleDouble doesn't handle
+// the operation. It's equivalent to having an IEEE number with consecutive
+// 106 bits of mantissa and 11 bits of exponent.
+//
+// It's not equivalent to IBM double-double. For example, a legit IBM
+// double-double, 1 + epsilon:
+//
+// 1 + epsilon = 1 + (1 >> 1076)
+//
+// is not representable by a consecutive 106 bits of mantissa.
+//
+// Currently, these semantics are used in the following way:
+//
+//   semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
+//   (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
+//   semPPCDoubleDoubleLegacy -> IEEE operations
+//
+// We use bitcastToAPInt() to get the bit representation (in APInt) of the
+// underlying IEEEdouble, then use the 

[clang] [llvm] [llvm][NFC] `APFloat`: Add missing semantics to enum (PR #117291)

2024-11-21 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer edited 
https://github.com/llvm/llvm-project/pull/117291
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-tools-extra] [flang] [lldb] [llvm] [mlir] [experiment] Make `fltSemantics` public (PR #123374)

2025-01-17 Thread Matthias Springer via cfe-commits

https://github.com/matthias-springer created 
https://github.com/llvm/llvm-project/pull/123374

None

>From f595dfc75253a3ca80196f6e7f5fb38ca6d82376 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Fri, 17 Jan 2025 18:08:14 +0100
Subject: [PATCH] [experiment] Make `fltSemantics` public

---
 .../bugprone/NarrowingConversionsCheck.cpp|   4 +-
 clang/include/clang/AST/OptionalDiagnostic.h  |   2 +-
 .../CIR/Interfaces/CIRFPTypeInterface.td  |   4 +-
 clang/lib/AST/ByteCode/Floating.h |   6 +-
 clang/lib/CodeGen/CGExprComplex.cpp   |   4 +-
 clang/lib/CodeGen/PatternInit.cpp |   4 +-
 clang/lib/Sema/SemaChecking.cpp   |   6 +-
 clang/lib/Sema/SemaExpr.cpp   |   8 +-
 .../Checkers/ConversionChecker.cpp|   2 +-
 flang/lib/Optimizer/Dialect/FIRAttr.cpp   |   2 +-
 .../TypeSystem/Clang/TypeSystemClang.cpp  |   3 +-
 llvm/include/llvm/ADT/APFloat.h   | 126 +++-
 llvm/lib/Analysis/ValueTracking.cpp   |   2 +-
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  10 +-
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  12 +-
 llvm/lib/Support/APFloat.cpp  | 538 +++---
 llvm/lib/Support/Z3Solver.cpp |  14 +-
 .../Target/AArch64/AArch64ISelLowering.cpp|   2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   6 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp   |   2 +-
 .../InstCombine/InstCombineCasts.cpp  |   2 +-
 .../InstCombine/InstructionCombining.cpp  |   2 +-
 llvm/lib/Transforms/Scalar/Float2Int.cpp  |   4 +-
 .../Transforms/Utils/FunctionComparator.cpp   |  12 +-
 llvm/unittests/ADT/APFloatTest.cpp|  20 +-
 llvm/unittests/IR/ConstantFPRangeTest.cpp |  10 +-
 mlir/lib/AsmParser/Parser.cpp |   2 +-
 .../Conversion/TosaToLinalg/TosaToLinalg.cpp  |   2 +-
 mlir/lib/IR/BuiltinTypeInterfaces.cpp |   4 +-
 29 files changed, 399 insertions(+), 416 deletions(-)

diff --git 
a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp 
b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp
index bafcd402ca8510..aa868cfe68c1ae 100644
--- a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp
@@ -244,8 +244,8 @@ struct IntegerRange {
 static IntegerRange createFromType(const ASTContext &Context,
const BuiltinType &T) {
   if (T.isFloatingPoint()) {
-unsigned PrecisionBits = llvm::APFloatBase::semanticsPrecision(
-Context.getFloatTypeSemantics(T.desugar()));
+unsigned PrecisionBits = 
+Context.getFloatTypeSemantics(T.desugar()).precision;
 // Contrary to two's complement integer, floating point values are
 // symmetric and have the same number of positive and negative values.
 // The range of valid integers for a floating point value is:
diff --git a/clang/include/clang/AST/OptionalDiagnostic.h 
b/clang/include/clang/AST/OptionalDiagnostic.h
index c9a2d19f4ebce0..784a006072be37 100644
--- a/clang/include/clang/AST/OptionalDiagnostic.h
+++ b/clang/include/clang/AST/OptionalDiagnostic.h
@@ -54,7 +54,7 @@ class OptionalDiagnostic {
   // APFloat::toString would automatically print the shortest
   // representation which rounds to the correct value, but it's a bit
   // tricky to implement. Could use std::to_chars.
-  unsigned precision = llvm::APFloat::semanticsPrecision(F.getSemantics());
+  unsigned precision = F.getSemantics().precision;
   precision = (precision * 59 + 195) / 196;
   SmallVector Buffer;
   F.toString(Buffer, precision);
diff --git a/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td 
b/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td
index 973851b61444f0..cec3dc615d1d43 100644
--- a/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td
+++ b/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td
@@ -30,7 +30,7 @@ def CIRFPTypeInterface : TypeInterface<"CIRFPTypeInterface"> {
   /*args=*/(ins),
   /*methodBody=*/"",
   /*defaultImplementation=*/[{
-  return 
llvm::APFloat::semanticsSizeInBits($_type.getFloatSemantics());
+  return $_type.getFloatSemantics().sizeInBits;
 }]
 >,
 InterfaceMethod<[{
@@ -41,7 +41,7 @@ def CIRFPTypeInterface : TypeInterface<"CIRFPTypeInterface"> {
   /*args=*/(ins),
   /*methodBody=*/"",
   /*defaultImplementation=*/[{
-  return llvm::APFloat::semanticsPrecision($_type.getFloatSemantics());
+  return $_type.getFloatSemantics().precision;
 }]
 >,
 InterfaceMethod<[{
diff --git a/clang/lib/AST/ByteCode/Floating.h 
b/clang/lib/AST/ByteCode/Floating.h
index 3a874fc6f0b412..123d546c5fdbd9 100644
--- a/clang/lib/AST/ByteCode/Floating.h
+++ b/clang/lib/AST/ByteCode/Floating.h
@@ -83,7 +83,7 @@ class Floating final {
 return NameStr;
   }
 
-  unsign