https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/71528
>From 754519ad9b37343c827504e7d6bfcfa590f69483 Mon Sep 17 00:00:00 2001 From: Graham Hunter <graham.hun...@arm.com> Date: Fri, 3 Nov 2023 14:22:57 +0000 Subject: [PATCH] [InstCombine] Fold converted urem to 0 if there's no overlapping bits When folding urem instructions we can end up not recognizing that the output will always be 0 due to Value*s being different, despite generating the same data (in this case, 2 different calls to vscale). This patch recognizes the (x << N) & (add (x << M), -1) pattern that instcombine replaces urem with after the two vscale calls have been reduced to one via CSE, then replaces with 0 when x is a non-zero power of 2 and N >= M. --- .../InstCombine/InstCombineAndOrXor.cpp | 10 ++++ .../InstCombine/po2-shift-add-and-to-zero.ll | 52 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 46af9bf5eed003a..da38f8039dbc3ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2662,6 +2662,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (sinkNotIntoOtherHandOfLogicalOp(I)) return &I; + // (x << N) & (add (x << M), -1) --> 0, where x is known to be a non-zero + // power of 2 and M <= N. + const APInt *Shift1, *Shift2; + if (match(&I, m_c_And(m_OneUse(m_Shl(m_Value(X), m_APInt(Shift1))), + m_OneUse(m_Add(m_Shl(m_Value(Y), m_APInt(Shift2)), + m_AllOnes())))) && + X == Y && isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, 0, &I) && + Shift1->uge(*Shift2)) + return replaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // An and recurrence w/loop invariant step is equivelent to (and start, step) PHINode *PN = nullptr; Value *Start = nullptr, *Step = nullptr; diff --git a/llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll b/llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll new file mode 100644 index 000000000000000..4979e7a01972299 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -mtriple unknown -passes=instcombine -S < %s | FileCheck %s + +;; The and X, (add Y, -1) pattern is from an earlier instcombine pass which +;; converted + +;; define dso_local i64 @f1() local_unnamed_addr #0 { +;; entry: +;; %0 = call i64 @llvm.aarch64.sve.cntb(i32 31) +;; %1 = call i64 @llvm.aarch64.sve.cnth(i32 31) +;; %rem = urem i64 %0, %1 +;; ret i64 %rem +;; } + +;; into + +;; define dso_local i64 @f1() local_unnamed_addr #0 { +;; entry: +;; %0 = call i64 @llvm.vscale.i64() +;; %1 = shl nuw nsw i64 %0, 4 +;; %2 = call i64 @llvm.vscale.i64() +;; %3 = shl nuw nsw i64 %2, 3 +;; %4 = add nsw i64 %3, -1 +;; %rem = and i64 %1, %4 +;; ret i64 %rem +;; } + +;; InstCombine would have folded the original to returning 0 if the vscale +;; calls were the same Value*, but since there's two of them it doesn't +;; work and we convert the urem to add/and. CSE then gets rid of the extra +;; vscale, leaving us with a new pattern to match. This only works because +;; vscale is known to be a nonzero power of 2 (assuming there's a defined +;; range for it). + +define dso_local i64 @f1() local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local i64 @f1 +; CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i64 0 +; +entry: + %0 = call i64 @llvm.vscale.i64() + %1 = shl nuw nsw i64 %0, 4 + %2 = shl nuw nsw i64 %0, 3 + %3 = add nsw i64 %2, -1 + %rem = and i64 %1, %3 + ret i64 %rem +} + +declare i64 @llvm.vscale.i64() + +attributes #0 = { vscale_range(1,16) } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits