Author: Roman Lebedev Date: 2020-12-24T21:20:52+03:00 New Revision: 25aebe2ccfb4622b17494c5cfdb2b422c93cee4d
URL: https://github.com/llvm/llvm-project/commit/25aebe2ccfb4622b17494c5cfdb2b422c93cee4d DIFF: https://github.com/llvm/llvm-project/commit/25aebe2ccfb4622b17494c5cfdb2b422c93cee4d.diff LOG: [LoopIdiom] 'left-shift-until-bittest': keep no-wrap flags on shift, fix edge-case miscompilation for %x.next While `%x.curr` is always safe to compute, because `LoopBackedgeTakenCount` will always be smaller than `bitwidth(X)`, i.e. we never get poison, rewriting `%x.next` is more complicated, because `X << LoopTripCount` will be poison iff `LoopTripCount == bitwidth(X)` (which will happen iff `BitPos` is `bitwidth(X) - 1` and `X` is `1`). So unless we know that isn't the case (as alive2 notes, we know it's safe to do iff shift had no-wrap flags, or bitpos does not indicate signbit, or we know that %x is never `1`), we'll need to emit an alternative, safe IR, by either just shifting the `%x.curr`, or conditionally selecting between the computed `%x.next` and `0`. The former IR looks better so let's do that. While there, also ensure that we don't drop no-wrap flags from said shift. 
Added: Modified: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 9ab896f58141..3612f8cc1a71 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1963,7 +1963,7 @@ inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) { /// \endcode static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, - Value *&CurrX, Value *&NextX) { + Value *&CurrX, Instruction *&NextX) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Performing shift-until-bittest idiom detection.\n"); @@ -2030,9 +2030,10 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, } BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB); - NextX = CurrXPN->getIncomingValueForBlock(LoopHeaderBB); + NextX = + dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB)); - if (!match(NextX, m_Shl(m_Specific(CurrX), m_One()))) { + if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) { // FIXME: support right-shift? LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n"); return false; @@ -2113,7 +2114,8 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { bool MadeChange = false; - Value *X, *BitMask, *BitPos, *XCurr, *XNext; + Value *X, *BitMask, *BitPos, *XCurr; + Instruction *XNext; if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, XNext)) { LLVM_DEBUG(dbgs() << DEBUG_TYPE @@ -2163,9 +2165,8 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { // Step 1: Compute the loop trip count. 
- Value *LowBitMask = - Builder.CreateAdd(BitMask, Constant::getAllOnesValue(BitMask->getType()), - BitPos->getName() + ".lowbitmask"); + Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty), + BitPos->getName() + ".lowbitmask"); Value *Mask = Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask"); Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked"); @@ -2173,11 +2174,11 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()}, /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros"); Value *XMaskedNumActiveBits = Builder.CreateSub( - ConstantInt::get(X->getType(), X->getType()->getScalarSizeInBits()), - XMaskedNumLeadingZeros, XMasked->getName() + ".numactivebits"); - Value *XMaskedLeadingOnePos = Builder.CreateAdd( - XMaskedNumActiveBits, Constant::getAllOnesValue(BitMask->getType()), - XMasked->getName() + ".leadingonepos"); + ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros, + XMasked->getName() + ".numactivebits"); + Value *XMaskedLeadingOnePos = + Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty), + XMasked->getName() + ".leadingonepos"); Value *LoopBackedgeTakenCount = Builder.CreateSub( BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount"); @@ -2189,11 +2190,34 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { // Step 2: Compute the recurrence's final value without a loop. + // NewX is always safe to compute, because `LoopBackedgeTakenCount` + // will always be smaller than `bitwidth(X)`, i.e. we never get poison. 
Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount); NewX->takeName(XCurr); + if (auto *I = dyn_cast<Instruction>(NewX)) + I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true); + + Value *NewXNext; + // Rewriting XNext is more complicated, however, because `X << LoopTripCount` + // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen + // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know + // that isn't the case, we'll need to emit an alternative, safe IR. + if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() || + PatternMatch::match( + BitPos, PatternMatch::m_SpecificInt_ICMP( + ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(), + Ty->getScalarSizeInBits() - 1)))) + NewXNext = Builder.CreateShl(X, LoopTripCount); + else { + // Otherwise, just additionally shift by one. It's the smallest solution, + // alternatively, we could check that NewX is INT_MIN (or BitPos is ) + // and select 0 instead. + NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1)); + } - Value *NewXNext = Builder.CreateShl(X, LoopTripCount); NewXNext->takeName(XNext); + if (auto *I = dyn_cast<Instruction>(NewXNext)) + I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true); // Step 3: Adjust the successor basic block to recieve the computed // recurrence's final value instead of the recurrence itself. 
diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll index ac328a507c08..17ff7fc7663b 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -28,7 +28,7 @@ define i32 @p0_i32(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG17]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG17]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG17]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG17]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG17]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG18:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG17]] @@ -96,7 +96,7 @@ define i16 @p1_i16(i16 %x, i16 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i16 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG33]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i16 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG33]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i16 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG33]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i16 [[X]], [[LOOP_TRIPCOUNT]], [[DBG33]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i16 [[X_CURR]], 1, [[DBG33]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG34:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG33]] @@ -164,7 +164,7 @@ define i32 @p2_ diff erent_liveout(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG48]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG48]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG48]] -; 
LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG48]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG48]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG49:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG48]] @@ -368,8 +368,8 @@ define void @p5_nuw(i32 %x, i32 %bit, i32* %p0, i32* %p1) { ; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG93]] ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG93]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG93]] -; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG93]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG93]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl nuw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG93]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl nuw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG93]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG94:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG93]] @@ -442,8 +442,8 @@ define void @p6_nsw(i32 %x, i32 %bit, i32* %p0, i32* %p1) { ; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG110]] ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG110]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG110]] -; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG110]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG110]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl nsw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG110]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl nsw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG110]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG111:!dbg !.*]] ; LZCNT: loop: ; 
LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG110]] @@ -516,8 +516,8 @@ define void @p7_nuwnsw(i32 %x, i32 %bit, i32* %p0, i32* %p1) { ; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG127]] ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG127]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG127]] -; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG127]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG127]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl nuw nsw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG127]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl nuw nsw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG127]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG128:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG127]] @@ -587,7 +587,7 @@ define void @p8_constant_mask_signbit_noncanonical(i32 %x, i32* %p0, i32* %p1) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG142]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG142]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG142]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG142]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG142]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG143:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG142]] @@ -654,7 +654,7 @@ define void @p9_constant_mask_signbit_canonical(i32 %x, i32* %p0, i32* %p1) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG156]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG156]] ; 
LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG156]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG156]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG156]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG157:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG156]] @@ -721,7 +721,7 @@ define void @p10_x_is_not_one(i32 %bit, i32* %p0, i32* %p1) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[DOTMASKED_LEADINGONEPOS]], [[DBG172]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG172]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 2, [[LOOP_BACKEDGETAKENCOUNT]], [[DBG172]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 2, [[LOOP_TRIPCOUNT]], [[DBG172]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG172]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG173:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG172]] @@ -797,7 +797,7 @@ define i32 @p11(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG189]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG189]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG189]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG189]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG189]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG190:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG189]] @@ -865,7 +865,7 @@ define i32 @p12(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG204]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 
1, [[DBG204]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG204]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG204]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG204]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG205:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG204]] @@ -934,7 +934,7 @@ define i32 @p13(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG219]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG219]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG219]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG219]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG219]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG220:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG219]] @@ -997,7 +997,7 @@ define i32 @p14(i32 %x) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG231]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG231]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG231]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG231]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG231]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG232:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG231]] @@ -1556,7 +1556,7 @@ define i32 @n29(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG449]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG449]] ; 
LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG449]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG449]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG449]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG450:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG449]] @@ -1621,7 +1621,7 @@ define i32 @n30(i32 %x) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG462]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG462]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG462]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG462]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG462]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG463:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG462]] @@ -1684,7 +1684,7 @@ define i32 @n31(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG477]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG477]] ; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG477]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG477]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG477]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG478:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG477]] @@ -1755,7 +1755,7 @@ define i32 @n32(i32 %x, i32 %bit) { ; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG493]] ; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG493]] ; LZCNT-NEXT: 
[[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG493]] -; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG493]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG493]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG494:!dbg !.*]] ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG493]] _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits