[llvm] 25aebe2 - [LoopIdiom] 'left-shift-until-bittest': keep no-wrap flags on shift, fix edge-case miscompilation for %x.next
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 24 10:23:41 PST 2020
Author: Roman Lebedev
Date: 2020-12-24T21:20:52+03:00
New Revision: 25aebe2ccfb4622b17494c5cfdb2b422c93cee4d
URL: https://github.com/llvm/llvm-project/commit/25aebe2ccfb4622b17494c5cfdb2b422c93cee4d
DIFF: https://github.com/llvm/llvm-project/commit/25aebe2ccfb4622b17494c5cfdb2b422c93cee4d.diff
LOG: [LoopIdiom] 'left-shift-until-bittest': keep no-wrap flags on shift, fix edge-case miscompilation for %x.next
`%x.curr` is always safe to compute, because `LoopBackedgeTakenCount`
will always be smaller than `bitwidth(X)`, i.e. we never get poison.
Rewriting `%x.next` is more complicated, however, because `X << LoopTripCount`
will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
iff `BitPos` is `bitwidth(X) - 1` and `X` is `1`).
So unless we know that isn't the case (as alive2 notes, we know it's safe
to do iff the shift had no-wrap flags, or bitpos does not indicate the signbit,
or we know that %x is never `1`), we'll need to emit alternative,
safe IR, by either just shifting `%x.curr` by one more bit, or conditionally
selecting between the computed `%x.next` and `0`.
The former IR looks better, so let's do that.
While there, ensure that we don't drop no-wrap flags from said shift.
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 9ab896f58141..3612f8cc1a71 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1963,7 +1963,7 @@ inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
/// \endcode
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
Value *&BitMask, Value *&BitPos,
- Value *&CurrX, Value *&NextX) {
+ Value *&CurrX, Instruction *&NextX) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Performing shift-until-bittest idiom detection.\n");
@@ -2030,9 +2030,10 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
}
BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
- NextX = CurrXPN->getIncomingValueForBlock(LoopHeaderBB);
+ NextX =
+ dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
- if (!match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
+ if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
// FIXME: support right-shift?
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
return false;
@@ -2113,7 +2114,8 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
bool MadeChange = false;
- Value *X, *BitMask, *BitPos, *XCurr, *XNext;
+ Value *X, *BitMask, *BitPos, *XCurr;
+ Instruction *XNext;
if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
XNext)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
@@ -2163,9 +2165,8 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// Step 1: Compute the loop trip count.
- Value *LowBitMask =
- Builder.CreateAdd(BitMask, Constant::getAllOnesValue(BitMask->getType()),
- BitPos->getName() + ".lowbitmask");
+ Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
+ BitPos->getName() + ".lowbitmask");
Value *Mask =
Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
@@ -2173,11 +2174,11 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
/*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
Value *XMaskedNumActiveBits = Builder.CreateSub(
- ConstantInt::get(X->getType(), X->getType()->getScalarSizeInBits()),
- XMaskedNumLeadingZeros, XMasked->getName() + ".numactivebits");
- Value *XMaskedLeadingOnePos = Builder.CreateAdd(
- XMaskedNumActiveBits, Constant::getAllOnesValue(BitMask->getType()),
- XMasked->getName() + ".leadingonepos");
+ ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
+ XMasked->getName() + ".numactivebits");
+ Value *XMaskedLeadingOnePos =
+ Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
+ XMasked->getName() + ".leadingonepos");
Value *LoopBackedgeTakenCount = Builder.CreateSub(
BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount");
@@ -2189,11 +2190,34 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// Step 2: Compute the recurrence's final value without a loop.
+ // NewX is always safe to compute, because `LoopBackedgeTakenCount`
+ // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
NewX->takeName(XCurr);
+ if (auto *I = dyn_cast<Instruction>(NewX))
+ I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
+
+ Value *NewXNext;
+ // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
+ // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
+ // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
+ // that isn't the case, we'll need to emit an alternative, safe IR.
+ if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
+ PatternMatch::match(
+ BitPos, PatternMatch::m_SpecificInt_ICMP(
+ ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
+ Ty->getScalarSizeInBits() - 1))))
+ NewXNext = Builder.CreateShl(X, LoopTripCount);
+ else {
+ // Otherwise, just additionally shift by one. It's the smallest solution,
+ // alternatively, we could check that NewX is INT_MIN (or BitPos is )
+ // and select 0 instead.
+ NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
+ }
- Value *NewXNext = Builder.CreateShl(X, LoopTripCount);
NewXNext->takeName(XNext);
+ if (auto *I = dyn_cast<Instruction>(NewXNext))
+ I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
// Step 3: Adjust the successor basic block to recieve the computed
// recurrence's final value instead of the recurrence itself.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
index ac328a507c08..17ff7fc7663b 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
@@ -28,7 +28,7 @@ define i32 @p0_i32(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG17]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG17]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG17]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG17]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG17]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG18:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG17]]
@@ -96,7 +96,7 @@ define i16 @p1_i16(i16 %x, i16 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i16 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG33]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i16 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG33]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i16 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG33]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i16 [[X]], [[LOOP_TRIPCOUNT]], [[DBG33]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i16 [[X_CURR]], 1, [[DBG33]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG34:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG33]]
@@ -164,7 +164,7 @@ define i32 @p2_different_liveout(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG48]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG48]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG48]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG48]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG48]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG49:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG48]]
@@ -368,8 +368,8 @@ define void @p5_nuw(i32 %x, i32 %bit, i32* %p0, i32* %p1) {
; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG93]]
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG93]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG93]]
-; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG93]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG93]]
+; LZCNT-NEXT: [[X_CURR:%.*]] = shl nuw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG93]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl nuw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG93]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG94:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG93]]
@@ -442,8 +442,8 @@ define void @p6_nsw(i32 %x, i32 %bit, i32* %p0, i32* %p1) {
; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG110]]
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG110]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG110]]
-; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG110]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG110]]
+; LZCNT-NEXT: [[X_CURR:%.*]] = shl nsw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG110]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl nsw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG110]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG111:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG110]]
@@ -516,8 +516,8 @@ define void @p7_nuwnsw(i32 %x, i32 %bit, i32* %p0, i32* %p1) {
; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG127]]
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG127]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG127]]
-; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG127]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG127]]
+; LZCNT-NEXT: [[X_CURR:%.*]] = shl nuw nsw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG127]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl nuw nsw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG127]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG128:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG127]]
@@ -587,7 +587,7 @@ define void @p8_constant_mask_signbit_noncanonical(i32 %x, i32* %p0, i32* %p1) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG142]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG142]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG142]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG142]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG142]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG143:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG142]]
@@ -654,7 +654,7 @@ define void @p9_constant_mask_signbit_canonical(i32 %x, i32* %p0, i32* %p1) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG156]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG156]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG156]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG156]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG156]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG157:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG156]]
@@ -721,7 +721,7 @@ define void @p10_x_is_not_one(i32 %bit, i32* %p0, i32* %p1) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[DOTMASKED_LEADINGONEPOS]], [[DBG172]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG172]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 2, [[LOOP_BACKEDGETAKENCOUNT]], [[DBG172]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 2, [[LOOP_TRIPCOUNT]], [[DBG172]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG172]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG173:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG172]]
@@ -797,7 +797,7 @@ define i32 @p11(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG189]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG189]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG189]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG189]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG189]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG190:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG189]]
@@ -865,7 +865,7 @@ define i32 @p12(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG204]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG204]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG204]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG204]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG204]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG205:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG204]]
@@ -934,7 +934,7 @@ define i32 @p13(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG219]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG219]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG219]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG219]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG219]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG220:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG219]]
@@ -997,7 +997,7 @@ define i32 @p14(i32 %x) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG231]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG231]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG231]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG231]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG231]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG232:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG231]]
@@ -1556,7 +1556,7 @@ define i32 @n29(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG449]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG449]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG449]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG449]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG449]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG450:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG449]]
@@ -1621,7 +1621,7 @@ define i32 @n30(i32 %x) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG462]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG462]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG462]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG462]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG462]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG463:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG462]]
@@ -1684,7 +1684,7 @@ define i32 @n31(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG477]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG477]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG477]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG477]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG477]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG478:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG477]]
@@ -1755,7 +1755,7 @@ define i32 @n32(i32 %x, i32 %bit) {
; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG493]]
; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG493]]
; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG493]]
-; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG493]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG493]]
; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG494:!dbg !.*]]
; LZCNT: loop:
; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG493]]
More information about the llvm-commits
mailing list