[llvm-branch-commits] [llvm] 25aebe2 - [LoopIdiom] 'left-shift-until-bittest': keep no-wrap flags on shift, fix edge-case miscompilation for %x.next

Roman Lebedev via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Dec 24 10:27:42 PST 2020


Author: Roman Lebedev
Date: 2020-12-24T21:20:52+03:00
New Revision: 25aebe2ccfb4622b17494c5cfdb2b422c93cee4d

URL: https://github.com/llvm/llvm-project/commit/25aebe2ccfb4622b17494c5cfdb2b422c93cee4d
DIFF: https://github.com/llvm/llvm-project/commit/25aebe2ccfb4622b17494c5cfdb2b422c93cee4d.diff

LOG: [LoopIdiom] 'left-shift-until-bittest': keep no-wrap flags on shift, fix edge-case miscompilation for %x.next

`%x.curr` is always safe to compute, because `LoopBackedgeTakenCount`
will always be smaller than `bitwidth(X)`, i.e. we never get poison.
Rewriting `%x.next` is more complicated, however, because `X << LoopTripCount`
will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
iff `BitPos` is `bitwidth(X) - 1` and `X` is `1`).

So unless we know that isn't the case (as alive2 notes, it is safe
iff the shift had no-wrap flags, or `BitPos` does not indicate the sign bit,
or we know that `%x` is never `1`), we need to emit alternative,
safe IR, by either just shifting `%x.curr` by one more bit, or conditionally
selecting between the computed `%x.next` and `0`.
The former IR looks better, so let's do that.

While there, ensure that we don't drop no-wrap flags from said shift.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 9ab896f58141..3612f8cc1a71 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1963,7 +1963,7 @@ inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
 /// \endcode
 static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
                                          Value *&BitMask, Value *&BitPos,
-                                         Value *&CurrX, Value *&NextX) {
+                                         Value *&CurrX, Instruction *&NextX) {
   LLVM_DEBUG(dbgs() << DEBUG_TYPE
              " Performing shift-until-bittest idiom detection.\n");
 
@@ -2030,9 +2030,10 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
   }
 
   BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
-  NextX = CurrXPN->getIncomingValueForBlock(LoopHeaderBB);
+  NextX =
+      dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
 
-  if (!match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
+  if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
     // FIXME: support right-shift?
     LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
     return false;
@@ -2113,7 +2114,8 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
 bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   bool MadeChange = false;
 
-  Value *X, *BitMask, *BitPos, *XCurr, *XNext;
+  Value *X, *BitMask, *BitPos, *XCurr;
+  Instruction *XNext;
   if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
                                     XNext)) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE
@@ -2163,9 +2165,8 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
 
   // Step 1: Compute the loop trip count.
 
-  Value *LowBitMask =
-      Builder.CreateAdd(BitMask, Constant::getAllOnesValue(BitMask->getType()),
-                        BitPos->getName() + ".lowbitmask");
+  Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
+                                        BitPos->getName() + ".lowbitmask");
   Value *Mask =
       Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
   Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
@@ -2173,11 +2174,11 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
       IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
       /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
   Value *XMaskedNumActiveBits = Builder.CreateSub(
-      ConstantInt::get(X->getType(), X->getType()->getScalarSizeInBits()),
-      XMaskedNumLeadingZeros, XMasked->getName() + ".numactivebits");
-  Value *XMaskedLeadingOnePos = Builder.CreateAdd(
-      XMaskedNumActiveBits, Constant::getAllOnesValue(BitMask->getType()),
-      XMasked->getName() + ".leadingonepos");
+      ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
+      XMasked->getName() + ".numactivebits");
+  Value *XMaskedLeadingOnePos =
+      Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
+                        XMasked->getName() + ".leadingonepos");
 
   Value *LoopBackedgeTakenCount = Builder.CreateSub(
       BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount");
@@ -2189,11 +2190,34 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
 
   // Step 2: Compute the recurrence's final value without a loop.
 
+  // NewX is always safe to compute, because `LoopBackedgeTakenCount`
+  // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
   Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
   NewX->takeName(XCurr);
+  if (auto *I = dyn_cast<Instruction>(NewX))
+    I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
+
+  Value *NewXNext;
+  // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
+  // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
+  // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
+  // that isn't the case, we'll need to emit an alternative, safe IR.
+  if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
+      PatternMatch::match(
+          BitPos, PatternMatch::m_SpecificInt_ICMP(
+                      ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
+                                               Ty->getScalarSizeInBits() - 1))))
+    NewXNext = Builder.CreateShl(X, LoopTripCount);
+  else {
+    // Otherwise, just additionally shift by one. It's the smallest solution,
+    // alternatively, we could check that NewX is INT_MIN (or BitPos is )
+    // and select 0 instead.
+    NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
+  }
 
-  Value *NewXNext = Builder.CreateShl(X, LoopTripCount);
   NewXNext->takeName(XNext);
+  if (auto *I = dyn_cast<Instruction>(NewXNext))
+    I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
 
   // Step 3: Adjust the successor basic block to recieve the computed
   //         recurrence's final value instead of the recurrence itself.

diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
index ac328a507c08..17ff7fc7663b 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
@@ -28,7 +28,7 @@ define i32 @p0_i32(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG17]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG17]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG17]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG17]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG17]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG18:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG17]]
@@ -96,7 +96,7 @@ define i16 @p1_i16(i16 %x, i16 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i16 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG33]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i16 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG33]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i16 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG33]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i16 [[X]], [[LOOP_TRIPCOUNT]], [[DBG33]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i16 [[X_CURR]], 1, [[DBG33]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG34:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG33]]
@@ -164,7 +164,7 @@ define i32 @p2_different_liveout(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG48]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG48]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG48]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG48]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG48]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG49:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG48]]
@@ -368,8 +368,8 @@ define void @p5_nuw(i32 %x, i32 %bit, i32* %p0, i32* %p1) {
 ; LZCNT-NEXT:    [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG93]]
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG93]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG93]]
-; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG93]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG93]]
+; LZCNT-NEXT:    [[X_CURR:%.*]] = shl nuw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG93]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl nuw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG93]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG94:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG93]]
@@ -442,8 +442,8 @@ define void @p6_nsw(i32 %x, i32 %bit, i32* %p0, i32* %p1) {
 ; LZCNT-NEXT:    [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG110]]
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG110]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG110]]
-; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG110]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG110]]
+; LZCNT-NEXT:    [[X_CURR:%.*]] = shl nsw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG110]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl nsw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG110]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG111:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG110]]
@@ -516,8 +516,8 @@ define void @p7_nuwnsw(i32 %x, i32 %bit, i32* %p0, i32* %p1) {
 ; LZCNT-NEXT:    [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG127]]
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG127]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG127]]
-; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG127]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG127]]
+; LZCNT-NEXT:    [[X_CURR:%.*]] = shl nuw nsw i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG127]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl nuw nsw i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG127]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG128:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG127]]
@@ -587,7 +587,7 @@ define void @p8_constant_mask_signbit_noncanonical(i32 %x, i32* %p0, i32* %p1) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG142]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG142]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG142]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG142]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG142]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG143:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG142]]
@@ -654,7 +654,7 @@ define void @p9_constant_mask_signbit_canonical(i32 %x, i32* %p0, i32* %p1) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG156]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG156]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG156]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG156]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG156]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG157:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG156]]
@@ -721,7 +721,7 @@ define void @p10_x_is_not_one(i32 %bit, i32* %p0, i32* %p1) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[DOTMASKED_LEADINGONEPOS]], [[DBG172]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG172]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 2, [[LOOP_BACKEDGETAKENCOUNT]], [[DBG172]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 2, [[LOOP_TRIPCOUNT]], [[DBG172]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG172]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG173:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG172]]
@@ -797,7 +797,7 @@ define i32 @p11(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG189]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG189]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG189]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG189]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG189]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG190:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG189]]
@@ -865,7 +865,7 @@ define i32 @p12(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG204]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG204]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG204]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG204]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG204]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG205:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG204]]
@@ -934,7 +934,7 @@ define i32 @p13(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG219]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG219]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG219]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG219]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG219]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG220:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG219]]
@@ -997,7 +997,7 @@ define i32 @p14(i32 %x) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG231]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG231]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG231]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG231]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG231]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG232:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG231]]
@@ -1556,7 +1556,7 @@ define i32 @n29(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG449]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG449]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG449]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG449]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG449]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG450:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG449]]
@@ -1621,7 +1621,7 @@ define i32 @n30(i32 %x) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG462]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG462]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG462]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG462]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG462]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG463:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG462]]
@@ -1684,7 +1684,7 @@ define i32 @n31(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG477]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG477]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG477]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG477]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG477]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG478:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG477]]
@@ -1755,7 +1755,7 @@ define i32 @n32(i32 %x, i32 %bit) {
 ; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG493]]
 ; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG493]]
 ; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG493]]
-; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG493]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, [[DBG493]]
 ; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG494:!dbg !.*]]
 ; LZCNT:       loop:
 ; LZCNT-NEXT:    [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG493]]


        


More information about the llvm-branch-commits mailing list