[llvm] ea39f97 - Revert "[LoopIdiom] Support 'shift until less-than' idiom (#95002)" (#98065)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 12:02:36 PDT 2024


Author: Hari Limaye
Date: 2024-07-08T20:02:31+01:00
New Revision: ea39f977271d1a6bc3f0a4225724ff99d22b2d49

URL: https://github.com/llvm/llvm-project/commit/ea39f977271d1a6bc3f0a4225724ff99d22b2d49
DIFF: https://github.com/llvm/llvm-project/commit/ea39f977271d1a6bc3f0a4225724ff99d22b2d49.diff

LOG: Revert "[LoopIdiom] Support 'shift until less-than' idiom (#95002)" (#98065)

Reverts #95002 while I investigate buildbot failure.

This reverts commit 83b01aaf51072a07261ee2e5fc14102f71273bc0.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Removed: 
    llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b3e3e1e71f1010..635bd1236196e5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -231,19 +231,12 @@ class LoopIdiomRecognize {
   bool recognizePopcount();
   void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                                PHINode *CntPhi, Value *Var);
-  bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
-                               bool ZeroCheck, size_t CanonicalSize);
-  bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
-                             Instruction *DefX, PHINode *CntPhi,
-                             Instruction *CntInst);
   bool recognizeAndInsertFFS();  /// Find First Set: ctlz or cttz
-  bool recognizeShiftUntilLessThan();
   void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                 Instruction *CntInst, PHINode *CntPhi,
                                 Value *Var, Instruction *DefX,
                                 const DebugLoc &DL, bool ZeroCheck,
-                                bool IsCntPhiUsedOutsideLoop,
-                                bool InsertSub = false);
+                                bool IsCntPhiUsedOutsideLoop);
 
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
@@ -1489,8 +1482,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
                     << CurLoop->getHeader()->getName() << "\n");
 
   return recognizePopcount() || recognizeAndInsertFFS() ||
-         recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
-         recognizeShiftUntilLessThan();
+         recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
 }
 
 /// Check if the given conditional branch is based on the comparison between
@@ -1525,34 +1517,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
   return nullptr;
 }
 
-/// Check if the given conditional branch is based on an unsigned less-than
-/// comparison between a variable and a constant, and if the comparison is false
-/// the control yields to the loop entry. If the branch matches the behaviour,
-/// the variable involved in the comparison is returned.
-static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
-                                     uint64_t &Threshold) {
-  if (!BI || !BI->isConditional())
-    return nullptr;
-
-  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
-  if (!Cond)
-    return nullptr;
-
-  ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
-  if (!CmpConst)
-    return nullptr;
-
-  BasicBlock *FalseSucc = BI->getSuccessor(1);
-  ICmpInst::Predicate Pred = Cond->getPredicate();
-
-  if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
-    Threshold = CmpConst->getZExtValue();
-    return Cond->getOperand(0);
-  }
-
-  return nullptr;
-}
-
 // Check if the recurrence variable `VarX` is in the right form to create
 // the idiom. Returns the value coerced to a PHINode if so.
 static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1564,107 +1528,6 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
   return nullptr;
 }
 
-/// Return true if the idiom is detected in the loop.
-///
-/// Additionally:
-/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
-///       or nullptr if there is no such.
-/// 2) \p CntPhi is set to the corresponding phi node
-///       or nullptr if there is no such.
-/// 3) \p InitX is set to the value whose CTLZ could be used.
-/// 4) \p DefX is set to the instruction calculating Loop exit condition.
-/// 5) \p Threshold is set to the constant involved in the unsigned less-than
-///       comparison.
-///
-/// The core idiom we are trying to detect is:
-/// \code
-///    if (x0 < 2)
-///      goto loop-exit // the precondition of the loop
-///    cnt0 = init-val
-///    do {
-///      x = phi (x0, x.next);   //PhiX
-///      cnt = phi (cnt0, cnt.next)
-///
-///      cnt.next = cnt + 1;
-///       ...
-///      x.next = x >> 1;   // DefX
-///    } while (x >= 4)
-/// loop-exit:
-/// \endcode
-static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
-                                          Intrinsic::ID &IntrinID,
-                                          Value *&InitX, Instruction *&CntInst,
-                                          PHINode *&CntPhi, Instruction *&DefX,
-                                          uint64_t &Threshold) {
-  BasicBlock *LoopEntry;
-
-  DefX = nullptr;
-  CntInst = nullptr;
-  CntPhi = nullptr;
-  LoopEntry = *(CurLoop->block_begin());
-
-  // step 1: Check if the loop-back branch is in desirable form.
-  if (Value *T = matchShiftULTCondition(
-          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
-          Threshold))
-    DefX = dyn_cast<Instruction>(T);
-  else
-    return false;
-
-  // step 2: Check the recurrence of variable X
-  if (!DefX || !isa<PHINode>(DefX))
-    return false;
-
-  PHINode *VarPhi = cast<PHINode>(DefX);
-  int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
-  if (Idx == -1)
-    return false;
-
-  DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
-  if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
-    return false;
-
-  // step 3: detect instructions corresponding to "x.next = x >> 1"
-  if (DefX->getOpcode() != Instruction::LShr)
-    return false;
-
-  IntrinID = Intrinsic::ctlz;
-  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
-  if (!Shft || !Shft->isOne())
-    return false;
-
-  InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
-
-  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
-  //         or cnt.next = cnt + -1.
-  // TODO: We can skip the step. If loop trip count is known (CTLZ),
-  //       then all uses of "cnt.next" could be optimized to the trip count
-  //       plus "cnt0". Currently it is not optimized.
-  //       This step could be used to detect POPCNT instruction:
-  //       cnt.next = cnt + (x.next & 1)
-  for (Instruction &Inst : llvm::make_range(
-           LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
-    if (Inst.getOpcode() != Instruction::Add)
-      continue;
-
-    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
-    if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
-      continue;
-
-    PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
-    if (!Phi)
-      continue;
-
-    CntInst = &Inst;
-    CntPhi = Phi;
-    break;
-  }
-  if (!CntInst)
-    return false;
-
-  return true;
-}
-
 /// Return true iff the idiom is detected in the loop.
 ///
 /// Additionally:
@@ -1893,35 +1756,27 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
   return true;
 }
 
-// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
-// profitable if we delete the loop.
-bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
-                                                 Value *InitX, bool ZeroCheck,
-                                                 size_t CanonicalSize) {
-  const Value *Args[] = {InitX,
-                         ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+    return false;
 
-  // @llvm.dbg doesn't count as they have no semantic effect.
-  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
-  uint32_t HeaderSize =
-      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+  Intrinsic::ID IntrinID;
+  Value *InitX;
+  Instruction *DefX = nullptr;
+  PHINode *CntPhi = nullptr;
+  Instruction *CntInst = nullptr;
+  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
+  // this is always 6.
+  size_t IdiomCanonicalSize = 6;
 
-  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
-  InstructionCost Cost = TTI->getIntrinsicInstrCost(
-      Attrs, TargetTransformInfo::TCK_SizeAndLatency);
-  if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
+  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+                                 CntInst, CntPhi, DefX))
     return false;
 
-  return true;
-}
-
-/// Convert CTLZ / CTTZ idiom loop into countable loop.
-/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
-/// returns false.
-bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
-                                               Value *InitX, Instruction *DefX,
-                                               PHINode *CntPhi,
-                                               Instruction *CntInst) {
   bool IsCntPhiUsedOutsideLoop = false;
   for (User *U : CntPhi->users())
     if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1963,107 +1818,35 @@ bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
     ZeroCheck = true;
   }
 
-  // FFS idiom loop has only 6 instructions:
+  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+  // profitable if we delete the loop.
+
+  // the loop has only 6 instructions:
   //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
   //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
   //  %shr = ashr %n.addr.0, 1
   //  %tobool = icmp eq %shr, 0
   //  %inc = add nsw %i.0, 1
   //  br i1 %tobool
-  size_t IdiomCanonicalSize = 6;
-  if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
-    return false;
-
-  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
-                           DefX->getDebugLoc(), ZeroCheck,
-                           IsCntPhiUsedOutsideLoop);
-  return true;
-}
-
-/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
-/// trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertFFS() {
-  // Give up if the loop has multiple blocks or multiple backedges.
-  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
-    return false;
-
-  Intrinsic::ID IntrinID;
-  Value *InitX;
-  Instruction *DefX = nullptr;
-  PHINode *CntPhi = nullptr;
-  Instruction *CntInst = nullptr;
-
-  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
-                                 DefX))
-    return false;
 
-  return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
-}
-
-bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
-  // Give up if the loop has multiple blocks or multiple backedges.
-  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
-    return false;
-
-  Intrinsic::ID IntrinID;
-  Value *InitX;
-  Instruction *DefX = nullptr;
-  PHINode *CntPhi = nullptr;
-  Instruction *CntInst = nullptr;
-
-  uint64_t LoopThreshold;
-  if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
-                                     CntPhi, DefX, LoopThreshold))
-    return false;
-
-  if (LoopThreshold == 2) {
-    // Treat as regular FFS.
-    return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
-  }
-
-  // Look for Floor Log2 Idiom.
-  if (LoopThreshold != 4)
-    return false;
-
-  // Abort if CntPhi is used outside of the loop.
-  for (User *U : CntPhi->users())
-    if (!CurLoop->contains(cast<Instruction>(U)))
-      return false;
-
-  // It is safe to assume Preheader exist as it was checked in
-  // parent function RunOnLoop.
-  BasicBlock *PH = CurLoop->getLoopPreheader();
-  auto *PreCondBB = PH->getSinglePredecessor();
-  if (!PreCondBB)
-    return false;
-  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
-  if (!PreCondBI)
-    return false;
-
-  uint64_t PreLoopThreshold;
-  if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
-      PreLoopThreshold != 2)
-    return false;
+  const Value *Args[] = {InitX,
+                         ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
 
-  bool ZeroCheck = true;
+  // @llvm.dbg doesn't count as they have no semantic effect.
+  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+  uint32_t HeaderSize =
+      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
 
-  // the loop has only 6 instructions:
-  //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
-  //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
-  //  %shr = ashr %n.addr.0, 1
-  //  %tobool = icmp ult %n.addr.0, C
-  //  %inc = add nsw %i.0, 1
-  //  br i1 %tobool
-  size_t IdiomCanonicalSize = 6;
-  if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+  InstructionCost Cost =
+    TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+  if (HeaderSize != IdiomCanonicalSize &&
+      Cost > TargetTransformInfo::TCC_Basic)
     return false;
 
-  // log2(x) = w − 1 − clz(x)
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
                            DefX->getDebugLoc(), ZeroCheck,
-                           /*IsCntPhiUsedOutsideLoop=*/false,
-                           /*InsertSub=*/true);
+                           IsCntPhiUsedOutsideLoop);
   return true;
 }
 
@@ -2178,7 +1961,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
 void LoopIdiomRecognize::transformLoopToCountable(
     Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
     PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
-    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
+    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
   BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
 
   // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -2208,8 +1991,6 @@ void LoopIdiomRecognize::transformLoopToCountable(
   Type *CountTy = Count->getType();
   Count = Builder.CreateSub(
       ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
-  if (InsertSub)
-    Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
   Value *NewCount = Count;
   if (IsCntPhiUsedOutsideLoop)
     Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));

diff  --git a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
deleted file mode 100644
index 47ae4fd5b66a7c..00000000000000
--- a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
+++ /dev/null
@@ -1,778 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes=loop-idiom -mtriple=aarch64 < %s -S | FileCheck %s
-
-; Recognize CTLZ builtin pattern.
-; Here we'll just convert loop to countable,
-; so do not insert builtin if CPU do not support CTLZ
-;
-; int ctlz_and_other(int n, char *a)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = 0, n0 = n;
-;   while(n >>= 1) {
-;     a[i] = (n0 & (1 << i)) ? 1 : 0;
-;     i++;
-;   }
-;   return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind uwtable
-define i32 @ctlz_and_other(i32 %n, ptr nocapture %a) {
-; CHECK-LABEL: define i32 @ctlz_and_other(
-; CHECK-SAME: i32 [[N:%.*]], ptr nocapture [[A:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
-; CHECK-NEXT:    [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHR8]], i1 true)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP3]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
-; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[SHR11]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
-  %shr8 = lshr i32 %abs_n, 1
-  %tobool9 = icmp eq i32 %shr8, 0
-  br i1 %tobool9, label %while.end, label %while.body.preheader
-
-while.body.preheader:                             ; preds = %entry
-  br label %while.body
-
-while.body:                                       ; preds = %while.body.preheader, %while.body
-  %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
-  %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
-  %0 = trunc i64 %indvars.iv to i32
-  %shl = shl i32 1, %0
-  %and = and i32 %shl, %abs_n
-  %tobool1 = icmp ne i32 %and, 0
-  %conv = zext i1 %tobool1 to i8
-  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
-  store i8 %conv, ptr %arrayidx, align 1
-  %indvars.iv.next = add nuw i64 %indvars.iv, 1
-  %shr = ashr i32 %shr11, 1
-  %tobool = icmp eq i32 %shr, 0
-  br i1 %tobool, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:                               ; preds = %while.body
-  %1 = trunc i64 %indvars.iv.next to i32
-  br label %while.end
-
-while.end:                                        ; preds = %while.end.loopexit, %entry
-  %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
-  ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_zero_check(int n)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = 0;
-;   while(n) {
-;     n >>= 1;
-;     i++;
-;   }
-;   return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_zero_check(i32 %n) {
-; CHECK-LABEL: define i32 @ctlz_zero_check(
-; CHECK-SAME: i32 [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[ABS_N]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[ABS_N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[N_ADDR_05]], 1
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_06]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
-  %tobool4 = icmp eq i32 %abs_n, 0
-  br i1 %tobool4, label %while.end, label %while.body.preheader
-
-while.body.preheader:                             ; preds = %entry
-  br label %while.body
-
-while.body:                                       ; preds = %while.body.preheader, %while.body
-  %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
-  %shr = ashr i32 %n.addr.05, 1
-  %inc = add nsw i32 %i.06, 1
-  %tobool = icmp eq i32 %shr, 0
-  br i1 %tobool, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:                               ; preds = %while.body
-  br label %while.end
-
-while.end:                                        ; preds = %while.end.loopexit, %entry
-  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
-  ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz(int n)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = 0;
-;   while(n >>= 1) {
-;     i++;
-;   }
-;   return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz(i32 %n) {
-; CHECK-LABEL: define i32 @ctlz(
-; CHECK-SAME: i32 [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
-  br label %while.cond
-
-while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
-  %shr = ashr i32 %n.addr.0, 1
-  %tobool = icmp eq i32 %shr, 0
-  %inc = add nsw i32 %i.0, 1
-  br i1 %tobool, label %while.end, label %while.cond
-
-while.end:                                        ; preds = %while.cond
-  ret i32 %i.0
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; This test covers how instcombine may optimise the previous ctlz case.
-;
-; int ctlz(int n)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = 0;
-;   while(n >>= 1) {
-;     i++;
-;   }
-;   return i;
-; }
-
-define i32 @ctlz_fold(i32 noundef %n) {
-; CHECK-LABEL: define i32 @ctlz_fold(
-; CHECK-SAME: i32 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[TOBOOL_NOT5:%.*]] = icmp ult i32 [[COND]], 2
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT5]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[COND]], i1 true)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[TMP2]], 1
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[N_ADDR_06:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[COND]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_06]], 1
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %cond = tail call i32 @llvm.abs.i32(i32 %n, i1 true)
-  %tobool.not5 = icmp ult i32 %cond, 2
-  br i1 %tobool.not5, label %while.end, label %while.body.preheader
-
-while.body.preheader:                             ; preds = %entry
-  br label %while.body
-
-while.body:                                       ; preds = %while.body.preheader, %while.body
-  %i.07 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.06 = phi i32 [ %shr, %while.body ], [ %cond, %while.body.preheader ]
-  %shr = lshr i32 %n.addr.06, 1
-  %inc = add nuw nsw i32 %i.07, 1
-  %tobool.not = icmp ult i32 %n.addr.06, 4
-  br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:                               ; preds = %while.body
-  %inc.lcssa = phi i32 [ %inc, %while.body ]
-  br label %while.end
-
-while.end:                                        ; preds = %while.end.loopexit, %entry
-  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
-  ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_add(int n, int i0)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = i0;
-;   while(n >>= 1) {
-;     i++;
-;   }
-;   return i;
-; }
-;
-;
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_add(i32 %n, i32 %i0) {
-; CHECK-LABEL: define i32 @ctlz_add(
-; CHECK-SAME: i32 [[N:%.*]], i32 [[I0:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[I0]]
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
-  br label %while.cond
-
-while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
-  %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
-  %shr = ashr i32 %n.addr.0, 1
-  %tobool = icmp eq i32 %shr, 0
-  %inc = add nsw i32 %i.0, 1
-  br i1 %tobool, label %while.end, label %while.cond
-
-while.end:                                        ; preds = %while.cond
-  ret i32 %i.0
-}
-
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_sub(int n, int i0)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = i0;
-;   while(n >>= 1) {
-;     i--;
-;   }
-;   return i;
-; }
-;
-;
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_sub(i32 %n, i32 %i0) {
-; CHECK-LABEL: define i32 @ctlz_sub(
-; CHECK-SAME: i32 [[N:%.*]], i32 [[I0:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[I0]], [[TMP2]]
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], -1
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
-  br label %while.cond
-
-while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
-  %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
-  %shr = ashr i32 %n.addr.0, 1
-  %tobool = icmp eq i32 %shr, 0
-  %inc = add nsw i32 %i.0, -1
-  br i1 %tobool, label %while.end, label %while.cond
-
-while.end:                                        ; preds = %while.cond
-  ret i32 %i.0
-}
-
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_sext(short in)
-; {
-;   int n = in;
-;   if (in < 0)
-;     n = -n;
-;   int i = 0;
-;   while(n >>= 1) {
-;     i++;
-;   }
-;   return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_sext(i16 %in) {
-; CHECK-LABEL: define i32 @ctlz_sext(
-; CHECK-SAME: i16 [[IN:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS:%.*]] = call i16 @llvm.abs.i16(i16 [[IN]], i1 false)
-; CHECK-NEXT:    [[ABS_N:%.*]] = zext i16 [[ABS]] to i32
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs = call i16 @llvm.abs.i16(i16 %in, i1 false)
-  %abs_n = zext i16 %abs to i32
-  br label %while.cond
-
-while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
-  %shr = ashr i32 %n.addr.0, 1
-  %tobool = icmp eq i32 %shr, 0
-  %inc = add nsw i32 %i.0, 1
-  br i1 %tobool, label %while.end, label %while.cond
-
-while.end:                                        ; preds = %while.cond
-  ret i32 %i.0
-}
-
-
-; unsigned floor_log2(unsigned long n) {
-;   unsigned result = 0;
-;   while (n >>= 1) result++;
-;   return result;
-; }
-
-define i32 @floor_log2_use_inc(i64 noundef %n) {
-; CHECK-LABEL: define i32 @floor_log2_use_inc(
-; CHECK-SAME: i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[N]], i1 true)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 64, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i64 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR]] = lshr i64 [[N_ADDR_03]], 1
-; CHECK-NEXT:    [[INC]] = add i32 [[RESULT_04]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i64 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
-;
-entry:
-  %tobool.not2 = icmp ult i64 %n, 2
-  br i1 %tobool.not2, label %while.end, label %while.body.preheader
-
-while.body.preheader:
-  br label %while.body
-
-while.body:
-  %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
-  %shr = lshr i64 %n.addr.03, 1
-  %inc = add i32 %result.04, 1
-  %tobool.not = icmp ult i64 %n.addr.03, 4
-  br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:
-  %inc.lcssa = phi i32 [ %inc, %while.body ]
-  br label %while.end
-
-while.end:
-  %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
-  ret i32 %result.0.lcssa
-}
-
-
-define i32 @floor_log2_use_phi(i64 noundef %n) {
-; CHECK-LABEL: define i32 @floor_log2_use_phi(
-; CHECK-SAME: i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR]] = lshr i64 [[N_ADDR_03]], 1
-; CHECK-NEXT:    [[INC]] = add i32 [[RESULT_04]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ult i64 [[N_ADDR_03]], 4
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[RESULT_04]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
-;
-entry:
-  %tobool.not2 = icmp ult i64 %n, 2
-  br i1 %tobool.not2, label %while.end, label %while.body.preheader
-
-while.body.preheader:
-  br label %while.body
-
-while.body:
-  %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
-  %shr = lshr i64 %n.addr.03, 1
-  %inc = add i32 %result.04, 1
-  %tobool.not = icmp ult i64 %n.addr.03, 4
-  br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:
-  %inc.lcssa = phi i32 [ %result.04, %while.body ]
-  br label %while.end
-
-while.end:
-  %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
-  ret i32 %result.0.lcssa
-}
-
-
-; unsigned floor_log2_dec(unsigned long n) {
-;   unsigned result = 0;
-;   while (n >>= 1) result--;
-;   return result;
-; }
-
-define i32 @floor_log2_dec(i64 noundef %n) {
-; CHECK-LABEL: define i32 @floor_log2_dec(
-; CHECK-SAME: i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[N]], i1 true)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 64, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 0, [[TMP3]]
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR]] = lshr i64 [[N_ADDR_03]], 1
-; CHECK-NEXT:    [[INC]] = add i32 [[RESULT_04]], -1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i64 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
-;
-entry:
-  %tobool.not2 = icmp ult i64 %n, 2
-  br i1 %tobool.not2, label %while.end, label %while.body.preheader
-
-while.body.preheader:
-  br label %while.body
-
-while.body:
-  %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
-  %shr = lshr i64 %n.addr.03, 1
-  %inc = add i32 %result.04, -1
-  %tobool.not = icmp ult i64 %n.addr.03, 4
-  br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:
-  %inc.lcssa = phi i32 [ %inc, %while.body ]
-  br label %while.end
-
-while.end:
-  %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
-  ret i32 %result.0.lcssa
-}
-
-
-; unsigned int_log2_rec(unsigned x) {
-;   return x == 0 ? 0 : int_log2_rec(x >> 1) + 1;
-; }
-
-define i32 @int_log2_rec(i32 noundef %x) {
-; CHECK-LABEL: define i32 @int_log2_rec(
-; CHECK-SAME: i32 noundef [[X:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_END:%.*]], label [[COND_FALSE_PREHEADER:%.*]]
-; CHECK:       cond.false.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT:    br label [[COND_FALSE:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[COND_FALSE_PREHEADER]] ], [ [[TCDEC:%.*]], [[COND_FALSE]] ]
-; CHECK-NEXT:    [[X_TR4:%.*]] = phi i32 [ [[SHR:%.*]], [[COND_FALSE]] ], [ [[X]], [[COND_FALSE_PREHEADER]] ]
-; CHECK-NEXT:    [[ACCUMULATOR_TR3:%.*]] = phi i32 [ [[ADD:%.*]], [[COND_FALSE]] ], [ 0, [[COND_FALSE_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR]] = lshr i32 [[X_TR4]], 1
-; CHECK-NEXT:    [[ADD]] = add i32 [[ACCUMULATOR_TR3]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END_LOOPEXIT:%.*]], label [[COND_FALSE]]
-; CHECK:       cond.end.loopexit:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[COND_FALSE]] ]
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[ACCUMULATOR_TR_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[COND_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[ACCUMULATOR_TR_LCSSA]]
-;
-entry:
-  %cmp2 = icmp eq i32 %x, 0
-  br i1 %cmp2, label %cond.end, label %cond.false.preheader
-
-cond.false.preheader:                             ; preds = %entry
-  br label %cond.false
-
-cond.false:                                       ; preds = %cond.false.preheader, %cond.false
-  %x.tr4 = phi i32 [ %shr, %cond.false ], [ %x, %cond.false.preheader ]
-  %accumulator.tr3 = phi i32 [ %add, %cond.false ], [ 0, %cond.false.preheader ]
-  %shr = lshr i32 %x.tr4, 1
-  %add = add i32 %accumulator.tr3, 1
-  %cmp = icmp ult i32 %x.tr4, 2
-  br i1 %cmp, label %cond.end.loopexit, label %cond.false
-
-cond.end.loopexit:                                ; preds = %cond.false
-  %add.lcssa = phi i32 [ %add, %cond.false ]
-  br label %cond.end
-
-cond.end:                                         ; preds = %cond.end.loopexit, %entry
-  %accumulator.tr.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %cond.end.loopexit ]
-  ret i32 %accumulator.tr.lcssa
-}
-
-
-; We can't easily transform this loop. It returns 1 for an input of both
-; 0 and 1.
-; int ctlz_do_while_use_inc(unsigned n)
-; {
-;   int i = 0;
-;   do {
-;     i++;
-;     n >>= 1;
-;   } while(n != 0);
-;   return i;
-; }
-
-define i32 @ctlz_do_while_use_inc(i32 noundef %n) {
-; CHECK-LABEL: define i32 @ctlz_do_while_use_inc(
-; CHECK-SAME: i32 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[DO_BODY:%.*]]
-; CHECK:       do.body:
-; CHECK-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
-; CHECK-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp ult i32 [[N_ADDR_0]], 2
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[DO_END:%.*]], label [[DO_BODY]]
-; CHECK:       do.end:
-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[DO_BODY]] ]
-; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
-;
-entry:
-  br label %do.body
-
-do.body:                                          ; preds = %do.body, %entry
-  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %do.body ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
-  %inc = add nuw nsw i32 %i.0, 1
-  %shr = lshr i32 %n.addr.0, 1
-  %cmp.not = icmp ult i32 %n.addr.0, 2
-  br i1 %cmp.not, label %do.end, label %do.body
-
-do.end:                                           ; preds = %do.body
-  %inc.lcssa = phi i32 [ %inc, %do.body ]
-  ret i32 %inc.lcssa
-}
-
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_do_while_use_phi(unsigned n)
-; {
-;   int phi;
-;   int inc = 0;
-;   do {
-;     phi = inc;
-;     inc++;
-;     n >>= 1;
-;   } while(n != 0);
-;   return phi;
-; }
-
-define i32 @ctlz_do_while_use_phi(i32 noundef %n) {
-; CHECK-LABEL: define i32 @ctlz_do_while_use_phi(
-; CHECK-SAME: i32 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[N]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT:    br label [[DO_BODY:%.*]]
-; CHECK:       do.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT:    [[INC_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT:    [[INC1]] = add nuw nsw i32 [[INC_0]], 1
-; CHECK-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[DO_END:%.*]], label [[DO_BODY]]
-; CHECK:       do.end:
-; CHECK-NEXT:    [[INC_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[DO_BODY]] ]
-; CHECK-NEXT:    ret i32 [[INC_0_LCSSA]]
-;
-entry:
-  br label %do.body
-
-do.body:                                          ; preds = %do.body, %entry
-  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %do.body ]
-  %inc.0 = phi i32 [ 0, %entry ], [ %inc1, %do.body ]
-  %inc1 = add nuw nsw i32 %inc.0, 1
-  %shr = lshr i32 %n.addr.0, 1
-  %cmp.not = icmp ult i32 %n.addr.0, 2
-  br i1 %cmp.not, label %do.end, label %do.body
-
-do.end:                                           ; preds = %do.body
-  ret i32 %inc.0
-}
-
-
-declare i32 @llvm.abs.i32(i32, i1)
-declare i16 @llvm.abs.i16(i16, i1)


        


More information about the llvm-commits mailing list