[llvm] [LoopIdiom] Support 'shift until less-than' idiom (PR #95002)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 10 08:25:08 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Hari Limaye (hazzlim)

<details>
<summary>Changes</summary>

The current loop idiom code for recognising and inserting a CTLZ intrinsic does not support loops where the loopback control is based on an unsigned less-than condition. This patch adds support for recognising these loops and inserting a CTLZ intrinsic.

---

Patch is 41.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95002.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (+254-36) 
- (added) llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll (+660) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3fe5478408d45..23bd136615305 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -231,12 +231,18 @@ class LoopIdiomRecognize {
   bool recognizePopcount();
   void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                                PHINode *CntPhi, Value *Var);
+  bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
+                               bool ZeroCheck, size_t CanonicalSize);
+  bool insertFFS(Intrinsic::ID IntrinID, Value *InitX, Instruction *DefX,
+                 PHINode *CntPhi, Instruction *CntInst);
   bool recognizeAndInsertFFS();  /// Find First Set: ctlz or cttz
+  bool recognizeShiftUntilLessThan();
   void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                 Instruction *CntInst, PHINode *CntPhi,
                                 Value *Var, Instruction *DefX,
                                 const DebugLoc &DL, bool ZeroCheck,
-                                bool IsCntPhiUsedOutsideLoop);
+                                bool IsCntPhiUsedOutsideLoop,
+                                bool InsertSub = false);
 
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
@@ -1482,7 +1488,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
                     << CurLoop->getHeader()->getName() << "\n");
 
   return recognizePopcount() || recognizeAndInsertFFS() ||
-         recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
+         recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
+         recognizeShiftUntilLessThan();
 }
 
 /// Check if the given conditional branch is based on the comparison between
@@ -1517,6 +1524,34 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
   return nullptr;
 }
 
+/// Check if the given conditional branch is based on an unsigned less-than
+/// comparison between a variable and a constant, and if the comparison is false
+/// the control yields to the loop entry. If the branch matches the behaviour,
+/// the variable involved in the comparison is returned.
+static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
+                                     unsigned &Threshold) {
+  if (!BI || !BI->isConditional())
+    return nullptr;
+
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return nullptr;
+
+  ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
+  if (!CmpConst)
+    return nullptr;
+
+  BasicBlock *FalseSucc = BI->getSuccessor(1);
+  ICmpInst::Predicate Pred = Cond->getPredicate();
+
+  if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
+    Threshold = CmpConst->getZExtValue();
+    return Cond->getOperand(0);
+  }
+
+  return nullptr;
+}
+
 // Check if the recurrence variable `VarX` is in the right form to create
 // the idiom. Returns the value coerced to a PHINode if so.
 static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1528,6 +1563,107 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
   return nullptr;
 }
 
+/// Return true if the idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
+///       or nullptr if there is no such.
+/// 2) \p CntPhi is set to the corresponding phi node
+///       or nullptr if there is no such.
+/// 3) \p InitX is set to the value whose CTLZ could be used.
+/// 4) \p DefX is set to the instruction calculating Loop exit condition.
+/// 5) \p Threshold is set to the constant involved in the unsigned less-than
+///       comparison.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+///    if (x0 < 2)
+///      goto loop-exit // the predcondition of the loop
+///    cnt0 = init-val
+///    do {
+///      x = phi (x0, x.next);   //PhiX
+///      cnt = phi (cnt0, cnt.next)
+///
+///      cnt.next = cnt + 1;
+///       ...
+///      x.next = x >> 1;   // DefX
+///    } while (x < 4)
+/// loop-exit:
+/// \endcode
+static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
+                                          Intrinsic::ID &IntrinID,
+                                          Value *&InitX, Instruction *&CntInst,
+                                          PHINode *&CntPhi, Instruction *&DefX,
+                                          unsigned &Threshold) {
+  BasicBlock *LoopEntry;
+
+  DefX = nullptr;
+  CntInst = nullptr;
+  CntPhi = nullptr;
+  LoopEntry = *(CurLoop->block_begin());
+
+  // step 1: Check if the loop-back branch is in desirable form.
+  if (Value *T = matchShiftULTCondition(
+          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
+          Threshold))
+    DefX = dyn_cast<Instruction>(T);
+  else
+    return false;
+
+  // step 2: Check the recurrence of variable X
+  if (!DefX || !isa<PHINode>(DefX))
+    return false;
+
+  PHINode *VarPhi = cast<PHINode>(DefX);
+  int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
+  if (Idx == -1)
+    return false;
+
+  DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
+  if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
+    return false;
+
+  // step 3: detect instructions corresponding to "x.next = x >> 1"
+  if (DefX->getOpcode() != Instruction::LShr)
+    return false;
+
+  IntrinID = Intrinsic::ctlz;
+  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
+  if (!Shft || !Shft->isOne())
+    return false;
+
+  InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
+  //         or cnt.next = cnt + -1.
+  // TODO: We can skip the step. If loop trip count is known (CTLZ),
+  //       then all uses of "cnt.next" could be optimized to the trip count
+  //       plus "cnt0". Currently it is not optimized.
+  //       This step could be used to detect POPCNT instruction:
+  //       cnt.next = cnt + (x.next & 1)
+  for (Instruction &Inst : llvm::make_range(
+           LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+    if (Inst.getOpcode() != Instruction::Add)
+      continue;
+
+    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
+    if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
+      continue;
+
+    PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
+    if (!Phi)
+      continue;
+
+    CntInst = &Inst;
+    CntPhi = Phi;
+    break;
+  }
+  if (!CntInst)
+    return false;
+
+  return true;
+}
+
 /// Return true iff the idiom is detected in the loop.
 ///
 /// Additionally:
@@ -1756,27 +1892,34 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
   return true;
 }
 
-/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
-/// trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertFFS() {
-  // Give up if the loop has multiple blocks or multiple backedges.
-  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
-    return false;
+// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+// profitable if we delete the loop.
+bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
+                                                 Value *InitX, bool ZeroCheck,
+                                                 size_t CanonicalSize) {
+  const Value *Args[] = {InitX,
+                         ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
 
-  Intrinsic::ID IntrinID;
-  Value *InitX;
-  Instruction *DefX = nullptr;
-  PHINode *CntPhi = nullptr;
-  Instruction *CntInst = nullptr;
-  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
-  // this is always 6.
-  size_t IdiomCanonicalSize = 6;
+  // @llvm.dbg doesn't count as they have no semantic effect.
+  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+  uint32_t HeaderSize =
+      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
 
-  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
-                                 CntInst, CntPhi, DefX))
+  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+  InstructionCost Cost = TTI->getIntrinsicInstrCost(
+      Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+  if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
     return false;
 
+  return true;
+}
+
+/// Convert CTLZ / CTTZ idiom loop into countable loop.
+/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
+/// returns false.
+bool LoopIdiomRecognize::insertFFS(Intrinsic::ID IntrinID, Value *InitX,
+                                   Instruction *DefX, PHINode *CntPhi,
+                                   Instruction *CntInst) {
   bool IsCntPhiUsedOutsideLoop = false;
   for (User *U : CntPhi->users())
     if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1818,35 +1961,108 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
     ZeroCheck = true;
   }
 
-  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
-  // profitable if we delete the loop.
-
-  // the loop has only 6 instructions:
+  // FFS idiom loop has only 6 instructions:
   //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
   //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
   //  %shr = ashr %n.addr.0, 1
   //  %tobool = icmp eq %shr, 0
   //  %inc = add nsw %i.0, 1
   //  br i1 %tobool
+  size_t IdiomCanonicalSize = 6;
+  if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+    return false;
 
-  const Value *Args[] = {InitX,
-                         ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
+  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
+                           DefX->getDebugLoc(), ZeroCheck,
+                           IsCntPhiUsedOutsideLoop);
+  return true;
+}
 
-  // @llvm.dbg doesn't count as they have no semantic effect.
-  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
-  uint32_t HeaderSize =
-      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+    return false;
 
-  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
-  InstructionCost Cost =
-    TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
-  if (HeaderSize != IdiomCanonicalSize &&
-      Cost > TargetTransformInfo::TCC_Basic)
+  Intrinsic::ID IntrinID;
+  Value *InitX;
+  Instruction *DefX = nullptr;
+  PHINode *CntPhi = nullptr;
+  Instruction *CntInst = nullptr;
+
+  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
+                                 DefX))
+    return false;
+
+  return insertFFS(IntrinID, InitX, DefX, CntPhi, CntInst);
+}
+
+bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+    return false;
+
+  Intrinsic::ID IntrinID;
+  Value *InitX;
+  Instruction *DefX = nullptr;
+  PHINode *CntPhi = nullptr;
+  Instruction *CntInst = nullptr;
+
+  unsigned LoopThreshold;
+  if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
+                                     CntPhi, DefX, LoopThreshold))
+    return false;
+
+  if (LoopThreshold == 2) {
+    // Treat as regular FFS.
+    return insertFFS(IntrinID, InitX, DefX, CntPhi, CntInst);
+  }
+
+  // Look for Floor Log2 Idiom.
+  if (LoopThreshold != 4)
+    return false;
+
+  // Abort if CntPhi is used outside of the loop.
+  for (User *U : CntPhi->users())
+    if (!CurLoop->contains(cast<Instruction>(U))) {
+      return false;
+    }
+
+  // It is safe to assume Preheader exist as it was checked in
+  // parent function RunOnLoop.
+  BasicBlock *PH = CurLoop->getLoopPreheader();
+  auto *PreCondBB = PH->getSinglePredecessor();
+  if (!PreCondBB)
+    return false;
+  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+  if (!PreCondBI)
+    return false;
+
+  unsigned PreLoopThreshold;
+  if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
+      PreLoopThreshold != 2)
     return false;
 
+  bool ZeroCheck = true;
+
+  // the loop has only 6 instructions:
+  //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
+  //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
+  //  %shr = ashr %n.addr.0, 1
+  //  %tobool = icmp ult %n.addr.0, C
+  //  %inc = add nsw %i.0, 1
+  //  br i1 %tobool
+  size_t IdiomCanonicalSize = 6;
+  if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+    return false;
+
+  // log2(x) = w − 1 − clz(x)
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
                            DefX->getDebugLoc(), ZeroCheck,
-                           IsCntPhiUsedOutsideLoop);
+                           /*IsCntPhiUsedOutsideLoop=*/false,
+                           /*InsertSub=*/true);
   return true;
 }
 
@@ -1961,7 +2177,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
 void LoopIdiomRecognize::transformLoopToCountable(
     Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
     PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
-    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
+    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
   BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
 
   // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -1991,6 +2207,8 @@ void LoopIdiomRecognize::transformLoopToCountable(
   Type *CountTy = Count->getType();
   Count = Builder.CreateSub(
       ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
+  if (InsertSub)
+    Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
   Value *NewCount = Count;
   if (IsCntPhiUsedOutsideLoop)
     Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
new file mode 100644
index 0000000000000..77b542a0fe137
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
@@ -0,0 +1,660 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-idiom -mtriple=aarch64 < %s -S | FileCheck %s
+
+; Recognize CTLZ builtin pattern.
+; Here we'll just convert loop to countable,
+; so do not insert builtin if CPU do not support CTLZ
+;
+; int ctlz_and_other(int n, char *a)
+; {
+;   n = n >= 0 ? n : -n;
+;   int i = 0, n0 = n;
+;   while(n >>= 1) {
+;     a[i] = (n0 & (1 << i)) ? 1 : 0;
+;     i++;
+;   }
+;   return i;
+; }
+;
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @ctlz_and_other(i32 %n, ptr nocapture %a) {
+; CHECK-LABEL: define i32 @ctlz_and_other(
+; CHECK-SAME: i32 [[N:%.*]], ptr nocapture [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT:    [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
+; CHECK-NEXT:    [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK:       while.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHR8]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP3]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
+; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[SHR]] = ashr i32 [[SHR11]], 1
+; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK:       while.end.loopexit:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
+; CHECK-NEXT:    br label [[WHILE_END]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
+;
+entry:
+  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
+  %shr8 = lshr i32 %abs_n, 1
+  %tobool9 = icmp eq i32 %shr8, 0
+  br i1 %tobool9, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
+  %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
+  %0 = trunc i64 %indvars.iv to i32
+  %shl = shl i32 1, %0
+  %and = and i32 %shl, %abs_n
+  %tobool1 = icmp ne i32 %and, 0
+  %conv = zext i1 %tobool1 to i8
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
+  store i8 %conv, ptr %arrayidx, align 1
+  %indvars.iv.next = add nuw i64 %indvars.iv, 1
+  %shr = ashr i32 %shr11, 1
+  %tobool = icmp eq i32 %shr, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  %1 = trunc i64 %indvars.iv.next to i32
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_zero_check(int n)
+; {
+;   n = n >= 0 ? n : -n;
+;   int i = 0;
+;   while(n) {
+;     n >>= 1;
+;     i++;
+;   }
+;   return i;
+; }
+;
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_zero_check(i32 %n) {
+; CHECK-LABEL: define i32 @ctlz_zero_check(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[ABS_N]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK:       while.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[ABS_N]], [[WHILE_BODY_P...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/95002


More information about the llvm-commits mailing list