[llvm] 83b01aa - [LoopIdiom] Support 'shift until less-than' idiom (#95002)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 06:32:11 PDT 2024
Author: Hari Limaye
Date: 2024-07-08T14:32:08+01:00
New Revision: 83b01aaf51072a07261ee2e5fc14102f71273bc0
URL: https://github.com/llvm/llvm-project/commit/83b01aaf51072a07261ee2e5fc14102f71273bc0
DIFF: https://github.com/llvm/llvm-project/commit/83b01aaf51072a07261ee2e5fc14102f71273bc0.diff
LOG: [LoopIdiom] Support 'shift until less-than' idiom (#95002)
The current loop idiom code for recognising and inserting a CTLZ
intrinsic does not support loops where the loopback control is based on
an unsigned less-than condition. This patch adds support for recognising
these loops and inserting a CTLZ intrinsic.
Fixes the missed optimization cases in #51064
---------
Co-authored-by: David Sherwood <david.sherwood at arm.com>
Added:
llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
Modified:
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 635bd1236196e..b3e3e1e71f101 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -231,12 +231,19 @@ class LoopIdiomRecognize {
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
+ bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
+ bool ZeroCheck, size_t CanonicalSize);
+ bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
+ Instruction *DefX, PHINode *CntPhi,
+ Instruction *CntInst);
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
+ bool recognizeShiftUntilLessThan();
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
Instruction *CntInst, PHINode *CntPhi,
Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
- bool IsCntPhiUsedOutsideLoop);
+ bool IsCntPhiUsedOutsideLoop,
+ bool InsertSub = false);
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
@@ -1482,7 +1489,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< CurLoop->getHeader()->getName() << "\n");
return recognizePopcount() || recognizeAndInsertFFS() ||
- recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
+ recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
+ recognizeShiftUntilLessThan();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1517,6 +1525,34 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
+/// Check if the given conditional branch is based on an unsigned less-than
+/// comparison between a variable and a constant, and if the comparison is false
+/// the control yields to the loop entry. If the branch matches the behaviour,
+/// the variable involved in the comparison is returned and \p Threshold is set
+/// to the constant. Otherwise nullptr is returned and \p Threshold is left
+/// untouched; this includes constants that need more than 64 bits and so
+/// cannot be represented in \p Threshold.
+static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
+ uint64_t &Threshold) {
+ if (!BI || !BI->isConditional())
+ return nullptr;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+
+ ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpConst)
+ return nullptr;
+
+ // getZExtValue() asserts when the value needs more than 64 bits (possible
+ // for e.g. an i128 comparison), so reject such constants up front.
+ const APInt &CmpVal = CmpConst->getValue();
+ if (CmpVal.getActiveBits() > 64)
+ return nullptr;
+
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+
+ if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
+ Threshold = CmpVal.getZExtValue();
+ return Cond->getOperand(0);
+ }
+
+ return nullptr;
+}
+
// Check if the recurrence variable `VarX` is in the right form to create
// the idiom. Returns the value coerced to a PHINode if so.
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1528,6 +1564,107 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
return nullptr;
}
+/// Return true if the 'shift until less-than' idiom is detected in the loop.
+///
+/// On success the out-parameters are filled in as follows:
+/// 1) \p CntInst is set to the counter update, i.e. the add of +1 or -1
+/// whose first operand is accepted by getRecurrenceVar().
+/// 2) \p CntPhi is set to the phi node returned for \p CntInst.
+/// 3) \p InitX is set to the preheader value of x, whose CTLZ could be used.
+/// 4) \p DefX is set to the shift instruction "x.next = x >> 1".
+/// 5) \p Threshold is set to the constant involved in the unsigned less-than
+/// comparison, and \p IntrinID is set to Intrinsic::ctlz.
+/// \p DefX, \p CntInst and \p CntPhi are reset on entry; \p DL is not used.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (x0 < 2)
+/// goto loop-exit // the precondition of the loop
+/// cnt0 = init-val
+/// do {
+/// x = phi (x0, x.next); //PhiX
+/// cnt = phi (cnt0, cnt.next)
+///
+/// cnt.next = cnt + 1;
+/// ...
+/// x.next = x >> 1; // DefX
+/// } while (x >= 4)
+/// loop-exit:
+/// \endcode
+static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
+ Intrinsic::ID &IntrinID,
+ Value *&InitX, Instruction *&CntInst,
+ PHINode *&CntPhi, Instruction *&DefX,
+ uint64_t &Threshold) {
+ BasicBlock *LoopEntry;
+
+ DefX = nullptr;
+ CntInst = nullptr;
+ CntPhi = nullptr;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: The loop entry must end in "br (icmp ult x, C)" looping on false.
+ if (Value *T = matchShiftULTCondition(
+ dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
+ Threshold))
+ DefX = dyn_cast<Instruction>(T);
+ else
+ return false;
+
+ // step 2: x must be a phi whose back-edge value takes x as first operand.
+ if (!DefX || !isa<PHINode>(DefX))
+ return false;
+
+ PHINode *VarPhi = cast<PHINode>(DefX);
+ int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
+ if (Idx == -1)
+ return false;
+
+ DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
+ if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
+ return false;
+
+ // step 3: the back-edge value must be the logical shift "x.next = x >> 1".
+ if (DefX->getOpcode() != Instruction::LShr)
+ return false;
+
+ IntrinID = Intrinsic::ctlz;
+ ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
+ if (!Shft || !Shft->isOne())
+ return false;
+
+ InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+ // step 4: Find the instruction that updates the counter: cnt.next = cnt + 1
+ // or cnt.next = cnt + -1. The first matching add in the block wins.
+ // TODO: We can skip this step. If the loop trip count is known (CTLZ),
+ // then all uses of "cnt.next" could be optimized to the trip count
+ // plus "cnt0". Currently it is not optimized.
+ // This step could be used to detect a POPCNT instruction:
+ // cnt.next = cnt + (x.next & 1)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
+ if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
+ continue;
+
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
+ if (!Phi)
+ continue;
+
+ CntInst = &Inst;
+ CntPhi = Phi;
+ break;
+ }
+ if (!CntInst)
+ return false;
+
+ return true;
+}
+
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
@@ -1756,27 +1893,35 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
return true;
}
-/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
-/// trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertFFS() {
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
+// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+// profitable if we delete the loop.
+bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
+ Value *InitX, bool ZeroCheck,
+ size_t CanonicalSize) {
+ const Value *Args[] = {InitX,
+ ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
- Intrinsic::ID IntrinID;
- Value *InitX;
- Instruction *DefX = nullptr;
- PHINode *CntPhi = nullptr;
- Instruction *CntInst = nullptr;
- // Help decide if transformation is profitable. For ShiftUntilZero idiom,
- // this is always 6.
- size_t IdiomCanonicalSize = 6;
+ // @llvm.dbg doesn't count as they have no semantic effect.
+ auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+ uint32_t HeaderSize =
+ std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
- if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
- CntInst, CntPhi, DefX))
+ IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+ InstructionCost Cost = TTI->getIntrinsicInstrCost(
+ Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+ if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
return false;
+ return true;
+}
+
+/// Convert CTLZ / CTTZ idiom loop into countable loop.
+/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
+/// returns false.
+bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
+ Value *InitX, Instruction *DefX,
+ PHINode *CntPhi,
+ Instruction *CntInst) {
bool IsCntPhiUsedOutsideLoop = false;
for (User *U : CntPhi->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1818,35 +1963,107 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
ZeroCheck = true;
}
- // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
- // profitable if we delete the loop.
-
- // the loop has only 6 instructions:
+ // FFS idiom loop has only 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
// %tobool = icmp eq %shr, 0
// %inc = add nsw %i.0, 1
// br i1 %tobool
+ size_t IdiomCanonicalSize = 6;
+ if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+ return false;
- const Value *Args[] = {InitX,
- ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
+ transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
+ DefX->getDebugLoc(), ZeroCheck,
+ IsCntPhiUsedOutsideLoop);
+ return true;
+}
- // @llvm.dbg doesn't count as they have no semantic effect.
- auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
- uint32_t HeaderSize =
- std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
- IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
- InstructionCost Cost =
- TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
- if (HeaderSize != IdiomCanonicalSize &&
- Cost > TargetTransformInfo::TCC_Basic)
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+
+ if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
+ DefX))
+ return false;
+
+ return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
+}
+
+bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+
+ uint64_t LoopThreshold;
+ if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
+ CntPhi, DefX, LoopThreshold))
+ return false;
+
+ if (LoopThreshold == 2) {
+ // Treat as regular FFS (the loop exits once x < 2).
+ return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
+ }
+
+ // Otherwise only the floor-log2 form (the loop exits once x < 4) is handled.
+ if (LoopThreshold != 4)
+ return false;
+
+ // Abort if CntPhi is used outside of the loop (not handled on this path).
+ for (User *U : CntPhi->users())
+ if (!CurLoop->contains(cast<Instruction>(U)))
+ return false;
+
+ // It is safe to assume the preheader exists, as it was checked in the
+ // parent function RunOnLoop.
+ BasicBlock *PH = CurLoop->getLoopPreheader();
+ auto *PreCondBB = PH->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ if (!PreCondBI)
+ return false;
+
+ uint64_t PreLoopThreshold;
+ if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
+ PreLoopThreshold != 2)
return false;
+ bool ZeroCheck = true;
+
+ // (InitX >= 2 was matched above.) The canonical loop has only 6 instructions:
+ // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
+ // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
+ // %shr = lshr %n.addr.0, 1
+ // %tobool = icmp ult %n.addr.0, C
+ // %inc = add nsw %i.0, 1
+ // br i1 %tobool
+ size_t IdiomCanonicalSize = 6;
+ if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+ return false;
+
+ // floor(log2(x)) = w - 1 - ctlz(x); InsertSub requests the extra "- 1".
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
- IsCntPhiUsedOutsideLoop);
+ /*IsCntPhiUsedOutsideLoop=*/false,
+ /*InsertSub=*/true);
return true;
}
@@ -1961,7 +2178,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
void LoopIdiomRecognize::transformLoopToCountable(
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -1991,6 +2208,8 @@ void LoopIdiomRecognize::transformLoopToCountable(
Type *CountTy = Count->getType();
Count = Builder.CreateSub(
ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
+ if (InsertSub)
+ Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop)
Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
new file mode 100644
index 0000000000000..47ae4fd5b66a7
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
@@ -0,0 +1,778 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-idiom -mtriple=aarch64 < %s -S | FileCheck %s
+
+; Recognize CTLZ builtin pattern.
+; Here we'll just convert the loop to a countable one,
+; so do not insert the builtin if the CPU does not support CTLZ.
+;
+; int ctlz_and_other(int n, char *a)
+; {
+; n = n >= 0 ? n : -n;
+; int i = 0, n0 = n;
+; while(n >>= 1) {
+; a[i] = (n0 & (1 << i)) ? 1 : 0;
+; i++;
+; }
+; return i;
+; }
+;
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @ctlz_and_other(i32 %n, ptr nocapture %a) {
+; CHECK-LABEL: define i32 @ctlz_and_other(
+; CHECK-SAME: i32 [[N:%.*]], ptr nocapture [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT: [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
+; CHECK-NEXT: [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
+; CHECK-NEXT: br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHR8]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP3]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
+; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[SHR]] = ashr i32 [[SHR11]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
+ %shr8 = lshr i32 %abs_n, 1
+ %tobool9 = icmp eq i32 %shr8, 0
+ br i1 %tobool9, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
+ %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
+ %0 = trunc i64 %indvars.iv to i32
+ %shl = shl i32 1, %0
+ %and = and i32 %shl, %abs_n
+ %tobool1 = icmp ne i32 %and, 0
+ %conv = zext i1 %tobool1 to i8
+ %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
+ store i8 %conv, ptr %arrayidx, align 1
+ %indvars.iv.next = add nuw i64 %indvars.iv, 1
+ %shr = ashr i32 %shr11, 1
+ %tobool = icmp eq i32 %shr, 0
+ br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ %1 = trunc i64 %indvars.iv.next to i32
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
+ ret i32 %i.0.lcssa
+}
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_zero_check(int n)
+; {
+; n = n >= 0 ? n : -n;
+; int i = 0;
+; while(n) {
+; n >>= 1;
+; i++;
+; }
+; return i;
+; }
+;
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_zero_check(i32 %n) {
+; CHECK-LABEL: define i32 @ctlz_zero_check(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[ABS_N]], 0
+; CHECK-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[ABS_N]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_05]], 1
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_06]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ]
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
+ %tobool4 = icmp eq i32 %abs_n, 0
+ br i1 %tobool4, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+ %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
+ %shr = ashr i32 %n.addr.05, 1
+ %inc = add nsw i32 %i.06, 1
+ %tobool = icmp eq i32 %shr, 0
+ br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
+ ret i32 %i.0.lcssa
+}
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz(int n)
+; {
+; n = n >= 0 ? n : -n;
+; int i = 0;
+; while(n >>= 1) {
+; i++;
+; }
+; return i;
+; }
+;
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz(i32 %n) {
+; CHECK-LABEL: define i32 @ctlz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
+ %shr = ashr i32 %n.addr.0, 1
+ %tobool = icmp eq i32 %shr, 0
+ %inc = add nsw i32 %i.0, 1
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret i32 %i.0
+}
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; This test covers how instcombine may optimise the previous ctlz case.
+;
+; int ctlz(int n)
+; {
+; n = n >= 0 ? n : -n;
+; int i = 0;
+; while(n >>= 1) {
+; i++;
+; }
+; return i;
+; }
+
+define i32 @ctlz_fold(i32 noundef %n) {
+; CHECK-LABEL: define i32 @ctlz_fold(
+; CHECK-SAME: i32 noundef [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT: [[TOBOOL_NOT5:%.*]] = icmp ult i32 [[COND]], 2
+; CHECK-NEXT: br i1 [[TOBOOL_NOT5]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[COND]], i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 1
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[N_ADDR_06:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[COND]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SHR]] = lshr i32 [[N_ADDR_06]], 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %cond = tail call i32 @llvm.abs.i32(i32 %n, i1 true)
+ %tobool.not5 = icmp ult i32 %cond, 2
+ br i1 %tobool.not5, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %i.07 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+ %n.addr.06 = phi i32 [ %shr, %while.body ], [ %cond, %while.body.preheader ]
+ %shr = lshr i32 %n.addr.06, 1
+ %inc = add nuw nsw i32 %i.07, 1
+ %tobool.not = icmp ult i32 %n.addr.06, 4
+ br i1 %tobool.not, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ %inc.lcssa = phi i32 [ %inc, %while.body ]
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
+ ret i32 %i.0.lcssa
+}
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_add(int n, int i0)
+; {
+; n = n >= 0 ? n : -n;
+; int i = i0;
+; while(n >>= 1) {
+; i++;
+; }
+; return i;
+; }
+;
+;
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_add(i32 %n, i32 %i0) {
+; CHECK-LABEL: define i32 @ctlz_add(
+; CHECK-SAME: i32 [[N:%.*]], i32 [[I0:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[I0]]
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+ %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
+ %shr = ashr i32 %n.addr.0, 1
+ %tobool = icmp eq i32 %shr, 0
+ %inc = add nsw i32 %i.0, 1
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret i32 %i.0
+}
+
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_sub(int n, int i0)
+; {
+; n = n >= 0 ? n : -n;
+; int i = i0;
+; while(n >>= 1) {
+; i--;
+; }
+; return i;
+; }
+;
+;
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_sub(i32 %n, i32 %i0) {
+; CHECK-LABEL: define i32 @ctlz_sub(
+; CHECK-SAME: i32 [[N:%.*]], i32 [[I0:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[I0]], [[TMP2]]
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+ %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
+ %shr = ashr i32 %n.addr.0, 1
+ %tobool = icmp eq i32 %shr, 0
+ %inc = add nsw i32 %i.0, -1
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret i32 %i.0
+}
+
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_sext(short in)
+; {
+; int n = in;
+; if (in < 0)
+; n = -n;
+; int i = 0;
+; while(n >>= 1) {
+; i++;
+; }
+; return i;
+; }
+;
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @ctlz_sext(i16 %in) {
+; CHECK-LABEL: define i32 @ctlz_sext(
+; CHECK-SAME: i16 [[IN:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS:%.*]] = call i16 @llvm.abs.i16(i16 [[IN]], i1 false)
+; CHECK-NEXT: [[ABS_N:%.*]] = zext i16 [[ABS]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
+; CHECK: while.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+;
+entry:
+ %abs = call i16 @llvm.abs.i16(i16 %in, i1 false)
+ %abs_n = zext i16 %abs to i32
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
+ %shr = ashr i32 %n.addr.0, 1
+ %tobool = icmp eq i32 %shr, 0
+ %inc = add nsw i32 %i.0, 1
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret i32 %i.0
+}
+
+
+; unsigned floor_log2(unsigned long n) {
+; unsigned result = 0;
+; while (n >>= 1) result++;
+; return result;
+; }
+; In this variant the value used after the loop is the incremented counter (%inc).
+define i32 @floor_log2_use_inc(i64 noundef %n) {
+; CHECK-LABEL: define i32 @floor_log2_use_inc(
+; CHECK-SAME: i64 noundef [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[N]], i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 64, [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i64 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SHR]] = lshr i64 [[N_ADDR_03]], 1
+; CHECK-NEXT: [[INC]] = add i32 [[RESULT_04]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i64 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
+;
+entry:
+ %tobool.not2 = icmp ult i64 %n, 2 ; guard: bypass the loop when n < 2
+ br i1 %tobool.not2, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+ br label %while.body
+
+while.body:
+ %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+ %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
+ %shr = lshr i64 %n.addr.03, 1
+ %inc = add i32 %result.04, 1
+ %tobool.not = icmp ult i64 %n.addr.03, 4 ; unsigned less-than exit test on the pre-shift value
+ br i1 %tobool.not, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:
+ %inc.lcssa = phi i32 [ %inc, %while.body ] ; exit value is the incremented counter
+ br label %while.end
+
+while.end:
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
+ ret i32 %result.0.lcssa
+}
+
+; Same loop as @floor_log2_use_inc, but the value used after the loop is the counter phi (%result.04); the expected output leaves the loop unchanged.
+define i32 @floor_log2_use_phi(i64 noundef %n) {
+; CHECK-LABEL: define i32 @floor_log2_use_phi(
+; CHECK-SAME: i64 noundef [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SHR]] = lshr i64 [[N_ADDR_03]], 1
+; CHECK-NEXT: [[INC]] = add i32 [[RESULT_04]], 1
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ult i64 [[N_ADDR_03]], 4
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[RESULT_04]], [[WHILE_BODY]] ]
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
+;
+entry:
+ %tobool.not2 = icmp ult i64 %n, 2 ; guard: bypass the loop when n < 2
+ br i1 %tobool.not2, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+ br label %while.body
+
+while.body:
+ %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+ %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
+ %shr = lshr i64 %n.addr.03, 1
+ %inc = add i32 %result.04, 1
+ %tobool.not = icmp ult i64 %n.addr.03, 4 ; unsigned less-than exit test on the pre-shift value
+ br i1 %tobool.not, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:
+ %inc.lcssa = phi i32 [ %result.04, %while.body ] ; despite its name, this takes the counter phi, not %inc
+ br label %while.end
+
+while.end:
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
+ ret i32 %result.0.lcssa
+}
+
+
+; unsigned floor_log2_dec(unsigned long n) {
+; unsigned result = 0;
+; while (n >>= 1) result--;
+; return result;
+; }
+; The counter steps by -1 here; the expected output negates the ctlz-derived count.
+define i32 @floor_log2_dec(i64 noundef %n) {
+; CHECK-LABEL: define i32 @floor_log2_dec(
+; CHECK-SAME: i64 noundef [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[N]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 64, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SHR]] = lshr i64 [[N_ADDR_03]], 1
+; CHECK-NEXT: [[INC]] = add i32 [[RESULT_04]], -1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i64 [[TCPHI]], 1
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY]] ]
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
+;
+entry:
+ %tobool.not2 = icmp ult i64 %n, 2 ; guard: bypass the loop when n < 2
+ br i1 %tobool.not2, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+ br label %while.body
+
+while.body:
+ %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+ %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
+ %shr = lshr i64 %n.addr.03, 1
+ %inc = add i32 %result.04, -1 ; result-- (the name %inc is kept from the incrementing variants)
+ %tobool.not = icmp ult i64 %n.addr.03, 4 ; unsigned less-than exit test on the pre-shift value
+ br i1 %tobool.not, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:
+ %inc.lcssa = phi i32 [ %inc, %while.body ] ; exit value is the decremented counter
+ br label %while.end
+
+while.end:
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
+ ret i32 %result.0.lcssa
+}
+
+
+; unsigned int_log2_rec(unsigned x) {
+; return x == 0 ? 0 : int_log2_rec(x >> 1) + 1;
+; }
+; The IR is a loop form of this recursion (presumably after tail-recursion elimination - TODO confirm).
+define i32 @int_log2_rec(i32 noundef %x) {
+; CHECK-LABEL: define i32 @int_log2_rec(
+; CHECK-SAME: i32 noundef [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_END:%.*]], label [[COND_FALSE_PREHEADER:%.*]]
+; CHECK: cond.false.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; CHECK-NEXT: br label [[COND_FALSE:%.*]]
+; CHECK: cond.false:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[COND_FALSE_PREHEADER]] ], [ [[TCDEC:%.*]], [[COND_FALSE]] ]
+; CHECK-NEXT: [[X_TR4:%.*]] = phi i32 [ [[SHR:%.*]], [[COND_FALSE]] ], [ [[X]], [[COND_FALSE_PREHEADER]] ]
+; CHECK-NEXT: [[ACCUMULATOR_TR3:%.*]] = phi i32 [ [[ADD:%.*]], [[COND_FALSE]] ], [ 0, [[COND_FALSE_PREHEADER]] ]
+; CHECK-NEXT: [[SHR]] = lshr i32 [[X_TR4]], 1
+; CHECK-NEXT: [[ADD]] = add i32 [[ACCUMULATOR_TR3]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_END_LOOPEXIT:%.*]], label [[COND_FALSE]]
+; CHECK: cond.end.loopexit:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[COND_FALSE]] ]
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[ACCUMULATOR_TR_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[COND_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_TR_LCSSA]]
+;
+entry:
+ %cmp2 = icmp eq i32 %x, 0 ; guard: x == 0 returns 0 without entering the loop
+ br i1 %cmp2, label %cond.end, label %cond.false.preheader
+
+cond.false.preheader: ; preds = %entry
+ br label %cond.false
+
+cond.false: ; preds = %cond.false.preheader, %cond.false
+ %x.tr4 = phi i32 [ %shr, %cond.false ], [ %x, %cond.false.preheader ]
+ %accumulator.tr3 = phi i32 [ %add, %cond.false ], [ 0, %cond.false.preheader ]
+ %shr = lshr i32 %x.tr4, 1
+ %add = add i32 %accumulator.tr3, 1
+ %cmp = icmp ult i32 %x.tr4, 2 ; unsigned less-than exit test on the pre-shift value
+ br i1 %cmp, label %cond.end.loopexit, label %cond.false
+
+cond.end.loopexit: ; preds = %cond.false
+ %add.lcssa = phi i32 [ %add, %cond.false ] ; exit value is the incremented accumulator
+ br label %cond.end
+
+cond.end: ; preds = %cond.end.loopexit, %entry
+ %accumulator.tr.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %cond.end.loopexit ]
+ ret i32 %accumulator.tr.lcssa
+}
+
+
+; We can't easily transform this loop: it returns 1 for an input of 0 as well
+; as for an input of 1. The expected output below leaves the loop unchanged.
+; int ctlz_do_while_use_inc(unsigned n)
+; {
+; int i = 0;
+; do {
+; i++;
+; n >>= 1;
+; } while(n != 0);
+; return i;
+; }
+
+define i32 @ctlz_do_while_use_inc(i32 noundef %n) {
+; CHECK-LABEL: define i32 @ctlz_do_while_use_inc(
+; CHECK-SAME: i32 noundef [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[DO_BODY:%.*]]
+; CHECK: do.body:
+; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp ult i32 [[N_ADDR_0]], 2
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[DO_END:%.*]], label [[DO_BODY]]
+; CHECK: do.end:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[DO_BODY]] ]
+; CHECK-NEXT: ret i32 [[INC_LCSSA]]
+;
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %do.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+ %inc = add nuw nsw i32 %i.0, 1
+ %shr = lshr i32 %n.addr.0, 1
+ %cmp.not = icmp ult i32 %n.addr.0, 2 ; unsigned less-than exit test on the pre-shift value
+ br i1 %cmp.not, label %do.end, label %do.body
+
+do.end: ; preds = %do.body
+ %inc.lcssa = phi i32 [ %inc, %do.body ] ; exit value is the incremented counter
+ ret i32 %inc.lcssa
+}
+
+
+; Recognize CTLZ builtin pattern.
+; Here the loop exit test and the returned value are rewritten in terms of
+; ctlz - assume builtin is always profitable.
+;
+; int ctlz_do_while_use_phi(unsigned n)
+; {
+; int phi;
+; int inc = 0;
+; do {
+; phi = inc;
+; inc++;
+; n >>= 1;
+; } while(n != 0);
+; return phi;
+; }
+
+define i32 @ctlz_do_while_use_phi(i32 noundef %n) {
+; CHECK-LABEL: define i32 @ctlz_do_while_use_phi(
+; CHECK-SAME: i32 noundef [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: br label [[DO_BODY:%.*]]
+; CHECK: do.body:
+; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[INC_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[INC_0]], 1
+; CHECK-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
+; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[TCDEC]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[DO_END:%.*]], label [[DO_BODY]]
+; CHECK: do.end:
+; CHECK-NEXT: [[INC_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[DO_BODY]] ]
+; CHECK-NEXT: ret i32 [[INC_0_LCSSA]]
+;
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %do.body ]
+ %inc.0 = phi i32 [ 0, %entry ], [ %inc1, %do.body ]
+ %inc1 = add nuw nsw i32 %inc.0, 1
+ %shr = lshr i32 %n.addr.0, 1
+ %cmp.not = icmp ult i32 %n.addr.0, 2 ; unsigned less-than exit test on the pre-shift value
+ br i1 %cmp.not, label %do.end, label %do.body
+
+do.end: ; preds = %do.body
+ ret i32 %inc.0 ; returns the counter phi, i.e. the value before the increment
+}
+
+
+declare i32 @llvm.abs.i32(i32, i1) ; NOTE(review): no use is visible in this part of the file - presumably used by earlier tests
+declare i16 @llvm.abs.i16(i16, i1) ; called by @ctlz_sext; the i1 operand is the is_int_min_poison flag per the LangRef
More information about the llvm-commits
mailing list