[llvm] Revert "[LoopIdiom] Support 'shift until less-than' idiom (#95002)" (PR #98065)
Hari Limaye via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 12:01:30 PDT 2024
https://github.com/hazzlim created https://github.com/llvm/llvm-project/pull/98065
Reverts #95002 while I investigate a buildbot failure.
This reverts commit 83b01aaf51072a07261ee2e5fc14102f71273bc0.
From 9a46c4662bdbb8f74e95c377b82c1f3b53a4e86a Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Mon, 8 Jul 2024 18:09:51 +0000
Subject: [PATCH] Revert "[LoopIdiom] Support 'shift until less-than' idiom
(#95002)"
Reverts #95002 while I investigate buildbot failure.
This reverts commit 83b01aaf51072a07261ee2e5fc14102f71273bc0.
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 291 +------
.../test/Transforms/LoopIdiom/AArch64/ctlz.ll | 778 ------------------
2 files changed, 36 insertions(+), 1033 deletions(-)
delete mode 100644 llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b3e3e1e71f1010..635bd1236196e5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -231,19 +231,12 @@ class LoopIdiomRecognize {
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
- bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
- bool ZeroCheck, size_t CanonicalSize);
- bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
- Instruction *DefX, PHINode *CntPhi,
- Instruction *CntInst);
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
- bool recognizeShiftUntilLessThan();
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
Instruction *CntInst, PHINode *CntPhi,
Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
- bool IsCntPhiUsedOutsideLoop,
- bool InsertSub = false);
+ bool IsCntPhiUsedOutsideLoop);
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
@@ -1489,8 +1482,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< CurLoop->getHeader()->getName() << "\n");
return recognizePopcount() || recognizeAndInsertFFS() ||
- recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
- recognizeShiftUntilLessThan();
+ recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1525,34 +1517,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
-/// Check if the given conditional branch is based on an unsigned less-than
-/// comparison between a variable and a constant, and if the comparison is false
-/// the control yields to the loop entry. If the branch matches the behaviour,
-/// the variable involved in the comparison is returned.
-static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
- uint64_t &Threshold) {
- if (!BI || !BI->isConditional())
- return nullptr;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return nullptr;
-
- ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
- if (!CmpConst)
- return nullptr;
-
- BasicBlock *FalseSucc = BI->getSuccessor(1);
- ICmpInst::Predicate Pred = Cond->getPredicate();
-
- if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
- Threshold = CmpConst->getZExtValue();
- return Cond->getOperand(0);
- }
-
- return nullptr;
-}
-
// Check if the recurrence variable `VarX` is in the right form to create
// the idiom. Returns the value coerced to a PHINode if so.
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1564,107 +1528,6 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
return nullptr;
}
-/// Return true if the idiom is detected in the loop.
-///
-/// Additionally:
-/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
-/// or nullptr if there is no such.
-/// 2) \p CntPhi is set to the corresponding phi node
-/// or nullptr if there is no such.
-/// 3) \p InitX is set to the value whose CTLZ could be used.
-/// 4) \p DefX is set to the instruction calculating Loop exit condition.
-/// 5) \p Threshold is set to the constant involved in the unsigned less-than
-/// comparison.
-///
-/// The core idiom we are trying to detect is:
-/// \code
-/// if (x0 < 2)
-/// goto loop-exit // the precondition of the loop
-/// cnt0 = init-val
-/// do {
-/// x = phi (x0, x.next); //PhiX
-/// cnt = phi (cnt0, cnt.next)
-///
-/// cnt.next = cnt + 1;
-/// ...
-/// x.next = x >> 1; // DefX
-/// } while (x >= 4)
-/// loop-exit:
-/// \endcode
-static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
- Intrinsic::ID &IntrinID,
- Value *&InitX, Instruction *&CntInst,
- PHINode *&CntPhi, Instruction *&DefX,
- uint64_t &Threshold) {
- BasicBlock *LoopEntry;
-
- DefX = nullptr;
- CntInst = nullptr;
- CntPhi = nullptr;
- LoopEntry = *(CurLoop->block_begin());
-
- // step 1: Check if the loop-back branch is in desirable form.
- if (Value *T = matchShiftULTCondition(
- dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
- Threshold))
- DefX = dyn_cast<Instruction>(T);
- else
- return false;
-
- // step 2: Check the recurrence of variable X
- if (!DefX || !isa<PHINode>(DefX))
- return false;
-
- PHINode *VarPhi = cast<PHINode>(DefX);
- int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
- if (Idx == -1)
- return false;
-
- DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
- if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
- return false;
-
- // step 3: detect instructions corresponding to "x.next = x >> 1"
- if (DefX->getOpcode() != Instruction::LShr)
- return false;
-
- IntrinID = Intrinsic::ctlz;
- ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
- if (!Shft || !Shft->isOne())
- return false;
-
- InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
-
- // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
- // or cnt.next = cnt + -1.
- // TODO: We can skip the step. If loop trip count is known (CTLZ),
- // then all uses of "cnt.next" could be optimized to the trip count
- // plus "cnt0". Currently it is not optimized.
- // This step could be used to detect POPCNT instruction:
- // cnt.next = cnt + (x.next & 1)
- for (Instruction &Inst : llvm::make_range(
- LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
- if (Inst.getOpcode() != Instruction::Add)
- continue;
-
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
- if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
- continue;
-
- PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
- if (!Phi)
- continue;
-
- CntInst = &Inst;
- CntPhi = Phi;
- break;
- }
- if (!CntInst)
- return false;
-
- return true;
-}
-
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
@@ -1893,35 +1756,27 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
return true;
}
-// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
-// profitable if we delete the loop.
-bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
- Value *InitX, bool ZeroCheck,
- size_t CanonicalSize) {
- const Value *Args[] = {InitX,
- ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
- // @llvm.dbg doesn't count as they have no semantic effect.
- auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
- uint32_t HeaderSize =
- std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+ // Help decide if transformation is profitable. For ShiftUntilZero idiom,
+ // this is always 6.
+ size_t IdiomCanonicalSize = 6;
- IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
- InstructionCost Cost = TTI->getIntrinsicInstrCost(
- Attrs, TargetTransformInfo::TCK_SizeAndLatency);
- if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
+ if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+ CntInst, CntPhi, DefX))
return false;
- return true;
-}
-
-/// Convert CTLZ / CTTZ idiom loop into countable loop.
-/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
-/// returns false.
-bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
- Value *InitX, Instruction *DefX,
- PHINode *CntPhi,
- Instruction *CntInst) {
bool IsCntPhiUsedOutsideLoop = false;
for (User *U : CntPhi->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1963,107 +1818,35 @@ bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
ZeroCheck = true;
}
- // FFS idiom loop has only 6 instructions:
+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+ // profitable if we delete the loop.
+
+ // the loop has only 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
// %tobool = icmp eq %shr, 0
// %inc = add nsw %i.0, 1
// br i1 %tobool
- size_t IdiomCanonicalSize = 6;
- if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
- return false;
-
- transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
- DefX->getDebugLoc(), ZeroCheck,
- IsCntPhiUsedOutsideLoop);
- return true;
-}
-
-/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
-/// trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertFFS() {
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
-
- Intrinsic::ID IntrinID;
- Value *InitX;
- Instruction *DefX = nullptr;
- PHINode *CntPhi = nullptr;
- Instruction *CntInst = nullptr;
-
- if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
- DefX))
- return false;
- return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
-}
-
-bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
-
- Intrinsic::ID IntrinID;
- Value *InitX;
- Instruction *DefX = nullptr;
- PHINode *CntPhi = nullptr;
- Instruction *CntInst = nullptr;
-
- uint64_t LoopThreshold;
- if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
- CntPhi, DefX, LoopThreshold))
- return false;
-
- if (LoopThreshold == 2) {
- // Treat as regular FFS.
- return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
- }
-
- // Look for Floor Log2 Idiom.
- if (LoopThreshold != 4)
- return false;
-
- // Abort if CntPhi is used outside of the loop.
- for (User *U : CntPhi->users())
- if (!CurLoop->contains(cast<Instruction>(U)))
- return false;
-
- // It is safe to assume Preheader exist as it was checked in
- // parent function RunOnLoop.
- BasicBlock *PH = CurLoop->getLoopPreheader();
- auto *PreCondBB = PH->getSinglePredecessor();
- if (!PreCondBB)
- return false;
- auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
- if (!PreCondBI)
- return false;
-
- uint64_t PreLoopThreshold;
- if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
- PreLoopThreshold != 2)
- return false;
+ const Value *Args[] = {InitX,
+ ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
- bool ZeroCheck = true;
+ // @llvm.dbg doesn't count as they have no semantic effect.
+ auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+ uint32_t HeaderSize =
+ std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
- // the loop has only 6 instructions:
- // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
- // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
- // %shr = ashr %n.addr.0, 1
- // %tobool = icmp ult %n.addr.0, C
- // %inc = add nsw %i.0, 1
- // br i1 %tobool
- size_t IdiomCanonicalSize = 6;
- if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+ IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+ InstructionCost Cost =
+ TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+ if (HeaderSize != IdiomCanonicalSize &&
+ Cost > TargetTransformInfo::TCC_Basic)
return false;
- // log2(x) = w − 1 − clz(x)
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
- /*IsCntPhiUsedOutsideLoop=*/false,
- /*InsertSub=*/true);
+ IsCntPhiUsedOutsideLoop);
return true;
}
@@ -2178,7 +1961,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
void LoopIdiomRecognize::transformLoopToCountable(
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -2208,8 +1991,6 @@ void LoopIdiomRecognize::transformLoopToCountable(
Type *CountTy = Count->getType();
Count = Builder.CreateSub(
ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
- if (InsertSub)
- Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop)
Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
deleted file mode 100644
index 47ae4fd5b66a7c..00000000000000
--- a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
+++ /dev/null
@@ -1,778 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes=loop-idiom -mtriple=aarch64 < %s -S | FileCheck %s
-
-; Recognize CTLZ builtin pattern.
-; Here we'll just convert loop to countable,
-; so do not insert builtin if CPU do not support CTLZ
-;
-; int ctlz_and_other(int n, char *a)
-; {
-; n = n >= 0 ? n : -n;
-; int i = 0, n0 = n;
-; while(n >>= 1) {
-; a[i] = (n0 & (1 << i)) ? 1 : 0;
-; i++;
-; }
-; return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind uwtable
-define i32 @ctlz_and_other(i32 %n, ptr nocapture %a) {
-; CHECK-LABEL: define i32 @ctlz_and_other(
-; CHECK-SAME: i32 [[N:%.*]], ptr nocapture [[A:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT: [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
-; CHECK-NEXT: [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
-; CHECK-NEXT: br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHR8]], i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP3]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
-; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[SHR]] = ashr i32 [[SHR11]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
-; CHECK-NEXT: br label [[WHILE_END]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
- %shr8 = lshr i32 %abs_n, 1
- %tobool9 = icmp eq i32 %shr8, 0
- br i1 %tobool9, label %while.end, label %while.body.preheader
-
-while.body.preheader: ; preds = %entry
- br label %while.body
-
-while.body: ; preds = %while.body.preheader, %while.body
- %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
- %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
- %0 = trunc i64 %indvars.iv to i32
- %shl = shl i32 1, %0
- %and = and i32 %shl, %abs_n
- %tobool1 = icmp ne i32 %and, 0
- %conv = zext i1 %tobool1 to i8
- %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
- store i8 %conv, ptr %arrayidx, align 1
- %indvars.iv.next = add nuw i64 %indvars.iv, 1
- %shr = ashr i32 %shr11, 1
- %tobool = icmp eq i32 %shr, 0
- br i1 %tobool, label %while.end.loopexit, label %while.body
-
-while.end.loopexit: ; preds = %while.body
- %1 = trunc i64 %indvars.iv.next to i32
- br label %while.end
-
-while.end: ; preds = %while.end.loopexit, %entry
- %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
- ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_zero_check(int n)
-; {
-; n = n >= 0 ? n : -n;
-; int i = 0;
-; while(n) {
-; n >>= 1;
-; i++;
-; }
-; return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_zero_check(i32 %n) {
-; CHECK-LABEL: define i32 @ctlz_zero_check(
-; CHECK-SAME: i32 [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[ABS_N]], 0
-; CHECK-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[ABS_N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_05]], 1
-; CHECK-NEXT: [[INC]] = add nsw i32 [[I_06]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ]
-; CHECK-NEXT: br label [[WHILE_END]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
- %tobool4 = icmp eq i32 %abs_n, 0
- br i1 %tobool4, label %while.end, label %while.body.preheader
-
-while.body.preheader: ; preds = %entry
- br label %while.body
-
-while.body: ; preds = %while.body.preheader, %while.body
- %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
- %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
- %shr = ashr i32 %n.addr.05, 1
- %inc = add nsw i32 %i.06, 1
- %tobool = icmp eq i32 %shr, 0
- br i1 %tobool, label %while.end.loopexit, label %while.body
-
-while.end.loopexit: ; preds = %while.body
- br label %while.end
-
-while.end: ; preds = %while.end.loopexit, %entry
- %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
- ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz(int n)
-; {
-; n = n >= 0 ? n : -n;
-; int i = 0;
-; while(n >>= 1) {
-; i++;
-; }
-; return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz(i32 %n) {
-; CHECK-LABEL: define i32 @ctlz(
-; CHECK-SAME: i32 [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT: br label [[WHILE_COND:%.*]]
-; CHECK: while.cond:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
- br label %while.cond
-
-while.cond: ; preds = %while.cond, %entry
- %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
- %shr = ashr i32 %n.addr.0, 1
- %tobool = icmp eq i32 %shr, 0
- %inc = add nsw i32 %i.0, 1
- br i1 %tobool, label %while.end, label %while.cond
-
-while.end: ; preds = %while.cond
- ret i32 %i.0
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; This test covers how instcombine may optimise the previous ctlz case.
-;
-; int ctlz(int n)
-; {
-; n = n >= 0 ? n : -n;
-; int i = 0;
-; while(n >>= 1) {
-; i++;
-; }
-; return i;
-; }
-
-define i32 @ctlz_fold(i32 noundef %n) {
-; CHECK-LABEL: define i32 @ctlz_fold(
-; CHECK-SAME: i32 noundef [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT: [[TOBOOL_NOT5:%.*]] = icmp ult i32 [[COND]], 2
-; CHECK-NEXT: br i1 [[TOBOOL_NOT5]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[COND]], i1 true)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 1
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[N_ADDR_06:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[COND]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[SHR]] = lshr i32 [[N_ADDR_06]], 1
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
-; CHECK-NEXT: br label [[WHILE_END]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %cond = tail call i32 @llvm.abs.i32(i32 %n, i1 true)
- %tobool.not5 = icmp ult i32 %cond, 2
- br i1 %tobool.not5, label %while.end, label %while.body.preheader
-
-while.body.preheader: ; preds = %entry
- br label %while.body
-
-while.body: ; preds = %while.body.preheader, %while.body
- %i.07 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
- %n.addr.06 = phi i32 [ %shr, %while.body ], [ %cond, %while.body.preheader ]
- %shr = lshr i32 %n.addr.06, 1
- %inc = add nuw nsw i32 %i.07, 1
- %tobool.not = icmp ult i32 %n.addr.06, 4
- br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit: ; preds = %while.body
- %inc.lcssa = phi i32 [ %inc, %while.body ]
- br label %while.end
-
-while.end: ; preds = %while.end.loopexit, %entry
- %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
- ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_add(int n, int i0)
-; {
-; n = n >= 0 ? n : -n;
-; int i = i0;
-; while(n >>= 1) {
-; i++;
-; }
-; return i;
-; }
-;
-;
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_add(i32 %n, i32 %i0) {
-; CHECK-LABEL: define i32 @ctlz_add(
-; CHECK-SAME: i32 [[N:%.*]], i32 [[I0:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[I0]]
-; CHECK-NEXT: br label [[WHILE_COND:%.*]]
-; CHECK: while.cond:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
- br label %while.cond
-
-while.cond: ; preds = %while.cond, %entry
- %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
- %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
- %shr = ashr i32 %n.addr.0, 1
- %tobool = icmp eq i32 %shr, 0
- %inc = add nsw i32 %i.0, 1
- br i1 %tobool, label %while.end, label %while.cond
-
-while.end: ; preds = %while.cond
- ret i32 %i.0
-}
-
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_sub(int n, int i0)
-; {
-; n = n >= 0 ? n : -n;
-; int i = i0;
-; while(n >>= 1) {
-; i--;
-; }
-; return i;
-; }
-;
-;
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_sub(i32 %n, i32 %i0) {
-; CHECK-LABEL: define i32 @ctlz_sub(
-; CHECK-SAME: i32 [[N:%.*]], i32 [[I0:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[I0]], [[TMP2]]
-; CHECK-NEXT: br label [[WHILE_COND:%.*]]
-; CHECK: while.cond:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[I0]], [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
- br label %while.cond
-
-while.cond: ; preds = %while.cond, %entry
- %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
- %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
- %shr = ashr i32 %n.addr.0, 1
- %tobool = icmp eq i32 %shr, 0
- %inc = add nsw i32 %i.0, -1
- br i1 %tobool, label %while.end, label %while.cond
-
-while.end: ; preds = %while.cond
- ret i32 %i.0
-}
-
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_sext(short in)
-; {
-; int n = in;
-; if (in < 0)
-; n = -n;
-; int i = 0;
-; while(n >>= 1) {
-; i++;
-; }
-; return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_sext(i16 %in) {
-; CHECK-LABEL: define i32 @ctlz_sext(
-; CHECK-SAME: i16 [[IN:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ABS:%.*]] = call i16 @llvm.abs.i16(i16 [[IN]], i1 false)
-; CHECK-NEXT: [[ABS_N:%.*]] = zext i16 [[ABS]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[ABS_N]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT: br label [[WHILE_COND:%.*]]
-; CHECK: while.cond:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[ABS_N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[SHR]] = ashr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK: while.end:
-; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_COND]] ]
-; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
-;
-entry:
- %abs = call i16 @llvm.abs.i16(i16 %in, i1 false)
- %abs_n = zext i16 %abs to i32
- br label %while.cond
-
-while.cond: ; preds = %while.cond, %entry
- %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
- %shr = ashr i32 %n.addr.0, 1
- %tobool = icmp eq i32 %shr, 0
- %inc = add nsw i32 %i.0, 1
- br i1 %tobool, label %while.end, label %while.cond
-
-while.end: ; preds = %while.cond
- ret i32 %i.0
-}
-
-
-; unsigned floor_log2(unsigned long n) {
-; unsigned result = 0;
-; while (n >>= 1) result++;
-; return result;
-; }
-
-define i32 @floor_log2_use_inc(i64 noundef %n) {
-; CHECK-LABEL: define i32 @floor_log2_use_inc(
-; CHECK-SAME: i64 noundef [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
-; CHECK-NEXT: br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[N]], i1 true)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 64, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i64 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[SHR]] = lshr i64 [[N_ADDR_03]], 1
-; CHECK-NEXT: [[INC]] = add i32 [[RESULT_04]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i64 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
-; CHECK-NEXT: br label [[WHILE_END]]
-; CHECK: while.end:
-; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
-;
-entry:
- %tobool.not2 = icmp ult i64 %n, 2
- br i1 %tobool.not2, label %while.end, label %while.body.preheader
-
-while.body.preheader:
- br label %while.body
-
-while.body:
- %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
- %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
- %shr = lshr i64 %n.addr.03, 1
- %inc = add i32 %result.04, 1
- %tobool.not = icmp ult i64 %n.addr.03, 4
- br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:
- %inc.lcssa = phi i32 [ %inc, %while.body ]
- br label %while.end
-
-while.end:
- %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
- ret i32 %result.0.lcssa
-}
-
-
-define i32 @floor_log2_use_phi(i64 noundef %n) {
-; CHECK-LABEL: define i32 @floor_log2_use_phi(
-; CHECK-SAME: i64 noundef [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
-; CHECK-NEXT: br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[SHR]] = lshr i64 [[N_ADDR_03]], 1
-; CHECK-NEXT: [[INC]] = add i32 [[RESULT_04]], 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ult i64 [[N_ADDR_03]], 4
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[RESULT_04]], [[WHILE_BODY]] ]
-; CHECK-NEXT: br label [[WHILE_END]]
-; CHECK: while.end:
-; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
-;
-entry:
- %tobool.not2 = icmp ult i64 %n, 2
- br i1 %tobool.not2, label %while.end, label %while.body.preheader
-
-while.body.preheader:
- br label %while.body
-
-while.body:
- %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
- %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
- %shr = lshr i64 %n.addr.03, 1
- %inc = add i32 %result.04, 1
- %tobool.not = icmp ult i64 %n.addr.03, 4
- br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:
- %inc.lcssa = phi i32 [ %result.04, %while.body ]
- br label %while.end
-
-while.end:
- %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
- ret i32 %result.0.lcssa
-}
-
-
-; unsigned floor_log2_dec(unsigned long n) {
-; unsigned result = 0;
-; while (n >>= 1) result--;
-; return result;
-; }
-
-define i32 @floor_log2_dec(i64 noundef %n) {
-; CHECK-LABEL: define i32 @floor_log2_dec(
-; CHECK-SAME: i64 noundef [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL_NOT2:%.*]] = icmp ult i64 [[N]], 2
-; CHECK-NEXT: br i1 [[TOBOOL_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[N]], i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 64, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[RESULT_04:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i64 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[SHR]] = lshr i64 [[N_ADDR_03]], 1
-; CHECK-NEXT: [[INC]] = add i32 [[RESULT_04]], -1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i64 [[TCPHI]], 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY]] ]
-; CHECK-NEXT: br label [[WHILE_END]]
-; CHECK: while.end:
-; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
-;
-entry:
- %tobool.not2 = icmp ult i64 %n, 2
- br i1 %tobool.not2, label %while.end, label %while.body.preheader
-
-while.body.preheader:
- br label %while.body
-
-while.body:
- %result.04 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
- %n.addr.03 = phi i64 [ %shr, %while.body ], [ %n, %while.body.preheader ]
- %shr = lshr i64 %n.addr.03, 1
- %inc = add i32 %result.04, -1
- %tobool.not = icmp ult i64 %n.addr.03, 4
- br i1 %tobool.not, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:
- %inc.lcssa = phi i32 [ %inc, %while.body ]
- br label %while.end
-
-while.end:
- %result.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
- ret i32 %result.0.lcssa
-}
-
-
-; unsigned int_log2_rec(unsigned x) {
-; return x == 0 ? 0 : int_log2_rec(x >> 1) + 1;
-; }
-
-define i32 @int_log2_rec(i32 noundef %x) {
-; CHECK-LABEL: define i32 @int_log2_rec(
-; CHECK-SAME: i32 noundef [[X:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: br i1 [[CMP2]], label [[COND_END:%.*]], label [[COND_FALSE_PREHEADER:%.*]]
-; CHECK: cond.false.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT: br label [[COND_FALSE:%.*]]
-; CHECK: cond.false:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[COND_FALSE_PREHEADER]] ], [ [[TCDEC:%.*]], [[COND_FALSE]] ]
-; CHECK-NEXT: [[X_TR4:%.*]] = phi i32 [ [[SHR:%.*]], [[COND_FALSE]] ], [ [[X]], [[COND_FALSE_PREHEADER]] ]
-; CHECK-NEXT: [[ACCUMULATOR_TR3:%.*]] = phi i32 [ [[ADD:%.*]], [[COND_FALSE]] ], [ 0, [[COND_FALSE_PREHEADER]] ]
-; CHECK-NEXT: [[SHR]] = lshr i32 [[X_TR4]], 1
-; CHECK-NEXT: [[ADD]] = add i32 [[ACCUMULATOR_TR3]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[COND_END_LOOPEXIT:%.*]], label [[COND_FALSE]]
-; CHECK: cond.end.loopexit:
-; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[TMP1]], [[COND_FALSE]] ]
-; CHECK-NEXT: br label [[COND_END]]
-; CHECK: cond.end:
-; CHECK-NEXT: [[ACCUMULATOR_TR_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[COND_END_LOOPEXIT]] ]
-; CHECK-NEXT: ret i32 [[ACCUMULATOR_TR_LCSSA]]
-;
-entry:
- %cmp2 = icmp eq i32 %x, 0
- br i1 %cmp2, label %cond.end, label %cond.false.preheader
-
-cond.false.preheader: ; preds = %entry
- br label %cond.false
-
-cond.false: ; preds = %cond.false.preheader, %cond.false
- %x.tr4 = phi i32 [ %shr, %cond.false ], [ %x, %cond.false.preheader ]
- %accumulator.tr3 = phi i32 [ %add, %cond.false ], [ 0, %cond.false.preheader ]
- %shr = lshr i32 %x.tr4, 1
- %add = add i32 %accumulator.tr3, 1
- %cmp = icmp ult i32 %x.tr4, 2
- br i1 %cmp, label %cond.end.loopexit, label %cond.false
-
-cond.end.loopexit: ; preds = %cond.false
- %add.lcssa = phi i32 [ %add, %cond.false ]
- br label %cond.end
-
-cond.end: ; preds = %cond.end.loopexit, %entry
- %accumulator.tr.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %cond.end.loopexit ]
- ret i32 %accumulator.tr.lcssa
-}
-
-
-; We can't easily transform this loop. It returns 1 for an input of both
-; 0 and 1.
-; int ctlz_do_while_use_inc(unsigned n)
-; {
-; int i = 0;
-; do {
-; i++;
-; n >>= 1;
-; } while(n != 0);
-; return i;
-; }
-
-define i32 @ctlz_do_while_use_inc(i32 noundef %n) {
-; CHECK-LABEL: define i32 @ctlz_do_while_use_inc(
-; CHECK-SAME: i32 noundef [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[DO_BODY:%.*]]
-; CHECK: do.body:
-; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
-; CHECK-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp ult i32 [[N_ADDR_0]], 2
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[DO_END:%.*]], label [[DO_BODY]]
-; CHECK: do.end:
-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[DO_BODY]] ]
-; CHECK-NEXT: ret i32 [[INC_LCSSA]]
-;
-entry:
- br label %do.body
-
-do.body: ; preds = %do.body, %entry
- %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %do.body ]
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
- %inc = add nuw nsw i32 %i.0, 1
- %shr = lshr i32 %n.addr.0, 1
- %cmp.not = icmp ult i32 %n.addr.0, 2
- br i1 %cmp.not, label %do.end, label %do.body
-
-do.end: ; preds = %do.body
- %inc.lcssa = phi i32 [ %inc, %do.body ]
- ret i32 %inc.lcssa
-}
-
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_do_while_use_phi(unsigned n)
-; {
-; int phi;
-; int inc = 0;
-; do {
-; phi = inc;
-; inc++;
-; n >>= 1;
-; } while(n != 0);
-; return phi;
-; }
-
-define i32 @ctlz_do_while_use_phi(i32 noundef %n) {
-; CHECK-LABEL: define i32 @ctlz_do_while_use_phi(
-; CHECK-SAME: i32 noundef [[N:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[N]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT: br label [[DO_BODY:%.*]]
-; CHECK: do.body:
-; CHECK-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT: [[INC_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[DO_BODY]] ]
-; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[INC_0]], 1
-; CHECK-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; CHECK-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[DO_END:%.*]], label [[DO_BODY]]
-; CHECK: do.end:
-; CHECK-NEXT: [[INC_0_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[DO_BODY]] ]
-; CHECK-NEXT: ret i32 [[INC_0_LCSSA]]
-;
-entry:
- br label %do.body
-
-do.body: ; preds = %do.body, %entry
- %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %do.body ]
- %inc.0 = phi i32 [ 0, %entry ], [ %inc1, %do.body ]
- %inc1 = add nuw nsw i32 %inc.0, 1
- %shr = lshr i32 %n.addr.0, 1
- %cmp.not = icmp ult i32 %n.addr.0, 2
- br i1 %cmp.not, label %do.end, label %do.body
-
-do.end: ; preds = %do.body
- ret i32 %inc.0
-}
-
-
-declare i32 @llvm.abs.i32(i32, i1)
-declare i16 @llvm.abs.i16(i16, i1)
More information about the llvm-commits mailing list