[llvm] r265388 - Adds the ability to use an epilog remainder loop during loop unrolling and makes
David L Kreitzer via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 5 05:19:35 PDT 2016
Author: dlkreitz
Date: Tue Apr 5 07:19:35 2016
New Revision: 265388
URL: http://llvm.org/viewvc/llvm-project?rev=265388&view=rev
Log:
Adds the ability to use an epilog remainder loop during loop unrolling and makes
this the default behavior.
Patch by Evgeny Stupachenko (evstupac at gmail.com).
Differential Revision: http://reviews.llvm.org/D18158
Modified:
llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll
llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll
llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll
llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll
llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll
llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll
llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll
llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll
llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
Modified: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h Tue Apr 5 07:19:35 2016
@@ -34,10 +34,11 @@ bool UnrollLoop(Loop *L, unsigned Count,
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC, bool PreserveLCSSA);
-bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
- bool AllowExpensiveTripCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- bool PreserveLCSSA);
+bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ bool PreserveLCSSA);
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
}
Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Tue Apr 5 07:19:35 2016
@@ -44,6 +44,11 @@ using namespace llvm;
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+static cl::opt<bool>
+UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden,
+ cl::desc("Allow runtime unrolled loops to be unrolled "
+ "with epilog instead of prolog."));
+
/// Convert the instruction operands from referencing the current values into
/// those specified by VMap.
static inline void remapInstruction(Instruction *I,
@@ -288,12 +293,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned
"convergent "
"operation.");
});
- // Don't output the runtime loop prolog if Count is a multiple of
- // TripMultiple. Such a prolog is never needed, and is unsafe if the loop
+ // Don't output the runtime loop remainder if Count is a multiple of
+ // TripMultiple. Such a remainder is never needed, and is unsafe if the loop
// contains a convergent instruction.
if (RuntimeTripCount && TripMultiple % Count != 0 &&
- !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
- PreserveLCSSA))
+ !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
+ UnrollRuntimeEpilog, LI, SE, DT,
+ PreserveLCSSA))
return false;
// Notify ScalarEvolution that the loop will be substantially changed,
Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp Tue Apr 5 07:19:35 2016
@@ -16,8 +16,8 @@
// case, we need to generate code to execute these 'left over' iterations.
//
// The current strategy generates an if-then-else sequence prior to the
-// unrolled loop to execute the 'left over' iterations. Other strategies
-// include generate a loop before or after the unrolled loop.
+// unrolled loop to execute the 'left over' iterations before or after the
+// unrolled loop.
//
//===----------------------------------------------------------------------===//
@@ -60,33 +60,35 @@ STATISTIC(NumRuntimeUnrolled,
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
- BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
- BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *PrologExit, BasicBlock *PreHeader,
+ BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
+ DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
// The new PHI node is inserted in the prolog end basic block.
- // The new PHI name is added as an operand of a PHI node in either
+ // The new PHI node value is added as an operand of a PHI node in either
// the loop header or the loop exit block.
- for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
- SBI != SBE; ++SBI) {
- for (BasicBlock::iterator BBI = (*SBI)->begin();
- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
-
+ for (BasicBlock *Succ : successors(Latch)) {
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
- PrologEnd->getTerminator());
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(PN)) {
- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
+ PreHeader);
} else {
- NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
+ NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
}
Value *V = PN->getIncomingValueForBlock(Latch);
@@ -97,22 +99,22 @@ static void ConnectProlog(Loop *L, Value
}
// Adding a value to the new PHI node from the last prolog block
// that was created.
- NewPN->addIncoming(V, LastPrologBB);
+ NewPN->addIncoming(V, PrologLatch);
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
if (L->contains(PN)) {
- PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
} else {
- PN->addIncoming(NewPN, PrologEnd);
+ PN->addIncoming(NewPN, PrologExit);
}
}
}
// Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
- Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *InsertPt = PrologExit->getTerminator();
IRBuilder<> B(InsertPt);
assert(Count != 0 && "nonsensical Count!");
@@ -126,25 +128,152 @@ static void ConnectProlog(Loop *L, Value
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, Exit, NewPH);
+ B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
+ InsertPt->eraseFromParent();
+}
+
+/// Connect the unrolling epilog code to the original loop.
+/// The unrolling epilog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
+/// - Create PHI nodes at the unrolling loop exit to combine
+/// values that exit the unrolling loop code and jump around it.
+/// - Update PHI operands in the epilog loop by the new PHI nodes
+/// - Branch around the epilog loop if extra iters (ModVal) is zero.
+///
+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
+ BasicBlock *Exit, BasicBlock *PreHeader,
+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Loop structure should be the following:
+ //
+ // PreHeader
+ // NewPreHeader
+ // Header
+ // ...
+ // Latch
+ // NewExit (PN)
+ // EpilogPreHeader
+ // EpilogHeader
+ // ...
+ // EpilogLatch
+ // Exit (EpilogPN)
+
+ // Update PHI nodes at NewExit and Exit.
+ for (Instruction &BBI : *NewExit) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+ // PN should be used in another PHI located in Exit block as
+ // Exit was split by SplitBlockPredecessors into Exit and NewExit
+ // Basicaly it should look like:
+ // NewExit:
+ // PN = PHI [I, Latch]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, EpilogPreHeader]
+ //
+ // There is EpilogPreHeader incoming block instead of NewExit as
+ // NewExit was spilt 1 more time to get EpilogPreHeader.
+ assert(PN->hasOneUse() && "The phi should have 1 use");
+ PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
+
+ // Add incoming PreHeader from branch around the Loop
+ PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I && L->contains(I))
+ // If value comes from an instruction in the loop add VMap value.
+ V = VMap[I];
+ // For the instruction out of the loop, constant or undefined value
+ // insert value itself.
+ EpilogPN->addIncoming(V, EpilogLatch);
+
+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
+ "EpilogPN should have EpilogPreHeader incoming block");
+ // Change EpilogPreHeader incoming block to NewExit.
+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
+ NewExit);
+ // Now PHIs should look like:
+ // NewExit:
+ // PN = PHI [I, Latch], [undef, PreHeader]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
+ }
+
+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
+ // Update corresponding PHI nodes in epilog loop.
+ for (BasicBlock *Succ : successors(Latch)) {
+ // Skip this as we already updated phis in exit blocks.
+ if (!L->contains(Succ))
+ continue;
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+ // Add new PHI nodes to the loop exit block and update epilog
+ // PHIs with the new PHI values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ NewExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the unrolling loop preheader.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
+ // Adding a value to the new PHI node from the unrolling loop latch.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
+
+ // Update the existing PHI node operand with the value from the new PHI
+ // node. Corresponding instruction in epilog loop should be PHI.
+ PHINode *VPN = cast<PHINode>(VMap[&BBI]);
+ VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+ }
+ }
+
+ Instruction *InsertPt = NewExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal);
+ assert(Exit && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolling loop)
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
}
/// Create a clone of the blocks in a loop and connect them together.
-/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
-/// loop will be created including all cloned blocks, and the iterator of it
-/// switches to count NewIter down to 0.
+/// If CreateRemainderLoop is false, loop structure will not be cloned,
+/// otherwise a new loop will be created including all cloned blocks, and the
+/// iterator of it switches to count NewIter down to 0.
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
+/// If loop structure is cloned InsertTop should be new preheader, InsertBot
+/// new loop exit.
///
-static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
+static void CloneLoopBlocks(Loop *L, Value *NewIter,
+ const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder,
BasicBlock *InsertTop, BasicBlock *InsertBot,
+ BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
LoopInfo *LI) {
- BasicBlock *Preheader = L->getLoopPreheader();
+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Val
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
- if (!UnrollProlog) {
+ if (CreateRemainderLoop) {
NewLoop = new Loop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Val
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
if (NewLoop)
@@ -179,16 +308,17 @@ static void CloneLoopBlocks(Loop *L, Val
}
if (Latch == *BB) {
- // For the last block, if UnrollProlog is true, create a direct jump to
- // InsertBot. If not, create a loop back to cloned head.
+ // For the last block, if CreateRemainderLoop is false, create a direct
+ // jump to InsertBot. If not, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- if (UnrollProlog) {
+ if (!CreateRemainderLoop) {
Builder.CreateBr(InsertBot);
} else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
FirstLoopBB->getFirstNonPHI());
Value *IdxSub =
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Val
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (UnrollProlog) {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ if (!CreateRemainderLoop) {
+ if (UseEpilogRemainder) {
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ NewPHI->removeIncomingValue(Latch, false);
+ } else {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ }
} else {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
NewPHI->setIncomingBlock(idx, InsertTop);
@@ -254,7 +390,7 @@ static void CloneLoopBlocks(Loop *L, Val
}
}
-/// Insert code in the prolog code when unrolling a loop with a
+/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is total number
@@ -266,6 +402,7 @@ static void CloneLoopBlocks(Loop *L, Val
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
+/// ***Prolog case***
/// extraiters = tripcount % loopfactor
/// if (extraiters == 0) jump Loop:
/// else jump Prol
@@ -277,17 +414,35 @@ static void CloneLoopBlocks(Loop *L, Val
/// ...
/// End:
///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
- bool AllowExpensiveTripCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- bool PreserveLCSSA) {
- // For now, only unroll loops that contain a single exit.
+/// ***Epilog case***
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == tripcount) jump LoopExit:
+/// unroll_iters = tripcount - extraiters
+/// Loop: LoopBody; (executes unroll_iter times);
+/// unroll_iter -= 1
+/// if (unroll_iter != 0) jump Loop:
+/// LoopExit:
+/// if (extraiters == 0) jump EpilExit:
+/// Epil: LoopBody; (executes extraiters times)
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
+/// EpilExit:
+
+bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder,
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, bool PreserveLCSSA) {
+ // for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
return false;
// Make sure the loop is in canonical form, and there is a single
// exit block only.
- if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
+ if (!L->isLoopSimplifyForm())
+ return false;
+ BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
+ if (!Exit)
return false;
// Use Scalar Evolution to compute the trip count. This allows more loops to
@@ -311,8 +466,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
return false;
BasicBlock *Header = L->getHeader();
- BasicBlock *PH = L->getLoopPreheader();
- BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
if (!AllowExpensiveTripCount &&
@@ -330,26 +485,75 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
SE->forgetLoop(ParentLoop);
BasicBlock *Latch = L->getLoopLatch();
- // It helps to split the original preheader twice, one for the end of the
- // prolog code and one for a new loop preheader.
- BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
- PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+ // Loop structure is the following:
+ //
+ // PreHeader
+ // Header
+ // ...
+ // Latch
+ // Exit
+
+ BasicBlock *NewPreHeader;
+ BasicBlock *NewExit = nullptr;
+ BasicBlock *PrologExit = nullptr;
+ BasicBlock *EpilogPreHeader = nullptr;
+ BasicBlock *PrologPreHeader = nullptr;
+
+ if (UseEpilogRemainder) {
+ // If epilog remainder
+ // Split PreHeader to insert a branch around loop for unrolling.
+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ // Split Exit to create phi nodes from branch above.
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
+ DT, LI, PreserveLCSSA);
+ // Split NewExit to insert epilog remainder loop.
+ EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+ } else {
+ // If prolog remainder
+ // Split the original preheader twice to insert prolog remainder loop
+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");
+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
+ DT, LI);
+ PrologExit->setName(Header->getName() + ".prol.loopexit");
+ // Split PrologExit to get NewPreHeader.
+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ }
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // *NewPreHeader *PrologPreHeader
+ // Header *PrologExit
+ // ... *NewPreHeader
+ // Latch Header
+ // *NewExit ...
+ // *EpilogPreHeader Latch
+ // Exit Exit
+
+ // Calculate conditions for branch around loop for unrolling
+ // in epilog case and around prolog remainder loop in prolog case.
// Compute the number of extra iterations required, which is:
- // extra iterations = run-time trip count % (loop unroll factor + 1)
+ // extra iterations = run-time trip count % loop unroll factor
+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
-
IRBuilder<> B(PreHeaderBR);
Value *ModVal;
// Calculate ModVal = (BECount + 1) % Count.
// Note that TripCount is BECount + 1.
if (isPowerOf2_32(Count)) {
+ // When Count is power of 2 we don't BECount for epilog case, however we'll
+ // need it for a branch around unrolling loop for prolog case.
ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // 1. There are no iterations to be run in the prologue loop.
+ // 1. There are no iterations to be run in the prolog/epilog loop.
// OR
// 2. The addition computing TripCount overflowed.
//
@@ -371,18 +575,18 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
ConstantInt::get(BECount->getType(), Count),
"xtraiter");
}
- Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
-
- // Branch to either the extra iterations or the cloned/unrolled loop.
- // We will fix up the true branch label when adding loop body copies.
- B.CreateCondBr(BranchVal, PEnd, PEnd);
- assert(PreHeaderBR->isUnconditional() &&
- PreHeaderBR->getSuccessor(0) == PEnd &&
- "CFG edges in Preheader are not correct");
+ Value *CmpOperand =
+ UseEpilogRemainder ? TripCount :
+ ConstantInt::get(TripCount->getType(), 0);
+ Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod");
+ BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader;
+ BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit;
+ // Branch to either remainder (extra iterations) loop or unrolling loop.
+ B.CreateCondBr(BranchVal, FirstLoop, SecondLoop);
PreHeaderBR->eraseFromParent();
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
- // cloned blocks in the prolog code.
+ // cloned blocks in the prolog/epilog code
LoopBlocksDFS LoopBlocks(L);
LoopBlocks.perform(LI);
@@ -394,17 +598,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- bool UnrollPrologue = Count == 2;
+ // For unroll factor 2 remainder loop will have 1 iterations.
+ // Do not create 1 iteration loop.
+ bool CreateRemainderLoop = (Count != 2);
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
- CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
- VMap, LI);
+ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
+ CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+
+ // Insert the cloned blocks into the function.
+ F->getBasicBlockList().splice(InsertBot->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(),
+ F->end());
- // Insert the cloned blocks into the function just before the original loop.
- F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
- NewBlocks[0]->getIterator(), F->end());
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // NewPreHeader PrologPreHeader
+ // Header PrologHeader
+ // ... ...
+ // Latch PrologLatch
+ // NewExit PrologExit
+ // EpilogPreHeader NewPreHeader
+ // EpilogHeader Header
+ // ... ...
+ // EpilogLatch Latch
+ // Exit Exit
// Rewrite the cloned instruction operands to use the values created when the
// clone is created.
@@ -415,11 +640,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
}
}
- // Connect the prolog code to the original loop and update the
- // PHI functions.
- BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
- PreserveLCSSA);
+ if (UseEpilogRemainder) {
+ // Connect the epilog code to the original loop and update the
+ // PHI functions.
+ ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
+ EpilogPreHeader, NewPreHeader, VMap, DT, LI,
+ PreserveLCSSA);
+
+ // Update counter in loop for unrolling.
+ // I should be multiply of Count.
+ IRBuilder<> B2(NewPreHeader->getTerminator());
+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ B2.SetInsertPoint(LatchBR);
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
+ Header->getFirstNonPHI());
+ Value *IdxSub =
+ B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".nsub");
+ Value *IdxCmp;
+ if (LatchBR->getSuccessor(0) == Header)
+ IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
+ else
+ IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(TestVal, NewPreHeader);
+ NewIdx->addIncoming(IdxSub, Latch);
+ LatchBR->setCondition(IdxCmp);
+ } else {
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
+ VMap, DT, LI, PreserveLCSSA);
+ }
NumRuntimeUnrolled++;
return true;
}
Modified: llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll Tue Apr 5 07:19:35 2016
@@ -1,13 +1,21 @@
-; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Tests for unrolling loops with run-time trip counts
-; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
+; EPILOG: %xtraiter = and i32 %n
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
+; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
-; CHECK: for.body.prol:
-; CHECK: for.body:
+; PROLOG: %xtraiter = and i32 %n
+; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+
+; EPILOG: for.body:
+; EPILOG: for.body.epil:
+
+; PROLOG: for.body.prol:
+; PROLOG: for.body:
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
Modified: llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll Tue Apr 5 07:19:35 2016
@@ -1,4 +1,5 @@
-; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
@@ -13,11 +14,17 @@ exit:
ret void
}
-; CHECK-LABEL: @unroll_opt_for_size
-; CHECK: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK: icmp
+; EPILOG-LABEL: @unroll_opt_for_size
+; EPILOG: add
+; EPILOG-NEXT: add
+; EPILOG-NEXT: add
+; EPILOG: icmp
+
+; PROLOG-LABEL: @unroll_opt_for_size
+; PROLOG: add
+; PROLOG-NEXT: add
+; PROLOG-NEXT: add
+; PROLOG: icmp
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@@ -40,8 +47,13 @@ for.end:
ret i32 %sum.0.lcssa
}
-; CHECK-LABEL: @test
-; CHECK: for.body.prol{{.*}}:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG-LABEL: @test
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil{{.*}}:
+
+; PROLOG-LABEL: @test
+; PROLOG: for.body.prol{{.*}}:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
Modified: llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll Tue Apr 5 07:19:35 2016
@@ -14,9 +14,9 @@ for.body:
exit: ; preds = %for.body
%ret = phi x86_mmx [ undef, %for.body ]
- ; CHECK: %[[ret_unr:.*]] = phi x86_mmx [ undef,
- ; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef,
- ; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_unr]], {{.*}} ], [ %[[ret_ph]]
+ ; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef, %entry
+ ; CHECK: %[[ret_ph1:.*]] = phi x86_mmx [ undef,
+ ; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_ph]], {{.*}} ], [ %[[ret_ph1]],
; CHECK: ret x86_mmx %[[ret]]
ret x86_mmx %ret
}
Modified: llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll Tue Apr 5 07:19:35 2016
@@ -34,7 +34,7 @@ define i32 @test2(i64* %loc, i64 %conv7)
; CHECK: udiv
; CHECK: udiv
; CHECK-NOT: udiv
-; CHECK-LABEL: for.body.prol
+; CHECK-LABEL: for.body
entry:
%rem0 = load i64, i64* %loc, align 8
%ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll Tue Apr 5 07:19:35 2016
@@ -1,18 +1,30 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Tests for unrolling loops with run-time trip counts
-; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
-
-; CHECK: for.body.prol:
-; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
-; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1
-; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0
+; EPILOG: %xtraiter = and i32 %n
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
+; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
+
+; PROLOG: %xtraiter = and i32 %n
+; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+
+; EPILOG: for.body.epil:
+; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
+; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
+; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
+
+; PROLOG: for.body.prol:
+; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
+; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
+; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit, !llvm.loop !0
+
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@@ -39,8 +51,11 @@ for.end:
; Still try to completely unroll loops with compile-time trip counts
; even if the -unroll-runtime is specified
-; CHECK: for.body:
-; CHECK-NOT: for.body.prol:
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
entry:
@@ -64,7 +79,8 @@ for.end:
; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
; if the -unroll-runtime option is turned on
-; CHECK: bb72.2:
+; EPILOG: bb72.2:
+; PROLOG: bb72.2:
define void @foo(i32 %trips) {
entry:
@@ -86,8 +102,11 @@ cond_true138:
; Test run-time unrolling for a loop that counts down by -2.
-; CHECK: for.body.prol:
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
+; EPILOG: for.body.epil:
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
+
+; PROLOG: for.body.prol:
+; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
@@ -116,8 +135,11 @@ for.end:
}
; Test run-time unrolling disable metadata.
-; CHECK: for.body:
-; CHECK-NOT: for.body.prol:
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
@@ -148,6 +170,8 @@ for.end:
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: !0 = distinct !{!0, !1}
-; CHECK: !1 = !{!"llvm.loop.unroll.disable"}
+; EPILOG: !0 = distinct !{!0, !1}
+; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
+; PROLOG: !0 = distinct !{!0, !1}
+; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}
Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll Tue Apr 5 07:19:35 2016
@@ -1,19 +1,35 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; This tests that setting the unroll count works
-; CHECK: for.body.preheader:
-; CHECK: br {{.*}} label %for.body.prol, label %for.body.preheader.split, !dbg [[PH_LOC:![0-9]+]]
-; CHECK: for.body.prol:
-; CHECK: br label %for.body.preheader.split, !dbg [[BODY_LOC:![0-9]+]]
-; CHECK: for.body.preheader.split:
-; CHECK: br {{.*}} label %for.end.loopexit, label %for.body.preheader.split.split, !dbg [[PH_LOC]]
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
-; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
-; CHECK-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
-; CHECK-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; EPILOG: for.body.preheader:
+; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa, !dbg [[PH_LOC:![0-9]+]]
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
+; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil.preheader:
+; EPILOG: br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]]
+; EPILOG: for.body.epil:
+; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]]
+
+; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}})
+
+; PROLOG: for.body.preheader:
+; PROLOG: br {{.*}} label %for.body.prol.preheader, label %for.body.prol.loopexit, !dbg [[PH_LOC:![0-9]+]]
+; PROLOG: for.body.prol:
+; PROLOG: br label %for.body.prol.loopexit, !dbg [[BODY_LOC:![0-9]+]]
+; PROLOG: for.body.prol.loopexit:
+; PROLOG: br {{.*}} label %for.end.loopexit, label %for.body.preheader.new, !dbg [[PH_LOC]]
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
+; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+; PROLOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+; PROLOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
entry:
Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll Tue Apr 5 07:19:35 2016
@@ -1,12 +1,18 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Choose a smaller, power-of-two, unroll count if the loop is too large.
; This test makes sure we're not unrolling 'odd' counts
-; CHECK: for.body.prol:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
-; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG-NOT: br i1 %niter.ncmp.4, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil:
+
+; PROLOG: for.body.prol:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll Tue Apr 5 07:19:35 2016
@@ -1,13 +1,21 @@
-; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s
+; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Check runtime unrolling prologue can be promoted by LICM pass.
-; CHECK: entry:
-; CHECK: %xtraiter
-; CHECK: %lcmp.mod
-; CHECK: loop1:
-; CHECK: br i1 %lcmp.mod
-; CHECK: loop2.prol:
+; EPILOG: entry:
+; EPILOG: %xtraiter
+; EPILOG: %lcmp.mod
+; EPILOG: loop1:
+; EPILOG: br i1 %lcmp.mod
+; EPILOG: loop2.epil:
+
+; PROLOG: entry:
+; PROLOG: %xtraiter
+; PROLOG: %lcmp.mod
+; PROLOG: loop1:
+; PROLOG: br i1 %lcmp.mod
+; PROLOG: loop2.prol:
define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
entry:
Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll Tue Apr 5 07:19:35 2016
@@ -11,9 +11,6 @@ entry:
%cmp1 = icmp eq i3 %n, 0
br i1 %cmp1, label %for.end, label %for.body
-; UNROLL-16-NOT: for.body.prol:
-; UNROLL-4: for.body.prol:
-
for.body: ; preds = %for.body, %entry
; UNROLL-16-LABEL: for.body:
; UNROLL-4-LABEL: for.body:
@@ -39,6 +36,10 @@ for.body:
; UNROLL-16-LABEL: for.end
; UNROLL-4-LABEL: for.end
+
+; UNROLL-16-NOT: for.body.epil:
+; UNROLL-4: for.body.epil:
+
for.end: ; preds = %for.body, %entry
%sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
ret i3 %sum.0.lcssa
Modified: llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll Tue Apr 5 07:19:35 2016
@@ -13,13 +13,13 @@ target datalayout = "e-m:o-i64:64-f80:12
; CHECK: entry:
; CHECK-NEXT: %0 = add i32 %N, 1
; CHECK-NEXT: %xtraiter = and i32 %0, 1
-; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
+; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, %0
+; CHECK-NEXT: br i1 %lcmp.mod, label %entry.new, label %while.end.unr-lcssa
-; CHECK: while.body.prol:
-; CHECK: br label %entry.split
+; CHECK: while.body.epil:
+; CHECK: br label %while.end.epilog-lcssa
-; CHECK: entry.split:
+; CHECK: while.end.epilog-lcssa:
; Function Attrs: nounwind readnone ssp uwtable
define i32 @foo(i32 %N) {
Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll Tue Apr 5 07:19:35 2016
@@ -4,14 +4,14 @@
; RUN: opt < %s -O2 -S | FileCheck %s
; After loop unroll:
-; %dec18 = add nsw i32 %dec18.in, -1
+; %niter.nsub = add nsw i32 %niter, -1
; ...
-; %dec18.1 = add nsw i32 %dec18, -1
+; %niter.nsub.1 = add nsw i32 %niter.nsub, -1
; should be merged to:
-; %dec18.1 = add nsw i32 %dec18.in, -2
+; %dec18.1 = add nsw i32 %niter, -2
;
; CHECK-LABEL: @_Z3fn1v(
-; CHECK: %dec18.1 = add nsw i32 %dec18.in, -2
+; CHECK: %niter.nsub.1 = add i32 %niter, -2
; ModuleID = '<stdin>'
target triple = "x86_64-unknown-linux-gnu"
Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=265388&r1=265387&r2=265388&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Tue Apr 5 07:19:35 2016
@@ -171,10 +171,6 @@ for.end:
; should be duplicated (original and 4x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count4(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body
; CHECK: store
; CHECK: store
@@ -182,6 +178,10 @@ for.end:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -287,10 +287,6 @@ for.end:
; (original and 8x).
;
; CHECK-LABEL: @runtime_loop_with_enable(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
@@ -302,6 +298,10 @@ for.end:
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -328,16 +328,16 @@ for.end:
; should be duplicated (original and 3x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count3(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
More information about the llvm-commits
mailing list