[llvm-branch-commits] [llvm] b7dee66 - [OpenMPIRBuilder] Implement tileLoops.
Michael Kruse via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jan 23 17:43:41 PST 2021
Author: Michael Kruse
Date: 2021-01-23T19:39:29-06:00
New Revision: b7dee667b64ff7dea66b483a35883190798c7d72
URL: https://github.com/llvm/llvm-project/commit/b7dee667b64ff7dea66b483a35883190798c7d72
DIFF: https://github.com/llvm/llvm-project/commit/b7dee667b64ff7dea66b483a35883190798c7d72.diff
LOG: [OpenMPIRBuilder] Implement tileLoops.
The tileLoops method implements the code generation part of the tile directive introduced in OpenMP 5.1. It takes a list of loops forming a loop nest, tiles it, and returns the CanonicalLoopInfo representing the generated loops.
The implementation takes n CanonicalLoopInfos and n tile size Values, and returns 2*n new CanonicalLoopInfos. The input CanonicalLoopInfos are invalidated, and BBs that are not reused in the new loop nest are removed from the function.
In a modified version of D76342, I was able to correctly compile and execute a tiled loop nest.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D92974
Added:
Modified:
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/IR/BasicBlock.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 8e95226d3895..22204d9a9ccb 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -300,6 +300,53 @@ class OpenMPIRBuilder {
bool NeedsBarrier,
Value *Chunk = nullptr);
+ /// Tile a loop nest.
+ ///
+ /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
+ /// \p Loops must be perfectly nested, from outermost to innermost loop
+ /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
+ /// of every loop and every tile size must be usable in the outermost
+ /// loop's preheader. This implies that the loop nest is rectangular.
+ ///
+ /// Example:
+ /// \code
+ ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
+ ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
+ ///       body(i, j);
+ /// \endcode
+ ///
+ /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
+ /// \code
+ ///   for (int i1 = 0; i1 < 3; ++i1)
+ ///     for (int j1 = 0; j1 < 2; ++j1)
+ ///       for (int i2 = 0; i2 < 5; ++i2)
+ ///         for (int j2 = 0; j2 < 7; ++j2)
+ ///           body(i1*5+i2, j1*7+j2);
+ /// \endcode
+ ///
+ /// The returned vector holds the loops {i1,j1,i2,j2}. The loops i1 and j1
+ /// are referred to as the floor loops, the loops i2 and j2 as the tile loops.
+ /// Tiling also handles non-constant trip counts, non-constant tile sizes and
+ /// trip counts that are not multiples of the tile size. In that last case,
+ /// the tile loop of the last floor-loop iteration will have fewer iterations
+ /// than specified as its tile size.
+ ///
+ ///
+ /// @param DL Debug location for instructions added by tiling, for
+ /// instance the floor- and tile trip count computation.
+ /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
+ /// invalidated by this method, i.e. should not be used after
+ /// tiling.
+ /// @param TileSizes For each loop in \p Loops, the tile size for that
+ /// dimension.
+ ///
+ /// \returns A list of generated loops. Contains twice as many loops as the
+ /// input loop nest; the first half are the floor loops and the
+ /// second half are the tile loops.
+ std::vector<CanonicalLoopInfo *>
+ tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+ ArrayRef<Value *> TileSizes);
+
/// Generator for '#omp flush'
///
/// \param Loc The location where the flush directive was encountered
@@ -729,6 +776,12 @@ class CanonicalLoopInfo {
BasicBlock *Exit;
BasicBlock *After;
+ /// Append the control blocks of this loop (preheader, header, cond, latch,
+ /// exit and after) to \p BBs; elements already in \p BBs are kept.
+ /// This does not include any block from the body, including the one returned
+ /// by getBody().
+ void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
+
public:
/// The preheader ensures that there is only a single edge entering the loop.
/// Code that must be execute before any loop iteration can be emitted here,
@@ -781,6 +834,14 @@ class CanonicalLoopInfo {
return IndVarPHI;
}
+  /// Return the type of the induction variable; the trip count has the same
+  /// type.
+  Type *getIndVarType() const {
+    auto *IndVar = getIndVar();
+    return IndVar->getType();
+  }
+
+  /// Return the insertion point for user code before the loop.
+  ///
+  /// The insertion point is located in the preheader, directly in front of
+  /// its last instruction (presumably the branch into the header), so code
+  /// emitted here runs once before the first loop iteration.
+  OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
+    return {Preheader, std::prev(Preheader->end())};
+  }
+
/// Return the insertion point for user code in the body.
OpenMPIRBuilder::InsertPointTy getBodyIP() const {
return {Body, Body->begin()};
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fc8d1eb238a9..e70847b2f02c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1164,6 +1164,252 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
return CLI;
}
+/// Rewire \p Source so that control flow leaving it continues at \p Target.
+///
+/// Two shapes of \p Source are supported:
+///  * \p Source ends in an unconditional branch: the branch is retargeted and
+///    the previous successor is told that it lost \p Source as a predecessor.
+///  * \p Source has no terminator (a degenerate block that is the current
+///    head of the IR construction): a new unconditional branch carrying
+///    \p DL is appended.
+static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
+  Instruction *Term = Source->getTerminator();
+  if (!Term) {
+    // Degenerate block: terminate it with a fresh branch to the target.
+    BranchInst *FreshBr = BranchInst::Create(Target, Source);
+    FreshBr->setDebugLoc(DL);
+    return;
+  }
+
+  auto *ExistingBr = cast<BranchInst>(Term);
+  assert(!ExistingBr->isConditional() &&
+         "BB's terminator must be an unconditional branch (or degenerate)");
+
+  // Unregister Source at the old successor before retargeting the branch.
+  BasicBlock *OldSucc = ExistingBr->getSuccessor(0);
+  OldSucc->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
+  ExistingBr->setSuccessor(0, Target);
+}
+
+/// Make every block that currently branches to \p OldTarget branch to
+/// \p NewTarget instead, leaving \p OldTarget without predecessors.
+static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
+                                      BasicBlock *NewTarget, DebugLoc DL) {
+  // Redirecting an edge changes OldTarget's predecessor list, so take a
+  // snapshot of the predecessors before touching any of them.
+  SmallVector<BasicBlock *, 4> Preds(pred_begin(OldTarget),
+                                     pred_end(OldTarget));
+  for (BasicBlock *Pred : Preds) {
+    redirectTo(Pred, NewTarget, DL);
+  }
+}
+
+/// Delete those blocks of \p BBs that are no longer referenced from outside.
+///
+/// A block is kept if it is used by an instruction whose parent block is not
+/// itself scheduled for deletion. Keeping a block can make further blocks
+/// referenced, so the candidate set is shrunk until it no longer changes; the
+/// remaining blocks are then erased via DeleteDeadBlocks.
+static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
+  SmallPtrSet<BasicBlock *, 6> Candidates{BBs.begin(), BBs.end()};
+
+  // True if some instruction outside the candidate set still refers to BB.
+  auto IsUsedFromOutside = [&Candidates](BasicBlock *BB) {
+    return llvm::any_of(BB->uses(), [&](const Use &U) {
+      auto *UserInst = dyn_cast<Instruction>(U.getUser());
+      return UserInst && !Candidates.count(UserInst->getParent());
+    });
+  };
+
+  bool Changed;
+  do {
+    Changed = false;
+    for (BasicBlock *BB : make_early_inc_range(Candidates)) {
+      if (!IsUsedFromOutside(BB))
+        continue;
+      Candidates.erase(BB);
+      Changed = true;
+    }
+  } while (Changed);
+
+  SmallVector<BasicBlock *, 7> DeadBBs(Candidates.begin(), Candidates.end());
+  DeleteDeadBlocks(DeadBBs);
+}
+
+/// Tile the perfectly nested loops in \p Loops by the sizes in \p TileSizes.
+///
+/// For n input loops, 2*n new canonical loops are generated: n floor loops
+/// that iterate over the tiles, followed by n tile loops that iterate within
+/// one tile. The body of the innermost input loop (and any code between the
+/// input loop headers) is moved into the innermost tile loop, and every
+/// original induction variable is replaced by `TileSize * FloorIV + TileIV`.
+/// Control blocks of the input loops that are no longer referenced are
+/// removed from the function, so \p Loops must not be used afterwards.
+///
+/// \param DL        Debug location for the instructions emitted by tiling.
+/// \param Loops     The loop nest to tile, outermost loop first.
+/// \param TileSizes One tile size per loop in \p Loops.
+///
+/// \returns The generated loops: first all floor loops, then all tile loops,
+///          each group ordered from outermost to innermost.
+std::vector<CanonicalLoopInfo *>
+OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+                           ArrayRef<Value *> TileSizes) {
+  assert(TileSizes.size() == Loops.size() &&
+         "Must pass as many tile sizes as there are loops");
+  int NumLoops = Loops.size();
+  assert(NumLoops >= 1 && "At least one loop to tile required");
+
+  CanonicalLoopInfo *OutermostLoop = Loops.front();
+  CanonicalLoopInfo *InnermostLoop = Loops.back();
+  Function *F = OutermostLoop->getBody()->getParent();
+  BasicBlock *InnerEnter = InnermostLoop->getBody();
+  BasicBlock *InnerLatch = InnermostLoop->getLatch();
+
+  // Collect original trip counts and induction variable to be accessible by
+  // index. Also, the structure of the original loops is not preserved during
+  // the construction of the tiled loops, so do it before we scavenge the BBs of
+  // any original CanonicalLoopInfo.
+  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
+  for (CanonicalLoopInfo *L : Loops) {
+    OrigTripCounts.push_back(L->getTripCount());
+    OrigIndVars.push_back(L->getIndVar());
+  }
+
+  // Collect the code between loop headers. These may contain SSA definitions
+  // that are used in the loop nest body. To be usable within the innermost
+  // body, these BasicBlocks will be sunk into the loop nest body. That is,
+  // these instructions may be executed more often than before the tiling.
+  // TODO: It would be sufficient to only sink them into body of the
+  // corresponding tile loop.
+  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
+  for (int i = 0; i < NumLoops - 1; ++i) {
+    CanonicalLoopInfo *Surrounding = Loops[i];
+    CanonicalLoopInfo *Nested = Loops[i + 1];
+
+    BasicBlock *EnterBB = Surrounding->getBody();
+    BasicBlock *ExitBB = Nested->getHeader();
+    InbetweenCode.emplace_back(EnterBB, ExitBB);
+  }
+
+  // Compute the trip counts of the floor loops.
+  Builder.SetCurrentDebugLocation(DL);
+  Builder.restoreIP(OutermostLoop->getPreheaderIP());
+  SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
+  for (int i = 0; i < NumLoops; ++i) {
+    Value *TileSize = TileSizes[i];
+    Value *OrigTripCount = OrigTripCounts[i];
+    Type *IVType = OrigTripCount->getType();
+
+    // Number of complete tiles, and the number of iterations left over for a
+    // trailing partial tile.
+    Value *FloorCompleteTripCount =
+        Builder.CreateUDiv(OrigTripCount, TileSize);
+    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
+
+    // 0 if the tilesize divides the tripcount, 1 otherwise.
+    // 1 means we need an additional iteration for a partial tile.
+    //
+    // Unfortunately we cannot just use the roundup-formula
+    //   (tripcount + tilesize - 1)/tilesize
+    // because the summation might overflow. We do not want to introduce
+    // undefined behavior when the untiled loop nest did not.
+    Value *FloorTripOverflow =
+        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
+
+    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
+    Value *FloorTripCount =
+        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
+                          "omp_floor" + Twine(i) + ".tripcount", true);
+
+    // Remember some values for later use.
+    FloorCompleteCount.push_back(FloorCompleteTripCount);
+    FloorCount.push_back(FloorTripCount);
+    FloorRems.push_back(FloorTripRem);
+  }
+
+  // Generate the new loop nest, from the outermost to the innermost.
+  std::vector<CanonicalLoopInfo *> Result;
+  Result.reserve(NumLoops * 2);
+
+  // The basic block of the surrounding loop that enters the nest generated
+  // loop.
+  BasicBlock *Enter = OutermostLoop->getPreheader();
+
+  // The basic block of the surrounding loop where the inner code should
+  // continue.
+  BasicBlock *Continue = OutermostLoop->getAfter();
+
+  // Where the next loop basic block should be inserted.
+  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
+
+  // Create one new loop and splice it between Enter and Continue; afterwards
+  // Enter/Continue denote the body/latch of that new loop so that the next
+  // loop is nested inside of it.
+  auto EmbeddNewLoop =
+      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
+          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
+    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
+        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
+    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
+    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
+
+    // Setup the position where the next embedded loop connects to this loop.
+    Enter = EmbeddedLoop->getBody();
+    Continue = EmbeddedLoop->getLatch();
+    OutroInsertBefore = EmbeddedLoop->getLatch();
+    return EmbeddedLoop;
+  };
+
+  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
+                                                  const Twine &NameBase) {
+    for (auto P : enumerate(TripCounts)) {
+      CanonicalLoopInfo *EmbeddedLoop =
+          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
+      Result.push_back(EmbeddedLoop);
+    }
+  };
+
+  EmbeddNewLoops(FloorCount, "floor");
+
+  // Within the innermost floor loop, emit the code that computes the tile
+  // sizes.
+  Builder.SetInsertPoint(Enter->getTerminator());
+  SmallVector<Value *, 4> TileCounts;
+  for (int i = 0; i < NumLoops; ++i) {
+    CanonicalLoopInfo *FloorLoop = Result[i];
+    Value *TileSize = TileSizes[i];
+
+    // The floor induction variable enumerates the tiles starting at 0. The
+    // complete tiles come first; the iteration whose index equals the number
+    // of complete tiles exists only if there is a remainder and is the
+    // partial tile. (Comparing against the rounded-up floor trip count would
+    // never be true inside the loop.)
+    Value *FloorIsEpilogue =
+        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
+    Value *TileTripCount =
+        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
+
+    TileCounts.push_back(TileTripCount);
+  }
+
+  // Create the tile loops.
+  EmbeddNewLoops(TileCounts, "tile");
+
+  // Insert the inbetween code into the body.
+  BasicBlock *BodyEnter = Enter;
+  BasicBlock *BodyEntered = nullptr;
+  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
+    BasicBlock *EnterBB = P.first;
+    BasicBlock *ExitBB = P.second;
+
+    if (BodyEnter)
+      redirectTo(BodyEnter, EnterBB, DL);
+    else
+      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
+
+    BodyEnter = nullptr;
+    BodyEntered = ExitBB;
+  }
+
+  // Append the original loop nest body into the generated loop nest body.
+  if (BodyEnter)
+    redirectTo(BodyEnter, InnerEnter, DL);
+  else
+    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
+  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
+
+  // Replace the original induction variable with an induction variable computed
+  // from the tile and floor induction variables.
+  Builder.restoreIP(Result.back()->getBodyIP());
+  for (int i = 0; i < NumLoops; ++i) {
+    CanonicalLoopInfo *FloorLoop = Result[i];
+    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
+    Value *OrigIndVar = OrigIndVars[i];
+    Value *Size = TileSizes[i];
+
+    Value *Scale =
+        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
+    Value *Shift =
+        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
+    OrigIndVar->replaceAllUsesWith(Shift);
+  }
+
+  // Remove unused parts of the original loops.
+  SmallVector<BasicBlock *, 12> OldControlBBs;
+  OldControlBBs.reserve(6 * Loops.size());
+  for (CanonicalLoopInfo *Loop : Loops)
+    Loop->collectControlBlocks(OldControlBBs);
+  removeUnusedBlocksFromParent(OldControlBBs);
+
+#ifndef NDEBUG
+  for (CanonicalLoopInfo *GenL : Result)
+    GenL->assertOK();
+#endif
+  return Result;
+}
+
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
llvm::Value *BufSize, llvm::Value *CpyBuf,
@@ -1570,6 +1816,16 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
}
}
+/// Append the six control blocks of this loop to \p BBs.
+void CanonicalLoopInfo::collectControlBlocks(
+    SmallVectorImpl<BasicBlock *> &BBs) {
+  // Only blocks for which the CFG does not have to be walked backwards count
+  // as control blocks. The loop body can contain arbitrary control flow and
+  // is therefore left out; for consistency its entry block (Body) is left out
+  // as well.
+  BasicBlock *ControlBBs[] = {Preheader, Header, Cond, Latch, Exit, After};
+  BBs.reserve(BBs.size() + 6);
+  BBs.append(std::begin(ControlBBs), std::end(ControlBBs));
+}
+
void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
if (!IsValid)
@@ -1604,11 +1860,16 @@ void CanonicalLoopInfo::assertOK() const {
assert(Body);
assert(Body->getSinglePredecessor() == Cond &&
"Body only reachable from exiting block");
+ assert(!isa<PHINode>(Body->front()));
assert(Latch);
assert(isa<BranchInst>(Latch->getTerminator()) &&
"Latch must terminate with unconditional branch");
assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
+ // TODO: To support simple redirecting of the end of the body code that has
+ // multiple predecessors, introduce another auxiliary basic block like
+ // preheader and after.
+ assert(Latch->getSinglePredecessor() != nullptr);
+ assert(!isa<PHINode>(Latch->front()));
assert(Exit);
assert(isa<BranchInst>(Exit->getTerminator()) &&
@@ -1619,6 +1880,7 @@ void CanonicalLoopInfo::assertOK() const {
assert(After);
assert(After->getSinglePredecessor() == Exit &&
"After block only reachable from exit block");
+ assert(After->empty() || !isa<PHINode>(After->front()));
Instruction *IndVar = getIndVar();
assert(IndVar && "Canonical induction variable not found?");
@@ -1626,6 +1888,17 @@ void CanonicalLoopInfo::assertOK() const {
"Induction variable must be an integer");
assert(cast<PHINode>(IndVar)->getParent() == Header &&
"Induction variable must be a PHI in the loop header");
+ assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
+ assert(
+ cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
+ assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
+
+ auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
+ assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
+ assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
+ assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
+ assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
+ ->isOne());
Value *TripCount = getTripCount();
assert(TripCount && "Loop trip count not found?");
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 7f34565f5cd8..00ef10dd53af 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -325,7 +325,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
"Pred is not a predecessor!");
// Return early if there are no PHI nodes to update.
- if (!isa<PHINode>(begin()))
+ if (empty() || !isa<PHINode>(begin()))
return;
unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 508c274b9b65..1efe9d0ceaf4 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -23,6 +23,95 @@ using namespace omp;
namespace {
+/// Emit a call to "printf" at the current insertion point of \p Builder that
+/// consumes all of \p Values, with \p FormatStr as the format string.
+///
+/// "printf" is used only because it is a common way to consume values in test
+/// code; the call is never executed here. The declaration of "printf" is
+/// created in the module on first use.
+static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
+                                  ArrayRef<Value *> Values) {
+  Function *ParentFn = Builder.GetInsertBlock()->getParent();
+  Module *M = ParentFn->getParent();
+
+  // Materialize the format string and take the address of its first char.
+  GlobalVariable *FormatGV = Builder.CreateGlobalString(FormatStr, "", 0, M);
+  Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+  Constant *GEPIndices[] = {Zero, Zero};
+  Constant *FormatPtr = ConstantExpr::getInBoundsGetElementPtr(
+      FormatGV->getValueType(), FormatGV, GEPIndices);
+
+  // Reuse an existing declaration, otherwise declare `i32 printf(...)`.
+  Function *Printf = M->getFunction("printf");
+  if (!Printf) {
+    FunctionType *PrintfTy = FunctionType::get(Builder.getInt32Ty(), true);
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    Printf = Function::Create(PrintfTy, Linkage, "printf", M);
+  }
+
+  // The format string comes first, followed by the consumed values.
+  SmallVector<Value *, 4> CallArgs;
+  CallArgs.push_back(FormatPtr);
+  CallArgs.append(Values.begin(), Values.end());
+  return Builder.CreateCall(Printf, CallArgs);
+}
+
+/// Verify that the blocks in \p RefOrder appear, in that relative order, in a
+/// depth-first traversal of the control flow of \p F starting at its entry
+/// block. Other blocks may be visited in-between.
+///
+/// This is an easy way to verify the branching structure of the CFG without
+/// checking every branch instruction individually. For the CFG of a
+/// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
+/// the body, i.e. the DFS order corresponds to the execution order with one
+/// loop iteration.
+static testing::AssertionResult
+verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
+  df_iterator_default_set<BasicBlock *, 16> Visited;
+  auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
+
+  // Number of leading entries of RefOrder that have been seen so far.
+  size_t NumMatched = 0;
+  for (BasicBlock *BB : DFS) {
+    if (NumMatched < RefOrder.size() && BB == RefOrder[NumMatched])
+      ++NumMatched;
+  }
+
+  if (NumMatched == RefOrder.size())
+    return testing::AssertionSuccess();
+
+  BasicBlock *Missing = RefOrder[NumMatched];
+  if (NumMatched == 0)
+    return testing::AssertionFailure()
+           << "Did not find " << Missing->getName() << " in control flow";
+
+  BasicBlock *LastFound = RefOrder[NumMatched - 1];
+  return testing::AssertionFailure()
+         << "Expected " << LastFound->getName() << " before "
+         << Missing->getName() << " in control flow";
+}
+
+/// Verify that the blocks in \p RefOrder appear in the same relative order in
+/// the linked list of blocks of \p F. The linked list may contain additional
+/// blocks in-between.
+///
+/// While the order in the linked list is not relevant for semantics, keeping
+/// the order roughly in execution order makes its printout easier to read.
+static testing::AssertionResult
+verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
+  // Number of leading entries of RefOrder that have been seen so far.
+  size_t NumMatched = 0;
+  for (BasicBlock &BB : *F) {
+    if (NumMatched < RefOrder.size() && &BB == RefOrder[NumMatched])
+      ++NumMatched;
+  }
+
+  if (NumMatched == RefOrder.size())
+    return testing::AssertionSuccess();
+
+  BasicBlock *Missing = RefOrder[NumMatched];
+  if (NumMatched == 0)
+    return testing::AssertionFailure() << "Did not find " << Missing->getName()
+                                       << " in function " << F->getName();
+
+  BasicBlock *LastFound = RefOrder[NumMatched - 1];
+  return testing::AssertionFailure()
+         << "Expected " << LastFound->getName() << " before "
+         << Missing->getName() << " in function " << F->getName();
+}
+
class OpenMPIRBuilderTest : public testing::Test {
protected:
void SetUp() override {
@@ -1071,6 +1160,366 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
EXPECT_FALSE(verifyModule(*M, &errs()));
}
+TEST_F(OpenMPIRBuilderTest, TileSingleLoop) { // Tile one loop; check CFG + IV.
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+
+ IRBuilder<> Builder(BB);
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+ Value *TripCount = F->getArg(0); // The trip count is the function argument.
+
+ BasicBlock *BodyCode = nullptr;
+ Instruction *Call = nullptr;
+ auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+ Builder.restoreIP(CodeGenIP);
+ BodyCode = Builder.GetInsertBlock(); // Remember the block holding user code.
+
+ // Add something that consumes the induction variable to the body.
+ Call = createPrintfCall(Builder, "%d\\n", {LC});
+ };
+ CanonicalLoopInfo *Loop =
+ OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
+
+ // Finalize the function.
+ Builder.restoreIP(Loop->getAfterIP());
+ Builder.CreateRetVoid();
+ // Before tiling, the call consumes the loop's induction variable directly.
+ Instruction *OrigIndVar = Loop->getIndVar();
+ EXPECT_EQ(Call->getOperand(1), OrigIndVar);
+
+ // Tile the loop with a constant tile size of 7.
+ Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
+ std::vector<CanonicalLoopInfo *> GenLoops =
+ OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
+
+ OMPBuilder.finalize();
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+ // One input loop yields two loops: the floor loop first, then the tile loop.
+ EXPECT_EQ(GenLoops.size(), 2);
+ CanonicalLoopInfo *Floor = GenLoops[0];
+ CanonicalLoopInfo *Tile = GenLoops[1];
+ // Expected block order: the tile loop nested in the floor loop's body.
+ BasicBlock *RefOrder[] = {
+ Floor->getPreheader(), Floor->getHeader(), Floor->getCond(),
+ Floor->getBody(), Tile->getPreheader(), Tile->getHeader(),
+ Tile->getCond(), Tile->getBody(), BodyCode,
+ Tile->getLatch(), Tile->getExit(), Tile->getAfter(),
+ Floor->getLatch(), Floor->getExit(), Floor->getAfter(),
+ };
+ EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
+ EXPECT_TRUE(verifyListOrder(F, RefOrder));
+
+ // Check the induction variable: it must now be TileSize * FloorIV + TileIV.
+ EXPECT_EQ(Call->getParent(), BodyCode);
+ auto *Shift = cast<AddOperator>(Call->getOperand(1));
+ EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
+ EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
+ auto *Scale = cast<MulOperator>(Shift->getOperand(0));
+ EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
+ EXPECT_EQ(Scale->getOperand(0), TileSize);
+ EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
+}
+
+TEST_F(OpenMPIRBuilderTest, TileNestedLoops) { // Tile a 2-deep nest; check CFG.
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+
+ IRBuilder<> Builder(BB);
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+ Value *TripCount = F->getArg(0); // Both loops use the same trip count.
+ Type *LCTy = TripCount->getType();
+
+ BasicBlock *BodyCode = nullptr;
+ CanonicalLoopInfo *InnerLoop = nullptr;
+ auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
+ llvm::Value *OuterLC) {
+ auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
+ llvm::Value *InnerLC) {
+ Builder.restoreIP(InnerCodeGenIP);
+ BodyCode = Builder.GetInsertBlock(); // Block holding the innermost body.
+
+ // Add something that consumes the induction variables to the body.
+ createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
+ };
+ InnerLoop = OMPBuilder.createCanonicalLoop(
+ OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
+ };
+ CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+ Loc, OuterLoopBodyGenCB, TripCount, "outer");
+
+ // Finalize the function.
+ Builder.restoreIP(OuterLoop->getAfterIP());
+ Builder.CreateRetVoid();
+
+ // Tile the loop nest: tile size 11 for the outer, 7 for the inner loop.
+ Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
+ Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
+ std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
+ DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
+
+ OMPBuilder.finalize();
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+ // Two input loops yield four loops: both floor loops, then both tile loops.
+ EXPECT_EQ(GenLoops.size(), 4);
+ CanonicalLoopInfo *Floor1 = GenLoops[0];
+ CanonicalLoopInfo *Floor2 = GenLoops[1];
+ CanonicalLoopInfo *Tile1 = GenLoops[2];
+ CanonicalLoopInfo *Tile2 = GenLoops[3];
+ // Expected nesting, outermost to innermost: Floor1, Floor2, Tile1, Tile2.
+ BasicBlock *RefOrder[] = {
+ Floor1->getPreheader(),
+ Floor1->getHeader(),
+ Floor1->getCond(),
+ Floor1->getBody(),
+ Floor2->getPreheader(),
+ Floor2->getHeader(),
+ Floor2->getCond(),
+ Floor2->getBody(),
+ Tile1->getPreheader(),
+ Tile1->getHeader(),
+ Tile1->getCond(),
+ Tile1->getBody(),
+ Tile2->getPreheader(),
+ Tile2->getHeader(),
+ Tile2->getCond(),
+ Tile2->getBody(),
+ BodyCode,
+ Tile2->getLatch(),
+ Tile2->getExit(),
+ Tile2->getAfter(),
+ Tile1->getLatch(),
+ Tile1->getExit(),
+ Tile1->getAfter(),
+ Floor2->getLatch(),
+ Floor2->getExit(),
+ Floor2->getAfter(),
+ Floor1->getLatch(),
+ Floor1->getExit(),
+ Floor1->getAfter(),
+ };
+ EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
+ EXPECT_TRUE(verifyListOrder(F, RefOrder));
+}
+
+TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) { // Nest with start/step.
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+
+ IRBuilder<> Builder(BB);
+ Value *TripCount = F->getArg(0); // Used as the stop value of both loops.
+ Type *LCTy = TripCount->getType();
+ // Loop bounds: outer runs from 2 in steps of 5, inner from 13 in steps of 3.
+ Value *OuterStartVal = ConstantInt::get(LCTy, 2);
+ Value *OuterStopVal = TripCount;
+ Value *OuterStep = ConstantInt::get(LCTy, 5);
+ Value *InnerStartVal = ConstantInt::get(LCTy, 13);
+ Value *InnerStopVal = TripCount;
+ Value *InnerStep = ConstantInt::get(LCTy, 3);
+
+ // Fix an insertion point for ComputeIP (in front of the branch that enters
+ // the block in which the loop nest is created).
+ BasicBlock *LoopNextEnter =
+ BasicBlock::Create(M->getContext(), "loopnest.enter", F,
+ Builder.GetInsertBlock()->getNextNode());
+ BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
+ InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
+ // The loop nest itself is emitted at the start of the new block.
+ InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
+ OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
+
+ BasicBlock *BodyCode = nullptr;
+ CanonicalLoopInfo *InnerLoop = nullptr;
+ CallInst *Call = nullptr;
+ auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
+ llvm::Value *OuterLC) {
+ auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
+ llvm::Value *InnerLC) {
+ Builder.restoreIP(InnerCodeGenIP);
+ BodyCode = Builder.GetInsertBlock(); // Block holding the innermost body.
+
+ // Add something that consumes the induction variable to the body.
+ Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
+ };
+ InnerLoop = OMPBuilder.createCanonicalLoop(
+ OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
+ InnerStep, false, false, ComputeIP, "inner");
+ };
+ CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+ Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
+ false, ComputeIP, "outer");
+
+ // Finalize the function
+ Builder.restoreIP(OuterLoop->getAfterIP());
+ Builder.CreateRetVoid();
+
+ // Tile the loop nest: tile size 11 for the outer, 7 for the inner loop.
+ Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
+ Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
+ std::vector<CanonicalLoopInfo *> GenLoops =
+ OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
+
+ OMPBuilder.finalize();
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+ // Two input loops yield four loops: both floor loops, then both tile loops.
+ EXPECT_EQ(GenLoops.size(), 4);
+ CanonicalLoopInfo *Floor0 = GenLoops[0];
+ CanonicalLoopInfo *Floor1 = GenLoops[1];
+ CanonicalLoopInfo *Tile0 = GenLoops[2];
+ CanonicalLoopInfo *Tile1 = GenLoops[3];
+ // Expected nesting, outermost to innermost: Floor0, Floor1, Tile0, Tile1.
+ BasicBlock *RefOrder[] = {
+ Floor0->getPreheader(),
+ Floor0->getHeader(),
+ Floor0->getCond(),
+ Floor0->getBody(),
+ Floor1->getPreheader(),
+ Floor1->getHeader(),
+ Floor1->getCond(),
+ Floor1->getBody(),
+ Tile0->getPreheader(),
+ Tile0->getHeader(),
+ Tile0->getCond(),
+ Tile0->getBody(),
+ Tile1->getPreheader(),
+ Tile1->getHeader(),
+ Tile1->getCond(),
+ Tile1->getBody(),
+ BodyCode,
+ Tile1->getLatch(),
+ Tile1->getExit(),
+ Tile1->getAfter(),
+ Tile0->getLatch(),
+ Tile0->getExit(),
+ Tile0->getAfter(),
+ Floor1->getLatch(),
+ Floor1->getExit(),
+ Floor1->getAfter(),
+ Floor0->getLatch(),
+ Floor0->getExit(),
+ Floor0->getAfter(),
+ };
+ EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
+ EXPECT_TRUE(verifyListOrder(F, RefOrder));
+
+ EXPECT_EQ(Call->getParent(), BodyCode);
+ // Outer variable: (TileSize0 * Floor0IV + Tile0IV) * OuterStep + OuterStart.
+ auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
+ EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
+ auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
+ EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
+ auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
+ EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
+ EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
+ auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
+ EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
+ EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
+ EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
+ // Inner variable: (TileSize1 * Floor1IV + Tile1IV) * InnerStep + InnerStart.
+ auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
+ EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
+ EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
+ auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
+ EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
+ EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
+ auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
+ EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
+ EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
+ auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
+ EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
+ EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
+ EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
+}
+
+// Check the floor-loop trip count computed by tileLoops for a range of
+// constant loop bounds and tile sizes.
+TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+
+  // Create a loop, tile it, and extract its trip count. All input values are
+  // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
+  // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
+  // do the same for the tile loop.
+  auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
+                           bool IsSigned, bool InclusiveStop,
+                           int64_t TileSize) -> uint64_t {
+    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
+    Type *LCTy = Type::getInt16Ty(Ctx);
+    Value *StartVal = ConstantInt::get(LCTy, Start);
+    Value *StopVal = ConstantInt::get(LCTy, Stop);
+    Value *StepVal = ConstantInt::get(LCTy, Step);
+
+    // Generate a loop.
+    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
+    CanonicalLoopInfo *Loop =
+        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
+                                       StepVal, IsSigned, InclusiveStop);
+
+    // tileLoops invalidates the CanonicalLoopInfo passed to it, so remember
+    // the position after the loop before tiling.
+    InsertPointTy AfterIP = Loop->getAfterIP();
+
+    // Tile the loop.
+    Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
+    std::vector<CanonicalLoopInfo *> GenLoops =
+        OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
+
+    // Set the insertion pointer to after loop, where the next loop will be
+    // emitted.
+    Builder.restoreIP(AfterIP);
+
+    // Extract the trip count.
+    CanonicalLoopInfo *FloorLoop = GenLoops[0];
+    Value *FloorTripCount = FloorLoop->getTripCount();
+    return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
+  };
+
+  // Empty iteration domain.
+  EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0);
+  EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0);
+  EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0);
+  EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0);
+  EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0);
+
+  // Only complete tiles.
+  EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2);
+
+  // Only a partial tile.
+  EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1);
+
+  // Complete and partial tiles.
+  EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3);
+  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5);
+  EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5);
+
+  // Close to 16-bit integer range.
+  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFF);
+  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1);
+  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1);
+
+  // Finalize the function.
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+}
+
TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
More information about the llvm-branch-commits
mailing list