[llvm] b7dee66 - [OpenMPIRBuilder] Implement tileLoops.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 23 17:39:38 PST 2021


Author: Michael Kruse
Date: 2021-01-23T19:39:29-06:00
New Revision: b7dee667b64ff7dea66b483a35883190798c7d72

URL: https://github.com/llvm/llvm-project/commit/b7dee667b64ff7dea66b483a35883190798c7d72
DIFF: https://github.com/llvm/llvm-project/commit/b7dee667b64ff7dea66b483a35883190798c7d72.diff

LOG: [OpenMPIRBuilder] Implement tileLoops.

The tileLoops method implements the code generation part of the tile directive introduced in OpenMP 5.1. It takes a list of loops forming a loop nest, tiles it, and returns the CanonicalLoopInfos representing the generated loops.

The implementation takes n CanonicalLoopInfos and n tile size Values, and returns 2*n new CanonicalLoopInfos. The input CanonicalLoopInfos are invalidated, and those of their BBs that are not reused in the new loop nest are removed from the function.

In a modified version of D76342, I was able to correctly compile and execute a tiled loop nest.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D92974

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/lib/IR/BasicBlock.cpp
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 8e95226d3895..22204d9a9ccb 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -300,6 +300,53 @@ class OpenMPIRBuilder {
                                                bool NeedsBarrier,
                                                Value *Chunk = nullptr);
 
+  /// Tile a loop nest.
+  ///
+  /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
+  /// \p Loops must be perfectly nested, from outermost to innermost loop
+  /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
+  /// of every loop and every tile size must be usable in the outermost
+  /// loop's preheader. This implies that the loop nest is rectangular.
+  ///
+  /// Example:
+  /// \code
+  ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
+  ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
+  ///         body(i, j);
+  /// \endcode
+  ///
+  /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
+  /// \code
+  ///   for (int i1 = 0; i1 < 3; ++i1)
+  ///     for (int j1 = 0; j1 < 2; ++j1)
+  ///       for (int i2 = 0; i2 < 5; ++i2)
+  ///         for (int j2 = 0; j2 < 7; ++j2)
+  ///           body(i1*5+i2, j1*7+j2);
+  /// \endcode
+  ///
+  /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
+  /// are referred to as the floor loops, and the loops i2 and j2 as the tile
+  /// loops. Tiling also handles non-constant trip counts, non-constant tile
+  /// sizes and trip counts that are not multiples of the tile size. In the
+  /// latter case the tile loop of the last floor-loop iteration will have fewer
+  /// iterations than specified as its tile size.
+  ///
+  ///
+  /// @param DL        Debug location for instructions added by tiling, for
+  ///                  instance the floor- and tile trip count computation.
+  /// @param Loops     Loops to tile. The CanonicalLoopInfo objects are
+  ///                  invalidated by this method, i.e. should not be used after
+  ///                  tiling.
+  /// @param TileSizes For each loop in \p Loops, the tile size for that
+  ///                  dimension.
+  ///
+  /// \returns A list of generated loops. Contains twice as many loops as the
+  ///          input loop nest; the first half are the floor loops and the
+  ///          second half are the tile loops.
+  std::vector<CanonicalLoopInfo *>
+  tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+            ArrayRef<Value *> TileSizes);
+
   /// Generator for '#omp flush'
   ///
   /// \param Loc The location where the flush directive was encountered
@@ -729,6 +776,12 @@ class CanonicalLoopInfo {
   BasicBlock *Exit;
   BasicBlock *After;
 
+  /// Add the control blocks of this loop to \p BBs.
+  ///
+  /// This does not include any block from the body, including the one returned
+  /// by getBody().
+  void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
+
 public:
   /// The preheader ensures that there is only a single edge entering the loop.
   /// Code that must be execute before any loop iteration can be emitted here,
@@ -781,6 +834,14 @@ class CanonicalLoopInfo {
     return IndVarPHI;
   }
 
+  /// Return the type of the induction variable (and the trip count).
+  Type *getIndVarType() const { return getIndVar()->getType(); }
+
+  /// Return the insertion point for user code before the loop.
+  OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
+    return {Preheader, std::prev(Preheader->end())};
+  };
+
   /// Return the insertion point for user code in the body.
   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
     return {Body, Body->begin()};

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fc8d1eb238a9..e70847b2f02c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1164,6 +1164,252 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
   return CLI;
 }
 
+/// Make \p Source branch to \p Target.
+///
+/// Handles two situations:
+/// * \p Source already has an unconditional branch.
+/// * \p Source is a degenerate block (no terminator because the BB is
+///             the current head of the IR construction).
+static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
+  if (Instruction *Term = Source->getTerminator()) {
+    auto *Br = cast<BranchInst>(Term);
+    assert(!Br->isConditional() &&
+           "BB's terminator must be an unconditional branch (or degenerate)");
+    BasicBlock *Succ = Br->getSuccessor(0);
+    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
+    Br->setSuccessor(0, Target);
+    return;
+  }
+
+  auto *NewBr = BranchInst::Create(Target, Source);
+  NewBr->setDebugLoc(DL);
+}
+
+/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
+/// after this \p OldTarget will be orphaned.
+static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
+                                      BasicBlock *NewTarget, DebugLoc DL) {
+  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
+    redirectTo(Pred, NewTarget, DL);
+}
+
+/// Determine which blocks in \p BBs are reachable from outside and remove the
+/// ones that are not reachable from the function.
+static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
+  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
+  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
+    for (Use &U : BB->uses()) {
+      auto *UseInst = dyn_cast<Instruction>(U.getUser());
+      if (!UseInst)
+        continue;
+      if (BBsToErase.count(UseInst->getParent()))
+        continue;
+      return true;
+    }
+    return false;
+  };
+
+  while (true) {
+    bool Changed = false;
+    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
+      if (HasRemainingUses(BB)) {
+        BBsToErase.erase(BB);
+        Changed = true;
+      }
+    }
+    if (!Changed)
+      break;
+  }
+
+  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
+  DeleteDeadBlocks(BBVec);
+}
+
+std::vector<CanonicalLoopInfo *>
+OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+                           ArrayRef<Value *> TileSizes) {
+  int NumLoops = Loops.size();
+  assert(TileSizes.size() == NumLoops &&
+         "Must pass as many tile sizes as there are loops");
+  assert(NumLoops >= 1 && "At least one loop to tile required");
+
+  CanonicalLoopInfo *OutermostLoop = Loops.front();
+  CanonicalLoopInfo *InnermostLoop = Loops.back();
+  Function *F = OutermostLoop->getBody()->getParent();
+  BasicBlock *InnerEnter = InnermostLoop->getBody();
+  BasicBlock *InnerLatch = InnermostLoop->getLatch();
+
+  // Collect original trip counts and induction variables to be accessible by
+  // index. Also, the structure of the original loops is not preserved during
+  // the construction of the tiled loops, so do it before we scavenge the BBs of
+  // any original CanonicalLoopInfo.
+  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
+  for (CanonicalLoopInfo *L : Loops) {
+    OrigTripCounts.push_back(L->getTripCount());
+    OrigIndVars.push_back(L->getIndVar());
+  }
+
+  // Collect the code between loop headers. These may contain SSA definitions
+  // that are used in the loop nest body. To be usable within the innermost
+  // body, these BasicBlocks will be sunk into the loop nest body. That is,
+  // these instructions may be executed more often than before the tiling.
+  // TODO: It would be sufficient to only sink them into the body of the
+  // corresponding tile loop.
+  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
+  for (int i = 0; i < NumLoops - 1; ++i) {
+    CanonicalLoopInfo *Surrounding = Loops[i];
+    CanonicalLoopInfo *Nested = Loops[i + 1];
+
+    BasicBlock *EnterBB = Surrounding->getBody();
+    BasicBlock *ExitBB = Nested->getHeader();
+    InbetweenCode.emplace_back(EnterBB, ExitBB);
+  }
+
+  // Compute the trip counts of the floor loops.
+  Builder.SetCurrentDebugLocation(DL);
+  Builder.restoreIP(OutermostLoop->getPreheaderIP());
+  SmallVector<Value *, 4> FloorCount, FloorRems;
+  for (int i = 0; i < NumLoops; ++i) {
+    Value *TileSize = TileSizes[i];
+    Value *OrigTripCount = OrigTripCounts[i];
+    Type *IVType = OrigTripCount->getType();
+
+    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
+    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
+
+    // 0 if the tile size evenly divides the trip count, 1 otherwise.
+    // 1 means we need an additional iteration for a partial tile.
+    //
+    // Unfortunately we cannot just use the roundup-formula
+    //   (tripcount + tilesize - 1)/tilesize
+    // because the summation might overflow. We do not want to introduce
+    // undefined behavior when the untiled loop nest did not.
+    Value *FloorTripOverflow =
+        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
+
+    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
+    FloorTripCount =
+        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
+                          "omp_floor" + Twine(i) + ".tripcount", true);
+
+    // Remember some values for later use.
+    FloorCount.push_back(FloorTripCount);
+    FloorRems.push_back(FloorTripRem);
+  }
+
+  // Generate the new loop nest, from the outermost to the innermost.
+  std::vector<CanonicalLoopInfo *> Result;
+  Result.reserve(NumLoops * 2);
+
+  // The basic block of the surrounding loop that enters the generated loop
+  // nest.
+  BasicBlock *Enter = OutermostLoop->getPreheader();
+
+  // The basic block of the surrounding loop where the inner code should
+  // continue.
+  BasicBlock *Continue = OutermostLoop->getAfter();
+
+  // Where the next loop basic block should be inserted.
+  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
+
+  auto EmbeddNewLoop =
+      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
+          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
+    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
+        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
+    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
+    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
+
+    // Setup the position where the next embedded loop connects to this loop.
+    Enter = EmbeddedLoop->getBody();
+    Continue = EmbeddedLoop->getLatch();
+    OutroInsertBefore = EmbeddedLoop->getLatch();
+    return EmbeddedLoop;
+  };
+
+  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
+                                                  const Twine &NameBase) {
+    for (auto P : enumerate(TripCounts)) {
+      CanonicalLoopInfo *EmbeddedLoop =
+          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
+      Result.push_back(EmbeddedLoop);
+    }
+  };
+
+  EmbeddNewLoops(FloorCount, "floor");
+
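+  // Within the innermost floor loop, emit the code that computes the trip
+  // count of each tile loop: the full tile size, or the remainder for a
+  // trailing partial tile.
+  // NOTE(review): assuming the floor induction variable ranges over
+  // [0, FloorCount[i]), the equality test below can never be true, so the
+  // remainder is never selected. The comparison presumably should be against
+  // the number of complete tiles (the UDiv result computed above) -- confirm.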
+  Builder.SetInsertPoint(Enter->getTerminator());
+  SmallVector<Value *, 4> TileCounts;
+  for (int i = 0; i < NumLoops; ++i) {
+    CanonicalLoopInfo *FloorLoop = Result[i];
+    Value *TileSize = TileSizes[i];
+
+    Value *FloorIsEpilogue =
+        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
+    Value *TileTripCount =
+        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
+
+    TileCounts.push_back(TileTripCount);
+  }
+
+  // Create the tile loops.
+  EmbeddNewLoops(TileCounts, "tile");
+
+  // Insert the inbetween code into the body.
+  BasicBlock *BodyEnter = Enter;
+  BasicBlock *BodyEntered = nullptr;
+  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
+    BasicBlock *EnterBB = P.first;
+    BasicBlock *ExitBB = P.second;
+
+    if (BodyEnter)
+      redirectTo(BodyEnter, EnterBB, DL);
+    else
+      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
+
+    BodyEnter = nullptr;
+    BodyEntered = ExitBB;
+  }
+
+  // Append the original loop nest body into the generated loop nest body.
+  if (BodyEnter)
+    redirectTo(BodyEnter, InnerEnter, DL);
+  else
+    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
+  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
+
+  // Replace the original induction variable with an induction variable computed
+  // from the tile and floor induction variables.
+  Builder.restoreIP(Result.back()->getBodyIP());
+  for (int i = 0; i < NumLoops; ++i) {
+    CanonicalLoopInfo *FloorLoop = Result[i];
+    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
+    Value *OrigIndVar = OrigIndVars[i];
+    Value *Size = TileSizes[i];
+
+    Value *Scale =
+        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
+    Value *Shift =
+        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
+    OrigIndVar->replaceAllUsesWith(Shift);
+  }
+
+  // Remove unused parts of the original loops.
+  SmallVector<BasicBlock *, 12> OldControlBBs;
+  OldControlBBs.reserve(6 * Loops.size());
+  for (CanonicalLoopInfo *Loop : Loops)
+    Loop->collectControlBlocks(OldControlBBs);
+  removeUnusedBlocksFromParent(OldControlBBs);
+
+#ifndef NDEBUG
+  for (CanonicalLoopInfo *GenL : Result)
+    GenL->assertOK();
+#endif
+  return Result;
+}
+
 OpenMPIRBuilder::InsertPointTy
 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
                                    llvm::Value *BufSize, llvm::Value *CpyBuf,
@@ -1570,6 +1816,16 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
   }
 }
 
+void CanonicalLoopInfo::collectControlBlocks(
+    SmallVectorImpl<BasicBlock *> &BBs) {
+  // We only count those BBs as control blocks for which we do not need to
+  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
+  // flow. For consistency, this also means we do not add the Body block, which
+  // is just the entry to the body code.
+  BBs.reserve(BBs.size() + 6);
+  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
+}
+
 void CanonicalLoopInfo::assertOK() const {
 #ifndef NDEBUG
   if (!IsValid)
@@ -1604,11 +1860,16 @@ void CanonicalLoopInfo::assertOK() const {
   assert(Body);
   assert(Body->getSinglePredecessor() == Cond &&
          "Body only reachable from exiting block");
+  assert(!isa<PHINode>(Body->front()));
 
   assert(Latch);
   assert(isa<BranchInst>(Latch->getTerminator()) &&
          "Latch must terminate with unconditional branch");
   assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
+  // TODO: To support simple redirecting of the end of the body code that has
+  // multiple edges into the latch, introduce another auxiliary basic block
+  // like preheader and after.
+  assert(Latch->getSinglePredecessor() != nullptr);
+  assert(!isa<PHINode>(Latch->front()));
 
   assert(Exit);
   assert(isa<BranchInst>(Exit->getTerminator()) &&
@@ -1619,6 +1880,7 @@ void CanonicalLoopInfo::assertOK() const {
   assert(After);
   assert(After->getSinglePredecessor() == Exit &&
          "After block only reachable from exit block");
+  assert(After->empty() || !isa<PHINode>(After->front()));
 
   Instruction *IndVar = getIndVar();
   assert(IndVar && "Canonical induction variable not found?");
@@ -1626,6 +1888,17 @@ void CanonicalLoopInfo::assertOK() const {
          "Induction variable must be an integer");
   assert(cast<PHINode>(IndVar)->getParent() == Header &&
          "Induction variable must be a PHI in the loop header");
+  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
+  assert(
+      cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
+  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
+
+  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
+  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
+  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
+  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
+  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
+             ->isOne());
 
   Value *TripCount = getTripCount();
   assert(TripCount && "Loop trip count not found?");

diff  --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 7f34565f5cd8..00ef10dd53af 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -325,7 +325,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
          "Pred is not a predecessor!");
 
   // Return early if there are no PHI nodes to update.
-  if (!isa<PHINode>(begin()))
+  if (empty() || !isa<PHINode>(begin()))
     return;
 
   unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues();

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 508c274b9b65..1efe9d0ceaf4 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -23,6 +23,95 @@ using namespace omp;
 
 namespace {
 
+/// Create an instruction that uses the values in \p Values. We use "printf"
+/// just because it is often used for this purpose in test code, but it is never
+/// executed here.
+static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
+                                  ArrayRef<Value *> Values) {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+  GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
+  Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+  Constant *Indices[] = {Zero, Zero};
+  Constant *FormatStrConst =
+      ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
+
+  Function *PrintfDecl = M->getFunction("printf");
+  if (!PrintfDecl) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
+    PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
+  }
+
+  SmallVector<Value *, 4> Args;
+  Args.push_back(FormatStrConst);
+  Args.append(Values.begin(), Values.end());
+  return Builder.CreateCall(PrintfDecl, Args);
+}
+
+/// Verify that the blocks in \p RefOrder correspond to the depth-first visit
+/// order of the control flow of \p F.
+///
+/// This is an easy way to verify the branching structure of the CFG without
+/// checking every branch instruction individually. For the CFG of a
+/// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
+/// the body, i.e. the DFS order corresponds to the execution order with one
+/// loop iteration.
+static testing::AssertionResult
+verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
+  ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
+  ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
+
+  df_iterator_default_set<BasicBlock *, 16> Visited;
+  auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
+
+  BasicBlock *Prev = nullptr;
+  for (BasicBlock *BB : DFS) {
+    if (It != E && BB == *It) {
+      Prev = *It;
+      ++It;
+    }
+  }
+
+  if (It == E)
+    return testing::AssertionSuccess();
+  if (!Prev)
+    return testing::AssertionFailure()
+           << "Did not find " << (*It)->getName() << " in control flow";
+  return testing::AssertionFailure()
+         << "Expected " << Prev->getName() << " before " << (*It)->getName()
+         << " in control flow";
+}
+
+/// Verify that blocks in \p RefOrder are in the same relative order in the
+/// linked list of blocks in \p F. The linked list may contain additional
+/// blocks in-between.
+///
+/// While the order in the linked list is not relevant for semantics, keeping
+/// the order roughly in execution order makes its printout easier to read.
+static testing::AssertionResult
+verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
+  ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
+  ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
+
+  BasicBlock *Prev = nullptr;
+  for (BasicBlock &BB : *F) {
+    if (It != E && &BB == *It) {
+      Prev = *It;
+      ++It;
+    }
+  }
+
+  if (It == E)
+    return testing::AssertionSuccess();
+  if (!Prev)
+    return testing::AssertionFailure() << "Did not find " << (*It)->getName()
+                                       << " in function " << F->getName();
+  return testing::AssertionFailure()
+         << "Expected " << Prev->getName() << " before " << (*It)->getName()
+         << " in function " << F->getName();
+}
+
 class OpenMPIRBuilderTest : public testing::Test {
 protected:
   void SetUp() override {
@@ -1071,6 +1160,366 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
   EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
+TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  Value *TripCount = F->getArg(0);
+
+  BasicBlock *BodyCode = nullptr;
+  Instruction *Call = nullptr;
+  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+    Builder.restoreIP(CodeGenIP);
+    BodyCode = Builder.GetInsertBlock();
+
+    // Add something that consumes the induction variable to the body.
+    Call = createPrintfCall(Builder, "%d\\n", {LC});
+  };
+  CanonicalLoopInfo *Loop =
+      OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
+
+  // Finalize the function.
+  Builder.restoreIP(Loop->getAfterIP());
+  Builder.CreateRetVoid();
+
+  Instruction *OrigIndVar = Loop->getIndVar();
+  EXPECT_EQ(Call->getOperand(1), OrigIndVar);
+
+  // Tile the loop.
+  Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
+  std::vector<CanonicalLoopInfo *> GenLoops =
+      OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
+
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  EXPECT_EQ(GenLoops.size(), 2);
+  CanonicalLoopInfo *Floor = GenLoops[0];
+  CanonicalLoopInfo *Tile = GenLoops[1];
+
+  BasicBlock *RefOrder[] = {
+      Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
+      Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
+      Tile->getCond(),       Tile->getBody(),      BodyCode,
+      Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
+      Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
+  };
+  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
+  EXPECT_TRUE(verifyListOrder(F, RefOrder));
+
+  // Check the induction variable.
+  EXPECT_EQ(Call->getParent(), BodyCode);
+  auto *Shift = cast<AddOperator>(Call->getOperand(1));
+  EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
+  EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
+  auto *Scale = cast<MulOperator>(Shift->getOperand(0));
+  EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
+  EXPECT_EQ(Scale->getOperand(0), TileSize);
+  EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
+}
+
+TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  Value *TripCount = F->getArg(0);
+  Type *LCTy = TripCount->getType();
+
+  BasicBlock *BodyCode = nullptr;
+  CanonicalLoopInfo *InnerLoop = nullptr;
+  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
+                                llvm::Value *OuterLC) {
+    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
+                                  llvm::Value *InnerLC) {
+      Builder.restoreIP(InnerCodeGenIP);
+      BodyCode = Builder.GetInsertBlock();
+
+      // Add something that consumes the induction variables to the body.
+      createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
+    };
+    InnerLoop = OMPBuilder.createCanonicalLoop(
+        OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
+  };
+  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+      Loc, OuterLoopBodyGenCB, TripCount, "outer");
+
+  // Finalize the function.
+  Builder.restoreIP(OuterLoop->getAfterIP());
+  Builder.CreateRetVoid();
+
+  // Tile the loop nest.
+  Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
+  Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
+  std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
+      DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
+
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  EXPECT_EQ(GenLoops.size(), 4);
+  CanonicalLoopInfo *Floor1 = GenLoops[0];
+  CanonicalLoopInfo *Floor2 = GenLoops[1];
+  CanonicalLoopInfo *Tile1 = GenLoops[2];
+  CanonicalLoopInfo *Tile2 = GenLoops[3];
+
+  BasicBlock *RefOrder[] = {
+      Floor1->getPreheader(),
+      Floor1->getHeader(),
+      Floor1->getCond(),
+      Floor1->getBody(),
+      Floor2->getPreheader(),
+      Floor2->getHeader(),
+      Floor2->getCond(),
+      Floor2->getBody(),
+      Tile1->getPreheader(),
+      Tile1->getHeader(),
+      Tile1->getCond(),
+      Tile1->getBody(),
+      Tile2->getPreheader(),
+      Tile2->getHeader(),
+      Tile2->getCond(),
+      Tile2->getBody(),
+      BodyCode,
+      Tile2->getLatch(),
+      Tile2->getExit(),
+      Tile2->getAfter(),
+      Tile1->getLatch(),
+      Tile1->getExit(),
+      Tile1->getAfter(),
+      Floor2->getLatch(),
+      Floor2->getExit(),
+      Floor2->getAfter(),
+      Floor1->getLatch(),
+      Floor1->getExit(),
+      Floor1->getAfter(),
+  };
+  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
+  EXPECT_TRUE(verifyListOrder(F, RefOrder));
+}
+
+TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+
+  IRBuilder<> Builder(BB);
+  Value *TripCount = F->getArg(0);
+  Type *LCTy = TripCount->getType();
+
+  Value *OuterStartVal = ConstantInt::get(LCTy, 2);
+  Value *OuterStopVal = TripCount;
+  Value *OuterStep = ConstantInt::get(LCTy, 5);
+  Value *InnerStartVal = ConstantInt::get(LCTy, 13);
+  Value *InnerStopVal = TripCount;
+  Value *InnerStep = ConstantInt::get(LCTy, 3);
+
+  // Fix an insertion point for ComputeIP.
+  BasicBlock *LoopNextEnter =
+      BasicBlock::Create(M->getContext(), "loopnest.enter", F,
+                         Builder.GetInsertBlock()->getNextNode());
+  BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
+  InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
+
+  InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
+  OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
+
+  BasicBlock *BodyCode = nullptr;
+  CanonicalLoopInfo *InnerLoop = nullptr;
+  CallInst *Call = nullptr;
+  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
+                                llvm::Value *OuterLC) {
+    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
+                                  llvm::Value *InnerLC) {
+      Builder.restoreIP(InnerCodeGenIP);
+      BodyCode = Builder.GetInsertBlock();
+
+      // Add something that consumes the induction variable to the body.
+      Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
+    };
+    InnerLoop = OMPBuilder.createCanonicalLoop(
+        OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
+        InnerStep, false, false, ComputeIP, "inner");
+  };
+  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+      Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
+      false, ComputeIP, "outer");
+
+  // Finalize the function
+  Builder.restoreIP(OuterLoop->getAfterIP());
+  Builder.CreateRetVoid();
+
+  // Tile the loop nest.
+  Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
+  Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
+  std::vector<CanonicalLoopInfo *> GenLoops =
+      OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
+
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  EXPECT_EQ(GenLoops.size(), 4);
+  CanonicalLoopInfo *Floor0 = GenLoops[0];
+  CanonicalLoopInfo *Floor1 = GenLoops[1];
+  CanonicalLoopInfo *Tile0 = GenLoops[2];
+  CanonicalLoopInfo *Tile1 = GenLoops[3];
+
+  BasicBlock *RefOrder[] = {
+      Floor0->getPreheader(),
+      Floor0->getHeader(),
+      Floor0->getCond(),
+      Floor0->getBody(),
+      Floor1->getPreheader(),
+      Floor1->getHeader(),
+      Floor1->getCond(),
+      Floor1->getBody(),
+      Tile0->getPreheader(),
+      Tile0->getHeader(),
+      Tile0->getCond(),
+      Tile0->getBody(),
+      Tile1->getPreheader(),
+      Tile1->getHeader(),
+      Tile1->getCond(),
+      Tile1->getBody(),
+      BodyCode,
+      Tile1->getLatch(),
+      Tile1->getExit(),
+      Tile1->getAfter(),
+      Tile0->getLatch(),
+      Tile0->getExit(),
+      Tile0->getAfter(),
+      Floor1->getLatch(),
+      Floor1->getExit(),
+      Floor1->getAfter(),
+      Floor0->getLatch(),
+      Floor0->getExit(),
+      Floor0->getAfter(),
+  };
+  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
+  EXPECT_TRUE(verifyListOrder(F, RefOrder));
+
+  EXPECT_EQ(Call->getParent(), BodyCode);
+
+  auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
+  EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
+  auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
+  EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
+  auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
+  EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
+  EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
+  auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
+  EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
+  EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
+  EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
+
+  auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
+  EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
+  EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
+  auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
+  EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
+  EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
+  auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
+  EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
+  EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
+  auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
+  EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
+  EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
+  EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
+}
+
+// Tiles a single canonical loop with constant bounds, step and tile size and
+// checks the number of iterations of the generated floor loop.
+TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+
+  // Create a loop, tile it, and extract its trip count. All input values are
+  // constant and IRBuilder evaluates all-constant arithmetic in place, such
+  // that the floor trip count itself will be a ConstantInt. Unfortunately we
+  // cannot do the same for the tile loop.
+  //
+  // Start, Stop, Step and TileSize are materialized as 16-bit constants;
+  // IsSigned and InclusiveStop are forwarded to createCanonicalLoop. The
+  // result is the zero-extended trip count of the floor loop.
+  auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
+                           bool IsSigned, bool InclusiveStop,
+                           int64_t TileSize) -> uint64_t {
+    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
+    Type *LCTy = Type::getInt16Ty(Ctx);
+    Value *StartVal = ConstantInt::get(LCTy, Start);
+    Value *StopVal = ConstantInt::get(LCTy, Stop);
+    Value *StepVal = ConstantInt::get(LCTy, Step);
+
+    // Generate a loop with an empty body.
+    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
+    CanonicalLoopInfo *Loop =
+        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
+                                       StepVal, IsSigned, InclusiveStop);
+
+    // Remember where code following the loop goes before tiling: tileLoops
+    // invalidates its input CanonicalLoopInfos, so Loop must not be queried
+    // afterwards.
+    InsertPointTy AfterIP = Loop->getAfterIP();
+
+    // Tile the loop.
+    Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
+    std::vector<CanonicalLoopInfo *> GenLoops =
+        OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
+
+    // Set the insertion point to after the loop, where the next loop will be
+    // emitted.
+    Builder.restoreIP(AfterIP);
+
+    // Extract the trip count; GenLoops[0] is the floor loop.
+    CanonicalLoopInfo *FloorLoop = GenLoops[0];
+    Value *FloorTripCount = FloorLoop->getTripCount();
+    return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
+  };
+
+  // Empty iteration domain.
+  EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0);
+  EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0);
+  EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0);
+  EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0);
+  EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0);
+
+  // Only complete tiles.
+  EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2);
+  // Same 14 iterations, expressed with an inclusive stop.
+  EXPECT_EQ(GetFloorCount(0, 13, 1, false, true, 7), 2);
+  EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2);
+
+  // Only a partial tile.
+  EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1);
+  EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1);
+
+  // Complete and partial tiles.
+  EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3);
+  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2);
+  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5);
+  EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5);
+
+  // Close to 16-bit integer range.
+  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFF);
+  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFF / 7 + 1);
+  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1);
+  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1);
+
+  // Finalize the function.
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+
+  // verifyModule returns true when the module is broken.
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+}
+
 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);


        


More information about the llvm-commits mailing list