[llvm-branch-commits] [llvm] c102c78 - [OpenMPIRBuilder] introduce createStaticWorkshareLoop

Alex Zinenko via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 7 13:37:15 PST 2020


Author: Alex Zinenko
Date: 2020-12-07T22:30:59+01:00
New Revision: c102c783cd08cb1bf9119fe33cce34f6d1563881

URL: https://github.com/llvm/llvm-project/commit/c102c783cd08cb1bf9119fe33cce34f6d1563881
DIFF: https://github.com/llvm/llvm-project/commit/c102c783cd08cb1bf9119fe33cce34f6d1563881.diff

LOG: [OpenMPIRBuilder] introduce createStaticWorkshareLoop

Introduce a function that creates a statically-scheduled workshare loop
out of a canonical loop created earlier by the OpenMPIRBuilder. This
basically amounts to injecting runtime calls into the preheader and the
exit block and updating the trip count. Static scheduling kind is
currently hardcoded and needs to be extracted from the runtime library
into common TableGen definitions.

Differential Revision: https://reviews.llvm.org/D92476

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a09605bb1023..2e4bb20c7998 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -260,6 +260,32 @@ class OpenMPIRBuilder {
                                          Value *Start, Value *Stop, Value *Step,
                                          bool IsSigned, bool InclusiveStop);
 
+  /// Modifies the canonical loop to be a statically-scheduled workshare loop.
+  ///
+  /// This takes \p CLI, a descriptor of a canonical loop such as the one
+  /// created by \c createCanonicalLoop, and emits additional instructions to
+  /// turn it into a workshare loop. In particular, it calls an OpenMP
+  /// runtime function in the preheader to obtain the loop bounds to be used in
+  /// the current thread, updates the relevant instructions in the canonical
+  /// loop and calls an OpenMP runtime finalization function after the loop.
+  ///
+  /// \param Loc      The source location description, the insertion location
+  ///                 is not used.
+  /// \param CLI      A descriptor of the canonical loop to workshare.
+  /// \param AllocaIP An insertion point for Alloca instructions usable in the
+  ///                 preheader of the loop.
+  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
+  ///                     the loop.
+  /// \param Chunk    The size of loop chunk considered as a unit when
+  ///                 scheduling. If \c nullptr, defaults to 1.
+  ///
+  /// \returns The updated \p CLI, or \c nullptr if updateToLocation fails.
+  CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
+                                               CanonicalLoopInfo *CLI,
+                                               InsertPointTy AllocaIP,
+                                               bool NeedsBarrier,
+                                               Value *Chunk = nullptr);
+
   /// Generator for '#omp flush'
   ///
   /// \param Loc The location where the flush directive was encountered
@@ -636,7 +662,9 @@ class OpenMPIRBuilder {
 ///  |    Cond---\
 ///  |     |     |
 ///  |    Body   |
-///  |     |     |
+///  |    | |    |
+///  |   <...>   |
+///  |    | |    |
 ///   \--Latch   |
 ///              |
 ///             Exit
@@ -644,7 +672,9 @@ class OpenMPIRBuilder {
 ///            After
 ///
 /// Code in the header, condition block, latch and exit block must not have any
-/// side-effect.
+/// side-effect. The body block is the single entry point into the loop body,
+/// which may contain arbitrary control flow as long as all control paths
+/// eventually branch to the latch block.
 ///
 /// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
 /// classes.
@@ -701,7 +731,7 @@ class CanonicalLoopInfo {
   /// statements/cancellations).
   BasicBlock *getAfter() const { return After; }
 
-  /// Returns the llvm::Value containing the number of loop iterations. I must
+  /// Returns the llvm::Value containing the number of loop iterations. It must
   /// be valid in the preheader and always interpreted as an unsigned integer of
   /// any bit-width.
   Value *getTripCount() const {

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 044e69da8665..6587a3637c90 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -999,6 +999,118 @@ CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
   return createCanonicalLoop(Builder.saveIP(), BodyGen, TripCount);
 }
 
+// Returns the OpenMP runtime function that initializes static-schedule loop
+// bounds for iterators of type `Ty`. The runtime supports only i32 and i64;
+// integers are always treated as unsigned, as in CanonicalLoopInfo.
+static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
+                                                  OpenMPIRBuilder &OMPBuilder) {
+  switch (Ty->getIntegerBitWidth()) {
+  case 32:
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
+  case 64:
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
+  }
+  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
+// Sets the trip count of the loop to `TripCount`. The value must be valid in
+// the condition block (i.e., defined in the preheader); it is unsigned.
+static void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI,
+                                      Value *TripCount) {
+  Instruction *CmpI = &CLI->getCond()->front();
+  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
+  CmpI->setOperand(1, TripCount);
+  CLI->assertOK();
+}
+
+CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
+    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
+    InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
+  // Set up the source location value for OpenMP runtime.
+  if (!updateToLocation(Loc))
+    return nullptr;
+
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
+
+  // Declare useful OpenMP runtime functions.
+  Value *IV = CLI->getIndVar();
+  Type *IVTy = IV->getType();
+  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
+  FunctionCallee StaticFini =
+      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
+
+  // Allocate space for computed loop bounds as expected by the "init" function.
+  Builder.restoreIP(AllocaIP);
+  Type *I32Type = Type::getInt32Ty(M.getContext());
+  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
+  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
+  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
+  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
+
+  // At the end of the preheader, prepare for calling the "init" function by
+  // storing the current loop bounds into the allocated space. A canonical loop
+  // always iterates from 0 to trip-count with step 1. Note that "init" expects
+  // and produces an inclusive upper bound.
+  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+  Constant *Zero = ConstantInt::get(IVTy, 0);
+  Constant *One = ConstantInt::get(IVTy, 1);
+  Builder.CreateStore(Zero, PLowerBound);
+  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
+  Builder.CreateStore(UpperBound, PUpperBound);
+  Builder.CreateStore(One, PStride);
+
+  if (!Chunk)
+    Chunk = One;
+
+  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
+
+  // TODO: extract scheduling type and map it to OMP constant. This is currently
+  // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
+  constexpr int StaticSchedType = 34;
+  Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);
+
+  // Call the "init" function and update the trip count of the loop with the
+  // value it produced.
+  Builder.CreateCall(StaticInit,
+                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
+                      PUpperBound, PStride, One, Chunk});
+  Value *LowerBound = Builder.CreateLoad(PLowerBound);
+  Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
+  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
+  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
+  setCanonicalLoopTripCount(CLI, TripCount);
+
+  // Update all uses of the induction variable except the one in the condition
+  // block that compares it with the actual upper bound, and the increment in
+  // the latch block.
+  // TODO: this can eventually move to CanonicalLoopInfo or to a new
+  // CanonicalLoopInfoUpdater interface.
+  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
+  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
+  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
+    auto *Instr = dyn_cast<Instruction>(U.getUser());
+    return !Instr ||
+           (Instr->getParent() != CLI->getCond() &&
+            Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
+  });
+
+  // In the "exit" block, call the "fini" function.
+  Builder.SetInsertPoint(CLI->getExit()->getTerminator());
+  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
+
+  // Add the barrier if requested, located right after the "fini" call.
+  if (NeedsBarrier)
+    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
+                  /* CheckCancelFlag */ false);
+
+  CLI->assertOK();
+  return CLI;
+}
+
 void CanonicalLoopInfo::eraseFromParent() {
   assert(IsValid && "can only erase previously valid loop cfg");
   IsValid = false;

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index fbf6c2fe4076..1ad2264d3e39 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1071,6 +1071,92 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
   EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
+TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  Type *LCTy = Type::getInt32Ty(Ctx);
+  Value *StartVal = ConstantInt::get(LCTy, 10);
+  Value *StopVal = ConstantInt::get(LCTy, 52);
+  Value *StepVal = ConstantInt::get(LCTy, 2);
+  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
+
+  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+      Loc, LoopBodyGen, StartVal, StopVal, StepVal,
+      /*IsSigned=*/false, /*InclusiveStop=*/false);
+
+  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+  InsertPointTy AllocaIP = Builder.saveIP();
+
+  CLI = OMPBuilder.createStaticWorkshareLoop(Loc, CLI, AllocaIP,
+                                             /*NeedsBarrier=*/true);
+  ASSERT_NE(CLI, nullptr);
+  auto AllocaIter = BB->begin();
+  ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
+  AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
+  AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
+  AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
+  AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
+  EXPECT_NE(PLastIter, nullptr);
+  EXPECT_NE(PLowerBound, nullptr);
+  EXPECT_NE(PUpperBound, nullptr);
+  EXPECT_NE(PStride, nullptr);
+
+  BasicBlock *Preheader = CLI->getPreheader();
+  auto PreheaderIter = Preheader->begin();
+  ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
+  StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
+  StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
+  StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
+  ASSERT_NE(LowerBoundStore, nullptr);
+  ASSERT_NE(UpperBoundStore, nullptr);
+  ASSERT_NE(StrideStore, nullptr);
+
+  auto *OrigLowerBound =
+      dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
+  auto *OrigUpperBound =
+      dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
+  auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
+  ASSERT_NE(OrigLowerBound, nullptr);
+  ASSERT_NE(OrigUpperBound, nullptr);
+  ASSERT_NE(OrigStride, nullptr);
+  EXPECT_EQ(OrigLowerBound->getValue(), 0);
+  EXPECT_EQ(OrigUpperBound->getValue(), 20);
+  EXPECT_EQ(OrigStride->getValue(), 1);
+
+  // The body must start by adding the runtime-provided lower bound to the IV.
+  auto *Add = dyn_cast<BinaryOperator>(&CLI->getBody()->front());
+  ASSERT_NE(Add, nullptr);
+  EXPECT_EQ(Add->getOperand(0), CLI->getIndVar());
+  auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
+  ASSERT_NE(LoadedLowerBound, nullptr);
+  EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
+
+  // Check that the trip count is updated to account for the lower and upper
+  // bounds returned by the OpenMP runtime call.
+  auto *AddOne = dyn_cast<Instruction>(CLI->getTripCount());
+  ASSERT_NE(AddOne, nullptr);
+  ASSERT_TRUE(AddOne->isBinaryOp());
+  auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
+  ASSERT_NE(One, nullptr);
+  EXPECT_EQ(One->getValue(), 1);
+  auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
+  ASSERT_NE(Difference, nullptr);
+  ASSERT_TRUE(Difference->isBinaryOp());
+  EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
+  auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
+  ASSERT_NE(LoadedUpperBound, nullptr);
+  EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
+
+  // The original loop iterator should only be used in the condition, in the
+  // increment and in the statement that adds the lower bound to it.
+  Value *IV = CLI->getIndVar();
+  EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
+}
+
 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);


        


More information about the llvm-branch-commits mailing list