[llvm] e4add97 - [OpenMP][IR-Builder] Introduce "pragma omp parallel" code generation

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 25 16:03:02 PST 2019


Author: Johannes Doerfert
Date: 2019-12-25T18:02:23-06:00
New Revision: e4add9727b43f413a93993add5d97695b8c3b2be

URL: https://github.com/llvm/llvm-project/commit/e4add9727b43f413a93993add5d97695b8c3b2be
DIFF: https://github.com/llvm/llvm-project/commit/e4add9727b43f413a93993add5d97695b8c3b2be.diff

LOG: [OpenMP][IR-Builder] Introduce "pragma omp parallel" code generation

This patch combines the `emitParallel` logic prototyped in D61953 with
the OpenMPIRBuilder (D69785) and introduces `CreateParallel`.

Reviewed By: fghanim

Differential Revision: https://reviews.llvm.org/D70109

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/unittests/Frontend/CMakeLists.txt
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 9fc949aac790..2f9a5ee71e67 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -49,6 +49,16 @@ enum class RuntimeFunction {
 #define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum;
 #include "llvm/Frontend/OpenMP/OMPKinds.def"
 
+/// IDs for the different proc bind kinds.
+enum class ProcBindKind {
+#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+#define OMP_PROC_BIND_KIND(Enum, ...)                                          \
+  constexpr auto Enum = omp::ProcBindKind::Enum;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
 /// IDs for all omp runtime library ident_t flag encodings (see
 /// their defintion in openmp/runtime/src/kmp.h).
 enum class IdentFlag {
@@ -67,8 +77,8 @@ Directive getOpenMPDirectiveKind(StringRef Str);
 StringRef getOpenMPDirectiveName(Directive D);
 
 /// Forward declarations for LLVM-IR types (simple, function and structure) are
-/// generated below. Their names are defined and used in OpenMPKinds.def. Here
-/// we provide the forward declarations, the initializeTypes function will
+/// generated below. Their names are defined and used in OpenMP/OMPKinds.def.
+/// Here we provide the forward declarations, the initializeTypes function will
 /// provide the values.
 ///
 ///{
@@ -83,10 +93,10 @@ namespace types {
   extern PointerType *VarName##Ptr;
 #include "llvm/Frontend/OpenMP/OMPKinds.def"
 
-/// Helper to initialize all types defined in OpenMPKinds.def.
+/// Helper to initialize all types defined in OpenMP/OMPKinds.def.
 void initializeTypes(Module &M);
 
-/// Helper to uninitialize all types defined in OpenMPKinds.def.
+/// Helper to uninitialize all types defined in OpenMP/OMPKinds.def.
 void uninitializeTypes();
 
 } // namespace types

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 933b0a2ceece..880add6b9bba 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -75,6 +75,40 @@ class OpenMPIRBuilder {
   /// NOTE: Temporary solution until Clang CG is gone.
   void popFinalizationCB() { FinalizationStack.pop_back(); }
 
+  /// Callback type for body (=inner region) code generation
+  ///
+  /// The callback takes code locations as arguments, each describing a
+  /// location at which code might need to be generated or a location that is
+  /// the target of control transfer.
+  ///
+  /// \param AllocaIP is the insertion point at which new alloca instructions
+  ///                 should be placed.
+  /// \param CodeGenIP is the insertion point at which the body code should be
+  ///                  placed.
+  /// \param ContinuationBB is the basic block target to leave the body.
+  ///
+  /// Note that all blocks pointed to by the arguments have terminators.
+  using BodyGenCallbackTy = function_ref<void(
+      InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
+      BasicBlock & /* ContinuationBB */)>;
+
+  /// Callback type for variable privatization (think copy & default
+  /// constructor).
+  ///
+  /// \param AllocaIP is the insertion point at which new alloca instructions
+  ///                 should be placed.
+  /// \param CodeGenIP is the insertion point at which the privatization code
+  ///                  should be placed.
+  /// \param Val The value being copied/created.
+  /// \param ReplVal The replacement value, thus a copy or newly created
+  ///                version of \p Val.
+  ///
+  /// \returns The new insertion point where code generation continues and
+  ///          \p ReplVal the replacement of \p Val.
+  using PrivatizeCallbackTy = function_ref<InsertPointTy(
+      InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
+      Value & /* Val */, Value *& /* ReplVal */)>;
+
   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
   /// (filename, line, column, ...).
   struct LocationDescription {
@@ -105,6 +139,24 @@ class OpenMPIRBuilder {
                               bool ForceSimpleCall = false,
                               bool CheckCancelFlag = true);
 
+  /// Generator for '#omp parallel'
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param PrivCB Callback to copy a given variable (think copy constructor).
+  /// \param FiniCB Callback to finalize variable copies.
+  /// \param IfCondition The evaluated 'if' clause expression, if any.
+  /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
+  /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
+  /// \param IsCancellable Flag to indicate a cancellable parallel region.
+  ///
+  /// \returns The insertion position *after* the parallel.
+  IRBuilder<>::InsertPoint
+  CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+                 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
+                 Value *IfCondition, Value *NumThreads,
+                 omp::ProcBindKind ProcBind, bool IsCancellable);
+
   ///}
 
 private:

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 874d3f91bf31..cf54da528cbc 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -167,6 +167,11 @@ __OMP_RTL(__kmpc_barrier, false, Void, IdentPtr, Int32)
 __OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32)
 __OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr)
 __OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr)
+__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */Int32)
+__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */Int32)
+__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
+
 __OMP_RTL(omp_get_thread_num, false, Int32, )
 
 #undef __OMP_RTL
@@ -234,3 +239,26 @@ __OMP_IDENT_FLAG(BARRIER_IMPL_WORKSHARE, 0x01C0)
 #undef OMP_IDENT_FLAG
 
 ///}
+
+
+/// Proc bind kinds
+///
+///{
+
+#ifndef OMP_PROC_BIND_KIND
+#define OMP_PROC_BIND_KIND(Enum, Str, Value)
+#endif
+
+#define __OMP_PROC_BIND_KIND(Name, Value)                                      \
+  OMP_PROC_BIND_KIND(OMP_PB_##Name, #Name, Value)
+
+__OMP_PROC_BIND_KIND(master, 2)
+__OMP_PROC_BIND_KIND(close, 3)
+__OMP_PROC_BIND_KIND(spread, 4)
+__OMP_PROC_BIND_KIND(default, 6)
+__OMP_PROC_BIND_KIND(unknown, 7)
+
+#undef __OMP_PROC_BIND_KIND
+#undef OMP_PROC_BIND_KIND
+
+///}

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 4c173e032e84..e59f964a3c0c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -16,10 +16,13 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
 
 #include <sstream>
 
@@ -216,8 +219,17 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
   if (UseCancelBarrier && CheckCancelFlag) {
     // For a cancel barrier we create two new blocks.
     BasicBlock *BB = Builder.GetInsertBlock();
-    BasicBlock *NonCancellationBlock = BasicBlock::Create(
-        BB->getContext(), BB->getName() + ".cont", BB->getParent());
+    BasicBlock *NonCancellationBlock;
+    if (Builder.GetInsertPoint() == BB->end()) {
+      // TODO: This branch will not be needed once we moved to the
+      // OpenMPIRBuilder codegen completely.
+      NonCancellationBlock = BasicBlock::Create(
+          BB->getContext(), BB->getName() + ".cont", BB->getParent());
+    } else {
+      NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
+      BB->getTerminator()->eraseFromParent();
+      Builder.SetInsertPoint(BB);
+    }
     BasicBlock *CancellationBlock = BasicBlock::Create(
         BB->getContext(), BB->getName() + ".cncl", BB->getParent());
 
@@ -233,8 +245,310 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
     FI.FiniCB(Builder.saveIP());
 
     // The continuation block is where code generation continues.
-    Builder.SetInsertPoint(NonCancellationBlock);
+    Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
   }
 
   return Builder.saveIP();
 }
+
+IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
+    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+    PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
+    Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
+  Value *ThreadID = getOrCreateThreadID(Ident);
+
+  if (NumThreads) {
+    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+    Value *Args[] = {
+        Ident, ThreadID,
+        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
+    Builder.CreateCall(
+        getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
+  }
+
+  if (ProcBind != OMP_PB_default) {
+    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
+    Value *Args[] = {
+        Ident, ThreadID,
+        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
+    Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
+                       Args);
+  }
+
+  BasicBlock *InsertBB = Builder.GetInsertBlock();
+  Function *OuterFn = InsertBB->getParent();
+
+  // Vector to remember instructions we used only during the modeling but which
+  // we want to delete at the end.
+  SmallVector<Instruction *, 4> ToBeDeleted;
+
+  Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
+  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
+
+  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
+  // program, otherwise we only need them for modeling purposes to get the
+  // associated arguments in the outlined function. In the former case,
+  // initialize the allocas properly, in the latter case, delete them later.
+  if (IfCondition) {
+    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
+    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
+  } else {
+    ToBeDeleted.push_back(TIDAddr);
+    ToBeDeleted.push_back(ZeroAddr);
+  }
+
+  // Create an artificial insertion point that will also ensure the blocks we
+  // are about to split are not degenerated.
+  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
+
+  Instruction *ThenTI = UI, *ElseTI = nullptr;
+  if (IfCondition)
+    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
+
+  BasicBlock *ThenBB = ThenTI->getParent();
+  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
+  BasicBlock *PRegBodyBB =
+      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
+  BasicBlock *PRegPreFiniBB =
+      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
+  BasicBlock *PRegExitBB =
+      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
+
+  auto FiniCBWrapper = [&](InsertPointTy IP) {
+    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
+    // target to the region exit block.
+    if (IP.getBlock()->end() == IP.getPoint()) {
+      IRBuilder<>::InsertPointGuard IPG(Builder);
+      Builder.restoreIP(IP);
+      Instruction *I = Builder.CreateBr(PRegExitBB);
+      IP = InsertPointTy(I->getParent(), I->getIterator());
+    }
+    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
+           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
+           "Unexpected insertion point for finalization call!");
+    return FiniCB(IP);
+  };
+
+  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
+
+  // Generate the privatization allocas in the block that will become the entry
+  // of the outlined function.
+  InsertPointTy AllocaIP(PRegEntryBB,
+                         PRegEntryBB->getTerminator()->getIterator());
+  Builder.restoreIP(AllocaIP);
+  AllocaInst *PrivTIDAddr =
+      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
+  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
+
+  // Add some fake uses for OpenMP provided arguments.
+  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
+  ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
+
+  // ThenBB
+  //   |
+  //   V
+  // PRegionEntryBB         <- Privatization allocas are placed here.
+  //   |
+  //   V
+  // PRegionBodyBB          <- BodyGen is invoked here.
+  //   |
+  //   V
+  // PRegPreFiniBB          <- The block we will start finalization from.
+  //   |
+  //   V
+  // PRegionExitBB          <- A common exit to simplify block collection.
+  //
+
+  LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
+
+  // Let the caller create the body.
+  assert(BodyGenCB && "Expected body generation callback!");
+  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
+  BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+
+  LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n");
+
+  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+  SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
+  ParallelRegionBlockSet.insert(PRegEntryBB);
+  ParallelRegionBlockSet.insert(PRegExitBB);
+
+  // Collect all blocks in-between PRegEntryBB and PRegExitBB.
+  Worklist.push_back(PRegEntryBB);
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+    ParallelRegionBlocks.push_back(BB);
+    for (BasicBlock *SuccBB : successors(BB))
+      if (ParallelRegionBlockSet.insert(SuccBB).second)
+        Worklist.push_back(SuccBB);
+  }
+
+  CodeExtractorAnalysisCache CEAC(*OuterFn);
+  CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
+                          /* AggregateArgs */ false,
+                          /* BlockFrequencyInfo */ nullptr,
+                          /* BranchProbabilityInfo */ nullptr,
+                          /* AssumptionCache */ nullptr,
+                          /* AllowVarArgs */ true,
+                          /* AllowAlloca */ true,
+                          /* Suffix */ ".omp_par");
+
+  // Find inputs to, outputs from the code region.
+  BasicBlock *CommonExit = nullptr;
+  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
+
+  LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
+
+  FunctionCallee TIDRTLFn =
+      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
+
+  auto PrivHelper = [&](Value &V) {
+    if (&V == TIDAddr || &V == ZeroAddr)
+      return;
+
+    SmallVector<Use *, 8> Uses;
+    for (Use &U : V.uses())
+      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
+        if (ParallelRegionBlockSet.count(UserI->getParent()))
+          Uses.push_back(&U);
+
+    Value *ReplacementValue = nullptr;
+    CallInst *CI = dyn_cast<CallInst>(&V);
+    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
+      ReplacementValue = PrivTID;
+    } else {
+      Builder.restoreIP(
+          PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+      assert(ReplacementValue &&
+             "Expected copy/create callback to set replacement value!");
+      if (ReplacementValue == &V)
+        return;
+    }
+
+    for (Use *UPtr : Uses)
+      UPtr->set(ReplacementValue);
+  };
+
+  for (Value *Input : Inputs) {
+    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
+    PrivHelper(*Input);
+  }
+  for (Value *Output : Outputs) {
+    LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+    PrivHelper(*Output);
+  }
+
+  LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n");
+  LLVM_DEBUG({
+    for (auto *BB : ParallelRegionBlocks)
+      dbgs() << " PBR: " << BB->getName() << "\n";
+  });
+
+  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
+  LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
+  LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
+
+  // Remove the artificial entry introduced by the extractor right away, we
+  // made our own entry block after all.
+  {
+    BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
+    assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
+    assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
+    PRegEntryBB->moveBefore(&ArtificialEntry);
+    ArtificialEntry.eraseFromParent();
+  }
+  LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
+  assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
+
+  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
+  assert(OutlinedFn->arg_size() >= 2 &&
+         "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
+  CI->getParent()->setName("omp_parallel");
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
+  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
+                           Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
+
+  SmallVector<Value *, 16> RealArgs;
+  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
+
+  FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
+  Builder.CreateCall(RTLFn, RealArgs);
+
+  LLVM_DEBUG(dbgs() << "With fork_call placed: "
+                    << *Builder.GetInsertBlock()->getParent() << "\n");
+
+  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
+  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
+  UI->eraseFromParent();
+
+  // Initialize the local TID stack location with the argument value.
+  Builder.SetInsertPoint(PrivTID);
+  Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
+  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
+
+  // If no "if" clause was present we do not need the call created during
+  // outlining, otherwise we reuse it in the serialized parallel region.
+  if (!ElseTI) {
+    CI->eraseFromParent();
+  } else {
+
+    // If an "if" clause was present we are now generating the serialized
+    // version into the "else" branch.
+    Builder.SetInsertPoint(ElseTI);
+
+    // Build calls __kmpc_serialized_parallel(&Ident, GTid);
+    Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
+    Builder.CreateCall(
+        getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
+        SerializedParallelCallArgs);
+
+    // OutlinedFn(&GTid, &zero, CapturedStruct);
+    CI->removeFromParent();
+    Builder.Insert(CI);
+
+    // __kmpc_end_serialized_parallel(&Ident, GTid);
+    Value *EndArgs[] = {Ident, ThreadID};
+    Builder.CreateCall(
+        getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
+        EndArgs);
+
+    LLVM_DEBUG(dbgs() << "With serialized parallel region: "
+                      << *Builder.GetInsertBlock()->getParent() << "\n");
+  }
+
+  // Adjust the finalization stack, verify the adjustment, and call the
+  // finalize function a last time to finalize values between the pre-fini block
+  // and the exit block if we left the parallel "the normal way".
+  auto FiniInfo = FinalizationStack.pop_back_val();
+  (void)FiniInfo;
+  assert(FiniInfo.DK == OMPD_parallel &&
+         "Unexpected finalization stack state!");
+
+  Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
+  assert(PreFiniTI->getNumSuccessors() == 1 &&
+         PreFiniTI->getSuccessor(0)->size() == 1 &&
+         isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
+         "Unexpected CFG structure!");
+
+  InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
+  FiniCB(PreFiniIP);
+
+  for (Instruction *I : ToBeDeleted)
+    I->eraseFromParent();
+
+  return AfterIP;
+}

diff  --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt
index f63a3f85dd61..530c188ca8e5 100644
--- a/llvm/unittests/Frontend/CMakeLists.txt
+++ b/llvm/unittests/Frontend/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
   FrontendOpenMP
   Support
   Passes
+  TransformUtils
   )
 
 add_llvm_unittest(LLVMFrontendTests

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 39dd83232d9a..13e93446d45f 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -14,6 +14,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -99,20 +100,18 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.initialize();
 
   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
   new UnreachableInst(Ctx, CBB);
-  auto FiniCB = [CBB](llvm::OpenMPIRBuilder::InsertPointTy IP) {
-    assert(IP.getBlock()->end() == IP.getPoint() &&
-           "Clang CG should cause non-terminated block!");
+  auto FiniCB = [&](InsertPointTy IP) {
+    ASSERT_NE(IP.getBlock(), nullptr);
+    ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
     BranchInst::Create(CBB, IP.getBlock());
   };
-  // Emulate an outer parallel.
-  llvm::OpenMPIRBuilder::FinalizationInfo FI(
-      {FiniCB, OMPD_parallel, /* HasCancel */ true});
-  OMPBuilder.pushFinalizationCB(std::move(FI));
+  OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
 
   IRBuilder<> Builder(BB);
 
@@ -141,6 +140,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
+  EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
@@ -188,4 +188,309 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) {
     return;
   EXPECT_EQ(SrcSrc->getAsCString(), ";test.dbg;foo;3;7;;");
 }
+
+TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  AllocaInst *PrivAI = nullptr;
+
+  unsigned NumBodiesGenerated = 0;
+  unsigned NumPrivatizedVars = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &ContinuationIP) {
+    ++NumBodiesGenerated;
+
+    Builder.restoreIP(AllocaIP);
+    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
+    Builder.CreateStore(F->arg_begin(), PrivAI);
+
+    Builder.restoreIP(CodeGenIP);
+    Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
+    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+    Instruction *ThenTerm, *ElseTerm;
+    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
+                                  &ThenTerm, &ElseTerm);
+
+    Builder.SetInsertPoint(ThenTerm);
+    Builder.CreateBr(&ContinuationIP);
+    ThenTerm->eraseFromParent();
+  };
+
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+    ++NumPrivatizedVars;
+
+    if (!isa<AllocaInst>(VPtr)) {
+      EXPECT_EQ(&VPtr, F->arg_begin());
+      ReplacementValue = &VPtr;
+      return CodeGenIP;
+    }
+
+    // Trivial copy (=firstprivate).
+    Builder.restoreIP(AllocaIP);
+    Type *VTy = VPtr.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(V, ReplacementValue);
+    return CodeGenIP;
+  };
+
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+      Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PB_default, false);
+
+  EXPECT_EQ(NumBodiesGenerated, 1U);
+  EXPECT_EQ(NumPrivatizedVars, 1U);
+  EXPECT_EQ(NumFinalizationPoints, 1U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  EXPECT_NE(PrivAI, nullptr);
+  Function *OutlinedFn = PrivAI->getFunction();
+  EXPECT_NE(F, OutlinedFn);
+  EXPECT_FALSE(verifyModule(*M));
+
+  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
+  EXPECT_EQ(OutlinedFn->arg_size(), 3U);
+
+  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
+  EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
+  User *Usr = OutlinedFn->user_back();
+  ASSERT_TRUE(isa<ConstantExpr>(Usr));
+  CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
+  ASSERT_NE(ForkCI, nullptr);
+
+  EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+  EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
+  EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+  EXPECT_EQ(ForkCI->getArgOperand(1),
+            ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
+  EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
+  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  AllocaInst *PrivAI = nullptr;
+
+  unsigned NumBodiesGenerated = 0;
+  unsigned NumPrivatizedVars = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &ContinuationIP) {
+    ++NumBodiesGenerated;
+
+    Builder.restoreIP(AllocaIP);
+    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
+    Builder.CreateStore(F->arg_begin(), PrivAI);
+
+    Builder.restoreIP(CodeGenIP);
+    Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
+    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+    Instruction *ThenTerm, *ElseTerm;
+    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
+                                  &ThenTerm, &ElseTerm);
+
+    Builder.SetInsertPoint(ThenTerm);
+    Builder.CreateBr(&ContinuationIP);
+    ThenTerm->eraseFromParent();
+  };
+
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+    ++NumPrivatizedVars;
+
+    if (!isa<AllocaInst>(VPtr)) {
+      EXPECT_EQ(&VPtr, F->arg_begin());
+      ReplacementValue = &VPtr;
+      return CodeGenIP;
+    }
+
+    // Trivial copy (=firstprivate).
+    Builder.restoreIP(AllocaIP);
+    Type *VTy = VPtr.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(V, ReplacementValue);
+    return CodeGenIP;
+  };
+
+  auto FiniCB = [&](InsertPointTy CodeGenIP) {
+    ++NumFinalizationPoints;
+    // No destructors.
+  };
+
+  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
+      nullptr, OMP_PB_default, false);
+
+  EXPECT_EQ(NumBodiesGenerated, 1U);
+  EXPECT_EQ(NumPrivatizedVars, 1U);
+  EXPECT_EQ(NumFinalizationPoints, 1U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  EXPECT_NE(PrivAI, nullptr);
+  Function *OutlinedFn = PrivAI->getFunction();
+  EXPECT_NE(F, OutlinedFn);
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
+  EXPECT_EQ(OutlinedFn->arg_size(), 3U);
+
+  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
+  ASSERT_EQ(OutlinedFn->getNumUses(), 2U);
+
+  CallInst *DirectCI = nullptr;
+  CallInst *ForkCI = nullptr;
+  for (User *Usr : OutlinedFn->users()) {
+    if (isa<CallInst>(Usr)) {
+      ASSERT_EQ(DirectCI, nullptr);
+      DirectCI = cast<CallInst>(Usr);
+    } else {
+      ASSERT_TRUE(isa<ConstantExpr>(Usr));
+      ASSERT_EQ(Usr->getNumUses(), 1U);
+      ASSERT_TRUE(isa<CallInst>(Usr->user_back()));
+      ForkCI = cast<CallInst>(Usr->user_back());
+    }
+  }
+
+  EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+  EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
+  EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+  EXPECT_EQ(ForkCI->getArgOperand(1),
+            ConstantInt::get(Type::getInt32Ty(Ctx), 1));
+  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+
+  EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
+  EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
+  EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
+  EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
+  EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin());
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  unsigned NumBodiesGenerated = 0;
+  unsigned NumPrivatizedVars = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  CallInst *CheckedBarrier = nullptr;
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &ContinuationIP) {
+    ++NumBodiesGenerated;
+
+    Builder.restoreIP(CodeGenIP);
+
+    // Create three barriers, two cancel barriers but only one checked.
+    Function *CBFn, *BFn;
+
+    Builder.restoreIP(
+        OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel));
+
+    CBFn = M->getFunction("__kmpc_cancel_barrier");
+    BFn = M->getFunction("__kmpc_barrier");
+    ASSERT_NE(CBFn, nullptr);
+    ASSERT_EQ(BFn, nullptr);
+    ASSERT_EQ(CBFn->getNumUses(), 1U);
+    ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
+    ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
+    CheckedBarrier = cast<CallInst>(CBFn->user_back());
+
+    Builder.restoreIP(
+        OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel, true));
+    CBFn = M->getFunction("__kmpc_cancel_barrier");
+    BFn = M->getFunction("__kmpc_barrier");
+    ASSERT_NE(CBFn, nullptr);
+    ASSERT_NE(BFn, nullptr);
+    ASSERT_EQ(CBFn->getNumUses(), 1U);
+    ASSERT_EQ(BFn->getNumUses(), 1U);
+    ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
+    ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
+
+    Builder.restoreIP(OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel,
+                                               false, false));
+    ASSERT_EQ(CBFn->getNumUses(), 2U);
+    ASSERT_EQ(BFn->getNumUses(), 1U);
+    ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
+    ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
+    ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
+  };
+
+  auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V,
+                    Value *&) -> InsertPointTy {
+    ++NumPrivatizedVars;
+    llvm_unreachable("No privatization callback call expected!");
+  };
+
+  FunctionType *FakeDestructorTy =
+      FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
+                        /*isVarArg=*/false);
+  auto *FakeDestructor = Function::Create(
+      FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
+
+  auto FiniCB = [&](InsertPointTy IP) {
+    ++NumFinalizationPoints;
+    Builder.restoreIP(IP);
+    Builder.CreateCall(FakeDestructor,
+                       {Builder.getInt32(NumFinalizationPoints)});
+  };
+
+  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
+      nullptr, OMP_PB_default, true);
+
+  EXPECT_EQ(NumBodiesGenerated, 1U);
+  EXPECT_EQ(NumPrivatizedVars, 0U);
+  EXPECT_EQ(NumFinalizationPoints, 2U);
+  EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  BasicBlock *ExitBB = nullptr;
+  for (const User *Usr : FakeDestructor->users()) {
+    const CallInst *CI = dyn_cast<CallInst>(Usr);
+    ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
+    ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
+    ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
+    if (ExitBB)
+      ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
+    else
+      ExitBB = CI->getNextNode()->getSuccessor(0);
+    ASSERT_EQ(ExitBB->size(), 1U);
+    ASSERT_TRUE(isa<ReturnInst>(ExitBB->front()));
+  }
+}
+
 } // namespace


        


More information about the llvm-commits mailing list