[llvm] e4add97 - [OpenMP][IR-Builder] Introduce "pragma omp parallel" code generation
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 25 16:03:02 PST 2019
Author: Johannes Doerfert
Date: 2019-12-25T18:02:23-06:00
New Revision: e4add9727b43f413a93993add5d97695b8c3b2be
URL: https://github.com/llvm/llvm-project/commit/e4add9727b43f413a93993add5d97695b8c3b2be
DIFF: https://github.com/llvm/llvm-project/commit/e4add9727b43f413a93993add5d97695b8c3b2be.diff
LOG: [OpenMP][IR-Builder] Introduce "pragma omp parallel" code generation
This patch combines the `emitParallel` logic prototyped in D61953 with
the OpenMPIRBuilder (D69785) and introduces `CreateParallel`.
Reviewed By: fghanim
Differential Revision: https://reviews.llvm.org/D70109
Added:
Modified:
llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/CMakeLists.txt
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 9fc949aac790..2f9a5ee71e67 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -49,6 +49,16 @@ enum class RuntimeFunction {
#define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+/// IDs for the different proc bind kinds.
+enum class ProcBindKind {
+#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+#define OMP_PROC_BIND_KIND(Enum, ...) \
+ constexpr auto Enum = omp::ProcBindKind::Enum;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
/// IDs for all omp runtime library ident_t flag encodings (see
/// their definition in openmp/runtime/src/kmp.h).
enum class IdentFlag {
@@ -67,8 +77,8 @@ Directive getOpenMPDirectiveKind(StringRef Str);
StringRef getOpenMPDirectiveName(Directive D);
/// Forward declarations for LLVM-IR types (simple, function and structure) are
-/// generated below. Their names are defined and used in OpenMPKinds.def. Here
-/// we provide the forward declarations, the initializeTypes function will
+/// generated below. Their names are defined and used in OpenMP/OMPKinds.def.
+/// Here we provide the forward declarations, the initializeTypes function will
/// provide the values.
///
///{
@@ -83,10 +93,10 @@ namespace types {
extern PointerType *VarName##Ptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
-/// Helper to initialize all types defined in OpenMPKinds.def.
+/// Helper to initialize all types defined in OpenMP/OMPKinds.def.
void initializeTypes(Module &M);
-/// Helper to uninitialize all types defined in OpenMPKinds.def.
+/// Helper to uninitialize all types defined in OpenMP/OMPKinds.def.
void uninitializeTypes();
} // namespace types
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 933b0a2ceece..880add6b9bba 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -75,6 +75,40 @@ class OpenMPIRBuilder {
/// NOTE: Temporary solution until Clang CG is gone.
void popFinalizationCB() { FinalizationStack.pop_back(); }
+ /// Callback type for body (=inner region) code generation
+ ///
+ /// The callback takes code locations as arguments, each describing a
+ /// location at which code might need to be generated or a location that is
+ /// the target of control transfer.
+ ///
+ /// \param AllocaIP is the insertion point at which new alloca instructions
+ /// should be placed.
+ /// \param CodeGenIP is the insertion point at which the body code should be
+ /// placed.
+ /// \param ContinuationBB is the basic block target to leave the body.
+ ///
+ /// Note that all blocks pointed to by the arguments have terminators.
+ using BodyGenCallbackTy = function_ref<void(
+ InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
+ BasicBlock & /* ContinuationBB */)>;
+
+ /// Callback type for variable privatization (think copy & default
+ /// constructor).
+ ///
+ /// \param AllocaIP is the insertion point at which new alloca instructions
+ /// should be placed.
+ /// \param CodeGenIP is the insertion point at which the privatization code
+ /// should be placed.
+ /// \param Val The value being copied/created.
+ /// \param ReplVal The replacement value, thus a copy or newly created version
+ /// of \p Val.
+ ///
+ /// \returns The new insertion point where code generation continues and
+ /// \p ReplVal the replacement of \p Val.
+ using PrivatizeCallbackTy = function_ref<InsertPointTy(
+ InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
+ Value & /* Val */, Value *& /* ReplVal */)>;
+
/// Description of a LLVM-IR insertion point (IP) and a debug/source location
/// (filename, line, column, ...).
struct LocationDescription {
@@ -105,6 +139,24 @@ class OpenMPIRBuilder {
bool ForceSimpleCall = false,
bool CheckCancelFlag = true);
+ /// Generator for '#omp parallel'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param PrivCB Callback to copy a given variable (think copy constructor).
+ /// \param FiniCB Callback to finalize variable copies.
+ /// \param IfCondition The evaluated 'if' clause expression, if any.
+ /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
+ /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
+ /// \param IsCancellable Flag to indicate a cancellable parallel region.
+ ///
+ /// \returns The insertion position *after* the parallel.
+ IRBuilder<>::InsertPoint
+ CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
+ Value *IfCondition, Value *NumThreads,
+ omp::ProcBindKind ProcBind, bool IsCancellable);
+
///}
private:
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 874d3f91bf31..cf54da528cbc 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -167,6 +167,11 @@ __OMP_RTL(__kmpc_barrier, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32)
__OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr)
__OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr)
+__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */Int32)
+__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */Int32)
+__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
+
__OMP_RTL(omp_get_thread_num, false, Int32, )
#undef __OMP_RTL
@@ -234,3 +239,26 @@ __OMP_IDENT_FLAG(BARRIER_IMPL_WORKSHARE, 0x01C0)
#undef OMP_IDENT_FLAG
///}
+
+
+/// Proc bind kinds
+///
+///{
+
+#ifndef OMP_PROC_BIND_KIND
+#define OMP_PROC_BIND_KIND(Enum, Str, Value)
+#endif
+
+#define __OMP_PROC_BIND_KIND(Name, Value) \
+ OMP_PROC_BIND_KIND(OMP_PB_##Name, #Name, Value)
+
+__OMP_PROC_BIND_KIND(master, 2)
+__OMP_PROC_BIND_KIND(close, 3)
+__OMP_PROC_BIND_KIND(spread, 4)
+__OMP_PROC_BIND_KIND(default, 6)
+__OMP_PROC_BIND_KIND(unknown, 7)
+
+#undef __OMP_PROC_BIND_KIND
+#undef OMP_PROC_BIND_KIND
+
+///}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 4c173e032e84..e59f964a3c0c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -16,10 +16,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <sstream>
@@ -216,8 +219,17 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
if (UseCancelBarrier && CheckCancelFlag) {
// For a cancel barrier we create two new blocks.
BasicBlock *BB = Builder.GetInsertBlock();
- BasicBlock *NonCancellationBlock = BasicBlock::Create(
- BB->getContext(), BB->getName() + ".cont", BB->getParent());
+ BasicBlock *NonCancellationBlock;
+ if (Builder.GetInsertPoint() == BB->end()) {
+ // TODO: This branch will not be needed once we moved to the
+ // OpenMPIRBuilder codegen completely.
+ NonCancellationBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".cont", BB->getParent());
+ } else {
+ NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
+ BB->getTerminator()->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ }
BasicBlock *CancellationBlock = BasicBlock::Create(
BB->getContext(), BB->getName() + ".cncl", BB->getParent());
@@ -233,8 +245,310 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
FI.FiniCB(Builder.saveIP());
// The continuation block is where code generation continues.
- Builder.SetInsertPoint(NonCancellationBlock);
+ Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
return Builder.saveIP();
}
+
+IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
+ const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
+ Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadID = getOrCreateThreadID(Ident);
+
+ if (NumThreads) {
+ // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+ Value *Args[] = {
+ Ident, ThreadID,
+ Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
+ }
+
+ if (ProcBind != OMP_PB_default) {
+ // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
+ Value *Args[] = {
+ Ident, ThreadID,
+ ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
+ Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
+ Args);
+ }
+
+ BasicBlock *InsertBB = Builder.GetInsertBlock();
+ Function *OuterFn = InsertBB->getParent();
+
+ // Vector to remember instructions we used only during the modeling but which
+ // we want to delete at the end.
+ SmallVector<Instruction *, 4> ToBeDeleted;
+
+ Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+ AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
+ AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
+
+ // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
+ // program, otherwise we only need them for modeling purposes to get the
+ // associated arguments in the outlined function. In the former case,
+ // initialize the allocas properly, in the latter case, delete them later.
+ if (IfCondition) {
+ Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
+ Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
+ } else {
+ ToBeDeleted.push_back(TIDAddr);
+ ToBeDeleted.push_back(ZeroAddr);
+ }
+
+ // Create an artificial insertion point that will also ensure the blocks we
+ // are about to split are not degenerated.
+ auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
+
+ Instruction *ThenTI = UI, *ElseTI = nullptr;
+ if (IfCondition)
+ SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
+
+ BasicBlock *ThenBB = ThenTI->getParent();
+ BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
+ BasicBlock *PRegBodyBB =
+ PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
+ BasicBlock *PRegPreFiniBB =
+ PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
+ BasicBlock *PRegExitBB =
+ PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
+
+ auto FiniCBWrapper = [&](InsertPointTy IP) {
+ // Hide "open-ended" blocks from the given FiniCB by setting the right jump
+ // target to the region exit block.
+ if (IP.getBlock()->end() == IP.getPoint()) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(IP);
+ Instruction *I = Builder.CreateBr(PRegExitBB);
+ IP = InsertPointTy(I->getParent(), I->getIterator());
+ }
+ assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
+ IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
+ "Unexpected insertion point for finalization call!");
+ return FiniCB(IP);
+ };
+
+ FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
+
+ // Generate the privatization allocas in the block that will become the entry
+ // of the outlined function.
+ InsertPointTy AllocaIP(PRegEntryBB,
+ PRegEntryBB->getTerminator()->getIterator());
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *PrivTIDAddr =
+ Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
+ Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
+
+ // Add some fake uses for OpenMP provided arguments.
+ ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
+ ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
+
+ // ThenBB
+ // |
+ // V
+ // PRegionEntryBB <- Privatization allocas are placed here.
+ // |
+ // V
+ // PRegionBodyBB <- BodyGen is invoked here.
+ // |
+ // V
+ // PRegPreFiniBB <- The block we will start finalization from.
+ // |
+ // V
+ // PRegionExitBB <- A common exit to simplify block collection.
+ //
+
+ LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
+
+ // Let the caller create the body.
+ assert(BodyGenCB && "Expected body generation callback!");
+ InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
+ BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+
+ LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
+
+ SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+ SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
+ ParallelRegionBlockSet.insert(PRegEntryBB);
+ ParallelRegionBlockSet.insert(PRegExitBB);
+
+ // Collect all blocks in-between PRegEntryBB and PRegExitBB.
+ Worklist.push_back(PRegEntryBB);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ ParallelRegionBlocks.push_back(BB);
+ for (BasicBlock *SuccBB : successors(BB))
+ if (ParallelRegionBlockSet.insert(SuccBB).second)
+ Worklist.push_back(SuccBB);
+ }
+
+ CodeExtractorAnalysisCache CEAC(*OuterFn);
+ CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
+ /* AggregateArgs */ false,
+ /* BlockFrequencyInfo */ nullptr,
+ /* BranchProbabilityInfo */ nullptr,
+ /* AssumptionCache */ nullptr,
+ /* AllowVarArgs */ true,
+ /* AllowAlloca */ true,
+ /* Suffix */ ".omp_par");
+
+ // Find inputs to, outputs from the code region.
+ BasicBlock *CommonExit = nullptr;
+ SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+ Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+ Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
+
+ LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
+
+ FunctionCallee TIDRTLFn =
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
+
+ auto PrivHelper = [&](Value &V) {
+ if (&V == TIDAddr || &V == ZeroAddr)
+ return;
+
+ SmallVector<Use *, 8> Uses;
+ for (Use &U : V.uses())
+ if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
+ if (ParallelRegionBlockSet.count(UserI->getParent()))
+ Uses.push_back(&U);
+
+ Value *ReplacementValue = nullptr;
+ CallInst *CI = dyn_cast<CallInst>(&V);
+ if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
+ ReplacementValue = PrivTID;
+ } else {
+ Builder.restoreIP(
+ PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+ assert(ReplacementValue &&
+ "Expected copy/create callback to set replacement value!");
+ if (ReplacementValue == &V)
+ return;
+ }
+
+ for (Use *UPtr : Uses)
+ UPtr->set(ReplacementValue);
+ };
+
+ for (Value *Input : Inputs) {
+ LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
+ PrivHelper(*Input);
+ }
+ for (Value *Output : Outputs) {
+ LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+ PrivHelper(*Output);
+ }
+
+ LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG({
+ for (auto *BB : ParallelRegionBlocks)
+ dbgs() << " PBR: " << BB->getName() << "\n";
+ });
+
+ Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
+ LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
+
+ // Remove the artificial entry introduced by the extractor right away, we
+ // made our own entry block after all.
+ {
+ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
+ assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
+ assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
+ PRegEntryBB->moveBefore(&ArtificialEntry);
+ ArtificialEntry.eraseFromParent();
+ }
+ LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
+ assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
+
+ assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
+ assert(OutlinedFn->arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
+
+ CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
+ CI->getParent()->setName("omp_parallel");
+ Builder.SetInsertPoint(CI);
+
+ // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
+ Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
+ Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
+
+ SmallVector<Value *, 16> RealArgs;
+ RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+ RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
+
+ FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
+ Builder.CreateCall(RTLFn, RealArgs);
+
+ LLVM_DEBUG(dbgs() << "With fork_call placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+
+ InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
+ InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
+ UI->eraseFromParent();
+
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
+
+ // If no "if" clause was present we do not need the call created during
+ // outlining, otherwise we reuse it in the serialized parallel region.
+ if (!ElseTI) {
+ CI->eraseFromParent();
+ } else {
+
+ // If an "if" clause was present we are now generating the serialized
+ // version into the "else" branch.
+ Builder.SetInsertPoint(ElseTI);
+
+ // Build calls __kmpc_serialized_parallel(&Ident, GTid);
+ Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
+ SerializedParallelCallArgs);
+
+ // OutlinedFn(&gtid, &zero, CapturedStruct);
+ CI->removeFromParent();
+ Builder.Insert(CI);
+
+ // __kmpc_end_serialized_parallel(&Ident, GTid);
+ Value *EndArgs[] = {Ident, ThreadID};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
+ EndArgs);
+
+ LLVM_DEBUG(dbgs() << "With serialized parallel region: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+ }
+
+ // Adjust the finalization stack, verify the adjustment, and call the
+ // finalize function a last time to finalize values between the pre-fini block
+ // and the exit block if we left the parallel "the normal way".
+ auto FiniInfo = FinalizationStack.pop_back_val();
+ (void)FiniInfo;
+ assert(FiniInfo.DK == OMPD_parallel &&
+ "Unexpected finalization stack state!");
+
+ Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
+ assert(PreFiniTI->getNumSuccessors() == 1 &&
+ PreFiniTI->getSuccessor(0)->size() == 1 &&
+ isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
+ "Unexpected CFG structure!");
+
+ InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
+ FiniCB(PreFiniIP);
+
+ for (Instruction *I : ToBeDeleted)
+ I->eraseFromParent();
+
+ return AfterIP;
+}
diff --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt
index f63a3f85dd61..530c188ca8e5 100644
--- a/llvm/unittests/Frontend/CMakeLists.txt
+++ b/llvm/unittests/Frontend/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
FrontendOpenMP
Support
Passes
+ TransformUtils
)
add_llvm_unittest(LLVMFrontendTests
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 39dd83232d9a..13e93446d45f 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -14,6 +14,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gtest/gtest.h"
using namespace llvm;
@@ -99,20 +100,18 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
}
TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();
BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
new UnreachableInst(Ctx, CBB);
- auto FiniCB = [CBB](llvm::OpenMPIRBuilder::InsertPointTy IP) {
- assert(IP.getBlock()->end() == IP.getPoint() &&
- "Clang CG should cause non-terminated block!");
+ auto FiniCB = [&](InsertPointTy IP) {
+ ASSERT_NE(IP.getBlock(), nullptr);
+ ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
BranchInst::Create(CBB, IP.getBlock());
};
- // Emulate an outer parallel.
- llvm::OpenMPIRBuilder::FinalizationInfo FI(
- {FiniCB, OMPD_parallel, /* HasCancel */ true});
- OMPBuilder.pushFinalizationCB(std::move(FI));
+ OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
IRBuilder<> Builder(BB);
@@ -141,6 +140,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
+ EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
1U);
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
@@ -188,4 +188,309 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) {
return;
EXPECT_EQ(SrcSrc->getAsCString(), ";test.dbg;foo;3;7;;");
}
+
+TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+ AllocaInst *PrivAI = nullptr;
+
+ unsigned NumBodiesGenerated = 0;
+ unsigned NumPrivatizedVars = 0;
+ unsigned NumFinalizationPoints = 0;
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumBodiesGenerated;
+
+ Builder.restoreIP(AllocaIP);
+ PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
+ Builder.CreateStore(F->arg_begin(), PrivAI);
+
+ Builder.restoreIP(CodeGenIP);
+ Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
+ Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+ Instruction *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
+ &ThenTerm, &ElseTerm);
+
+ Builder.SetInsertPoint(ThenTerm);
+ Builder.CreateBr(&ContinuationIP);
+ ThenTerm->eraseFromParent();
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ ++NumPrivatizedVars;
+
+ if (!isa<AllocaInst>(VPtr)) {
+ EXPECT_EQ(&VPtr, F->arg_begin());
+ ReplacementValue = &VPtr;
+ return CodeGenIP;
+ }
+
+ // Trivial copy (=firstprivate).
+ Builder.restoreIP(AllocaIP);
+ Type *VTy = VPtr.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Builder.restoreIP(CodeGenIP);
+ Builder.CreateStore(V, ReplacementValue);
+ return CodeGenIP;
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+ IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+ Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PB_default, false);
+
+ EXPECT_EQ(NumBodiesGenerated, 1U);
+ EXPECT_EQ(NumPrivatizedVars, 1U);
+ EXPECT_EQ(NumFinalizationPoints, 1U);
+
+ Builder.restoreIP(AfterIP);
+ Builder.CreateRetVoid();
+
+ EXPECT_NE(PrivAI, nullptr);
+ Function *OutlinedFn = PrivAI->getFunction();
+ EXPECT_NE(F, OutlinedFn);
+ EXPECT_FALSE(verifyModule(*M));
+
+ EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
+ EXPECT_EQ(OutlinedFn->arg_size(), 3U);
+
+ EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
+ EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
+ User *Usr = OutlinedFn->user_back();
+ ASSERT_TRUE(isa<ConstantExpr>(Usr));
+ CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
+ ASSERT_NE(ForkCI, nullptr);
+
+ EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+ EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
+ EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+ EXPECT_EQ(ForkCI->getArgOperand(1),
+ ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
+ EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
+ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+ AllocaInst *PrivAI = nullptr;
+
+ unsigned NumBodiesGenerated = 0;
+ unsigned NumPrivatizedVars = 0;
+ unsigned NumFinalizationPoints = 0;
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumBodiesGenerated;
+
+ Builder.restoreIP(AllocaIP);
+ PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
+ Builder.CreateStore(F->arg_begin(), PrivAI);
+
+ Builder.restoreIP(CodeGenIP);
+ Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
+ Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+ Instruction *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
+ &ThenTerm, &ElseTerm);
+
+ Builder.SetInsertPoint(ThenTerm);
+ Builder.CreateBr(&ContinuationIP);
+ ThenTerm->eraseFromParent();
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ ++NumPrivatizedVars;
+
+ if (!isa<AllocaInst>(VPtr)) {
+ EXPECT_EQ(&VPtr, F->arg_begin());
+ ReplacementValue = &VPtr;
+ return CodeGenIP;
+ }
+
+ // Trivial copy (=firstprivate).
+ Builder.restoreIP(AllocaIP);
+ Type *VTy = VPtr.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Builder.restoreIP(CodeGenIP);
+ Builder.CreateStore(V, ReplacementValue);
+ return CodeGenIP;
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) {
+ ++NumFinalizationPoints;
+ // No destructors.
+ };
+
+ IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+ Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
+ nullptr, OMP_PB_default, false);
+
+ EXPECT_EQ(NumBodiesGenerated, 1U);
+ EXPECT_EQ(NumPrivatizedVars, 1U);
+ EXPECT_EQ(NumFinalizationPoints, 1U);
+
+ Builder.restoreIP(AfterIP);
+ Builder.CreateRetVoid();
+
+ EXPECT_NE(PrivAI, nullptr);
+ Function *OutlinedFn = PrivAI->getFunction();
+ EXPECT_NE(F, OutlinedFn);
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+
+ EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
+ EXPECT_EQ(OutlinedFn->arg_size(), 3U);
+
+ EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
+ ASSERT_EQ(OutlinedFn->getNumUses(), 2U);
+
+ CallInst *DirectCI = nullptr;
+ CallInst *ForkCI = nullptr;
+ for (User *Usr : OutlinedFn->users()) {
+ if (isa<CallInst>(Usr)) {
+ ASSERT_EQ(DirectCI, nullptr);
+ DirectCI = cast<CallInst>(Usr);
+ } else {
+ ASSERT_TRUE(isa<ConstantExpr>(Usr));
+ ASSERT_EQ(Usr->getNumUses(), 1U);
+ ASSERT_TRUE(isa<CallInst>(Usr->user_back()));
+ ForkCI = cast<CallInst>(Usr->user_back());
+ }
+ }
+
+ EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+ EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
+ EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+ EXPECT_EQ(ForkCI->getArgOperand(1),
+ ConstantInt::get(Type::getInt32Ty(Ctx), 1));
+ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+
+ EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
+ EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
+ EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
+ EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
+ EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin());
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+ unsigned NumBodiesGenerated = 0;
+ unsigned NumPrivatizedVars = 0;
+ unsigned NumFinalizationPoints = 0;
+
+ CallInst *CheckedBarrier = nullptr;
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumBodiesGenerated;
+
+ Builder.restoreIP(CodeGenIP);
+
+ // Create three barriers, two cancel barriers but only one checked.
+ Function *CBFn, *BFn;
+
+ Builder.restoreIP(
+ OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel));
+
+ CBFn = M->getFunction("__kmpc_cancel_barrier");
+ BFn = M->getFunction("__kmpc_barrier");
+ ASSERT_NE(CBFn, nullptr);
+ ASSERT_EQ(BFn, nullptr);
+ ASSERT_EQ(CBFn->getNumUses(), 1U);
+ ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
+ ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
+ CheckedBarrier = cast<CallInst>(CBFn->user_back());
+
+ Builder.restoreIP(
+ OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel, true));
+ CBFn = M->getFunction("__kmpc_cancel_barrier");
+ BFn = M->getFunction("__kmpc_barrier");
+ ASSERT_NE(CBFn, nullptr);
+ ASSERT_NE(BFn, nullptr);
+ ASSERT_EQ(CBFn->getNumUses(), 1U);
+ ASSERT_EQ(BFn->getNumUses(), 1U);
+ ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
+ ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
+
+ Builder.restoreIP(OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel,
+ false, false));
+ ASSERT_EQ(CBFn->getNumUses(), 2U);
+ ASSERT_EQ(BFn->getNumUses(), 1U);
+ ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
+ ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
+ ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
+ };
+
+ auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V,
+ Value *&) -> InsertPointTy {
+ ++NumPrivatizedVars;
+ llvm_unreachable("No privatization callback call expected!");
+ };
+
+ FunctionType *FakeDestructorTy =
+ FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
+ /*isVarArg=*/false);
+ auto *FakeDestructor = Function::Create(
+ FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
+
+ auto FiniCB = [&](InsertPointTy IP) {
+ ++NumFinalizationPoints;
+ Builder.restoreIP(IP);
+ Builder.CreateCall(FakeDestructor,
+ {Builder.getInt32(NumFinalizationPoints)});
+ };
+
+ IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+ Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
+ nullptr, OMP_PB_default, true);
+
+ EXPECT_EQ(NumBodiesGenerated, 1U);
+ EXPECT_EQ(NumPrivatizedVars, 0U);
+ EXPECT_EQ(NumFinalizationPoints, 2U);
+ EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
+
+ Builder.restoreIP(AfterIP);
+ Builder.CreateRetVoid();
+
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+
+ BasicBlock *ExitBB = nullptr;
+ for (const User *Usr : FakeDestructor->users()) {
+ const CallInst *CI = dyn_cast<CallInst>(Usr);
+ ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
+ ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
+ ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
+ if (ExitBB)
+ ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
+ else
+ ExitBB = CI->getNextNode()->getSuccessor(0);
+ ASSERT_EQ(ExitBB->size(), 1U);
+ ASSERT_TRUE(isa<ReturnInst>(ExitBB->front()));
+ }
+}
+
} // namespace
More information about the llvm-commits
mailing list