[clang] 19756ef - [OpenMP][IRBuilder] Support allocas in nested parallel regions
Johannes Doerfert via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 30 08:21:33 PDT 2020
Author: Johannes Doerfert
Date: 2020-07-30T10:19:39-05:00
New Revision: 19756ef53a498b7aa1fbac9e3a7cd3aa8e110fad
URL: https://github.com/llvm/llvm-project/commit/19756ef53a498b7aa1fbac9e3a7cd3aa8e110fad
DIFF: https://github.com/llvm/llvm-project/commit/19756ef53a498b7aa1fbac9e3a7cd3aa8e110fad.diff
LOG: [OpenMP][IRBuilder] Support allocas in nested parallel regions
We need to keep track of the alloca insertion point (which we already
communicate via the callback to the user) as we place allocas as well.
Reviewed By: fghanim, SouraVX
Differential Revision: https://reviews.llvm.org/D82470
Added:
Modified:
clang/lib/CodeGen/CGStmtOpenMP.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0ee1133ebaa1..df1cc1666de4 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1707,9 +1707,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
- Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
- FiniCB, IfCond, NumThreads,
- ProcBind, S.hasCancel()));
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+ Builder.restoreIP(
+ OMPBuilder.CreateParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ IfCond, NumThreads, ProcBind, S.hasCancel()));
return;
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 95eed59f1b3d..f813a730342e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -156,6 +156,7 @@ class OpenMPIRBuilder {
/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
@@ -166,10 +167,11 @@ class OpenMPIRBuilder {
///
/// \returns The insertion position *after* the parallel.
IRBuilder<>::InsertPoint
- CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
- PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
- Value *IfCondition, Value *NumThreads,
- omp::ProcBindKind ProcBind, bool IsCancellable);
+ CreateParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
+ BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+ FinalizeCallbackTy FiniCB, Value *IfCondition,
+ Value *NumThreads, omp::ProcBindKind ProcBind,
+ bool IsCancellable);
/// Generator for '#omp flush'
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9468a3aa3c8d..a5fe4ec87c46 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -394,9 +394,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
}
IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
- const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
- PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
- Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+ const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
+ BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+ FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
+ omp::ProcBindKind ProcBind, bool IsCancellable) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -429,7 +430,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// we want to delete at the end.
SmallVector<Instruction *, 4> ToBeDeleted;
- Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+ // Change the location to the outer alloca insertion point to create and
+ // initialize the allocas we pass into the parallel region.
+ Builder.restoreIP(OuterAllocaIP);
AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
@@ -481,9 +484,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// Generate the privatization allocas in the block that will become the entry
// of the outlined function.
- InsertPointTy AllocaIP(PRegEntryBB,
- PRegEntryBB->getTerminator()->getIterator());
- Builder.restoreIP(AllocaIP);
+ Builder.SetInsertPoint(PRegEntryBB->getTerminator());
+ InsertPointTy InnerAllocaIP = Builder.saveIP();
+
AllocaInst *PrivTIDAddr =
Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
@@ -512,7 +515,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
- BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+ BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
@@ -671,7 +674,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
ReplacementValue = PrivTID;
} else {
Builder.restoreIP(
- PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+ PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue));
assert(ReplacementValue &&
"Expected copy/create callback to set replacement value!");
if (ReplacementValue == &V)
@@ -686,6 +689,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
PrivHelper(*Input);
}
+ LLVM_DEBUG({
+ for (Value *Output : Outputs)
+ LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+ });
assert(Outputs.empty() &&
"OpenMP outlining should not produce live-out values!");
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 2ba9d85a0f9e..edd2c8f5dd88 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6,13 +6,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gtest/gtest.h"
@@ -360,9 +361,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
IRBuilder<>::InsertPoint AfterIP =
- OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr,
- nullptr, OMP_PROC_BIND_default, false);
+ OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false);
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
EXPECT_EQ(NumFinalizationPoints, 1U);
@@ -400,6 +403,205 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
}
+TEST_F(OpenMPIRBuilderTest, ParallelNested) { // One parallel region created inside the body of another.
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+ unsigned NumInnerBodiesGenerated = 0; // Incremented by InnerBodyGenCB.
+ unsigned NumOuterBodiesGenerated = 0; // Incremented by OuterBodyGenCB.
+ unsigned NumFinalizationPoints = 0; // Incremented by FiniCB.
+
+ auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumInnerBodiesGenerated; // The inner region body emits no code; it is only counted.
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ // Trivial copy (=firstprivate).
+ Builder.restoreIP(AllocaIP); // The reload and the copy's alloca go to the alloca insertion point.
+ Type *VTy = VPtr.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Builder.restoreIP(CodeGenIP); // The store initializing the copy goes to the code-generation point.
+ Builder.CreateStore(V, ReplacementValue);
+ return CodeGenIP; // The incoming code-generation point is handed back unchanged.
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+ auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumOuterBodiesGenerated;
+ Builder.restoreIP(CodeGenIP);
+ BasicBlock *CGBB = CodeGenIP.getBlock();
+ BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); // Split at the code-generation point.
+ CGBB->getTerminator()->eraseFromParent(); // Leave CGBB open so the inner region can be emitted at its end.
+ ; // Empty statement; has no effect.
+
+ IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( // Inner region; this body's AllocaIP is its alloca insertion point.
+ InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+ FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+ Builder.restoreIP(AfterIP);
+ Builder.CreateBr(NewBB); // Continue in the split-off block after the inner region.
+ };
+
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), // Outermost allocas go to the start of F's entry block.
+ F->getEntryBlock().getFirstInsertionPt());
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+ EXPECT_EQ(NumInnerBodiesGenerated, 1U);
+ EXPECT_EQ(NumOuterBodiesGenerated, 1U);
+ EXPECT_EQ(NumFinalizationPoints, 2U); // The same FiniCB is passed to the outer and the inner region.
+
+ Builder.restoreIP(AfterIP);
+ Builder.CreateRetVoid();
+
+ OMPBuilder.finalize();
+
+ EXPECT_EQ(M->size(), 5U); // NOTE(review): presumably func, two outlined functions and two runtime declarations - confirm.
+ for (Function &OutlinedFn : *M) {
+ if (F == &OutlinedFn || OutlinedFn.isDeclaration()) // Only function definitions other than F are inspected.
+ continue;
+ EXPECT_FALSE(verifyModule(*M, &errs())); // Module-wide check, repeated on every iteration.
+ EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
+ EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
+ EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
+ EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
+
+ EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
+ EXPECT_EQ(OutlinedFn.arg_size(), 2U);
+
+ EXPECT_EQ(OutlinedFn.getNumUses(), 1U); // The single use must be a constant expression ...
+ User *Usr = OutlinedFn.user_back();
+ ASSERT_TRUE(isa<ConstantExpr>(Usr));
+ CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back()); // ... which in turn is used by a call instruction.
+ ASSERT_NE(ForkCI, nullptr);
+
+ EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+ EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
+ EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+ EXPECT_EQ(ForkCI->getArgOperand(1),
+ ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
+ EXPECT_EQ(ForkCI->getArgOperand(2), Usr); // The constant expression is the call's third argument.
+ }
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { // Two sibling parallel regions created inside the body of an outer one.
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+ unsigned NumInnerBodiesGenerated = 0; // Incremented by InnerBodyGenCB.
+ unsigned NumOuterBodiesGenerated = 0; // Incremented by OuterBodyGenCB.
+ unsigned NumFinalizationPoints = 0; // Incremented by FiniCB.
+
+ auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumInnerBodiesGenerated; // The inner region bodies emit no code; they are only counted.
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ // Trivial copy (=firstprivate).
+ Builder.restoreIP(AllocaIP); // The reload and the copy's alloca go to the alloca insertion point.
+ Type *VTy = VPtr.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Builder.restoreIP(CodeGenIP); // The store initializing the copy goes to the code-generation point.
+ Builder.CreateStore(V, ReplacementValue);
+ return CodeGenIP; // The incoming code-generation point is handed back unchanged.
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+ auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ ++NumOuterBodiesGenerated;
+ Builder.restoreIP(CodeGenIP);
+ BasicBlock *CGBB = CodeGenIP.getBlock();
+ BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); // First split: at the code-generation point.
+ BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); // Second split: at NewBB1's first insertion point.
+ CGBB->getTerminator()->eraseFromParent(); // Leave CGBB open for the first inner region.
+ ; // Empty statement; has no effect.
+ NewBB1->getTerminator()->eraseFromParent(); // Leave NewBB1 open for the second inner region.
+ ; // Empty statement; has no effect.
+
+ IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel( // First inner region, emitted at the end of CGBB.
+ InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+ FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+ Builder.restoreIP(AfterIP1);
+ Builder.CreateBr(NewBB1); // Chain the first inner region to NewBB1.
+
+ IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel( // Second inner region; receives the same AllocaIP as the first.
+ InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+ FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+ Builder.restoreIP(AfterIP2);
+ Builder.CreateBr(NewBB2); // Continue in NewBB2 after the second inner region.
+ };
+
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), // Outermost allocas go to the start of F's entry block.
+ F->getEntryBlock().getFirstInsertionPt());
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+ EXPECT_EQ(NumInnerBodiesGenerated, 2U);
+ EXPECT_EQ(NumOuterBodiesGenerated, 1U);
+ EXPECT_EQ(NumFinalizationPoints, 3U); // The same FiniCB is passed to the outer and both inner regions.
+
+ Builder.restoreIP(AfterIP);
+ Builder.CreateRetVoid();
+
+ OMPBuilder.finalize();
+
+ EXPECT_EQ(M->size(), 6U); // NOTE(review): presumably func, three outlined functions and two runtime declarations - confirm.
+ for (Function &OutlinedFn : *M) {
+ if (F == &OutlinedFn || OutlinedFn.isDeclaration()) // Only function definitions other than F are inspected.
+ continue;
+ EXPECT_FALSE(verifyModule(*M, &errs())); // Module-wide check, repeated on every iteration.
+ EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
+ EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
+ EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
+ EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
+
+ EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
+ EXPECT_EQ(OutlinedFn.arg_size(), 2U);
+
+ unsigned NumAllocas = 0; // Counts alloca instructions across the whole function.
+ for (Instruction &I : instructions(OutlinedFn))
+ NumAllocas += isa<AllocaInst>(I);
+ EXPECT_EQ(NumAllocas, 1U); // Each inspected function must contain exactly one alloca.
+
+ EXPECT_EQ(OutlinedFn.getNumUses(), 1U); // The single use must be a constant expression ...
+ User *Usr = OutlinedFn.user_back();
+ ASSERT_TRUE(isa<ConstantExpr>(Usr));
+ CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back()); // ... which in turn is used by a call instruction.
+ ASSERT_NE(ForkCI, nullptr);
+
+ EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+ EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
+ EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+ EXPECT_EQ(ForkCI->getArgOperand(1),
+ ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
+ EXPECT_EQ(ForkCI->getArgOperand(2), Usr); // The constant expression is the call's third argument.
+ }
+}
+
TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
@@ -460,9 +662,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
// No destructors.
};
- IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
- Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
- nullptr, OMP_PROC_BIND_default, false);
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ Builder.CreateIsNotNull(F->arg_begin()),
+ nullptr, OMP_PROC_BIND_default, false);
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
@@ -585,9 +790,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
{Builder.getInt32(NumFinalizationPoints)});
};
- IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
- Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
- nullptr, OMP_PROC_BIND_default, true);
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ Builder.CreateIsNotNull(F->arg_begin()),
+ nullptr, OMP_PROC_BIND_default, true);
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 0U);
More information about the cfe-commits mailing list