[clang] 19756ef - [OpenMP][IRBuilder] Support allocas in nested parallel regions

Johannes Doerfert via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 30 08:21:33 PDT 2020


Author: Johannes Doerfert
Date: 2020-07-30T10:19:39-05:00
New Revision: 19756ef53a498b7aa1fbac9e3a7cd3aa8e110fad

URL: https://github.com/llvm/llvm-project/commit/19756ef53a498b7aa1fbac9e3a7cd3aa8e110fad
DIFF: https://github.com/llvm/llvm-project/commit/19756ef53a498b7aa1fbac9e3a7cd3aa8e110fad.diff

LOG: [OpenMP][IRBuilder] Support allocas in nested parallel regions

We need to keep track of the alloca insertion point (which we already
communicate via the callback to the user) as we place allocas as well.

Reviewed By: fghanim, SouraVX

Differential Revision: https://reviews.llvm.org/D82470

Added: 
    

Modified: 
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0ee1133ebaa1..df1cc1666de4 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1707,9 +1707,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
 
     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
-    Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
-                                                FiniCB, IfCond, NumThreads,
-                                                ProcBind, S.hasCancel()));
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+    Builder.restoreIP(
+        OMPBuilder.CreateParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
     return;
   }
 

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 95eed59f1b3d..f813a730342e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -156,6 +156,7 @@ class OpenMPIRBuilder {
   /// Generator for '#omp parallel'
   ///
   /// \param Loc The insert and source location description.
+  /// \param AllocaIP The insertion points to be used for alloca instructions.
   /// \param BodyGenCB Callback that will generate the region code.
   /// \param PrivCB Callback to copy a given variable (think copy constructor).
   /// \param FiniCB Callback to finalize variable copies.
@@ -166,10 +167,11 @@ class OpenMPIRBuilder {
   ///
   /// \returns The insertion position *after* the parallel.
   IRBuilder<>::InsertPoint
-  CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
-                 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
-                 Value *IfCondition, Value *NumThreads,
-                 omp::ProcBindKind ProcBind, bool IsCancellable);
+  CreateParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
+                 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+                 FinalizeCallbackTy FiniCB, Value *IfCondition,
+                 Value *NumThreads, omp::ProcBindKind ProcBind,
+                 bool IsCancellable);
 
   /// Generator for '#omp flush'
   ///

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9468a3aa3c8d..a5fe4ec87c46 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -394,9 +394,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
 }
 
 IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
-    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
-    PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
-    Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
+    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
+    omp::ProcBindKind ProcBind, bool IsCancellable) {
   if (!updateToLocation(Loc))
     return Loc.IP;
 
@@ -429,7 +430,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
   // we want to delete at the end.
   SmallVector<Instruction *, 4> ToBeDeleted;
 
-  Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+  // Change the location to the outer alloca insertion point to create and
+  // initialize the allocas we pass into the parallel region.
+  Builder.restoreIP(OuterAllocaIP);
   AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
   AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
 
@@ -481,9 +484,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
 
   // Generate the privatization allocas in the block that will become the entry
   // of the outlined function.
-  InsertPointTy AllocaIP(PRegEntryBB,
-                         PRegEntryBB->getTerminator()->getIterator());
-  Builder.restoreIP(AllocaIP);
+  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
+  InsertPointTy InnerAllocaIP = Builder.saveIP();
+
   AllocaInst *PrivTIDAddr =
       Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
   Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
@@ -512,7 +515,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
   // Let the caller create the body.
   assert(BodyGenCB && "Expected body generation callback!");
   InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
-  BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
 
   LLVM_DEBUG(dbgs() << "After  body codegen: " << *OuterFn << "\n");
 
@@ -671,7 +674,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
       ReplacementValue = PrivTID;
     } else {
       Builder.restoreIP(
-          PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+          PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue));
       assert(ReplacementValue &&
              "Expected copy/create callback to set replacement value!");
       if (ReplacementValue == &V)
@@ -686,6 +689,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
     LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
     PrivHelper(*Input);
   }
+  LLVM_DEBUG({
+    for (Value *Output : Outputs)
+      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+  });
   assert(Outputs.empty() &&
          "OpenMP outlining should not produce live-out values!");
 

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 2ba9d85a0f9e..edd2c8f5dd88 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6,13 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "gtest/gtest.h"
@@ -360,9 +361,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
 
   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
 
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
   IRBuilder<>::InsertPoint AfterIP =
-      OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr,
-                                nullptr, OMP_PROC_BIND_default, false);
+      OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
   EXPECT_EQ(NumFinalizationPoints, 1U);
@@ -400,6 +403,205 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
 }
 
+TEST_F(OpenMPIRBuilderTest, ParallelNested) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  unsigned NumInnerBodiesGenerated = 0;
+  unsigned NumOuterBodiesGenerated = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumInnerBodiesGenerated;
+  };
+
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+    // Trivial copy (=firstprivate).
+    Builder.restoreIP(AllocaIP);
+    Type *VTy = VPtr.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(V, ReplacementValue);
+    return CodeGenIP;
+  };
+
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+  auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumOuterBodiesGenerated;
+    Builder.restoreIP(CodeGenIP);
+    BasicBlock *CGBB = CodeGenIP.getBlock();
+    BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
+    CGBB->getTerminator()->eraseFromParent();
+    ;
+
+    IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+        InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+        FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+    Builder.restoreIP(AfterIP);
+    Builder.CreateBr(NewBB);
+  };
+
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+  EXPECT_EQ(NumInnerBodiesGenerated, 1U);
+  EXPECT_EQ(NumOuterBodiesGenerated, 1U);
+  EXPECT_EQ(NumFinalizationPoints, 2U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize();
+
+  EXPECT_EQ(M->size(), 5U);
+  for (Function &OutlinedFn : *M) {
+    if (F == &OutlinedFn || OutlinedFn.isDeclaration())
+      continue;
+    EXPECT_FALSE(verifyModule(*M, &errs()));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
+
+    EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
+    EXPECT_EQ(OutlinedFn.arg_size(), 2U);
+
+    EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
+    User *Usr = OutlinedFn.user_back();
+    ASSERT_TRUE(isa<ConstantExpr>(Usr));
+    CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
+    ASSERT_NE(ForkCI, nullptr);
+
+    EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+    EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
+    EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+    EXPECT_EQ(ForkCI->getArgOperand(1),
+              ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
+    EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
+  }
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  unsigned NumInnerBodiesGenerated = 0;
+  unsigned NumOuterBodiesGenerated = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumInnerBodiesGenerated;
+  };
+
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+    // Trivial copy (=firstprivate).
+    Builder.restoreIP(AllocaIP);
+    Type *VTy = VPtr.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(V, ReplacementValue);
+    return CodeGenIP;
+  };
+
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+  auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumOuterBodiesGenerated;
+    Builder.restoreIP(CodeGenIP);
+    BasicBlock *CGBB = CodeGenIP.getBlock();
+    BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
+    BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
+    CGBB->getTerminator()->eraseFromParent();
+    ;
+    NewBB1->getTerminator()->eraseFromParent();
+    ;
+
+    IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel(
+        InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+        FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+    Builder.restoreIP(AfterIP1);
+    Builder.CreateBr(NewBB1);
+
+    IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel(
+        InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+        FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+    Builder.restoreIP(AfterIP2);
+    Builder.CreateBr(NewBB2);
+  };
+
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+  EXPECT_EQ(NumInnerBodiesGenerated, 2U);
+  EXPECT_EQ(NumOuterBodiesGenerated, 1U);
+  EXPECT_EQ(NumFinalizationPoints, 3U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize();
+
+  EXPECT_EQ(M->size(), 6U);
+  for (Function &OutlinedFn : *M) {
+    if (F == &OutlinedFn || OutlinedFn.isDeclaration())
+      continue;
+    EXPECT_FALSE(verifyModule(*M, &errs()));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
+
+    EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
+    EXPECT_EQ(OutlinedFn.arg_size(), 2U);
+
+    unsigned NumAllocas = 0;
+    for (Instruction &I : instructions(OutlinedFn))
+      NumAllocas += isa<AllocaInst>(I);
+    EXPECT_EQ(NumAllocas, 1U);
+
+    EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
+    User *Usr = OutlinedFn.user_back();
+    ASSERT_TRUE(isa<ConstantExpr>(Usr));
+    CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
+    ASSERT_NE(ForkCI, nullptr);
+
+    EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+    EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
+    EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+    EXPECT_EQ(ForkCI->getArgOperand(1),
+              ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
+    EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
+  }
+}
+
 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
@@ -460,9 +662,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
     // No destructors.
   };
 
-  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
-      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
-      nullptr, OMP_PROC_BIND_default, false);
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                Builder.CreateIsNotNull(F->arg_begin()),
+                                nullptr, OMP_PROC_BIND_default, false);
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
@@ -585,9 +790,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
                        {Builder.getInt32(NumFinalizationPoints)});
   };
 
-  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
-      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
-      nullptr, OMP_PROC_BIND_default, true);
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                Builder.CreateIsNotNull(F->arg_begin()),
+                                nullptr, OMP_PROC_BIND_default, true);
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 0U);


        


More information about the cfe-commits mailing list