[clang] 7af287d - [OpenMP][IRBuilder] Support nested parallel regions

Johannes Doerfert via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 14 20:41:14 PDT 2020


Author: Johannes Doerfert
Date: 2020-07-14T22:39:06-05:00
New Revision: 7af287d0d921471f18b5c3054ce42381c0f973ed

URL: https://github.com/llvm/llvm-project/commit/7af287d0d921471f18b5c3054ce42381c0f973ed
DIFF: https://github.com/llvm/llvm-project/commit/7af287d0d921471f18b5c3054ce42381c0f973ed.diff

LOG: [OpenMP][IRBuilder] Support nested parallel regions

During code generation we might change/add basic blocks so keeping a
list of them is fairly easy to break. Nested parallel regions were
enough. The new scheme does recompute the list of blocks to be outlined
once it is needed.

Reviewed By: anchu-rajendran

Differential Revision: https://reviews.llvm.org/D82722

Added: 
    clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c

Modified: 
    clang/test/OpenMP/cancel_codegen.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index b7d1cea56721..a21a9db1e39a 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -175,7 +175,7 @@ for (int i = 0; i < argc; ++i) {
 
 // IRBUILDER: define internal void @main
 
-// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
+// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]]
 // IRBUILDER-NEXT: ret void
 // IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
 
@@ -192,10 +192,8 @@ for (int i = 0; i < argc; ++i) {
 // IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
 // IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
 // IRBUILDER: [[EXIT]]
-// IRBUILDER: br label %[[EXIT2:.+]]
-// IRBUILDER: [[CONTINUE]]
-// IRBUILDER: br label %[[ELSE:.+]]
-// IRBUILDER: [[EXIT2]]
 // IRBUILDER: br label %[[RETURN]]
+// IRBUILDER: [[CONTINUE]]
+// IRBUILDER: br label %[[ELSE2:.+]]
 
 #endif

diff  --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
new file mode 100644
index 000000000000..552455eb9779
--- /dev/null
+++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
@@ -0,0 +1,110 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+//  %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o /tmp/t1 %s
+//  %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch /tmp/t1 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
+
+// expected-no-diagnostics
+
+// TODO: Teach the update script to check new functions too.
+
+#ifndef HEADER
+#define HEADER
+
+// ALL-LABEL: @_Z17nested_parallel_0v(
+// ALL-NEXT:  entry:
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// ALL:       omp_parallel:
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*))
+// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT12:%.*]]
+// ALL:       omp.par.outlined.exit12:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// ALL:       omp.par.exit.split:
+// ALL-NEXT:    ret void
+//
+void nested_parallel_0(void) {
+#pragma omp parallel
+  {
+#pragma omp parallel
+    {
+    }
+  }
+}
+
+// ALL-LABEL: @_Z17nested_parallel_1Pfid(
+// ALL-NEXT:  entry:
+// ALL-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// ALL-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// ALL-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// ALL-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// ALL-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// ALL-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// ALL:       omp_parallel:
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
+// ALL:       omp.par.outlined.exit13:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// ALL:       omp.par.exit.split:
+// ALL-NEXT:    ret void
+//
+void nested_parallel_1(float *r, int a, double b) {
+#pragma omp parallel
+  {
+#pragma omp parallel
+    {
+      *r = a + b;
+    }
+  }
+}
+
+// ALL-LABEL: @_Z17nested_parallel_2Pfid(
+// ALL-NEXT:  entry:
+// ALL-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// ALL-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// ALL-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// ALL-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// ALL-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// ALL-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// ALL:       omp_parallel:
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
+// ALL:       omp.par.outlined.exit55:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// ALL:       omp.par.exit.split:
+// ALL-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// ALL-NEXT:    [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double
+// ALL-NEXT:    [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8
+// ALL-NEXT:    [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]]
+// ALL-NEXT:    [[CONV58:%.*]] = fptrunc double [[ADD57]] to float
+// ALL-NEXT:    [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8
+// ALL-NEXT:    store float [[CONV58]], float* [[TMP2]], align 4
+// ALL-NEXT:    ret void
+//
+void nested_parallel_2(float *r, int a, double b) {
+#pragma omp parallel
+  {
+    *r = a + b;
+#pragma omp parallel
+    {
+      *r = a + b;
+#pragma omp parallel
+      {
+        *r = a + b;
+      }
+      *r = a + b;
+#pragma omp parallel
+      {
+        *r = a + b;
+      }
+      *r = a + b;
+    }
+    *r = a + b;
+  }
+  *r = a + b;
+}
+
+#endif

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2a3a64a5f4ac..95eed59f1b3d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -285,9 +285,14 @@ class OpenMPIRBuilder {
   /// Helper that contains information about regions we need to outline
   /// during finalization.
   struct OutlineInfo {
-    SmallVector<BasicBlock *, 32> Blocks;
     using PostOutlineCBTy = std::function<void(Function &)>;
     PostOutlineCBTy PostOutlineCB;
+    BasicBlock *EntryBB, *ExitBB;
+
+    /// Collect all blocks in between EntryBB and ExitBB in both the given
+    /// vector and set.
+    void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
+                       SmallVectorImpl<BasicBlock *> &BlockVector);
   };
 
   /// Collection of regions that need to be outlined during finalization.

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index b7212edab6ab..9468a3aa3c8d 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -127,13 +127,16 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
 
 void OpenMPIRBuilder::finalize() {
+  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+  SmallVector<BasicBlock *, 32> Blocks;
   for (OutlineInfo &OI : OutlineInfos) {
-    assert(!OI.Blocks.empty() &&
-           "Outlined regions should have at least a single block!");
-    BasicBlock *RegEntryBB = OI.Blocks.front();
-    Function *OuterFn = RegEntryBB->getParent();
+    ParallelRegionBlockSet.clear();
+    Blocks.clear();
+    OI.collectBlocks(ParallelRegionBlockSet, Blocks);
+
+    Function *OuterFn = OI.EntryBB->getParent();
     CodeExtractorAnalysisCache CEAC(*OuterFn);
-    CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
+    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                             /* AggregateArgs */ false,
                             /* BlockFrequencyInfo */ nullptr,
                             /* BranchProbabilityInfo */ nullptr,
@@ -143,6 +146,8 @@ void OpenMPIRBuilder::finalize() {
                             /* Suffix */ ".omp_par");
 
     LLVM_DEBUG(dbgs() << "Before     outlining: " << *OuterFn << "\n");
+    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
+                      << " Exit: " << OI.ExitBB->getName() << "\n");
     assert(Extractor.isEligible() &&
            "Expected OpenMP outlining to be possible!");
 
@@ -162,12 +167,12 @@ void OpenMPIRBuilder::finalize() {
     // made our own entry block after all.
     {
       BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
-      assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
-      assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
-      RegEntryBB->moveBefore(&ArtificialEntry);
+      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
+      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
+      OI.EntryBB->moveBefore(&ArtificialEntry);
       ArtificialEntry.eraseFromParent();
     }
-    assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
+    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
     assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
 
     // Run a user callback, e.g. to add attributes.
@@ -614,20 +619,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
   InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
   FiniCB(PreFiniIP);
 
-  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
-  SmallVector<BasicBlock *, 32> Worklist;
-  ParallelRegionBlockSet.insert(PRegEntryBB);
-  ParallelRegionBlockSet.insert(PRegExitBB);
+  OI.EntryBB = PRegEntryBB;
+  OI.ExitBB = PRegExitBB;
 
-  // Collect all blocks in-between PRegEntryBB and PRegExitBB.
-  Worklist.push_back(PRegEntryBB);
-  while (!Worklist.empty()) {
-    BasicBlock *BB = Worklist.pop_back_val();
-    OI.Blocks.push_back(BB);
-    for (BasicBlock *SuccBB : successors(BB))
-      if (ParallelRegionBlockSet.insert(SuccBB).second)
-        Worklist.push_back(SuccBB);
-  }
+  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+  SmallVector<BasicBlock *, 32> Blocks;
+  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
 
   // Ensure a single exit node for the outlined region by creating one.
   // We might have multiple incoming edges to the exit now due to finalizations,
@@ -635,10 +632,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
   BasicBlock *PRegOutlinedExitBB = PRegExitBB;
   PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
   PRegOutlinedExitBB->setName("omp.par.outlined.exit");
-  OI.Blocks.push_back(PRegOutlinedExitBB);
+  Blocks.push_back(PRegOutlinedExitBB);
 
   CodeExtractorAnalysisCache CEAC(*OuterFn);
-  CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
+  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                           /* AggregateArgs */ false,
                           /* BlockFrequencyInfo */ nullptr,
                           /* BranchProbabilityInfo */ nullptr,
@@ -694,7 +691,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
 
   LLVM_DEBUG(dbgs() << "After  privatization: " << *OuterFn << "\n");
   LLVM_DEBUG({
-    for (auto *BB : OI.Blocks)
+    for (auto *BB : Blocks)
       dbgs() << " PBR: " << BB->getName() << "\n";
   });
 
@@ -1112,3 +1109,20 @@ void OpenMPIRBuilder::initializeTypes(Module &M) {
   VarName##Ptr = PointerType::getUnqual(T);
 #include "llvm/Frontend/OpenMP/OMPKinds.def"
 }
+
+void OpenMPIRBuilder::OutlineInfo::collectBlocks(
+    SmallPtrSetImpl<BasicBlock *> &BlockSet,
+    SmallVectorImpl<BasicBlock *> &BlockVector) {
+  SmallVector<BasicBlock *, 32> Worklist;
+  BlockSet.insert(EntryBB);
+  BlockSet.insert(ExitBB);
+
+  Worklist.push_back(EntryBB);
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+    BlockVector.push_back(BB);
+    for (BasicBlock *SuccBB : successors(BB))
+      if (BlockSet.insert(SuccBB).second)
+        Worklist.push_back(SuccBB);
+  }
+}


        


More information about the cfe-commits mailing list