[flang-commits] [flang] [mlir] [OpenMP][flang][MLIR] Decouple alloc, init, and copy regions for `omp.private|declare_reduction` ops (PR #125699)

via flang-commits flang-commits at lists.llvm.org
Tue Feb 4 07:32:50 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-openmp

Author: Kareem Ergawy (ergawy)

<details>
<summary>Changes</summary>

This PR changes the emitted block structure of alloc, init, and copy regions for `omp.private` and `omp.declare_reduction` ops a little bit. In particular, this decouples init and copy regions from the alloca insertion-point. The main motivation is fix "Instruction does not dominate all uses!" errors that happen specially when an init region uses a value from the OpenMP region it is being inlined into. The issue happens because, previous to this PR, we inline the init region right after the latest alloc block (since we used the alloca IP); which in some cases (see exmaple below), is too early and causes the use dominance issue.

Example that would break without this PR (when delayed privatization is enabled for `omp.wsloop`s):
```fortran
subroutine test2 (xyz)
  integer :: i
  integer :: xyz(:)

  !$omp target map(from:xyz)
    !$omp do private(xyz)
      do i = 1, 10
        xyz(i) = i
      end do
  !$omp end target
end subroutine
```

---

Patch is 29.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125699.diff


6 Files Affected:

- (modified) flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 (+32-32) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+130-77) 
- (modified) mlir/test/Target/LLVMIR/openmp-firstprivate.mlir (+7-4) 
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+11-6) 
- (modified) mlir/test/Target/LLVMIR/openmp-private.mlir (+3-3) 
- (modified) mlir/test/Target/LLVMIR/openmp-wsloop-private-cond_br.mlir (+3-3) 


``````````diff
diff --git a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
index 6facce56123ab2..7e735b64995041 100644
--- a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
+++ b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
@@ -32,46 +32,52 @@ subroutine worst_case(a, b, c, d)
 ! CHECK-LABEL: define internal void @worst_case_..omp_par
 ! CHECK-NEXT:  omp.par.entry:
 !                [reduction alloc regions inlined here]
-! CHECK:         br label %omp.private.init
+! CHECK:         br label %omp.region.after_alloca
 
-! CHECK:       omp.private.init:                            ; preds = %omp.par.entry
-! CHECK-NEXT:  br label %omp.private.init7
+! CHECK:       omp.region.after_alloca:
+! CHECK-NEXT:    br label %omp.par.region
+
+! CHECK:       omp.par.region:
+! CHECK-NEXT:    br label %omp.private.init
+
+! CHECK:       omp.private.init:
+! CHECK-NEXT:  br label %omp.private.init2
 
-! CHECK:       omp.private.init7:                               ; preds = %omp.private.init
+! CHECK:       omp.private.init2:
 !                [begin private alloc for first var]
 !                [read the length from the mold argument]
 !                [if it is non-zero...]
-! CHECK:         br i1 {{.*}}, label %omp.private.init8, label %omp.private.init9
+! CHECK:         br i1 %{{.*}}, label %omp.private.init3, label %omp.private.init4
 
-! CHECK:       omp.private.init9:                               ; preds = %omp.private.init7
-!                [finish private alloc for first var with zero extent]
-! CHECK:         br label %omp.private.init10
+! CHECK:       omp.private.init4:                               ; preds = %omp.private.init2
+!                [finish private alloc for second var with zero extent]
+! CHECK:         br label %omp.private.init5
 
-! CHECK:       omp.private.init10:                               ; preds = %omp.private.init8, %omp.private.init9
-! CHECK-NEXT:    br label %omp.region.cont6
+! CHECK:       omp.private.init5:                               ; preds = %omp.private.init3, %omp.private.init4
+! CHECK-NEXT:    br label %omp.region.cont
 
-! CHECK:       omp.region.cont6:                                 ; preds = %omp.private.init10
+! CHECK:       omp.region.cont:                                  ; preds = %omp.private.init5
 ! CHECK-NEXT:    %{{.*}} = phi ptr
-! CHECK-NEXT:    br label %omp.private.init1
+! CHECK-NEXT:    br label %omp.private.init7
 
-! CHECK:       omp.private.init1:                                ; preds = %omp.region.cont6
+! CHECK:       omp.private.init7:
 !                [begin private alloc for first var]
 !                [read the length from the mold argument]
 !                [if it is non-zero...]
-! CHECK:         br i1 %{{.*}}, label %omp.private.init2, label %omp.private.init3
+! CHECK:         br i1 {{.*}}, label %omp.private.init8, label %omp.private.init9
 
-! CHECK:       omp.private.init3:                               ; preds = %omp.private.init1
-!                [finish private alloc for second var with zero extent]
-! CHECK:         br label %omp.private.init4
+! CHECK:       omp.private.init9:                               ; preds = %omp.private.init7
+!                [finish private alloc for first var with zero extent]
+! CHECK:         br label %omp.private.init10
 
-! CHECK:       omp.private.init4:                               ; preds = %omp.private.init2, %omp.private.init3
-! CHECK-NEXT:    br label %omp.region.cont
+! CHECK:       omp.private.init10:                               ; preds = %omp.private.init8, %omp.private.init9
+! CHECK-NEXT:    br label %omp.region.cont6
 
-! CHECK:       omp.region.cont:                                  ; preds = %omp.private.init4
+! CHECK:       omp.region.cont6:                                 ; preds = %omp.private.init10
 ! CHECK-NEXT:    %{{.*}} = phi ptr
 ! CHECK-NEXT:    br label %omp.private.copy
 
-! CHECK:       omp.private.copy:                                 ; preds = %omp.region.cont
+! CHECK:       omp.private.copy:
 ! CHECK-NEXT:    br label %omp.private.copy12
 
 ! CHECK:       omp.private.copy12:                               ; preds = %omp.private.copy
@@ -96,15 +102,9 @@ subroutine worst_case(a, b, c, d)
 
 ! CHECK:       omp.region.cont15:                                ; preds = %omp.private.copy18
 ! CHECK-NEXT:    %{{.*}} = phi ptr
-! CHECK-NEXT:    br label %omp.region.after_alloca
-
-! CHECK:       omp.region.after_alloca:
-! CHECK-NEXT:    br label %omp.par.region
-
-! CHECK:       omp.par.region:                                   ; preds = %omp.region.after_alloca
 ! CHECK-NEXT:    br label %omp.reduction.init
 
-! CHECK:       omp.reduction.init:                               ; preds = %omp.par.region
+! CHECK:       omp.reduction.init:                               ; preds = %omp.region.cont15
 !                [deffered stores for results of reduction alloc regions]
 ! CHECK:         br label %[[VAL_96:.*]]
 
@@ -211,13 +211,13 @@ subroutine worst_case(a, b, c, d)
 !                [source length was non-zero: call assign runtime]
 ! CHECK:         br label %omp.private.copy14
 
-! CHECK:       omp.private.init2:                               ; preds = %omp.private.init1
-!                [var extent was non-zero: malloc a private array]
-! CHECK:         br label %omp.private.init4
-
 ! CHECK:       omp.private.init8:                               ; preds = %omp.private.init7
 !                [var extent was non-zero: malloc a private array]
 ! CHECK:         br label %omp.private.init10
 
+! CHECK:       omp.private.init3:                               ; preds = %omp.private.init2
+!                [var extent was non-zero: malloc a private array]
+! CHECK:         br label %omp.private.init5
+
 ! CHECK:       omp.par.outlined.exit.exitStub:                   ; preds = %omp.region.cont52
 ! CHECK-NEXT:    ret void
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ea044fe0c8c196..3f47dcfc0fbbbd 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1025,6 +1025,18 @@ mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
   }
 }
 
+static void
+setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
+                                    llvm::BasicBlock *block = nullptr) {
+  if (block == nullptr)
+    block = builder.GetInsertBlock();
+
+  if (block->empty() || block->getTerminator() == nullptr)
+    builder.SetInsertPoint(block);
+  else
+    builder.SetInsertPoint(block->getTerminator());
+}
+
 /// Inline reductions' `init` regions. This functions assumes that the
 /// `builder`'s insertion point is where the user wants the `init` regions to be
 /// inlined; i.e. it does not try to find a proper insertion location for the
@@ -1063,10 +1075,7 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
     }
   }
 
-  if (initBlock->empty() || initBlock->getTerminator() == nullptr)
-    builder.SetInsertPoint(initBlock);
-  else
-    builder.SetInsertPoint(initBlock->getTerminator());
+  setInsertPointForPossiblyEmptyBlock(builder, initBlock);
 
   // store result of the alloc region to the allocated pointer to the real
   // reduction variable
@@ -1091,11 +1100,7 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
     assert(phis.size() == 1 && "expected one value to be yielded from the "
                                "reduction neutral element declaration region");
 
-    if (builder.GetInsertBlock()->empty() ||
-        builder.GetInsertBlock()->getTerminator() == nullptr)
-      builder.SetInsertPoint(builder.GetInsertBlock());
-    else
-      builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+    setInsertPointForPossiblyEmptyBlock(builder);
 
     if (isByRef[i]) {
       if (!reductionDecls[i].getAllocRegion().empty())
@@ -1331,18 +1336,15 @@ findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
 
 /// Initialize a single (first)private variable. You probably want to use
 /// allocateAndInitPrivateVars instead of this.
-static llvm::Error
-initPrivateVar(llvm::IRBuilderBase &builder,
-               LLVM::ModuleTranslation &moduleTranslation,
-               omp::PrivateClauseOp &privDecl, Value mlirPrivVar,
-               BlockArgument &blockArg, llvm::Value *llvmPrivateVar,
-               llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
-               llvm::BasicBlock *privInitBlock,
-               llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
+static llvm::Error initPrivateVar(
+    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
+    omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
+    llvm::SmallVectorImpl<llvm::Value *>::iterator llvmPrivateVarIt,
+    llvm::BasicBlock *privInitBlock,
+    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
   Region &initRegion = privDecl.getInitRegion();
   if (initRegion.empty()) {
-    moduleTranslation.mapValue(blockArg, llvmPrivateVar);
-    llvmPrivateVars.push_back(llvmPrivateVar);
+    moduleTranslation.mapValue(blockArg, *llvmPrivateVarIt);
     return llvm::Error::success();
   }
 
@@ -1351,11 +1353,10 @@ initPrivateVar(llvm::IRBuilderBase &builder,
       mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
   assert(nonPrivateVar);
   moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
-  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
+  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), *llvmPrivateVarIt);
 
   // in-place convert the private initialization region
   SmallVector<llvm::Value *, 1> phis;
-  builder.SetInsertPoint(privInitBlock->getTerminator());
   if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
                                      moduleTranslation, &phis)))
     return llvm::createStringError(
@@ -1367,7 +1368,7 @@ initPrivateVar(llvm::IRBuilderBase &builder,
   // variable in case the region is operating on arguments by-value (e.g.
   // Fortran character boxes).
   moduleTranslation.mapValue(blockArg, phis[0]);
-  llvmPrivateVars.push_back(phis[0]);
+  *llvmPrivateVarIt = phis[0];
 
   // clear init region block argument mapping in case it needs to be
   // re-created with a different source for another use of the same
@@ -1376,17 +1377,49 @@ initPrivateVar(llvm::IRBuilderBase &builder,
   return llvm::Error::success();
 }
 
+static llvm::Error
+initPrivateVars(llvm::IRBuilderBase &builder,
+                LLVM::ModuleTranslation &moduleTranslation,
+                MutableArrayRef<BlockArgument> privateBlockArgs,
+                MutableArrayRef<omp::PrivateClauseOp> privateDecls,
+                MutableArrayRef<mlir::Value> mlirPrivateVars,
+                llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+                llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
+  if (privateBlockArgs.empty())
+    return llvm::Error::success();
+
+  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
+  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
+
+  unsigned idx = 0;
+  for (auto [privDecl, mlirPrivVar, blockArg] :
+       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
+    llvm::Error err = initPrivateVar(
+        builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
+        llvmPrivateVars.begin() + idx, privInitBlock, mappedPrivateVars);
+
+    if (err)
+      return err;
+
+    setInsertPointForPossiblyEmptyBlock(builder);
+    ++idx;
+  }
+
+  return llvm::Error::success();
+}
+
 /// Allocate and initialize delayed private variables. Returns the basic block
 /// which comes after all of these allocations. llvm::Value * for each of these
 /// private variables are populated in llvmPrivateVars.
-static llvm::Expected<llvm::BasicBlock *> allocateAndInitPrivateVars(
-    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
-    MutableArrayRef<BlockArgument> privateBlockArgs,
-    MutableArrayRef<omp::PrivateClauseOp> privateDecls,
-    MutableArrayRef<mlir::Value> mlirPrivateVars,
-    llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
-    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
-    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
+static llvm::Expected<llvm::BasicBlock *>
+allocatePrivateVars(llvm::IRBuilderBase &builder,
+                    LLVM::ModuleTranslation &moduleTranslation,
+                    MutableArrayRef<BlockArgument> privateBlockArgs,
+                    MutableArrayRef<omp::PrivateClauseOp> privateDecls,
+                    MutableArrayRef<mlir::Value> mlirPrivateVars,
+                    llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+                    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+                    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
   // Allocate private vars
   llvm::BranchInst *allocaTerminator =
       llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1404,9 +1437,6 @@ static llvm::Expected<llvm::BasicBlock *> allocateAndInitPrivateVars(
 
   llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
 
-  llvm::BasicBlock *privInitBlock = nullptr;
-  if (!privateBlockArgs.empty())
-    privInitBlock = splitBB(builder, true, "omp.private.init");
   for (auto [privDecl, mlirPrivVar, blockArg] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
     llvm::Type *llvmAllocType =
@@ -1414,24 +1444,18 @@ static llvm::Expected<llvm::BasicBlock *> allocateAndInitPrivateVars(
     builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
     llvm::Value *llvmPrivateVar = builder.CreateAlloca(
         llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
-
-    llvm::Error err = initPrivateVar(
-        builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
-        llvmPrivateVar, llvmPrivateVars, privInitBlock, mappedPrivateVars);
-    if (err)
-      return err;
+    llvmPrivateVars.push_back(llvmPrivateVar);
   }
+
   return afterAllocas;
 }
 
 static LogicalResult
-initFirstPrivateVars(llvm::IRBuilderBase &builder,
+copyFirstPrivateVars(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation,
                      SmallVectorImpl<mlir::Value> &mlirPrivateVars,
                      SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
-                     SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
-                     llvm::BasicBlock *afterAllocas) {
-  llvm::IRBuilderBase::InsertPointGuard guard(builder);
+                     SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
   // Apply copy region for firstprivate.
   bool needsFirstprivate =
       llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
@@ -1442,13 +1466,9 @@ initFirstPrivateVars(llvm::IRBuilderBase &builder,
   if (!needsFirstprivate)
     return success();
 
-  assert(afterAllocas->getSinglePredecessor());
-
-  // Find the end of the allocation blocks
-  builder.SetInsertPoint(afterAllocas->getSinglePredecessor()->getTerminator());
   llvm::BasicBlock *copyBlock =
       splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
-  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
 
   for (auto [decl, mlirVar, llvmVar] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
@@ -1467,11 +1487,12 @@ initFirstPrivateVars(llvm::IRBuilderBase &builder,
     moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
 
     // in-place convert copy region
-    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
     if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
                                        moduleTranslation)))
       return decl.emitError("failed to inline `copy` region of `omp.private`");
 
+    setInsertPointForPossiblyEmptyBlock(builder);
+
     // ignore unused value yielded from copy region
 
     // clear copy region block argument mapping in case it needs to be
@@ -1757,20 +1778,25 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
         moduleTranslation, allocaIP);
 
-    llvm::Expected<llvm::BasicBlock *> afterAllocas =
-        allocateAndInitPrivateVars(builder, moduleTranslation, privateBlockArgs,
-                                   privateDecls, mlirPrivateVars,
-                                   llvmPrivateVars, allocaIP);
+    llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+        builder, moduleTranslation, privateBlockArgs, privateDecls,
+        mlirPrivateVars, llvmPrivateVars, allocaIP);
     if (handleError(afterAllocas, *taskOp).failed())
       return llvm::make_error<PreviouslyReportedError>();
 
-    if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                    llvmPrivateVars, privateDecls,
-                                    afterAllocas.get())))
+    builder.restoreIP(codegenIP);
+    if (handleError(initPrivateVars(builder, moduleTranslation,
+                                    privateBlockArgs, privateDecls,
+                                    mlirPrivateVars, llvmPrivateVars),
+                    *taskOp)
+            .failed())
+      return llvm::make_error<PreviouslyReportedError>();
+
+    if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+                                    llvmPrivateVars, privateDecls)))
       return llvm::make_error<PreviouslyReportedError>();
 
     // translate the body of the task:
-    builder.restoreIP(codegenIP);
     auto continuationBlockOrError = convertOmpOpRegions(
         taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
     if (failed(handleError(continuationBlockOrError, *taskOp)))
@@ -1892,7 +1918,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   SmallVector<llvm::Value *> privateReductionVariables(
       wsloopOp.getNumReductionVars());
 
-  llvm::Expected<llvm::BasicBlock *> afterAllocas = allocateAndInitPrivateVars(
+  llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
       builder, moduleTranslation, privateBlockArgs, privateDecls,
       mlirPrivateVars, llvmPrivateVars, allocaIP);
   if (handleError(afterAllocas, opInst).failed())
@@ -1911,9 +1937,15 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                                 deferredStores, isByRef)))
     return failure();
 
-  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  llvmPrivateVars, privateDecls,
-                                  afterAllocas.get())))
+  if (handleError(initPrivateVars(builder, moduleTranslation, privateBlockArgs,
+                                  privateDecls, mlirPrivateVars,
+                                  llvmPrivateVars),
+                  opInst)
+          .failed())
+    return failure();
+
+  if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+                                  llvmPrivateVars, privateDecls)))
     return failure();
 
   assert(afterAllocas.get()->getSinglePredecessor());
@@ -2064,10 +2096,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
 
   auto bodyGenCB = [&](InsertPointTy allocaIP,
                        InsertPointTy codeGenIP) -> llvm::Error {
-    llvm::Expected<llvm::BasicBlock *> afterAllocas =
-        allocateAndInitPrivateVars(builder, moduleTranslation, privateBlockArgs,
-                                   privateDecls, mlirPrivateVars,
-                                   llvmPrivateVars, allocaIP);
+    llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+        builder, moduleTranslation, privateBlockArgs, privateDecls,
+        mlirPrivateVars, llvmPrivateVars, allocaIP);
     if (handleError(afterAllocas, *opInst).failed())
       return llvm::make_error<...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/125699


More information about the flang-commits mailing list