[Mlir-commits] [mlir] [OpenMP][OMPIRBuilder] Add delayed privatization support for `wsloop` (PR #118463)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Tue Dec 3 02:26:50 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-openmp

Author: Kareem Ergawy (ergawy)

<details>
<summary>Changes</summary>

Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for
delayed privatization. This also refactors a few bit of code to isolate
the logic needed for `firstprivate` initialization in a shared util that
can be used across constructs that need it.

Parent PR: https://github.com/llvm/llvm-project/pull/118447. Only latest commit is relevant for this PR.

---

Patch is 29.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118463.diff


6 Files Affected:

- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+209-217) 
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-19) 
- (added) mlir/test/Target/LLVMIR/openmp-wsloop-private.mlir (+79) 
- (modified) mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir (+2-2) 


``````````diff
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 35b0633a04a352..e189480e6b07d3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkAllocate(op, result);
         checkLinear(op, result);
         checkOrder(op, result);
-        checkPrivate(op, result);
       })
       .Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
       .Case([&](omp::SimdOp op) {
@@ -1030,6 +1029,99 @@ mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
   }
 }
 
+template <typename OP>
+static LogicalResult
+initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
+                  llvm::IRBuilderBase &builder,
+                  LLVM::ModuleTranslation &moduleTranslation,
+                  llvm::BasicBlock *latestAllocaBlock,
+                  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+                  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+                  DenseMap<Value, llvm::Value *> &reductionVariableMap,
+                  llvm::ArrayRef<bool> isByRef,
+                  SmallVectorImpl<DeferredStore> &deferredStores) {
+  if (op.getNumReductionVars() == 0)
+    return success();
+
+  llvm::IRBuilderBase::InsertPointGuard guard(builder);
+
+  builder.SetInsertPoint(latestAllocaBlock->getTerminator());
+  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
+  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
+      latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
+  builder.restoreIP(allocaIP);
+  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
+
+  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+    if (isByRef[i]) {
+      if (!reductionDecls[i].getAllocRegion().empty())
+        continue;
+
+      // TODO: remove after all users of by-ref are updated to use the alloc
+      // region: Allocate reduction variable (which is a pointer to the real
+      // reduciton variable allocated in the inlined region)
+      byRefVars[i] = builder.CreateAlloca(
+          moduleTranslation.convertType(reductionDecls[i].getType()));
+    }
+  }
+
+  builder.SetInsertPoint(&*initBlock->getFirstNonPHIOrDbgOrAlloca());
+
+  // store result of the alloc region to the allocated pointer to the real
+  // reduction variable
+  for (auto [data, addr] : deferredStores)
+    builder.CreateStore(data, addr);
+
+  // Before the loop, store the initial values of reductions into reduction
+  // variables. Although this could be done after allocas, we don't want to mess
+  // up with the alloca insertion point.
+  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+    SmallVector<llvm::Value *, 1> phis;
+
+    // map block argument to initializer region
+    mapInitializationArgs(op, moduleTranslation, reductionDecls,
+                          reductionVariableMap, i);
+
+    if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
+                                       "omp.reduction.neutral", builder,
+                                       moduleTranslation, &phis)))
+      return failure();
+
+    assert(phis.size() == 1 && "expected one value to be yielded from the "
+                               "reduction neutral element declaration region");
+
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    if (isByRef[i]) {
+      if (!reductionDecls[i].getAllocRegion().empty())
+        // done in allocReductionVars
+        continue;
+
+      // TODO: this path can be removed once all users of by-ref are updated to
+      // use an alloc region
+
+      // Store the result of the inlined region to the allocated reduction var
+      // ptr
+      builder.CreateStore(phis[0], byRefVars[i]);
+
+      privateReductionVariables[i] = byRefVars[i];
+      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+      reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
+    } else {
+      // for by-ref case the store is inside of the reduction region
+      builder.CreateStore(phis[0], privateReductionVariables[i]);
+      // the rest was handled in allocByValReductionVars
+    }
+
+    // forget the mapping for the initializer region because we might need a
+    // different mapping if this reduction declaration is re-used for a
+    // different variable
+    moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
+  }
+
+  return success();
+}
+
 /// Collect reduction info
 template <typename T>
 static void collectReductionInfo(
@@ -1183,6 +1275,7 @@ static LogicalResult allocAndInitializeReductionVars(
   if (op.getNumReductionVars() == 0)
     return success();
 
+  llvm::IRBuilderBase::InsertPointGuard guard(builder);
   SmallVector<DeferredStore> deferredStores;
 
   if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
@@ -1191,59 +1284,10 @@ static LogicalResult allocAndInitializeReductionVars(
                                 deferredStores, isByRef)))
     return failure();
 
-  // store result of the alloc region to the allocated pointer to the real
-  // reduction variable
-  for (auto [data, addr] : deferredStores)
-    builder.CreateStore(data, addr);
-
-  // Before the loop, store the initial values of reductions into reduction
-  // variables. Although this could be done after allocas, we don't want to mess
-  // up with the alloca insertion point.
-  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
-    SmallVector<llvm::Value *, 1> phis;
-
-    // map block argument to initializer region
-    mapInitializationArgs(op, moduleTranslation, reductionDecls,
-                          reductionVariableMap, i);
-
-    if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
-                                       "omp.reduction.neutral", builder,
-                                       moduleTranslation, &phis)))
-      return failure();
-    assert(phis.size() == 1 && "expected one value to be yielded from the "
-                               "reduction neutral element declaration region");
-    if (isByRef[i]) {
-      if (!reductionDecls[i].getAllocRegion().empty())
-        // done in allocReductionVars
-        continue;
-
-      // TODO: this path can be removed once all users of by-ref are updated to
-      // use an alloc region
-
-      // Allocate reduction variable (which is a pointer to the real reduction
-      // variable allocated in the inlined region)
-      llvm::Value *var = builder.CreateAlloca(
-          moduleTranslation.convertType(reductionDecls[i].getType()));
-      // Store the result of the inlined region to the allocated reduction var
-      // ptr
-      builder.CreateStore(phis[0], var);
-
-      privateReductionVariables[i] = var;
-      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
-      reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
-    } else {
-      // for by-ref case the store is inside of the reduction region
-      builder.CreateStore(phis[0], privateReductionVariables[i]);
-      // the rest was handled in allocByValReductionVars
-    }
-
-    // forget the mapping for the initializer region because we might need a
-    // different mapping if this reduction declaration is re-used for a
-    // different variable
-    moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
-  }
-
-  return success();
+  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
+                           allocaIP.getBlock(), reductionDecls,
+                           privateReductionVariables, reductionVariableMap,
+                           isByRef, deferredStores);
 }
 
 /// Allocate delayed private variables. Returns the basic block which comes
@@ -1257,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
                     MutableArrayRef<mlir::Value> mlirPrivateVars,
                     llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
                     const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+  llvm::IRBuilderBase::InsertPointGuard guard(builder);
   // Allocate private vars
   llvm::BranchInst *allocaTerminator =
       llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1318,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
   return afterAllocas;
 }
 
+static LogicalResult
+initFirstPrivateVars(llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation,
+                     SmallVectorImpl<mlir::Value> &mlirPrivateVars,
+                     SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+                     SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
+                     llvm::BasicBlock *afterAllocas) {
+  llvm::IRBuilderBase::InsertPointGuard guard(builder);
+  // Apply copy region for firstprivate.
+  bool needsFirstprivate =
+      llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+        return privOp.getDataSharingType() ==
+               omp::DataSharingClauseType::FirstPrivate;
+      });
+
+  if (needsFirstprivate) {
+    // Find the end of the allocation blocks
+    builder.SetInsertPoint(
+        afterAllocas->getSinglePredecessor()->getTerminator());
+    llvm::BasicBlock *copyBlock =
+        splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+    builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+  }
+
+  for (auto [decl, mlirVar, llvmVar] :
+       llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+    if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+      continue;
+
+    // copyRegion implements `lhs = rhs`
+    Region &copyRegion = decl.getCopyRegion();
+
+    // map copyRegion rhs arg
+    llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+    assert(nonPrivateVar);
+    moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+    // map copyRegion lhs arg
+    moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+    // in-place convert copy region
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+    if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
+                                       moduleTranslation)))
+      return decl.emitError("failed to inline `copy` region of `omp.private`");
+
+    // ignore unused value yielded from copy region
+
+    // clear copy region block argument mapping in case it needs to be
+    // re-created with different sources for reuse of the same reduction
+    // decl
+    moduleTranslation.forgetMapping(copyRegion);
+  }
+
+  return success();
+}
+
 static LogicalResult
 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation) {
@@ -1577,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     if (handleError(afterAllocas, *taskOp).failed())
       return llvm::make_error<PreviouslyReportedError>();
 
-    // Apply copy region for firstprivate
-    bool needsFirstPrivate =
-        llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
-          return privOp.getDataSharingType() ==
-                 omp::DataSharingClauseType::FirstPrivate;
-        });
-    if (needsFirstPrivate) {
-      // Find the end of the allocation blocks
-      assert(afterAllocas.get()->getSinglePredecessor());
-      builder.SetInsertPoint(
-          afterAllocas.get()->getSinglePredecessor()->getTerminator());
-      llvm::BasicBlock *copyBlock =
-          splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
-      builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
-    }
-    for (auto [decl, mlirVar, llvmVar] :
-         llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
-      if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
-        continue;
-
-      // copyRegion implements `lhs = rhs`
-      Region &copyRegion = decl.getCopyRegion();
-
-      // map copyRegion rhs arg
-      llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
-      assert(nonPrivateVar);
-      moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
-      // map copyRegion lhs arg
-      moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
-      // in-place convert copy region
-      builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
-      if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
-                                         builder, moduleTranslation)))
-        return llvm::createStringError(
-            "failed to inline `copy` region of an `omp.private` op in taskOp");
-
-      // ignore unused value yielded from copy region
-
-      // clear copy region block argument mapping in case it needs to be
-      // re-created with different source for reuse of the same reduction decl
-      moduleTranslation.forgetMapping(copyRegion);
-    }
+    if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+                                    llvmPrivateVars, privateDecls,
+                                    afterAllocas.get())))
+      return llvm::make_error<PreviouslyReportedError>();
 
     // translate the body of the task:
     builder.restoreIP(codegenIP);
@@ -1732,6 +1794,18 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
     chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
   }
 
+  MutableArrayRef<BlockArgument> privateBlockArgs =
+      cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
+  SmallVector<mlir::Value> mlirPrivateVars;
+  SmallVector<llvm::Value *> llvmPrivateVars;
+  SmallVector<omp::PrivateClauseOp> privateDecls;
+  mlirPrivateVars.reserve(privateBlockArgs.size());
+  llvmPrivateVars.reserve(privateBlockArgs.size());
+  collectPrivatizationDecls(wsloopOp, privateDecls);
+
+  for (mlir::Value privateVar : wsloopOp.getPrivateVars())
+    mlirPrivateVars.push_back(privateVar);
+
   SmallVector<omp::DeclareReductionOp> reductionDecls;
   collectReductionDecls(wsloopOp, reductionDecls);
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
@@ -1739,15 +1813,37 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
 
   SmallVector<llvm::Value *> privateReductionVariables(
       wsloopOp.getNumReductionVars());
+
+  llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+      builder, moduleTranslation, privateBlockArgs, privateDecls,
+      mlirPrivateVars, llvmPrivateVars, allocaIP);
+  if (handleError(afterAllocas, opInst).failed())
+    return failure();
+
   DenseMap<Value, llvm::Value *> reductionVariableMap;
 
   MutableArrayRef<BlockArgument> reductionArgs =
       cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
 
-  if (failed(allocAndInitializeReductionVars(
-          wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
-          reductionDecls, privateReductionVariables, reductionVariableMap,
-          isByRef)))
+  SmallVector<DeferredStore> deferredStores;
+
+  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
+                                moduleTranslation, allocaIP, reductionDecls,
+                                privateReductionVariables, reductionVariableMap,
+                                deferredStores, isByRef)))
+    return failure();
+
+  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+                                  llvmPrivateVars, privateDecls,
+                                  afterAllocas.get())))
+    return failure();
+
+  assert(afterAllocas.get()->getSinglePredecessor());
+  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
+                               moduleTranslation,
+                               afterAllocas.get()->getSinglePredecessor(),
+                               reductionDecls, privateReductionVariables,
+                               reductionVariableMap, isByRef, deferredStores)))
     return failure();
 
   // TODO: Replace this with proper composite translation support.
@@ -1914,122 +2010,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
             deferredStores, isByRef)))
       return llvm::make_error<PreviouslyReportedError>();
 
-    // Apply copy region for firstprivate.
-    bool needsFirstprivate =
-        llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
-          return privOp.getDataSharingType() ==
-                 omp::DataSharingClauseType::FirstPrivate;
-        });
-    if (needsFirstprivate) {
-      // Find the end of the allocation blocks
-      assert(afterAllocas.get()->getSinglePredecessor());
-      builder.SetInsertPoint(
-          afterAllocas.get()->getSinglePredecessor()->getTerminator());
-      llvm::BasicBlock *copyBlock =
-          splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
-      builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
-    }
-    for (auto [decl, mlirVar, llvmVar] :
-         llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
-      if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
-        continue;
-
-      // copyRegion implements `lhs = rhs`
-      Region &copyRegion = decl.getCopyRegion();
-
-      // map copyRegion rhs arg
-      llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
-      assert(nonPrivateVar);
-      moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
-      // map copyRegion lhs arg
-      moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
-      // in-place convert copy region
-      builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
-      if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
-                                         builder, moduleTranslation)))
-        return llvm::createStringError(
-            "failed to inline `copy` region of `omp.private`");
-
-      // ignore unused value yielded from copy region
-
-      // clear copy region block argument mapping in case it needs to be
-      // re-created with different sources for reuse of the same reduction
-      // decl
-      moduleTranslation.forgetMapping(copyRegion);
-    }
-
-    // Initialize reduction vars
-    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
-    llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
-    allocaIP =
-        InsertPointTy(allocaIP.getBlock(),
-                      allocaIP.getBlock()->getTerminator()->getIterator());
-
-    builder.restoreIP(allocaIP);
-    SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
-    for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
-      if (isByRef[i]) {
-        if (!reductionDecls[i].getAllocRegion().empty())
-          continue;
-
-        // TODO: remove after all users of by-ref are updated to use the alloc
-        // region: Allocate reduction variable (which is a pointer to the real
-        // reduciton variable allocated in the inlined region)
-        byRefVars[i] = builder.CreateAlloca(
-            moduleTranslation.convertType(reductionDecls[i].getType()));
-      }
-    }
-
-    builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
-
-    // insert stores deferred until after all allocas
-    // these store the results of the alloc region into the allocation for the
-    // pointer to the reduction variable
-    for (auto [data, addr] : deferredStores)
-      builder.CreateStore(data, addr);
-
-    for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
-      SmallVector<llvm::Value *> phis;
-
-      // map the block argument
-      mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
-                            reductionVariableMap, i);
-      if (failed(inlineConvertOmpRegions(
-              reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
-              builder, moduleTranslation, &phis)))
-        return llvm::createStringError(
-            "failed to inline `init` ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/118463


More information about the Mlir-commits mailing list