[Mlir-commits] [mlir] [OpenMP][OMPIRBuilder] Add delayed privatization support for `wsloop` (PR #118463)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Dec 3 02:26:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-openmp
Author: Kareem Ergawy (ergawy)
<details>
<summary>Changes</summary>
Extend MLIR-to-LLVM lowering by adding delayed privatization support for
`omp.wsloop`. This also refactors a few bits of code to isolate
the logic needed for `firstprivate` initialization in a shared util that
can be used across constructs that need it.
Parent PR: https://github.com/llvm/llvm-project/pull/118447. Only the latest commit is relevant for this PR.
---
Patch is 29.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118463.diff
6 Files Affected:
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+209-217)
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir (+1-1)
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir (+1-1)
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-19)
- (added) mlir/test/Target/LLVMIR/openmp-wsloop-private.mlir (+79)
- (modified) mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir (+2-2)
``````````diff
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 35b0633a04a352..e189480e6b07d3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
- checkPrivate(op, result);
})
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
.Case([&](omp::SimdOp op) {
@@ -1030,6 +1029,99 @@ mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
}
}
+template <typename OP>
+static LogicalResult
+initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::BasicBlock *latestAllocaBlock,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ llvm::ArrayRef<bool> isByRef,
+ SmallVectorImpl<DeferredStore> &deferredStores) {
+ if (op.getNumReductionVars() == 0)
+ return success();
+
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+
+ builder.SetInsertPoint(latestAllocaBlock->getTerminator());
+ llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
+ auto allocaIP = llvm::IRBuilderBase::InsertPoint(
+ latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
+ builder.restoreIP(allocaIP);
+ SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
+
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ continue;
+
+ // TODO: remove after all users of by-ref are updated to use the alloc
+ // region: Allocate reduction variable (which is a pointer to the real
+ // reduciton variable allocated in the inlined region)
+ byRefVars[i] = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ }
+ }
+
+ builder.SetInsertPoint(&*initBlock->getFirstNonPHIOrDbgOrAlloca());
+
+ // store result of the alloc region to the allocated pointer to the real
+ // reduction variable
+ for (auto [data, addr] : deferredStores)
+ builder.CreateStore(data, addr);
+
+ // Before the loop, store the initial values of reductions into reduction
+ // variables. Although this could be done after allocas, we don't want to mess
+ // up with the alloca insertion point.
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ SmallVector<llvm::Value *, 1> phis;
+
+ // map block argument to initializer region
+ mapInitializationArgs(op, moduleTranslation, reductionDecls,
+ reductionVariableMap, i);
+
+ if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
+ "omp.reduction.neutral", builder,
+ moduleTranslation, &phis)))
+ return failure();
+
+ assert(phis.size() == 1 && "expected one value to be yielded from the "
+ "reduction neutral element declaration region");
+
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ // done in allocReductionVars
+ continue;
+
+ // TODO: this path can be removed once all users of by-ref are updated to
+ // use an alloc region
+
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], byRefVars[i]);
+
+ privateReductionVariables[i] = byRefVars[i];
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
+
+ // forget the mapping for the initializer region because we might need a
+ // different mapping if this reduction declaration is re-used for a
+ // different variable
+ moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
+ }
+
+ return success();
+}
+
/// Collect reduction info
template <typename T>
static void collectReductionInfo(
@@ -1183,6 +1275,7 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
SmallVector<DeferredStore> deferredStores;
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
@@ -1191,59 +1284,10 @@ static LogicalResult allocAndInitializeReductionVars(
deferredStores, isByRef)))
return failure();
- // store result of the alloc region to the allocated pointer to the real
- // reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- // Before the loop, store the initial values of reductions into reduction
- // variables. Although this could be done after allocas, we don't want to mess
- // up with the alloca insertion point.
- for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *, 1> phis;
-
- // map block argument to initializer region
- mapInitializationArgs(op, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
-
- if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
- "omp.reduction.neutral", builder,
- moduleTranslation, &phis)))
- return failure();
- assert(phis.size() == 1 && "expected one value to be yielded from the "
- "reduction neutral element declaration region");
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- // done in allocReductionVars
- continue;
-
- // TODO: this path can be removed once all users of by-ref are updated to
- // use an alloc region
-
- // Allocate reduction variable (which is a pointer to the real reduction
- // variable allocated in the inlined region)
- llvm::Value *var = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- // Store the result of the inlined region to the allocated reduction var
- // ptr
- builder.CreateStore(phis[0], var);
-
- privateReductionVariables[i] = var;
- moduleTranslation.mapValue(reductionArgs[i], phis[0]);
- reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
- } else {
- // for by-ref case the store is inside of the reduction region
- builder.CreateStore(phis[0], privateReductionVariables[i]);
- // the rest was handled in allocByValReductionVars
- }
-
- // forget the mapping for the initializer region because we might need a
- // different mapping if this reduction declaration is re-used for a
- // different variable
- moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
- }
-
- return success();
+ return initReductionVars(op, reductionArgs, builder, moduleTranslation,
+ allocaIP.getBlock(), reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ isByRef, deferredStores);
}
/// Allocate delayed private variables. Returns the basic block which comes
@@ -1257,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1318,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}
+static LogicalResult
+initFirstPrivateVars(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ SmallVectorImpl<mlir::Value> &mlirPrivateVars,
+ SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+ SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
+ llvm::BasicBlock *afterAllocas) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ // Apply copy region for firstprivate.
+ bool needsFirstprivate =
+ llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+ return privOp.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate;
+ });
+
+ if (needsFirstprivate) {
+ // Find the end of the allocation blocks
+ builder.SetInsertPoint(
+ afterAllocas->getSinglePredecessor()->getTerminator());
+ llvm::BasicBlock *copyBlock =
+ splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+ builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+ }
+
+ for (auto [decl, mlirVar, llvmVar] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+ if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+ continue;
+
+ // copyRegion implements `lhs = rhs`
+ Region &copyRegion = decl.getCopyRegion();
+
+ // map copyRegion rhs arg
+ llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+ assert(nonPrivateVar);
+ moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+ // map copyRegion lhs arg
+ moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+ // in-place convert copy region
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+ if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
+ moduleTranslation)))
+ return decl.emitError("failed to inline `copy` region of `omp.private`");
+
+ // ignore unused value yielded from copy region
+
+ // clear copy region block argument mapping in case it needs to be
+ // re-created with different sources for reuse of the same reduction
+ // decl
+ moduleTranslation.forgetMapping(copyRegion);
+ }
+
+ return success();
+}
+
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1577,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate
- bool needsFirstPrivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstPrivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of an `omp.private` op in taskOp");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different source for reuse of the same reduction decl
- moduleTranslation.forgetMapping(copyRegion);
- }
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return llvm::make_error<PreviouslyReportedError>();
// translate the body of the task:
builder.restoreIP(codegenIP);
@@ -1732,6 +1794,18 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}
+ MutableArrayRef<BlockArgument> privateBlockArgs =
+ cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
+ SmallVector<llvm::Value *> llvmPrivateVars;
+ SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
+ llvmPrivateVars.reserve(privateBlockArgs.size());
+ collectPrivatizationDecls(wsloopOp, privateDecls);
+
+ for (mlir::Value privateVar : wsloopOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
+
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
@@ -1739,15 +1813,37 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
+
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
+ if (handleError(afterAllocas, opInst).failed())
+ return failure();
+
DenseMap<Value, llvm::Value *> reductionVariableMap;
MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
- if (failed(allocAndInitializeReductionVars(
- wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables, reductionVariableMap,
- isByRef)))
+ SmallVector<DeferredStore> deferredStores;
+
+ if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation, allocaIP, reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ deferredStores, isByRef)))
+ return failure();
+
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return failure();
+
+ assert(afterAllocas.get()->getSinglePredecessor());
+ if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation,
+ afterAllocas.get()->getSinglePredecessor(),
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap, isByRef, deferredStores)))
return failure();
// TODO: Replace this with proper composite translation support.
@@ -1914,122 +2010,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate.
- bool needsFirstprivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstprivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of `omp.private`");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different sources for reuse of the same reduction
- // decl
- moduleTranslation.forgetMapping(copyRegion);
- }
-
- // Initialize reduction vars
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
- allocaIP =
- InsertPointTy(allocaIP.getBlock(),
- allocaIP.getBlock()->getTerminator()->getIterator());
-
- builder.restoreIP(allocaIP);
- SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- continue;
-
- // TODO: remove after all users of by-ref are updated to use the alloc
- // region: Allocate reduction variable (which is a pointer to the real
- // reduciton variable allocated in the inlined region)
- byRefVars[i] = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- }
- }
-
- builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
-
- // insert stores deferred until after all allocas
- // these store the results of the alloc region into the allocation for the
- // pointer to the reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *> phis;
-
- // map the block argument
- mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
- if (failed(inlineConvertOmpRegions(
- reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
- builder, moduleTranslation, &phis)))
- return llvm::createStringError(
- "failed to inline `init` ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/118463
More information about the Mlir-commits
mailing list