[Mlir-commits] [mlir] [mlir][OpenMP] Lower REDUCTION clause for SECTIONS construct (PR #97859)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Jul 5 14:13:13 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-openmp
@llvm/pr-subscribers-flang-openmp
Author: Tom Eccles (tblah)
<details>
<summary>Changes</summary>
This shares code with WsloopOp (the changes to WsloopOp should be NFC, i.e. no functional change). OpenMPIRBuilder basically implements SECTIONS as a wsloop over a case statement, with each SECTION as the case for a particular value of the loop induction variable.
Unfortunately it proved very difficult to share code between these and ParallelOp. ParallelOp does quite a few things differently (doing more work inside of the bodygen callback and laying out blocks differently). Aligning reduction implementations for wsloop and parallel will probably involve functional changes to both, so I won't attempt that in this commit.
---
Patch is 36.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97859.diff
3 Files Affected:
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+297-213)
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+2)
- (added) mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir (+168)
``````````diff
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 35971fbacbf91..3dd87632a82c6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -555,6 +555,239 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
return bodyGenStatus;
}
+/// Allocate space for privatized reduction variables.
+template <typename T>
+static void allocByValReductionVars(
+ T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ llvm::ArrayRef<bool> isByRefs) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+
+ for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
+ if (isByRefs[i])
+ continue;
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ moduleTranslation.mapValue(reductionArgs[i], var);
+ privateReductionVariables[i] = var;
+ reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
+ }
+}
+
+/// Map input argument to all reduction initialization regions
+template <typename T>
+static void
+mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ unsigned i) {
+ // map input argument to the initialization region
+ mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
+ Region &initializerRegion = reduction.getInitializerRegion();
+ Block &entry = initializerRegion.front();
+ assert(entry.getNumArguments() == 1 &&
+ "the initialization region has one argument");
+
+ mlir::Value mlirSource = loop.getReductionVars()[i];
+ llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
+ assert(llvmSource && "lookup reduction var");
+ moduleTranslation.mapValue(entry.getArgument(0), llvmSource);
+}
+
+/// Collect reduction info
+template <typename T>
+static void collectReductionInfo(
+ T loop, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<OwningReductionGen> &owningReductionGens,
+ SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
+ const ArrayRef<llvm::Value *> privateReductionVariables,
+ SmallVectorImpl<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
+ unsigned numReductions = loop.getNumReductionVars();
+
+ for (unsigned i = 0; i < numReductions; ++i) {
+ owningReductionGens.push_back(
+ makeReductionGen(reductionDecls[i], builder, moduleTranslation));
+ owningAtomicReductionGens.push_back(
+ makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
+ }
+
+ // Collect the reduction information.
+ reductionInfos.reserve(numReductions);
+ for (unsigned i = 0; i < numReductions; ++i) {
+ llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
+ if (owningAtomicReductionGens[i])
+ atomicGen = owningAtomicReductionGens[i];
+ llvm::Value *variable =
+ moduleTranslation.lookupValue(loop.getReductionVars()[i]);
+ reductionInfos.push_back(
+ {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
+ privateReductionVariables[i],
+ /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
+ owningReductionGens[i],
+ /*ReductionGenClang=*/nullptr, atomicGen});
+ }
+}
+
+/// handling of DeclareReductionOp's cleanup region
+static LogicalResult
+inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,
+ llvm::ArrayRef<llvm::Value *> privateVariables,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase &builder, StringRef regionName,
+ bool shouldLoadCleanupRegionArg = true) {
+ for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
+ if (cleanupRegion->empty())
+ continue;
+
+ // map the argument to the cleanup region
+ Block &entry = cleanupRegion->front();
+
+ llvm::Instruction *potentialTerminator =
+ builder.GetInsertBlock()->empty() ? nullptr
+ : &builder.GetInsertBlock()->back();
+ if (potentialTerminator && potentialTerminator->isTerminator())
+ builder.SetInsertPoint(potentialTerminator);
+ llvm::Value *prviateVarValue =
+ shouldLoadCleanupRegionArg
+ ? builder.CreateLoad(
+ moduleTranslation.convertType(entry.getArgument(0).getType()),
+ privateVariables[i])
+ : privateVariables[i];
+
+ moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue);
+
+ if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
+ moduleTranslation)))
+ return failure();
+
+ // clear block argument mapping in case it needs to be re-created with a
+ // different source for another use of the same reduction decl
+ moduleTranslation.forgetMapping(*cleanupRegion);
+ }
+ return success();
+}
+
+// TODO: not used by ParallelOp
+template <class OP>
+static LogicalResult createReductionsAndCleanup(
+ OP op, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
+ // Process the reductions if required.
+ if (op.getNumReductionVars() == 0)
+ return success();
+
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ // Create the reduction generators. We need to own them here because
+ // ReductionInfo only accepts references to the generators.
+ SmallVector<OwningReductionGen> owningReductionGens;
+ SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
+ collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
+ owningReductionGens, owningAtomicReductionGens,
+ privateReductionVariables, reductionInfos);
+
+ // The call to createReductions below expects the block to have a
+ // terminator. Create an unreachable instruction to serve as terminator
+ // and remove it later.
+ llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
+ builder.SetInsertPoint(tempTerminator);
+ llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
+ ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
+ isByRef, op.getNowait());
+ if (!contInsertPoint.getBlock())
+ return op->emitOpError() << "failed to convert reductions";
+ auto nextInsertionPoint =
+ ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
+ tempTerminator->eraseFromParent();
+ builder.restoreIP(nextInsertionPoint);
+
+ // after the construct, deallocate private reduction variables
+ SmallVector<Region *> reductionRegions;
+ llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
+ [](omp::DeclareReductionOp reductionDecl) {
+ return &reductionDecl.getCleanupRegion();
+ });
+ return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
+ moduleTranslation, builder,
+ "omp.reduction.cleanup");
+ return success();
+}
+
+static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
+ if (!attr)
+ return {};
+ return *attr;
+}
+
+// TODO: not used by omp.parallel
+template <typename OP>
+static LogicalResult allocAndInitializeReductionVars(
+ OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ llvm::ArrayRef<bool> isByRef) {
+ if (op.getNumReductionVars() == 0)
+ return success();
+
+ allocByValReductionVars(op, reductionArgs, builder, moduleTranslation,
+ allocaIP, reductionDecls, privateReductionVariables,
+ reductionVariableMap, isByRef);
+
+ // Before the loop, store the initial values of reductions into reduction
+ // variables. Although this could be done after allocas, we don't want to mess
+ // up with the alloca insertion point.
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ SmallVector<llvm::Value *> phis;
+
+ // map block argument to initializer region
+ mapInitializationArg(op, moduleTranslation, reductionDecls, i);
+
+ if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
+ "omp.reduction.neutral", builder,
+ moduleTranslation, &phis)))
+ return failure();
+ assert(phis.size() == 1 && "expected one value to be yielded from the "
+ "reduction neutral element declaration region");
+ if (isByRef[i]) {
+ // Allocate reduction variable (which is a pointer to the real reduction
+ // variable allocated in the inlined region)
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], var);
+
+ privateReductionVariables[i] = var;
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
+
+ // forget the mapping for the initializer region because we might need a
+ // different mapping if this reduction declaration is re-used for a
+ // different variable
+ moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
+ }
+
+ return success();
+}
+
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -565,13 +798,38 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
auto sectionsOp = cast<omp::SectionsOp>(opInst);
// TODO: Support the following clauses: private, firstprivate, lastprivate,
- // reduction, allocate
- if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() ||
- !sectionsOp.getAllocateVars().empty() ||
+ // allocate
+ if (!sectionsOp.getAllocateVars().empty() ||
!sectionsOp.getAllocatorsVars().empty())
return emitError(sectionsOp.getLoc())
- << "reduction and allocate clauses are not supported for sections "
- "construct";
+ << "allocate clause is not supported for sections construct";
+
+ llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionVarsByref());
+ assert(isByRef.size() == sectionsOp.getNumReductionVars());
+
+ SmallVector<omp::DeclareReductionOp> reductionDecls;
+ collectReductionDecls(sectionsOp, reductionDecls);
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+ findAllocaInsertPoint(builder, moduleTranslation);
+
+ SmallVector<llvm::Value *> privateReductionVariables(
+ sectionsOp.getNumReductionVars());
+ DenseMap<Value, llvm::Value *> reductionVariableMap;
+
+ MutableArrayRef<BlockArgument> reductionArgs =
+ sectionsOp.getRegion().getArguments();
+
+ if (failed(allocAndInitializeReductionVars(
+ sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables, reductionVariableMap,
+ isByRef)))
+ return failure();
+
+ // Store the mapping between reduction variables and their private copies on
+ // ModuleTranslation stack. It can be then recovered when translating
+ // omp.reduce operations in a separate call.
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+ moduleTranslation, reductionVariableMap);
LogicalResult bodyGenStatus = success();
SmallVector<StorableBodyGenCallbackTy> sectionCBs;
@@ -582,9 +840,24 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
continue;
Region &region = sectionOp.getRegion();
- auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
- InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation,
+ &bodyGenStatus](InsertPointTy allocaIP,
+ InsertPointTy codeGenIP) {
builder.restoreIP(codeGenIP);
+
+ // map the omp.section reduction block argument to the omp.sections block
+ // arguments
+ // TODO: this assumes that the only block arguments are reduction
+ // variables
+ assert(region.getNumArguments() ==
+ sectionsOp.getRegion().getNumArguments());
+ for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
+ sectionsOp.getRegion().getArguments(), region.getArguments())) {
+ llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
+ assert(llvmVal);
+ moduleTranslation.mapValue(sectionArg, llvmVal);
+ }
+
convertOmpOpRegions(region, "omp.section.region", builder,
moduleTranslation, bodyGenStatus);
};
@@ -613,13 +886,19 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
// called for variables which have destructors/finalizers.
auto finiCB = [&](InsertPointTy codeGenIP) {};
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
sectionsOp.getNowait()));
- return bodyGenStatus;
+
+ if (failed(bodyGenStatus))
+ return bodyGenStatus;
+
+ // Process the reductions if required.
+ return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
+ allocaIP, reductionDecls,
+ privateReductionVariables, isByRef);
}
/// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
@@ -769,131 +1048,6 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
ompLoc, allocaIP, bodyCB));
return bodyGenStatus;
}
-
-/// Allocate space for privatized reduction variables.
-template <typename T>
-static void allocByValReductionVars(
- T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
- SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
- SmallVectorImpl<llvm::Value *> &privateReductionVariables,
- DenseMap<Value, llvm::Value *> &reductionVariableMap,
- llvm::ArrayRef<bool> isByRefs) {
- llvm::IRBuilderBase::InsertPointGuard guard(builder);
- builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
-
- for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
- if (isByRefs[i])
- continue;
- llvm::Value *var = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- moduleTranslation.mapValue(reductionArgs[i], var);
- privateReductionVariables[i] = var;
- reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
- }
-}
-
-/// Map input argument to all reduction initialization regions
-template <typename T>
-static void
-mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation,
- SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
- unsigned i) {
- // map input argument to the initialization region
- mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
- Region &initializerRegion = reduction.getInitializerRegion();
- Block &entry = initializerRegion.front();
- assert(entry.getNumArguments() == 1 &&
- "the initialization region has one argument");
-
- mlir::Value mlirSource = loop.getReductionVars()[i];
- llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
- assert(llvmSource && "lookup reduction var");
- moduleTranslation.mapValue(entry.getArgument(0), llvmSource);
-}
-
-/// Collect reduction info
-template <typename T>
-static void collectReductionInfo(
- T loop, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- SmallVector<omp::DeclareReductionOp> &reductionDecls,
- SmallVector<OwningReductionGen> &owningReductionGens,
- SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,
- const SmallVector<llvm::Value *> &privateReductionVariables,
- SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
- unsigned numReductions = loop.getNumReductionVars();
-
- for (unsigned i = 0; i < numReductions; ++i) {
- owningReductionGens.push_back(
- makeReductionGen(reductionDecls[i], builder, moduleTranslation));
- owningAtomicReductionGens.push_back(
- makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
- }
-
- // Collect the reduction information.
- reductionInfos.reserve(numReductions);
- for (unsigned i = 0; i < numReductions; ++i) {
- llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
- if (owningAtomicReductionGens[i])
- atomicGen = owningAtomicReductionGens[i];
- llvm::Value *variable =
- moduleTranslation.lookupValue(loop.getReductionVars()[i]);
- reductionInfos.push_back(
- {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
- privateReductionVariables[i],
- /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
- owningReductionGens[i],
- /*ReductionGenClang=*/nullptr, atomicGen});
- }
-}
-
-/// handling of DeclareReductionOp's cleanup region
-static LogicalResult
-inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,
- llvm::ArrayRef<llvm::Value *> privateVariables,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::IRBuilderBase &builder, StringRef regionName,
- bool shouldLoadCleanupRegionArg = true) {
- for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
- if (cleanupRegion->empty())
- continue;
-
- // map the argument to the cleanup region
- Block &entry = cleanupRegion->front();
-
- llvm::Instruction *potentialTerminator =
- builder.GetInsertBlock()->empty() ? nullptr
- : &builder.GetInsertBlock()->back();
- if (potentialTerminator && potentialTerminator->isTerminator())
- builder.SetInsertPoint(potentialTerminator);
- llvm::Value *prviateVarValue =
- shouldLoadCleanupRegionArg
- ? builder.CreateLoad(
- moduleTranslation.convertType(entry.getArgument(0).getType()),
- privateVariables[i])
- : privateVariables[i];
-
- moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue);
-
- if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
- ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/97859
More information about the Mlir-commits mailing list