[Mlir-commits] [mlir] de72243 - [openmp][mlir] Parallel reduction LLVM IR generation
Ethan Luis McDonough
llvmlistbot at llvm.org
Tue Aug 15 22:58:56 PDT 2023
Author: Ethan Luis McDonough
Date: 2023-08-16T00:58:49-05:00
New Revision: de7224399acdf2c7e9d452a1e29e63b8ff92887c
URL: https://github.com/llvm/llvm-project/commit/de7224399acdf2c7e9d452a1e29e63b8ff92887c
DIFF: https://github.com/llvm/llvm-project/commit/de7224399acdf2c7e9d452a1e29e63b8ff92887c.diff
LOG: [openmp][mlir] Parallel reduction LLVM IR generation
This patch extends the existing WsLoop reduction IR generation to parallel blocks.
Reviewed By: kiranchandramohan
Differential Revision: https://reviews.llvm.org/D155157
Added:
Modified:
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
mlir/test/Target/LLVMIR/openmp-reduction.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index d096791490cce9..5bd26523593895 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -188,6 +188,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
let builders = [
OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>
];
+ let extraClassDeclaration = [{
+ /// Returns the number of reduction variables.
+ unsigned getNumReductionVars() { return getReductionVars().size(); }
+ }];
let assemblyFormat = [{
oilist( `reduction` `(`
custom<ReductionVarList>(
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7e955b91e19c5e..cae307000b12c2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -253,64 +253,6 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
llvm_unreachable("Unknown ClauseProcBindKind kind");
}
-/// Converts the OpenMP parallel operation to LLVM IR.
-static LogicalResult
-convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
- using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- // TODO: support error propagation in OpenMPIRBuilder and use it instead of
- // relying on captured variables.
- LogicalResult bodyGenStatus = success();
-
- auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
- // Save the alloca insertion point on ModuleTranslation stack for use in
- // nested regions.
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
-
- // ParallelOp has only one region associated with it.
- builder.restoreIP(codeGenIP);
- convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
- moduleTranslation, bodyGenStatus);
- };
-
- // TODO: Perform appropriate actions according to the data-sharing
- // attribute (shared, private, firstprivate, ...) of variables.
- // Currently defaults to shared.
- auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
- llvm::Value &, llvm::Value &vPtr,
- llvm::Value *&replacementValue) -> InsertPointTy {
- replacementValue = &vPtr;
-
- return codeGenIP;
- };
-
- // TODO: Perform finalization actions for variables. This has to be
- // called for variables which have destructors/finalizers.
- auto finiCB = [&](InsertPointTy codeGenIP) {};
-
- llvm::Value *ifCond = nullptr;
- if (auto ifExprVar = opInst.getIfExprVar())
- ifCond = moduleTranslation.lookupValue(ifExprVar);
- llvm::Value *numThreads = nullptr;
- if (auto numThreadsVar = opInst.getNumThreadsVar())
- numThreads = moduleTranslation.lookupValue(numThreadsVar);
- auto pbKind = llvm::omp::OMP_PROC_BIND_default;
- if (auto bind = opInst.getProcBindVal())
- pbKind = getProcBindKind(*bind);
- // TODO: Is the Parallel construct cancellable?
- bool isCancellable = false;
-
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
- llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
- builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
- ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind,
- isCancellable));
-
- return bodyGenStatus;
-}
-
/// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -383,26 +325,56 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
/// Returns a reduction declaration that corresponds to the given reduction
/// operation in the given container. Currently only supports reductions inside
-/// WsLoopOp but can be easily extended.
-static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
- omp::ReductionOp reduction) {
- SymbolRefAttr reductionSymbol;
+/// WsLoopOp and ParallelOp but can be easily extended as long as the given
+/// construct implements getNumReductionVars. Returns std::nullopt when no reduction variable of the container equals the accumulator of `reduction`.
+template <typename T>
+static std::optional<omp::ReductionDeclareOp>
+findReductionDeclInContainer(T container, omp::ReductionOp reduction) {
for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
if (container.getReductionVars()[i] != reduction.getAccumulator())
continue;
- reductionSymbol = cast<SymbolRefAttr>((*container.getReductions())[i]);
- break;
+
+ SymbolRefAttr reductionSymbol =
+ cast<SymbolRefAttr>((*container.getReductions())[i]);
+ auto declareOp =
+ SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
+ container, reductionSymbol);
+ return declareOp;
+ }
+ return std::nullopt;
+}
+
+/// Searches for the declaration of `reduction` in `containerOp` and, failing
+/// that, in each directly enclosing ParallelOp/WsLoopOp, walking outwards.
+static omp::ReductionDeclareOp findReductionDecl(Operation &containerOp,
+ omp::ReductionOp reduction) {
+ std::optional<omp::ReductionDeclareOp> declareOp = std::nullopt;
+ Operation *container = &containerOp;
+
+ while (!declareOp.has_value() && container) {
+ // Check if current container is supported for reductions searches
+ if (auto par = dyn_cast<omp::ParallelOp>(*container)) {
+ declareOp = findReductionDeclInContainer(par, reduction);
+ } else if (auto loop = dyn_cast<omp::WsLoopOp>(*container)) {
+ declareOp = findReductionDeclInContainer(loop, reduction);
+ } else {
+ break;
+ }
+
+ // Step out from the container just examined (not from `containerOp`).
+ container = container->getParentOp();
}
-  assert(reductionSymbol &&
+
+ assert(declareOp.has_value() &&
"reduction operation must be associated with a declaration");
-  return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
-      container, reductionSymbol);
+ return *declareOp;
}
/// Populates `reductions` with reduction declarations used in the given loop.
+template <typename T>
static void
-collectReductionDecls(omp::WsLoopOp loop,
+collectReductionDecls(T loop,
SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
std::optional<ArrayAttr> attr = loop.getReductions();
if (!attr)
@@ -760,6 +732,62 @@ convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
return bodyGenStatus;
}
+/// Allocates, at `allocaIP`, one private copy per reduction variable of `loop` (typed by `reductionDecls[i]`); each alloca is appended to `privateReductionVariables` and recorded in `reductionVariableMap` under the original variable.
+template <typename T>
+static void
+allocReductionVars(T loop, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVector<omp::ReductionDeclareOp> &reductionDecls,
+ SmallVector<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap) {
+ unsigned numReductions = loop.getNumReductionVars();
+ privateReductionVariables.reserve(numReductions);
+ if (numReductions != 0) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ builder.restoreIP(allocaIP);
+ for (unsigned i = 0; i < numReductions; ++i) {
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ privateReductionVariables.push_back(var);
+ reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
+ }
+ }
+}
+
+/// Creates the reduction and atomic-reduction generators for every reduction variable of `loop`, then appends one ReductionInfo per variable (type, original variable, private copy, generators) to `reductionInfos`.
+template <typename T>
+static void collectReductionInfo(
+ T loop, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ SmallVector<omp::ReductionDeclareOp> &reductionDecls,
+ SmallVector<OwningReductionGen> &owningReductionGens,
+ SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,
+ const SmallVector<llvm::Value *> &privateReductionVariables,
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
+ unsigned numReductions = loop.getNumReductionVars();
+
+ for (unsigned i = 0; i < numReductions; ++i) {
+ owningReductionGens.push_back(
+ makeReductionGen(reductionDecls[i], builder, moduleTranslation));
+ owningAtomicReductionGens.push_back(
+ makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
+ }
+
+ // Collect the reduction information.
+ reductionInfos.reserve(numReductions);
+ for (unsigned i = 0; i < numReductions; ++i) {
+ llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
+ if (owningAtomicReductionGens[i])
+ atomicGen = owningAtomicReductionGens[i];
+ llvm::Value *variable =
+ moduleTranslation.lookupValue(loop.getReductionVars()[i]);
+ reductionInfos.push_back(
+ {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
+ privateReductionVariables[i], owningReductionGens[i], atomicGen});
+ }
+}
+
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -788,21 +816,10 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);
- // Allocate space for privatized reduction variables.
SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
- unsigned numReductions = loop.getNumReductionVars();
- privateReductionVariables.reserve(numReductions);
- if (numReductions != 0) {
- llvm::IRBuilderBase::InsertPointGuard guard(builder);
- builder.restoreIP(allocaIP);
- for (unsigned i = 0; i < numReductions; ++i) {
- llvm::Value *var = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- privateReductionVariables.push_back(var);
- reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
- }
- }
+ allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,
+ privateReductionVariables, reductionVariableMap);
// Store the mapping between reduction variables and their private copies on
// ModuleTranslation stack. It can be then recovered when translating
@@ -813,7 +830,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
// Before the loop, store the initial values of reductions into reduction
// variables. Although this could be done after allocas, we don't want to mess
// up with the alloca insertion point.
- for (unsigned i = 0; i < numReductions; ++i) {
+ for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
"omp.reduction.neutral", builder,
@@ -908,33 +925,17 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
builder.restoreIP(afterIP);
// Process the reductions if required.
- if (numReductions == 0)
+ if (loop.getNumReductionVars() == 0)
return success();
// Create the reduction generators. We need to own them here because
// ReductionInfo only accepts references to the generators.
SmallVector<OwningReductionGen> owningReductionGens;
SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
- for (unsigned i = 0; i < numReductions; ++i) {
- owningReductionGens.push_back(
- makeReductionGen(reductionDecls[i], builder, moduleTranslation));
- owningAtomicReductionGens.push_back(
- makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
- }
-
- // Collect the reduction information.
SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
- reductionInfos.reserve(numReductions);
- for (unsigned i = 0; i < numReductions; ++i) {
- llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
- if (owningAtomicReductionGens[i])
- atomicGen = owningAtomicReductionGens[i];
- llvm::Value *variable =
- moduleTranslation.lookupValue(loop.getReductionVars()[i]);
- reductionInfos.push_back(
- {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
- privateReductionVariables[i], owningReductionGens[i], atomicGen});
- }
+ collectReductionInfo(loop, builder, moduleTranslation, reductionDecls,
+ owningReductionGens, owningAtomicReductionGens,
+ privateReductionVariables, reductionInfos);
// The call to createReductions below expects the block to have a
// terminator. Create an unreachable instruction to serve as terminator
@@ -954,6 +955,128 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Converts the OpenMP parallel operation to LLVM IR.
+/// Reduction variables of the op are privatized and initialized inside the
+/// body callback and combined through createReductions once the region has
+/// been translated. Returns failure if any nested conversion fails.
+static LogicalResult
+convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ // TODO: support error propagation in OpenMPIRBuilder and use it instead of
+ // relying on captured variables.
+ LogicalResult bodyGenStatus = success();
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ // Collect reduction declarations
+ SmallVector<omp::ReductionDeclareOp> reductionDecls;
+ collectReductionDecls(opInst, reductionDecls);
+
+ // Allocate reduction vars
+ SmallVector<llvm::Value *> privateReductionVariables;
+ DenseMap<Value, llvm::Value *> reductionVariableMap;
+ allocReductionVars(opInst, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap);
+
+ // Store the mapping between reduction variables and their private copies on
+ // ModuleTranslation stack. It can be then recovered when translating
+ // omp.reduce operations in a separate call.
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+ moduleTranslation, reductionVariableMap);
+
+ // Initialize reduction vars
+ builder.restoreIP(allocaIP);
+ for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
+ SmallVector<llvm::Value *> phis;
+ if (failed(inlineConvertOmpRegions(
+ reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
+ builder, moduleTranslation, &phis))) {
+ // Bail out: `phis` is not guaranteed to hold a value after a failed
+ // conversion, so the accesses below would be invalid.
+ bodyGenStatus = failure();
+ return;
+ }
+ assert(phis.size() == 1 &&
+ "expected one value to be yielded from the "
+ "reduction neutral element declaration region");
+ builder.restoreIP(allocaIP);
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ }
+
+ // Save the alloca insertion point on ModuleTranslation stack for use in
+ // nested regions.
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+ moduleTranslation, allocaIP);
+
+ // ParallelOp has only one region associated with it.
+ builder.restoreIP(codeGenIP);
+ auto regionBlock =
+ convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
+ moduleTranslation, bodyGenStatus);
+
+ // Process the reductions if required.
+ if (opInst.getNumReductionVars() > 0) {
+ // Collect reduction info
+ SmallVector<OwningReductionGen> owningReductionGens;
+ SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
+ collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
+ owningReductionGens, owningAtomicReductionGens,
+ privateReductionVariables, reductionInfos);
+
+ // Move to region cont block
+ builder.SetInsertPoint(regionBlock->getTerminator());
+
+ // Generate reductions from info
+ llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
+ builder.SetInsertPoint(tempTerminator);
+
+ llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
+ ompBuilder->createReductions(builder.saveIP(), allocaIP,
+ reductionInfos, false);
+ if (!contInsertPoint.getBlock()) {
+ bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
+ return;
+ }
+
+ tempTerminator->eraseFromParent();
+ builder.restoreIP(contInsertPoint);
+ }
+ };
+
+ // TODO: Perform appropriate actions according to the data-sharing
+ // attribute (shared, private, firstprivate, ...) of variables.
+ // Currently defaults to shared.
+ auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
+ llvm::Value &, llvm::Value &vPtr,
+ llvm::Value *&replacementValue) -> InsertPointTy {
+ replacementValue = &vPtr;
+
+ return codeGenIP;
+ };
+
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto finiCB = [&](InsertPointTy codeGenIP) {};
+
+ llvm::Value *ifCond = nullptr;
+ if (auto ifExprVar = opInst.getIfExprVar())
+ ifCond = moduleTranslation.lookupValue(ifExprVar);
+ llvm::Value *numThreads = nullptr;
+ if (auto numThreadsVar = opInst.getNumThreadsVar())
+ numThreads = moduleTranslation.lookupValue(numThreadsVar);
+ auto pbKind = llvm::omp::OMP_PROC_BIND_default;
+ if (auto bind = opInst.getProcBindVal())
+ pbKind = getProcBindKind(*bind);
+ // TODO: Is the Parallel construct cancellable?
+ bool isCancellable = false;
+
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+ findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+ builder.restoreIP(
+ ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
+ ifCond, numThreads, pbKind, isCancellable));
+
+ return bodyGenStatus;
+}
+
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -1286,15 +1409,20 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
/// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
/// mapping between reduction variables and their private equivalents to have
/// been stored on the ModuleTranslation stack. Currently only supports
-/// reduction within WsLoopOp, but can be easily extended.
+/// reduction within WsLoopOp and ParallelOp, but can be easily extended.
static LogicalResult
convertOmpReductionOp(omp::ReductionOp reductionOp,
llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
// Find the declaration that corresponds to the reduction op.
- auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
- omp::ReductionDeclareOp declaration =
- findReductionDecl(reductionContainer, reductionOp);
+ omp::ReductionDeclareOp declaration;
+ Operation *reductionParent = reductionOp->getParentOp();
+ if (dyn_cast<omp::ParallelOp>(reductionParent) ||
+ dyn_cast<omp::WsLoopOp>(reductionParent)) {
+ declaration = findReductionDecl(*reductionParent, reductionOp);
+ } else {
+ llvm_unreachable("Unhandled reduction container");
+ }
assert(declaration && "could not find reduction declaration");
// Retrieve the mapping between reduction variables and their private
@@ -1302,11 +1430,13 @@ convertOmpReductionOp(omp::ReductionOp reductionOp,
const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
[&](const OpenMPVarMappingStackFrame &frame) {
- reductionVariableMap = &frame.mapping;
- return WalkResult::interrupt();
+ if (frame.mapping.contains(reductionOp.getAccumulator())) {
+ reductionVariableMap = &frame.mapping;
+ return WalkResult::interrupt();
+ }
+ return WalkResult::advance();
});
assert(reductionVariableMap && "couldn't find private reduction variables");
-
// Translate the reduction operation by emitting the body of the corresponding
// reduction declaration.
Region &reductionRegion = declaration.getReductionRegion();
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index c17f39febd83d8..93ab578df9e4e8 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -416,3 +416,146 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fmul float
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+ %0 = llvm.mlir.constant(0.0 : f32) : f32
+ omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+ %1 = llvm.fadd %arg0, %arg1 : f32
+ omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+ %2 = llvm.load %arg3 : !llvm.ptr -> f32
+ llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
+ omp.yield
+}
+
+// CHECK-LABEL: @simple_reduction_parallel
+llvm.func @simple_reduction_parallel() {
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+ omp.parallel reduction(@add_f32 -> %0 : !llvm.ptr) {
+ %1 = llvm.mlir.constant(2.0 : f32) : f32
+ omp.reduction %1, %0 : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:.+]] = alloca float
+// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
+
+// Update of the private variable
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
+// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
+// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+
+// -----
+
+omp.reduction.declare @add_i32 : i32
+init {
+^bb0(%arg: i32):
+ %0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield (%0 : i32)
+}
+combiner {
+^bb1(%arg0: i32, %arg1: i32):
+ %1 = llvm.add %arg0, %arg1 : i32
+ omp.yield (%1 : i32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+ %2 = llvm.load %arg3 : !llvm.ptr -> i32
+ llvm.atomicrmw add %arg2, %2 monotonic : !llvm.ptr, i32
+ omp.yield
+}
+
+// CHECK-LABEL: @parallel_nested_workshare_reduction
+llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+
+ %lb = llvm.mlir.constant(1 : i64) : i64
+ %step = llvm.mlir.constant(1 : i64) : i64
+
+ omp.parallel reduction(@add_i32 -> %0 : !llvm.ptr) {
+ omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ %ival = llvm.trunc %iv : i64 to i32
+ omp.reduction %ival, %0 : i32, !llvm.ptr
+ omp.yield
+ }
+ omp.terminator
+ }
+
+ llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:[0-9]+]] = alloca i32
+// CHECK: store i32 0, ptr %[[PRIVATE]]
+
+// Loop exit:
+// CHECK: call void @__kmpc_barrier
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction:
+// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
+// CHECK: atomicrmw add ptr %{{.*}}, i32 %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: add i32
+// CHECK: call void @__kmpc_end_reduce
+
+// Update of the private variable using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = add i32 %[[PARTIAL]], {{.*}}
+// CHECK: store i32 %[[UPDATED]], ptr %[[PRIVATE]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: add i32
More information about the Mlir-commits
mailing list