[Mlir-commits] [mlir] 0e9198c - [MLIR][OpenMP] Add support for basic SIMD construct
Arnamoy Bhattacharyya
llvmlistbot at llvm.org
Tue Mar 15 06:41:27 PDT 2022
Author: Arnamoy Bhattacharyya
Date: 2022-03-15T09:41:04-04:00
New Revision: 0e9198c3e95adced7213999dcd14daed4acfd16c
URL: https://github.com/llvm/llvm-project/commit/0e9198c3e95adced7213999dcd14daed4acfd16c
DIFF: https://github.com/llvm/llvm-project/commit/0e9198c3e95adced7213999dcd14daed4acfd16c.diff
LOG: [MLIR][OpenMP] Add support for basic SIMD construct
Patch adds a new operation for the SIMD construct. The op is designed to be very similar to the existing `wsloop` operation, so that the `CanonicalLoopInfo` of `OpenMPIRBuilder` can be used.
Reviewed By: shraiysh
Differential Revision: https://reviews.llvm.org/D118065
Added:
Modified:
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
mlir/test/Dialect/OpenMP/invalid.mlir
mlir/test/Dialect/OpenMP/ops.mlir
mlir/test/Target/LLVMIR/openmp-llvm.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index aedd1e5fe95d1..0cf991855cb79 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -308,9 +308,53 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
let hasVerifier = 1;
}
+//===----------------------------------------------------------------------===//
+// Simd construct [2.9.3.1]
+//===----------------------------------------------------------------------===//
+
+def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
+ AllTypesMatch<["lowerBound", "upperBound", "step"]>]> {
+ let summary = "simd loop construct";
+ let description = [{
+ The simd construct can be applied to a loop to indicate that the loop can be
+ transformed into a SIMD loop (that is, multiple iterations of the loop can
+ be executed concurrently using SIMD instructions). The lower and upper
+ bounds specify a half-open range: the range includes the lower bound but
+ does not include the upper bound.
+
+ The body region can contain any number of blocks. The region is terminated
+ by an "omp.yield" operation without operands.
+ ```
+ omp.simdloop (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10)
+ step (%c1, %c1) {
+ // block operations
+ omp.yield
+ }
+ ```
+ }];
+
+ // TODO: Add other clauses
+ let arguments = (ins Variadic<IntLikeType>:$lowerBound,
+ Variadic<IntLikeType>:$upperBound,
+ Variadic<IntLikeType>:$step);
+
+ let regions = (region AnyRegion:$region);
+
+ let extraClassDeclaration = [{
+ /// Returns the number of loops in the simd loop nest.
+ unsigned getNumLoops() { return lowerBound().size(); }
+
+ }];
+
+ let hasCustomAssemblyFormat = 1;
+ let hasVerifier = 1;
+}
+
+
def YieldOp : OpenMP_Op<"yield",
[NoSideEffect, ReturnLike, Terminator,
- ParentOneOf<["WsLoopOp", "ReductionDeclareOp", "AtomicUpdateOp"]>]> {
+ ParentOneOf<["WsLoopOp", "ReductionDeclareOp",
+ "AtomicUpdateOp", "SimdLoopOp"]>]> {
let summary = "loop yield and termination operation";
let description = [{
"omp.yield" yields SSA values from the OpenMP dialect op region and
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 15e9cbcb35b36..774b6b30b9456 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -838,6 +838,80 @@ void WsLoopOp::print(OpAsmPrinter &p) {
p.printRegion(region(), /*printEntryBlockArgs=*/false);
}
+//===----------------------------------------------------------------------===//
+// SimdLoopOp
+//===----------------------------------------------------------------------===//
+/// Parses the custom assembly form of the OpenMP Simd construct [2.9.3.1].
+///
+/// simdloop ::= `omp.simdloop` loop-control clause-list
+/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds
+/// loop-bounds ::= `(` ssa-id-list `)` `to` `(` ssa-id-list `)` steps
+/// steps ::= `step` `(` ssa-id-list `)`
+/// clause-list ::= clause clause-list | empty
+/// clause ::= TODO (no clauses are parsed yet)
+ParseResult SimdLoopOp::parse(OpAsmParser &parser, OperationState &result) {
+ // Parse an opening `(` followed by induction variables followed by `)`.
+ SmallVector<OpAsmParser::OperandType> ivs;
+ if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1,
+ OpAsmParser::Delimiter::Paren))
+ return failure();
+ int numIVs = static_cast<int>(ivs.size());
+ Type loopVarType;
+ if (parser.parseColonType(loopVarType))
+ return failure();
+ // Parse the lower bounds: `=` then `numIVs` operands of type `loopVarType`.
+ SmallVector<OpAsmParser::OperandType> lower;
+ if (parser.parseEqual() ||
+ parser.parseOperandList(lower, numIVs, OpAsmParser::Delimiter::Paren) ||
+ parser.resolveOperands(lower, loopVarType, result.operands))
+ return failure();
+ SmallVector<OpAsmParser::OperandType> upper; // upper bounds, after `to`
+ if (parser.parseKeyword("to") ||
+ parser.parseOperandList(upper, numIVs, OpAsmParser::Delimiter::Paren) ||
+ parser.resolveOperands(upper, loopVarType, result.operands))
+ return failure();
+
+ // Parse step values.
+ SmallVector<OpAsmParser::OperandType> steps;
+ if (parser.parseKeyword("step") ||
+ parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren) ||
+ parser.resolveOperands(steps, loopVarType, result.operands))
+ return failure();
+
+ SmallVector<int> segments{numIVs, numIVs, numIVs}; // lower, upper, step
+ // TODO: Add parseClauses() when we support clauses
+ result.addAttribute("operand_segment_sizes",
+ parser.getBuilder().getI32VectorAttr(segments));
+
+ // Now parse the body, passing the induction variables as region arguments.
+ Region *body = result.addRegion();
+ SmallVector<Type> ivTypes(numIVs, loopVarType);
+ SmallVector<OpAsmParser::OperandType> blockArgs(ivs);
+ if (parser.parseRegion(*body, blockArgs, ivTypes))
+ return failure();
+ return success();
+}
+
+void SimdLoopOp::print(OpAsmPrinter &p) {
+  // The entry block arguments of the region are the induction variables.
+  auto ivs = getRegion().front().getArguments();
+  p << " (" << ivs << ") : " << ivs[0].getType();
+  p << " = (" << lowerBound() << ") to (" << upperBound() << ") ";
+  p << "step (" << step() << ") ";
+  p.printRegion(region(), /*printEntryBlockArgs=*/false);
+}
+
+//===----------------------------------------------------------------------===//
+// Verifier for Simd construct [2.9.3.1]
+//===----------------------------------------------------------------------===//
+
+LogicalResult SimdLoopOp::verify() {
+  // A simd loop nest needs at least one loop, i.e. at least one lower bound.
+  if (!lowerBound().empty())
+    return success();
+  return emitOpError() << "empty lowerbound for simd loop operation";
+}
+
//===----------------------------------------------------------------------===//
// ReductionOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 21f2d01769574..e1652af2c8f67 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -880,6 +880,82 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ auto loop = cast<omp::SimdLoopOp>(opInst);
+
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+ // Generator of the canonical loop body.
+ // TODO: support error propagation in OpenMPIRBuilder and use it instead of
+ // relying on captured variables.
+ SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
+ SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
+ LogicalResult bodyGenStatus = success();
+ auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
+ // Make sure further conversions know about the induction variable.
+ moduleTranslation.mapValue(
+ loop.getRegion().front().getArgument(loopInfos.size()), iv);
+
+ // Capture the body insertion point for use in nested loops. BodyIP of the
+ // CanonicalLoopInfo always points to the beginning of the entry block of
+ // the body.
+ bodyInsertPoints.push_back(ip);
+
+ if (loopInfos.size() != loop.getNumLoops() - 1)
+ return; // Not the last loop of the nest yet: the region is converted later.
+
+ // Convert the body of the loop; failures are reported via bodyGenStatus.
+ llvm::BasicBlock *entryBlock = ip.getBlock();
+ llvm::BasicBlock *exitBlock =
+ entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit");
+ convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock,
+ *exitBlock, builder, moduleTranslation, bodyGenStatus);
+ };
+
+ // Delegate actual loop construction to the OpenMP IRBuilder.
+ // TODO: this assumes SimdLoop is semantically similar to SCF loop, i.e. a
+ // positive step and signed integer semantics; reconsider when it supports
+ // more. NOTE(review): Inclusive=true below vs. half-open op docs; confirm.
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
+ llvm::Value *lowerBound =
+ moduleTranslation.lookupValue(loop.lowerBound()[i]);
+ llvm::Value *upperBound =
+ moduleTranslation.lookupValue(loop.upperBound()[i]);
+ llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
+
+ // Make sure the loop trip counts are emitted in the preheader of the
+ // outermost loop at the latest so that they are all available for the new
+ // collapsed loop that will be created below.
+ llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
+ llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
+ if (i != 0) {
+ loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
+ ompLoc.DL);
+ computeIP = loopInfos.front()->getPreheaderIP();
+ }
+ loopInfos.push_back(ompBuilder->createCanonicalLoop(
+ loc, bodyGen, lowerBound, upperBound, step,
+ /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
+
+ if (failed(bodyGenStatus))
+ return failure();
+ }
+
+ // Collapse the loop nest into a single loop, then apply simd to the result.
+ llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
+ llvm::CanonicalLoopInfo *loopInfo =
+ ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
+
+ ompBuilder->applySimd(ompLoc.DL, loopInfo);
+
+ builder.restoreIP(afterIP);
+ return success();
+}
+
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
llvm::AtomicOrdering
convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
@@ -1160,6 +1236,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::WsLoopOp) {
return convertOmpWsLoop(*op, builder, moduleTranslation);
})
+ .Case([&](omp::SimdLoopOp) {
+ return convertOmpSimdLoop(*op, builder, moduleTranslation);
+ })
.Case([&](omp::AtomicReadOp) {
return convertOmpAtomicRead(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index bbfc1e9525f14..d871b431e6bb2 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -182,6 +182,19 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step :
// -----
+func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () { // %step is i32, bounds are index
+ // expected-error @below {{op failed to verify that all of {lowerBound, upperBound, step} have same type}}
+ "omp.simdloop" (%lb, %ub, %step) ({
+ ^bb0(%iv: index):
+ omp.yield
+ }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
+ (index, index, i32) -> ()
+
+ return
+}
+
+// -----
+
// expected-error @below {{op expects initializer region with one argument of the reduction type}}
omp.reduction.declare @add_f32 : f64
init {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 62776e2db9d93..9e66295be3175 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -299,6 +299,37 @@ func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i3
return
}
+// CHECK-LABEL: omp_simdloop
+func @omp_simdloop(%lb : index, %ub : index, %step : index) -> () {
+ // CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+ "omp.simdloop" (%lb, %ub, %step) ({ // generic form in, custom form expected out
+ ^bb0(%iv: index):
+ omp.yield
+ }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
+ (index, index, index) -> ()
+
+ return
+}
+
+
+// CHECK-LABEL: omp_simdloop_pretty
+func @omp_simdloop_pretty(%lb : index, %ub : index, %step : index) -> () {
+ // CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+ omp.simdloop (%iv) : index = (%lb) to (%ub) step (%step) { // custom form, single loop
+ omp.yield
+ }
+ return
+}
+
+// CHECK-LABEL: omp_simdloop_pretty_multiple
+func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> () {
+ // CHECK: omp.simdloop (%{{.*}}, %{{.*}}) : index = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
+ omp.simdloop (%iv1, %iv2) : index = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { // custom form, two loops
+ omp.yield
+ }
+ return
+}
+
// CHECK-LABEL: omp_target
func @omp_target(%if_cond : i1, %device : si32, %num_threads : si32) -> () {
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3b707b157ef9a..b29ba0d361877 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -663,6 +663,48 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
// -----
+// CHECK-LABEL: @simdloop_simple
+llvm.func @simdloop_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr<f32>) {
+ "omp.simdloop" (%lb, %ub, %step) ({ // generic form, single loop
+ ^bb0(%iv: i64):
+ %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+ // The form of the emitted IR is controlled by OpenMPIRBuilder and
+ // tested there. Just check that the right metadata is added.
+ // CHECK: llvm.access.group
+ %4 = llvm.getelementptr %arg0[%iv] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+ llvm.store %3, %4 : !llvm.ptr<f32>
+ omp.yield
+ }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
+ (i64, i64, i64) -> ()
+
+ llvm.return
+}
+// CHECK: llvm.loop.parallel_accesses
+// CHECK-NEXT: llvm.loop.vectorize.enable
+
+// -----
+
+// CHECK-LABEL: @simdloop_simple_multiple
+llvm.func @simdloop_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr<f32>, %arg1: !llvm.ptr<f32>) {
+ omp.simdloop (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { // custom form, two loops
+ %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+ // The form of the emitted IR is controlled by OpenMPIRBuilder and
+ // tested there. Just check that the right metadata is added.
+ // CHECK: llvm.access.group
+ // CHECK-NEXT: llvm.access.group
+ %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+ %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+ llvm.store %3, %4 : !llvm.ptr<f32>
+ llvm.store %3, %5 : !llvm.ptr<f32>
+ omp.yield
+ }
+ llvm.return
+}
+// CHECK: llvm.loop.parallel_accesses
+// CHECK-NEXT: llvm.loop.vectorize.enable
+
+// -----
+
omp.critical.declare @mutex hint(contended)
// CHECK-LABEL: @omp_critical
More information about the Mlir-commits
mailing list