[Mlir-commits] [mlir] 10ec186 - [MLIR][GPU] Add error checking to loop.parallel to gpu transform.
Stephan Herhut
llvmlistbot at llvm.org
Tue Mar 3 04:30:59 PST 2020
Author: Stephan Herhut
Date: 2020-03-03T13:29:09+01:00
New Revision: 10ec1860a826a120c522a0a5dadafc844e273eb4
URL: https://github.com/llvm/llvm-project/commit/10ec1860a826a120c522a0a5dadafc844e273eb4
DIFF: https://github.com/llvm/llvm-project/commit/10ec1860a826a120c522a0a5dadafc844e273eb4.diff
LOG: [MLIR][GPU] Add error checking to loop.parallel to gpu transform.
Summary:
Instead of crashing on malformed input, the pass now produces error
messages.
Differential Revision: https://reviews.llvm.org/D75468
Added:
Modified:
mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index ed9400fc2ad0..293d93512147 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -572,6 +572,7 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp,
gpu::LaunchOp launchOp,
BlockAndValueMapping &cloningMap,
SmallVectorImpl<Operation *> &worklist,
+ DenseMap<int, Value> &bounds,
PatternRewriter &rewriter) {
// TODO(herhut): Verify that this is a valid GPU mapping.
// processor ids: 0-2 block [x/y/z], 3-5 -> thread [x/y/z], 6-> sequential
@@ -631,22 +632,27 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp,
// conditional. If the lower-bound is constant or defined before the
// launch, we can use it in the launch bounds. Otherwise fail.
if (!launchIndependent(lowerBound) &&
- !isa<ConstantOp>(lowerBound.getDefiningOp()))
+ !isa_and_nonnull<ConstantOp>(lowerBound.getDefiningOp()))
return failure();
// The step must also be constant or defined outside of the loop nest.
- if (!launchIndependent(step) && !isa<ConstantOp>(step.getDefiningOp()))
+ if (!launchIndependent(step) &&
+ !isa_and_nonnull<ConstantOp>(step.getDefiningOp()))
return failure();
// If the upper-bound is constant or defined before the launch, we can
// use it in the launch bounds directly. Otherwise try derive a bound.
- bool boundIsPrecise = launchIndependent(upperBound) ||
- isa<ConstantOp>(upperBound.getDefiningOp());
+ bool boundIsPrecise =
+ launchIndependent(upperBound) ||
+ isa_and_nonnull<ConstantOp>(upperBound.getDefiningOp());
{
PatternRewriter::InsertionGuard guard(rewriter);
rewriter.setInsertionPoint(launchOp);
if (!boundIsPrecise) {
upperBound = deriveStaticUpperBound(upperBound, rewriter);
- if (!upperBound)
- return failure();
+ if (!upperBound) {
+ return parallelOp.emitOpError()
+ << "cannot derive loop-invariant upper bound for number "
+ "of iterations";
+ }
}
// Compute the number of iterations needed. We compute this as an
// affine expression ceilDiv (upperBound - lowerBound) step. We use
@@ -654,8 +660,8 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp,
AffineMap stepMap =
AffineMap::get(0, 3,
((rewriter.getAffineSymbolExpr(0) -
- rewriter.getAffineSymbolExpr(1)).ceilDiv(
- rewriter.getAffineSymbolExpr(2))));
+ rewriter.getAffineSymbolExpr(1))
+ .ceilDiv(rewriter.getAffineSymbolExpr(2))));
Value launchBound = rewriter.create<AffineApplyOp>(
loc, annotation.boundMap.compose(stepMap),
ValueRange{
@@ -664,7 +670,12 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp,
ensureLaunchIndependent(
cloningMap.lookupOrDefault(lowerBound)),
ensureLaunchIndependent(cloningMap.lookupOrDefault(step))});
- launchOp.setOperand(annotation.processor, launchBound);
+ if (bounds.find(annotation.processor) != bounds.end()) {
+ return parallelOp.emitOpError()
+ << "cannot redefine the bound for processor "
+ << annotation.processor;
+ }
+ bounds[annotation.processor] = launchBound;
}
if (!boundIsPrecise) {
// We are using an approximation, create a surrounding conditional.
@@ -746,9 +757,10 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
rewriter.setInsertionPointToStart(&launchOp.body().front());
BlockAndValueMapping cloningMap;
+ llvm::DenseMap<int, Value> launchBounds;
SmallVector<Operation *, 16> worklist;
if (failed(processParallelLoop(parallelOp, launchOp, cloningMap, worklist,
- rewriter)))
+ launchBounds, rewriter)))
return matchFailure();
// Whether we have seen any side-effects. Reset when leaving an inner scope.
@@ -770,8 +782,9 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
// A nested loop.parallel needs insertion of code to compute indices.
// Insert that now. This will also update the worklist with the loops
// body.
- processParallelLoop(nestedParallel, launchOp, cloningMap, worklist,
- rewriter);
+ if (failed(processParallelLoop(nestedParallel, launchOp, cloningMap,
+ worklist, launchBounds, rewriter)))
+ return matchFailure();
} else if (op == launchOp.getOperation()) {
// Found our sentinel value. We have finished the operations from one
// nesting level, pop one level back up.
@@ -791,6 +804,11 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
}
}
+ // Now that we succeeded creating the launch operation, also update the
+ // bounds.
+ for (auto bound : launchBounds)
+ launchOp.setOperand(std::get<0>(bound), std::get<1>(bound));
+
rewriter.eraseOp(parallelOp);
return matchSuccess();
}
diff --git a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
index 2a440a4456ba..24ea0320f0ac 100644
--- a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file %s | FileCheck %s -dump-input-on-failure
+// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure
// 2-d parallel loop mapped to block.y and block.x
@@ -299,3 +299,55 @@ module {
// CHECK: return
// CHECK: }
// CHECK: }
+
+// -----
+
+// Mapping to the same processor twice.
+
+func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
+ %arg3 : index,
+ %buf : memref<?x?xf32>,
+ %res : memref<?x?xf32>) {
+ %four = constant 4 : index
+ // expected-error at +2 {{cannot redefine the bound for processor 1}}
+ // expected-error at +1 {{failed to legalize operation 'loop.parallel'}}
+ loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+ step (%four, %four) {
+ } { mapping = [
+ {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
+ {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
+ ] }
+ return
+}
+
+// -----
+
+// Loop with loop-variant upper bound.
+
+func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index,
+ %arg3 : index,
+ %buf : memref<?x?xf32>,
+ %res : memref<?x?xf32>) {
+ %zero = constant 0 : index
+ %one = constant 1 : index
+ %four = constant 4 : index
+ // expected-error at +1 {{failed to legalize operation 'loop.parallel'}}
+ loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+ step (%four, %four) {
+ // expected-error at +1 {{cannot derive loop-invariant upper bound}}
+ loop.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
+ step (%one, %one) {
+ %idx0 = addi %i0, %si0 : index
+ %idx1 = addi %i1, %si1 : index
+ %val = load %buf[%idx0, %idx1] : memref<?x?xf32>
+ store %val, %res[%idx1, %idx0] : memref<?x?xf32>
+ } { mapping = [
+ {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
+ {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
+ ] }
+ } { mapping = [
+ {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
+ {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
+ ] }
+ return
+}
More information about the Mlir-commits
mailing list