[Mlir-commits] [mlir] 5da2423 - [mlir][gpu] Only transform mapped parallel loops to GPU.
Stephan Herhut
llvmlistbot at llvm.org
Fri Nov 13 00:22:34 PST 2020
Author: Stephan Herhut
Date: 2020-11-13T09:15:17+01:00
New Revision: 5da2423bc02f83598405fdfc532de0faa3502ec7
URL: https://github.com/llvm/llvm-project/commit/5da2423bc02f83598405fdfc532de0faa3502ec7
DIFF: https://github.com/llvm/llvm-project/commit/5da2423bc02f83598405fdfc532de0faa3502ec7.diff
LOG: [mlir][gpu] Only transform mapped parallel loops to GPU.
This exposes a hook to configure legality of operations such that only
`scf.parallel` operations that have mapping attributes are marked as
illegal. Consequently, the transformation can now also be applied to
mixed forms.
Differential Revision: https://reviews.llvm.org/D91340
Added:
Modified:
mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp
mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h b/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
index 900bc6e3fe84..d6316f663aa8 100644
--- a/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
+++ b/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
@@ -12,9 +12,10 @@
namespace mlir {
class AffineForOp;
+class ConversionTarget;
+struct LogicalResult;
class MLIRContext;
class OwningRewritePatternList;
-struct LogicalResult;
class Value;
namespace scf {
@@ -44,6 +45,10 @@ LogicalResult convertAffineLoopNestToGPULaunch(AffineForOp forOp,
void populateParallelLoopToGPUPatterns(OwningRewritePatternList &patterns,
MLIRContext *ctx);
+/// Configures the rewrite target such that `scf.parallel` operations are legal
+/// only if they have no GPU mapping attribute; mapped loops are illegal.
+void configureParallelLoopToGPULegality(ConversionTarget &target);
+
} // namespace mlir
#endif // MLIR_CONVERSION_SCFTOGPU_SCFTOGPU_H_
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index d494d12d0e4f..b7b4e7aab859 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -458,9 +458,10 @@ static LogicalResult processParallelLoop(
if (!boundIsPrecise) {
upperBound = deriveStaticUpperBound(upperBound, rewriter);
if (!upperBound) {
- return parallelOp.emitOpError()
- << "cannot derive loop-invariant upper bound for number "
- "of iterations";
+ return rewriter.notifyMatchFailure(
+ parallelOp,
+ "cannot derive loop-invariant upper bound for number of "
+ "iterations");
}
}
// Compute the number of iterations needed. We compute this as an
@@ -481,9 +482,9 @@ static LogicalResult processParallelLoop(
// todo(herhut,ravishankarm): Update the behavior of setMappingAttr
// when this condition is relaxed.
if (bounds.find(processor) != bounds.end()) {
- return parallelOp.emitOpError()
- << "cannot redefine the bound for processor "
- << static_cast<int64_t>(processor);
+ return rewriter.notifyMatchFailure(
+ parallelOp, "cannot redefine the bound for processor " +
+ Twine(static_cast<int64_t>(processor)));
}
bounds[processor] = launchBound;
}
@@ -565,6 +566,10 @@ static LogicalResult processParallelLoop(
LogicalResult
ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
PatternRewriter &rewriter) const {
+ // We can only transform starting at the outer-most loop. Launches inside of
+ // parallel loops are not supported.
+ if (auto parentLoop = parallelOp.getParentOfType<ParallelOp>())
+ return failure();
// Create a launch operation. We start with bound one for all grid/block
// sizes. Those will be refined later as we discover them from mappings.
Location loc = parallelOp.getLoc();
@@ -640,3 +645,9 @@ void mlir::populateParallelLoopToGPUPatterns(OwningRewritePatternList &patterns,
MLIRContext *ctx) {
patterns.insert<ParallelToGpuLaunchLowering>(ctx);
}
+
+void mlir::configureParallelLoopToGPULegality(ConversionTarget &target) {
+ target.addDynamicallyLegalOp<scf::ParallelOp>([](scf::ParallelOp parallelOp) {
+ return !parallelOp.getAttr(gpu::getMappingAttrName());
+ });
+}
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp
index d04a773939b7..2941b400babe 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp
@@ -53,7 +53,7 @@ struct ParallelLoopToGpuPass
target.addLegalDialect<AffineDialect>();
target.addLegalDialect<gpu::GPUDialect>();
target.addLegalDialect<scf::SCFDialect>();
- target.addIllegalOp<scf::ParallelOp>();
+ configureParallelLoopToGPULegality(target);
if (failed(applyPartialConversion(getOperation(), target,
std::move(patterns))))
signalPassFailure();
diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
index 3af50da6165f..2454ced8ac35 100644
--- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
+++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
@@ -317,15 +317,13 @@ func @parallel_loop_optional_attr() {
// -----
-// Mapping to the same processor twice.
+// Mapping to the same processor twice. Cannot be mapped.
func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3 : index,
%buf : memref<?x?xf32>,
%res : memref<?x?xf32>) {
%four = constant 4 : index
- // expected-error@+2 {{cannot redefine the bound for processor 1}}
- // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
step (%four, %four) {
} { mapping = [
@@ -335,9 +333,12 @@ func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
return
}
+// CHECK-LABEL: @parallel_double_map
+// CHECK: scf.parallel
+
// -----
-// Loop with loop-variant upper bound.
+// Loop with loop-variant upper bound. Cannot be mapped.
func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3 : index,
@@ -346,10 +347,8 @@ func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : ind
%zero = constant 0 : index
%one = constant 1 : index
%four = constant 4 : index
- // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
step (%four, %four) {
- // expected-error@+1 {{cannot derive loop-invariant upper bound}}
scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
step (%one, %one) {
%idx0 = addi %i0, %si0 : index
@@ -366,3 +365,25 @@ func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : ind
] }
return
}
+
+// CHECK-LABEL: @parallel_loop_loop_variant_bound
+// CHECK: scf.parallel
+// CHECK: scf.parallel
+
+// -----
+
+// Loop without annotations. Cannot be mapped.
+
+func @parallel_no_annotations(%arg0 : index, %arg1 : index, %arg2 : index,
+ %arg3 : index,
+ %buf : memref<?x?xf32>,
+ %res : memref<?x?xf32>) {
+ %four = constant 4 : index
+ scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+ step (%four, %four) {
+ }
+ return
+}
+
+// CHECK-LABEL: @parallel_no_annotations
+// CHECK: scf.parallel
More information about the Mlir-commits
mailing list