[Mlir-commits] [mlir] 92f088d - [mlir][gpu][transform] Provide better error messages and avoid crashing in MapForallToBlocks.
Nicolas Vasilache
llvmlistbot at llvm.org
Mon Sep 4 07:11:47 PDT 2023
Author: Nicolas Vasilache
Date: 2023-09-04T14:11:38Z
New Revision: 92f088d335e4a55f10e74c541ac52ec2a4d4ceeb
URL: https://github.com/llvm/llvm-project/commit/92f088d335e4a55f10e74c541ac52ec2a4d4ceeb
DIFF: https://github.com/llvm/llvm-project/commit/92f088d335e4a55f10e74c541ac52ec2a4d4ceeb.diff
LOG: [mlir][gpu][transform] Provide better error messages and avoid crashing in MapForallToBlocks.
This revision addresses issues surfaced in https://reviews.llvm.org/D159093
Added:
Modified:
mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
mlir/test/Dialect/GPU/transform-gpu-failing.mlir
mlir/test/python/dialects/transform_gpu_ext.py
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
index 3a58c7ccd0997d..616a82b08a6148 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
@@ -298,6 +298,8 @@ def MapForallToBlocks :
attr-dict
`:` functional-type($target, $result)
}];
+ let hasVerifier = 1;
+
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
::mlir::transform::TransformRewriter &rewriter,
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 71db26999a4944..6434f99838f005 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -149,10 +149,10 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
DefaultValuedAttr<StrAttr, "\"memref.alloc\"">:
$alloc_op,
UnitAttr:$bufferize_destination_only);
- let hasVerifier = 1;
let results = (outs Transform_AnyValue:$allocated_buffer,
Transform_AnyOpType:$new_ops);
let assemblyFormat = "$target attr-dict `:` type($target)";
+ let hasVerifier = 1;
let builders = [
OpBuilder<(ins "Value":$target, "Attribute":$memorySpace)>,
@@ -1325,8 +1325,8 @@ def SplitOp : Op<Transform_Dialect, "structured.split",
I64Attr:$static_split_point);
let results = (outs TransformHandleTypeInterface:$first,
TransformHandleTypeInterface:$second);
- let hasVerifier = 1;
let hasCustomAssemblyFormat = 1;
+ let hasVerifier = 1;
}
//===----------------------------------------------------------------------===//
@@ -2131,7 +2131,6 @@ def MaskedVectorizeOp : Op<Transform_Dialect, "structured.masked_vectorize",
attr-dict
`:` type($target)
}];
-
let hasVerifier = 1;
let extraClassDeclaration = [{
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index aaf337f36a6081..835cd978cd3a4e 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -40,6 +40,7 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include <type_traits>
using namespace mlir;
using namespace mlir::gpu;
@@ -835,6 +836,13 @@ void EliminateBarriersOp::populatePatterns(RewritePatternSet &patterns) {
// Block and thread mapping utilities.
//===----------------------------------------------------------------------===//
+namespace {
+/// Local types used for mapping verification.
+struct MappingKind {};
+struct BlockMappingKind : MappingKind {};
+struct ThreadMappingKind : MappingKind {};
+} // namespace
+
static DiagnosedSilenceableFailure
definiteFailureHelper(std::optional<TransformOpInterface> transformOp,
Operation *target, const Twine &message) {
@@ -844,12 +852,14 @@ definiteFailureHelper(std::optional<TransformOpInterface> transformOp,
}
/// Check if given mapping attributes are one of the desired attributes
+template <typename MappingKindType>
static DiagnosedSilenceableFailure
checkMappingAttributeTypes(std::optional<TransformOpInterface> transformOp,
scf::ForallOp forallOp) {
- if (!forallOp.getMapping().has_value())
+ if (!forallOp.getMapping().has_value()) {
return definiteFailureHelper(transformOp, forallOp,
- "mapping must be present");
+ "scf.forall op requires a mapping attribute");
+ }
bool hasBlockMapping =
llvm::any_of(forallOp.getMapping().value(), [](Attribute attr) {
@@ -877,6 +887,18 @@ checkMappingAttributeTypes(std::optional<TransformOpInterface> transformOp,
transformOp, forallOp,
"cannot mix different mapping types, use nesting");
}
+ if (std::is_same<MappingKindType, BlockMappingKind>::value &&
+ !hasBlockMapping) {
+ return definiteFailureHelper(
+ transformOp, forallOp,
+ "scf.forall op requires a mapping attribute of kind 'block'");
+ }
+ if (std::is_same<MappingKindType, ThreadMappingKind>::value &&
+ !hasThreadMapping && !hasWarpMapping && !hasWarpgroupMapping) {
+ return definiteFailureHelper(transformOp, forallOp,
+ "scf.forall op requires a mapping attribute "
+ "of kind 'thread' or 'warp'");
+ }
DenseSet<Attribute> seen;
for (Attribute map : forallOp.getMapping()->getValue()) {
@@ -902,12 +924,13 @@ checkMappingAttributeTypes(std::optional<TransformOpInterface> transformOp,
return DiagnosedSilenceableFailure::success();
}
+template <typename MappingKindType>
static DiagnosedSilenceableFailure
verifyGpuMapping(std::optional<TransformOpInterface> transformOp,
scf::ForallOp forallOp) {
// Check the types of the mapping attributes match.
DiagnosedSilenceableFailure typeRes =
- checkMappingAttributeTypes(transformOp, forallOp);
+ checkMappingAttributeTypes<MappingKindType>(transformOp, forallOp);
if (!typeRes.succeeded())
return typeRes;
@@ -967,13 +990,6 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder) {
LDBG("--start rewriteOneForallCommonImpl");
- // Step 0. GPU-specific verifications. There is no better place to anchor
- // those right now: the ForallOp is target-independent and the transform
- // op does not apply to individual ForallOp.
- DiagnosedSilenceableFailure diag = verifyGpuMapping(transformOp, forallOp);
- if (!diag.succeeded())
- return diag;
-
// Step 1. Complete the mapping to a full mapping (with 1s) if necessary.
auto numParallelIterations =
getConstantIntValues(forallOp.getMixedUpperBound());
@@ -1143,6 +1159,16 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapForallToBlocksImpl(
const GpuIdBuilder &gpuIdBuilder) {
LDBG("Start mapForallToBlocksImpl");
+ {
+ // GPU-specific verifications. There is no better place to anchor
+ // those right now: the ForallOp is target-independent and the transform
+ // op does not apply to individual ForallOp.
+ DiagnosedSilenceableFailure diag =
+ verifyGpuMapping<BlockMappingKind>(transformOp, forallOp);
+ if (!diag.succeeded())
+ return diag;
+ }
+
Location loc = forallOp.getLoc();
Block *parentBlock = forallOp->getBlock();
Value zero;
@@ -1194,7 +1220,7 @@ mlir::transform::gpu::findTopLevelForallOp(Operation *target,
return WalkResult::advance();
});
- if (walkResult.wasInterrupted())
+ if (walkResult.wasInterrupted() || !topLevelForallOp)
return transformOp.emitSilenceableError()
<< "could not find a unique topLevel scf.forall";
return DiagnosedSilenceableFailure::success();
@@ -1222,6 +1248,7 @@ DiagnosedSilenceableFailure transform::MapForallToBlocks::applyToOne(
diag.attachNote(target->getLoc()) << "when applied to this payload op";
return diag;
}
+ assert(topLevelForallOp && "expect an scf.forall");
SmallVector<int64_t> gridDims{getGridDims()};
if (!getGenerateGpuLaunch() && gridDims.size() != 3)
@@ -1244,9 +1271,12 @@ DiagnosedSilenceableFailure transform::MapForallToBlocks::applyToOne(
}
// The BlockIdBuilder adapts to whatever is thrown at it.
- auto mappingAttr = cast<DeviceMappingAttrInterface>(
- topLevelForallOp.getMapping()->getValue().front());
- bool useLinearMapping = mappingAttr.isLinearMapping();
+ bool useLinearMapping = false;
+ if (topLevelForallOp.getMapping()) {
+ auto mappingAttr = cast<DeviceMappingAttrInterface>(
+ topLevelForallOp.getMapping()->getValue().front());
+ useLinearMapping = mappingAttr.isLinearMapping();
+ }
GpuBlockIdBuilder gpuBlockIdBuilder(getContext(), useLinearMapping);
diag = mlir::transform::gpu::mapForallToBlocksImpl(
@@ -1264,6 +1294,13 @@ DiagnosedSilenceableFailure transform::MapForallToBlocks::applyToOne(
return diag;
}
+LogicalResult transform::MapForallToBlocks::verify() {
+ if (!getGridDims().empty() && getGridDims().size() != 3) {
+ return emitOpError() << "transform requires empty or size-3 grid_dims";
+ }
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// MapNestedForallToThreads
//===----------------------------------------------------------------------===//
@@ -1283,8 +1320,8 @@ static DiagnosedSilenceableFailure checkMappingSpec(
computeProduct(blockOrGridSizes)) {
auto diag = definiteFailureHelper(
transformOp, forallOp,
- Twine(
- "the number of required parallel resources (blocks or threads) ") +
+ Twine("the number of required parallel resources (blocks or "
+ "threads) ") +
std::to_string(computeProduct(numParallelIterations) * factor) +
std::string(" overflows the number of available resources ") +
std::to_string(computeProduct(blockOrGridSizes)));
@@ -1345,6 +1382,16 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapOneForallToThreadsImpl(
scf::ForallOp forallOp, ArrayRef<int64_t> blockSizes, int64_t warpSize,
bool syncAfterDistribute) {
+ {
+ // GPU-specific verifications. There is no better place to anchor
+ // those right now: the ForallOp is target-independent and the transform
+ // op does not apply to individual ForallOp.
+ DiagnosedSilenceableFailure diag =
+ verifyGpuMapping<ThreadMappingKind>(transformOp, forallOp);
+ if (!diag.succeeded())
+ return diag;
+ }
+
GpuIdBuilder gpuIdBuilder;
{
// Try to construct the id builder, if it fails, return.
diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
index 564fdd46e91b17..74b34a88fe73c3 100644
--- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
@@ -328,3 +328,92 @@ transform.sequence failures(propagate) {
// expected-error @below {{cannot mix linear and non-linear mapping modes}}
transform.gpu.map_nested_forall_to_threads %funcop block_dims = [32, 32, 1] : (!transform.any_op) -> !transform.any_op
}
+
+// -----
+
+// expected-note @below {{when applied to this payload op}}
+module {
+transform.sequence failures(propagate) {
+^bb1(%op: !transform.any_op):
+ // expected-error @below {{could not find a unique topLevel scf.forall}}
+ %gpu_launch = transform.gpu.map_forall_to_blocks %op generate_gpu_launch grid_dims = [1, 1, 1]
+ : (!transform.any_op) -> !transform.any_op
+}
+}
+
+// -----
+
+func.func public @improperly_sized_grid_dims(%arg0: memref<32x32xf32>, %arg1: memref<32x32xf32>, %arg2: memref<32x32xf32>) {
+ scf.forall (%arg3, %arg4) in (1, 1) {
+ linalg.matmul ins(%arg0, %arg1 : memref<32x32xf32>, memref<32x32xf32>) outs(%arg2 : memref<32x32xf32>)
+ } {mapping = [#gpu.block<x>, #gpu.block<y>]}
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb0(%arg1: !transform.any_op):
+ %arg0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // expected-error @below {{transform requires empty or size-3 grid_dims}}
+ %5 = transform.gpu.map_forall_to_blocks %arg1 generate_gpu_launch grid_dims = [50, 16] : (!transform.any_op) -> !transform.any_op
+}
+
+// -----
+
+func.func public @missing_mapping_attribute(%arg0: memref<32x32xf32>, %arg1: memref<32x32xf32>, %arg2: memref<32x32xf32>) {
+ scf.forall (%arg3, %arg4) in (1, 1) {
+ linalg.matmul ins(%arg0, %arg1 : memref<32x32xf32>, memref<32x32xf32>) outs(%arg2 : memref<32x32xf32>)
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb0(%arg1: !transform.any_op):
+ %arg0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // expected-error @below {{scf.forall op requires a mapping attribute}}
+ %5 = transform.gpu.map_forall_to_blocks %arg1 generate_gpu_launch grid_dims = [50, 16, 1] : (!transform.any_op) -> !transform.any_op
+}
+
+// -----
+
+func.func public @not_a_block_mapping_attribute(%arg0: memref<32x32xf32>, %arg1: memref<32x32xf32>, %arg2: memref<32x32xf32>) {
+ scf.forall (%arg3, %arg4) in (1, 1) {
+ linalg.matmul ins(%arg0, %arg1 : memref<32x32xf32>, memref<32x32xf32>) outs(%arg2 : memref<32x32xf32>)
+ } {mapping = [#gpu.thread<x>, #gpu.thread<y>]}
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb0(%arg1: !transform.any_op):
+ %arg0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // expected-error @below {{scf.forall op requires a mapping attribute of kind 'block'}}
+ %5 = transform.gpu.map_forall_to_blocks %arg1 generate_gpu_launch grid_dims = [50, 16, 1] : (!transform.any_op) -> !transform.any_op
+}
+
+// -----
+
+func.func @not_a_thread_or_warp_mapping_attribute(%x: memref<2 x 32 x f32>, %y: memref<2 x 32 x f32>, %t: memref<32 x f32>, %alpha : f32, %stream : !gpu.async.token) -> memref<2 x 32 x f32> {
+ %one = arith.constant 1 : index
+ %c900 = arith.constant 900 : index
+ %c9 = arith.constant 9 : index
+ %c7 = arith.constant 7 : index
+ %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
+ threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one)
+ {
+ scf.forall (%i, %j) in (%c7, %c900) {
+ %4 = memref.load %x[%i, %j] : memref<2 x 32 x f32>
+ %5 = memref.load %y[%i, %j] : memref<2 x 32 x f32>
+ %6 = math.fma %alpha, %4, %5 : f32
+ memref.store %6, %y[%i, %j] : memref<2 x 32 x f32>
+ } { mapping = [#gpu.block<y>, #gpu.block<x>] }
+ gpu.terminator
+ }
+
+ return %y : memref<2 x 32 x f32>
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg0: !transform.any_op):
+ %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ // expected-error @below {{scf.forall op requires a mapping attribute of kind 'thread' or 'warp'}}
+ transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1, 1, 1] : (!transform.any_op) -> !transform.any_op
+}
diff --git a/mlir/test/python/dialects/transform_gpu_ext.py b/mlir/test/python/dialects/transform_gpu_ext.py
index db2899592609c5..17f5a6d38f8c1a 100644
--- a/mlir/test/python/dialects/transform_gpu_ext.py
+++ b/mlir/test/python/dialects/transform_gpu_ext.py
@@ -42,10 +42,10 @@ def testMapForallToBlocksTyped(target):
@run
def testMapForallToBlocksGridDims(target):
- gpu.MapForallToBlocks(target, grid_dims=[4, 2])
+ gpu.MapForallToBlocks(target, grid_dims=[4, 2, 1])
# CHECK-LABEL: TEST: testMapForallToBlocksGridDims
# CHECK: = transform.gpu.map_forall_to_blocks
- # CHECK-SAME: grid_dims = [4, 2]
+ # CHECK-SAME: grid_dims = [4, 2, 1]
# CHECK-SAME: (!transform.any_op) -> !transform.any_op
More information about the Mlir-commits
mailing list