[Mlir-commits] [mlir] a800ffa - [mlir][gpu] Disjoint patterns for lowering clustered subgroup reduce (#109158)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Sep 18 12:55:57 PDT 2024
Author: Andrea Faulds
Date: 2024-09-18T15:55:53-04:00
New Revision: a800ffac4115259a76d803512eda31e4de787570
URL: https://github.com/llvm/llvm-project/commit/a800ffac4115259a76d803512eda31e4de787570
DIFF: https://github.com/llvm/llvm-project/commit/a800ffac4115259a76d803512eda31e4de787570.diff
LOG: [mlir][gpu] Disjoint patterns for lowering clustered subgroup reduce (#109158)
Making the existing populateGpuLowerSubgroupReduceToShufflePatterns()
function also cover the new "clustered" subgroup reductions is proving
to be inconvenient, because certain backends may have more specific
lowerings that only cover the non-clustered type, and this creates pass
ordering constraints. This commit removes coverage of clustered
reductions from this function in favour of a new separate function,
which makes controlling the lowering much more straightforward.
Added:
Modified:
mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 67baa8777a6fcc..8eb711962583da 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -73,10 +73,20 @@ void populateGpuBreakDownSubgroupReducePatterns(
/// Collect a set of patterns to lower `gpu.subgroup_reduce` into `gpu.shuffle`
/// ops over `shuffleBitwidth` scalar types. Assumes that the subgroup has
/// `subgroupSize` lanes. Uses the butterfly shuffle algorithm.
+///
+/// The patterns populated by this function will ignore ops with the
+/// `cluster_size` attribute.
+/// `populateGpuLowerClusteredSubgroupReduceToShufflePatterns` is the opposite.
void populateGpuLowerSubgroupReduceToShufflePatterns(
RewritePatternSet &patterns, unsigned subgroupSize,
unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
+/// Disjoint counterpart of `populateGpuLowerSubgroupReduceToShufflePatterns`
+/// that only matches `gpu.subgroup_reduce` ops with a `cluster_size`.
+void populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
+ RewritePatternSet &patterns, unsigned subgroupSize,
+ unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
+
/// Collect all patterns to rewrite ops within the GPU dialect.
inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
populateGpuAllReducePatterns(patterns);
diff --git a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
index b166f1cd469a4d..185f824351a230 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
@@ -210,13 +210,21 @@ Value createSubgroupShuffleReduction(OpBuilder &builder, Location loc,
struct ScalarSubgroupReduceToShuffles final
: OpRewritePattern<gpu::SubgroupReduceOp> {
ScalarSubgroupReduceToShuffles(MLIRContext *ctx, unsigned subgroupSize,
- unsigned shuffleBitwidth,
+ unsigned shuffleBitwidth, bool matchClustered,
PatternBenefit benefit)
: OpRewritePattern(ctx, benefit), subgroupSize(subgroupSize),
- shuffleBitwidth(shuffleBitwidth) {}
+ shuffleBitwidth(shuffleBitwidth), matchClustered(matchClustered) {}
LogicalResult matchAndRewrite(gpu::SubgroupReduceOp op,
PatternRewriter &rewriter) const override {
+ if (op.getClusterSize().has_value() != matchClustered) {
+ return rewriter.notifyMatchFailure(
+ op, llvm::formatv("op is {0}clustered but pattern is configured to "
+ "only match {1}clustered ops",
+ matchClustered ? "non-" : "",
+ matchClustered ? "" : "non-"));
+ }
+
auto ci = getAndValidateClusterInfo(op, subgroupSize);
if (failed(ci))
return failure();
@@ -262,19 +270,28 @@ struct ScalarSubgroupReduceToShuffles final
private:
unsigned subgroupSize = 0;
unsigned shuffleBitwidth = 0;
+ bool matchClustered = false;
};
/// Lowers vector gpu subgroup reductions to a series of shuffles.
struct VectorSubgroupReduceToShuffles final
: OpRewritePattern<gpu::SubgroupReduceOp> {
VectorSubgroupReduceToShuffles(MLIRContext *ctx, unsigned subgroupSize,
- unsigned shuffleBitwidth,
+ unsigned shuffleBitwidth, bool matchClustered,
PatternBenefit benefit)
: OpRewritePattern(ctx, benefit), subgroupSize(subgroupSize),
- shuffleBitwidth(shuffleBitwidth) {}
+ shuffleBitwidth(shuffleBitwidth), matchClustered(matchClustered) {}
LogicalResult matchAndRewrite(gpu::SubgroupReduceOp op,
PatternRewriter &rewriter) const override {
+ if (op.getClusterSize().has_value() != matchClustered) {
+ return rewriter.notifyMatchFailure(
+ op, llvm::formatv("op is {0}clustered but pattern is configured to "
+ "only match {1}clustered ops",
+ matchClustered ? "non-" : "",
+ matchClustered ? "" : "non-"));
+ }
+
auto ci = getAndValidateClusterInfo(op, subgroupSize);
if (failed(ci))
return failure();
@@ -343,6 +360,7 @@ struct VectorSubgroupReduceToShuffles final
private:
unsigned subgroupSize = 0;
unsigned shuffleBitwidth = 0;
+ bool matchClustered = false;
};
} // namespace
@@ -358,5 +376,14 @@ void mlir::populateGpuLowerSubgroupReduceToShufflePatterns(
RewritePatternSet &patterns, unsigned subgroupSize,
unsigned shuffleBitwidth, PatternBenefit benefit) {
patterns.add<ScalarSubgroupReduceToShuffles, VectorSubgroupReduceToShuffles>(
- patterns.getContext(), subgroupSize, shuffleBitwidth, benefit);
+ patterns.getContext(), subgroupSize, shuffleBitwidth,
+ /*matchClustered=*/false, benefit);
+}
+
+void mlir::populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
+ RewritePatternSet &patterns, unsigned subgroupSize,
+ unsigned shuffleBitwidth, PatternBenefit benefit) {
+ patterns.add<ScalarSubgroupReduceToShuffles, VectorSubgroupReduceToShuffles>(
+ patterns.getContext(), subgroupSize, shuffleBitwidth,
+ /*matchClustered=*/true, benefit);
}
diff --git a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
index 99a914506b011a..74d057c0b7b6cb 100644
--- a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
@@ -78,9 +78,12 @@ struct TestGpuSubgroupReduceLoweringPass
populateGpuBreakDownSubgroupReducePatterns(patterns,
/*maxShuffleBitwidth=*/32,
PatternBenefit(2));
- if (expandToShuffles)
+ if (expandToShuffles) {
populateGpuLowerSubgroupReduceToShufflePatterns(
patterns, /*subgroupSize=*/32, /*shuffleBitwidth=*/32);
+ populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
+ patterns, /*subgroupSize=*/32, /*shuffleBitwidth=*/32);
+ }
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
}
More information about the Mlir-commits
mailing list