[Mlir-commits] [mlir] 57eda9b - [MLIR][GPU] Add constant propagator for gpu.launch op
Uday Bondhugula
llvmlistbot at llvm.org
Sat Sep 18 00:36:59 PDT 2021
Author: Uday Bondhugula
Date: 2021-09-18T12:02:46+05:30
New Revision: 57eda9beccfebddc2a5eccc488de85285d0ed76e
URL: https://github.com/llvm/llvm-project/commit/57eda9beccfebddc2a5eccc488de85285d0ed76e
DIFF: https://github.com/llvm/llvm-project/commit/57eda9beccfebddc2a5eccc488de85285d0ed76e.diff
LOG: [MLIR][GPU] Add constant propagator for gpu.launch op
Add a constant propagator for the gpu.launch op for cases where the
grid/thread IDs can be trivially determined to take a single constant
value of zero.
Differential Revision: https://reviews.llvm.org/D109994
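
To illustrate the effect (this sketch is not part of the patch; the function
and value names are made up): when a launch dimension is bound to the
constant 1, the corresponding block/thread ID can only ever be zero, so the
pattern replaces all of its uses with a single constant 0 materialized at the
start of the launch body.

// Before canonicalization: the grid is 1x1x1 and the y/z block
// dimensions are 1, so %bx, %by, %bz, %ty and %tz can only be 0.
func @fold_ids_example(%buf: memref<32xf32>, %v: f32) {
  %c1 = constant 1 : index
  %c32 = constant 32 : index
  gpu.launch blocks(%bx, %by, %bz) in (%gx = %c1, %gy = %c1, %gz = %c1)
             threads(%tx, %ty, %tz) in (%sx = %c32, %sy = %c1, %sz = %c1) {
    %i = addi %tx, %by : index   // %by is known to be 0 here
    memref.store %v, %buf[%i] : memref<32xf32>
    gpu.terminator
  }
  return
}

// After canonicalization, uses of the trivially-zero IDs are rewritten to a
// fresh constant 0 created at the top of the launch body; %tx is left
// untouched because its range is 32:
//   %c0 = constant 0 : index
//   %i = addi %tx, %c0 : index

Because LaunchOp now declares hasCanonicalizer, the pattern runs as part of
the standard canonicalizer (e.g. under mlir-opt -canonicalize), which is
presumably how the test added to canonicalize.mlir below exercises it.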
Added:
Modified:
mlir/include/mlir/Dialect/GPU/GPUOps.td
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/test/Dialect/GPU/canonicalize.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 9290e161058b1..f3ce173108590 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -523,6 +523,7 @@ def GPU_LaunchOp : GPU_Op<"launch">,
let parser = [{ return parseLaunchOp(parser, result); }];
let printer = [{ printLaunchOp(p, *this); }];
let verifier = [{ return ::verify(*this); }];
+ let hasCanonicalizer = 1;
}
def GPU_ReturnOp : GPU_Op<"return", [HasParent<"GPUFuncOp">, NoSideEffect,
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index f148d6e4ad9e6..b3fe10d05ce04 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -21,6 +21,7 @@
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/FunctionImplementation.h"
+#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
@@ -530,6 +531,49 @@ static ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) {
parser.parseOptionalAttrDict(result.attributes));
}
+/// Simplify the gpu.launch when the range of the thread and block IDs is
+/// trivially known to be one.
+struct FoldLaunchArguments : public OpRewritePattern<LaunchOp> {
+ using OpRewritePattern<LaunchOp>::OpRewritePattern;
+ LogicalResult matchAndRewrite(LaunchOp op,
+ PatternRewriter &rewriter) const override {
+ auto isTriviallyOne = [](Value size) {
+ IntegerAttr cst;
+ return matchPattern(size, m_Constant(&cst)) && cst.getInt() == 1;
+ };
+
+ // If the range implies a single value for `id`, replace `id`'s uses by
+ // zero.
+ Value zero;
+ bool simplified = false;
+ auto constPropIdUses = [&](Value id, Value size) {
+ if (!isTriviallyOne(size))
+ return;
+ if (!simplified) {
+ // Create a zero value the first time.
+ OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(&op.body().front());
+ zero = rewriter.create<ConstantIndexOp>(op.getLoc(), /*value=*/0);
+ }
+ id.replaceAllUsesWith(zero);
+ simplified = true;
+ };
+ constPropIdUses(op.getBlockIds().x, op.gridSizeX());
+ constPropIdUses(op.getBlockIds().y, op.gridSizeY());
+ constPropIdUses(op.getBlockIds().z, op.gridSizeZ());
+ constPropIdUses(op.getThreadIds().x, op.blockSizeX());
+ constPropIdUses(op.getThreadIds().y, op.blockSizeY());
+ constPropIdUses(op.getThreadIds().z, op.blockSizeZ());
+
+ return success(simplified);
+ }
+};
+
+void LaunchOp::getCanonicalizationPatterns(RewritePatternSet &rewrites,
+ MLIRContext *context) {
+ rewrites.add<FoldLaunchArguments>(context);
+}
+
//===----------------------------------------------------------------------===//
// LaunchFuncOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index 38abfd172eead..c89059bb4b297 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -31,3 +31,59 @@ func @gpu_dim_of_alloc(%size: index) -> index {
%1 = memref.dim %0, %c0 : memref<?xindex>
return %1 : index
}
+
+// -----
+
+// CHECK-LABEL: func @simplify_gpu_launch
+func @simplify_gpu_launch() attributes {llvm.emit_c_interface} {
+ %cst = constant 0.000000e+00 : f32
+ %c1 = constant 1 : index
+ %c32 = constant 32 : index
+ %c16 = constant 16 : index
+ %c2 = constant 2 : index
+ %c0 = constant 0 : index
+ %0 = memref.alloc() : memref<2x16x16xf32>
+ scf.for %arg0 = %c0 to %c2 step %c1 {
+ scf.for %arg1 = %c0 to %c16 step %c1 {
+ scf.for %arg2 = %c0 to %c16 step %c1 {
+ memref.store %cst, %0[%arg0, %arg1, %arg2] : memref<2x16x16xf32>
+ }
+ }
+ }
+ %1 = gpu.wait async
+ %memref, %asyncToken = gpu.alloc async [%1] () : memref<2x16x16xf32>
+ %2 = gpu.memcpy async [%1] %memref, %0 : memref<2x16x16xf32>, memref<2x16x16xf32>
+ gpu.wait [%1]
+ gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
+ threads(%arg3, %arg4, %arg5) in (%arg9 = %c32, %arg10 = %c1, %arg11 = %c1) {
+ %3 = muli %arg5, %c32 : index
+ %4 = muli %arg4, %c32 : index
+ %5 = addi %3, %4 : index
+ %6 = addi %5, %arg3 : index
+ %7 = divi_unsigned %6, %c32 : index
+ %8 = muli %arg0, %c16 : index
+ %9 = muli %arg1, %c2 : index
+ %10 = muli %7, %c2 : index
+ %11 = addi %9, %10 : index
+ %12 = memref.load %memref[%11, %c0, %8] : memref<2x16x16xf32>
+ %13 = addi %11, %c1 : index
+ %14 = memref.load %memref[%13, %c0, %8] : memref<2x16x16xf32>
+ memref.store %12, %memref[%11, %c0, %8] : memref<2x16x16xf32>
+ memref.store %14, %memref[%13, %c0, %8] : memref<2x16x16xf32>
+ gpu.terminator
+ }
+ return
+}
+
+// CHECK-DAG: %[[C1:.*]] = constant 1 : index
+// CHECK-DAG: %[[C0:.*]] = constant 0 : index
+// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %[[C1]], %{{.*}} = %[[C1]], %{{.*}} = %[[C1]]) threads(%[[TIDX:.*]], %{{.*}}, %{{.*}}) in (%{{.*}} = %c32, %{{.*}} = %[[C1]], %{{.*}} = %[[C1]]) {
+// CHECK-NEXT: divi_unsigned %[[TIDX]], %c32 : index
+// CHECK-NEXT: muli %{{.*}}, %c2 : index
+// CHECK-NEXT: memref.load %memref[%{{.*}}, %[[C0]], %[[C0]]] : memref<2x16x16xf32>
+// CHECK-NEXT: addi %{{.*}}, %[[C1]] : index
+// CHECK-NEXT: memref.load %memref[%{{.*}}, %[[C0]], %[[C0]]] : memref<2x16x16xf32>
+// CHECK-NEXT: memref.store %{{.*}}, %memref[%{{.*}}, %[[C0]], %[[C0]]] : memref<2x16x16xf32>
+// CHECK-NEXT: memref.store %{{.*}}, %memref[%{{.*}}, %[[C0]], %[[C0]]] : memref<2x16x16xf32>
+// CHECK-NEXT: gpu.terminator
+// CHECK-NEXT: }