[Mlir-commits] [mlir] [mlir][Linalg]: Optimize linalg generic in transform::PromoteOp to avoid unnecessary copies (PR #68555)
llvmlistbot at llvm.org
Sun Oct 8 23:02:53 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-mlir
Changes:
If an operand of a generic linalg operation is not used inside the payload, there is no need to copy it into the promoted buffer before the operation.
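For illustration, here is a minimal hypothetical example (not taken from the patch; the function and value names are made up): the second input `%unused` is listed in `ins(...)` but its block argument is never read in the payload, so promotion no longer needs to emit a copy-in for it. The same holds for an output whose block argument is never read, which is exactly the copy removed from the promote.mlir test below.

```mlir
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @generic_with_unused_input(%used: memref<4x3xf32>,
                                     %unused: memref<4x3xf32>,
                                     %out: memref<4x3xf32>) {
  // %unused feeds the generic, but its block argument %b is never read in
  // the payload, so no copy into the promoted buffer is needed for it.
  linalg.generic {indexing_maps = [#map, #map, #map],
                  iterator_types = ["parallel", "parallel"]}
      ins(%used, %unused : memref<4x3xf32>, memref<4x3xf32>)
      outs(%out : memref<4x3xf32>) {
  ^bb0(%a: f32, %b: f32, %o: f32):
    linalg.yield %a : f32
  }
  return
}
```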
---
Full diff: https://github.com/llvm/llvm-project/pull/68555.diff
3 Files Affected:
- (modified) mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp (+10)
- (modified) mlir/test/Dialect/GPU/promotion.mlir (+1)
- (modified) mlir/test/Dialect/Linalg/promote.mlir (-1)
``````````diff
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
index ad399f57f72cb1b..a131f3097666197 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
@@ -28,6 +28,7 @@
#include "mlir/Transforms/FoldUtils.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -142,6 +143,8 @@ struct LinalgOpInstancePromotionOptions {
const LinalgPromotionOptions &options);
/// SubViews to promote.
MapVector<int64_t, Value> subViews;
+ /// Subviews operand numbers to copy in using copyInFn.
+ llvm::SmallSet<int64_t, 4> operandsNumbersToCopyIn;
/// True if the full view should be used for the promoted buffer.
DenseMap<Value, bool> useFullTileBuffers;
@@ -174,6 +177,11 @@ LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions(
Operation *op = opOperand.get().getDefiningOp();
if (auto sv = dyn_cast_or_null<memref::SubViewOp>(op)) {
subViews[operandNumber] = sv;
+ // In case of linalg generic, copy in only if subview is used in linalg
+ // payload.
+ if (!isa<linalg::GenericOp>(linalgOp) ||
+ linalgOp.payloadUsesValueFromOperand(&opOperand))
+ operandsNumbersToCopyIn.insert(operandNumber);
useFullTileBuffers[sv] = vUseFullTileBuffers[operandNumber];
}
}
@@ -324,6 +332,8 @@ promoteSubViews(ImplicitLocOpBuilder &b,
auto info = promotionInfoMap.find(v.first);
if (info == promotionInfoMap.end())
continue;
+ if (options.operandsNumbersToCopyIn.count(v.first) == 0)
+ continue;
if (failed(options.copyInFn(
b, cast<memref::SubViewOp>(v.second.getDefiningOp()),
info->second.partialLocalView)))
diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir
index b4668b56788943d..2da1be597753bfd 100644
--- a/mlir/test/Dialect/GPU/promotion.mlir
+++ b/mlir/test/Dialect/GPU/promotion.mlir
@@ -1,3 +1,4 @@
+
// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s
gpu.module @foo {
diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir
index 5cd56db7fd2d88c..31b29c0e105d99d 100644
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -353,7 +353,6 @@ func.func @linalg_generic_update_all_function_inputs_outputs(%arg0: memref<3x4xf
// CHECK: %[[VAL_62:.*]] = memref.subview %[[VAL_61]][0, 0] {{\[}}%[[VAL_52]], %[[VAL_55]]] [1, 1] : memref<?x?xf32, #gpu.address_space<workgroup>> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
// CHECK: memref.copy %[[VAL_3]], %[[VAL_24]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
// CHECK: memref.copy %[[VAL_4]], %[[VAL_43]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
- // CHECK: memref.copy %[[VAL_5]], %[[VAL_62]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
// CHECK: linalg.generic {doc = "", indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"], library_call = ""} ins(%[[VAL_24]], %[[VAL_43]] : memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>, memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>) outs(%[[VAL_62]] : memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>) {
// CHECK: ^bb0(%[[VAL_63:.*]]: f32, %[[VAL_64:.*]]: f32, %[[VAL_65:.*]]: f32):
// CHECK: %[[VAL_66:.*]] = arith.addf %[[VAL_63]], %[[VAL_64]] : f32
``````````
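For context, a rough sketch of how this promotion is typically driven through transform::PromoteOp (a hypothetical schedule, not part of the patch; option names and exact syntax may vary across MLIR versions):

```mlir
transform.sequence failures(propagate) {
^bb0(%arg0: !transform.any_op):
  %0 = transform.structured.match ops{["linalg.generic"]} in %arg0
      : (!transform.any_op) -> !transform.any_op
  // Promote all three operands; with this change, copy-ins are only emitted
  // for operands whose values are actually used in the generic's payload.
  %1 = transform.structured.promote %0 { operands_to_promote = [0, 1, 2] }
      : (!transform.any_op) -> !transform.any_op
}
```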
https://github.com/llvm/llvm-project/pull/68555
More information about the Mlir-commits mailing list