[Mlir-commits] [mlir] 5c1d356 - [mlir][Linalg] Enable vectorization of explicit broadcasts
Diego Caballero
llvmlistbot at llvm.org
Tue Oct 12 14:10:19 PDT 2021
Author: Diego Caballero
Date: 2021-10-12T21:08:22Z
New Revision: 5c1d356c18c3ef9e63cf9607af660fb0b1c8a6ca
URL: https://github.com/llvm/llvm-project/commit/5c1d356c18c3ef9e63cf9607af660fb0b1c8a6ca
DIFF: https://github.com/llvm/llvm-project/commit/5c1d356c18c3ef9e63cf9607af660fb0b1c8a6ca.diff
LOG: [mlir][Linalg] Enable vectorization of explicit broadcasts
This patch teaches `isProjectedPermutation` and `inverseAndBroadcastProjectedPermutation`
utilities to deal with maps representing an explicit broadcast, e.g., (d0, d1) -> (d0, 0).
This extension is needed to enable vectorization of such explicit broadcasts in Linalg.
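For reference, a minimal sketch of how the relaxed check behaves on such a map. This is illustrative only and not part of the patch; the function name and the `ctx` context pointer are assumptions.

```c++
#include <cassert>

#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"

using namespace mlir;

// Builds (d0, d1) -> (d0, 0), where the second result is an explicit broadcast.
void broadcastMapExample(MLIRContext *ctx) {
  AffineMap map = AffineMap::get(
      /*dimCount=*/2, /*symbolCount=*/0,
      {getAffineDimExpr(0, ctx), getAffineConstantExpr(0, ctx)}, ctx);

  // Rejected by the default check, accepted once zeros are allowed.
  assert(!map.isProjectedPermutation());
  assert(map.isProjectedPermutation(/*allowZeroInResults=*/true));
}
```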
Reviewed By: pifon2a, nicolasvasilache
Differential Revision: https://reviews.llvm.org/D111563
Added:
Modified:
mlir/include/mlir/IR/AffineMap.h
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
mlir/lib/IR/AffineMap.cpp
mlir/test/Dialect/Linalg/vectorization.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h
index 906c53db4b32d..40052d76d8a37 100644
--- a/mlir/include/mlir/IR/AffineMap.h
+++ b/mlir/include/mlir/IR/AffineMap.h
@@ -273,8 +273,11 @@ class AffineMap {
SmallVector<int64_t, 4> compose(ArrayRef<int64_t> values) const;
/// Returns true if the AffineMap represents a subset (i.e. a projection) of a
- /// symbol-less permutation map.
- bool isProjectedPermutation() const;
+ /// symbol-less permutation map. `allowZeroInResults` allows projected
+ /// permutation maps with constant zero result expressions.
+ /// TODO: Remove `allowZeroInResults` when constant zero result expressions
+ /// are broadly supported.
+ bool isProjectedPermutation(bool allowZeroInResults = false) const;
/// Returns true if the AffineMap represents a symbol-less permutation map.
bool isPermutation() const;
@@ -464,6 +467,17 @@ AffineMap inversePermutation(AffineMap map);
/// ```mlir
/// affine_map<(d0) -> (0, 0, d0, 0)>
/// ```
+/// Example 4:
+///
+/// ```mlir
+/// affine_map<(d0, d1, d2) -> (d0, 0)>
+/// ```
+///
+/// returns:
+///
+/// ```mlir
+/// affine_map<(d0, d1) -> (d0, 0, 0)>
+/// ```
AffineMap inverseAndBroadcastProjectedPermuation(AffineMap map);
/// Concatenates a list of `maps` into a single AffineMap, stepping over
@@ -518,9 +532,16 @@ SmallVector<T> applyPermutationMap(AffineMap map, llvm::ArrayRef<T> source) {
assert(map.getNumInputs() == source.size());
SmallVector<T> result;
result.reserve(map.getNumResults());
- for (unsigned i = 0, e = map.getNumResults(); i < e; ++i) {
- unsigned dim = map.getDimPosition(i);
- result.push_back(source[dim]);
+ for (AffineExpr expr : map.getResults()) {
+ if (auto dimExpr = expr.dyn_cast<AffineDimExpr>()) {
+ result.push_back(source[dimExpr.getPosition()]);
+ } else if (auto constExpr = expr.dyn_cast<AffineConstantExpr>()) {
+ assert(constExpr.getValue() == 0 &&
+ "Unexpected constant in projected permutation map");
+ result.push_back(0);
+ } else {
+ llvm_unreachable("Unexpected result in projected permutation map");
+ }
}
return result;
}
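With the `applyPermutationMap` change above, a constant-zero result simply produces `0` in the output. A hedged usage sketch, reusing the broadcast map `(d0, d1) -> (d0, 0)` built as `map` in the earlier sketch (the source values are made up for illustration):

```c++
// Applying (d0, d1) -> (d0, 0) to the source [4, 8]: the d0 result picks
// source[0]; the constant-zero result is emitted as 0.
llvm::SmallVector<int64_t> res =
    mlir::applyPermutationMap(map, llvm::ArrayRef<int64_t>{4, 8});
// res == {4, 0}
```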
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index b6408135b7b7a..cbe7e3f430678 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -77,7 +77,8 @@ static OpType getSingleOpOfType(Block &block) {
/// map is reindexed to `affine_map<(d0, d1, d2) -> (d2, d0, d1)>`, the second
/// affine map is reindexed to `affine_map<(d0, d1) -> (d0, d1)>`.
static AffineMap reindexIndexingMap(AffineMap map) {
- assert(map.isProjectedPermutation() && "expected projected permutation");
+ assert(map.isProjectedPermutation(/*allowZeroInResults=*/true) &&
+ "expected projected permutation");
auto res = compressUnusedDims(map);
assert(res.getNumDims() == res.getNumResults() &&
"expected reindexed map with same number of dims and results");
@@ -593,8 +594,9 @@ static LogicalResult vectorizeContraction(OpBuilder &b, LinalgOp linalgOp,
}
static bool allIndexingsAreProjectedPermutation(LinalgOp op) {
- return llvm::all_of(op.getIndexingMaps(),
- [](AffineMap m) { return m.isProjectedPermutation(); });
+ return llvm::all_of(op.getIndexingMaps(), [](AffineMap m) {
+ return m.isProjectedPermutation(/*allowZeroInResults=*/true);
+ });
}
// TODO: probably need some extra checks for reduction followed by consumer
diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp
index 9c6f25d3c53e0..beb47d11ba35d 100644
--- a/mlir/lib/IR/AffineMap.cpp
+++ b/mlir/lib/IR/AffineMap.cpp
@@ -495,19 +495,33 @@ SmallVector<int64_t, 4> AffineMap::compose(ArrayRef<int64_t> values) const {
return res;
}
-bool AffineMap::isProjectedPermutation() const {
+bool AffineMap::isProjectedPermutation(bool allowZeroInResults) const {
if (getNumSymbols() > 0)
return false;
+
+ // Having more results than inputs means that results have duplicated dims or
+ // zeros that can't be mapped to input dims.
+ if (getNumResults() > getNumInputs())
+ return false;
+
SmallVector<bool, 8> seen(getNumInputs(), false);
+ // A projected permutation can have at most one instance of each input
+ // dimension in the result expressions. Zeros are allowed as long as the
+ // number of result expressions does not exceed the number of input
+ // expressions.
for (auto expr : getResults()) {
if (auto dim = expr.dyn_cast<AffineDimExpr>()) {
if (seen[dim.getPosition()])
return false;
seen[dim.getPosition()] = true;
- continue;
+ } else {
+ auto constExpr = expr.dyn_cast<AffineConstantExpr>();
+ if (!allowZeroInResults || !constExpr || constExpr.getValue() != 0)
+ return false;
}
- return false;
}
+
+ // Results are either dims or zeros and zeros can be mapped to input dims.
return true;
}
@@ -696,13 +710,21 @@ AffineMap mlir::inversePermutation(AffineMap map) {
}
AffineMap mlir::inverseAndBroadcastProjectedPermuation(AffineMap map) {
- assert(map.isProjectedPermutation());
+ assert(map.isProjectedPermutation(/*allowZeroInResults=*/true));
MLIRContext *context = map.getContext();
AffineExpr zero = mlir::getAffineConstantExpr(0, context);
// Start with all the results as 0.
SmallVector<AffineExpr, 4> exprs(map.getNumInputs(), zero);
for (unsigned i : llvm::seq(unsigned(0), map.getNumResults())) {
- // Reverse each dimension existing in the oringal map result.
+ // Skip zeros from input map. 'exprs' is already initialized to zero.
+ if (auto constExpr = map.getResult(i).dyn_cast<AffineConstantExpr>()) {
+ assert(constExpr.getValue() == 0 &&
+ "Unexpected constant in projected permutation");
+ (void)constExpr;
+ continue;
+ }
+
+ // Reverse each dimension existing in the original map result.
exprs[map.getDimPosition(i)] = getAffineDimExpr(i, context);
}
return AffineMap::get(map.getNumResults(), /*symbolCount=*/0, exprs, context);
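For the new skip-zeros path, a hedged sketch of what `inverseAndBroadcastProjectedPermuation` now returns for the map from Example 4 in the header comment (assumes an `MLIRContext *ctx`; the variable names are illustrative):

```c++
// (d0, d1, d2) -> (d0, 0): one real dim, one explicit broadcast, d1/d2 unused.
mlir::AffineMap bcast = mlir::AffineMap::get(
    /*dimCount=*/3, /*symbolCount=*/0,
    {mlir::getAffineDimExpr(0, ctx), mlir::getAffineConstantExpr(0, ctx)}, ctx);

// The zero result is skipped; positions with no mapped dim stay 0.
mlir::AffineMap inv = mlir::inverseAndBroadcastProjectedPermuation(bcast);
// inv == (d0, d1) -> (d0, 0, 0)
```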
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index c3a4a05413deb..1e6c8010c3099 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -863,6 +863,65 @@ func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// -----
+// CHECK-DAG: #[[$M5:.*]] = affine_map<(d0, d1) -> (d0, 0)>
+
+// CHECK-LABEL: func @explicit_broadcast(
+func @explicit_broadcast(%arg0: tensor<4x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
+ // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true]} : tensor<4x4xf32>, vector<4x4xf32>
+ // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true], permutation_map = #[[$M5]]} : tensor<4x1xf32>, vector<4x4xf32>
+ // CHECK: subf {{.*}} : vector<4x4xf32>
+ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
+ %c0 = constant 0.0 : f32
+ %init = linalg.init_tensor [4, 4] : tensor<4x4xf32>
+ %fill = linalg.fill(%c0, %init) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
+ %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, 0)>,
+ affine_map<(d0, d1) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel"]}
+ ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>)
+ outs(%fill : tensor<4x4xf32>) {
+ ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
+ %40 = subf %arg7, %arg8 : f32
+ linalg.yield %40 : f32
+ } -> tensor<4x4xf32>
+ return %red : tensor<4x4xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[$M6:.*]] = affine_map<(d0, d1) -> (d0, 0)>
+// CHECK-DAG: #[[$M7:.*]] = affine_map<(d0) -> (d0, 0)>
+
+// CHECK-LABEL: func @fused_broadcast_red_2d
+func @fused_broadcast_red_2d(%arg0: tensor<4x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4xf32> {
+ // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true]} : tensor<4x4xf32>, vector<4x4xf32>
+ // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true], permutation_map = #[[$M6]]} : tensor<4x1xf32>, vector<4x4xf32>
+ // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true], permutation_map = #[[$M7]]} : tensor<4xf32>, vector<4x4xf32>
+ // CHECK: subf {{.*}} : vector<4x4xf32>
+ // CHECK: math.exp {{.*}} : vector<4x4xf32>
+ // CHECK: addf {{.*}} : vector<4x4xf32>
+ // CHECK: vector.multi_reduction #vector.kind<add>, {{.*}} : vector<4x4xf32> to vector<4xf32>
+ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
+ %c0 = constant 0.0 : f32
+ %init = linalg.init_tensor [4] : tensor<4xf32>
+ %fill = linalg.fill(%c0, %init) : f32, tensor<4xf32> -> tensor<4xf32>
+ %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, 0)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>)
+ outs(%fill : tensor<4xf32>) {
+ ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
+ %40 = subf %arg7, %arg8 : f32
+ %41 = math.exp %40 : f32
+ %42 = addf %41, %arg9 : f32
+ linalg.yield %42 : f32
+ } -> tensor<4xf32>
+ return %red : tensor<4xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @reduce_1d(
// CHECK-SAME: %[[A:.*]]: tensor<32xf32>
func @reduce_1d(%arg0: tensor<32xf32>) -> tensor<f32> {
@@ -899,4 +958,3 @@ func @reduce_1d(%arg0: tensor<32xf32>) -> tensor<f32> {
return %2 : tensor<f32>
}
-