[Mlir-commits] [mlir] 2108ed0 - [MLIR] Fix affine.for unroll for multi-result upper bound maps
Uday Bondhugula
llvmlistbot at llvm.org
Fri Dec 3 17:51:13 PST 2021
Author: Uday Bondhugula
Date: 2021-12-04T07:20:26+05:30
New Revision: 2108ed0671c0581efed8de929c862dbbc3e31c24
URL: https://github.com/llvm/llvm-project/commit/2108ed0671c0581efed8de929c862dbbc3e31c24
DIFF: https://github.com/llvm/llvm-project/commit/2108ed0671c0581efed8de929c862dbbc3e31c24.diff
LOG: [MLIR] Fix affine.for unroll for multi-result upper bound maps
Fix affine.for unroll for multi-result upper bound maps: these can't be
unrolled/unroll-and-jammed in cases where the trip count isn't known to
be a multiple of the unroll factor.
Fix and clean up repeated/unnecessary checks/comments at helper callees.
Also, fix clang-tidy variable naming warnings and redundant includes.
Differential Revision: https://reviews.llvm.org/D114662
Added:
Modified:
mlir/lib/Transforms/Utils/LoopUtils.cpp
mlir/test/Dialect/Affine/unroll-jam.mlir
mlir/test/Dialect/Affine/unroll.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index c43ee3f70ea63..78df3d130f4a8 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -18,20 +18,15 @@
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/RegionUtils.h"
#include "mlir/Transforms/Utils.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,28 +51,21 @@ struct LoopParams {
/// part of the unrolled loop. Computes the bound as an AffineMap with its
/// operands or a null map when the trip count can't be expressed as an affine
/// expression.
-static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
- AffineMap &map,
- SmallVectorImpl<Value> &operands) {
- auto lbMap = forOp.getLowerBoundMap();
-
- // Single result lower bound map only.
- if (lbMap.getNumResults() != 1) {
- map = AffineMap();
- return;
- }
-
+static void
+getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
+ AffineMap &cleanupLbMap,
+ SmallVectorImpl<Value> &cleanupLbOperands) {
AffineMap tripCountMap;
SmallVector<Value, 4> tripCountOperands;
getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
-
- // Sometimes the trip count cannot be expressed as an affine expression.
+ // Trip count can't be computed.
if (!tripCountMap) {
- map = AffineMap();
+ cleanupLbMap = AffineMap();
return;
}
OpBuilder b(forOp);
+ auto lbMap = forOp.getLowerBoundMap();
auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
forOp.getLowerBoundOperands());
@@ -102,15 +90,15 @@ static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
newUbExprs[i] = b.getAffineDimExpr(0) + b.getAffineDimExpr(i + 1);
- operands.clear();
- operands.push_back(lb);
- operands.append(bumpValues.begin(), bumpValues.end());
- map = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs,
- b.getContext());
- // Simplify the map + operands.
- fullyComposeAffineMapAndOperands(&map, &operands);
- map = simplifyAffineMap(map);
- canonicalizeMapAndOperands(&map, &operands);
+ cleanupLbOperands.clear();
+ cleanupLbOperands.push_back(lb);
+ cleanupLbOperands.append(bumpValues.begin(), bumpValues.end());
+ cleanupLbMap = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs,
+ b.getContext());
+ // Simplify the cleanupLbMap + cleanupLbOperands.
+ fullyComposeAffineMapAndOperands(&cleanupLbMap, &cleanupLbOperands);
+ cleanupLbMap = simplifyAffineMap(cleanupLbMap);
+ canonicalizeMapAndOperands(&cleanupLbMap, &cleanupLbOperands);
// Remove any affine.apply's that became dead from the simplification above.
for (auto v : bumpValues)
if (v.use_empty())
@@ -192,7 +180,7 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
} else {
auto lbOperands = forOp.getLowerBoundOperands();
auto lbMap = forOp.getLowerBoundMap();
- OpBuilder builder(parentBlock, Block::iterator(forOp));
+ OpBuilder builder(forOp);
if (lbMap == builder.getDimIdentityMap()) {
// No need of generating an affine.apply.
iv.replaceAllUsesWith(lbOperands[0]);
@@ -1132,8 +1120,8 @@ static void generateUnrolledLoop(
/// Helper to generate cleanup loop for unroll or unroll-and-jam when the trip
/// count is not a multiple of `unrollFactor`.
-static void generateCleanupLoopForUnroll(AffineForOp forOp,
- uint64_t unrollFactor) {
+static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
+ uint64_t unrollFactor) {
// Insert the cleanup loop right after 'forOp'.
OpBuilder builder(forOp->getBlock(), std::next(Block::iterator(forOp)));
auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
@@ -1152,9 +1140,9 @@ static void generateCleanupLoopForUnroll(AffineForOp forOp,
AffineMap cleanupMap;
SmallVector<Value, 4> cleanupOperands;
getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
- assert(cleanupMap &&
- "cleanup loop lower bound map for single result lower bound maps "
- "can always be determined");
+ if (!cleanupMap)
+ return failure();
+
cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
// Promote the loop body up if this has turned into a single iteration loop.
(void)promoteIfSingleIteration(cleanupForOp);
@@ -1162,6 +1150,7 @@ static void generateCleanupLoopForUnroll(AffineForOp forOp,
// Adjust upper bound of the original loop; this is the same as the lower
// bound of the cleanup loop.
forOp.setUpperBound(cleanupOperands, cleanupMap);
+ return success();
}
/// Unrolls this loop by the specified factor. Returns success if the loop
@@ -1178,14 +1167,6 @@ LogicalResult mlir::loopUnrollByFactor(
if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
return success();
- // Loops where the lower bound is a max expression isn't supported for
- // unrolling since the trip count can be expressed as an affine function when
- // both the lower bound and the upper bound are multi-result maps. However,
- // one meaningful way to do such unrolling would be to specialize the loop for
- // the 'hotspot' case and unroll that hotspot.
- if (forOp.getLowerBoundMap().getNumResults() != 1)
- return failure();
-
// If the trip count is lower than the unroll factor, no unrolled body.
// TODO: option to specify cleanup loop unrolling.
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
@@ -1194,8 +1175,18 @@ LogicalResult mlir::loopUnrollByFactor(
return failure();
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
- if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0)
- generateCleanupLoopForUnroll(forOp, unrollFactor);
+ if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
+ // Loops where the lower bound is a max expression or the upper bound is
+ // a min expression and the trip count doesn't divide the unroll factor
+ // can't be unrolled since the lower bound of the cleanup loop in such cases
+ // cannot be expressed as an affine function or a max over affine functions.
+ if (forOp.getLowerBoundMap().getNumResults() != 1 ||
+ forOp.getUpperBoundMap().getNumResults() != 1)
+ return failure();
+ if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
+ assert(false && "cleanup loop lower bound map for single result lower "
+ "and upper bound maps can always be determined");
+ }
ValueRange iterArgs(forOp.getRegionIterArgs());
auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
@@ -1399,15 +1390,6 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
return success();
- // Loops where both lower and upper bounds are multi-result maps won't be
- // unrolled (since the trip can't be expressed as an affine function in
- // general).
- // TODO: this may not be common, but we could support the case
- // where the lower bound is a multi-result map and the ub is a single result
- // one.
- if (forOp.getLowerBoundMap().getNumResults() != 1)
- return failure();
-
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
// If the trip count is lower than the unroll jam factor, no unroll jam.
if (mayBeConstantTripCount.hasValue() &&
@@ -1440,8 +1422,18 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
// Generate the cleanup loop if trip count isn't a multiple of
// unrollJamFactor.
- if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0)
- generateCleanupLoopForUnroll(forOp, unrollJamFactor);
+ if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
+ // Loops where the lower bound is a max expression or the upper bound is
+ // a min expression and the trip count doesn't divide the unroll factor
+ // can't be unrolled since the lower bound of the cleanup loop in such cases
+ // cannot be expressed as an affine function or a max over affine functions.
+ if (forOp.getLowerBoundMap().getNumResults() != 1 ||
+ forOp.getUpperBoundMap().getNumResults() != 1)
+ return failure();
+ if (failed(generateCleanupLoopForUnroll(forOp, unrollJamFactor)))
+ assert(false && "cleanup loop lower bound map for single result lower "
+ "and upper bound maps can always be determined");
+ }
// `operandMaps[i - 1]` carries old->new operand mapping for the ith unrolled
// iteration. There are (`unrollJamFactor` - 1) iterations.
diff --git a/mlir/test/Dialect/Affine/unroll-jam.mlir b/mlir/test/Dialect/Affine/unroll-jam.mlir
index 4f208cda50747..36c2bc483f4e7 100644
--- a/mlir/test/Dialect/Affine/unroll-jam.mlir
+++ b/mlir/test/Dialect/Affine/unroll-jam.mlir
@@ -3,7 +3,6 @@
// CHECK-DAG: [[$MAP_PLUS_1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
// CHECK-DAG: [[$MAP_DIV_OFFSET:#map[0-9]+]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)>
-// CHECK-DAG: [[$MAP_MULTI_RES:#map[0-9]+]] = affine_map<()[s0, s1] -> ((s0 floordiv 2) * 2, (s1 floordiv 2) * 2, 1024)>
// CHECK-DAG: [[$MAP_SYM_UB:#map[0-9]+]] = affine_map<()[s0, s1] -> (s0, s1, 1024)>
// UJAM-FOUR-DAG: [[$UBMAP:#map[0-9]+]] = affine_map<()[s0] -> (s0 + 8)>
@@ -104,21 +103,16 @@ func @loop_nest_unknown_count_2(%N : index) {
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
affine.for %i = 0 to min affine_map<()[s0, s1] -> (s0, s1, 1024)>()[%M, %N] {
affine.for %j = 0 to %K {
- "foo"(%i, %j) : (index, index) -> ()
+ "test.foo"(%i, %j) : (index, index) -> ()
}
}
return
}
-// CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = 0 to min [[$MAP_MULTI_RES]]()[%[[M]], %[[N]]] step 2 {
+// No unroll-and-jam possible here as the lower bound for the cleanup loop won't
+// be representable.
+// CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = 0 to min #map{{.*}}()[%[[M]], %[[N]]] {
// CHECK-NEXT: affine.for [[IV1:%arg[0-9]+]] = 0 to %[[K]] {
-// CHECK-NEXT: "foo"([[IV0]], [[IV1]])
-// CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
-// CHECK-NEXT: "foo"([[RES]], [[IV1]])
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: affine.for [[IV0]] = max [[$MAP_MULTI_RES]]()[%[[M]], %[[N]]] to min [[$MAP_SYM_UB]]()[%[[M]], %[[N]]] {
-// CHECK-NEXT: affine.for [[IV1]] = 0 to %[[K]] {
-// CHECK-NEXT: "foo"([[IV0]], [[IV1]])
+// CHECK-NEXT: "test.foo"([[IV0]], [[IV1]])
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index fe7b8e4c10add..336e3313abab8 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -21,7 +21,6 @@
// UNROLL-BY-4-DAG: [[$MAP5:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
// UNROLL-BY-4-DAG: [[$MAP6:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 16 + d1)>
// UNROLL-BY-4-DAG: [[$MAP11:#map[0-9]+]] = affine_map<(d0) -> (d0)>
-// UNROLL-BY-4-DAG: [[$MAP_TRIP_COUNT_MULTIPLE_FOUR:#map[0-9]+]] = affine_map<()[s0, s1, s2] -> (s0 + ((-s0 + s1) floordiv 4) * 4, s0 + ((-s0 + s2) floordiv 4) * 4, s0 + ((-s0) floordiv 4) * 4 + 1024)>
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
func @loop_nest_simplest() {
@@ -558,20 +557,15 @@ func @loop_nest_symbolic_bound_with_step(%N : index) {
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
affine.for %i = %M to min affine_map<()[s0, s1] -> (s0, s1, 1024)>()[%N, %K] {
- "foo"() : () -> ()
+ "test.foo"() : () -> ()
}
return
}
-// CHECK-NEXT: affine.for %arg0 = %arg0 to min [[$MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
-// CHECK-NEXT: "foo"() : () -> ()
-// CHECK-NEXT: "foo"() : () -> ()
-// CHECK-NEXT: "foo"() : () -> ()
-// CHECK-NEXT: "foo"() : () -> ()
-// CHECK-NEXT: }
-// CHECK-NEXT: affine.for %arg1 = max [[$MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
-// CHECK-NEXT: "foo"() : () -> ()
-// CHECK-NEXT: }
-// CHECK-NEXT: return
+// No unrolling here.
+// UNROLL-BY-4: affine.for %{{.*}} = %{{.*}} to min #map{{.*}}()[%{{.*}}, %{{.*}}] {
+// UNROLL-BY-4-NEXT: "test.foo"() : () -> ()
+// UNROLL-BY-4-NEXT: }
+// UNROLL-BY-4-NEXT: return
// The trip count here is a multiple of four, but this can be inferred only
// through composition. Check for no cleanup scf.
@@ -588,6 +582,28 @@ func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
// UNROLL-BY-4-NOT: for
// UNROLL-BY-4: return
+// UNROLL-BY-4-LABEL: func @multi_upper_bound
+func @multi_upper_bound(%arg0: index) {
+ affine.for %i = 0 to min affine_map<()[s0] -> (8 * s0, 12 * s0)>()[%arg0] {
+ "test.foo"() : () -> ()
+ }
+ // No unrolling possible here.
+ // UNROLL-BY-4: affine.for %{{.*}} = 0 to min #map{{.*}}()[%{{.*}}]
+ return
+}
+
+// UNROLL-BY-4-LABEL: func @multi_lower_bound
+func @multi_lower_bound(%arg0: index) {
+ affine.for %i = max affine_map<()[s0] -> (8 * s0, 12 * s0)>()[%arg0] to 100 {
+ "test.foo"() : () -> ()
+ }
+ // TODO: Extend getTripCountMapAndOperands to handle multi-result lower bound
+ // maps.
+ // UNROLL-BY-4: affine.for %{{.*}} = max #map{{.*}}()[%{{.*}}] to 100
+ // UNROLL-BY-4-NOT: affine.for
+ return
+}
+
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound_alt
func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index) {
%K = affine.apply affine_map<(d0) -> (4*d0)> (%M)
More information about the Mlir-commits
mailing list