[Mlir-commits] [mlir] 8f72e56 - [MLIR] Fix a bug in affine-loop-normalize
Uday Bondhugula
llvmlistbot at llvm.org
Thu Jul 27 05:14:34 PDT 2023
Author: Vimal Patel
Date: 2023-07-27T17:33:24+05:30
New Revision: 8f72e5600080535a5f714e186cd6e3a2f8ec9893
URL: https://github.com/llvm/llvm-project/commit/8f72e5600080535a5f714e186cd6e3a2f8ec9893
DIFF: https://github.com/llvm/llvm-project/commit/8f72e5600080535a5f714e186cd6e3a2f8ec9893.diff
LOG: [MLIR] Fix a bug in affine-loop-normalize
The existing logic was not sufficient for loops whose lower and upper bounds
have different operand lists.
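For illustration, the problematic shape is a loop whose lower bound map uses
only a subset of the operands that its upper bound map uses. A minimal sketch,
distilled from the test added in this change (the function name, operand names,
and memref shape are illustrative only):

  #lb = affine_map<(d0) -> (d0)>
  #ub = affine_map<(d0, d1) -> (d1 + 32, d0 + 64, 1024)>
  func.func @different_bound_operands(%buf: memref<1024xf16>, %i: index, %j: index) {
    // Lower bound operand list: (%j). Upper bound operand list: (%i, %j).
    affine.for %k = #lb(%j) to min #ub(%i, %j) {
      %v = affine.load %buf[%k] : memref<1024xf16>
    }
    return
  }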
Reviewed By: bondhugula
Differential Revision: https://reviews.llvm.org/D155578
Added:
Modified:
mlir/lib/Dialect/Affine/Utils/Utils.cpp
mlir/test/Dialect/Affine/affine-loop-normalize.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 8781efb3193137..88bfc5789f2d64 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -550,6 +550,7 @@ void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
ubExprs, op.getContext());
op.setUpperBounds(ranges.getOperands(), newUpperMap);
}
+
LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
bool promoteSingleIter) {
if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
@@ -571,86 +572,54 @@ LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
OpBuilder opBuilder(op);
int64_t origLoopStep = op.getStep();
- AffineBound lb = op.getLowerBound();
- AffineMap originalLbMap = lb.getMap();
- SmallVector<Value, 4> origLbOperands;
- llvm::append_range(origLbOperands, lb.getOperands());
-
- AffineBound ub = op.getUpperBound();
- AffineMap originalUbMap = ub.getMap();
- SmallVector<Value, 4> origUbOperands;
- llvm::append_range(origUbOperands, ub.getOperands());
-
- // Calculate upperBound for normalized loop.
- SmallVector<Value, 4> ubOperands;
- ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands());
-
- // Add dimension operands from upper/lower bound.
- for (unsigned j = 0, e = originalUbMap.getNumDims(); j < e; ++j)
- ubOperands.push_back(ub.getOperand(j));
- for (unsigned j = 0, e = originalLbMap.getNumDims(); j < e; ++j)
- ubOperands.push_back(lb.getOperand(j));
-
- // Add symbol operands from upper/lower bound.
- for (unsigned j = 0, e = originalUbMap.getNumSymbols(); j < e; ++j)
- ubOperands.push_back(ub.getOperand(originalUbMap.getNumDims() + j));
- for (unsigned j = 0, e = originalLbMap.getNumSymbols(); j < e; ++j)
- ubOperands.push_back(lb.getOperand(originalLbMap.getNumDims() + j));
-
- // Add original result expressions from lower/upper bound map.
- SmallVector<AffineExpr, 1> origLbExprs(originalLbMap.getResults().begin(),
- originalLbMap.getResults().end());
- SmallVector<AffineExpr, 2> origUbExprs(originalUbMap.getResults().begin(),
- originalUbMap.getResults().end());
- SmallVector<AffineExpr, 4> newUbExprs;
-
- // The original upperBound can have more than one result. For the new
- // upperBound of this loop, take difference of all possible combinations of
- // the ub results and lb result and ceildiv with the loop step. For e.g.,
- //
- // affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0)
- // will have an upperBound map as,
- // affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv
- // 1)>(%i0)
- //
- // Insert all combinations of upper/lower bound results.
- for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) {
- newUbExprs.push_back(
- (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep));
- }
-
- // Construct newUbMap.
- AffineMap newUbMap = AffineMap::get(
- originalLbMap.getNumDims() + originalUbMap.getNumDims(),
- originalLbMap.getNumSymbols() + originalUbMap.getNumSymbols(), newUbExprs,
- opBuilder.getContext());
- canonicalizeMapAndOperands(&newUbMap, &ubOperands);
-
- SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
- lb.getOperands().begin() +
- originalLbMap.getNumDims());
-
- // Normalize the loop.
- op.setUpperBound(ubOperands, newUbMap);
+ // Construct the new upper bound value map.
+ AffineMap oldLbMap = op.getLowerBoundMap();
+ // The upper bound can have multiple results. To use
+ // AffineValueMap::difference, we need to have the same number of results in
+ // both lower and upper bound maps. So, we just create a value map for the
+ // lower bound with the only available lower bound result repeated to pad up
+ // to the number of upper bound results.
+ SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
+ op.getLowerBoundMap().getResult(0));
+ AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
+ AffineMap paddedLbMap =
+ AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
+ op.getContext());
+ AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
+ AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
+ AffineValueMap newUbValueMap;
+ // Compute the `upper bound - lower bound`.
+ AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
+ (void)newUbValueMap.canonicalize();
+
+ // Scale down the upper bound value map by the loop step.
+ unsigned numResult = newUbValueMap.getNumResults();
+ SmallVector<AffineExpr> scaleDownExprs(numResult);
+ for (unsigned i = 0; i < numResult; ++i)
+ scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
+ // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
+ // step), where `n` is the number of results in the upper bound map.
+ AffineMap scaleDownMap =
+ AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
+ AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
+
+ // Set the newly created upper bound map and operands.
+ op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
op.setStep(1);
// Calculate the Value of new loopIV. Create affine.apply for the value of
// the loopIV in normalized loop.
opBuilder.setInsertionPointToStart(op.getBody());
- // Add an extra dim operand for loopIV.
- lbOperands.push_back(op.getInductionVar());
- // Add symbol operands from lower bound.
- for (unsigned j = 0, e = originalLbMap.getNumSymbols(); j < e; ++j)
- lbOperands.push_back(origLbOperands[originalLbMap.getNumDims() + j]);
-
- AffineExpr origIVExpr =
- opBuilder.getAffineDimExpr(originalLbMap.getNumDims());
- AffineExpr newIVExpr = origIVExpr * origLoopStep + originalLbMap.getResult(0);
- AffineMap ivMap = AffineMap::get(originalLbMap.getNumDims() + 1,
- originalLbMap.getNumSymbols(), newIVExpr);
- canonicalizeMapAndOperands(&ivMap, &lbOperands);
- Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands);
+ // Construct an affine.apply op mapping the new IV to the old IV.
+ AffineMap scaleIvMap =
+ AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
+ AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
+ AffineValueMap newIvToOldIvMap;
+ AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
+ (void)newIvToOldIvMap.canonicalize();
+ auto newIV = opBuilder.create<AffineApplyOp>(
+ loc, newIvToOldIvMap.getAffineMap(), newIvToOldIvMap.getOperands());
op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
return success();
}
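To summarize what the rewritten normalizeAffineFor computes: for a loop with
lower bound lb, step s, and upper bound results ub_i, the normalized loop runs
from 0 with step 1 up to the minimum over i of ((ub_i - lb) ceildiv s), and the
original induction variable is recovered in the body as new_iv * s + lb. A
minimal before/after sketch (not output copied from the pass; names are
illustrative and the maps the pass actually emits may be canonicalized
differently):

  // Before normalization: lower bound 4, step 2, multi-result upper bound.
  func.func @before(%n: index) {
    affine.for %i = 4 to min affine_map<()[s0] -> (s0, 1024)>()[%n] step 2 {
    }
    return
  }

  // After normalization: new ub_i = (ub_i - lb) ceildiv step, e.g.
  // (1024 - 4) ceildiv 2 = 510, and the old IV is rebuilt by an affine.apply.
  func.func @after(%n: index) {
    affine.for %ii = 0 to min affine_map<()[s0] -> ((s0 - 4) ceildiv 2, 510)>()[%n] {
      %i = affine.apply affine_map<(d0) -> (d0 * 2 + 4)>(%ii)
    }
    return
  }

Computing the bound difference through AffineValueMap::difference, as the new
code does, aligns the dimension and symbol operands of the two bounds
automatically, which is exactly the step the old hand-rolled operand
concatenation got wrong when the bounds had different operand lists.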
diff --git a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
index 12dc5c0ed5d406..7d90efec0c21ba 100644
--- a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
@@ -88,7 +88,7 @@ func.func @simple_loop_nest(){
// CHECK-LABEL: func @loop_with_unknown_upper_bound
// CHECK-SAME: (%[[ARG0:.*]]: memref<?x?xf32>, %[[ARG1:.*]]: index)
-// CHECK-NEXT: %{{.*}} = arith.constant 0 : index
+// CHECK-NEXT: arith.constant 0 : index
// CHECK-NEXT: %[[DIM:.*]] = memref.dim %arg0, %c0 : memref<?x?xf32>
// CHECK-NEXT: affine.for %[[I:.*]] = 0 to [[$UB00]]()[%[[DIM]]] {
// CHECK-NEXT: %[[IIV:.*]] = affine.apply [[$IV00]](%[[I]])
@@ -119,7 +119,7 @@ func.func @loop_with_unknown_upper_bound(%arg0: memref<?x?xf32>, %arg1: index) {
// CHECK-LABEL: func @loop_with_multiple_upper_bounds
// CHECK-SAME: (%[[ARG0:.*]]: memref<?x?xf32>, %[[ARG1:.*]]: index)
-// CHECK-NEXT: %{{.*}} = arith.constant 0 : index
+// CHECK-NEXT: arith.constant 0 : index
// CHECK-NEXT: %[[DIM:.*]] = memref.dim %arg0, %c0 : memref<?x?xf32>
// CHECK-NEXT: affine.for %[[I:.*]] = 0 to [[$OUTERUB]]()[%[[DIM]]] {
// CHECK-NEXT: %[[IIV:.*]] = affine.apply [[$OUTERIV]](%[[I]])
@@ -146,12 +146,12 @@ func.func @loop_with_multiple_upper_bounds(%arg0: memref<?x?xf32>, %arg1 : index
// CHECK-DAG: [[$INTERUB:#map[0-9]*]] = affine_map<()[s0] -> (s0 ceildiv 32)>
// CHECK-DAG: [[$INTERIV:#map[0-9]*]] = affine_map<(d0) -> (d0 * 32)>
// CHECK-DAG: [[$INTRAUB:#map[0-9]*]] = affine_map<(d0)[s0] -> (32, -d0 + s0)>
-// CHECK-DAG: [[$INTRAIV:#map[0-9]*]] = affine_map<(d0, d1) -> (d1 + d0)>
+// CHECK-DAG: [[$INTRAIV:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + d1)>
// CHECK-LABEL: func @tiled_matmul
// CHECK-SAME: (%[[ARG0:.*]]: memref<1024x1024xf32>, %[[ARG1:.*]]: memref<1024x1024xf32>, %[[ARG2:.*]]: memref<1024x1024xf32>)
-// CHECK-NEXT: %{{.*}} = arith.constant 0 : index
-// CHECK-NEXT: %{{.*}} = arith.constant 1 : index
+// CHECK-NEXT: arith.constant 0 : index
+// CHECK-NEXT: arith.constant 1 : index
// CHECK-NEXT: %[[DIM0:.*]] = memref.dim %[[ARG0]], %{{.*}}
// CHECK-NEXT: %[[DIM1:.*]] = memref.dim %[[ARG1]], %{{.*}}
// CHECK-NEXT: %[[DIM2:.*]] = memref.dim %[[ARG0]], %{{.*}}
@@ -167,11 +167,11 @@ func.func @loop_with_multiple_upper_bounds(%arg0: memref<?x?xf32>, %arg1 : index
// CHECK-NEXT: %[[JJIV:.*]] = affine.apply [[$INTRAIV]](%[[JIV]], %[[JJ]])
// CHECK-NEXT: affine.for %[[KK:.*]] = 0 to min [[$INTRAUB]](%[[KIV]])[%[[DIM2]]] {
// CHECK-NEXT: %[[KKIV:.*]] = affine.apply [[$INTRAIV]](%[[KIV]], %[[KK]])
-// CHECK-NEXT: %{{.*}} = affine.load %[[ARG0]][%[[IIIV]], %[[KKIV]]] : memref<1024x1024xf32>
-// CHECK-NEXT: %{{.*}} = affine.load %[[ARG1]][%[[KKIV]], %[[JJIV]]] : memref<1024x1024xf32>
-// CHECK-NEXT: %{{.*}} = affine.load %[[ARG2]][%[[IIIV]], %[[JJIV]]] : memref<1024x1024xf32>
-// CHECK-NEXT: %{{.*}} = arith.mulf
-// CHECK-NEXT: %{{.*}} = arith.addf
+// CHECK-NEXT: affine.load %[[ARG0]][%[[IIIV]], %[[KKIV]]] : memref<1024x1024xf32>
+// CHECK-NEXT: affine.load %[[ARG1]][%[[KKIV]], %[[JJIV]]] : memref<1024x1024xf32>
+// CHECK-NEXT: affine.load %[[ARG2]][%[[IIIV]], %[[JJIV]]] : memref<1024x1024xf32>
+// CHECK-NEXT: arith.mulf
+// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.store %{{.*}}, %[[ARG2]]{{.*}} : memref<1024x1024xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -223,7 +223,7 @@ func.func @constant_lower_bound() {
scf.for %j = %c0 to %c1 step %c1 {
// CHECK: affine.for %[[ARG0:.*]] =
affine.for %i = %c0 to %c1 {
- // CHECK-NEXT: %{{.*}} = affine.apply #map{{.*}}(%[[ARG0]])
+ // CHECK-NEXT: affine.apply #map{{.*}}(%[[ARG0]])
}
}
return
@@ -292,3 +292,34 @@ func.func @upper_bound_by_dim(%arg0: index, %arg1: index) {
}
return
}
+
+// -----
+
+// CHECK: [[$MAP:#map[0-9]*]] = affine_map<(d0) -> (d0 * 64)>
+// CHECK: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (2, (-d0 + 1024) ceildiv 32)>
+// CHECK: [[$MAP2:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + d1 * 32)>
+// CHECK: [[$MAP3:#map[0-9]*]] = affine_map<(d0, d1) -> (32, d0 - d1 + 64, -d1 + 1024)>
+// CHECK: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + d1)>
+#map0 = affine_map<(d0) -> (d0)>
+#map1 = affine_map<(d0) -> (d0 + 64, 1024)>
+#map2 = affine_map<(d0, d1) -> (d1 + 32, d0 + 64, 1024)>
+// CHECK-LABEL: @multi_level_tiled_matmul()
+func.func @multi_level_tiled_matmul() {
+ // CHECK-NEXT: %[[BUF:.*]] = memref.alloc() : memref<1024xf16>
+ %0 = memref.alloc() : memref<1024xf16>
+ affine.for %arg0 = 0 to 1024 step 64 {
+ // CHECK-NEXT: affine.for %[[ARG0:.*]] = 0 to 16 {
+ // CHECK-NEXT: %[[IV0:.*]] = affine.apply [[$MAP]](%[[ARG0]])
+ affine.for %arg3 = #map0(%arg0) to min #map1(%arg0) step 32 {
+ // CHECK-NEXT: affine.for %[[ARG1:.*]] = 0 to min [[$MAP1]](%[[IV0]]) {
+ // CHECK-NEXT: %[[IV1:.*]] = affine.apply [[$MAP2]](%[[IV0]], %[[ARG1]])
+ affine.for %arg6 = #map0(%arg3) to min #map2(%arg0, %arg3) {
+ // CHECK-NEXT: affine.for %[[ARG2:.*]] = 0 to min [[$MAP3]](%[[IV0]], %[[IV1]]) {
+ // CHECK-NEXT: %[[IV2:.*]] = affine.apply [[$MAP4]](%[[IV1]], %[[ARG2]])
+ // CHECK-NEXT: affine.load %[[BUF]][%[[IV2]]] : memref<1024xf16>
+ affine.load %0[%arg6] : memref<1024xf16>
+ }
+ }
+ }
+ return
+}