[Mlir-commits] [mlir] [MLIR][Affine] Fix/complete access index invariance, add isInvariantAccess (PR #84602)
Uday Bondhugula
llvmlistbot at llvm.org
Fri Mar 8 20:42:32 PST 2024
https://github.com/bondhugula updated https://github.com/llvm/llvm-project/pull/84602
>From 2bdd36fe05ca6f09726b4b725478d8bf5a37ba64 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday at polymagelabs.com>
Date: Sat, 9 Mar 2024 08:52:03 +0530
Subject: [PATCH] [MLIR][Affine] Fix/complete access index invariance, add
isInvariantAccess
isAccessIndexInvariant had outdated code and didn't handle IR with multiple
affine.apply ops, which is inconvenient when used as a utility. This is
addressed by switching to use the proper API on AffineValueMap. Add
mlir::affine::isInvariantAccess exposed for outside use and tested via
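the test pass. Add AffineValueMap::composeSimplifyAndCanonicalize. Add test
cases to exercise simplification and composition for invariant access analysis.
A TODO/FIXME has been added for accesses reported as invariant along tile-space
IVs whose intra-tile loop bounds depend on them; this issue existed before.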
---
.../Dialect/Affine/Analysis/LoopAnalysis.h | 5 ++
.../mlir/Dialect/Affine/IR/AffineValueMap.h | 5 ++
.../Dialect/Affine/Analysis/LoopAnalysis.cpp | 59 ++++++++-----------
mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp | 9 +++
mlir/test/Dialect/Affine/access-analysis.mlir | 33 ++++++++++-
.../lib/Dialect/Affine/TestAccessAnalysis.cpp | 19 ++++--
6 files changed, 88 insertions(+), 42 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
index 1f64b57cac5782..7b92b930fb5f57 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
@@ -48,6 +48,11 @@ std::optional<uint64_t> getConstantTripCount(AffineForOp forOp);
/// this method is thus able to determine non-trivial divisors.
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp);
+/// Checks if an affine read or write operation does not depend on `forOp`'s
+/// IV, i.e., if the memory access is invariant on `forOp`.
+template <typename LoadOrStoreOp>
+bool isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp);
+
/// Given an induction variable `iv` of type AffineForOp and `indices` of type
/// IndexType, returns the set of `indices` that are independent of `iv`.
///
diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
index 8439930a87467c..7ad0e4a1e5ea04 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
@@ -44,6 +44,11 @@ class AffineValueMap {
// Resets this AffineValueMap with 'map', 'operands', and 'results'.
void reset(AffineMap map, ValueRange operands, ValueRange results = {});
+ /// Composes all incoming affine.apply ops and then simplifies and
+ /// canonicalizes the map and operands. This can change the number of
+ /// operands, but the result count remains the same.
+ void composeSimplifyAndCanonicalize();
+
/// Return the value map that is the difference of value maps 'a' and 'b',
/// represented as an affine map and its operands. The output map + operands
/// are canonicalized and simplified.
diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
index fc0515ba95f4fe..1c28d6b00b3c81 100644
--- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
@@ -145,45 +145,36 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
return *gcd;
}
-/// Given an induction variable `iv` of type AffineForOp and an access `index`
-/// of type index, returns `true` if `index` is independent of `iv` and
-/// false otherwise. The determination supports composition with at most one
-/// AffineApplyOp. The 'at most one AffineApplyOp' comes from the fact that
-/// the composition of AffineApplyOp needs to be canonicalized by construction
-/// to avoid writing code that composes arbitrary numbers of AffineApplyOps
-/// everywhere. To achieve this, at the very least, the compose-affine-apply
-/// pass must have been run.
+/// Given an affine.for `iv` and an access `index` of type index, returns `true`
+/// if `index` is independent of `iv` and false otherwise.
///
-/// Prerequisites:
-/// 1. `iv` and `index` of the proper type;
-/// 2. at most one reachable AffineApplyOp from index;
-///
-/// Returns false in cases with more than one AffineApplyOp, this is
-/// conservative.
+/// Prerequisites: `iv` and `index` of the proper type;
static bool isAccessIndexInvariant(Value iv, Value index) {
- assert(isAffineForInductionVar(iv) && "iv must be a AffineForOp");
- assert(isa<IndexType>(index.getType()) && "index must be of IndexType");
- SmallVector<Operation *, 4> affineApplyOps;
- getReachableAffineApplyOps({index}, affineApplyOps);
-
- if (affineApplyOps.empty()) {
- // Pointer equality test because of Value pointer semantics.
- return index != iv;
- }
-
- if (affineApplyOps.size() > 1) {
- affineApplyOps[0]->emitRemark(
- "CompositionAffineMapsPass must have been run: there should be at most "
- "one AffineApplyOp, returning false conservatively.");
- return false;
- }
+ assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
+ assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
+ auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
+ SmallVector<Value> operands = {index};
+ AffineValueMap avm(map, operands);
+ avm.composeSimplifyAndCanonicalize();
+ return !avm.isFunctionOf(0, iv);
+}
- auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
- // We need yet another level of indirection because the `dim` index of the
- // access may not correspond to the `dim` index of composeOp.
- return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
+// Pre-requisite: Loop bounds should be in canonical form.
+template <typename LoadOrStoreOp>
+bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
+ AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
+ avm.composeSimplifyAndCanonicalize();
+ return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}
+// Explicitly instantiate the templates so that the compiler knows we need them.
+template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
+ AffineForOp);
+template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
+ AffineForOp);
+template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
+template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);
+
DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
ArrayRef<Value> indices) {
DenseSet<Value> res;
diff --git a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
index 2800237fd05ac6..6a52849186872e 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
@@ -24,6 +24,15 @@ void AffineValueMap::reset(AffineMap map, ValueRange operands,
this->results.assign(results.begin(), results.end());
}
+void AffineValueMap::composeSimplifyAndCanonicalize() {
+ AffineMap sMap = getAffineMap();
+ fullyComposeAffineMapAndOperands(&sMap, &operands);
+ // Full composition also canonicalizes and simplifies before returning. We
+ // need to canonicalize once more to drop unused operands.
+ canonicalizeMapAndOperands(&sMap, &operands);
+ this->map.reset(sMap);
+}
+
void AffineValueMap::difference(const AffineValueMap &a,
const AffineValueMap &b, AffineValueMap *res) {
assert(a.getNumResults() == b.getNumResults() && "invalid inputs");
diff --git a/mlir/test/Dialect/Affine/access-analysis.mlir b/mlir/test/Dialect/Affine/access-analysis.mlir
index 68310b9323535a..789de646a8f9e2 100644
--- a/mlir/test/Dialect/Affine/access-analysis.mlir
+++ b/mlir/test/Dialect/Affine/access-analysis.mlir
@@ -1,13 +1,14 @@
// RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s
-// CHECK-LABEL: func @loop_1d
-func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-LABEL: func @loop_simple
+func.func @loop_simple(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
%c0 = arith.constant 0 : index
%M = memref.dim %A, %c0 : memref<?x?xf32>
affine.for %i = 0 to %M {
affine.for %j = 0 to %M {
affine.load %A[%c0, %i] : memref<?x?xf32>
// expected-remark at above {{contiguous along loop 0}}
+ // expected-remark at above {{invariant along loop 1}}
affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
// expected-remark at above {{contiguous along loop 1}}
// Note/FIXME: access stride isn't being checked.
@@ -15,6 +16,7 @@ func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// These are all non-contiguous along both loops. Nothing is emitted.
affine.load %A[%i, %c0] : memref<?x?xf32>
+ // expected-remark at above {{invariant along loop 1}}
// Note/FIXME: access stride isn't being checked.
affine.load %A[%i, 8 * %j] : memref<?x?xf32>
// expected-remark at above {{contiguous along loop 1}}
@@ -27,6 +29,22 @@ func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// -----
+// CHECK-LABEL: func @loop_unsimplified
+func.func @loop_unsimplified(%A : memref<100xf32>) {
+ affine.for %i = 0 to 100 {
+ affine.load %A[2 * %i - %i - %i] : memref<100xf32>
+ // expected-remark at above {{invariant along loop 0}}
+
+ %m = affine.apply affine_map<(d0) -> (-2 * d0)>(%i)
+ %n = affine.apply affine_map<(d0) -> (2 * d0)>(%i)
+ affine.load %A[(%m + %n) floordiv 2] : memref<100xf32>
+ // expected-remark at above {{invariant along loop 0}}
+ }
+ return
+}
+
+// -----
+
#map = affine_map<(d0) -> (d0 * 16)>
#map1 = affine_map<(d0) -> (d0 * 16 + 16)>
#map2 = affine_map<(d0) -> (d0)>
@@ -41,11 +59,19 @@ func.func @tiled(%arg0: memref<*xf32>) {
%alloc_0 = memref.alloc() : memref<1x16x1x16xf32>
affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
affine.for %arg5 = #map(%arg3) to #map1(%arg3) {
+ // TODO: here and below, the access isn't really invariant
+ // along tile-space IVs where the intra-tile IVs' bounds
+ // depend on them.
%0 = affine.load %cast[%arg4] : memref<64xf32>
// expected-remark at above {{contiguous along loop 3}}
+ // expected-remark at above {{invariant along loop 0}}
+ // expected-remark at above {{invariant along loop 1}}
+ // expected-remark at above {{invariant along loop 2}}
+ // expected-remark at above {{invariant along loop 4}}
affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
// expected-remark at above {{contiguous along loop 4}}
// expected-remark at above {{contiguous along loop 2}}
+ // expected-remark at above {{invariant along loop 1}}
}
}
affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
@@ -56,6 +82,9 @@ func.func @tiled(%arg0: memref<*xf32>) {
// expected-remark at above {{contiguous along loop 2}}
affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
// expected-remark at above {{contiguous along loop 3}}
+ // expected-remark at above {{invariant along loop 0}}
+ // expected-remark at above {{invariant along loop 1}}
+ // expected-remark at above {{invariant along loop 2}}
}
}
}
diff --git a/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
index b38046299d504a..751302550092d7 100644
--- a/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
@@ -59,18 +59,25 @@ void TestAccessAnalysis::runOnOperation() {
enclosingOps.clear();
getAffineForIVs(*memOp, &enclosingOps);
for (unsigned d = 0, e = enclosingOps.size(); d < e; d++) {
+ AffineForOp loop = enclosingOps[d];
int memRefDim;
- bool isContiguous;
+ bool isContiguous, isInvariant;
if (auto read = dyn_cast<AffineReadOpInterface>(memOp)) {
- isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
- read, &memRefDim);
+ isContiguous =
+ isContiguousAccess(loop.getInductionVar(), read, &memRefDim);
+ isInvariant = isInvariantAccess(read, loop);
} else {
- isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
- cast<AffineWriteOpInterface>(memOp),
- &memRefDim);
+ auto write = cast<AffineWriteOpInterface>(memOp);
+ isContiguous =
+ isContiguousAccess(loop.getInductionVar(), write, &memRefDim);
+ isInvariant = isInvariantAccess(write, loop);
}
+ // Check for contiguity for the innermost memref dimension to avoid
+ // emitting too many diagnostics.
if (isContiguous && memRefDim == 0)
memOp->emitRemark("contiguous along loop ") << d << '\n';
+ if (isInvariant)
+ memOp->emitRemark("invariant along loop ") << d << '\n';
}
}
}
More information about the Mlir-commits
mailing list