[Mlir-commits] [mlir] [MLIR][Affine] Add test pass for affine isContiguousAccess (PR #82923)
Uday Bondhugula
llvmlistbot at llvm.org
Tue Feb 27 05:43:06 PST 2024
https://github.com/bondhugula updated https://github.com/llvm/llvm-project/pull/82923
From d7675e0d283df2525a06d06cdaa277a7ce1a11f0 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday at polymagelabs.com>
Date: Fri, 23 Feb 2024 08:19:52 +0530
Subject: [PATCH] [MLIR][Affine] Add test pass for affine isContiguousAccess
isContiguousAccess is an important affine analysis utility, but it is
currently tested only very indirectly, via passes like vectorization,
and is not exposed. Expose it and add a test pass for it that makes it
easy and feasible to write test cases. This is especially needed since
the utility can be significantly enhanced in power, and a test pass is
needed to exercise it directly.
In the future, this pass can also be used to test the utility on
invariant accesses.
---
.../Dialect/Affine/Analysis/LoopAnalysis.h | 20 +++++
.../Dialect/Affine/Analysis/LoopAnalysis.cpp | 54 +++++-------
mlir/test/Dialect/Affine/access-analysis.mlir | 67 +++++++++++++++
mlir/test/lib/Dialect/Affine/CMakeLists.txt | 2 +
.../lib/Dialect/Affine/TestAccessAnalysis.cpp | 83 +++++++++++++++++++
mlir/tools/mlir-opt/mlir-opt.cpp | 2 +
6 files changed, 197 insertions(+), 31 deletions(-)
create mode 100644 mlir/test/Dialect/Affine/access-analysis.mlir
create mode 100644 mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
index 92f3d5a2c4925b..1f64b57cac5782 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
@@ -60,6 +60,26 @@ uint64_t getLargestDivisorOfTripCount(AffineForOp forOp);
DenseSet<Value, DenseMapInfo<Value>>
getInvariantAccesses(Value iv, ArrayRef<Value> indices);
+/// Given:
+/// 1. an induction variable `iv` of an AffineForOp;
+/// 2. a `memoryOp` of type AffineReadOpInterface or AffineWriteOpInterface;
+/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
+/// is defined as either invariant or varying only along a unique MemRef dim.
+/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
+/// convey the memRef access is invariant along `iv`).
+///
+/// Prerequisites:
+/// 1. `memRefDim` != nullptr;
+/// 2. `iv` of the proper type;
+/// 3. the MemRef accessed by `memoryOp` has no layout map or at most an
+/// identity layout map.
+///
+/// Currently, only memrefs with no layout map or an identity layout map are
+/// supported. Returns false conservatively if the memref has a non-identity
+/// layout map.
+template <typename LoadOrStoreOp>
+bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp, int *memRefDim);
+
using VectorizableLoopFun = std::function<bool(AffineForOp)>;
/// Checks whether the loop is structurally vectorizable; i.e.:
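
For context, a minimal usage sketch of the newly exposed utility (not part of
this patch): it mirrors what the test pass below does; the helper name
reportContiguity is hypothetical, and memRefDim is interpreted as documented
above.

// Sketch only; not part of the patch. Walks all affine reads under `forOp`
// and queries the exposed utility along that loop's IV.
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;
using namespace mlir::affine;

// Hypothetical helper: reports how each affine read relates to `forOp`'s IV.
static void reportContiguity(AffineForOp forOp) {
  forOp.walk([&](AffineReadOpInterface readOp) {
    int memRefDim = -1;
    // Returns false conservatively, e.g. for a non-identity layout map or an
    // access varying along more than one memref dimension.
    if (!isContiguousAccess(forOp.getInductionVar(), readOp, &memRefDim))
      return;
    if (memRefDim == -1)
      readOp->emitRemark("access is invariant along this loop's IV");
    else
      readOp->emitRemark("access varies along a single memref dim: ")
          << memRefDim;
  });
}
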
diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
index e645afe7cd3e8f..fc0515ba95f4fe 100644
--- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
@@ -195,43 +195,25 @@ DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
return res;
}
-/// Given:
-/// 1. an induction variable `iv` of type AffineForOp;
-/// 2. a `memoryOp` of type const LoadOp& or const StoreOp&;
-/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
-/// is defined as either invariant or varying only along a unique MemRef dim.
-/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
-/// convey the memRef access is invariant along `iv`).
-///
-/// Prerequisites:
-/// 1. `memRefDim` ~= nullptr;
-/// 2. `iv` of the proper type;
-/// 3. the MemRef accessed by `memoryOp` has no layout map or at most an
-/// identity layout map.
-///
-/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
-/// Returns false if the MemRef has a non-identity layoutMap or more than 1
-/// layoutMap. This is conservative.
-///
-// TODO: check strides.
+// TODO: check access stride.
template <typename LoadOrStoreOp>
-static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
- int *memRefDim) {
- static_assert(
- llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
- "Must be called on either LoadOp or StoreOp");
+bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
+ int *memRefDim) {
+ static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
+ AffineWriteOpInterface>::value,
+ "Must be called on either an affine read or write op");
assert(memRefDim && "memRefDim == nullptr");
auto memRefType = memoryOp.getMemRefType();
if (!memRefType.getLayout().isIdentity())
- return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
+ return memoryOp.emitError("NYI: non-trivial layout map"), false;
int uniqueVaryingIndexAlongIv = -1;
auto accessMap = memoryOp.getAffineMap();
SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
unsigned numDims = accessMap.getNumDims();
for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
- // Gather map operands used result expr 'i' in 'exprOperands'.
+ // Gather map operands used in result expr 'i' in 'exprOperands'.
SmallVector<Value, 4> exprOperands;
auto resultExpr = accessMap.getResult(i);
resultExpr.walk([&](AffineExpr expr) {
@@ -241,7 +223,7 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
});
// Check access invariance of each operand in 'exprOperands'.
- for (auto exprOperand : exprOperands) {
+ for (Value exprOperand : exprOperands) {
if (!isAccessIndexInvariant(iv, exprOperand)) {
if (uniqueVaryingIndexAlongIv != -1) {
// 2+ varying indices -> do not vectorize along iv.
@@ -259,6 +241,13 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
return true;
}
+template bool mlir::affine::isContiguousAccess(Value iv,
+ AffineReadOpInterface loadOp,
+ int *memRefDim);
+template bool mlir::affine::isContiguousAccess(Value iv,
+ AffineWriteOpInterface storeOp,
+ int *memRefDim);
+
template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
auto memRefType = memoryOp.getMemRefType();
@@ -344,10 +333,13 @@ bool mlir::affine::isVectorizableLoopBody(
auto load = dyn_cast<AffineLoadOp>(op);
auto store = dyn_cast<AffineStoreOp>(op);
int thisOpMemRefDim = -1;
- bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
- &thisOpMemRefDim)
- : isContiguousAccess(loop.getInductionVar(), store,
- &thisOpMemRefDim);
+ bool isContiguous =
+ load ? isContiguousAccess(loop.getInductionVar(),
+ cast<AffineReadOpInterface>(*load),
+ &thisOpMemRefDim)
+ : isContiguousAccess(loop.getInductionVar(),
+ cast<AffineWriteOpInterface>(*store),
+ &thisOpMemRefDim);
if (thisOpMemRefDim != -1) {
// If memory accesses vary across different dimensions then the loop is
// not vectorizable.
diff --git a/mlir/test/Dialect/Affine/access-analysis.mlir b/mlir/test/Dialect/Affine/access-analysis.mlir
new file mode 100644
index 00000000000000..68310b9323535a
--- /dev/null
+++ b/mlir/test/Dialect/Affine/access-analysis.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: func @loop_1d
+func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+ %c0 = arith.constant 0 : index
+ %M = memref.dim %A, %c0 : memref<?x?xf32>
+ affine.for %i = 0 to %M {
+ affine.for %j = 0 to %M {
+ affine.load %A[%c0, %i] : memref<?x?xf32>
+ // expected-remark@above {{contiguous along loop 0}}
+ affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
+ // expected-remark@above {{contiguous along loop 1}}
+ // Note/FIXME: access stride isn't being checked.
+ // expected-remark@-3 {{contiguous along loop 0}}
+
+ // No remark is emitted for the access below: it does not vary along the innermost memref dim for either loop IV.
+ affine.load %A[%i, %c0] : memref<?x?xf32>
+ // Note/FIXME: access stride isn't being checked.
+ affine.load %A[%i, 8 * %j] : memref<?x?xf32>
+ // expected-remark@above {{contiguous along loop 1}}
+ affine.load %A[%j, 4 * %i] : memref<?x?xf32>
+ // expected-remark@above {{contiguous along loop 0}}
+ }
+ }
+ return
+}
+
+// -----
+
+#map = affine_map<(d0) -> (d0 * 16)>
+#map1 = affine_map<(d0) -> (d0 * 16 + 16)>
+#map2 = affine_map<(d0) -> (d0)>
+#map3 = affine_map<(d0) -> (d0 + 1)>
+
+func.func @tiled(%arg0: memref<*xf32>) {
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x224x224x64xf32>
+ %cast = memref.cast %arg0 : memref<*xf32> to memref<64xf32>
+ affine.for %arg1 = 0 to 4 {
+ affine.for %arg2 = 0 to 224 {
+ affine.for %arg3 = 0 to 14 {
+ %alloc_0 = memref.alloc() : memref<1x16x1x16xf32>
+ affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
+ affine.for %arg5 = #map(%arg3) to #map1(%arg3) {
+ %0 = affine.load %cast[%arg4] : memref<64xf32>
+ // expected-remark@above {{contiguous along loop 3}}
+ affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
+ // expected-remark@above {{contiguous along loop 4}}
+ // expected-remark@above {{contiguous along loop 2}}
+ }
+ }
+ affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
+ affine.for %arg5 = #map2(%arg2) to #map3(%arg2) {
+ affine.for %arg6 = #map(%arg3) to #map1(%arg3) {
+ %0 = affine.load %alloc_0[0, %arg1 * -16 + %arg4, -%arg2 + %arg5, %arg3 * -16 + %arg6] : memref<1x16x1x16xf32>
+ // expected-remark@above {{contiguous along loop 5}}
+ // expected-remark@above {{contiguous along loop 2}}
+ affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
+ // expected-remark@above {{contiguous along loop 3}}
+ }
+ }
+ }
+ memref.dealloc %alloc_0 : memref<1x16x1x16xf32>
+ }
+ }
+ }
+ return
+}
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
index af9f312694abc9..14960a45d39bab 100644
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -3,6 +3,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
TestAffineDataCopy.cpp
TestAffineLoopUnswitching.cpp
TestAffineLoopParametricTiling.cpp
+ TestAccessAnalysis.cpp
TestDecomposeAffineOps.cpp
TestReifyValueBounds.cpp
TestLoopFusion.cpp
@@ -21,6 +22,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
LINK_LIBS PUBLIC
MLIRArithTransforms
+ MLIRAffineAnalysis
MLIRAffineTransforms
MLIRAffineUtils
MLIRIR
diff --git a/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
new file mode 100644
index 00000000000000..b38046299d504a
--- /dev/null
+++ b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
@@ -0,0 +1,83 @@
+//===- TestAccessAnalysis.cpp - Test affine access analysis utility -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to test affine access analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+
+#define PASS_NAME "test-affine-access-analysis"
+
+using namespace mlir;
+using namespace mlir::affine;
+
+namespace {
+
+struct TestAccessAnalysis
+ : public PassWrapper<TestAccessAnalysis, OperationPass<func::FuncOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAccessAnalysis)
+
+ StringRef getArgument() const final { return PASS_NAME; }
+ StringRef getDescription() const final {
+ return "Tests affine memory access analysis utility";
+ }
+
+ void runOnOperation() override;
+};
+
+} // namespace
+
+/// Gathers all affine load/store ops in loop nest rooted at 'forOp' into
+/// 'loadAndStoreOps'.
+static void
+gatherLoadsAndStores(AffineForOp forOp,
+ SmallVectorImpl<Operation *> &loadAndStoreOps) {
+ forOp.walk([&](Operation *op) {
+ if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
+ loadAndStoreOps.push_back(op);
+ });
+}
+
+void TestAccessAnalysis::runOnOperation() {
+ SmallVector<Operation *> loadStores;
+ SmallVector<AffineForOp> enclosingOps;
+ // Go over all top-level affine.for ops and test each contained affine
+ // access's contiguity along every surrounding loop IV.
+ for (auto forOp : getOperation().getOps<AffineForOp>()) {
+ loadStores.clear();
+ gatherLoadsAndStores(forOp, loadStores);
+ for (Operation *memOp : loadStores) {
+ enclosingOps.clear();
+ getAffineForIVs(*memOp, &enclosingOps);
+ for (unsigned d = 0, e = enclosingOps.size(); d < e; d++) {
+ int memRefDim;
+ bool isContiguous;
+ if (auto read = dyn_cast<AffineReadOpInterface>(memOp)) {
+ isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
+ read, &memRefDim);
+ } else {
+ isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
+ cast<AffineWriteOpInterface>(memOp),
+ &memRefDim);
+ }
+ if (isContiguous && memRefDim == 0)
+ memOp->emitRemark("contiguous along loop ") << d << '\n';
+ }
+ }
+ }
+}
+
+namespace mlir {
+void registerTestAffineAccessAnalysisPass() {
+ PassRegistration<TestAccessAnalysis>();
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 4dfa05cc8ca885..0ba1a3a534e35c 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -43,6 +43,7 @@ void registerSliceAnalysisTestPass();
void registerSymbolTestPasses();
void registerRegionTestPasses();
void registerTestAffineDataCopyPass();
+void registerTestAffineAccessAnalysisPass();
void registerTestAffineReifyValueBoundsPass();
void registerTestAffineLoopUnswitchingPass();
void registerTestAffineWalk();
@@ -169,6 +170,7 @@ void registerTestPasses() {
registerSymbolTestPasses();
registerRegionTestPasses();
registerTestAffineDataCopyPass();
+ registerTestAffineAccessAnalysisPass();
registerTestAffineLoopUnswitchingPass();
registerTestAffineReifyValueBoundsPass();
registerTestAffineWalk();