[Mlir-commits] [mlir] [mlir][Affine] take strides into account for contiguity check (PR #177756)

Sat Jan 24 04:05:29 PST 2026

https://github.com/gdehame created https://github.com/llvm/llvm-project/pull/177756

The isContiguousAccess utility function in LoopAnalysis.cpp didn't check for access strides. This patch adds another utility function which walks an affine expression to check that a given value acts as an offset in that expression. This new function is used to check whether a given affine access is of the shape IV + ... and therefore is contiguous taking into account strides. The access-analysis unit tests are also modified accordingly.

>From aa2dd6f3ca2c45110a61e7302dc93759bc39bc8a Mon Sep 17 00:00:00 2001
From: gdehame <gabrieldehame at gmail.com>
Date: Mon, 10 Feb 2025 17:58:02 +0100
Subject: [PATCH] [mlir][Affine] take strides into account for contiguity check

The isContiguousAccess utility function in LoopAnalysis.cpp didn't check for access strides.
This patch adds another utility function which walks an affine expression to check that a given value acts as an offset in that expression.
This new function is used to check whether a given affine access is of the shape IV + ... and therefore is contiguous taking into account strides.
The access-analysis unit tests are also modified accordingly.
---
 .../Dialect/Affine/Analysis/LoopAnalysis.cpp  | 67 ++++++++++++++++++-
 mlir/test/Dialect/Affine/access-analysis.mlir |  7 --
 2 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
index 166d39e88d41e..87efbb4285ab2 100644
--- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
@@ -18,6 +18,9 @@
 #include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/Visitors.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/MathExtras.h"
 
 #include "llvm/Support/Debug.h"
@@ -315,7 +318,57 @@ DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
   return res;
 }
 
-// TODO: check access stride.
+/// Check that x is an offset in resultExpr
+/// That is, check that the result is of the shape x + ...  
+static bool isOffset(AffineExpr resultExpr, int numDims, ArrayRef<Value> operands, Value offset) {
+  // Check if the expression is only the offset
+  if (isa<AffineDimExpr>(resultExpr))
+    return operands[cast<AffineDimExpr>(resultExpr).getPosition()] == offset;
+  if (isa<AffineSymbolExpr>(resultExpr))
+    return operands[cast<AffineSymbolExpr>(resultExpr).getPosition() + numDims] == offset;
+
+  // Otherwise, walk through the expression and check that it's of one of the shapes:
+  // - x + ...
+  // - (x + ...) mod ...
+  // The second pattern leads to piecewise contiguous accesses which can be considered contiguous
+  // for vectorization if the vectorization factor is a divisor of the modulo's left-hand-side
+  WalkResult walkRes = resultExpr.walk([&](AffineExpr expr) {
+    if (!isa<AffineBinaryOpExpr>(expr))
+      return WalkResult::skip();
+    if (expr.getKind() == AffineExprKind::Add) {
+      AffineExpr lhs = cast<AffineBinaryOpExpr>(expr).getLHS();
+      AffineExpr rhs = cast<AffineBinaryOpExpr>(expr).getRHS();
+      if (auto dimExpr = dyn_cast<AffineDimExpr>(lhs)) {
+        if (operands[dimExpr.getPosition()] == offset)
+          return WalkResult::interrupt();
+      }
+      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(lhs))
+        if (operands[symExpr.getPosition() + numDims])
+          return WalkResult::interrupt();
+      if (auto dimExpr = dyn_cast<AffineDimExpr>(rhs)) {
+        if (operands[dimExpr.getPosition()] == offset)
+          return WalkResult::interrupt();
+      }
+      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(rhs))
+        if (operands[symExpr.getPosition() + numDims])
+          return WalkResult::interrupt();
+      return WalkResult::advance();
+    }
+    if (expr.getKind() == AffineExprKind::Mod) {
+      AffineExpr lhs = cast<AffineBinaryOpExpr>(expr).getLHS();
+      if (auto dimExpr = dyn_cast<AffineDimExpr>(lhs)) {
+        if (operands[dimExpr.getPosition()] == offset)
+          return WalkResult::interrupt();
+      }
+      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(lhs))
+        if (operands[symExpr.getPosition() + numDims])
+          return WalkResult::interrupt();
+    }
+    return WalkResult::skip();
+  });
+  return walkRes.wasInterrupted();
+}
+
 template <typename LoadOrStoreOp>
 bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                       int *memRefDim) {
@@ -344,7 +397,17 @@ bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
     });
     // Check access invariance of each operand in 'exprOperands'.
     for (Value exprOperand : exprOperands) {
-      if (!isAccessIndexInvariant(iv, exprOperand)) {
+      // Verify that the access is contiguous along the induction variable if it depends on it
+      // by checking that at most one of the op's access map's result is of the shape IV + constant
+      auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
+      SmallVector<Value> operands = {exprOperand};
+      AffineValueMap avm(map, operands);
+      avm.composeSimplifyAndCanonicalize();
+      if (avm.isFunctionOf(0, iv)) {
+        if (!isOffset(resultExpr, numDims, mapOperands, exprOperand) || 
+            !isOffset(avm.getResult(0), avm.getNumDims(), avm.getOperands(), iv)) {
+          return false;
+        }
         if (uniqueVaryingIndexAlongIv != -1) {
           // 2+ varying indices -> do not vectorize along iv.
           return false;
diff --git a/mlir/test/Dialect/Affine/access-analysis.mlir b/mlir/test/Dialect/Affine/access-analysis.mlir
index 789de646a8f9e..4f3532d3549c0 100644
--- a/mlir/test/Dialect/Affine/access-analysis.mlir
+++ b/mlir/test/Dialect/Affine/access-analysis.mlir
@@ -11,17 +11,12 @@ func.func @loop_simple(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
        // expected-remark at above {{invariant along loop 1}}
        affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
        // expected-remark at above {{contiguous along loop 1}}
-       // Note/FIXME: access stride isn't being checked.
-       // expected-remark at -3 {{contiguous along loop 0}}
 
        // These are all non-contiguous along both loops. Nothing is emitted.
        affine.load %A[%i, %c0] : memref<?x?xf32>
        // expected-remark at above {{invariant along loop 1}}
-       // Note/FIXME: access stride isn't being checked.
        affine.load %A[%i, 8 * %j] : memref<?x?xf32>
-       // expected-remark at above {{contiguous along loop 1}}
        affine.load %A[%j, 4 * %i] : memref<?x?xf32>
-       // expected-remark at above {{contiguous along loop 0}}
      }
    }
    return
@@ -70,7 +65,6 @@ func.func @tiled(%arg0: memref<*xf32>) {
             // expected-remark at above {{invariant along loop 4}}
             affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
             // expected-remark at above {{contiguous along loop 4}}
-            // expected-remark at above {{contiguous along loop 2}}
             // expected-remark at above {{invariant along loop 1}}
           }
         }
@@ -79,7 +73,6 @@ func.func @tiled(%arg0: memref<*xf32>) {
             affine.for %arg6 = #map(%arg3) to #map1(%arg3) {
               %0 = affine.load %alloc_0[0, %arg1 * -16 + %arg4, -%arg2 + %arg5, %arg3 * -16 + %arg6] : memref<1x16x1x16xf32>
               // expected-remark at above {{contiguous along loop 5}}
-              // expected-remark at above {{contiguous along loop 2}}
               affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
               // expected-remark at above {{contiguous along loop 3}}
               // expected-remark at above {{invariant along loop 0}}