[Mlir-commits] [mlir] ec54ec6 - [MLIR][Affine] Improve memref region bounding size and shape computation (#129009)

Tue Mar 4 03:14:18 PST 2025

Author: Uday Bondhugula
Date: 2025-03-04T16:44:14+05:30
New Revision: ec54ec65e5172b624f34a801012fe471ccf5d261

URL: https://github.com/llvm/llvm-project/commit/ec54ec65e5172b624f34a801012fe471ccf5d261
DIFF: https://github.com/llvm/llvm-project/commit/ec54ec65e5172b624f34a801012fe471ccf5d261.diff

LOG: [MLIR][Affine] Improve memref region bounding size and shape computation (#129009)

Improve memref region utility (`getConstantBoundingSizeAndShape`) to get
its constant bounding size and shape using affine expressions/maps by
also considering local variables in the system. Leads to significantly
precise and tighter bounding size and shape in the presence of div/mod
expressions (as evident from the test cases). The approach is now more
robust, proper, and complete. For affine fusion, this leads to private
memrefs of accurate size in several cases. This also impacts other
affine analysis-based passes like data copy generation that use memref
regions.

With contributions from `Vinayaka Bandishti <vinayaka at polymagelabs.com>`
on `getConstantBoundingSizeAndShape` and getConstantBoundOnDimSize`.

Fixes: https://github.com/llvm/llvm-project/issues/46317

Co-authored-by: Vinayaka Bandishti <vinayaka at polymagelabs.com>

Added: 
    

Modified: 
    mlir/include/mlir/Analysis/FlatLinearValueConstraints.h
    mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
    mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
    mlir/lib/Analysis/FlatLinearValueConstraints.cpp
    mlir/lib/Analysis/Presburger/IntegerRelation.cpp
    mlir/lib/Dialect/Affine/Analysis/Utils.cpp
    mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
    mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
    mlir/test/Dialect/Affine/dma-generate.mlir
    mlir/test/Dialect/Affine/loop-fusion-3.mlir
    mlir/test/Dialect/Affine/loop-fusion.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h
index c8167014b5300..e481ef85562b3 100644

--- a/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h
+++ b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h
@@ -118,6 +118,31 @@ class FlatLinearConstraints : public presburger::IntegerPolyhedron {
   /// we explicitly introduce them here.
   using IntegerPolyhedron::addBound;
 
+  /// Returns a non-negative constant bound on the extent (upper bound - lower
+  /// bound) of the specified variable if it is found to be a constant; returns
+  /// std::nullopt if it's not a constant. This method treats symbolic
+  /// variables specially, i.e., it looks for constant 
diff erences between
+  /// affine expressions involving only the symbolic variables. 'lb', if
+  /// provided, is set to the lower bound map associated with the constant
+  /// 
diff erence, and similarly, `ub` to the upper bound. Note that 'lb', 'ub'
+  /// are purely symbolic and will correspond to the symbolic variables of the
+  /// constaint set.
+  //  Egs: 0 <= i <= 15, return 16.
+  //       s0 + 2 <= i <= s0 + 17, returns 16. (s0 has to be a symbol)
+  //       s0 + s1 + 16 <= d0 <= s0 + s1 + 31, returns 16.
+  //       s0 - 7 <= 8*j <= s0 returns 1 with lb = s0, lbDivisor = 8 (since lb =
+  //       ceil(s0 - 7 / 8) = floor(s0 / 8)).
+  /// The 
diff erence between this method and
+  /// IntegerRelation::getConstantBoundOnDimSize is that unlike the latter, this
+  /// makes use of affine expressions and maps in its inference and provides
+  /// output with affine maps; it thus handles local variables by detecting them
+  /// as affine functions of the symbols when possible.
+  std::optional<int64_t>
+  getConstantBoundOnDimSize(MLIRContext *context, unsigned pos,
+                            AffineMap *lb = nullptr, AffineMap *ub = nullptr,
+                            unsigned *minLbPos = nullptr,
+                            unsigned *minUbPos = nullptr) const;
+
   /// Returns the constraint system as an integer set. Returns a null integer
   /// set if the system has no constraints, or if an integer set couldn't be
   /// constructed as a result of a local variable's explicit representation not

diff  --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
index 626b6fcae2f0c..b68262f09f485 100644
--- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
+++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
@@ -152,6 +152,13 @@ class IntegerRelation {
   /// intersection with no simplification of any sort attempted.
   void append(const IntegerRelation &other);
 
+  /// Finds an equality that equates the specified variable to a constant.
+  /// Returns the position of the equality row. If 'symbolic' is set to true,
+  /// symbols are also treated like a constant, i.e., an affine function of the
+  /// symbols is also treated like a constant. Returns -1 if such an equality
+  /// could not be found.
+  int findEqualityToConstant(unsigned pos, bool symbolic = false) const;
+
   /// Return the intersection of the two relations.
   /// If there are locals, they will be merged.
   IntegerRelation intersect(IntegerRelation other) const;

diff  --git a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
index de9ed6a683c24..0dd8de4f70039 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
@@ -500,6 +500,8 @@ struct MemRefRegion {
   ///    to slice operands (which correspond to symbols).
   /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
   /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
+  /// If `dropLocalVars` is true, all local variables in `cst` are projected
+  /// out.
   ///
   ///  For example, the memref region for this operation at loopDepth = 1 will
   ///  be:
@@ -513,9 +515,14 @@ struct MemRefRegion {
   ///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
   /// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
   ///
+  /// If `dropOuterIVs` is true, project out any IVs other than those among
+  /// `loopDepth` surrounding IVs, which would be symbols. If `dropOuterIVs`
+  /// is false, the IVs would be turned into local variables instead of being
+  /// projected out.
   LogicalResult compute(Operation *op, unsigned loopDepth,
                         const ComputationSliceState *sliceState = nullptr,
-                        bool addMemRefDimBounds = true);
+                        bool addMemRefDimBounds = true,
+                        bool dropLocalVars = true, bool dropOuterIVs = true);
 
   FlatAffineValueConstraints *getConstraints() { return &cst; }
   const FlatAffineValueConstraints *getConstraints() const { return &cst; }
@@ -530,31 +537,18 @@ struct MemRefRegion {
   /// corresponding dimension-wise bounds major to minor. The number of elements
   /// and all the dimension-wise bounds are guaranteed to be non-negative. We
   /// use int64_t instead of uint64_t since index types can be at most
-  /// int64_t. `lbs` are set to the lower bounds for each of the rank
-  /// dimensions, and lbDivisors contains the corresponding denominators for
-  /// floorDivs.
+  /// int64_t. `lbs` are set to the lower bound maps for each of the rank
+  /// dimensions where each of these maps is purely symbolic in the constraints
+  /// set's symbols.
   std::optional<int64_t> getConstantBoundingSizeAndShape(
       SmallVectorImpl<int64_t> *shape = nullptr,
-      std::vector<SmallVector<int64_t, 4>> *lbs = nullptr,
-      SmallVectorImpl<int64_t> *lbDivisors = nullptr) const;
+      SmallVectorImpl<AffineMap> *lbs = nullptr) const;
 
   /// Gets the lower and upper bound map for the dimensional variable at
   /// `pos`.
   void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                              AffineMap &ubMap) const;
 
-  /// A wrapper around FlatAffineValueConstraints::getConstantBoundOnDimSize().
-  /// 'pos' corresponds to the position of the memref shape's dimension (major
-  /// to minor) which matches 1:1 with the dimensional variable positions in
-  /// 'cst'.
-  std::optional<int64_t>
-  getConstantBoundOnDimSize(unsigned pos,
-                            SmallVectorImpl<int64_t> *lb = nullptr,
-                            int64_t *lbFloorDivisor = nullptr) const {
-    assert(pos < getRank() && "invalid position");
-    return cst.getConstantBoundOnDimSize64(pos, lb);
-  }
-
   /// Returns the size of this MemRefRegion in bytes.
   std::optional<int64_t> getRegionSize();
 

diff  --git a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp
index 8c179cb2a38ba..6ad39a3a91293 100644
--- a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp
+++ b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp
@@ -581,6 +581,49 @@ std::pair<AffineMap, AffineMap> FlatLinearConstraints::getLowerAndUpperBound(
   return {lbMap, ubMap};
 }
 
+/// Express the pos^th identifier of `cst` as an affine expression in
+/// terms of other identifiers, if they are available in `exprs`, using the
+/// equality at position `idx` in `cs`t. Populates `exprs` with such an
+/// expression if possible, and return true. Returns false otherwise.
+static bool detectAsExpr(const FlatLinearConstraints &cst, unsigned pos,
+                         unsigned idx, MLIRContext *context,
+                         SmallVectorImpl<AffineExpr> &exprs) {
+  // Initialize with a `0` expression.
+  auto expr = getAffineConstantExpr(0, context);
+
+  // Traverse `idx`th equality and construct the possible affine expression in
+  // terms of known identifiers.
+  unsigned j, e;
+  for (j = 0, e = cst.getNumVars(); j < e; ++j) {
+    if (j == pos)
+      continue;
+    int64_t c = cst.atEq64(idx, j);
+    if (c == 0)
+      continue;
+    // If any of the involved IDs hasn't been found yet, we can't proceed.
+    if (!exprs[j])
+      break;
+    expr = expr + exprs[j] * c;
+  }
+  if (j < e)
+    // Can't construct expression as it depends on a yet uncomputed
+    // identifier.
+    return false;
+
+  // Add constant term to AffineExpr.
+  expr = expr + cst.atEq64(idx, cst.getNumVars());
+  int64_t vPos = cst.atEq64(idx, pos);
+  assert(vPos != 0 && "expected non-zero here");
+  if (vPos > 0)
+    expr = (-expr).floorDiv(vPos);
+  else
+    // vPos < 0.
+    expr = expr.floorDiv(-vPos);
+  // Successfully constructed expression.
+  exprs[pos] = expr;
+  return true;
+}
+
 /// Compute a representation of `num` identifiers starting at `offset` in `cst`
 /// as affine expressions involving other known identifiers. Each identifier's
 /// expression (in terms of known identifiers) is populated into `memo`.
@@ -636,41 +679,13 @@ static void computeUnknownVars(const FlatLinearConstraints &cst,
 
       // Detect a variable as an expression of other variables.
       std::optional<unsigned> idx;
-      if (!(idx = cst.findConstraintWithNonZeroAt(pos, /*isEq=*/true))) {
+      if (!(idx = cst.findConstraintWithNonZeroAt(pos, /*isEq=*/true)))
         continue;
-      }
 
-      // Build AffineExpr solving for variable 'pos' in terms of all others.
-      auto expr = getAffineConstantExpr(0, context);
-      unsigned j, e;
-      for (j = 0, e = cst.getNumVars(); j < e; ++j) {
-        if (j == pos)
-          continue;
-        int64_t c = cst.atEq64(*idx, j);
-        if (c == 0)
-          continue;
-        // If any of the involved IDs hasn't been found yet, we can't proceed.
-        if (!memo[j])
-          break;
-        expr = expr + memo[j] * c;
-      }
-      if (j < e)
-        // Can't construct expression as it depends on a yet uncomputed
-        // variable.
+      if (detectAsExpr(cst, pos, *idx, context, memo)) {
+        changed = true;
         continue;
-
-      // Add constant term to AffineExpr.
-      expr = expr + cst.atEq64(*idx, cst.getNumVars());
-      int64_t vPos = cst.atEq64(*idx, pos);
-      assert(vPos != 0 && "expected non-zero here");
-      if (vPos > 0)
-        expr = (-expr).floorDiv(vPos);
-      else
-        // vPos < 0.
-        expr = expr.floorDiv(-vPos);
-      // Successfully constructed expression.
-      memo[pos] = expr;
-      changed = true;
+      }
     }
     // This loop is guaranteed to reach a fixed point - since once an
     // variable's explicit form is computed (in memo[pos]), it's not updated
@@ -891,6 +906,185 @@ FlatLinearConstraints::computeLocalVars(SmallVectorImpl<AffineExpr> &memo,
       llvm::all_of(localExprs, [](AffineExpr expr) { return expr; }));
 }
 
+/// Given an equality or inequality (`isEquality` used to disambiguate) of `cst`
+/// at `idx`, traverse and sum up `AffineExpr`s of all known ids other than the
+/// `pos`th. Known `AffineExpr`s are given in `exprs` (unknowns are null). If
+/// the equality/inequality contains any unknown id, return None. Otherwise
+/// return sum as `AffineExpr`.
+static std::optional<AffineExpr> getAsExpr(const FlatLinearConstraints &cst,
+                                           unsigned pos, MLIRContext *context,
+                                           ArrayRef<AffineExpr> exprs,
+                                           unsigned idx, bool isEquality) {
+  // Initialize with a `0` expression.
+  auto expr = getAffineConstantExpr(0, context);
+
+  SmallVector<int64_t, 8> row =
+      isEquality ? cst.getEquality64(idx) : cst.getInequality64(idx);
+
+  // Traverse `idx`th equality and construct the possible affine expression in
+  // terms of known identifiers.
+  unsigned j, e;
+  for (j = 0, e = cst.getNumVars(); j < e; ++j) {
+    if (j == pos)
+      continue;
+    int64_t c = row[j];
+    if (c == 0)
+      continue;
+    // If any of the involved IDs hasn't been found yet, we can't proceed.
+    if (!exprs[j])
+      break;
+    expr = expr + exprs[j] * c;
+  }
+  if (j < e)
+    // Can't construct expression as it depends on a yet uncomputed
+    // identifier.
+    return std::nullopt;
+
+  // Add constant term to AffineExpr.
+  expr = expr + row[cst.getNumVars()];
+  return expr;
+}
+
+std::optional<int64_t> FlatLinearConstraints::getConstantBoundOnDimSize(
+    MLIRContext *context, unsigned pos, AffineMap *lb, AffineMap *ub,
+    unsigned *minLbPos, unsigned *minUbPos) const {
+
+  assert(pos < getNumDimVars() && "Invalid identifier position");
+
+  auto freeOfUnknownLocalVars = [&](ArrayRef<int64_t> cst,
+                                    ArrayRef<AffineExpr> whiteListCols) {
+    for (int i = getNumDimAndSymbolVars(), e = cst.size() - 1; i < e; ++i) {
+      if (whiteListCols[i] && whiteListCols[i].isSymbolicOrConstant())
+        continue;
+      if (cst[i] != 0)
+        return false;
+    }
+    return true;
+  };
+
+  // Detect the necesary local variables first.
+  SmallVector<AffineExpr, 8> memo(getNumVars(), AffineExpr());
+  (void)computeLocalVars(memo, context);
+
+  // Find an equality for 'pos'^th identifier that equates it to some function
+  // of the symbolic identifiers (+ constant).
+  int eqPos = findEqualityToConstant(pos, /*symbolic=*/true);
+  // If the equality involves a local var that can not be expressed as a
+  // symbolic or constant affine expression, we bail out.
+  if (eqPos != -1 && freeOfUnknownLocalVars(getEquality64(eqPos), memo)) {
+    // This identifier can only take a single value.
+    if (lb && detectAsExpr(*this, pos, eqPos, context, memo)) {
+      AffineExpr equalityExpr =
+          simplifyAffineExpr(memo[pos], 0, getNumSymbolVars());
+      *lb = AffineMap::get(/*dimCount=*/0, getNumSymbolVars(), equalityExpr);
+      if (ub)
+        *ub = *lb;
+    }
+    if (minLbPos)
+      *minLbPos = eqPos;
+    if (minUbPos)
+      *minUbPos = eqPos;
+    return 1;
+  }
+
+  // Positions of constraints that are lower/upper bounds on the variable.
+  SmallVector<unsigned, 4> lbIndices, ubIndices;
+
+  // Note inequalities that give lower and upper bounds.
+  getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices,
+                               /*eqIndices=*/nullptr, /*offset=*/0,
+                               /*num=*/getNumDimVars());
+
+  std::optional<int64_t> minDiff = std::nullopt;
+  unsigned minLbPosition = 0, minUbPosition = 0;
+  AffineExpr minLbExpr, minUbExpr;
+
+  // Traverse each lower bound and upper bound pair, to compute the 
diff erence
+  // between them.
+  for (unsigned ubPos : ubIndices) {
+    // Construct sum of all ids other than `pos`th in the given upper bound row.
+    std::optional<AffineExpr> maybeUbExpr =
+        getAsExpr(*this, pos, context, memo, ubPos, /*isEquality=*/false);
+    if (!maybeUbExpr.has_value() || !(*maybeUbExpr).isSymbolicOrConstant())
+      continue;
+
+    // Canonical form of an inequality that constrains the upper bound on
+    // an id `x_i` is of the form:
+    // `c_1*x_1 + c_2*x_2 + ... + c_0 >= 0`, where `c_i` <= -1.
+    // Therefore the upper bound on `x_i` will be
+    // `(
+    //    sum(c_j*x_j) where j != i
+    //    +
+    //    c_0
+    //  )
+    //  /
+    //  -(c_i)`. Divison here is a floorDiv.
+    AffineExpr ubExpr = maybeUbExpr->floorDiv(-atIneq64(ubPos, pos));
+    assert(-atIneq64(ubPos, pos) > 0 && "invalid upper bound index");
+
+    // Go over each lower bound.
+    for (unsigned lbPos : lbIndices) {
+      // Construct sum of all ids other than `pos`th in the given lower bound
+      // row.
+      std::optional<AffineExpr> maybeLbExpr =
+          getAsExpr(*this, pos, context, memo, lbPos, /*isEquality=*/false);
+      if (!maybeLbExpr.has_value() || !(*maybeLbExpr).isSymbolicOrConstant())
+        continue;
+
+      // Canonical form of an inequality that is constraining the lower bound
+      // on an id `x_i is of the form:
+      // `c_1*x_1 + c_2*x_2 + ... + c_0 >= 0`, where `c_i` >= 1.
+      // Therefore upperBound on `x_i` will be
+      // `-(
+      //    sum(c_j*x_j) where j != i
+      //    +
+      //    c_0
+      //   )
+      //  /
+      //  c_i`. Divison here is a ceilDiv.
+      int64_t divisor = atIneq64(lbPos, pos);
+      // We convert the `ceilDiv` for floordiv with the formula:
+      // `expr ceildiv divisor is (expr + divisor - 1) floordiv divisor`,
+      // since uniformly keeping divisons as `floorDiv` helps their
+      // simplification.
+      AffineExpr lbExpr = (-(*maybeLbExpr) + divisor - 1).floorDiv(divisor);
+      assert(atIneq64(lbPos, pos) > 0 && "invalid lower bound index");
+
+      AffineExpr 
diff erence =
+          simplifyAffineExpr(ubExpr - lbExpr + 1, 0, getNumSymbolVars());
+      // If the 
diff erence is not constant, ignore the lower bound - upper bound
+      // pair.
+      auto constantDiff = dyn_cast<AffineConstantExpr>(
diff erence);
+      if (!constantDiff)
+        continue;
+
+      int64_t 
diff Value = constantDiff.getValue();
+      // This bound is non-negative by definition.
+      
diff Value = std::max<int64_t>(
diff Value, 0);
+      if (!minDiff || 
diff Value < *minDiff) {
+        minDiff = 
diff Value;
+        minLbPosition = lbPos;
+        minUbPosition = ubPos;
+        minLbExpr = lbExpr;
+        minUbExpr = ubExpr;
+      }
+    }
+  }
+
+  // Populate outputs where available and needed.
+  if (lb && minDiff) {
+    *lb = AffineMap::get(/*dimCount=*/0, getNumSymbolVars(), minLbExpr);
+  }
+  if (ub)
+    *ub = AffineMap::get(/*dimCount=*/0, getNumSymbolVars(), minUbExpr);
+  if (minLbPos)
+    *minLbPos = minLbPosition;
+  if (minUbPos)
+    *minUbPos = minUbPosition;
+
+  return minDiff;
+}
+
 IntegerSet FlatLinearConstraints::getAsIntegerSet(MLIRContext *context) const {
   if (getNumConstraints() == 0)
     // Return universal set (always true): 0 == 0.

diff  --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
index 5de3fd920e4e0..097cb9c2201aa 100644
--- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
@@ -1521,25 +1521,19 @@ void IntegerRelation::addLocalFloorDiv(ArrayRef<DynamicAPInt> dividend,
       getDivUpperBound(dividendCopy, divisor, dividendCopy.size() - 2));
 }
 
-/// Finds an equality that equates the specified variable to a constant.
-/// Returns the position of the equality row. If 'symbolic' is set to true,
-/// symbols are also treated like a constant, i.e., an affine function of the
-/// symbols is also treated like a constant. Returns -1 if such an equality
-/// could not be found.
-static int findEqualityToConstant(const IntegerRelation &cst, unsigned pos,
-                                  bool symbolic = false) {
-  assert(pos < cst.getNumVars() && "invalid position");
-  for (unsigned r = 0, e = cst.getNumEqualities(); r < e; r++) {
-    DynamicAPInt v = cst.atEq(r, pos);
+int IntegerRelation::findEqualityToConstant(unsigned pos, bool symbolic) const {
+  assert(pos < getNumVars() && "invalid position");
+  for (unsigned r = 0, e = getNumEqualities(); r < e; r++) {
+    DynamicAPInt v = atEq(r, pos);
     if (v * v != 1)
       continue;
     unsigned c;
-    unsigned f = symbolic ? cst.getNumDimVars() : cst.getNumVars();
+    unsigned f = symbolic ? getNumDimVars() : getNumVars();
     // This checks for zeros in all positions other than 'pos' in [0, f)
     for (c = 0; c < f; c++) {
       if (c == pos)
         continue;
-      if (cst.atEq(r, c) != 0) {
+      if (atEq(r, c) != 0) {
         // Dependent on another variable.
         break;
       }
@@ -1554,7 +1548,7 @@ static int findEqualityToConstant(const IntegerRelation &cst, unsigned pos,
 LogicalResult IntegerRelation::constantFoldVar(unsigned pos) {
   assert(pos < getNumVars() && "invalid position");
   int rowIdx;
-  if ((rowIdx = findEqualityToConstant(*this, pos)) == -1)
+  if ((rowIdx = findEqualityToConstant(pos)) == -1)
     return failure();
 
   // atEq(rowIdx, pos) is either -1 or 1.
@@ -1593,12 +1587,13 @@ std::optional<DynamicAPInt> IntegerRelation::getConstantBoundOnDimSize(
 
   // Find an equality for 'pos'^th variable that equates it to some function
   // of the symbolic variables (+ constant).
-  int eqPos = findEqualityToConstant(*this, pos, /*symbolic=*/true);
+  int eqPos = findEqualityToConstant(pos, /*symbolic=*/true);
   if (eqPos != -1) {
     auto eq = getEquality(eqPos);
-    // If the equality involves a local var, punt for now.
-    // TODO: this can be handled in the future by using the explicit
-    // representation of the local vars.
+    // If the equality involves a local var, we do not handle it.
+    // FlatLinearConstraints can instead be used to detect the local variable as
+    // an affine function (potentially div/mod) of other variables and use
+    // affine expressions/maps to represent output.
     if (!std::all_of(eq.begin() + getNumDimAndSymbolVars(), eq.end() - 1,
                      [](const DynamicAPInt &coeff) { return coeff == 0; }))
       return std::nullopt;
@@ -1719,7 +1714,7 @@ IntegerRelation::computeConstantLowerOrUpperBound(unsigned pos) {
   projectOut(0, pos);
   projectOut(1, getNumVars() - 1);
   // Check if there's an equality equating the '0'^th variable to a constant.
-  int eqRowIdx = findEqualityToConstant(*this, 0, /*symbolic=*/false);
+  int eqRowIdx = findEqualityToConstant(/*pos=*/0, /*symbolic=*/false);
   if (eqRowIdx != -1)
     // atEq(rowIdx, 0) is either -1 or 1.
     return -atEq(eqRowIdx, getNumCols() - 1) / atEq(eqRowIdx, 0);

diff  --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index db9cfc4c23bba..56a7d8de1b7f8 100644
--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -1069,9 +1069,9 @@ unsigned MemRefRegion::getRank() const {
 }
 
 std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
-    SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs,
-    SmallVectorImpl<int64_t> *lbDivisors) const {
+    SmallVectorImpl<int64_t> *shape, SmallVectorImpl<AffineMap> *lbs) const {
   auto memRefType = cast<MemRefType>(memref.getType());
+  MLIRContext *context = memref.getContext();
   unsigned rank = memRefType.getRank();
   if (shape)
     shape->reserve(rank);
@@ -1083,7 +1083,7 @@ std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
   // over-approximation from projection or union bounding box. We may not add
   // this on the region itself since they might just be redundant constraints
   // that will need non-trivials means to eliminate.
-  FlatAffineValueConstraints cstWithShapeBounds(cst);
+  FlatLinearValueConstraints cstWithShapeBounds(cst);
   for (unsigned r = 0; r < rank; r++) {
     cstWithShapeBounds.addBound(BoundType::LB, r, 0);
     int64_t dimSize = memRefType.getDimSize(r);
@@ -1092,39 +1092,34 @@ std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
     cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
   }
 
-  // Find a constant upper bound on the extent of this memref region along each
-  // dimension.
+  // Find a constant upper bound on the extent of this memref region along
+  // each dimension.
   int64_t numElements = 1;
   int64_t 
diff Constant;
-  int64_t lbDivisor;
   for (unsigned d = 0; d < rank; d++) {
-    SmallVector<int64_t, 4> lb;
+    AffineMap lb;
     std::optional<int64_t> 
diff  =
-        cstWithShapeBounds.getConstantBoundOnDimSize64(d, &lb, &lbDivisor);
+        cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb);
     if (
diff .has_value()) {
       
diff Constant = *
diff ;
-      assert(
diff Constant >= 0 && "Dim size bound can't be negative");
-      assert(lbDivisor > 0);
+      assert(
diff Constant >= 0 && "dim size bound cannot be negative");
     } else {
       // If no constant bound is found, then it can always be bound by the
       // memref's dim size if the latter has a constant size along this dim.
       auto dimSize = memRefType.getDimSize(d);
-      if (dimSize == ShapedType::kDynamic)
+      if (ShapedType::isDynamic(dimSize))
         return std::nullopt;
       
diff Constant = dimSize;
       // Lower bound becomes 0.
-      lb.resize(cstWithShapeBounds.getNumSymbolVars() + 1, 0);
-      lbDivisor = 1;
+      lb = AffineMap::get(/*dimCount=*/0, cstWithShapeBounds.getNumSymbolVars(),
+                          /*result=*/getAffineConstantExpr(0, context));
     }
     numElements *= 
diff Constant;
-    if (lbs) {
+    // Populate outputs if available.
+    if (lbs)
       lbs->push_back(lb);
-      assert(lbDivisors && "both lbs and lbDivisor or none");
-      lbDivisors->push_back(lbDivisor);
-    }
-    if (shape) {
+    if (shape)
       shape->push_back(
diff Constant);
-    }
   }
   return numElements;
 }
@@ -1172,7 +1167,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // (dma_start, dma_wait).
 LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                     const ComputationSliceState *sliceState,
-                                    bool addMemRefDimBounds) {
+                                    bool addMemRefDimBounds, bool dropLocalVars,
+                                    bool dropOuterIvs) {
   assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
          "affine read/write op expected");
 
@@ -1287,15 +1283,25 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
   enclosingIVs.resize(loopDepth);
   SmallVector<Value, 4> vars;
   cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
-  for (Value var : vars) {
-    if ((isAffineInductionVar(var)) && !llvm::is_contained(enclosingIVs, var)) {
-      cst.projectOut(var);
+  for (auto en : llvm::enumerate(vars)) {
+    if ((isAffineInductionVar(en.value())) &&
+        !llvm::is_contained(enclosingIVs, en.value())) {
+      if (dropOuterIvs) {
+        cst.projectOut(en.value());
+      } else {
+        unsigned varPosition;
+        cst.findVar(en.value(), &varPosition);
+        auto varKind = cst.getVarKindAt(varPosition);
+        varPosition -= cst.getNumDimVars();
+        cst.convertToLocal(varKind, varPosition, varPosition + 1);
+      }
     }
   }
 
   // Project out any local variables (these would have been added for any
-  // mod/divs).
-  cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());
+  // mod/divs) if specified.
+  if (dropLocalVars)
+    cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());
 
   // Constant fold any symbolic variables.
   cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),

diff  --git a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
index 1c4793626a152..21cd0e2e49c2a 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
@@ -349,17 +349,19 @@ static Value createPrivateMemRef(AffineForOp forOp,
 
   // Compute MemRefRegion for 'srcStoreOpInst' at depth 'dstLoopDepth'.
   MemRefRegion region(srcStoreOp->getLoc());
-  bool validRegion = succeeded(region.compute(srcStoreOp, dstLoopDepth));
+  bool validRegion = succeeded(
+      region.compute(srcStoreOp, dstLoopDepth, /*sliceState=*/nullptr,
+                     /*addMemRefDimBounds=*/true, /*dropLocalVars=*/false));
+
   (void)validRegion;
   assert(validRegion && "unexpected memref region failure");
   SmallVector<int64_t, 4> newShape;
-  std::vector<SmallVector<int64_t, 4>> lbs;
-  SmallVector<int64_t, 8> lbDivisors;
+  SmallVector<AffineMap, 4> lbs;
   lbs.reserve(rank);
   // Query 'region' for 'newShape' and lower bounds of MemRefRegion accessed
   // by 'srcStoreOpInst' at depth 'dstLoopDepth'.
   std::optional<int64_t> numElements =
-      region.getConstantBoundingSizeAndShape(&newShape, &lbs, &lbDivisors);
+      region.getConstantBoundingSizeAndShape(&newShape, &lbs);
   assert(numElements && "non-constant number of elts in local buffer");
 
   const FlatAffineValueConstraints *cst = region.getConstraints();
@@ -367,22 +369,21 @@ static Value createPrivateMemRef(AffineForOp forOp,
   // on; this would correspond to loop IVs surrounding the level at which the
   // slice is being materialized.
   SmallVector<Value, 8> outerIVs;
-  cst->getValues(rank, cst->getNumVars(), &outerIVs);
+  cst->getValues(rank, cst->getNumDimAndSymbolVars(), &outerIVs);
 
   // Build 'rank' AffineExprs from MemRefRegion 'lbs'
   SmallVector<AffineExpr, 4> offsets;
   offsets.reserve(rank);
-  for (unsigned d = 0; d < rank; ++d) {
-    assert(lbs[d].size() == cst->getNumCols() - rank && "incorrect bound size");
 
-    AffineExpr offset = top.getAffineConstantExpr(0);
-    for (unsigned j = 0, e = cst->getNumCols() - rank - 1; j < e; j++) {
-      offset = offset + lbs[d][j] * top.getAffineDimExpr(j);
-    }
-    assert(lbDivisors[d] > 0);
-    offset =
-        (offset + lbs[d][cst->getNumCols() - 1 - rank]).floorDiv(lbDivisors[d]);
-    offsets.push_back(offset);
+  // Outer IVs are considered symbols during memref region computation. Replace
+  // them uniformly with dims so that valid IR is guaranteed.
+  SmallVector<AffineExpr> replacements;
+  for (unsigned j = 0, e = lbs[0].getNumSymbols(); j < e; ++j)
+    replacements.push_back(mlir::getAffineDimExpr(j, forOp.getContext()));
+  for (unsigned d = 0; d < rank; ++d) {
+    assert(lbs[d].getNumResults() == 1 &&
+           "invalid private memref bound calculation");
+    offsets.push_back(lbs[d].getResult(0).replaceSymbols(replacements));
   }
 
   // Create 'newMemRefType' using 'newShape' from MemRefRegion accessed

diff  --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index c7e412b2b0fd9..5c94ec2985c3d 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1995,16 +1995,22 @@ static LogicalResult generateCopy(
   SmallVector<int64_t, 4> fastBufferShape;
 
   // Compute the extents of the buffer.
-  std::vector<SmallVector<int64_t, 4>> lbs;
-  SmallVector<int64_t, 8> lbDivisors;
+  SmallVector<AffineMap, 2> lbs;
   lbs.reserve(rank);
-  std::optional<int64_t> numElements = region.getConstantBoundingSizeAndShape(
-      &fastBufferShape, &lbs, &lbDivisors);
+  std::optional<int64_t> numElements =
+      region.getConstantBoundingSizeAndShape(&fastBufferShape, &lbs);
   if (!numElements) {
     LLVM_DEBUG(llvm::dbgs() << "Non-constant region size not supported\n");
     return failure();
   }
 
+  if (llvm::any_of(lbs, [](AffineMap lb) { return lb.getNumResults() > 1; })) {
+    // This can be supported in the future if needed.
+    LLVM_DEBUG(llvm::dbgs()
+               << "Max lower bound for memref region start not supported\n");
+    return failure();
+  }
+
   if (*numElements == 0) {
     LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n");
     return success();
@@ -2028,7 +2034,7 @@ static LogicalResult generateCopy(
   SmallVector<Value, 8> regionSymbols;
   cst->getValues(rank, cst->getNumVars(), &regionSymbols);
 
-  // Construct the index expressions for the fast memory buffer. The index
+  // Construct the access expression for the fast memory buffer. The access
   // expression for a particular dimension of the fast buffer is obtained by
   // subtracting out the lower bound on the original memref's data region
   // along the corresponding dimension.
@@ -2037,19 +2043,13 @@ static LogicalResult generateCopy(
   SmallVector<AffineExpr, 4> fastBufOffsets;
   fastBufOffsets.reserve(rank);
   for (unsigned d = 0; d < rank; d++) {
-    assert(lbs[d].size() == cst->getNumCols() - rank && "incorrect bound size");
-
-    AffineExpr offset = top.getAffineConstantExpr(0);
-    for (unsigned j = 0, e = cst->getNumCols() - rank - 1; j < e; j++)
-      offset = offset + lbs[d][j] * top.getAffineDimExpr(j);
-    assert(lbDivisors[d] > 0);
-    offset =
-        (offset + lbs[d][cst->getNumCols() - 1 - rank]).floorDiv(lbDivisors[d]);
+    assert(lbs[d].getNumSymbols() == cst->getNumCols() - rank - 1 &&
+           "incorrect bound size");
 
     // Set copy start location for this dimension in the lower memory space
     // memref.
-    if (auto caf = dyn_cast<AffineConstantExpr>(offset)) {
-      auto indexVal = caf.getValue();
+    if (auto caf = lbs[d].isSingleConstant()) {
+      auto indexVal = lbs[d].getSingleConstantResult();
       if (indexVal == 0) {
         memIndices.push_back(zeroIndex);
       } else {
@@ -2059,16 +2059,23 @@ static LogicalResult generateCopy(
     } else {
       // The coordinate for the start location is just the lower bound along the
       // corresponding dimension on the memory region (stored in 'offset').
-      auto map = AffineMap::get(
-          cst->getNumDimVars() + cst->getNumSymbolVars() - rank, 0, offset);
-      memIndices.push_back(b.create<AffineApplyOp>(loc, map, regionSymbols));
+      // Remap all inputs of the map to dimensions uniformly since in the
+      // generate IR we need valid affine symbols as opposed to "symbols" for
+      // the purpose of the memref region.
+      SmallVector<AffineExpr> symReplacements(lbs[d].getNumSymbols());
+      for (unsigned i = 0, e = lbs[d].getNumSymbols(); i < e; ++i)
+        symReplacements[i] = top.getAffineDimExpr(i);
+      lbs[d] = lbs[d].replaceDimsAndSymbols(
+          /*dimReplacements=*/{}, symReplacements, lbs[d].getNumSymbols(),
+          /*numResultSyms=*/0);
+      memIndices.push_back(b.create<AffineApplyOp>(loc, lbs[d], regionSymbols));
     }
     // The fast buffer is copied into at location zero; addressing is relative.
     bufIndices.push_back(zeroIndex);
 
     // Record the offsets since they are needed to remap the memory accesses of
     // the original memref further below.
-    fastBufOffsets.push_back(offset);
+    fastBufOffsets.push_back(lbs[d].getResult(0));
   }
 
   // The faster memory space buffer.
@@ -2596,10 +2603,11 @@ static AffineIfOp createSeparationCondition(MutableArrayRef<AffineForOp> loops,
     cst.setDimSymbolSeparation(/*newSymbolCount=*/cst.getNumDimAndSymbolVars() -
                                1);
     unsigned fullTileLbPos, fullTileUbPos;
-    if (!cst.getConstantBoundOnDimSize(0, /*lb=*/nullptr,
-                                       /*boundFloorDivisor=*/nullptr,
-                                       /*ub=*/nullptr, &fullTileLbPos,
-                                       &fullTileUbPos)) {
+    if (!((IntegerRelation)cst)
+             .getConstantBoundOnDimSize(0, /*lb=*/nullptr,
+                                        /*boundFloorDivisor=*/nullptr,
+                                        /*ub=*/nullptr, &fullTileLbPos,
+                                        &fullTileUbPos)) {
       LLVM_DEBUG(llvm::dbgs() << "Can't get constant 
diff  pair for a loop\n");
       return nullptr;
     }
@@ -2669,9 +2677,10 @@ createFullTiles(MutableArrayRef<AffineForOp> inputNest,
     // pair of <lb, ub> with a constant 
diff erence.
     cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - 1);
     unsigned lbPos, ubPos;
-    if (!cst.getConstantBoundOnDimSize(/*pos=*/0, /*lb=*/nullptr,
-                                       /*boundFloorDivisor=*/nullptr,
-                                       /*ub=*/nullptr, &lbPos, &ubPos) ||
+    if (!((IntegerRelation)cst)
+             .getConstantBoundOnDimSize(/*pos=*/0, /*lb=*/nullptr,
+                                        /*boundFloorDivisor=*/nullptr,
+                                        /*ub=*/nullptr, &lbPos, &ubPos) ||
         lbPos == ubPos) {
       LLVM_DEBUG(llvm::dbgs() << "[tile separation] Can't get constant 
diff  / "
                                  "equalities not yet handled\n");

diff  --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir
index 62011681091cc..d01bf5d0a7805 100644
--- a/mlir/test/Dialect/Affine/dma-generate.mlir
+++ b/mlir/test/Dialect/Affine/dma-generate.mlir
@@ -485,9 +485,6 @@ func.func @test_read_write_region_union() {
 
 // This should create a buffer of size 2 affine.for %arg2.
 
-#map_lb = affine_map<(d0) -> (d0)>
-#map_ub = affine_map<(d0) -> (d0 + 3)>
-#map_acc = affine_map<(d0) -> (d0 floordiv 8)>
 // CHECK-LABEL: func @test_analysis_util
 func.func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %arg2: memref<2xf32>) -> (memref<144x9xf32>, memref<2xf32>) {
   %c0 = arith.constant 0 : index
@@ -495,13 +492,11 @@ func.func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf
   %1 = memref.alloc() : memref<144x4xf32>
   %2 =  arith.constant 0.0 : f32
   affine.for %i8 = 0 to 9 step 3 {
-    affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) {
+    affine.for %i9 = affine_map<(d0) -> (d0)>(%i8) to affine_map<(d0) -> (d0 + 3)>(%i8) {
       affine.for %i17 = 0 to 64 {
-        %23 = affine.apply #map_acc(%i9)
-        %25 = affine.load %arg2[%23] : memref<2xf32>
-        %26 = affine.apply #map_lb(%i17)
-        %27 = affine.load %0[%26, %c0] : memref<64x1xf32>
-        affine.store %27, %arg2[%23] : memref<2xf32>
+        %25 = affine.load %arg2[%i9 floordiv 8] : memref<2xf32>
+        %27 = affine.load %0[%i17, %c0] : memref<64x1xf32>
+        affine.store %27, %arg2[%i17] : memref<2xf32>
       }
     }
   }
@@ -509,7 +504,7 @@ func.func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf
 }
 // CHECK:       affine.for %{{.*}} = 0 to 9 step 3 {
 // CHECK:         [[BUF:%[0-9a-zA-Z_]+]] = memref.alloc() : memref<2xf32, 2>
-// CHECK:         affine.dma_start %{{.*}}[%{{.*}} floordiv 8], [[BUF]]
+// CHECK:         affine.dma_start %{{.*}}[%c0{{.*}}], [[BUF]]
 // CHECK:         affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
 // CHECK:         affine.for %{{.*}} =
 

diff  --git a/mlir/test/Dialect/Affine/loop-fusion-3.mlir b/mlir/test/Dialect/Affine/loop-fusion-3.mlir
index 2116cfb9f884a..70d6c82105543 100644
--- a/mlir/test/Dialect/Affine/loop-fusion-3.mlir
+++ b/mlir/test/Dialect/Affine/loop-fusion-3.mlir
@@ -521,10 +521,7 @@ func.func @fuse_minor_affine_map(%in: memref<128xf32>, %out: memref<20x512xf32>)
   return
 }
 
-// TODO: The size of the private memref is not properly computed in the presence
-// of the 'mod' operation. It should be memref<1xf32> instead of
-// memref<128xf32>: https://bugs.llvm.org/show_bug.cgi?id=46973
-// MAXIMAL:       memref.alloc() : memref<128xf32>
+// MAXIMAL:       memref.alloc() : memref<1xf32>
 // MAXIMAL:       affine.for
 // MAXIMAL-NEXT:    affine.for
 // MAXIMAL-NOT:   affine.for
@@ -553,7 +550,7 @@ func.func @should_fuse_multi_store_producer_and_privatize_memfefs() {
     %0 = affine.load %b[%arg0] : memref<10xf32>
   }
 
-	// All the memrefs should be privatized except '%c', which is not involved in
+  // All the memrefs should be privatized except '%c', which is not involved in
   // the producer-consumer fusion.
   // CHECK:      affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
@@ -584,7 +581,7 @@ func.func @should_fuse_multi_store_producer_with_escaping_memrefs_and_remove_src
     %0 = affine.load %b[%i2] : memref<10xf32>
   }
 
-	// Producer loop '%i0' should be removed after fusion since fusion is maximal.
+  // Producer loop '%i0' should be removed after fusion since fusion is maximal.
   // No memref should be privatized since they escape the function, and the
   // producer is removed after fusion.
   // CHECK:       affine.for %{{.*}} = 0 to 10 {
@@ -769,7 +766,7 @@ func.func @should_not_fuse_defining_node_has_transitive_dependence_from_source_l
     %2 = arith.divf %0, %1 : f32
   }
 
-	// When loops '%i0' and '%i2' are evaluated first, they should not be
+  // When loops '%i0' and '%i2' are evaluated first, they should not be
   // fused. The defining node of '%0' in loop '%i2' has transitive dependence
   // from loop '%i0'. After that, loops '%i0' and '%i1' are evaluated, and they
   // will be fused as usual.

diff  --git a/mlir/test/Dialect/Affine/loop-fusion.mlir b/mlir/test/Dialect/Affine/loop-fusion.mlir
index dcd2e1cdb275a..1ea42517988c3 100644
--- a/mlir/test/Dialect/Affine/loop-fusion.mlir
+++ b/mlir/test/Dialect/Affine/loop-fusion.mlir
@@ -748,7 +748,7 @@ func.func @R6_to_R2_reshape_square() -> memref<64x9xi32> {
 
 //
 // CHECK-LABEL: func @R6_to_R2_reshape
-// CHECK:       memref.alloc() : memref<1x2x3x3x16x1xi32>
+// CHECK:       memref.alloc() : memref<1x1x1x1x1x1xi32>
 // CHECK:       memref.alloc() : memref<1x1xi32>
 // CHECK:       memref.alloc() : memref<64x9xi32>
 // CHECK-NEXT:  affine.for %{{.*}} = 0 to 64 {
@@ -759,7 +759,7 @@ func.func @R6_to_R2_reshape_square() -> memref<64x9xi32> {
 // CHECK-NEXT:      affine.apply [[$MAP3]](%{{.*}}, %{{.*}})
 // CHECK-NEXT:      affine.apply [[$MAP4]](%{{.*}}, %{{.*}})
 // CHECK-NEXT:      "foo"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (index, index, index, index, index, index) -> i32
-// CHECK-NEXT:      affine.store %{{.*}}, %{{.*}}[0, ((%{{.*}} * 9 + %{{.*}}) mod 288) floordiv 144, ((%{{.*}} * 9 + %{{.*}}) mod 144) floordiv 48, ((%{{.*}} * 9 + %{{.*}}) mod 48) floordiv 16, (%{{.*}} * 9 + %{{.*}}) mod 16, 0] : memref<1x2x3x3x16x1xi32>
+// CHECK-NEXT:      affine.store %{{.*}}, %{{.*}}[0, 0, 0, 0, 0, 0] : memref<1x1x1x1x1x1xi32>
 // CHECK-NEXT:      affine.apply [[$MAP11]](%{{.*}}, %{{.*}})
 // CHECK-NEXT:      affine.apply [[$MAP12]](%{{.*}})
 // CHECK-NEXT:      affine.apply [[$MAP13]](%{{.*}})
@@ -767,7 +767,7 @@ func.func @R6_to_R2_reshape_square() -> memref<64x9xi32> {
 // CHECK-NEXT:      affine.apply [[$MAP15]](%{{.*}})
 // CHECK-NEXT:      affine.apply [[$MAP16]](%{{.*}})
 // CHECK-NEXT:      affine.apply [[$MAP17]](%{{.*}})
-// CHECK-NEXT:      affine.load %{{.*}}[0, ((%{{.*}} * 9 + %{{.*}}) mod 288) floordiv 144, ((%{{.*}} * 9 + %{{.*}}) mod 144) floordiv 48, ((%{{.*}} * 9 + %{{.*}}) mod 48) floordiv 16, (%{{.*}} * 9 + %{{.*}}) mod 16, 0] : memref<1x2x3x3x16x1xi32>
+// CHECK-NEXT:      affine.load %{{.*}}[0, 0, 0, 0, 0, 0] : memref<1x1x1x1x1x1xi32>
 // CHECK-NEXT:      affine.store %{{.*}}, %{{.*}}[0, 0] : memref<1x1xi32>
 // CHECK-NEXT:      affine.load %{{.*}}[0, 0] : memref<1x1xi32>
 // CHECK-NEXT:      arith.muli %{{.*}}, %{{.*}} : i32