[Mlir-commits] [mlir] [WIP][MLIR][Affine] Add vector support to affine.linearize_index and affine.delinearize_index (PR #188369)

Tue Mar 24 15:56:48 PDT 2026

https://github.com/keshavvinayak01 created https://github.com/llvm/llvm-project/pull/188369

Allow `affine.delinearize_index` and `affine.linearize_index` to operate on `vector<...xindex>` types in addition to scalar `index`.

>From 3f09929fea3253c7ec367e81bb93763bff32bd02 Mon Sep 17 00:00:00 2001
From: Keshav Vinayak Jha <keshavvinayakjha at gmail.com>
Date: Tue, 24 Mar 2026 22:08:13 +0000
Subject: [PATCH] [mlir][affine] Add vector support to affine.linearize_index
 and affine.delinearize_index

Allow affine.delinearize_index and affine.linearize_index to operate on
vector<...xindex> types in addition to scalar index. The basis remains
scalar (it describes the shape of the index space, not per-lane data).

This enables expressing element-wise index computations across vector
lanes directly, rather than manually lowering to vector.broadcast +
vector.step + arith patterns.

Changes:
- Add Affine_IndexOrVectorOfIndex type constraint in AffineOps.td
- Implement custom parse/print for both ops
- Add type consistency verifiers (all results/inputs must match)
- Update canonicalizers to produce vector zeros where needed
- Add vector lowering path in AffineExpandIndexOpsAsAffine using
  arith.divsi/muli/subi/addi (which natively support vectors)
- Scalar behavior is completely unchanged

Co-authored-by: Claude Opus 4.6 <noreply at anthropic.com>
Signed-off-by: Keshav Vinayak <keshavvinayak01 at gmail.com>
---
 .../mlir/Dialect/Affine/IR/AffineOps.td       |  27 ++-
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp      | 170 +++++++++++++++++-
 .../AffineExpandIndexOpsAsAffine.cpp          | 141 +++++++++++++--
 3 files changed, 301 insertions(+), 37 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
index 9cb0f3242db17..8a7e49c05f526 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
@@ -32,6 +32,12 @@ def Affine_Dialect : Dialect {
 class Affine_Op<string mnemonic, list<Trait> traits = []> :
     Op<Affine_Dialect, mnemonic, traits>;
 
+// Operand/result constraint: a scalar `index`, or a vector of any rank whose
+// element type is `index`.
+def Affine_IndexOrVectorOfIndex :
+    AnyTypeOf<[Index, VectorOfAnyRankOf<[Index]>],
+              "index or vector of index">;
+
 // Require regions to have affine.yield.
 def ImplicitAffineTerminator
     : SingleBlockImplicitTerminator<"AffineYieldOp">;
@@ -1118,16 +1124,12 @@ def AffineDelinearizeIndexOp : Affine_Op<"delinearize_index", [Pure]> {
     - that is, the product of all basis elements is positive as an `index` as well.
   }];
 
-  let arguments = (ins Index:$linear_index,
+  let arguments = (ins Affine_IndexOrVectorOfIndex:$linear_index,
     Variadic<Index>:$dynamic_basis,
     DenseI64ArrayAttr:$static_basis);
-  let results = (outs Variadic<Index>:$multi_index);
+  let results = (outs Variadic<Affine_IndexOrVectorOfIndex>:$multi_index);
 
-  let assemblyFormat = [{
-    $linear_index `into`
-    custom<DynamicIndexList>($dynamic_basis, $static_basis, "{}", "::mlir::AsmParser::Delimiter::Paren")
-    attr-dict `:` type($multi_index)
-  }];
+  let hasCustomAssemblyFormat = 1;
 
   let builders = [
     OpBuilder<(ins "Value":$linear_index, "ValueRange":$dynamic_basis, "ArrayRef<int64_t>":$static_basis, CArg<"bool", "true">:$hasOuterBound)>,
@@ -1221,18 +1223,13 @@ def AffineLinearizeIndexOp : Affine_Op<"linearize_index",
     ```
   }];
 
-  let arguments = (ins Variadic<Index>:$multi_index,
+  let arguments = (ins Variadic<Affine_IndexOrVectorOfIndex>:$multi_index,
     Variadic<Index>:$dynamic_basis,
     DenseI64ArrayAttr:$static_basis,
     UnitProp:$disjoint);
-  let results = (outs Index:$linear_index);
+  let results = (outs Affine_IndexOrVectorOfIndex:$linear_index);
 
-  let assemblyFormat = [{
-    (`disjoint` $disjoint^)? ` `
-    `[` $multi_index `]` `by`
-    custom<DynamicIndexList>($dynamic_basis, $static_basis, "{}", "::mlir::AsmParser::Delimiter::Paren")
-    attr-dict `:` type($linear_index)
-  }];
+  let hasCustomAssemblyFormat = 1;
 
   let builders = [
     OpBuilder<(ins "ValueRange":$multi_index, "ValueRange":$basis, CArg<"bool", "false">:$disjoint)>,
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 839d34b41cbd4..2060a74b061e7 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -21,6 +21,7 @@
 #include "mlir/IR/Value.h"
 #include "mlir/Interfaces/ShapedOpInterfaces.h"
 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
@@ -4855,6 +4856,58 @@ LogicalResult AffineVectorStoreOp::verify() {
 // DelinearizeIndexOp
 //===----------------------------------------------------------------------===//
 
+/// Parses the custom assembly of `affine.delinearize_index`. Only the result
+/// types are spelled out; the linear index operand takes its type from them.
+/// Examples:
+///   affine.delinearize_index %idx into (%c4, %c8)
+///     : index, index                        (scalar)
+///   affine.delinearize_index %vec into (%c4, %c8)
+///     : vector<16xindex>, vector<16xindex>  (vector)
+ParseResult AffineDelinearizeIndexOp::parse(OpAsmParser &parser,
+                                            OperationState &result) {
+  MLIRContext *ctx = parser.getContext();
+
+  // `%linear_index into (basis...)`.
+  OpAsmParser::UnresolvedOperand linearOperand;
+  SmallVector<OpAsmParser::UnresolvedOperand> basisOperands;
+  DenseI64ArrayAttr basisAttr;
+  if (parser.parseOperand(linearOperand) || parser.parseKeyword("into") ||
+      parseDynamicIndexList(parser, basisOperands, basisAttr, nullptr,
+                            AsmParser::Delimiter::Paren))
+    return failure();
+
+  // `attr-dict : result-types`.
+  SmallVector<Type> types;
+  if (parser.parseOptionalAttrDict(result.attributes) || parser.parseColon() ||
+      parser.parseTypeList(types))
+    return failure();
+
+  // The linear index takes the type of the first result (the verifier checks
+  // that all result types agree); it defaults to scalar `index` if the list
+  // is empty. Basis operands are always scalar `index`.
+  Type scalarIndexType = IndexType::get(ctx);
+  Type linearType = types.empty() ? scalarIndexType : types.front();
+  if (parser.resolveOperand(linearOperand, linearType, result.operands) ||
+      parser.resolveOperands(basisOperands, scalarIndexType, result.operands))
+    return failure();
+
+  // The static part of the basis is stored in the op's properties.
+  result.addTypes(types);
+  auto &props =
+      result.getOrAddProperties<AffineDelinearizeIndexOp::Properties>();
+  props.static_basis = basisAttr;
+  return success();
+}
+
+/// Prints `%linear_index into (basis...) attr-dict : result-types`.
+void AffineDelinearizeIndexOp::print(OpAsmPrinter &p) {
+  p << ' ' << getLinearIndex() << " into ";
+  printDynamicIndexList(p, *this, getDynamicBasis(), getStaticBasisAttr(),
+                        /*valueTypes=*/{}, AsmParser::Delimiter::Paren);
+  p.printOptionalAttrDict((*this)->getAttrs(), {getStaticBasisAttrName()});
+  p << " : " << getResultTypes();
+}
+
 void AffineDelinearizeIndexOp::build(OpBuilder &odsBuilder,
                                      OperationState &odsState,
                                      Value linearIndex, ValueRange dynamicBasis,
@@ -4925,6 +4978,14 @@ LogicalResult AffineDelinearizeIndexOp::verify() {
       }))
     return emitOpError("no basis element may be statically non-positive");
 
+  // All result types must match the input type.
+  Type inputType = getLinearIndex().getType();
+  for (Type resultType : getResultTypes()) {
+    if (resultType != inputType)
+      return emitOpError("result types must match the linear index type, got ")
+             << resultType << " vs " << inputType;
+  }
+
   return success();
 }
 
@@ -5036,9 +5097,17 @@ struct DropUnitExtentBasis
     SmallVector<Value> replacements(delinearizeOp->getNumResults(), nullptr);
     std::optional<Value> zero = std::nullopt;
     Location loc = delinearizeOp->getLoc();
+    Type indexType = delinearizeOp.getLinearIndex().getType();
     auto getZero = [&]() -> Value {
-      if (!zero)
-        zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+      if (!zero) {
+        // Create only the constant that is used: no dead scalar for vectors.
+        if (auto vecTy = dyn_cast<VectorType>(indexType))
+          zero = arith::ConstantOp::create(
+              rewriter, loc,
+              DenseElementsAttr::get(vecTy, rewriter.getIndexAttr(0)));
+        else
+          zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+      }
       return zero.value();
     };
 
@@ -5204,9 +5273,9 @@ struct SplitDelinearizeSpanningLastLinearizeArg final
           "need at least two elements to form the basis product");
 
     Value linearizeWithoutBack = affine::AffineLinearizeIndexOp::create(
-        rewriter, linearizeOp.getLoc(), linearizeOp.getMultiIndex().drop_back(),
-        linearizeOp.getDynamicBasis(), linearizeOp.getStaticBasis().drop_back(),
-        linearizeOp.getDisjoint());
+        rewriter, linearizeOp.getLoc(), linearizeOp.getLinearIndex().getType(),
+        linearizeOp.getMultiIndex().drop_back(), linearizeOp.getDynamicBasis(),
+        linearizeOp.getStaticBasis().drop_back(), linearizeOp.getDisjoint());
     auto delinearizeWithoutSplitPart = affine::AffineDelinearizeIndexOp::create(
         rewriter, delinearizeOp.getLoc(), linearizeWithoutBack,
         delinearizeOp.getDynamicBasis(), basis.drop_back(elemsToSplit),
@@ -5236,6 +5305,69 @@ void affine::AffineDelinearizeIndexOp::getCanonicalizationPatterns(
 // LinearizeIndexOp
 //===----------------------------------------------------------------------===//
 
+/// Parses the custom assembly of `affine.linearize_index`. The single trailing
+/// type is both the result type and the type of every multi-index operand.
+/// Examples:
+///   affine.linearize_index [%x, %y] by (%c4, %c8) : index
+///   affine.linearize_index disjoint [%v0, %v1] by (%c4, %c8)
+///     : vector<16xindex>
+ParseResult AffineLinearizeIndexOp::parse(OpAsmParser &parser,
+                                          OperationState &result) {
+  // Optional leading `disjoint` flag.
+  const bool hasDisjoint = succeeded(parser.parseOptionalKeyword("disjoint"));
+
+  // `[multi-index...] by (basis...)`.
+  SmallVector<OpAsmParser::UnresolvedOperand> indexOperands;
+  SmallVector<OpAsmParser::UnresolvedOperand> basisOperands;
+  DenseI64ArrayAttr basisAttr;
+  if (parser.parseOperandList(indexOperands, AsmParser::Delimiter::Square) ||
+      parser.parseKeyword("by") ||
+      parseDynamicIndexList(parser, basisOperands, basisAttr, nullptr,
+                            AsmParser::Delimiter::Paren))
+    return failure();
+
+  // `attr-dict : type`.
+  Type linearType;
+  if (parser.parseOptionalAttrDict(result.attributes) ||
+      parser.parseColonType(linearType))
+    return failure();
+
+  // Multi-index operands share the result type; the basis is scalar `index`.
+  Type scalarIndexType = IndexType::get(parser.getContext());
+  if (parser.resolveOperands(indexOperands, linearType, result.operands) ||
+      parser.resolveOperands(basisOperands, scalarIndexType, result.operands))
+    return failure();
+  result.addTypes(linearType);
+  auto &props = result.getOrAddProperties<AffineLinearizeIndexOp::Properties>();
+  props.disjoint = hasDisjoint;
+  props.static_basis = basisAttr;
+  props.operandSegmentSizes = {static_cast<int32_t>(indexOperands.size()),
+                               static_cast<int32_t>(basisOperands.size())};
+  return success();
+}
+
+/// Prints `(disjoint)? [multi-index...] by (basis...) attr-dict : type`.
+void AffineLinearizeIndexOp::print(OpAsmPrinter &p) {
+  if (getDisjoint())
+    p << " disjoint";
+  p << " [" << getMultiIndex() << "] by ";
+  printDynamicIndexList(p, *this, getDynamicBasis(), getStaticBasisAttr(),
+                        /*valueTypes=*/{}, AsmParser::Delimiter::Paren);
+  // Basis and segment sizes are already conveyed by the syntax above.
+  StringRef elided[] = {getStaticBasisAttrName(),
+                        getOperandSegmentSizesAttrName()};
+  p.printOptionalAttrDict((*this)->getAttrs(), elided);
+  p << " : " << getLinearIndex().getType();
+}
+
+/// Picks the result type for a linearize op built from `multiIndex`: the type
+/// of the first value, or scalar `index` when there are none. Agreement of
+/// the remaining values is not checked here.
+static Type inferIndexType(MLIRContext *ctx, ValueRange multiIndex) {
+  return multiIndex.empty() ? Type(IndexType::get(ctx))
+                            : multiIndex.front().getType();
+}
+
 void AffineLinearizeIndexOp::build(OpBuilder &odsBuilder,
                                    OperationState &odsState,
                                    ValueRange multiIndex, ValueRange basis,
@@ -5246,7 +5378,9 @@ void AffineLinearizeIndexOp::build(OpBuilder &odsBuilder,
   SmallVector<int64_t> staticBasis;
   dispatchIndexOpFoldResults(getAsOpFoldResult(basis), dynamicBasis,
                              staticBasis);
-  build(odsBuilder, odsState, multiIndex, dynamicBasis, staticBasis, disjoint);
+  Type resultType = inferIndexType(odsBuilder.getContext(), multiIndex);
+  build(odsBuilder, odsState, resultType, multiIndex, dynamicBasis, staticBasis,
+        disjoint);
 }
 
 void AffineLinearizeIndexOp::build(OpBuilder &odsBuilder,
@@ -5259,14 +5393,18 @@ void AffineLinearizeIndexOp::build(OpBuilder &odsBuilder,
   SmallVector<Value> dynamicBasis;
   SmallVector<int64_t> staticBasis;
   dispatchIndexOpFoldResults(basis, dynamicBasis, staticBasis);
-  build(odsBuilder, odsState, multiIndex, dynamicBasis, staticBasis, disjoint);
+  Type resultType = inferIndexType(odsBuilder.getContext(), multiIndex);
+  build(odsBuilder, odsState, resultType, multiIndex, dynamicBasis, staticBasis,
+        disjoint);
 }
 
 void AffineLinearizeIndexOp::build(OpBuilder &odsBuilder,
                                    OperationState &odsState,
                                    ValueRange multiIndex,
                                    ArrayRef<int64_t> basis, bool disjoint) {
-  build(odsBuilder, odsState, multiIndex, ValueRange{}, basis, disjoint);
+  MLIRContext *ctx = odsBuilder.getContext();
+  build(odsBuilder, odsState, inferIndexType(ctx, multiIndex), multiIndex,
+        /*dynamic_basis=*/ValueRange{}, basis, disjoint);
 }
 
 LogicalResult AffineLinearizeIndexOp::verify() {
@@ -5284,6 +5422,14 @@ LogicalResult AffineLinearizeIndexOp::verify() {
         "corresponding dynamic basis entry) -- this can only happen due to an "
         "incorrect fold/rewrite");
 
+  // All multi_index types must match the result type.
+  Type resultType = getLinearIndex().getType();
+  for (Value idx : getMultiIndex()) {
+    if (idx.getType() != resultType)
+      return emitOpError("multi_index types must match the result type, got ")
+             << idx.getType() << " vs " << resultType;
+  }
+
   return success();
 }
 
@@ -5402,7 +5548,13 @@ struct DropLinearizeUnitComponentsIfDisjointOrZero final
                                          "no unit basis entries to replace");
 
     if (newIndices.empty()) {
-      rewriter.replaceOpWithNewOp<arith::ConstantIndexOp>(op, 0);
+      Type resultType = op.getLinearIndex().getType();
+      if (auto vecTy = dyn_cast<VectorType>(resultType)) {
+        rewriter.replaceOpWithNewOp<arith::ConstantOp>(
+            op, DenseElementsAttr::get(vecTy, rewriter.getIndexAttr(0)));
+      } else {
+        rewriter.replaceOpWithNewOp<arith::ConstantIndexOp>(op, 0);
+      }
       return success();
     }
     rewriter.replaceOpWithNewOp<affine::AffineLinearizeIndexOp>(
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOpsAsAffine.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOpsAsAffine.cpp
index e919bc6d36265..0178c5159df53 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOpsAsAffine.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOpsAsAffine.cpp
@@ -15,7 +15,9 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/Transforms/Transforms.h"
 #include "mlir/Dialect/Affine/Utils.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
 namespace mlir {
@@ -29,6 +31,33 @@ using namespace mlir;
 using namespace mlir::affine;
 
 namespace {
+
+/// Builds a constant `value`: a splat if `type` is a vector, else scalar index.
+static Value createTypedConstant(OpBuilder &b, Location loc, Type type,
+                                 int64_t value) {
+  if (auto vecTy = dyn_cast<VectorType>(type))
+    return arith::ConstantOp::create(
+        b, loc, DenseElementsAttr::get(vecTy, b.getIndexAttr(value)));
+  return arith::ConstantIndexOp::create(b, loc, value);
+}
+
+/// Turns one basis element into a Value usable with operands of `targetType`.
+/// Constants become a constant of `targetType` (a splat for vectors). A
+/// non-constant element is returned unchanged for a scalar `index` target and
+/// yields failure for any other target: that would need a vector.broadcast,
+/// which is not emitted here.
+static FailureOr<Value> materializeBasis(OpBuilder &b, Location loc,
+                                         OpFoldResult ofr, Type targetType) {
+  // Constant basis element: materialize it directly in the target type.
+  if (std::optional<int64_t> constant = getConstantIntValue(ofr))
+    return createTypedConstant(b, loc, targetType, *constant);
+
+  // Runtime scalar basis element: only usable as-is with a scalar target.
+  if (!isa<IndexType>(targetType))
+    return failure();
+  return getValueOrCreateConstantIndexOp(b, loc, ofr);
+}
+
 /// Lowers `affine.delinearize_index` into a sequence of division and remainder
 /// operations.
 struct LowerDelinearizeIndexOps
@@ -36,12 +65,51 @@ struct LowerDelinearizeIndexOps
   using OpRewritePattern<AffineDelinearizeIndexOp>::OpRewritePattern;
   LogicalResult matchAndRewrite(AffineDelinearizeIndexOp op,
                                 PatternRewriter &rewriter) const override {
-    FailureOr<SmallVector<Value>> multiIndex =
-        delinearizeIndex(rewriter, op->getLoc(), op.getLinearIndex(),
-                         op.getEffectiveBasis(), /*hasOuterBound=*/false);
-    if (failed(multiIndex))
-      return failure();
-    rewriter.replaceOp(op, *multiIndex);
+    // Scalar `index` input: defer to the existing affine-based expansion.
+    if (isa<IndexType>(op.getLinearIndex().getType())) {
+      FailureOr<SmallVector<Value>> multiIndex =
+          delinearizeIndex(rewriter, op->getLoc(), op.getLinearIndex(),
+                           op.getEffectiveBasis(), /*hasOuterBound=*/false);
+      if (failed(multiIndex))
+        return failure();
+      rewriter.replaceOp(op, *multiIndex);
+      return success();
+    }
+
+    // Vector input. A dynamic basis would need vector.broadcast, so bail out
+    // before creating any op: a failing pattern must leave the IR untouched.
+    SmallVector<OpFoldResult> basis = op.getEffectiveBasis();
+    if (!llvm::all_of(basis, [](OpFoldResult ofr) {
+          return getConstantIntValue(ofr).has_value();
+        }))
+      return rewriter.notifyMatchFailure(op, "non-constant vector basis");
+    Location loc = op.getLoc();
+    Type type = op.getLinearIndex().getType();
+
+    // Divisors are suffix products of the effective basis, innermost first.
+    SmallVector<Value> divisors;
+    Value cumulativeProd = createTypedConstant(rewriter, loc, type, 1);
+    for (OpFoldResult basisElem : llvm::reverse(basis)) {
+      Value basisVal = createTypedConstant(rewriter, loc, type,
+                                           *getConstantIntValue(basisElem));
+      cumulativeProd =
+          arith::MulIOp::create(rewriter, loc, cumulativeProd, basisVal);
+      divisors.push_back(cumulativeProd);
+    }
+
+    // Peel off one result per divisor, most-significant first. Floor division
+    // matches the floordiv/mod semantics of the scalar path for negatives.
+    SmallVector<Value> results;
+    Value residual = op.getLinearIndex();
+    for (Value divisor : llvm::reverse(divisors)) {
+      Value quotient =
+          arith::FloorDivSIOp::create(rewriter, loc, residual, divisor);
+      Value product = arith::MulIOp::create(rewriter, loc, quotient, divisor);
+      residual = arith::SubIOp::create(rewriter, loc, residual, product);
+      results.push_back(quotient);
+    }
+    results.push_back(residual);
+    rewriter.replaceOp(op, results);
     return success();
   }
 };
@@ -58,13 +126,60 @@ struct LowerLinearizeIndexOps final : OpRewritePattern<AffineLinearizeIndexOp> {
       return success();
     }
 
-    SmallVector<OpFoldResult> multiIndex =
-        getAsOpFoldResult(op.getMultiIndex());
-    OpFoldResult linearIndex =
-        linearizeIndex(rewriter, op.getLoc(), multiIndex, op.getMixedBasis());
-    Value linearIndexValue =
-        getValueOrCreateConstantIntOp(rewriter, op.getLoc(), linearIndex);
-    rewriter.replaceOp(op, linearIndexValue);
+    // For scalar types, use the existing affine lowering path.
+    if (isa<IndexType>(op.getLinearIndex().getType())) {
+      SmallVector<OpFoldResult> multiIndex =
+          getAsOpFoldResult(op.getMultiIndex());
+      OpFoldResult linearIndex =
+          linearizeIndex(rewriter, op.getLoc(), multiIndex, op.getMixedBasis());
+      Value linearIndexValue =
+          getValueOrCreateConstantIntOp(rewriter, op.getLoc(), linearIndex);
+      rewriter.replaceOp(op, linearIndexValue);
+      return success();
+    }
+
+    // Vector lowering: emit arith ops (which work element-wise on vectors).
+    //
+    // linearize_index [i0, i1, ..., iN-1] by (B0, B1, ..., BN-1)
+    // = i0 * stride_0 + i1 * stride_1 + ... + iN-1
+    // where stride_k = B_{k+1} * B_{k+2} * ... * B_{N-1}
+    //
+    // We compute from the back: result = iN-1, stride = 1, then:
+    //   stride *= B_{k}, result += i_k * stride
+    Location loc = op.getLoc();
+    Type type = op.getLinearIndex().getType();
+    SmallVector<OpFoldResult> effectiveBasis = op.getEffectiveBasis();
+    ValueRange indices = op.getMultiIndex();
+
+    // effectiveBasis drops the outer bound. For indices [i0, i1, ..., iN-1]:
+    //   no outer bound:  effectiveBasis = [B1, B2, ..., BN-1] (N-1 elems)
+    //   has outer bound: effectiveBasis = [B0, B1, ..., BN-1] (N elems,
+    //                    but B0 is advisory, dropped by getEffectiveBasis)
+    //
+    // Computation: result = iN-1 + BN-1 * (iN-2 + BN-2 * (... + B1 * i0))
+    // Or equivalently, accumulate from back:
+    //   result = iN-1
+    //   stride = 1
+    //   for k = numBasis-1 downto 0:
+    //     stride *= effectiveBasis[k]
+    //     result += indices[k] * stride
+    //
+    // This works because effectiveBasis[k] is the "size" of dimension k+1,
+    // and indices[k] is paired with the product of all sizes after it.
+    Value result = indices.back();
+    Value stride = createTypedConstant(rewriter, loc, type, 1);
+
+    for (int i = static_cast<int>(effectiveBasis.size()) - 1; i >= 0; --i) {
+      FailureOr<Value> basisVal =
+          materializeBasis(rewriter, loc, effectiveBasis[i], type);
+      if (failed(basisVal))
+        return failure();
+      stride = arith::MulIOp::create(rewriter, loc, stride, *basisVal);
+      Value term = arith::MulIOp::create(rewriter, loc, indices[i], stride);
+      result = arith::AddIOp::create(rewriter, loc, term, result);
+    }
+
+    rewriter.replaceOp(op, result);
     return success();
   }
 };