[Mlir-commits] [mlir] e99fae8 - [mlir] more aggressive folding in tiling/fusion transformations

Wed Jul 27 01:52:28 PDT 2022

Author: Alex Zinenko
Date: 2022-07-27T08:52:18Z
New Revision: e99fae899710b041994cef4beb6764f8dfbe8ef0

URL: https://github.com/llvm/llvm-project/commit/e99fae899710b041994cef4beb6764f8dfbe8ef0
DIFF: https://github.com/llvm/llvm-project/commit/e99fae899710b041994cef4beb6764f8dfbe8ef0.diff

LOG: [mlir] more aggressive folding in tiling/fusion transformations

Combine the recently added utilities for folded-by-construction affine
operations with the attribute-based Range to enable more folding. This
decreases the amount of emitted code but has little effect on test
precisely because the tests are not checking for the spurious constants.
The difference in the shape of affine maps comes from the internals of
affine folding.

Depends on D129633

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D130167

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
    mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
    mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
    mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
    mlir/lib/Dialect/Affine/IR/AffineOps.cpp
    mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
    mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
    mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
    mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp
    mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
    mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
    mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
    mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
    mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
    mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
    mlir/lib/Dialect/Linalg/Utils/Utils.cpp
    mlir/lib/Dialect/Utils/StaticValueUtils.cpp
    mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
    mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
    mlir/test/Dialect/Linalg/tile-conv.mlir
    mlir/test/Dialect/Linalg/transform-op-fuse.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index a48c48a4a91cf..d39efbb16bab7 100644

--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -392,6 +392,13 @@ OpFoldResult makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
 OpFoldResult makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
                                            AffineExpr expr,
                                            ArrayRef<OpFoldResult> operands);
+/// Variant of `makeComposedFoldedAffineApply` suitable for multi-result maps.
+/// Note that this may create as many affine.apply operations as the map has
+/// results given that affine.apply must be single-result.
+SmallVector<OpFoldResult>
+makeComposedFoldedMultiResultAffineApply(RewriterBase &b, Location loc,
+                                         AffineMap map,
+                                         ArrayRef<OpFoldResult> operands);
 
 /// Returns an AffineMinOp obtained by composing `map` and `operands` with
 /// AffineApplyOps supplying those operands.
@@ -405,16 +412,17 @@ OpFoldResult makeComposedFoldedAffineMin(RewriterBase &b, Location loc,
                                          AffineMap map,
                                          ArrayRef<OpFoldResult> operands);
 
+/// Constructs an AffineMinOp that computes a maximum across the results of
+/// applying `map` to `operands`, then immediately attempts to fold it. If
+/// folding results in a constant value, erases all created ops.
+OpFoldResult makeComposedFoldedAffineMax(RewriterBase &b, Location loc,
+                                         AffineMap map,
+                                         ArrayRef<OpFoldResult> operands);
+
 /// Returns the values obtained by applying `map` to the list of values.
 SmallVector<Value, 4> applyMapToValues(OpBuilder &b, Location loc,
                                        AffineMap map, ValueRange values);
 
-/// Returns the values obtained by applying `map` to the list of values, which
-/// may be known constants.
-SmallVector<OpFoldResult> applyMapToValues(RewriterBase &b, Location loc,
-                                           AffineMap map,
-                                           ArrayRef<OpFoldResult> values);
-
 /// Given an affine map `map` and its input `operands`, this method composes
 /// into `map`, maps of AffineApplyOps whose results are the values in
 /// `operands`, iteratively until no more of `operands` are the result of an

diff  --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
index 7ebf5b6ce5ed2..d6bc06218a918 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
@@ -1133,7 +1133,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
   let extraClassDeclaration = [{
     /// Return the flat list of all operand dimension sizes in the order they
     /// appear in the operands.
-    SmallVector<Value, 4> createFlatListOfOperandDims(OpBuilder &, Location);
+    SmallVector<OpFoldResult> createFlatListOfOperandDims(OpBuilder &, Location);
 
     /// Return the flat list of all operands' static dimension sizes in the
     /// order they appear in the operands. All operand dimension sizes have to

diff  --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index e74905edadffc..5a4c41a938175 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -410,7 +410,8 @@ using TileSizeComputationFunction =
 using LoopIndexToRangeIndexMap = DenseMap<int, int>;
 std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>
 makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
-                    ValueRange allShapeSizes, ValueRange allTileSizes);
+                    ArrayRef<OpFoldResult> allShapeSizes,
+                    ArrayRef<OpFoldResult> allTileSizes);
 
 /// A description of a multi-size tiling comprising tile sizes and numbers of
 /// tiles, expressed as Values which may or may not be constant. Multi-size

diff  --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 1a792da3c278c..4b81ffecb43e6 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -48,6 +48,8 @@ bool isPermutation(ArrayRef<int64_t> permutation);
 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
 /// the type of `source`.
 Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim);
+OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source,
+                               int64_t dim);
 
 /// Given an operation, retrieves the value of each dynamic dimension through
 /// constructing the necessary DimOp operators.
@@ -179,16 +181,17 @@ bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
 
 /// Computes tile offsets, given a list of loop `ivs` and `tileSizes`. In case a
 /// tile size is zero (i.e., no tiling), the corresponding offset is also zero.
-SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,
-                                      ValueRange ivs, ValueRange tileSizes);
+SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
+                                             ArrayRef<OpFoldResult> ivs,
+                                             ArrayRef<OpFoldResult> tileSizes);
 
 /// Computes tile sizes, given a list of `tileSizes` and dimension
 /// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the
 /// corresponding result size is the corresponding value from `sizeBounds`.
 /// Note: The returned tile sizes are closed intervals.
-SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc,
-                                    ValueRange tileSizes,
-                                    ArrayRef<Value> sizeBounds);
+SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
+                                           ArrayRef<OpFoldResult> tileSizes,
+                                           ArrayRef<OpFoldResult> sizeBounds);
 
 /// Returns the list of tensor output types produced when the given structured
 /// operation `op` is applied to the given `operands`. Note that `operands` are
@@ -217,8 +220,9 @@ Value materializeOpFoldResult(OpBuilder &b, Location loc,
 /// controls whether to omit the partial/boundary tile condition check in cases
 /// where we statically know that it is unnecessary.
 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
-                     ValueRange tileSizes, AffineMap map, ValueRange lbs,
-                     ValueRange ubs, ValueRange subShapeSizes,
+                     ArrayRef<OpFoldResult> tileSizes, AffineMap map,
+                     ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
+                     ArrayRef<OpFoldResult> subShapeSizes,
                      bool omitPartialTileCheck);
 
 /// Creates extract_slice/subview ops for all `valuesToTile` of the given
@@ -232,18 +236,20 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
 /// Note that a constant zero in `tileSizes` means no tiling at that implicit
 /// loop. The number of non-zero values in `tileSizes` should be equal to the
 /// number of values in `ivs`.
-SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
-                                      LinalgOp linalgOp,
-                                      ArrayRef<Value> valuesToTile,
-                                      ValueRange ivs, ValueRange tileSizes,
-                                      ArrayRef<Value> sizeBounds,
-                                      bool omitPartialTileCheck);
+SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
+                                   LinalgOp linalgOp, ValueRange valuesToTile,
+                                   ArrayRef<OpFoldResult> ivs,
+                                   ArrayRef<OpFoldResult> tileSizes,
+                                   ArrayRef<OpFoldResult> sizeBounds,
+                                   bool omitPartialTileCheck);
 
 /// Add the specified offsets to any `linalg.index` ops contained in the given
 /// `linalgOp`. The offsets are provided in the same order as iteration space
 /// dimensions. Null offests are assumed to be zero.
-void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef<Value> offests);
-void offsetIndices(RewriterBase &b, LinalgOp linalgOp, ArrayRef<Value> offests);
+void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
+                   ArrayRef<OpFoldResult> offests);
+void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
+                   ArrayRef<OpFoldResult> offests);
 
 using FusableOpDependencesTy = llvm::MapVector<
     Operation *,

diff  --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 00fba1fd1f303..0681f710df155 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -790,33 +790,6 @@ AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
       values);
 }
 
-OpFoldResult
-mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
-                                    AffineMap map,
-                                    ArrayRef<OpFoldResult> operands) {
-  assert(map.getNumResults() == 1 && "building affine.apply with !=1 result");
-
-  SmallVector<Operation *> constants;
-  SmallVector<Value> actualValues;
-  materializeConstants(b, loc, operands, constants, actualValues);
-  composeAffineMapAndOperands(&map, &actualValues);
-  OpFoldResult result = createOrFold<AffineApplyOp>(b, loc, actualValues, map);
-  if (result.is<Attribute>()) {
-    for (Operation *op : constants)
-      b.eraseOp(op);
-  }
-  return result;
-}
-
-OpFoldResult
-mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
-                                    AffineExpr expr,
-                                    ArrayRef<OpFoldResult> operands) {
-  return makeComposedFoldedAffineApply(
-      b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{expr}).front(),
-      operands);
-}
-
 /// Composes the given affine map with the given list of operands, pulling in
 /// the maps from any affine.apply operations that supply the operands.
 static void composeMultiResultAffineMap(AffineMap &map,
@@ -847,6 +820,44 @@ static void composeMultiResultAffineMap(AffineMap &map,
   canonicalizeMapAndOperands(&map, &operands);
 }
 
+OpFoldResult
+mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
+                                    AffineMap map,
+                                    ArrayRef<OpFoldResult> operands) {
+  assert(map.getNumResults() == 1 && "building affine.apply with !=1 result");
+
+  SmallVector<Operation *> constants;
+  SmallVector<Value> actualValues;
+  materializeConstants(b, loc, operands, constants, actualValues);
+  composeAffineMapAndOperands(&map, &actualValues);
+  OpFoldResult result = createOrFold<AffineApplyOp>(b, loc, actualValues, map);
+
+  // Constants are always folded into affine min/max because they can be
+  // represented as constant expressions, so delete them.
+  for (Operation *op : constants)
+    b.eraseOp(op);
+  return result;
+}
+
+OpFoldResult
+mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
+                                    AffineExpr expr,
+                                    ArrayRef<OpFoldResult> operands) {
+  return makeComposedFoldedAffineApply(
+      b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{expr}).front(),
+      operands);
+}
+
+SmallVector<OpFoldResult> mlir::makeComposedFoldedMultiResultAffineApply(
+    RewriterBase &b, Location loc, AffineMap map,
+    ArrayRef<OpFoldResult> operands) {
+  return llvm::to_vector(llvm::map_range(
+      llvm::seq<unsigned>(0, map.getNumResults()), [&](unsigned i) {
+        return makeComposedFoldedAffineApply(b, loc, map.getSubMap({i}),
+                                             operands);
+      }));
+}
+
 Value mlir::makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
                                   ValueRange operands) {
   SmallVector<Value> allOperands = llvm::to_vector(operands);
@@ -854,22 +865,36 @@ Value mlir::makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
   return b.createOrFold<AffineMinOp>(loc, b.getIndexType(), map, allOperands);
 }
 
-OpFoldResult
-mlir::makeComposedFoldedAffineMin(RewriterBase &b, Location loc, AffineMap map,
-                                  ArrayRef<OpFoldResult> operands) {
+template <typename OpTy>
+static OpFoldResult makeComposedFoldedMinMax(RewriterBase &b, Location loc,
+                                             AffineMap map,
+                                             ArrayRef<OpFoldResult> operands) {
   SmallVector<Operation *> constants;
   SmallVector<Value> actualValues;
   materializeConstants(b, loc, operands, constants, actualValues);
   composeMultiResultAffineMap(map, actualValues);
   OpFoldResult result =
-      createOrFold<AffineMinOp>(b, loc, actualValues, b.getIndexType(), map);
-  if (result.is<Attribute>()) {
-    for (Operation *op : constants)
-      b.eraseOp(op);
-  }
+      createOrFold<OpTy>(b, loc, actualValues, b.getIndexType(), map);
+
+  // Constants are always folded into affine min/max because they can be
+  // represented as constant expressions, so delete them.
+  for (Operation *op : constants)
+    b.eraseOp(op);
   return result;
 }
 
+OpFoldResult
+mlir::makeComposedFoldedAffineMin(RewriterBase &b, Location loc, AffineMap map,
+                                  ArrayRef<OpFoldResult> operands) {
+  return makeComposedFoldedMinMax<AffineMinOp>(b, loc, map, operands);
+}
+
+OpFoldResult
+mlir::makeComposedFoldedAffineMax(RewriterBase &b, Location loc, AffineMap map,
+                                  ArrayRef<OpFoldResult> operands) {
+  return makeComposedFoldedMinMax<AffineMaxOp>(b, loc, map, operands);
+}
+
 /// Fully compose map with operands and canonicalize the result.
 /// Return the `createOrFold`'ed AffineApply op.
 static Value createFoldedComposedAffineApply(OpBuilder &b, Location loc,
@@ -896,40 +921,6 @@ SmallVector<Value, 4> mlir::applyMapToValues(OpBuilder &b, Location loc,
   return res;
 }
 
-SmallVector<OpFoldResult>
-mlir::applyMapToValues(RewriterBase &b, Location loc, AffineMap map,
-                       ArrayRef<OpFoldResult> values) {
-  // Materialize constants and keep track of produced operations so we can clean
-  // them up later.
-  SmallVector<Operation *> constants;
-  SmallVector<Value> actualValues;
-  materializeConstants(b, loc, values, constants, actualValues);
-
-  // Compose, fold and construct maps for each result independently because they
-  // may simplify more effectively.
-  SmallVector<OpFoldResult> results;
-  results.reserve(map.getNumResults());
-  bool foldedAll = true;
-  for (auto i : llvm::seq<unsigned>(0, map.getNumResults())) {
-    AffineMap submap = map.getSubMap({i});
-    SmallVector<Value> operands = actualValues;
-    fullyComposeAffineMapAndOperands(&submap, &operands);
-    canonicalizeMapAndOperands(&submap, &operands);
-    results.push_back(createOrFold<AffineApplyOp>(b, loc, operands, submap));
-    if (!results.back().is<Attribute>())
-      foldedAll = false;
-  }
-
-  // If the entire map could be folded, remove the constants that were used in
-  // the initial ops.
-  if (foldedAll) {
-    for (Operation *constant : constants)
-      b.eraseOp(constant);
-  }
-
-  return results;
-}
-
 // A symbol may appear as a dim in affine.apply operations. This function
 // canonicalizes dims that are valid symbols into actual symbols.
 template <class MapOrSet>

diff  --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
index f5ec9246c5826..4f925a27b10aa 100644
--- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_dialect_library(MLIRLinalgDialect
   LINK_LIBS PUBLIC
   MLIRAffineDialect
   MLIRArithmeticDialect
+  MLIRArithmeticUtils
   MLIRBufferizationDialect
   MLIRDialectUtils
   MLIRInferTypeOpInterface

diff  --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index 7f5b9f119d426..f389c18efe5e0 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -10,6 +10,7 @@
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -486,13 +487,20 @@ static Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source,
     return b.createOrFold<tensor::DimOp>(loc, source, dim);
   llvm_unreachable("Expected MemRefType or TensorType");
 }
+static OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source,
+                                      int64_t dim) {
+  auto shapedType = source.getType().cast<ShapedType>();
+  if (!shapedType.hasRank() || shapedType.isDynamicDim(dim))
+    return createOrFoldDimOp(b, loc, source, dim);
+  return b.getIndexAttr(shapedType.getDimSize(dim));
+}
 
-SmallVector<Value, 4> LinalgOp::createFlatListOfOperandDims(OpBuilder &b,
-                                                            Location loc) {
-  SmallVector<Value, 4> res;
+SmallVector<OpFoldResult> LinalgOp::createFlatListOfOperandDims(OpBuilder &b,
+                                                                Location loc) {
+  SmallVector<OpFoldResult> res;
   for (OpOperand *opOperand : getInputAndOutputOperands()) {
     for (int64_t i = 0, e = getRank(opOperand); i < e; ++i)
-      res.push_back(createOrFoldDimOp(b, loc, opOperand->get(), i));
+      res.push_back(createFoldedDimOp(b, loc, opOperand->get(), i));
   }
   return res;
 }
@@ -510,14 +518,13 @@ SmallVector<Range, 4> LinalgOp::createLoopRanges(OpBuilder &b, Location loc) {
   unsigned numDims = map.getNumDims(), numRes = map.getNumResults();
   auto viewSizes = createFlatListOfOperandDims(b, loc);
   SmallVector<Range, 4> res(numDims);
-  Value zeroVal = b.create<arith::ConstantIndexOp>(loc, 0);
-  Value oneVal = b.create<arith::ConstantIndexOp>(loc, 1);
   for (unsigned idx = 0; idx < numRes; ++idx) {
     auto result = map.getResult(idx);
     if (auto d = result.dyn_cast<AffineDimExpr>()) {
       if (res[d.getPosition()].offset)
         continue;
-      res[d.getPosition()] = Range{zeroVal, viewSizes[idx], oneVal};
+      res[d.getPosition()] =
+          Range{b.getIndexAttr(0), viewSizes[idx], b.getIndexAttr(1)};
     }
   }
   return res;
@@ -591,9 +598,11 @@ LinalgOp::reifyResultShapes(OpBuilder &b,
   outputDims.set(resultShapesSubMapPos.first, resultShapesSubMapPos.second);
   HasAffineDimExprVisitor checkDimExpr(std::move(outputDims));
   Location loc = getOperation()->getLoc();
-  auto allResultDimValues =
-      applyMapToValues(b, loc, resultShapesFromInputShapesMap,
-                       createFlatListOfOperandDims(b, loc));
+  IRRewriter rewriter(b);
+  SmallVector<OpFoldResult> allResultDimValues =
+      makeComposedFoldedMultiResultAffineApply(
+          rewriter, loc, resultShapesFromInputShapesMap,
+          createFlatListOfOperandDims(b, loc));
   int64_t pos = 0;
   ArrayRef<AffineExpr> shapeExprs = resultShapesFromInputShapesMap.getResults();
   for (OpOperand *opOperand : getOutputOperands()) {
@@ -602,7 +611,8 @@ LinalgOp::reifyResultShapes(OpBuilder &b,
       if (checkDimExpr.visit(shapeExprs[pos]))
         shapes.push_back(createOrFoldDimOp(b, loc, opOperand->get(), dim));
       else
-        shapes.push_back(allResultDimValues[pos]);
+        shapes.push_back(
+            getValueOrCreateConstantIndexOp(b, loc, allResultDimValues[pos]));
       pos++;
     }
     reifiedReturnShapes.emplace_back(std::move(shapes));

diff  --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 8669165fe76f5..2d7c0c30f068e 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -630,12 +630,8 @@ struct FoldInsertPadIntoFill : public OpRewritePattern<tensor::InsertSliceOp> {
     // plus low padding sizes.
     SmallVector<OpFoldResult, 4> newOffsets;
     for (const auto &p : llvm::zip(lowPads, oldOffsets)) {
-      Value padValue = getValueOrCreateConstantIndexOp(
-          rewriter, srcPadOp.getLoc(), std::get<0>(p));
-      Value offsetValue = getValueOrCreateConstantIndexOp(
-          rewriter, insertOp.getLoc(), std::get<1>(p));
-      newOffsets.push_back(
-          applyMapToValues(rewriter, loc, addMap, {offsetValue, padValue})[0]);
+      newOffsets.push_back(makeComposedFoldedAffineApply(
+          rewriter, loc, addMap, {std::get<0>(p), std::get<1>(p)}));
     }
 
     SmallVector<OpFoldResult, 4> newSizes;

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp b/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp
index 2ca594ef4422b..78dd1ead31728 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BubbleUpExtractSlice.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
 using namespace mlir;
@@ -88,40 +89,35 @@ struct BubbleUpExtractSliceOpPattern
     }
 
     auto linalgLoc = linalgOp.getLoc();
-    auto allShapeSizes =
+    SmallVector<OpFoldResult> allShapeSizes =
         linalgOp.createFlatListOfOperandDims(rewriter, linalgLoc);
     AffineMap shapeSizesToLoopsMap = linalgOp.getShapesToLoopsMap();
     if (!shapeSizesToLoopsMap) {
       return rewriter.notifyMatchFailure(
           linalgOp, "failed to get loops map from shape sizes");
     }
-    auto sizeBounds = applyMapToValues(rewriter, linalgLoc,
-                                       shapeSizesToLoopsMap, allShapeSizes);
-
-    auto sliceLoc = sliceOp.getLoc();
-    auto offsetVals = getValueOrCreateConstantIndexOp(
-        rewriter, sliceLoc, sliceOp.getMixedOffsets());
-    auto sizeVals = getValueOrCreateConstantIndexOp(rewriter, sliceLoc,
-                                                    sliceOp.getMixedSizes());
+    SmallVector<OpFoldResult> sizeBounds =
+        makeComposedFoldedMultiResultAffineApply(
+            rewriter, linalgLoc, shapeSizesToLoopsMap, allShapeSizes);
 
     // The offsets and sizes from the slice operation only give you the tile
     // size of the output. Use that compute the tile sizes and offsets of the
     // loops. For loops not used to access the output, set the tile sizes to
     // loop bounds and set the offset to 0.
-    Value zero = rewriter.create<arith::ConstantIndexOp>(linalgLoc, 0);
-    SmallVector<Value, 4> tileOffsets(sizeBounds.size(), zero);
-    SmallVector<Value, 4> tileSizes = sizeBounds;
+    SmallVector<OpFoldResult> tileOffsets(sizeBounds.size(),
+                                          rewriter.getIndexAttr(0));
+    SmallVector<OpFoldResult> tileSizes = sizeBounds;
     for (auto const &result : enumerate(indexingMap.getResults())) {
       unsigned position = result.value().cast<AffineDimExpr>().getPosition();
-      tileOffsets[position] = offsetVals[result.index()];
-      tileSizes[position] = sizeVals[result.index()];
+      tileOffsets[position] = sliceOp.getMixedOffsets()[result.index()];
+      tileSizes[position] = sliceOp.getMixedSizes()[result.index()];
     }
 
     SmallVector<Value> valuesToTile = linalgOp.getInputAndOutputOperands();
-
-    SmallVector<Value, 4> tiledOperands = makeTiledShapes(
-        rewriter, linalgLoc, linalgOp, valuesToTile, tileOffsets, tileSizes,
-        sizeBounds, /*omitPartialTileCheck=*/true);
+    SmallVector<Value> tiledOperands =
+        makeTiledShapes(rewriter, linalgLoc, linalgOp, valuesToTile,
+                        tileOffsets, tileSizes, sizeBounds,
+                        /*omitPartialTileCheck=*/true);
 
     SmallVector<Type, 4> resultTensorTypes;
     for (OpOperand *opOperand : linalgOp.getOutputTensorOperands())

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
index 1f5fef6873174..889c6f034f897 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
@@ -109,7 +109,9 @@ static SmallVector<OpFoldResult> getGenericOpLoopRange(OpBuilder &b,
   auto allShapesSizes =
       cast<LinalgOp>(op.getOperation()).createFlatListOfOperandDims(b, loc);
   AffineMap map = op.getShapesToLoopsMap();
-  return getAsOpFoldResult(applyMapToValues(b, loc, map, allShapesSizes));
+  IRRewriter rewriter(b);
+  return makeComposedFoldedMultiResultAffineApply(rewriter, loc, map,
+                                                  allShapesSizes);
 }
 
 /// Helper method to permute the list of `values` based on the `map`.

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
index 266c71027b7b5..9d38dd6084fe8 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -113,24 +113,24 @@ static SmallVector<Value> getTiledOperands(LinalgOp producer) {
 /// obtained from the producer itself, since they are not tiled + fused.
 static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
                      const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
-  SmallVector<Value, 8> ivs, tileSizes, sizeBounds;
-  SmallVector<Range, 8> loopRanges;
+  SmallVector<OpFoldResult> ivs, tileSizes, sizeBounds;
+  SmallVector<Range> loopRanges;
   Location loc = producer.getLoc();
-  auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
 
   for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
     auto shapeDim = getShapeDefiningLoopRange(producer, i);
-    Value dim = createOrFoldDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
+    OpFoldResult dim =
+        createFoldedDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
     sizeBounds.push_back(dim);
     auto it = fusedLoopsAndRanges.find(i);
     if (it != fusedLoopsAndRanges.end()) {
-      ivs.push_back(materializeOpFoldResult(b, loc, it->second.offset));
-      tileSizes.push_back(materializeOpFoldResult(b, loc, it->second.size));
+      ivs.push_back(it->second.offset);
+      tileSizes.push_back(it->second.size);
       loopRanges.push_back(it->second);
       LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange "
                               << loopRanges.back() << "\n");
     } else {
-      tileSizes.push_back(zero);
+      tileSizes.push_back(b.getIndexAttr(0));
       loopRanges.push_back(Range{b.getIndexAttr(0), dim, b.getIndexAttr(1)});
       LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange "
                               << loopRanges.back() << "\n");
@@ -166,10 +166,8 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
   Operation *clonedOp = producer.clone(b, loc, resultTypes, clonedShapes);
 
   // Shift all IndexOp results by the tile offset.
-  SmallVector<Value> allIvs;
-  llvm::transform(loopRanges, std::back_inserter(allIvs), [&](Range range) {
-    return materializeOpFoldResult(b, loc, range.offset);
-  });
+  SmallVector<OpFoldResult> allIvs = llvm::to_vector(
+      llvm::map_range(loopRanges, [&](Range range) { return range.offset; }));
   offsetIndices(b, clonedOp, allIvs);
 
   return clonedOp;

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
index df8e9f87f0587..24caab3b58950 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
@@ -141,30 +141,27 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
   Location loc = producerOp.getLoc();
 
   // Obtain the `producerOp` loop bounds and the `sliceOp` ranges.
-  SmallVector<Value> producerLoopBounds;
+  SmallVector<OpFoldResult> producerLoopBounds;
   llvm::transform(producerOp.createLoopRanges(b, loc),
-                  std::back_inserter(producerLoopBounds), [&](Range range) {
-                    return materializeOpFoldResult(b, loc, range.size);
-                  });
+                  std::back_inserter(producerLoopBounds),
+                  [&](Range range) { return range.size; });
   SmallVector<Range> sliceOpRanges = sliceOp.getOrCreateRanges(b, loc);
 
   // Tile the producer operands given the `sliceOp` ranges. Iterate the
   // `tiledSliceDimIndices` and store the tile offset and size for the tiled
   // slice dimension.
-  auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
-  SmallVector<Value> tileIvs(producerOp.getNumLoops(), nullptr);
-  SmallVector<Value> tileSizes(producerOp.getNumLoops(), zero);
-  SmallVector<Value> allIvs(producerOp.getNumLoops(), nullptr);
+  SmallVector<OpFoldResult> tileIvs(producerOp.getNumLoops(), nullptr);
+  SmallVector<OpFoldResult> tileSizes(producerOp.getNumLoops(),
+                                      b.getIndexAttr(0));
+  SmallVector<OpFoldResult> allIvs(producerOp.getNumLoops(), nullptr);
   for (auto it : zip(tiledSliceDimIndices, tiledProducerLoopIndices)) {
     int64_t tiledSliceDim = std::get<0>(it);
     int64_t tiledProducerLoop = std::get<1>(it);
-    tileIvs[tiledProducerLoop] =
-        materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].offset);
-    tileSizes[tiledProducerLoop] =
-        materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].size);
+    tileIvs[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].offset;
+    tileSizes[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].size;
     allIvs[tiledProducerLoop] = tileIvs[tiledProducerLoop];
   }
-  erase_value(tileIvs, nullptr);
+  erase_value(tileIvs, OpFoldResult());
   SmallVector<Value> tiledOperands = producerOp.getInputAndOutputOperands();
   tiledOperands = makeTiledShapes(b, loc, producerOp, tiledOperands, tileIvs,
                                   tileSizes, producerLoopBounds,

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 9066bb83410ea..1c9db2d121f3b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -13,6 +13,7 @@
 #include <utility>
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/Utils/Utils.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
@@ -36,20 +37,27 @@ using namespace mlir::scf;
 
 #define DEBUG_TYPE "linalg-tiling"
 
-static bool isZero(Value v) {
-  if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
+static bool isZero(OpFoldResult v) {
+  if (!v)
+    return false;
+  if (auto attr = v.dyn_cast<Attribute>()) {
+    IntegerAttr intAttr = attr.dyn_cast<IntegerAttr>();
+    return intAttr && intAttr.getValue().isZero();
+  }
+  if (auto cst = v.get<Value>().getDefiningOp<arith::ConstantIndexOp>())
     return cst.value() == 0;
   return false;
 }
 
 std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>
 mlir::linalg::makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
-                                  ValueRange allShapeSizes,
-                                  ValueRange allTileSizes) {
+                                  ArrayRef<OpFoldResult> allShapeSizes,
+                                  ArrayRef<OpFoldResult> allTileSizes) {
   assert(allTileSizes.size() == map.getNumResults());
   // Apply `map` to get shape sizes in loop order.
-  auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes);
-  SmallVector<Value, 4> tileSizes(allTileSizes.begin(), allTileSizes.end());
+  SmallVector<OpFoldResult> shapeSizes =
+      makeComposedFoldedMultiResultAffineApply(b, loc, map, allShapeSizes);
+  SmallVector<OpFoldResult> tileSizes(allTileSizes.begin(), allTileSizes.end());
 
   // Traverse the tile sizes, which are in loop order, erase zeros everywhere.
   LoopIndexToRangeIndexMap loopIndexToRangeIndex;
@@ -80,7 +88,7 @@ void mlir::linalg::transformIndexOps(
       continue;
     en.value() = ivs[rangeIndex->second];
   }
-  offsetIndices(b, op, allIvs);
+  offsetIndices(b, op, getAsOpFoldResult(allIvs));
 }
 
 /// Asserts that the given index-typed value is strictly positive. If the value
@@ -121,14 +129,15 @@ mlir::linalg::computeMultiTileSizes(OpBuilder &builder, LinalgOp op,
 
   // Find the trip count of the iteration space dimension for which the tile
   // sizes are computed.
-  // TODO: update createFlatListOfOperandDims to return OpFoldResults and avoid
-  // littering by useless constant materialization.
-  SmallVector<Value, 4> allShapes =
+  SmallVector<OpFoldResult> allShapes =
       op.createFlatListOfOperandDims(b, b.getLoc());
   AffineMap shapesToLoops = op.getShapesToLoopsMap();
-  SmallVector<Value, 4> loopRanges =
-      applyMapToValues(b, op.getLoc(), shapesToLoops, allShapes);
-  Value tripCount = loopRanges[dimension];
+  IRRewriter rewriter(b);
+  SmallVector<OpFoldResult> loopRanges =
+      makeComposedFoldedMultiResultAffineApply(rewriter, op.getLoc(),
+                                               shapesToLoops, allShapes);
+  Value tripCount =
+      materializeOpFoldResult(rewriter, op.getLoc(), loopRanges[dimension]);
 
   // Compute the tile sizes and the respective numbers of tiles.
   AffineExpr s0 = b.getAffineSymbolExpr(0);
@@ -181,15 +190,6 @@ createMatchingParallelSubsetInsertOp(OpBuilder &b, Location loc,
       subsetExtractOp.getMixedSizes(), subsetExtractOp.getMixedStrides());
 }
 
-/// Build an `affine_max` of all the `vals`.
-static OpFoldResult buildMax(OpBuilder &b, Location loc,
-                             ArrayRef<OpFoldResult> vals) {
-  SmallVector<Value> args = getValueOrCreateConstantIndexOp(b, loc, vals);
-  return b.createOrFold<AffineMaxOp>(
-      loc, AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()),
-      args);
-}
-
 /// Returns true if the maximum tile offset `tileSize * numThreads-1` is less
 /// than `iterationSize`.
 static bool canOmitTileOffsetInBoundsCheck(OpFoldResult tileSize,
@@ -203,6 +203,24 @@ static bool canOmitTileOffsetInBoundsCheck(OpFoldResult tileSize,
   return *tileSizeConst * (*numThreadsConst - 1) < *iterSizeConst;
 }
 
+/// Build an `affine_max` of all the `vals`.
+static OpFoldResult buildMax(OpBuilder &b, Location loc,
+                             ArrayRef<OpFoldResult> vals) {
+  IRRewriter rewriter(b);
+  return makeComposedFoldedAffineMax(
+      rewriter, loc,
+      AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()), vals);
+}
+
+/// Build an `affine_min` of all the `vals`.
+static OpFoldResult buildMin(OpBuilder &b, Location loc,
+                             ArrayRef<OpFoldResult> vals) {
+  IRRewriter rewriter(b);
+  return makeComposedFoldedAffineMin(
+      rewriter, loc,
+      AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()), vals);
+}
+
 /// Rewrite a TilingInterface `op` to a tiled `scf.foreach_thread`. The
 /// tiling is specified by the number of tiles/threads `numThreads` and the
 /// optional nominal tile size `nominalTileSizes`. If `nominalTilSizes` is
@@ -242,7 +260,6 @@ static FailureOr<ForeachThreadTilingResult> tileToForeachThreadOpImpl(
         return materializeOpFoldResult(ilocb, ofr);
       }));
 
-  Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
   Operation *tiledOp = nullptr;
 
   // Create the ForeachThreadOp. We don't use the lambda body-builder
@@ -273,9 +290,9 @@ static FailureOr<ForeachThreadTilingResult> tileToForeachThreadOpImpl(
     AffineExpr i, j, M, N, O;
     bindDims(b.getContext(), i, j);
     bindSymbols(b.getContext(), M, N, O);
-    Value size = loopRanges[loopIdx].size;
-    Value offset = loopRanges[loopIdx].offset;
-    Value threadId = threadIds[threadIdIdx];
+    OpFoldResult size = loopRanges[loopIdx].size;
+    OpFoldResult offset = loopRanges[loopIdx].offset;
+    OpFoldResult threadId = threadIds[threadIdIdx];
     // Symbolic fixed max size per thread.
     // TODO: floor + 0/1 depending on case for better load-balancing.
     OpFoldResult tileSizePerThread =
@@ -295,9 +312,8 @@ static FailureOr<ForeachThreadTilingResult> tileToForeachThreadOpImpl(
     if (!isConstantIntValue(residualTileSize, 0)) {
       OpFoldResult sizeMinusOffsetPerThread = makeComposedFoldedAffineApply(
           b, loc, -i + M, {offsetPerThread, size});
-      tileSizePerThread = makeComposedFoldedAffineMin(
-          b, loc, AffineMap::getMultiDimIdentityMap(2, b.getContext()),
-          ArrayRef<OpFoldResult>{sizeMinusOffsetPerThread, tileSizePerThread});
+      tileSizePerThread =
+          buildMin(b, loc, {sizeMinusOffsetPerThread, tileSizePerThread});
     }
 
     tiledOffsets.push_back(offsetPerThread);
@@ -305,7 +321,8 @@ static FailureOr<ForeachThreadTilingResult> tileToForeachThreadOpImpl(
     if (!omitTileOffsetBoundsCheck &&
         !canOmitTileOffsetInBoundsCheck(tileSizePerThread,
                                         nonZeroNumThreads[threadIdIdx], size))
-      tileSizePerThread = buildMax(b, loc, {zero, tileSizePerThread});
+      tileSizePerThread =
+          buildMax(b, loc, {b.getIndexAttr(0), tileSizePerThread});
 
     tiledSizes.push_back(tileSizePerThread);
     ++threadIdIdx;
@@ -380,7 +397,7 @@ static Value insertSliceIntoTensor(RewriterBase &b, Location loc,
 
 template <typename LoopTy>
 static FailureOr<TiledLinalgOp>
-tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ValueRange tileSizes,
+tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ArrayRef<OpFoldResult> tileSizes,
                  const LinalgTilingOptions &options) {
   auto nLoops = op.getNumLoops();
   // Initial tile sizes may be too big, only take the first nLoops.
@@ -395,7 +412,8 @@ tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ValueRange tileSizes,
   }
 
   // 1. Build the tiled loop ranges.
-  auto allShapeSizes = op.createFlatListOfOperandDims(b, op.getLoc());
+  SmallVector<OpFoldResult> allShapeSizes =
+      op.createFlatListOfOperandDims(b, op.getLoc());
   AffineMap shapeSizesToLoopsMap = op.getShapesToLoopsMap();
   if (!shapeSizesToLoopsMap)
     return failure();
@@ -460,11 +478,14 @@ tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ValueRange tileSizes,
                static_cast<size_t>(op.getNumInputsAndOutputs()) &&
            "expect the number of operands and inputs and outputs to match");
     SmallVector<Value> valuesToTile = operandValuesToUse;
-    auto sizeBounds =
-        applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
-    SmallVector<Value, 4> tiledOperands =
-        makeTiledShapes(b, loc, op, valuesToTile, interchangedIvs, tileSizes,
-                        sizeBounds, /*omitPartialTileCheck=*/false);
+    IRRewriter rewriter(b);
+    SmallVector<OpFoldResult> sizeBounds =
+        makeComposedFoldedMultiResultAffineApply(
+            rewriter, loc, shapeSizesToLoopsMap, allShapeSizes);
+    SmallVector<Value> tiledOperands = makeTiledShapes(
+        b, loc, op, valuesToTile, getAsOpFoldResult(interchangedIvs), tileSizes,
+        sizeBounds,
+        /*omitPartialTileCheck=*/false);
 
     SmallVector<Type> resultTensorTypes =
         getTensorOutputTypes(op, tiledOperands);
@@ -518,11 +539,10 @@ FailureOr<TiledLinalgOp> static tileLinalgOpImpl(
   // dimension. This convention is significantly simpler to handle instead of
   // adjusting affine maps to account for missing dimensions.
   auto nLoops = op.getNumLoops();
-  SmallVector<Value, 4> tileSizeVector =
-      options.tileSizeComputationFunction(b, op);
+  SmallVector<OpFoldResult> tileSizeVector =
+      getAsOpFoldResult(options.tileSizeComputationFunction(b, op));
   if (tileSizeVector.size() < nLoops) {
-    auto zero = b.create<arith::ConstantIndexOp>(op.getLoc(), 0);
-    tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
+    tileSizeVector.append(nLoops - tileSizeVector.size(), b.getIndexAttr(0));
   }
 
   return tileLinalgOpImpl<LoopTy>(b, op, tileSizeVector, options);
@@ -555,24 +575,22 @@ static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
   newPadOp = cast<tensor::PadOp>(builder.clone(*op.getOperation()));
   // Get rank and tile sizes.
   int64_t rank = op.getResultType().getRank();
-  SmallVector<Value> tileSizes =
-      options.tileSizeComputationFunction(builder, op);
+  SmallVector<OpFoldResult> tileSizes =
+      getAsOpFoldResult(options.tileSizeComputationFunction(builder, op));
   // Normalize untiled padding dimensions to 0.
-  Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
-  tileSizes.append(rank - tileSizes.size(), zero);
+  tileSizes.append(rank - tileSizes.size(), builder.getIndexAttr(0));
   // Compute lower and upper bounds of the loop nest.
   TilingInterface tilingInterface =
       dyn_cast<TilingInterface>(op.getOperation());
   SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
-  SmallVector<Value> lbs, dims, allDims, steps;
+  SmallVector<Value> lbs, dims, steps;
+  SmallVector<OpFoldResult> allDims;
   for (int64_t i = 0; i < rank; ++i) {
-    Value materializedSize =
-        materializeOpFoldResult(builder, loc, ranges[i].size);
-    allDims.push_back(materializedSize);
+    allDims.push_back(ranges[i].size);
     if (!isZero(tileSizes[i])) {
       lbs.push_back(materializeOpFoldResult(builder, loc, ranges[i].offset));
-      dims.push_back(materializedSize);
-      steps.push_back(tileSizes[i]);
+      dims.push_back(materializeOpFoldResult(builder, loc, ranges[i].size));
+      steps.push_back(materializeOpFoldResult(builder, loc, tileSizes[i]));
     }
   }
   // Generate loop nest: One loop per dimension.
@@ -583,9 +601,11 @@ static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
       [&](OpBuilder &b, Location loc, ValueRange localIvs,
           ValueRange iterArgs) -> scf::ValueVector {
         // Compute offsets and sizes of ExtractSliceOp.
-        SmallVector<Value> offsets =
-            computeTileOffsets(b, loc, localIvs, tileSizes);
-        SmallVector<Value> sizes = computeTileSizes(b, loc, tileSizes, allDims);
+        SmallVector<Value> localIVVector = llvm::to_vector(localIvs);
+        SmallVector<OpFoldResult> offsets = computeTileOffsets(
+            b, loc, getAsOpFoldResult(localIVVector), tileSizes);
+        SmallVector<OpFoldResult> sizes =
+            computeTileSizes(b, loc, tileSizes, allDims);
         // Create ExtractSliceOp: Extract a tile from the tensor::PadOp.
         // Note: The tensor::PadOp is located outside of the loop nest. It is
         // later moved inside by ExtractSliceOfPadTensorSwapPattern.

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
index c6b8e5f4b8b24..9f838585d5742 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
@@ -32,7 +32,7 @@ struct LinalgOpTilingInterface
                                             LinalgOpTy> {
   /// Return the destination operands.
   SmallVector<Value> getDestinationOperands(Operation *op, OpBuilder &b) const {
-    return llvm::cast<LinalgOp>(op).getOutputOperands();
+    return cast<LinalgOp>(op).getOutputOperands();
   }
 
   /// Return the loop iterator type.
@@ -50,13 +50,16 @@ struct LinalgOpTilingInterface
     b.setInsertionPoint(op);
     Location loc = op->getLoc();
     LinalgOp linalgOp = cast<LinalgOp>(op);
-    auto allShapesSizes = linalgOp.createFlatListOfOperandDims(b, loc);
+    SmallVector<OpFoldResult> allShapesSizes =
+        linalgOp.createFlatListOfOperandDims(b, loc);
     AffineMap map = linalgOp.getShapesToLoopsMap();
-    Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
-    Value one = b.create<arith::ConstantIndexOp>(loc, 1);
-    return llvm::to_vector(llvm::map_range(
-        applyMapToValues(b, loc, map, allShapesSizes), [&](Value v) {
-          return Range{zero, v, one};
+
+    IRRewriter rewriter(b);
+    return llvm::to_vector(
+        llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
+          OpFoldResult ofr = makeComposedFoldedAffineApply(
+              rewriter, loc, loopExpr, allShapesSizes);
+          return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
         }));
   }
 
@@ -71,11 +74,8 @@ struct LinalgOpTilingInterface
     Location loc = op->getLoc();
     LinalgOp linalgOp = cast<LinalgOp>(op);
     SmallVector<Value> valuesToTile = linalgOp.getInputAndOutputOperands();
-    SmallVector<Value> offsetValues =
-        getValueOrCreateConstantIndexOp(b, loc, offsets);
     SmallVector<Value, 4> tiledOperands = makeTiledShapes(
-        b, loc, linalgOp, valuesToTile, offsetValues,
-        getValueOrCreateConstantIndexOp(b, loc, sizes), {}, true);
+        b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
 
     SmallVector<Type> resultTensorTypes = llvm::to_vector(llvm::map_range(
         linalgOp.getOutputTensorOperands(), [&](OpOperand *opOperand) {
@@ -84,7 +84,7 @@ struct LinalgOpTilingInterface
 
     Operation *tiledOp =
         linalgOp.clone(b, loc, resultTensorTypes, tiledOperands);
-    offsetIndices(b, cast<LinalgOp>(tiledOp), offsetValues);
+    offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);
 
     return {tiledOp};
   }
@@ -102,28 +102,16 @@ struct LinalgOpTilingInterface
 
     AffineExpr d0;
     bindDims(b.getContext(), d0);
-
-    auto fullyComposeAffineMapAndOperands = [](OpBuilder &builder, Location loc,
-                                               AffineExpr expr,
-                                               ValueRange operands) -> Value {
-      AffineMap map = AffineMap::inferFromExprList({expr}).front();
-      SmallVector<Value> normalizedOperands(operands.begin(), operands.end());
-      mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands);
-      canonicalizeMapAndOperands(&map, &normalizedOperands);
-      return builder.createOrFold<AffineApplyOp>(loc, map, normalizedOperands);
-    };
-
-    SmallVector<Value> sizeVals =
-        getValueOrCreateConstantIndexOp(b, loc, sizes);
-    SmallVector<Value> subShapeSizes =
-        llvm::to_vector(llvm::map_range(sizeVals, [&](Value v) {
-          return fullyComposeAffineMapAndOperands(b, loc, d0 - 1, v);
+    IRRewriter rewriter(b);
+    SmallVector<OpFoldResult> subShapeSizes =
+        llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
+          return makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, ofr);
         }));
+
     OpOperand *outOperand = linalgOp.getOutputOperand(resultNumber);
     Value sliceOpResult =
-        makeTiledShape(b, loc, outOperand->get(), sizeVals,
-                       linalgOp.getTiedIndexingMap(outOperand),
-                       getValueOrCreateConstantIndexOp(b, loc, offsets),
+        makeTiledShape(b, loc, outOperand->get(), sizes,
+                       linalgOp.getTiedIndexingMap(outOperand), offsets,
                        /*ubs*/ {}, subShapeSizes, true);
     auto sliceOp = sliceOpResult.getDefiningOp<tensor::ExtractSliceOp>();
     if (!sliceOp)

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 416c72fe11ac9..e7dfa725aa67c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -142,14 +142,18 @@ LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() {
     if (!linalgOp)
       return tileSizes;
     Location loc = linalgOp.getLoc();
-    auto allShapeSizes = linalgOp.createFlatListOfOperandDims(b, loc);
+    SmallVector<OpFoldResult> allShapeSizes =
+        linalgOp.createFlatListOfOperandDims(b, loc);
     AffineMap map = linalgOp.getShapesToLoopsMap();
     if (!map)
       return tileSizes;
-    auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes);
+    IRRewriter rewriter(b);
+    SmallVector<OpFoldResult> shapeSizes =
+        makeComposedFoldedMultiResultAffineApply(rewriter, loc, map,
+                                                 allShapeSizes);
     // If the shape size is dynamic, tile by 1. Otherwise, do not tile (tile
     // size 0).
-    for (Value shapeSize : shapeSizes)
+    for (OpFoldResult shapeSize : shapeSizes)
       tileSizes.push_back(getConstantIntValue(shapeSize)
                               ? b.create<arith::ConstantIndexOp>(loc, 0)
                               : b.create<arith::ConstantIndexOp>(loc, 1));

diff  --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 59db977095d3b..4f14164bf26ca 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -42,8 +42,14 @@ using namespace presburger;
 using namespace mlir::linalg;
 using namespace mlir::scf;
 
-static bool isZero(Value v) {
-  if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
+static bool isZero(OpFoldResult v) {
+  if (!v)
+    return false;
+  if (auto attr = v.dyn_cast<Attribute>()) {
+    IntegerAttr intAttr = attr.dyn_cast<IntegerAttr>();
+    return intAttr && intAttr.getValue().isZero();
+  }
+  if (auto cst = v.get<Value>().getDefiningOp<arith::ConstantIndexOp>())
     return cst.value() == 0;
   return false;
 }
@@ -59,7 +65,7 @@ namespace {
 //   `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
 //
 struct TileCheck : public AffineExprVisitor<TileCheck> {
-  TileCheck(ValueRange tileSizes) : tileSizes(tileSizes) {}
+  TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
 
   void visitDimExpr(AffineDimExpr expr) {
     isTiled |= !isZero(tileSizes[expr.getPosition()]);
@@ -72,12 +78,12 @@ struct TileCheck : public AffineExprVisitor<TileCheck> {
              "nonpositive multiplying coefficient");
   }
   bool isTiled = false;
-  ValueRange tileSizes;
+  ArrayRef<OpFoldResult> tileSizes;
 };
 
 } // namespace
 
-static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
+static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
   if (!expr)
     return false;
   TileCheck t(tileSizes);
@@ -86,7 +92,7 @@ static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
 }
 
 // Checks whether the `map  varies with respect to a non-zero `tileSize`.
-static bool isTiled(AffineMap map, ValueRange tileSizes) {
+static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
   if (!map)
     return false;
   for (unsigned r = 0; r < map.getNumResults(); ++r)
@@ -201,6 +207,14 @@ Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim) {
   llvm_unreachable("Expected MemRefType or TensorType");
 }
 
+OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source,
+                               int64_t dim) {
+  auto shapedType = source.getType().cast<ShapedType>();
+  if (!shapedType.hasRank() || shapedType.isDynamicDim(dim))
+    return createOrFoldDimOp(b, loc, source, dim);
+  return b.getIndexAttr(shapedType.getDimSize(dim));
+}
+
 /// Given an operation, retrieves the value of each dynamic dimension through
 /// constructing the necessary DimOp operators.
 SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b) {
@@ -788,18 +802,10 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
   assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
 }
 
-static Value fullyComposeAndAffineApply(OpBuilder &b, Location loc,
-                                        AffineExpr expr, ValueRange operands) {
-  AffineMap map = AffineMap::inferFromExprList({expr}).front();
-  SmallVector<Value> normalizedOperands(operands.begin(), operands.end());
-  mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands);
-  canonicalizeMapAndOperands(&map, &normalizedOperands);
-  return b.createOrFold<AffineApplyOp>(loc, map, normalizedOperands);
-}
-
 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
-                     ValueRange tileSizes, AffineMap map, ValueRange lbs,
-                     ValueRange ubs, ValueRange subShapeSizes,
+                     ArrayRef<OpFoldResult> tileSizes, AffineMap map,
+                     ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
+                     ArrayRef<OpFoldResult> subShapeSizes,
                      bool omitPartialTileCheck) {
   auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
   assert(shapedType && "only shaped types can be tiled");
@@ -815,8 +821,8 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r);
     if (!isTiled(map.getSubMap({r}), tileSizes)) {
       offsets.push_back(builder.getIndexAttr(0));
-      Value dim = createOrFoldDimOp(builder, loc, valueToTile, r);
-      sizes.push_back(getAsOpFoldResult(dim));
+      OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
+      sizes.push_back(dim);
       strides.push_back(builder.getIndexAttr(1));
       LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
       continue;
@@ -827,14 +833,15 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
     // (i.e. the op does not subsample, stepping occurs in the loop).
     auto m = map.getSubMap({r});
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n");
-    auto offset = applyMapToValues(builder, loc, m, lbs).front();
-    offsets.push_back(getAsOpFoldResult(offset));
-    auto closedIntSize =
-        applyMapToValues(builder, loc, m, subShapeSizes).front();
+    IRRewriter rewriter(builder);
+    OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs);
+    offsets.push_back(offset);
+    OpFoldResult closedIntSize =
+        makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
     // Resulting size needs to be made half open interval again.
     AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
-    Value size =
-        fullyComposeAndAffineApply(builder, loc, s0 + 1, closedIntSize);
+    OpFoldResult size =
+        makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n");
     LLVM_DEBUG(llvm::dbgs()
                << "makeTiledShape: new offset: " << offset << "\n");
@@ -844,7 +851,7 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
       // We statically know that the partial/boundary tile condition is
       // unnecessary.
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
-      sizes.push_back(getAsOpFoldResult(size));
+      sizes.push_back(size);
       continue;
     }
 
@@ -854,10 +861,10 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
     // b. The subshape size is 1. According to the way the loops are set up,
     //    tensors with "0" dimensions would never be constructed.
     int64_t shapeSize = shape[r];
-    auto sizeCst = size.getDefiningOp<arith::ConstantIndexOp>();
-    auto hasTileSizeOne = sizeCst && sizeCst.value() == 1;
+    Optional<int64_t> sizeCst = getConstantIntValue(size);
+    auto hasTileSizeOne = sizeCst && *sizeCst == 1;
     auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
-                         ((shapeSize % sizeCst.value()) == 0);
+                         ((shapeSize % *sizeCst) == 0);
     if (!hasTileSizeOne && !dividesEvenly) {
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                               << ", size: " << size
@@ -878,25 +885,25 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
       AffineMap plusOneMap =
           AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
               .front();
-      auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
-        return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
-            .getResult();
-      }));
-      Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
-      Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
+      SmallVector<OpFoldResult> maxIndices =
+          llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
+            return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
+                                                 {ub});
+          }));
+      OpFoldResult maxIndex =
+          makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
+      OpFoldResult d =
+          makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
 
       // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
       AffineMap minMap = AffineMap::inferFromExprList(
                              {ArrayRef<AffineExpr>{dim1 - dim2, dim0}})
                              .front();
-      SmallVector<Value, 4> operands{size, d, offset};
-      fullyComposeAffineMapAndOperands(&minMap, &operands);
-      canonicalizeMapAndOperands(&minMap, &operands);
-      size = builder.create<AffineMinOp>(loc, builder.getIndexType(), minMap,
-                                         operands);
+      size =
+          makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
     }
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
-    sizes.push_back(getAsOpFoldResult(size));
+    sizes.push_back(size);
   }
 
   auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
@@ -914,31 +921,31 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
   return sliceOp->getResult(0);
 }
 
-SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,
-                                      ValueRange ivs, ValueRange tileSizes) {
-  SmallVector<Value> offsets;
+SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
+                                             ArrayRef<OpFoldResult> ivs,
+                                             ArrayRef<OpFoldResult> tileSizes) {
+  SmallVector<OpFoldResult> offsets;
   for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
     bool isTiled = !isZero(tileSizes[idx]);
-    offsets.push_back(
-        isTiled ? ivs[idxIvs++]
-                : b.create<arith::ConstantIndexOp>(loc, 0).getResult());
+    offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
     LLVM_DEBUG(llvm::dbgs()
                << "computeTileOffsets: " << offsets.back() << "\n");
   }
   return offsets;
 }
 
-SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc,
-                                    ValueRange tileSizes,
-                                    ArrayRef<Value> sizeBounds) {
-  SmallVector<Value> sizes;
+SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
+                                           ArrayRef<OpFoldResult> tileSizes,
+                                           ArrayRef<OpFoldResult> sizeBounds) {
+  SmallVector<OpFoldResult> sizes;
   for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
     bool isTiled = !isZero(tileSizes[idx]);
     // Before composing, we need to make range a closed interval.
-    Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
+    OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
     AffineExpr d0 = getAffineDimExpr(0, b.getContext());
-    sizes.push_back(fullyComposeAndAffineApply(b, loc, d0 - 1, size));
+    IRRewriter rewriter(b);
+    sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
     LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
   }
   return sizes;
@@ -981,6 +988,9 @@ SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
 
 Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
                               OpFoldResult opFoldResult) {
+  if (!opFoldResult)
+    return nullptr;
+
   if (auto value = opFoldResult.dyn_cast<Value>())
     return value;
   auto attr = opFoldResult.get<Attribute>().cast<IntegerAttr>();
@@ -993,27 +1003,27 @@ Value materializeOpFoldResult(OpBuilder &builder, Location loc,
   return materializeOpFoldResult(b, opFoldResult);
 }
 
-SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
-                                      LinalgOp linalgOp,
-                                      ArrayRef<Value> valuesToTile,
-                                      ValueRange ivs, ValueRange tileSizes,
-                                      ArrayRef<Value> sizeBounds,
-                                      bool omitPartialTileCheck) {
+SmallVector<Value> makeTiledShapes(OpBuilder &b, Location loc,
+                                   LinalgOp linalgOp, ValueRange valuesToTile,
+                                   ArrayRef<OpFoldResult> ivs,
+                                   ArrayRef<OpFoldResult> tileSizes,
+                                   ArrayRef<OpFoldResult> sizeBounds,
+                                   bool omitPartialTileCheck) {
   assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                            llvm::make_range(tileSizes.begin(), tileSizes.end()),
-                           [](Value v) { return !isZero(v); })) &&
+                           [](OpFoldResult v) { return !isZero(v); })) &&
          "expected as many ivs as non-zero sizes");
 
   // Construct (potentially temporary) mins and maxes on which to apply maps
   // that define tile subshapes.
-  SmallVector<Value> lbs = computeTileOffsets(b, loc, ivs, tileSizes);
-  SmallVector<Value> subShapeSizes =
+  SmallVector<OpFoldResult> lbs = computeTileOffsets(b, loc, ivs, tileSizes);
+  SmallVector<OpFoldResult> subShapeSizes =
       computeTileSizes(b, loc, tileSizes, sizeBounds);
 
   assert(static_cast<int64_t>(valuesToTile.size()) ==
              linalgOp.getNumInputsAndOutputs() &&
          "expected one value to tile for every operand");
-  SmallVector<Value, 4> tiledShapes;
+  SmallVector<Value> tiledShapes;
   tiledShapes.reserve(valuesToTile.size());
   for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) {
     Value shapedOp = valuesToTile[opOperand->getOperandNumber()];
@@ -1040,28 +1050,30 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
   return tiledShapes;
 }
 
-void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef<Value> offsets) {
+void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
+                   ArrayRef<OpFoldResult> offsets) {
   IRRewriter rewriter(b);
   offsetIndices(rewriter, linalgOp, offsets);
 }
 
 void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
-                   ArrayRef<Value> offsets) {
+                   ArrayRef<OpFoldResult> offsets) {
   if (!linalgOp.hasIndexSemantics())
     return;
 
   for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
-    if (indexOp.dim() >= offsets.size() || offsets[indexOp.dim()] == nullptr)
+    if (indexOp.dim() >= offsets.size() || !offsets[indexOp.dim()])
       continue;
     OpBuilder::InsertionGuard guard(b);
     b.setInsertionPointAfter(indexOp);
     AffineExpr index, offset;
     bindDims(b.getContext(), index, offset);
-    AffineApplyOp applyOp = makeComposedAffineApply(
+    OpFoldResult applied = makeComposedFoldedAffineApply(
         b, indexOp.getLoc(), index + offset,
-        ValueRange{indexOp.getResult(), offsets[indexOp.dim()]});
-    b.replaceOpWithIf(indexOp, applyOp.getResult(), [&](OpOperand &use) {
-      return use.getOwner() != applyOp;
+        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.dim()]});
+    Value materialized = materializeOpFoldResult(b, indexOp.getLoc(), applied);
+    b.replaceOpWithIf(indexOp, materialized, [&](OpOperand &use) {
+      return use.getOwner() != materialized.getDefiningOp();
     });
   }
 }

diff  --git a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp
index c650f3f5af295..db59141d57da2 100644
--- a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp
+++ b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp
@@ -52,6 +52,8 @@ SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
 /// Given a value, try to extract a constant Attribute. If this fails, return
 /// the original value.
 OpFoldResult getAsOpFoldResult(Value val) {
+  if (!val)
+    return OpFoldResult();
   Attribute attr;
   if (matchPattern(val, m_Constant(&attr)))
     return attr;

diff  --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
index d5c18a49944d4..3667176adb216 100644
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -65,7 +65,7 @@ func.func @remove_dim_result_uses
   return %3, %4 : index, index
 }
 //       CHECK: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
-//       CHECK: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s1 - s0)>
+//       CHECK: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (-s0 + s1)>
 //       CHECK: func @remove_dim_result_uses
 //  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
 //  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>

diff  --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
index 6c0bbc3c1cf95..3509b566ef961 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -196,7 +196,7 @@ func.func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?
 // CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s1, -d0 + s0, 8)>
 // CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)>
 // CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d1 * -2 + s0 + s1 * 2 - 2, d0 * 2 + s0 - 2)>
+// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * -2 + s0 * 2 + s1 - 2, d1 * 2 + s1 - 2)>
 // CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s1, -d0 + s0, 16)>
 // CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
 // CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
@@ -234,13 +234,13 @@ func.func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?
 // CHECK-NEXT:     scf.for %[[IV1:.+]] = %{{.+}} to %[[ELEM_OH]]
 // CHECK-NEXT:       %[[SIZE_ELEM_OH:.+]] = affine.min #[[BOUND16_MAP]](%[[IV1]])[%[[ELEM_OH]]]
 // CHECK-NEXT:       %[[OFFSET_OH:.+]] = affine.apply #[[X2_MAP]](%[[IV1]])
-// CHECK-NEXT:       %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OH]], %[[IV1]])[%[[FILTER_H]], %[[FILL_H]]]
+// CHECK-NEXT:       %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[IV1]], %[[SIZE_ELEM_OH]])[%[[FILL_H]], %[[FILTER_H]]]
 // CHECK-NEXT:       %[[SIZE_ELEM_OH_2:.+]] = affine.min #[[BOUND16_MAP_2]](%[[IV1]])[%[[FILL_H]], %[[ELEM_OH]]]
 // CHECK-NEXT:       scf.for %[[IV2:.+]] = %{{.+}} to %[[ELEM_OW]]
 // CHECK-NEXT:         %[[SIZE_ELEM_OW:.+]] = affine.min #[[BOUND4_MAP]](%[[IV2]])[%[[ELEM_OW]]]
 // CHECK-NEXT:         %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]]
 // CHECK-NEXT:         %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]])
-// CHECK-NEXT:         %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[FILL_W]]]
+// CHECK-NEXT:         %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[IV2]], %[[SIZE_ELEM_OW]])[%[[FILL_W]], %[[FILTER_W]]]
 // CHECK-NEXT:         %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
 // CHECK-SAME:               [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]]
 // CHECK-NEXT:         %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]

diff  --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
index b76d6f0d164f5..77518acf1fc28 100644
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" | FileCheck %s
 
-//  CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s0 + 1)>
-//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s0 + 2)>
+//  CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)>
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)>
 //  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
 //  CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)>
 
@@ -24,8 +24,8 @@ func.func @conv(%arg0 : memref<?x?xf32>, %arg1 : memref<?x?xf32>, %arg2 : memref
 //   CHECK-DAG:   %[[T3:.*]] = memref.dim %[[ARG2]], %[[C1]]
 //       CHECK:   scf.for %[[ARG3:.*]] = %[[C0]] to %[[T2]] step %[[C2]]
 //       CHECK:     scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]]
-//       CHECK:       %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T0]], %[[T2]]]
-//       CHECK:       %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T1]], %[[T3]]]
+//       CHECK:       %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]], %[[T0]]]
+//       CHECK:       %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T3]], %[[T1]]]
 //       CHECK:       %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]]
 //       CHECK:       %[[T6:.*]] = affine.min #[[MAP2]](%[[ARG3]])[%[[T2]]
 //       CHECK:       %[[T7:.*]] = affine.min #[[MAP3]](%[[ARG4]])[%[[T3]]]

diff  --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index 34c994d329628..7c062294ef525 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -61,15 +61,14 @@ func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf3
   %five = arith.constant 5.0 : f32
   %init = linalg.init_tensor [12, 25] : tensor<12x25xf32>
 
-//   CHECK-DAG:  %[[C4:.+]] = arith.constant 4 : index
-//   CHECK-DAG:  %[[C5:.+]] = arith.constant 5 : index
-//   CHECK-DAG:  %[[C7:.+]] = arith.constant 7 : index
-
 //       CHECK: %[[INIT:.+]] = linalg.init_tensor [12, 25]
+//   CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
+//   CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
 //       CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
 //       CHECK:   scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
 //       CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
 //       CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor<?x?xf32>)
+//       CHECK:     %[[C4:.+]] = arith.constant 4 : index
 //       CHECK:     scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
 //       CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]]
 //       CHECK:       %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]