[Mlir-commits] [mlir] 0b20413 - Revert "[Canonicalizer] Process regions top-down instead of bottom up & reuse existing constants."

Uday Bondhugula llvmlistbot at llvm.org
Thu Mar 25 09:51:22 PDT 2021


Author: Uday Bondhugula
Date: 2021-03-25T22:17:26+05:30
New Revision: 0b20413ef6717d914a8b8a9ece86d8eae27c221f

URL: https://github.com/llvm/llvm-project/commit/0b20413ef6717d914a8b8a9ece86d8eae27c221f
DIFF: https://github.com/llvm/llvm-project/commit/0b20413ef6717d914a8b8a9ece86d8eae27c221f.diff

LOG: Revert "[Canonicalizer] Process regions top-down instead of bottom up & reuse existing constants."

This reverts commit 361b7d125b438cda13fa45f13790767a62252be9 by Chris
Lattner <clattner at nondot.org> dated Fri Mar 19 21:22:15 2021 -0700.

The change that made the greedy rewriter driver pick a different
traversal order was made without adequate analysis of the trade-offs and
without experimentation. A change like this has far-reaching consequences
for transformation pipelines and a major impact both upstream and
downstream. For example, one can’t be sure that it doesn’t slow down a
large number of cases by small amounts or create other issues. More
discussion here:
https://llvm.discourse.group/t/speeding-up-canonicalize/3015/25

Reverting this so that improvements to the traversal order can be made
on a clean slate, in bigger steps, and with a higher bar.

Differential Revision: https://reviews.llvm.org/D99329
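
[Editor's note] For downstream readers, the caller-facing effect of this
revert is that applyPatternsAndFoldGreedily loses its optional
useTopDownTraversal parameter (see the GreedyPatternRewriteDriver.h hunk
below). A minimal sketch of an affected call site follows; the pass class
and pattern population are hypothetical illustrations, not part of this
commit.

  // Header declaring applyPatternsAndFoldGreedily (touched by this revert).
  #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

  // Before the reverted commit, callers could pass a traversal-order flag:
  //   (void)applyPatternsAndFoldGreedily(getOperation(), frozenPatterns,
  //                                      /*useTopDownTraversal=*/true);
  // After this revert, that parameter is gone and the driver always walks
  // regions bottom-up when building its worklist (see the .cpp hunk below).
  void MyCanonicalizePass::runOnOperation() {
    RewritePatternSet patterns(&getContext());
    // ... populate patterns here (illustrative only) ...
    FrozenRewritePatternSet frozenPatterns(std::move(patterns));
    (void)applyPatternsAndFoldGreedily(getOperation(), frozenPatterns);
  }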

Added: 
    

Modified: 
    mlir/include/mlir/Transforms/FoldUtils.h
    mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h
    mlir/lib/Transforms/Utils/FoldUtils.cpp
    mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
    mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
    mlir/test/Dialect/Affine/canonicalize.mlir
    mlir/test/Dialect/Linalg/transform-patterns.mlir
    mlir/test/Dialect/Vector/canonicalize.mlir
    mlir/test/Transforms/canonicalize.mlir
    mlir/test/mlir-tblgen/pattern.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Transforms/FoldUtils.h b/mlir/include/mlir/Transforms/FoldUtils.h
index c31ac15eb9c9..7f4166c12ded 100644
--- a/mlir/include/mlir/Transforms/FoldUtils.h
+++ b/mlir/include/mlir/Transforms/FoldUtils.h
@@ -33,11 +33,6 @@ class OperationFolder {
 public:
   OperationFolder(MLIRContext *ctx) : interfaces(ctx) {}
 
-  /// Scan the specified region for constants that can be used in folding,
-  /// moving them to the entry block and adding them to our known-constants
-  /// table.
-  void processExistingConstants(Region &region);
-
   /// Tries to perform folding on the given `op`, including unifying
   /// deduplicated constants. If successful, replaces `op`'s uses with
   /// folded results, and returns success. `preReplaceAction` is invoked on `op`

diff  --git a/mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h b/mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h
index cbbe5c10948d..3a76fbd3e0b0 100644
--- a/mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h
+++ b/mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h
@@ -35,26 +35,26 @@ namespace mlir {
 ///       before attempting to match any of the provided patterns.
 LogicalResult
 applyPatternsAndFoldGreedily(Operation *op,
-                             const FrozenRewritePatternSet &patterns,
-                             bool useTopDownTraversal = true);
+                             const FrozenRewritePatternSet &patterns);
 
 /// Rewrite the regions of the specified operation, with a user-provided limit
 /// on iterations to attempt before reaching convergence.
-LogicalResult applyPatternsAndFoldGreedily(
-    Operation *op, const FrozenRewritePatternSet &patterns,
-    unsigned maxIterations, bool useTopDownTraversal = true);
+LogicalResult
+applyPatternsAndFoldGreedily(Operation *op,
+                             const FrozenRewritePatternSet &patterns,
+                             unsigned maxIterations);
 
 /// Rewrite the given regions, which must be isolated from above.
 LogicalResult
 applyPatternsAndFoldGreedily(MutableArrayRef<Region> regions,
-                             const FrozenRewritePatternSet &patterns,
-                             bool useTopDownTraversal = true);
+                             const FrozenRewritePatternSet &patterns);
 
 /// Rewrite the given regions, with a user-provided limit on iterations to
 /// attempt before reaching convergence.
-LogicalResult applyPatternsAndFoldGreedily(
-    MutableArrayRef<Region> regions, const FrozenRewritePatternSet &patterns,
-    unsigned maxIterations, bool useTopDownTraversal = true);
+LogicalResult
+applyPatternsAndFoldGreedily(MutableArrayRef<Region> regions,
+                             const FrozenRewritePatternSet &patterns,
+                             unsigned maxIterations);
 
 /// Applies the specified patterns on `op` alone while also trying to fold it,
 /// by selecting the highest benefits patterns in a greedy manner. Returns

diff  --git a/mlir/lib/Transforms/Utils/FoldUtils.cpp b/mlir/lib/Transforms/Utils/FoldUtils.cpp
index 616a6ef6af57..024ae1892861 100644
--- a/mlir/lib/Transforms/Utils/FoldUtils.cpp
+++ b/mlir/lib/Transforms/Utils/FoldUtils.cpp
@@ -84,81 +84,6 @@ static Operation *materializeConstant(Dialect *dialect, OpBuilder &builder,
 // OperationFolder
 //===----------------------------------------------------------------------===//
 
-/// Scan the specified region for constants that can be used in folding,
-/// moving them to the entry block and adding them to our known-constants
-/// table.
-void OperationFolder::processExistingConstants(Region &region) {
-  if (region.empty())
-    return;
-
-  // March the constant insertion point forward, moving all constants to the
-  // top of the block, but keeping them in their order of discovery.
-  Region *insertRegion = getInsertionRegion(interfaces, &region.front());
-  auto &uniquedConstants = foldScopes[insertRegion];
-
-  Block &insertBlock = insertRegion->front();
-  Block::iterator constantIterator = insertBlock.begin();
-
-  // Process each constant that we discover in this region.
-  auto processConstant = [&](Operation *op, Attribute value) {
-    // Check to see if we already have an instance of this constant.
-    Operation *&constOp = uniquedConstants[std::make_tuple(
-        op->getDialect(), value, op->getResult(0).getType())];
-
-    // If we already have an instance of this constant, CSE/delete this one as
-    // we go.
-    if (constOp) {
-      if (constantIterator == Block::iterator(op))
-        ++constantIterator; // Don't invalidate our iterator when scanning.
-      op->getResult(0).replaceAllUsesWith(constOp->getResult(0));
-      op->erase();
-      return;
-    }
-
-    // Otherwise, remember that we have this constant.
-    constOp = op;
-    referencedDialects[op].push_back(op->getDialect());
-
-    // If the constant isn't already at the insertion point then move it up.
-    if (constantIterator == insertBlock.end() || &*constantIterator != op)
-      op->moveBefore(&insertBlock, constantIterator);
-    else
-      ++constantIterator; // It was pointing at the constant.
-  };
-
-  SmallVector<Operation *> isolatedOps;
-  region.walk<WalkOrder::PreOrder>([&](Operation *op) {
-    // If this is a constant, process it.
-    Attribute value;
-    if (matchPattern(op, m_Constant(&value))) {
-      processConstant(op, value);
-      // We may have deleted the operation, don't check it for regions.
-      return WalkResult::skip();
-    }
-
-    // If the operation has regions and is isolated, don't recurse into it.
-    if (op->getNumRegions() != 0) {
-      auto hasDifferentInsertRegion = [&](Region &region) {
-        return !region.empty() &&
-               getInsertionRegion(interfaces, &region.front()) != insertRegion;
-      };
-      if (llvm::any_of(op->getRegions(), hasDifferentInsertRegion)) {
-        isolatedOps.push_back(op);
-        return WalkResult::skip();
-      }
-    }
-
-    // Otherwise keep going.
-    return WalkResult::advance();
-  });
-
-  // Process regions in any isolated ops separately.
-  for (Operation *isolated : isolatedOps) {
-    for (Region &region : isolated->getRegions())
-      processExistingConstants(region);
-  }
-}
-
 LogicalResult OperationFolder::tryToFold(
     Operation *op, function_ref<void(Operation *)> processGeneratedConstants,
     function_ref<void(Operation *)> preReplaceAction, bool *inPlaceUpdate) {
@@ -337,19 +262,19 @@ Operation *OperationFolder::tryGetOrCreateConstant(
     Attribute value, Type type, Location loc) {
   // Check if an existing mapping already exists.
   auto constKey = std::make_tuple(dialect, value, type);
-  auto *&constOp = uniquedConstants[constKey];
-  if (constOp)
-    return constOp;
+  auto *&constInst = uniquedConstants[constKey];
+  if (constInst)
+    return constInst;
 
   // If one doesn't exist, try to materialize one.
-  if (!(constOp = materializeConstant(dialect, builder, value, type, loc)))
+  if (!(constInst = materializeConstant(dialect, builder, value, type, loc)))
     return nullptr;
 
   // Check to see if the generated constant is in the expected dialect.
-  auto *newDialect = constOp->getDialect();
+  auto *newDialect = constInst->getDialect();
   if (newDialect == dialect) {
-    referencedDialects[constOp].push_back(dialect);
-    return constOp;
+    referencedDialects[constInst].push_back(dialect);
+    return constInst;
   }
 
   // If it isn't, then we also need to make sure that the mapping for the new
@@ -359,13 +284,13 @@ Operation *OperationFolder::tryGetOrCreateConstant(
   // If an existing operation in the new dialect already exists, delete the
   // materialized operation in favor of the existing one.
   if (auto *existingOp = uniquedConstants.lookup(newKey)) {
-    constOp->erase();
+    constInst->erase();
     referencedDialects[existingOp].push_back(dialect);
-    return constOp = existingOp;
+    return constInst = existingOp;
   }
 
   // Otherwise, update the new dialect to the materialized operation.
-  referencedDialects[constOp].assign({dialect, newDialect});
-  auto newIt = uniquedConstants.insert({newKey, constOp});
+  referencedDialects[constInst].assign({dialect, newDialect});
+  auto newIt = uniquedConstants.insert({newKey, constInst});
   return newIt.first->second;
 }

diff  --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
index f28f228737a8..c82076bde082 100644
--- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
+++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
@@ -37,10 +37,8 @@ namespace {
 class GreedyPatternRewriteDriver : public PatternRewriter {
 public:
   explicit GreedyPatternRewriteDriver(MLIRContext *ctx,
-                                      const FrozenRewritePatternSet &patterns,
-                                      bool useTopDownTraversal)
-      : PatternRewriter(ctx), matcher(patterns), folder(ctx),
-        useTopDownTraversal(useTopDownTraversal) {
+                                      const FrozenRewritePatternSet &patterns)
+      : PatternRewriter(ctx), matcher(patterns), folder(ctx) {
     worklist.reserve(64);
 
     // Apply a simple cost model based solely on pattern benefit.
@@ -136,9 +134,6 @@ class GreedyPatternRewriteDriver : public PatternRewriter {
 
   /// Non-pattern based folder for operations.
   OperationFolder folder;
-
-  // Whether to use top-down or bottom-up traversal order.
-  bool useTopDownTraversal;
 };
 } // end anonymous namespace
 
@@ -146,31 +141,15 @@ class GreedyPatternRewriteDriver : public PatternRewriter {
 /// if the rewrite converges in `maxIterations`.
 bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
                                           int maxIterations) {
-  // Perform a prepass over the IR to discover constants.
-  for (auto &region : regions)
-    folder.processExistingConstants(region);
+  // Add the given operation to the worklist.
+  auto collectOps = [this](Operation *op) { addToWorklist(op); };
 
   bool changed = false;
-  int iteration = 0;
+  int i = 0;
   do {
-    worklist.clear();
-    worklistMap.clear();
-
-    // Add all nested operations to the worklist in preorder.
+    // Add all nested operations to the worklist.
     for (auto &region : regions)
-      if (useTopDownTraversal)
-        region.walk<WalkOrder::PreOrder>(
-            [this](Operation *op) { worklist.push_back(op); });
-      else
-        region.walk([this](Operation *op) { addToWorklist(op); });
-
-    if (useTopDownTraversal) {
-      // Reverse the list so our pop-back loop processes them in-order.
-      std::reverse(worklist.begin(), worklist.end());
-      // Remember the reverse index.
-      for (unsigned i = 0, e = worklist.size(); i != e; ++i)
-        worklistMap[worklist[i]] = i;
-    }
+      region.walk(collectOps);
 
     // These are scratch vectors used in the folding loop below.
     SmallVector<Value, 8> originalOperands, resultValues;
@@ -208,9 +187,6 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
         notifyOperationRemoved(op);
       };
 
-      // Add the given operation to the worklist.
-      auto collectOps = [this](Operation *op) { addToWorklist(op); };
-
       // Try to fold this op.
       bool inPlaceUpdate;
       if ((succeeded(folder.tryToFold(op, collectOps, preReplaceAction,
@@ -228,8 +204,7 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
     // After applying patterns, make sure that the CFG of each of the regions is
     // kept up to date.
     changed |= succeeded(simplifyRegions(*this, regions));
-  } while (changed && ++iteration < maxIterations);
-
+  } while (changed && ++i < maxIterations);
   // Whether the rewrite converges, i.e. wasn't changed in the last iteration.
   return !changed;
 }
@@ -242,28 +217,27 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
 ///
 LogicalResult
 mlir::applyPatternsAndFoldGreedily(Operation *op,
-                                   const FrozenRewritePatternSet &patterns,
-                                   bool useTopDownTraversal) {
-  return applyPatternsAndFoldGreedily(op, patterns, maxPatternMatchIterations,
-                                      useTopDownTraversal);
+                                   const FrozenRewritePatternSet &patterns) {
+  return applyPatternsAndFoldGreedily(op, patterns, maxPatternMatchIterations);
 }
-LogicalResult mlir::applyPatternsAndFoldGreedily(
-    Operation *op, const FrozenRewritePatternSet &patterns,
-    unsigned maxIterations, bool useTopDownTraversal) {
-  return applyPatternsAndFoldGreedily(op->getRegions(), patterns, maxIterations,
-                                      useTopDownTraversal);
+LogicalResult
+mlir::applyPatternsAndFoldGreedily(Operation *op,
+                                   const FrozenRewritePatternSet &patterns,
+                                   unsigned maxIterations) {
+  return applyPatternsAndFoldGreedily(op->getRegions(), patterns,
+                                      maxIterations);
 }
 /// Rewrite the given regions, which must be isolated from above.
 LogicalResult
 mlir::applyPatternsAndFoldGreedily(MutableArrayRef<Region> regions,
-                                   const FrozenRewritePatternSet &patterns,
-                                   bool useTopDownTraversal) {
-  return applyPatternsAndFoldGreedily(
-      regions, patterns, maxPatternMatchIterations, useTopDownTraversal);
+                                   const FrozenRewritePatternSet &patterns) {
+  return applyPatternsAndFoldGreedily(regions, patterns,
+                                      maxPatternMatchIterations);
 }
-LogicalResult mlir::applyPatternsAndFoldGreedily(
-    MutableArrayRef<Region> regions, const FrozenRewritePatternSet &patterns,
-    unsigned maxIterations, bool useTopDownTraversal) {
+LogicalResult
+mlir::applyPatternsAndFoldGreedily(MutableArrayRef<Region> regions,
+                                   const FrozenRewritePatternSet &patterns,
+                                   unsigned maxIterations) {
   if (regions.empty())
     return success();
 
@@ -278,8 +252,7 @@ LogicalResult mlir::applyPatternsAndFoldGreedily(
          "patterns can only be applied to operations IsolatedFromAbove");
 
   // Start the pattern driver.
-  GreedyPatternRewriteDriver driver(regions[0].getContext(), patterns,
-                                    useTopDownTraversal);
+  GreedyPatternRewriteDriver driver(regions[0].getContext(), patterns);
   bool converged = driver.simplify(regions, maxIterations);
   LLVM_DEBUG(if (!converged) {
     llvm::dbgs() << "The pattern rewrite doesn't converge after scanning "

diff  --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
index 47896c010433..1ebacc8ef274 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
@@ -204,13 +204,12 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
   // CHECK-DAG: %[[C0:.*]] = constant 0 : index
   // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
   // CHECK-DAG: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>>
-  // CHECK-DAG: [[CST:%.*]] = constant 7.000000e+00 : f32
   // CHECK-DAG: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
   // CHECK: affine.for %[[I:.*]] = 0 to 3 {
   // CHECK:   %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
   // CHECK:   %[[cond1:.*]] = cmpi slt, %[[add]], %[[dim]] : index
   // CHECK:   scf.if %[[cond1]] {
-  // CHECK:     %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], [[CST]] : memref<?x?xf32>, vector<15xf32>
+  // CHECK:     %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %cst : memref<?x?xf32>, vector<15xf32>
   // CHECK:     store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
   // CHECK:   } else {
   // CHECK:     store %[[splat]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
@@ -218,14 +217,13 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
   // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
   // CHECK: %[[cst:.*]] = memref.load %[[vmemref]][] : memref<vector<3x15xf32>>
 
-  // FULL-UNROLL-DAG: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32>
-  // FULL-UNROLL-DAG: %[[C0:.*]] = constant 0 : index
-  // FULL-UNROLL-DAG: %[[SPLAT:.*]] = constant dense<7.000000e+00> : vector<15xf32>
-  // FULL-UNROLL-DAG: [[CST:%.*]] = constant 7.000000e+00 : f32
+  // FULL-UNROLL: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32>
+  // FULL-UNROLL: %[[C0:.*]] = constant 0 : index
+  // FULL-UNROLL: %[[SPLAT:.*]] = constant dense<7.000000e+00> : vector<15xf32>
   // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
   // FULL-UNROLL: cmpi slt, %[[base]], %[[DIM]] : index
   // FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
-  // FULL-UNROLL:   vector.transfer_read %[[A]][%[[base]], %[[base]]], [[CST]] : memref<?x?xf32>, vector<15xf32>
+  // FULL-UNROLL:   vector.transfer_read %[[A]][%[[base]], %[[base]]], %cst : memref<?x?xf32>, vector<15xf32>
   // FULL-UNROLL:   vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
   // FULL-UNROLL:   scf.yield %{{.*}} : vector<3x15xf32>
   // FULL-UNROLL: } else {
@@ -235,7 +233,7 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
   // FULL-UNROLL: affine.apply #[[$MAP1]]()[%[[base]]]
   // FULL-UNROLL: cmpi slt, %{{.*}}, %[[DIM]] : index
   // FULL-UNROLL: %[[VEC2:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
-  // FULL-UNROLL:   vector.transfer_read %[[A]][%{{.*}}, %[[base]]], [[CST]] : memref<?x?xf32>, vector<15xf32>
+  // FULL-UNROLL:   vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %cst : memref<?x?xf32>, vector<15xf32>
   // FULL-UNROLL:   vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
   // FULL-UNROLL:   scf.yield %{{.*}} : vector<3x15xf32>
   // FULL-UNROLL: } else {
@@ -245,7 +243,7 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
   // FULL-UNROLL: affine.apply #[[$MAP2]]()[%[[base]]]
   // FULL-UNROLL: cmpi slt, %{{.*}}, %[[DIM]] : index
   // FULL-UNROLL: %[[VEC3:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
-  // FULL-UNROLL:   vector.transfer_read %[[A]][%{{.*}}, %[[base]]], [[CST]] : memref<?x?xf32>, vector<15xf32>
+  // FULL-UNROLL:   vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %cst : memref<?x?xf32>, vector<15xf32>
   // FULL-UNROLL:   vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
   // FULL-UNROLL:   scf.yield %{{.*}} : vector<3x15xf32>
   // FULL-UNROLL: } else {
@@ -379,16 +377,16 @@ func @transfer_read_minor_identity(%A : memref<?x?x?x?xf32>) -> vector<3x3xf32>
 
 // CHECK-LABEL: transfer_read_minor_identity(
 //  CHECK-SAME:   %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
+//  CHECK-DAG:   %[[c0:.*]] = constant 0 : index
+//  CHECK-DAG:   %cst = constant 0.000000e+00 : f32
 //       CHECK-DAG:   %[[c2:.*]] = constant 2 : index
 //       CHECK-DAG:   %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32>
 //       CHECK:   %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>>
-//       CHECK-DAG:   %[[cst:.*]] = constant 0.000000e+00 : f32
-//       CHECK-DAG:   %[[c0:.*]] = constant 0 : index
 //       CHECK:   %[[d:.*]] = memref.dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
 //       CHECK:   affine.for %[[arg1:.*]] = 0 to 3 {
 //       CHECK:      %[[cmp:.*]] = cmpi slt, %[[arg1]], %[[d]] : index
 //       CHECK:      scf.if %[[cmp]] {
-//       CHECK:        %[[tr:.*]] = vector.transfer_read %[[A]][%c0, %c0, %[[arg1]], %c0], %[[cst]] : memref<?x?x?x?xf32>, vector<3xf32>
+//       CHECK:        %[[tr:.*]] = vector.transfer_read %[[A]][%c0, %c0, %[[arg1]], %c0], %cst : memref<?x?x?x?xf32>, vector<3xf32>
 //       CHECK:        store %[[tr]], %[[m]][%[[arg1]]] : memref<3xvector<3xf32>>
 //       CHECK:      } else {
 //       CHECK:        store %[[cst0]], %[[m]][%[[arg1]]] : memref<3xvector<3xf32>>
@@ -411,8 +409,8 @@ func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf3
 //  CHECK-SAME:   %[[A:.*]]: vector<3x3xf32>,
 //  CHECK-SAME:   %[[B:.*]]: memref<?x?x?x?xf32>)
 //       CHECK-DAG:   %[[c2:.*]] = constant 2 : index
-//       CHECK:   %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>>
 //       CHECK-DAG:   %[[c0:.*]] = constant 0 : index
+//       CHECK:   %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>>
 //       CHECK:   %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref<vector<3x3xf32>>
 //       CHECK:   store %[[A]], %[[cast]][] : memref<vector<3x3xf32>>
 //       CHECK:   %[[d:.*]] = memref.dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>

diff  --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index e0279a8048ee..8ede55a9d284 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -207,7 +207,7 @@ func @compose_affine_maps_diamond_dependency(%arg0: f32, %arg1: memref<4x4xf32>)
 
 // -----
 
-// CHECK-DAG: #[[$MAP14:.*]] = affine_map<()[s0, s1] -> ((s0 * 4 + s1 * 4) floordiv s0)>
+// CHECK-DAG: #[[$MAP14:.*]] = affine_map<()[s0, s1] -> (((s1 + s0) * 4) floordiv s0)>
 
 // CHECK-LABEL: func @compose_affine_maps_multiple_symbols
 func @compose_affine_maps_multiple_symbols(%arg0: index, %arg1: index) -> index {
@@ -312,7 +312,7 @@ func @symbolic_composition_c(%arg0: index, %arg1: index, %arg2: index, %arg3: in
 
 // -----
 
-// CHECK-DAG: #[[$MAP_symbolic_composition_d:.*]] = affine_map<()[s0, s1] -> (s0 * 3 + s1)>
+// CHECK-DAG: #[[$MAP_symbolic_composition_d:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)>
 
 // CHECK-LABEL: func @symbolic_composition_d(
 //  CHECK-SAME:   %[[ARG0:[0-9a-zA-Z]+]]: index
@@ -321,7 +321,7 @@ func @symbolic_composition_d(%arg0: index, %arg1: index, %arg2: index, %arg3: in
   %0 = affine.apply affine_map<(d0) -> (d0)>(%arg0)
   %1 = affine.apply affine_map<()[s0] -> (s0)>()[%arg1]
   %2 = affine.apply affine_map<()[s0, s1, s2, s3] -> (s0 + s1 + s2 + s3)>()[%0, %0, %0, %1]
-  // CHECK: %{{.*}} = affine.apply #[[$MAP_symbolic_composition_d]]()[%[[ARG0]], %[[ARG1]]]
+  // CHECK: %{{.*}} = affine.apply #[[$MAP_symbolic_composition_d]]()[%[[ARG1]], %[[ARG0]]]
   return %2 : index
 }
 
@@ -722,7 +722,7 @@ func @deduplicate_affine_max_expressions(%i0: index, %i1: index) -> index {
 // -----
 
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1, s2] -> (s0 * 3, 16, -s1 + s2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> (-s2 + 5, 16, -s0 + s1)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> (-s1 + 5, 16, -s0 + s2)>
 
 // CHECK: func @merge_affine_min_ops
 // CHECK-SAME: (%[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index, %[[I3:.+]]: index)
@@ -731,7 +731,7 @@ func @merge_affine_min_ops(%i0: index, %i1: index, %i2: index, %i3: index) -> (i
 
  // CHECK: affine.min #[[MAP0]]()[%[[I2]], %[[I1]], %[[I0]]]
   %1 = affine.min affine_map<(d0)[s0] -> (3 * s0, d0)> (%0)[%i2] // Use as dim
- // CHECK: affine.min #[[MAP1]]()[%[[I1]], %[[I0]], %[[I3]]]
+ // CHECK: affine.min #[[MAP1]]()[%[[I1]], %[[I3]], %[[I0]]]
   %2 = affine.min affine_map<(d0)[s0] -> (s0, 5 - d0)> (%i3)[%0] // Use as symbol
 
   return %1, %2: index, index
@@ -805,7 +805,7 @@ func @dont_merge_affine_min_if_not_single_sym(%i0: index, %i1: index, %i2: index
 // -----
 
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1, s2] -> (s0 * 3, 16, -s1 + s2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> (-s2 + 5, 16, -s0 + s1)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> (-s1 + 5, 16, -s0 + s2)>
 
 // CHECK: func @merge_affine_max_ops
 // CHECK-SAME: (%[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index, %[[I3:.+]]: index)
@@ -814,7 +814,7 @@ func @merge_affine_max_ops(%i0: index, %i1: index, %i2: index, %i3: index) -> (i
 
  // CHECK: affine.max #[[MAP0]]()[%[[I2]], %[[I1]], %[[I0]]]
   %1 = affine.max affine_map<(d0)[s0] -> (3 * s0, d0)> (%0)[%i2] // Use as dim
- // CHECK: affine.max #[[MAP1]]()[%[[I1]], %[[I0]], %[[I3]]]
+ // CHECK: affine.max #[[MAP1]]()[%[[I1]], %[[I3]], %[[I0]]]
   %2 = affine.max affine_map<(d0)[s0] -> (s0, 5 - d0)> (%i3)[%0] // Use as symbol
 
   return %1, %2: index, index

diff  --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
index 95555ceb6844..a70816984c00 100644
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -336,7 +336,7 @@ func @aligned_promote_fill(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>) {
   return
 }
 // CHECK-LABEL: func @aligned_promote_fill
-// CHECK:	  %[[cf:.*]] = constant 1.0{{.*}} : f32
+// CHECK:	  %[[cf:.*]] = constant {{.*}} : f32
 // CHECK:         %[[s0:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
 // CHECK:         %[[a0:.*]] = memref.alloc({{%.*}}) {alignment = 32 : i64} : memref<?xi8>
 // CHECK:         %[[v0:.*]] = memref.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref<?xi8> to memref<?x?xf32>

diff  --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index a0448ea32967..c6ec156e1519 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -234,10 +234,10 @@ func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
   // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
   %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
   %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
-  // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
+  // CHECK-NOT: transpose
   %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
   %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
-  // CHECK: [[MUL:%.*]] = mulf [[T0]], [[T1]]
+  // CHECK: [[MUL:%.*]] = mulf [[T0]], [[T0]]
   %4 = mulf %1, %3 : vector<2x3x4xf32>
   // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
   %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>

diff  --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index c6e535723b44..a65c46452cc8 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -630,7 +630,7 @@ func @lowered_affine_floordiv() -> (index, index) {
 //
 // CHECK-LABEL: func @lowered_affine_ceildiv
 func @lowered_affine_ceildiv() -> (index, index) {
-// CHECK-DAG:  %c-1 = constant -1 : index
+// CHECK-NEXT:  %c-1 = constant -1 : index
   %c-43 = constant -43 : index
   %c42 = constant 42 : index
   %c0 = constant 0 : index
@@ -643,7 +643,7 @@ func @lowered_affine_ceildiv() -> (index, index) {
   %5 = subi %c0, %4 : index
   %6 = addi %4, %c1 : index
   %7 = select %0, %5, %6 : index
-// CHECK-DAG:  %c2 = constant 2 : index
+// CHECK-NEXT:  %c2 = constant 2 : index
   %c43 = constant 43 : index
   %c42_0 = constant 42 : index
   %c0_1 = constant 0 : index

diff  --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir
index 100a7bae7689..0425cf819e60 100644
--- a/mlir/test/mlir-tblgen/pattern.mlir
+++ b/mlir/test/mlir-tblgen/pattern.mlir
@@ -5,8 +5,8 @@ func @verifyFusedLocs(%arg0 : i32) -> i32 {
   %0 = "test.op_a"(%arg0) {attr = 10 : i32} : (i32) -> i32 loc("a")
   %result = "test.op_a"(%0) {attr = 20 : i32} : (i32) -> i32 loc("b")
 
-  // CHECK: %0 = "test.op_b"(%arg0) {attr = 10 : i32} : (i32) -> i32 loc("a")
-  // CHECK: %1 = "test.op_b"(%0) {attr = 20 : i32} : (i32) -> i32 loc("b")
+  // CHECK: "test.op_b"(%arg0) {attr = 10 : i32} : (i32) -> i32 loc("a")
+  // CHECK: "test.op_b"(%arg0) {attr = 20 : i32} : (i32) -> i32 loc(fused["b", "a"])
   return %result : i32
 }
 
@@ -67,7 +67,7 @@ func @verifyBenefit(%arg0 : i32) -> i32 {
   %2 = "test.op_g"(%1) : (i32) -> i32
 
   // CHECK: "test.op_f"(%arg0)
-  // CHECK: "test.op_b"(%arg0) {attr = 20 : i32}
+  // CHECK: "test.op_b"(%arg0) {attr = 34 : i32}
   return %0 : i32
 }
 

More information about the Mlir-commits mailing list