[Mlir-commits] [mlir] 1b272d2 - Revert "[mlir][VectorOps] Use SCF for vector.print and allow scalable vectors"

Wed Aug 9 19:37:36 PDT 2023

Author: Mehdi Amini
Date: 2023-08-09T19:37:01-07:00
New Revision: 1b272d21c8162ff577d1c45d1f9320f3465db23c

URL: https://github.com/llvm/llvm-project/commit/1b272d21c8162ff577d1c45d1f9320f3465db23c
DIFF: https://github.com/llvm/llvm-project/commit/1b272d21c8162ff577d1c45d1f9320f3465db23c.diff

LOG: Revert "[mlir][VectorOps] Use SCF for vector.print and allow scalable vectors"

This reverts commit 490dae26cb3bee2e8401e4c2a7ad3e0996be67d0.

The buildbot is broken; there seems to be an ambiguity problem in the parser.

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
    mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
    mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
    mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
    mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
    mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-compare-results-i16.mlir
    mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-constants-i16.mlir
    mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir
    mlir/test/Integration/Dialect/LLVMIR/CPU/test-vp-intrinsic.mlir
    mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sm80-lt/sparse-matmul-2-4-lib.mlir
    mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
    mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-sve.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-rsqrt.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-index-vectors.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-print-fp.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-realloc.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-shuffle16x16.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
    mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
    mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
    mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
    mlir/test/mlir-cpu-runner/X86Vector/math-polynomial-approx-avx2.mlir
    mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
    mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir
    mlir/test/python/dialects/vector.py

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index c4c33cdc90a83e..357795cb262d4b 100644

--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -2464,66 +2464,33 @@ def Vector_TransposeOp :
   let hasVerifier = 1;
 }
 
-def PrintPunctuation : I32EnumAttr<"PrintPunctuation",
-                                  "Punctuation for separating vectors or vector elements", [
-  I32EnumAttrCase<"NoPunctuation", 0, "no_punctuation">,
-  I32EnumAttrCase<"NewLine", 1, "newline">,
-  I32EnumAttrCase<"Comma", 2, "comma">,
-  I32EnumAttrCase<"Open", 3, "open">,
-  I32EnumAttrCase<"Close", 4, "close">
-]> {
-  let cppNamespace = "::mlir::vector";
-  let genSpecializedAttr = 0;
-}
-
-def Vector_PrintPunctuation : EnumAttr<Vector_Dialect, PrintPunctuation, "punctuation"> {
-  let assemblyFormat = "`<` $value `>`";
-}
-
 def Vector_PrintOp :
   Vector_Op<"print", []>,
-  Arguments<(ins Optional<Type<Or<[
+  Arguments<(ins Type<Or<[
     AnyVectorOfAnyRank.predicate,
     AnyInteger.predicate, Index.predicate, AnyFloat.predicate
-  ]>>>:$source, DefaultValuedAttr<Vector_PrintPunctuation,
-                      "::mlir::vector::PrintPunctuation::NewLine">:$punctuation)
-  > {
+  ]>>:$source)> {
   let summary = "print operation (for testing and debugging)";
   let description = [{
-    Prints the source vector (or scalar) to stdout in a human-readable format
-    (for testing and debugging). No return value.
+    Prints the source vector (or scalar) to stdout in human readable
+    format (for testing and debugging). No return value.
 
     Example:
 
     ```mlir
-    %v = arith.constant dense<0.0> : vector<4xf32>
-    vector.print %v : vector<4xf32>
-    ```
+    %0 = arith.constant 0.0 : f32
+    %1 = vector.broadcast %0 : f32 to vector<4xf32>
+    vector.print %1 : vector<4xf32>
 
-    When lowered to LLVM, the vector print is decomposed into elementary
-    printing method calls that at runtime will yield:
+    when lowered to LLVM, the vector print is unrolled into
+    elementary printing method calls that at runtime will yield
 
-    ```
     ( 0.0, 0.0, 0.0, 0.0 )
-    ```
-
-    This is printed to stdout via a small runtime support library, which only
-    needs to provide a few printing methods (single value for all data
-    types, opening/closing bracket, comma, newline).
-
-    By default `vector.print` adds a newline after the vector, but this can be
-    controlled by the `punctuation` attribute. For example, to print a comma
-    after instead do:
 
-    ```mlir
-    vector.print %v : vector<4xf32> #vector.punctuation<comma>
-    ```
-
-    Note that it is possible to use the punctuation attribute alone. The
-    following will print a single newline:
-
-    ```mlir
-    vector.print #vector.punctuation<newline>
+    on stdout when linked with a small runtime support library,
+    which only needs to provide a few printing methods (single
+    value for all data types, opening/closing bracket, comma,
+    newline).
     ```
   }];
   let extraClassDeclaration = [{
@@ -2531,13 +2498,7 @@ def Vector_PrintOp :
       return getSource().getType();
     }
   }];
-  let builders = [
-    OpBuilder<(ins "PrintPunctuation":$punctuation), [{
-      build($_builder, $_state, {}, punctuation);
-    }]>,
-  ];
-
-  let assemblyFormat = "($source^ `:` type($source))? ($punctuation^)? attr-dict";
+  let assemblyFormat = "$source attr-dict `:` type($source)";
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 307743ada998a5..1e9913048e8ac4 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -28,6 +28,13 @@
 using namespace mlir;
 using namespace mlir::vector;
 
+// Helper to reduce vector type by one rank at front.
+static VectorType reducedVectorTypeFront(VectorType tp) {
+  assert((tp.getRank() > 1) && "unlowerable vector type");
+  return VectorType::get(tp.getShape().drop_front(), tp.getElementType(),
+                         tp.getScalableDims().drop_front());
+}
+
 // Helper to reduce vector type by *all* but one rank at back.
 static VectorType reducedVectorTypeBack(VectorType tp) {
   assert((tp.getRank() > 1) && "unlowerable vector type");
@@ -1409,89 +1416,45 @@ class VectorPrintOpConversion : public ConvertOpToLLVMPattern<vector::PrintOp> {
 public:
   using ConvertOpToLLVMPattern<vector::PrintOp>::ConvertOpToLLVMPattern;
 
-  // Lowering implementation that relies on a small runtime support library,
-  // which only needs to provide a few printing methods (single value for all
-  // data types, opening/closing bracket, comma, newline). The lowering splits
-  // the vector into elementary printing operations. The advantage of this
-  // approach is that the library can remain unaware of all low-level
-  // implementation details of vectors while still supporting output of any
-  // shaped and dimensioned vector.
-  //
-  // Note: This lowering only handles scalars, n-D vectors are broken into
-  // printing scalars in loops in VectorToSCF.
+  // Proof-of-concept lowering implementation that relies on a small
+  // runtime support library, which only needs to provide a few
+  // printing methods (single value for all data types, opening/closing
+  // bracket, comma, newline). The lowering fully unrolls a vector
+  // in terms of these elementary printing operations. The advantage
+  // of this approach is that the library can remain unaware of all
+  // low-level implementation details of vectors while still supporting
+  // output of any shaped and dimensioned vector. Due to full unrolling,
+  // this approach is less suited for very large vectors though.
   //
   // TODO: rely solely on libc in future? something else?
   //
   LogicalResult
   matchAndRewrite(vector::PrintOp printOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    auto parent = printOp->getParentOfType<ModuleOp>();
-    auto loc = printOp->getLoc();
+    Type printType = printOp.getPrintType();
 
-    if (auto value = adaptor.getSource()) {
-      Type printType = printOp.getPrintType();
-      if (isa<VectorType>(printType)) {
-        // Vectors should be broken into elementary print ops in VectorToSCF.
-        return failure();
-      }
-      if (failed(emitScalarPrint(rewriter, parent, loc, printType, value)))
-        return failure();
-    }
-
-    auto punct = printOp.getPunctuation();
-    if (punct != PrintPunctuation::NoPunctuation) {
-      emitCall(rewriter, printOp->getLoc(), [&] {
-        switch (punct) {
-        case PrintPunctuation::Close:
-          return LLVM::lookupOrCreatePrintCloseFn(parent);
-        case PrintPunctuation::Open:
-          return LLVM::lookupOrCreatePrintOpenFn(parent);
-        case PrintPunctuation::Comma:
-          return LLVM::lookupOrCreatePrintCommaFn(parent);
-        case PrintPunctuation::NewLine:
-          return LLVM::lookupOrCreatePrintNewlineFn(parent);
-        default:
-          llvm_unreachable("unexpected punctuation");
-        }
-      }());
-    }
-
-    rewriter.eraseOp(printOp);
-    return success();
-  }
-
-private:
-  enum class PrintConversion {
-    // clang-format off
-    None,
-    ZeroExt64,
-    SignExt64,
-    Bitcast16
-    // clang-format on
-  };
-
-  LogicalResult emitScalarPrint(ConversionPatternRewriter &rewriter,
-                                ModuleOp parent, Location loc, Type printType,
-                                Value value) const {
     if (typeConverter->convertType(printType) == nullptr)
       return failure();
 
     // Make sure element type has runtime support.
     PrintConversion conversion = PrintConversion::None;
+    VectorType vectorType = dyn_cast<VectorType>(printType);
+    Type eltType = vectorType ? vectorType.getElementType() : printType;
+    auto parent = printOp->getParentOfType<ModuleOp>();
     Operation *printer;
-    if (printType.isF32()) {
+    if (eltType.isF32()) {
       printer = LLVM::lookupOrCreatePrintF32Fn(parent);
-    } else if (printType.isF64()) {
+    } else if (eltType.isF64()) {
       printer = LLVM::lookupOrCreatePrintF64Fn(parent);
-    } else if (printType.isF16()) {
+    } else if (eltType.isF16()) {
       conversion = PrintConversion::Bitcast16; // bits!
       printer = LLVM::lookupOrCreatePrintF16Fn(parent);
-    } else if (printType.isBF16()) {
+    } else if (eltType.isBF16()) {
       conversion = PrintConversion::Bitcast16; // bits!
       printer = LLVM::lookupOrCreatePrintBF16Fn(parent);
-    } else if (printType.isIndex()) {
+    } else if (eltType.isIndex()) {
       printer = LLVM::lookupOrCreatePrintU64Fn(parent);
-    } else if (auto intTy = dyn_cast<IntegerType>(printType)) {
+    } else if (auto intTy = dyn_cast<IntegerType>(eltType)) {
       // Integers need a zero or sign extension on the operand
       // (depending on the source type) as well as a signed or
       // unsigned print method. Up to 64-bit is supported.
@@ -1522,26 +1485,88 @@ class VectorPrintOpConversion : public ConvertOpToLLVMPattern<vector::PrintOp> {
       return failure();
     }
 
-    switch (conversion) {
-    case PrintConversion::ZeroExt64:
-      value = rewriter.create<arith::ExtUIOp>(
-          loc, IntegerType::get(rewriter.getContext(), 64), value);
-      break;
-    case PrintConversion::SignExt64:
-      value = rewriter.create<arith::ExtSIOp>(
-          loc, IntegerType::get(rewriter.getContext(), 64), value);
-      break;
-    case PrintConversion::Bitcast16:
-      value = rewriter.create<LLVM::BitcastOp>(
-          loc, IntegerType::get(rewriter.getContext(), 16), value);
-      break;
-    case PrintConversion::None:
-      break;
-    }
-    emitCall(rewriter, loc, printer, value);
+    // Unroll vector into elementary print calls.
+    int64_t rank = vectorType ? vectorType.getRank() : 0;
+    Type type = vectorType ? vectorType : eltType;
+    emitRanks(rewriter, printOp, adaptor.getSource(), type, printer, rank,
+              conversion);
+    emitCall(rewriter, printOp->getLoc(),
+             LLVM::lookupOrCreatePrintNewlineFn(parent));
+    rewriter.eraseOp(printOp);
     return success();
   }
 
+private:
+  enum class PrintConversion {
+    // clang-format off
+    None,
+    ZeroExt64,
+    SignExt64,
+    Bitcast16
+    // clang-format on
+  };
+
+  void emitRanks(ConversionPatternRewriter &rewriter, Operation *op,
+                 Value value, Type type, Operation *printer, int64_t rank,
+                 PrintConversion conversion) const {
+    VectorType vectorType = dyn_cast<VectorType>(type);
+    Location loc = op->getLoc();
+    if (!vectorType) {
+      assert(rank == 0 && "The scalar case expects rank == 0");
+      switch (conversion) {
+      case PrintConversion::ZeroExt64:
+        value = rewriter.create<arith::ExtUIOp>(
+            loc, IntegerType::get(rewriter.getContext(), 64), value);
+        break;
+      case PrintConversion::SignExt64:
+        value = rewriter.create<arith::ExtSIOp>(
+            loc, IntegerType::get(rewriter.getContext(), 64), value);
+        break;
+      case PrintConversion::Bitcast16:
+        value = rewriter.create<LLVM::BitcastOp>(
+            loc, IntegerType::get(rewriter.getContext(), 16), value);
+        break;
+      case PrintConversion::None:
+        break;
+      }
+      emitCall(rewriter, loc, printer, value);
+      return;
+    }
+
+    auto parent = op->getParentOfType<ModuleOp>();
+    emitCall(rewriter, loc, LLVM::lookupOrCreatePrintOpenFn(parent));
+    Operation *printComma = LLVM::lookupOrCreatePrintCommaFn(parent);
+
+    if (rank <= 1) {
+      auto reducedType = vectorType.getElementType();
+      auto llvmType = typeConverter->convertType(reducedType);
+      int64_t dim = rank == 0 ? 1 : vectorType.getDimSize(0);
+      for (int64_t d = 0; d < dim; ++d) {
+        Value nestedVal = extractOne(rewriter, *getTypeConverter(), loc, value,
+                                     llvmType, /*rank=*/0, /*pos=*/d);
+        emitRanks(rewriter, op, nestedVal, reducedType, printer, /*rank=*/0,
+                  conversion);
+        if (d != dim - 1)
+          emitCall(rewriter, loc, printComma);
+      }
+      emitCall(rewriter, loc, LLVM::lookupOrCreatePrintCloseFn(parent));
+      return;
+    }
+
+    int64_t dim = vectorType.getDimSize(0);
+    for (int64_t d = 0; d < dim; ++d) {
+      auto reducedType = reducedVectorTypeFront(vectorType);
+      auto llvmType = typeConverter->convertType(reducedType);
+      Value nestedVal = extractOne(rewriter, *getTypeConverter(), loc, value,
+                                   llvmType, rank, d);
+      emitRanks(rewriter, op, nestedVal, reducedType, printer, rank - 1,
+                conversion);
+      if (d != dim - 1)
+        emitCall(rewriter, loc, printComma);
+    }
+    emitCall(rewriter, loc, LLVM::lookupOrCreatePrintCloseFn(parent));
+  }
+
   // Helper to emit a call.
   static void emitCall(ConversionPatternRewriter &rewriter, Location loc,
                        Operation *ref, ValueRange params = ValueRange()) {

diff  --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 901def2f53ba7b..5e19e422b61116 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -10,7 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <numeric>
 #include <optional>
 #include <type_traits>
 
@@ -652,171 +651,6 @@ struct PrepareTransferWriteConversion
   }
 };
 
-/// Decompose a n-D PrintOp into a loop of elementary/scalar prints. This allows
-/// printing both 1D scalable vectors and n-D fixed size vectors.
-///
-/// E.g.:
-/// ```
-/// vector.print %v : vector<[4]xi32>
-/// ```
-/// is rewritten to:
-/// ```
-/// %c0 = arith.constant 0 : index
-/// %c4 = arith.constant 4 : index
-/// %c1 = arith.constant 1 : index
-/// %vscale = vector.vscale
-/// %length = arith.muli %vscale, %c4 : index
-/// %lastIndex = arith.subi %length, %c1 : index
-/// vector.print <open>
-/// scf.for %i = %c0 to %length step %c1 {
-///   %el = vector.extractelement %v[%i : index] : vector<[4]xi32>
-///   vector.print %el : i32 <no_punctuation>
-///   %notLastIndex = arith.cmpi ult, %i, %lastIndex : index
-///   scf.if %notLastIndex {
-///     vector.print <comma>
-///   }
-/// }
-/// vector.print <close>
-/// vector.print
-/// ```
-struct DecomposePrintOpConversion : public VectorToSCFPattern<vector::PrintOp> {
-  using VectorToSCFPattern<vector::PrintOp>::VectorToSCFPattern;
-  LogicalResult matchAndRewrite(vector::PrintOp printOp,
-                                PatternRewriter &rewriter) const override {
-    if (!printOp.getSource())
-      return failure();
-
-    VectorType vectorType = dyn_cast<VectorType>(printOp.getPrintType());
-    if (!vectorType)
-      return failure();
-
-    // Currently >= 2D scalable vectors are not supported.
-    // These can't be lowered to LLVM (as LLVM does not support scalable vectors
-    // of scalable vectors), and due to limitations of current ops can't be
-    // indexed with SSA values or flattened. This may change after
-    // https://reviews.llvm.org/D155034, though there still needs to be a path
-    // for lowering to LLVM.
-    if (vectorType.getRank() > 1 && vectorType.isScalable())
-      return failure();
-
-    auto loc = printOp.getLoc();
-    auto value = printOp.getSource();
-
-    if (auto intTy = dyn_cast<IntegerType>(vectorType.getElementType())) {
-      // Oddly sized integers are (somewhat) buggy on a lot of backends, so to
-      // avoid issues extend them to a more standard size.
-      // https://github.com/llvm/llvm-project/issues/30613
-      auto width = intTy.getWidth();
-      auto legalWidth = llvm::NextPowerOf2(std::max(8u, width) - 1);
-      auto legalIntTy = IntegerType::get(rewriter.getContext(), legalWidth,
-                                         intTy.getSignedness());
-      // arith can only take signless integers, so we must cast back and forth.
-      auto signlessSourceVectorType =
-          vectorType.cloneWith({}, getIntTypeWithSignlessSemantics(intTy));
-      auto signlessTargetVectorType =
-          vectorType.cloneWith({}, getIntTypeWithSignlessSemantics(legalIntTy));
-      auto targetVectorType = vectorType.cloneWith({}, legalIntTy);
-      value = rewriter.create<vector::BitCastOp>(loc, signlessSourceVectorType,
-                                                 value);
-      if (width == 1 || intTy.isUnsigned())
-        value = rewriter.create<arith::ExtUIOp>(loc, signlessTargetVectorType,
-                                                value);
-      else
-        value = rewriter.create<arith::ExtSIOp>(loc, signlessTargetVectorType,
-                                                value);
-      value = rewriter.create<vector::BitCastOp>(loc, targetVectorType, value);
-      vectorType = targetVectorType;
-    }
-
-    auto scalableDimensions = vectorType.getScalableDims();
-    auto shape = vectorType.getShape();
-    constexpr int64_t singletonShape[] = {1};
-    if (vectorType.getRank() == 0)
-      shape = singletonShape;
-
-    if (vectorType.getRank() != 1) {
-      // Flatten n-D vectors to 1D. This is done to allow indexing with a
-      // non-constant value (which can currently only be done via
-      // vector.extractelement for 1D vectors).
-      auto flatLength = std::accumulate(shape.begin(), shape.end(), 1,
-                                        std::multiplies<int64_t>());
-      auto flatVectorType =
-          VectorType::get({flatLength}, vectorType.getElementType());
-      value = rewriter.create<vector::ShapeCastOp>(loc, flatVectorType, value);
-    }
-
-    vector::PrintOp firstClose;
-    SmallVector<Value, 8> loopIndices;
-    for (unsigned d = 0; d < shape.size(); d++) {
-      // Setup loop bounds and step.
-      Value lowerBound = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-      Value upperBound = rewriter.create<arith::ConstantIndexOp>(loc, shape[d]);
-      Value step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-      if (!scalableDimensions.empty() && scalableDimensions[d]) {
-        auto vscale = rewriter.create<vector::VectorScaleOp>(
-            loc, rewriter.getIndexType());
-        upperBound = rewriter.create<arith::MulIOp>(loc, upperBound, vscale);
-      }
-      auto lastIndex = rewriter.create<arith::SubIOp>(loc, upperBound, step);
-
-      // Create a loop to print the elements surrounded by parentheses.
-      rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Open);
-      auto loop =
-          rewriter.create<scf::ForOp>(loc, lowerBound, upperBound, step);
-      auto printClose = rewriter.create<vector::PrintOp>(
-          loc, vector::PrintPunctuation::Close);
-      if (!firstClose)
-        firstClose = printClose;
-
-      auto loopIdx = loop.getInductionVar();
-      loopIndices.push_back(loopIdx);
-
-      // Print a comma after all but the last element.
-      rewriter.setInsertionPointToStart(loop.getBody());
-      auto notLastIndex = rewriter.create<arith::CmpIOp>(
-          loc, arith::CmpIPredicate::ult, loopIdx, lastIndex);
-      rewriter.create<scf::IfOp>(loc, notLastIndex,
-                                 [&](OpBuilder &builder, Location loc) {
-                                   builder.create<vector::PrintOp>(
-                                       loc, vector::PrintPunctuation::Comma);
-                                   builder.create<scf::YieldOp>(loc);
-                                 });
-
-      rewriter.setInsertionPointToStart(loop.getBody());
-    }
-
-    // Compute the flattened index.
-    // Note: For the > rank 1 vectors this assumes non-scalable.
-    Value flatIndex;
-    auto currentStride = 1;
-    for (int d = shape.size() - 1; d >= 0; d--) {
-      auto stride = rewriter.create<arith::ConstantIndexOp>(loc, currentStride);
-      auto index = rewriter.create<arith::MulIOp>(loc, stride, loopIndices[d]);
-      if (flatIndex)
-        flatIndex = rewriter.create<arith::AddIOp>(loc, flatIndex, index);
-      else
-        flatIndex = index;
-      currentStride *= shape[d];
-    }
-
-    // Print the scalar elements in the inner most loop.
-    auto element =
-        rewriter.create<vector::ExtractElementOp>(loc, value, flatIndex);
-    rewriter.create<vector::PrintOp>(loc, element,
-                                     vector::PrintPunctuation::NoPunctuation);
-
-    rewriter.setInsertionPointAfter(firstClose);
-    rewriter.create<vector::PrintOp>(loc, printOp.getPunctuation());
-    rewriter.eraseOp(printOp);
-    return success();
-  }
-
-  static IntegerType getIntTypeWithSignlessSemantics(IntegerType intTy) {
-    return IntegerType::get(intTy.getContext(), intTy.getWidth(),
-                            IntegerType::Signless);
-  };
-};
-
 /// Progressive lowering of vector transfer ops: Unpack one dimension.
 ///
 /// 1. Unpack one dimension from the current buffer type and cast the buffer
@@ -1446,8 +1280,6 @@ void mlir::populateVectorToSCFConversionPatterns(
                  lowering_1_d::TransferOp1dConversion<TransferWriteOp>>(
         patterns.getContext(), options);
   }
-  patterns.add<lowering_n_d::DecomposePrintOpConversion>(patterns.getContext(),
-                                                         options);
 }
 
 namespace {

diff  --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 514594240d22a1..fa119e290ae8d8 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -1044,6 +1044,57 @@ func.func @vector_print_scalar_f64(%arg0: f64) {
 
 // -----
 
+func.func @vector_print_vector_0d(%arg0: vector<f32>) {
+  vector.print %arg0 : vector<f32>
+  return
+}
+// CHECK-LABEL: @vector_print_vector_0d(
+// CHECK-SAME: %[[A:.*]]: vector<f32>)
+//       CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32>
+//       CHECK: llvm.call @printOpen() : () -> ()
+//       CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : index) : i64
+//       CHECK: %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<1xf32>
+//       CHECK: llvm.call @printF32(%[[T2]]) : (f32) -> ()
+//       CHECK: llvm.call @printClose() : () -> ()
+//       CHECK: llvm.call @printNewline() : () -> ()
+//       CHECK: return
+
+// -----
+
+func.func @vector_print_vector(%arg0: vector<2x2xf32>) {
+  vector.print %arg0 : vector<2x2xf32>
+  return
+}
+// CHECK-LABEL: @vector_print_vector(
+// CHECK-SAME: %[[A:.*]]: vector<2x2xf32>)
+//       CHECK:    %[[VAL_1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>>
+//       CHECK:    llvm.call @printOpen() : () -> ()
+//       CHECK:    %[[x0:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<2xf32>>
+//       CHECK:    llvm.call @printOpen() : () -> ()
+//       CHECK:    %[[x1:.*]] = llvm.mlir.constant(0 : index) : i64
+//       CHECK:    %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : i64] : vector<2xf32>
+//       CHECK:    llvm.call @printF32(%[[x2]]) : (f32) -> ()
+//       CHECK:    llvm.call @printComma() : () -> ()
+//       CHECK:    %[[x3:.*]] = llvm.mlir.constant(1 : index) : i64
+//       CHECK:    %[[x4:.*]] = llvm.extractelement %[[x0]][%[[x3]] : i64] : vector<2xf32>
+//       CHECK:    llvm.call @printF32(%[[x4]]) : (f32) -> ()
+//       CHECK:    llvm.call @printClose() : () -> ()
+//       CHECK:    llvm.call @printComma() : () -> ()
+//       CHECK:    %[[x5:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<2xf32>>
+//       CHECK:    llvm.call @printOpen() : () -> ()
+//       CHECK:    %[[x6:.*]] = llvm.mlir.constant(0 : index) : i64
+//       CHECK:    %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : i64] : vector<2xf32>
+//       CHECK:    llvm.call @printF32(%[[x7]]) : (f32) -> ()
+//       CHECK:    llvm.call @printComma() : () -> ()
+//       CHECK:    %[[x8:.*]] = llvm.mlir.constant(1 : index) : i64
+//       CHECK:    %[[x9:.*]] = llvm.extractelement %[[x5]][%[[x8]] : i64] : vector<2xf32>
+//       CHECK:    llvm.call @printF32(%[[x9]]) : (f32) -> ()
+//       CHECK:    llvm.call @printClose() : () -> ()
+//       CHECK:    llvm.call @printClose() : () -> ()
+//       CHECK:    llvm.call @printNewline() : () -> ()
+
+// -----
+
 func.func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> {
   %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
   return %0 : vector<2xf32>

diff  --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index a9144ef28b7bfb..587dc39799777b 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -546,92 +546,3 @@ func.func @transfer_write_scalable(%arg0: memref<?xf32, strided<[?], offset: ?>>
 // CHECK:             } else {
 // CHECK:             }
 // CHECK:           }
-
-// -----
-
-func.func @vector_print_vector_0d(%arg0: vector<f32>) {
-  vector.print %arg0 : vector<f32>
-  return
-}
-// CHECK-LABEL:   func.func @vector_print_vector_0d(
-// CHECK-SAME:                                      %[[VEC:.*]]: vector<f32>) {
-// CHECK:           %[[C0:.*]] = arith.constant 0 : index
-// CHECK:           %[[C1:.*]] = arith.constant 1 : index
-// CHECK:           %[[FLAT_VEC:.*]] = vector.shape_cast %[[VEC]] : vector<f32> to vector<1xf32>
-// CHECK:           vector.print <open>
-// CHECK:           scf.for %[[IDX:.*]] = %[[C0]] to %[[C1]] step %[[C1]] {
-// CHECK:             %[[EL:.*]] = vector.extractelement %[[FLAT_VEC]]{{\[}}%[[IDX]] : index] : vector<1xf32>
-// CHECK:             vector.print %[[EL]] : f32 <no_punctuation>
-// CHECK:             %[[IS_NOT_LAST:.*]] = arith.cmpi ult, %[[IDX]], %[[C0]] : index
-// CHECK:             scf.if %[[IS_NOT_LAST]] {
-// CHECK:               vector.print <comma>
-// CHECK:             }
-// CHECK:           }
-// CHECK:           vector.print <close>
-// CHECK:           vector.print
-// CHECK:           return
-// CHECK:         }
-
-// -----
-
-func.func @vector_print_vector(%arg0: vector<2x2xf32>) {
-  vector.print %arg0 : vector<2x2xf32>
-  return
-}
-// CHECK-LABEL:   func.func @vector_print_vector(
-// CHECK-SAME:                                   %[[VEC:.*]]: vector<2x2xf32>) {
-// CHECK:           %[[C0:.*]] = arith.constant 0 : index
-// CHECK:           %[[C2:.*]] = arith.constant 2 : index
-// CHECK:           %[[C1:.*]] = arith.constant 1 : index
-// CHECK:           %[[FLAT_VEC:.*]] = vector.shape_cast %[[VEC]] : vector<2x2xf32> to vector<4xf32>
-// CHECK:           vector.print <open>
-// CHECK:           scf.for %[[I:.*]] = %[[C0]] to %[[C2]] step %[[C1]] {
-// CHECK:             vector.print <open>
-// CHECK:             scf.for %[[J:.*]] = %[[C0]] to %[[C2]] step %[[C1]] {
-// CHECK:               %[[OUTER_INDEX:.*]] = arith.muli %[[I]], %[[C2]] : index
-// CHECK:               %[[FLAT_INDEX:.*]] = arith.addi %[[J]], %[[OUTER_INDEX]] : index
-// CHECK:               %[[EL:.*]] = vector.extractelement %[[FLAT_VEC]]{{\[}}%[[FLAT_INDEX]] : index] : vector<4xf32>
-// CHECK:               vector.print %[[EL]] : f32 <no_punctuation>
-// CHECK:               %[[IS_NOT_LAST_J:.*]] = arith.cmpi ult, %[[J]], %[[C1]] : index
-// CHECK:               scf.if %[[IS_NOT_LAST_J]] {
-// CHECK:                 vector.print <comma>
-// CHECK:               }
-// CHECK:             }
-// CHECK:             vector.print <close>
-// CHECK:             %[[IS_NOT_LAST_I:.*]] = arith.cmpi ult, %[[I]], %[[C1]] : index
-// CHECK:             scf.if %[[IS_NOT_LAST_I]] {
-// CHECK:               vector.print <comma>
-// CHECK:             }
-// CHECK:           }
-// CHECK:           vector.print <close>
-// CHECK:           vector.print
-// CHECK:           return
-// CHECK:         }
-
-// -----
-
-func.func @vector_print_scalable_vector(%arg0: vector<[4]xi32>) {
-  vector.print %arg0 : vector<[4]xi32>
-  return
-}
-// CHECK-LABEL:   func.func @vector_print_scalable_vector(
-// CHECK-SAME:                                            %[[VEC:.*]]: vector<[4]xi32>) {
-// CHECK:           %[[C0:.*]] = arith.constant 0 : index
-// CHECK:           %[[C4:.*]] = arith.constant 4 : index
-// CHECK:           %[[C1:.*]] = arith.constant 1 : index
-// CHECK:           %[[VSCALE:.*]] = vector.vscale
-// CHECK:           %[[UPPER_BOUND:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
-// CHECK:           %[[LAST_INDEX:.*]] = arith.subi %[[UPPER_BOUND]], %[[C1]] : index
-// CHECK:           vector.print <open>
-// CHECK:           scf.for %[[IDX:.*]] = %[[C0]] to %[[UPPER_BOUND]] step %[[C1]] {
-// CHECK:             %[[EL:.*]] = vector.extractelement %[[VEC]]{{\[}}%[[IDX]] : index] : vector<[4]xi32>
-// CHECK:             vector.print %[[EL]] : i32 <no_punctuation>
-// CHECK:             %[[IS_NOT_LAST:.*]] = arith.cmpi ult, %[[IDX]], %[[LAST_INDEX]] : index
-// CHECK:             scf.if %[[IS_NOT_LAST]] {
-// CHECK:               vector.print <comma>
-// CHECK:             }
-// CHECK:           }
-// CHECK:           vector.print <close>
-// CHECK:           vector.print
-// CHECK:           return
-// CHECK:         }

diff  --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-compare-results-i16.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-compare-results-i16.mlir
index 213cd4de1ea931..19fd8c4bba8b46 100644
--- a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-compare-results-i16.mlir
+++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-compare-results-i16.mlir
@@ -2,9 +2,8 @@
 // calculations. Emulate i16 ops with i8 ops.
 
 // RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=8" \
-// RUN:             --convert-vector-to-scf --convert-scf-to-cf --convert-cf-to-llvm \
-// RUN:             --convert-vector-to-llvm --convert-func-to-llvm --convert-arith-to-llvm \
-// RUN:             --reconcile-unrealized-casts | \
+// RUN:             --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN:             --convert-func-to-llvm --convert-arith-to-llvm | \
 // RUN:   mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:      --shared-libs="%mlir_c_runner_utils,%mlir_runner_utils" | \
 // RUN:   FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-constants-i16.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-constants-i16.mlir
index c6a48f61d434e1..dcc0b145c51ab8 100644
--- a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-constants-i16.mlir
+++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-constants-i16.mlir
@@ -2,8 +2,8 @@
 // constants and that printing works. Emulate i16 ops with i8 ops.
 
 // RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=8" \
-// RUN:             --convert-vector-to-scf --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
-// RUN:             --convert-func-to-llvm --convert-arith-to-llvm --reconcile-unrealized-casts | \
+// RUN:             --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN:             --convert-func-to-llvm --convert-arith-to-llvm | \
 // RUN:   mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:                   --shared-libs=%mlir_c_runner_utils | \
 // RUN:   FileCheck %s --match-full-lines --check-prefix=EMULATED

diff  --git a/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir b/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir
index 87041ccde427d6..725b13475adaf4 100644
--- a/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir
+++ b/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts |  \
+// RUN: mlir-opt %s -convert-vector-to-llvm |  \
 // RUN: mlir-cpu-runner -e entry_point_with_all_constants -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils
 
 module {
-  func.func @function_to_run(%a: vector<8xf32>, %b: vector<8xf32>)  {
+  llvm.func @function_to_run(%a: vector<8xf32>, %b: vector<8xf32>)  {
     // CHECK: ( 8, 10, 12, 14, 16, 18, 20, 22 )
     %r0 = llvm.inline_asm asm_dialect = intel
         "vaddps $0, $1, $2", "=x,x,x" %a, %b:
@@ -36,21 +36,21 @@ module {
       : vector<8xf32>, vector<8xf32>
     vector.print %r4: vector<8xf32>
 
-    return
+    llvm.return
   }
 
   // Solely exists to prevent inlining and get the expected assembly.
-  func.func @entry_point(%a: vector<8xf32>, %b: vector<8xf32>)  {
-    func.call @function_to_run(%a, %b) : (vector<8xf32>, vector<8xf32>) -> ()
-    return
+  llvm.func @entry_point(%a: vector<8xf32>, %b: vector<8xf32>)  {
+    llvm.call @function_to_run(%a, %b) : (vector<8xf32>, vector<8xf32>) -> ()
+    llvm.return
   }
 
-  func.func @entry_point_with_all_constants()  {
+  llvm.func @entry_point_with_all_constants()  {
     %a = llvm.mlir.constant(dense<[0.0, 1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0]>
       : vector<8xf32>) : vector<8xf32>
     %b = llvm.mlir.constant(dense<[8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]>
       : vector<8xf32>) : vector<8xf32>
-    func.call @function_to_run(%a, %b) : (vector<8xf32>, vector<8xf32>) -> ()
-    return
+    llvm.call @function_to_run(%a, %b) : (vector<8xf32>, vector<8xf32>) -> ()
+    llvm.return
   }
 }

diff  --git a/mlir/test/Integration/Dialect/LLVMIR/CPU/test-vp-intrinsic.mlir b/mlir/test/Integration/Dialect/LLVMIR/CPU/test-vp-intrinsic.mlir
index a7cd3558bc6fed..f3e8f31ea55e5f 100644
--- a/mlir/test/Integration/Dialect/LLVMIR/CPU/test-vp-intrinsic.mlir
+++ b/mlir/test/Integration/Dialect/LLVMIR/CPU/test-vp-intrinsic.mlir
@@ -1,6 +1,5 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm \
-// RUN: -convert-vector-to-llvm -convert-index-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm \
-// RUN: -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-llvm -finalize-memref-to-llvm \
+// RUN:             -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry \
 // RUN:      --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sm80-lt/sparse-matmul-2-4-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sm80-lt/sparse-matmul-2-4-lib.mlir
index 1c9e6a956c0b28..0b546c59605552 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sm80-lt/sparse-matmul-2-4-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sm80-lt/sparse-matmul-2-4-lib.mlir
@@ -1,9 +1,9 @@
 //
 // NOTE: this test requires gpu-sm80 and cusparselt
 //
-// DEFINE: %{compile} = mlir-opt --convert-vector-to-scf --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
+// DEFINE: %{compile} = mlir-opt --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
 // DEFINE: --convert-arith-to-llvm --gpu-to-llvm --reconcile-unrealized-casts \
-// DEFINE: %s
+// DEFINE: %s 
 // DEFINE: %{run} = mlir-cpu-runner \
 // DEFINE:   --shared-libs=%mlir_cuda_runtime \
 // DEFINE:   --shared-libs=%mlir_c_runner_utils \

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
index 8eb90fd3ca9946..0443f1a533659a 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
@@ -4,7 +4,7 @@
 // RUN: mlir-opt \
 // RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse,gpu.module(gpu-to-cubin{chip=sm_80 features=+ptx71}))" \
 // RUN: %s \
-// RUN: | mlir-opt --convert-vector-to-scf --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: | mlir-opt --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
 // RUN:            --convert-arith-to-llvm --gpu-to-llvm --reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%mlir_cuda_runtime \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-sve.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-sve.mlir
index 824963cdea3b33..3229122b388526 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-sve.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-sve.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -lower-affine -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm="enable-arm-sve" -finalize-memref-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -canonicalize | \
+// RUN: mlir-opt %s -lower-affine -convert-scf-to-cf -convert-vector-to-llvm="enable-arm-sve" -finalize-memref-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -canonicalize | \
 // RUN: %mcr_aarch64_cmd -e=entry -entry-point-result=void --march=aarch64 --mattr="+sve" -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
index 4b901289d1a4be..de031adf5c5876 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx" --dlopen=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
index f1d7caeb4f3daf..7eaa518e0255e5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts  | \
 // RUN: mlir-translate  --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx512bw" --dlopen=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-rsqrt.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-rsqrt.mlir
index 225f9963aeeeaf..98be37ce5b6819 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-rsqrt.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-rsqrt.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm | \
 // RUN: mlir-translate --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx" --dlopen=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
index 2eccf00f221a72..a2b058f0aaa38d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-func-to-llvm -reconcile-unrealized-casts  | \
 // RUN: mlir-translate  --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx512bw,avx512vp2intersect" --dlopen=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
index 8efa01c7144f05..9a4a2a87d3dbad 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
index a33aef5a32a407..0472329f7ee1f3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
index e21334f493d230..a92619ca219cdb 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
index ec08dc59ac25f0..33bc9925dc292e 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
index ad35ff65b11574..579dc86cad55b8 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
index cc8c57427dc3fe..29a95a46ef0ba7 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts| \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
index 58a3b81530193d..51dce0f967e963 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
index ba61f83f760f1d..a22cfc75fab384 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
index 47c3211b8c4870..472e369deafa8b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
index 779ef4ed4e6fe9..ccc165f042be6e 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -O0 -enable-matrix -matrix-allow-contract -matrix-default-layout=column-major \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
index 724949d38ee3e0..f3bf666aa7368b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -O0 -enable-matrix -matrix-allow-contract -matrix-default-layout=row-major \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
index 10d18dec515a7a..afd08612db7886 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
index 30700858d2f4e4..9524f97672493f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-index-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-index-vectors.mlir
index cf19af8f30a055..ee01af8c00d762 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-index-vectors.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-index-vectors.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
index 91cf95a6ec3761..882aa8a92e500a 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
index c668aa62c823a8..1a07f131cbdabf 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
index 4dcafc94164f46..c100787efaf5fc 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
index 5c048518cb9524..26f2e9e9d33172 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -O0 -enable-matrix -matrix-allow-contract -matrix-default-layout=column-major \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
index 7b5cfbb1f3b2c8..5001258e86997a 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -O0 -enable-matrix -matrix-allow-contract -matrix-default-layout=row-major \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
index 90088ba8ffbf31..1df8d0a52831f3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
index 8253f037a34d56..4ad531885b6150 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-print-fp.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-print-fp.mlir
index ae2a31fc936676..eeee363d246f35 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-print-fp.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-print-fp.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
index 977c676e86cb1c..8d89873357aab5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-realloc.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-realloc.mlir
index 726e9a300b06cf..a128415f031e53 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-realloc.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-realloc.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts |\
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts |\
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm='use-aligned-alloc=1' -convert-func-to-llvm -arith-expand -reconcile-unrealized-casts |\
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm='use-aligned-alloc=1' -convert-func-to-llvm -arith-expand -reconcile-unrealized-casts |\
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | FileCheck %s
 

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
index d71a6a3de64923..a16fe1cd307bb5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf \
+// RUN: mlir-opt %s -convert-scf-to-cf \
 // RUN:             -convert-vector-to-llvm='reassociate-fp-reductions' \
 // RUN:             -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
index 91b0c477822667..2e18f30bd1f9ae 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
index 49a92ff41d7fa2..1735e331cc9af2 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf \
+// RUN: mlir-opt %s -convert-scf-to-cf \
 // RUN:             -convert-vector-to-llvm='reassociate-fp-reductions' \
 // RUN:             -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
index 1b063b7a6b460f..8b85a8fefea4fa 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
index 29df68e9c54a40..8fe41849c38594 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
index dd82fa304a0bf4..2466131aa139f6 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
index d87c15b84d5c36..54869f3a878bd2 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
index d62e23a782577f..d9cccaf7daa5a0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
index f1bac61ce7627e..21ed2e469ef33c 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
index 6a0357202e0ad3..1877a39399ef9f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-vector-scan-lowering -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -test-vector-scan-lowering -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
index dba453f2f09a7d..70e17c8f96230b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
index b01db12767afd9..f2eb3d9b7d4c48 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
index 1aac72142a1037..86b62f4e88ccbb 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle16x16.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle16x16.mlir
index 5c21a69dd19918..3b3a56aaf1087d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle16x16.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle16x16.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf \
+// RUN: mlir-opt %s -convert-scf-to-cf \
 // RUN:   -test-transform-dialect-interpreter \
 // RUN:   -test-transform-dialect-erase-schedule \
 // RUN:   -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
index c3677b12cefd98..a9b29526769b49 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
index ad8cedf9eb7e0d..75550dbe4372e5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
index 11327ee2c99882..16783650f83217 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_c_runner_utils | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
index 8571c5ca5f3dc2..cec692b8378477 100644
--- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\
 // RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\
-// RUN: mlir-opt -lower-affine -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm \
+// RUN: mlir-opt  -lower-affine -convert-scf-to-cf -convert-vector-to-llvm \
 // RUN:  -convert-arith-to-llvm -gpu-kernel-outlining |\
 // RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
 // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\

diff  --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
index c671c1843862f9..157a1ac4277f21 100644
--- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
@@ -1,7 +1,7 @@
 // Run the test cases without distributing ops to test default lowering. Run
 // everything on the same thread.
 // RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
-// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
+// RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
 // RUN:  -gpu-kernel-outlining |\
 // RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
 // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
@@ -14,7 +14,7 @@
 // Run the same test cases with distribution and propagation.
 // RUN: mlir-opt %s  -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \
 // RUN:   -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
-// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
+// RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
 // RUN:  -gpu-kernel-outlining |\
 // RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
 // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
@@ -26,7 +26,7 @@
 
 // RUN: mlir-opt %s  -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \
 // RUN:   -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
-// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
+// RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
 // RUN:  -gpu-kernel-outlining |\
 // RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
 // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\

diff  --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
index 02219675f528a3..70b5d84fba8131 100644
--- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
+++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
@@ -1,6 +1,5 @@
 // RUN: mlir-opt %s \
 // RUN: | mlir-opt -gpu-kernel-outlining \
-// RUN: | mlir-opt -convert-vector-to-scf -convert-scf-to-cf \
 // RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
 // RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner \

diff  --git a/mlir/test/mlir-cpu-runner/X86Vector/math-polynomial-approx-avx2.mlir b/mlir/test/mlir-cpu-runner/X86Vector/math-polynomial-approx-avx2.mlir
index bf236dc7659f05..f6bce759e79387 100644
--- a/mlir/test/mlir-cpu-runner/X86Vector/math-polynomial-approx-avx2.mlir
+++ b/mlir/test/mlir-cpu-runner/X86Vector/math-polynomial-approx-avx2.mlir
@@ -1,10 +1,8 @@
 // RUN:   mlir-opt %s -test-math-polynomial-approximation="enable-avx2"        \
-// RUN:               -convert-vector-to-scf                                   \
-// RUN:               -convert-scf-to-cf                                       \
 // RUN:               -convert-arith-to-llvm                                   \
 // RUN:               -convert-vector-to-llvm="enable-x86vector"               \
 // RUN:               -convert-math-to-llvm                                    \
-// RUN:               -convert-func-to-llvm                                    \
+// RUN:               -convert-func-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \

diff  --git a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
index d3b19be9ecaf8f..058ebb28dff270 100644
--- a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
+++ b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="builtin.module(func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-scf,convert-scf-to-cf,convert-cf-to-llvm,convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \
+// RUN:   mlir-opt %s -pass-pipeline="builtin.module(func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%mlir_c_runner_utils  \

diff  --git a/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir b/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir
index d6943e5fc2831d..30f30def56fdd5 100644
--- a/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir
+++ b/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="builtin.module(func.func(test-expand-math,convert-arith-to-llvm),convert-vector-to-scf,convert-scf-to-cf,convert-cf-to-llvm,convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \
+// RUN:   mlir-opt %s -pass-pipeline="builtin.module(func.func(test-expand-math,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%mlir_c_runner_utils  \

diff  --git a/mlir/test/python/dialects/vector.py b/mlir/test/python/dialects/vector.py
index 36896cd4dc98d8..2347abb62b410c 100644
--- a/mlir/test/python/dialects/vector.py
+++ b/mlir/test/python/dialects/vector.py
@@ -21,7 +21,7 @@ def testPrintOp():
 
         @func.FuncOp.from_py_func(VectorType.get((12, 5), F32Type.get()))
         def print_vector(arg):
-            return vector.PrintOp(source=arg)
+            return vector.PrintOp(arg)
 
     # CHECK-LABEL: func @print_vector(
     # CHECK-SAME:                     %[[ARG:.*]]: vector<12x5xf32>) {