[flang-commits] [flang] ace0160 - [mlir] Split out a new ControlFlow dialect from Standard

River Riddle via flang-commits flang-commits at lists.llvm.org
Sun Feb 6 14:52:01 PST 2022


Author: River Riddle
Date: 2022-02-06T14:51:16-08:00
New Revision: ace01605e04d094c243b0cad873e8919b80a0ced

URL: https://github.com/llvm/llvm-project/commit/ace01605e04d094c243b0cad873e8919b80a0ced
DIFF: https://github.com/llvm/llvm-project/commit/ace01605e04d094c243b0cad873e8919b80a0ced.diff

LOG: [mlir] Split out a new ControlFlow dialect from Standard

This dialect is intended to model lower level/branch based control-flow constructs. The initial set
of operations is: AssertOp, BranchOp, CondBranchOp, SwitchOp; all split out from the current
standard dialect.

See https://discourse.llvm.org/t/standard-dialect-the-final-chapter/6061

Differential Revision: https://reviews.llvm.org/D118966

Added: 
    mlir/include/mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h
    mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h
    mlir/include/mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h
    mlir/include/mlir/Dialect/ControlFlow/CMakeLists.txt
    mlir/include/mlir/Dialect/ControlFlow/IR/CMakeLists.txt
    mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlow.h
    mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.h
    mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.td
    mlir/lib/Conversion/ControlFlowToLLVM/CMakeLists.txt
    mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp
    mlir/lib/Conversion/ControlFlowToSPIRV/CMakeLists.txt
    mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp
    mlir/lib/Conversion/SCFToControlFlow/CMakeLists.txt
    mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp
    mlir/lib/Dialect/ControlFlow/CMakeLists.txt
    mlir/lib/Dialect/ControlFlow/IR/CMakeLists.txt
    mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
    mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir
    mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir
    mlir/test/Dialect/ControlFlow/canonicalize.mlir
    mlir/test/Dialect/ControlFlow/invalid.mlir
    mlir/test/Dialect/ControlFlow/ops.mlir

Modified: 
    flang/include/flang/Optimizer/Support/InitFIR.h
    flang/include/flang/Tools/CLOptions.inc
    flang/lib/Lower/CMakeLists.txt
    flang/lib/Optimizer/CodeGen/CodeGen.cpp
    flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
    flang/lib/Optimizer/Transforms/RewriteLoop.cpp
    flang/test/Fir/Todo/select_case_with_character.fir
    flang/test/Fir/convert-to-llvm.fir
    flang/test/Fir/memref-data-flow.fir
    flang/tools/bbc/CMakeLists.txt
    flang/tools/bbc/bbc.cpp
    flang/tools/fir-opt/CMakeLists.txt
    flang/tools/tco/CMakeLists.txt
    flang/tools/tco/tco.cpp
    mlir/benchmark/python/common.py
    mlir/docs/BufferDeallocationInternals.md
    mlir/docs/Diagnostics.md
    mlir/docs/DialectConversion.md
    mlir/docs/Dialects/emitc.md
    mlir/docs/LangRef.md
    mlir/docs/PatternRewriter.md
    mlir/docs/Rationale/Rationale.md
    mlir/docs/Tutorials/Toy/Ch-6.md
    mlir/docs/includes/img/branch_example_post_move.svg
    mlir/docs/includes/img/branch_example_pre_move.svg
    mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
    mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
    mlir/include/mlir/Conversion/Passes.h
    mlir/include/mlir/Conversion/Passes.td
    mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
    mlir/include/mlir/Dialect/CMakeLists.txt
    mlir/include/mlir/Dialect/SCF/SCFOps.td
    mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
    mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
    mlir/include/mlir/InitAllDialects.h
    mlir/lib/Conversion/CMakeLists.txt
    mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
    mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
    mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
    mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
    mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
    mlir/lib/Conversion/PassDetail.h
    mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp
    mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
    mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
    mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
    mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp
    mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp
    mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp
    mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp
    mlir/lib/Dialect/Async/Transforms/CMakeLists.txt
    mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
    mlir/lib/Dialect/CMakeLists.txt
    mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
    mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
    mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
    mlir/lib/Dialect/SCF/SCF.cpp
    mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
    mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
    mlir/lib/Dialect/StandardOps/CMakeLists.txt
    mlir/lib/Dialect/StandardOps/IR/Ops.cpp
    mlir/lib/Target/Cpp/TranslateRegistration.cpp
    mlir/lib/Target/Cpp/TranslateToCpp.cpp
    mlir/test/Analysis/test-alias-analysis.mlir
    mlir/test/Analysis/test-dominance.mlir
    mlir/test/Analysis/test-liveness.mlir
    mlir/test/CAPI/ir.c
    mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
    mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
    mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
    mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
    mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
    mlir/test/Conversion/StandardToLLVM/func-memref.mlir
    mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
    mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir
    mlir/test/Dialect/Affine/invalid.mlir
    mlir/test/Dialect/Async/async-runtime-ref-counting.mlir
    mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir
    mlir/test/Dialect/Async/async-to-async-runtime.mlir
    mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
    mlir/test/Dialect/GPU/all-reduce-max.mlir
    mlir/test/Dialect/GPU/all-reduce.mlir
    mlir/test/Dialect/GPU/outlining.mlir
    mlir/test/Dialect/Linalg/canonicalize.mlir
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
    mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
    mlir/test/Dialect/Linalg/detensorize_if.mlir
    mlir/test/Dialect/Linalg/detensorize_while.mlir
    mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
    mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
    mlir/test/Dialect/OpenMP/ops.mlir
    mlir/test/Dialect/SCF/canonicalize.mlir
    mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
    mlir/test/Dialect/SCF/ops.mlir
    mlir/test/Dialect/Standard/canonicalize.mlir
    mlir/test/Dialect/Standard/func-bufferize.mlir
    mlir/test/IR/invalid.mlir
    mlir/test/IR/parser.mlir
    mlir/test/IR/region.mlir
    mlir/test/IR/traits.mlir
    mlir/test/IR/visitors.mlir
    mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
    mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
    mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
    mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
    mlir/test/Integration/Dialect/Memref/memref_abi.c
    mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
    mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-ext.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir
    mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-sparse-dot-product.mlir
    mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
    mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
    mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
    mlir/test/Integration/GPU/CUDA/shuffle.mlir
    mlir/test/Integration/GPU/ROCM/vecadd.mlir
    mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
    mlir/test/Target/Cpp/control_flow.mlir
    mlir/test/Target/Cpp/invalid.mlir
    mlir/test/Transforms/buffer-hoisting.mlir
    mlir/test/Transforms/buffer-loop-hoisting.mlir
    mlir/test/Transforms/canonicalize-block-merge.mlir
    mlir/test/Transforms/canonicalize-dce.mlir
    mlir/test/Transforms/canonicalize.mlir
    mlir/test/Transforms/control-flow-sink.mlir
    mlir/test/Transforms/cse.mlir
    mlir/test/Transforms/inlining.mlir
    mlir/test/Transforms/normalize-memrefs.mlir
    mlir/test/Transforms/promote-buffers-to-stack.mlir
    mlir/test/Transforms/sccp-callgraph.mlir
    mlir/test/Transforms/sccp.mlir
    mlir/test/Transforms/test-legalizer-full.mlir
    mlir/test/mlir-cpu-runner/async-error.mlir
    mlir/test/mlir-cpu-runner/async.mlir
    mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
    mlir/test/mlir-cpu-runner/copy.mlir
    mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
    mlir/test/mlir-cpu-runner/memref-reshape.mlir
    mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
    mlir/test/mlir-cpu-runner/unranked-memref.mlir
    mlir/test/mlir-cpu-runner/utils.mlir
    mlir/test/mlir-lsp-server/hover.test
    mlir/test/mlir-opt/async.mlir
    mlir/test/mlir-opt/commandline.mlir
    mlir/test/mlir-reduce/multiple-function.mlir
    mlir/test/mlir-reduce/simple-test.mlir
    mlir/test/python/execution_engine.py
    mlir/test/python/integration/dialects/linalg/opsrun.py
    mlir/test/python/ir/blocks.py
    mlir/test/python/ir/dialects.py

Removed: 
    mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h
    mlir/lib/Conversion/SCFToStandard/CMakeLists.txt
    mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
    mlir/test/Conversion/SCFToStandard/convert-to-cfg.mlir
    mlir/test/Dialect/Standard/canonicalize-cf.mlir
    mlir/test/Dialect/Standard/ops.mlir
    mlir/test/Dialect/Standard/parser.mlir


################################################################################
diff  --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index 2e8c1685a06f7..30108ec2069b9 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -27,8 +27,8 @@ namespace fir::support {
 #define FLANG_NONCODEGEN_DIALECT_LIST                                          \
   mlir::AffineDialect, FIROpsDialect, mlir::acc::OpenACCDialect,               \
       mlir::omp::OpenMPDialect, mlir::scf::SCFDialect,                         \
-      mlir::arith::ArithmeticDialect, mlir::StandardOpsDialect,                \
-      mlir::vector::VectorDialect
+      mlir::arith::ArithmeticDialect, mlir::cf::ControlFlowDialect,            \
+      mlir::StandardOpsDialect, mlir::vector::VectorDialect
 
 // The definitive list of dialects used by flang.
 #define FLANG_DIALECT_LIST                                                     \

diff  --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 1be3d59dde490..d47aa504d433b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -9,7 +9,7 @@
 /// This file defines some shared command-line options that can be used when
 /// debugging the test tools. This file must be included into the tool.
 
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/Passes.h"
@@ -139,7 +139,7 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) {
 
   // convert control flow to CFG form
   fir::addCfgConversionPass(pm);
-  pm.addPass(mlir::createLowerToCFGPass());
+  pm.addPass(mlir::createConvertSCFToCFPass());
 
   pm.addPass(mlir::createCanonicalizerPass(config));
 }

diff  --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index b5197fedaadd2..025abdcd8d56a 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -32,7 +32,7 @@ add_flang_library(FortranLower
   FortranSemantics
   MLIRAffineToStandard
   MLIRLLVMIR
-  MLIRSCFToStandard
+  MLIRSCFToControlFlow
   MLIRStandard
 
   LINK_COMPONENTS

diff  --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index dc766ab2fde54..d27add522a89b 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -18,6 +18,7 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Support/TypeCode.h"
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/IR/BuiltinTypes.h"
@@ -3293,6 +3294,8 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase<FIRToLLVMLowering> {
     mlir::populateStdToLLVMConversionPatterns(typeConverter, pattern);
     mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter,
                                                             pattern);
+    mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter,
+                                                          pattern);
     mlir::ConversionTarget target{*context};
     target.addLegalDialect<mlir::LLVM::LLVMDialect>();
 

diff  --git a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
index c2a607a43ae20..d433069cc7c77 100644
--- a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
+++ b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
@@ -13,6 +13,7 @@
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/Support/Debug.h"
@@ -332,9 +333,9 @@ ArrayCopyAnalysis::arrayAccesses(ArrayLoadOp load) {
                  << "add modify {" << *owner << "} to array value set\n");
       accesses.push_back(owner);
       appendToQueue(update.getResult(1));
-    } else if (auto br = mlir::dyn_cast<mlir::BranchOp>(owner)) {
+    } else if (auto br = mlir::dyn_cast<mlir::cf::BranchOp>(owner)) {
       branchOp(br.getDest(), br.getDestOperands());
-    } else if (auto br = mlir::dyn_cast<mlir::CondBranchOp>(owner)) {
+    } else if (auto br = mlir::dyn_cast<mlir::cf::CondBranchOp>(owner)) {
       branchOp(br.getTrueDest(), br.getTrueOperands());
       branchOp(br.getFalseDest(), br.getFalseOperands());
     } else if (mlir::isa<ArrayMergeStoreOp>(owner)) {
@@ -789,9 +790,9 @@ class ArrayValueCopyConverter
     patterns1.insert<ArrayUpdateConversion>(context, analysis, useMap);
     patterns1.insert<ArrayModifyConversion>(context, analysis, useMap);
     mlir::ConversionTarget target(*context);
-    target.addLegalDialect<FIROpsDialect, mlir::scf::SCFDialect,
-                           mlir::arith::ArithmeticDialect,
-                           mlir::StandardOpsDialect>();
+    target.addLegalDialect<
+        FIROpsDialect, mlir::scf::SCFDialect, mlir::arith::ArithmeticDialect,
+        mlir::cf::ControlFlowDialect, mlir::StandardOpsDialect>();
     target.addIllegalOp<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>();
     // Rewrite the array fetch and array update ops.
     if (mlir::failed(

diff  --git a/flang/lib/Optimizer/Transforms/RewriteLoop.cpp b/flang/lib/Optimizer/Transforms/RewriteLoop.cpp
index 92da77e74e8ba..e8eefb5c3f4f6 100644
--- a/flang/lib/Optimizer/Transforms/RewriteLoop.cpp
+++ b/flang/lib/Optimizer/Transforms/RewriteLoop.cpp
@@ -11,6 +11,7 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -84,7 +85,7 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
     loopOperands.append(operands.begin(), operands.end());
     loopOperands.push_back(iters);
 
-    rewriter.create<mlir::BranchOp>(loc, conditionalBlock, loopOperands);
+    rewriter.create<mlir::cf::BranchOp>(loc, conditionalBlock, loopOperands);
 
     // Last loop block
     auto *terminator = lastBlock->getTerminator();
@@ -105,7 +106,7 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
                                    : terminator->operand_begin();
     loopCarried.append(begin, terminator->operand_end());
     loopCarried.push_back(itersMinusOne);
-    rewriter.create<mlir::BranchOp>(loc, conditionalBlock, loopCarried);
+    rewriter.create<mlir::cf::BranchOp>(loc, conditionalBlock, loopCarried);
     rewriter.eraseOp(terminator);
 
     // Conditional block
@@ -114,9 +115,9 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
     auto comparison = rewriter.create<mlir::arith::CmpIOp>(
         loc, arith::CmpIPredicate::sgt, itersLeft, zero);
 
-    rewriter.create<mlir::CondBranchOp>(loc, comparison, firstBlock,
-                                        llvm::ArrayRef<mlir::Value>(), endBlock,
-                                        llvm::ArrayRef<mlir::Value>());
+    rewriter.create<mlir::cf::CondBranchOp>(
+        loc, comparison, firstBlock, llvm::ArrayRef<mlir::Value>(), endBlock,
+        llvm::ArrayRef<mlir::Value>());
 
     // The result of the loop operation is the values of the condition block
     // arguments except the induction variable on the last iteration.
@@ -155,7 +156,7 @@ class CfgIfConv : public mlir::OpRewritePattern<fir::IfOp> {
     } else {
       continueBlock =
           rewriter.createBlock(remainingOpsBlock, ifOp.getResultTypes());
-      rewriter.create<mlir::BranchOp>(loc, remainingOpsBlock);
+      rewriter.create<mlir::cf::BranchOp>(loc, remainingOpsBlock);
     }
 
     // Move blocks from the "then" region to the region containing 'fir.if',
@@ -165,7 +166,8 @@ class CfgIfConv : public mlir::OpRewritePattern<fir::IfOp> {
     auto *ifOpTerminator = ifOpRegion.back().getTerminator();
     auto ifOpTerminatorOperands = ifOpTerminator->getOperands();
     rewriter.setInsertionPointToEnd(&ifOpRegion.back());
-    rewriter.create<mlir::BranchOp>(loc, continueBlock, ifOpTerminatorOperands);
+    rewriter.create<mlir::cf::BranchOp>(loc, continueBlock,
+                                        ifOpTerminatorOperands);
     rewriter.eraseOp(ifOpTerminator);
     rewriter.inlineRegionBefore(ifOpRegion, continueBlock);
 
@@ -179,14 +181,14 @@ class CfgIfConv : public mlir::OpRewritePattern<fir::IfOp> {
       auto *otherwiseTerm = otherwiseRegion.back().getTerminator();
       auto otherwiseTermOperands = otherwiseTerm->getOperands();
       rewriter.setInsertionPointToEnd(&otherwiseRegion.back());
-      rewriter.create<mlir::BranchOp>(loc, continueBlock,
-                                      otherwiseTermOperands);
+      rewriter.create<mlir::cf::BranchOp>(loc, continueBlock,
+                                          otherwiseTermOperands);
       rewriter.eraseOp(otherwiseTerm);
       rewriter.inlineRegionBefore(otherwiseRegion, continueBlock);
     }
 
     rewriter.setInsertionPointToEnd(condBlock);
-    rewriter.create<mlir::CondBranchOp>(
+    rewriter.create<mlir::cf::CondBranchOp>(
         loc, ifOp.condition(), ifOpBlock, llvm::ArrayRef<mlir::Value>(),
         otherwiseBlock, llvm::ArrayRef<mlir::Value>());
     rewriter.replaceOp(ifOp, continueBlock->getArguments());
@@ -241,7 +243,7 @@ class CfgIterWhileConv : public mlir::OpRewritePattern<fir::IterWhileOp> {
     auto begin = whileOp.finalValue() ? std::next(terminator->operand_begin())
                                       : terminator->operand_begin();
     loopCarried.append(begin, terminator->operand_end());
-    rewriter.create<mlir::BranchOp>(loc, conditionBlock, loopCarried);
+    rewriter.create<mlir::cf::BranchOp>(loc, conditionBlock, loopCarried);
     rewriter.eraseOp(terminator);
 
     // Compute loop bounds before branching to the condition.
@@ -256,7 +258,7 @@ class CfgIterWhileConv : public mlir::OpRewritePattern<fir::IterWhileOp> {
     destOperands.push_back(lowerBound);
     auto iterOperands = whileOp.getIterOperands();
     destOperands.append(iterOperands.begin(), iterOperands.end());
-    rewriter.create<mlir::BranchOp>(loc, conditionBlock, destOperands);
+    rewriter.create<mlir::cf::BranchOp>(loc, conditionBlock, destOperands);
 
     // With the body block done, we can fill in the condition block.
     rewriter.setInsertionPointToEnd(conditionBlock);
@@ -278,9 +280,9 @@ class CfgIterWhileConv : public mlir::OpRewritePattern<fir::IterWhileOp> {
     // Remember to AND in the early-exit bool.
     auto comparison =
         rewriter.create<mlir::arith::AndIOp>(loc, iterateVar, cmp2);
-    rewriter.create<mlir::CondBranchOp>(loc, comparison, firstBodyBlock,
-                                        llvm::ArrayRef<mlir::Value>(), endBlock,
-                                        llvm::ArrayRef<mlir::Value>());
+    rewriter.create<mlir::cf::CondBranchOp>(
+        loc, comparison, firstBodyBlock, llvm::ArrayRef<mlir::Value>(),
+        endBlock, llvm::ArrayRef<mlir::Value>());
     // The result of the loop operation is the values of the condition block
     // arguments except the induction variable on the last iteration.
     auto args = whileOp.finalValue()
@@ -300,8 +302,8 @@ class CfgConversion : public CFGConversionBase<CfgConversion> {
     patterns.insert<CfgLoopConv, CfgIfConv, CfgIterWhileConv>(
         context, forceLoopToExecuteOnce);
     mlir::ConversionTarget target(*context);
-    target.addLegalDialect<mlir::AffineDialect, FIROpsDialect,
-                           mlir::StandardOpsDialect>();
+    target.addLegalDialect<mlir::AffineDialect, mlir::cf::ControlFlowDialect,
+                           FIROpsDialect, mlir::StandardOpsDialect>();
 
     // apply the patterns
     target.addIllegalOp<ResultOp, DoLoopOp, IfOp, IterWhileOp>();

diff  --git a/flang/test/Fir/Todo/select_case_with_character.fir b/flang/test/Fir/Todo/select_case_with_character.fir
index 823a65b1c59f2..5b7aa081c1759 100644
--- a/flang/test/Fir/Todo/select_case_with_character.fir
+++ b/flang/test/Fir/Todo/select_case_with_character.fir
@@ -10,10 +10,10 @@ func @select_case_charachter(%arg0: !fir.char<2, 10>, %arg1: !fir.char<2, 10>, %
                                             unit, ^bb3]
 ^bb1:
   %c1_i32 = arith.constant 1 : i32
-  br ^bb3
+  cf.br ^bb3
 ^bb2:
   %c2_i32 = arith.constant 2 : i32
-  br ^bb3
+  cf.br ^bb3
 ^bb3:
   return
 }

diff  --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index e5256629062fb..fc384879b7fec 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -1175,23 +1175,23 @@ func @select_case_integer(%arg0: !fir.ref<i32>) -> i32 {
 ^bb1:  // pred: ^bb0
   %c1_i32_0 = arith.constant 1 : i32
   fir.store %c1_i32_0 to %arg0 : !fir.ref<i32>
-  br ^bb6
+  cf.br ^bb6
 ^bb2:  // pred: ^bb0
   %c2_i32_1 = arith.constant 2 : i32
   fir.store %c2_i32_1 to %arg0 : !fir.ref<i32>
-  br ^bb6
+  cf.br ^bb6
 ^bb3:  // pred: ^bb0
   %c0_i32 = arith.constant 0 : i32
   fir.store %c0_i32 to %arg0 : !fir.ref<i32>
-  br ^bb6
+  cf.br ^bb6
 ^bb4:  // pred: ^bb0
   %c4_i32_2 = arith.constant 4 : i32
   fir.store %c4_i32_2 to %arg0 : !fir.ref<i32>
-  br ^bb6
+  cf.br ^bb6
 ^bb5:  // 3 preds: ^bb0, ^bb0, ^bb0
   %c7_i32_3 = arith.constant 7 : i32
   fir.store %c7_i32_3 to %arg0 : !fir.ref<i32>
-  br ^bb6
+  cf.br ^bb6
 ^bb6:  // 5 preds: ^bb1, ^bb2, ^bb3, ^bb4, ^bb5
   %3 = fir.load %arg0 : !fir.ref<i32>
   return %3 : i32
@@ -1275,10 +1275,10 @@ func @select_case_logical(%arg0: !fir.ref<!fir.logical<4>>) {
                             unit, ^bb3]
 ^bb1:
   %c1_i32 = arith.constant 1 : i32
-  br ^bb3
+  cf.br ^bb3
 ^bb2:
   %c2_i32 = arith.constant 2 : i32
-  br ^bb3
+  cf.br ^bb3
 ^bb3:
   return
 }

diff  --git a/flang/test/Fir/memref-data-flow.fir b/flang/test/Fir/memref-data-flow.fir
index 797d2a0ab3d2b..610e8171a4a4c 100644
--- a/flang/test/Fir/memref-data-flow.fir
+++ b/flang/test/Fir/memref-data-flow.fir
@@ -9,10 +9,10 @@ func @load_store_chain_removal(%arg0: !fir.ref<!fir.array<60xi32>>, %arg1: !fir.
   %c1 = arith.constant 1 : index
   %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFf1dcEi"}
   %1 = fir.alloca !fir.array<60xi32> {bindc_name = "t1", uniq_name = "_QFf1dcEt1"}
-  br ^bb1(%c1, %c60 : index, index)
+  cf.br ^bb1(%c1, %c60 : index, index)
 ^bb1(%2: index, %3: index):  // 2 preds: ^bb0, ^bb2
   %4 = arith.cmpi sgt, %3, %c0 : index
-  cond_br %4, ^bb2, ^bb3
+  cf.cond_br %4, ^bb2, ^bb3
 ^bb2:  // pred: ^bb1
   %5 = fir.convert %2 : (index) -> i32
   fir.store %5 to %0 : !fir.ref<i32>
@@ -26,14 +26,14 @@ func @load_store_chain_removal(%arg0: !fir.ref<!fir.array<60xi32>>, %arg1: !fir.
   fir.store %11 to %12 : !fir.ref<i32>
   %13 = arith.addi %2, %c1 : index
   %14 = arith.subi %3, %c1 : index
-  br ^bb1(%13, %14 : index, index)
+  cf.br ^bb1(%13, %14 : index, index)
 ^bb3:  // pred: ^bb1
   %15 = fir.convert %2 : (index) -> i32
   fir.store %15 to %0 : !fir.ref<i32>
-  br ^bb4(%c1, %c60 : index, index)
+  cf.br ^bb4(%c1, %c60 : index, index)
 ^bb4(%16: index, %17: index):  // 2 preds: ^bb3, ^bb5
   %18 = arith.cmpi sgt, %17, %c0 : index
-  cond_br %18, ^bb5, ^bb6
+  cf.cond_br %18, ^bb5, ^bb6
 ^bb5:  // pred: ^bb4
   %19 = fir.convert %16 : (index) -> i32
   fir.store %19 to %0 : !fir.ref<i32>
@@ -49,7 +49,7 @@ func @load_store_chain_removal(%arg0: !fir.ref<!fir.array<60xi32>>, %arg1: !fir.
   fir.store %27 to %28 : !fir.ref<i32>
   %29 = arith.addi %16, %c1 : index
   %30 = arith.subi %17, %c1 : index
-  br ^bb4(%29, %30 : index, index)
+  cf.br ^bb4(%29, %30 : index, index)
 ^bb6:  // pred: ^bb4
   %31 = fir.convert %16 : (index) -> i32
   fir.store %31 to %0 : !fir.ref<i32>

diff  --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt
index a2e92cf1beb3b..72e0355e5e9ed 100644
--- a/flang/tools/bbc/CMakeLists.txt
+++ b/flang/tools/bbc/CMakeLists.txt
@@ -13,7 +13,7 @@ FIRTransforms
 FIRBuilder
 ${dialect_libs}
 MLIRAffineToStandard
-MLIRSCFToStandard
+MLIRSCFToControlFlow
 FortranCommon
 FortranParser
 FortranEvaluate

diff  --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 8ce24b5df3473..f8d76321cf124 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -38,7 +38,6 @@
 #include "flang/Semantics/semantics.h"
 #include "flang/Semantics/unparse-with-symbols.h"
 #include "flang/Version.inc"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"

diff  --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt
index d6bddd0eec8f6..b0d1099c80adb 100644
--- a/flang/tools/fir-opt/CMakeLists.txt
+++ b/flang/tools/fir-opt/CMakeLists.txt
@@ -18,7 +18,7 @@ target_link_libraries(fir-opt PRIVATE
   MLIRTransforms
   MLIRAffineToStandard
   MLIRAnalysis
-  MLIRSCFToStandard
+  MLIRSCFToControlFlow
   MLIRParser
   MLIRStandardToLLVM
   MLIRSupport

diff  --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index f986ea1a64cf8..ead3062dcfbbd 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -17,7 +17,7 @@ target_link_libraries(tco PRIVATE
   MLIRTransforms
   MLIRAffineToStandard
   MLIRAnalysis
-  MLIRSCFToStandard
+  MLIRSCFToControlFlow
   MLIRParser
   MLIRStandardToLLVM
   MLIRSupport

diff  --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index d242e70c0641e..2e1eec79306a5 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -17,7 +17,6 @@
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/KindMapping.h"
 #include "flang/Optimizer/Transforms/Passes.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"

diff  --git a/mlir/benchmark/python/common.py b/mlir/benchmark/python/common.py
index 23b667e362851..f2f156c292cfa 100644
--- a/mlir/benchmark/python/common.py
+++ b/mlir/benchmark/python/common.py
@@ -26,7 +26,7 @@ def setup_passes(mlir_module):
         f"sparse-tensor-conversion,"
         f"builtin.func"
         f"(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),"
-        f"convert-scf-to-std,"
+        f"convert-scf-to-cf,"
         f"func-bufferize,"
         f"arith-bufferize,"
         f"builtin.func(tensor-bufferize,finalizing-bufferize),"

diff  --git a/mlir/docs/BufferDeallocationInternals.md b/mlir/docs/BufferDeallocationInternals.md
index 0ed1d70976bda..131e527a54a0c 100644
--- a/mlir/docs/BufferDeallocationInternals.md
+++ b/mlir/docs/BufferDeallocationInternals.md
@@ -41,12 +41,12 @@ Example for breaking the invariant:
 ```mlir
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3()
+  cf.br ^bb3()
 ^bb2:
   partial_write(%0, %0)
-  br ^bb3()
+  cf.br ^bb3()
 ^bb3():
   test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
   return
@@ -74,13 +74,13 @@ untracked allocations are mixed:
 func @mixedAllocation(%arg0: i1) {
    %0 = memref.alloca() : memref<2xf32>  // aliases: %2
    %1 = memref.alloc() : memref<2xf32>  // aliases: %2
-   cond_br %arg0, ^bb1, ^bb2
+   cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   use(%0)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb2:
   use(%1)
-  br ^bb3(%1 : memref<2xf32>)
+  cf.br ^bb3(%1 : memref<2xf32>)
 ^bb3(%2: memref<2xf32>):
   ...
 }
@@ -129,13 +129,13 @@ BufferHoisting pass:
 
 ```mlir
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>  // aliases: %1
   use(%0)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):  // %1 could be %0 or %arg1
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
   return
@@ -150,12 +150,12 @@ of code:
 ```mlir
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>  // moved to bb0
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
    use(%0)
-   br ^bb3(%0 : memref<2xf32>)
+   cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
   return
@@ -175,14 +175,14 @@ func @condBranchDynamicType(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb3(%arg1 : memref<?xf32>)
+  cf.br ^bb3(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>   // cannot be moved upwards to the data
                                    // dependency to %0
   use(%1)
-  br ^bb3(%1 : memref<?xf32>)
+  cf.br ^bb3(%1 : memref<?xf32>)
 ^bb3(%2: memref<?xf32>):
   test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
   return
@@ -201,14 +201,14 @@ allocations have already been placed:
 ```mlir
 func @branch(%arg0: i1) {
   %0 = memref.alloc() : memref<2xf32>  // aliases: %2
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   %1 = memref.alloc() : memref<2xf32>  // resides here for demonstration purposes
                                 // aliases: %2
-  br ^bb3(%1 : memref<2xf32>)
+  cf.br ^bb3(%1 : memref<2xf32>)
 ^bb2:
   use(%0)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%2: memref<2xf32>):
   …
   return
@@ -233,16 +233,16 @@ result:
 ```mlir
 func @branch(%arg0: i1) {
   %0 = memref.alloc() : memref<2xf32>
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   %1 = memref.alloc() : memref<2xf32>
   %3 = bufferization.clone %1 : (memref<2xf32>) -> (memref<2xf32>)
   memref.dealloc %1 : memref<2xf32> // %1 can be safely freed here
-  br ^bb3(%3 : memref<2xf32>)
+  cf.br ^bb3(%3 : memref<2xf32>)
 ^bb2:
   use(%0)
   %4 = bufferization.clone %0 : (memref<2xf32>) -> (memref<2xf32>)
-  br ^bb3(%4 : memref<2xf32>)
+  cf.br ^bb3(%4 : memref<2xf32>)
 ^bb3(%2: memref<2xf32>):
   …
   memref.dealloc %2 : memref<2xf32> // free temp buffer %2
@@ -273,23 +273,23 @@ func @condBranchDynamicTypeNested(
   %arg1: memref<?xf32>,  // aliases: %3, %4
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb6(%arg1 : memref<?xf32>)
+  cf.br ^bb6(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>   // cannot be moved upwards due to the data
                                    // dependency to %0
                                    // aliases: %2, %3, %4
   use(%1)
-  cond_br %arg0, ^bb3, ^bb4
+  cf.cond_br %arg0, ^bb3, ^bb4
 ^bb3:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb4:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb5(%2: memref<?xf32>):  // non-crit. alias of %1, since %1 dominates %2
-  br ^bb6(%2 : memref<?xf32>)
+  cf.br ^bb6(%2 : memref<?xf32>)
 ^bb6(%3: memref<?xf32>):  // crit. alias of %arg1 and %2 (in other words %1)
-  br ^bb7(%3 : memref<?xf32>)
+  cf.br ^bb7(%3 : memref<?xf32>)
 ^bb7(%4: memref<?xf32>):  // non-crit. alias of %3, since %3 dominates %4
   test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
   return
@@ -306,25 +306,25 @@ func @condBranchDynamicTypeNested(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
 ^bb1:
   // temp buffer required due to alias %3
   %5 = bufferization.clone %arg1 : (memref<?xf32>) -> (memref<?xf32>)
-  br ^bb6(%5 : memref<?xf32>)
+  cf.br ^bb6(%5 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   use(%1)
-  cond_br %arg0, ^bb3, ^bb4
+  cf.cond_br %arg0, ^bb3, ^bb4
 ^bb3:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb4:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb5(%2: memref<?xf32>):
   %6 = bufferization.clone %1 : (memref<?xf32>) -> (memref<?xf32>)
   memref.dealloc %1 : memref<?xf32>
-  br ^bb6(%6 : memref<?xf32>)
+  cf.br ^bb6(%6 : memref<?xf32>)
 ^bb6(%3: memref<?xf32>):
-  br ^bb7(%3 : memref<?xf32>)
+  cf.br ^bb7(%3 : memref<?xf32>)
 ^bb7(%4: memref<?xf32>):
   test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
   memref.dealloc %3 : memref<?xf32>  // free %3, since %4 is a non-crit. alias of %3

diff  --git a/mlir/docs/Diagnostics.md b/mlir/docs/Diagnostics.md
index 82beaaff530d0..221f11a8ece62 100644
--- a/mlir/docs/Diagnostics.md
+++ b/mlir/docs/Diagnostics.md
@@ -295,7 +295,7 @@ A few examples are shown below:
 ```mlir
 // Expect an error on the same line.
 func @bad_branch() {
-  br ^missing  // expected-error {{reference to an undefined block}}
+  cf.br ^missing  // expected-error {{reference to an undefined block}}
 }
 
 // Expect an error on an adjacent line.

diff  --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md
index 39cee4b822b5f..4731ef5efbc5a 100644
--- a/mlir/docs/DialectConversion.md
+++ b/mlir/docs/DialectConversion.md
@@ -114,8 +114,8 @@ struct MyTarget : public ConversionTarget {
     /// All operations within the GPU dialect are illegal.
     addIllegalDialect<GPUDialect>();
 
-    /// Mark `std.br` and `std.cond_br` as illegal.
-    addIllegalOp<BranchOp, CondBranchOp>();
+    /// Mark `cf.br` and `cf.cond_br` as illegal.
+    addIllegalOp<cf::BranchOp, cf::CondBranchOp>();
   }
 
   /// Implement the default legalization handler to handle operations marked as

diff  --git a/mlir/docs/Dialects/emitc.md b/mlir/docs/Dialects/emitc.md
index 5e53d3d259a9d..e90fd32d3328a 100644
--- a/mlir/docs/Dialects/emitc.md
+++ b/mlir/docs/Dialects/emitc.md
@@ -23,10 +23,11 @@ argument `-declare-variables-at-top`.
 Besides operations part of the EmitC dialect, the Cpp targets supports
 translating the following operations:
 
+*   'cf' Dialect
+    *   `cf.br`
+    *   `cf.cond_br`
 *   'std' Dialect
-    *   `std.br`
     *   `std.call`
-    *   `std.cond_br`
     *   `std.constant`
     *   `std.return`
 *   'scf' Dialect

diff  --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md
index 4b956c581cb55..92a5413a656d8 100644
--- a/mlir/docs/LangRef.md
+++ b/mlir/docs/LangRef.md
@@ -391,21 +391,21 @@ arguments:
 ```mlir
 func @simple(i64, i1) -> i64 {
 ^bb0(%a: i64, %cond: i1): // Code dominated by ^bb0 may refer to %a
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
-  br ^bb3(%a: i64)    // Branch passes %a as the argument
+  cf.br ^bb3(%a: i64)    // Branch passes %a as the argument
 
 ^bb2:
   %b = arith.addi %a, %a : i64
-  br ^bb3(%b: i64)    // Branch passes %b as the argument
+  cf.br ^bb3(%b: i64)    // Branch passes %b as the argument
 
 // ^bb3 receives an argument, named %c, from predecessors
 // and passes it on to bb4 along with %a. %a is referenced
 // directly from its defining operation and is not passed through
 // an argument of ^bb3.
 ^bb3(%c: i64):
-  br ^bb4(%c, %a : i64, i64)
+  cf.br ^bb4(%c, %a : i64, i64)
 
 ^bb4(%d : i64, %e : i64):
   %0 = arith.addi %d, %e : i64
@@ -525,12 +525,12 @@ Example:
 ```mlir
 func @accelerator_compute(i64, i1) -> i64 { // An SSACFG region
 ^bb0(%a: i64, %cond: i1): // Code dominated by ^bb0 may refer to %a
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   // This def for %value does not dominate ^bb2
   %value = "op.convert"(%a) : (i64) -> i64
-  br ^bb3(%a: i64)    // Branch passes %a as the argument
+  cf.br ^bb3(%a: i64)    // Branch passes %a as the argument
 
 ^bb2:
   accelerator.launch() { // An SSACFG region

diff  --git a/mlir/docs/PatternRewriter.md b/mlir/docs/PatternRewriter.md
index a012237b742c7..7b0db967c35b7 100644
--- a/mlir/docs/PatternRewriter.md
+++ b/mlir/docs/PatternRewriter.md
@@ -356,24 +356,24 @@ Example output is shown below:
 
 ```
 //===-------------------------------------------===//
-Processing operation : 'std.cond_br'(0x60f000001120) {
-  "std.cond_br"(%arg0)[^bb2, ^bb2] {operand_segment_sizes = dense<[1, 0, 0]> : vector<3xi32>} : (i1) -> ()
+Processing operation : 'cf.cond_br'(0x60f000001120) {
+  "cf.cond_br"(%arg0)[^bb2, ^bb2] {operand_segment_sizes = dense<[1, 0, 0]> : vector<3xi32>} : (i1) -> ()
 
-  * Pattern SimplifyConstCondBranchPred : 'std.cond_br -> ()' {
+  * Pattern SimplifyConstCondBranchPred : 'cf.cond_br -> ()' {
   } -> failure : pattern failed to match
 
-  * Pattern SimplifyCondBranchIdenticalSuccessors : 'std.cond_br -> ()' {
-    ** Insert  : 'std.br'(0x60b000003690)
-    ** Replace : 'std.cond_br'(0x60f000001120)
+  * Pattern SimplifyCondBranchIdenticalSuccessors : 'cf.cond_br -> ()' {
+    ** Insert  : 'cf.br'(0x60b000003690)
+    ** Replace : 'cf.cond_br'(0x60f000001120)
   } -> success : pattern applied successfully
 } -> success : pattern matched
 //===-------------------------------------------===//
 ```
 
-This output is describing the processing of a `std.cond_br` operation. We first
+This output is describing the processing of a `cf.cond_br` operation. We first
 try to apply the `SimplifyConstCondBranchPred`, which fails. From there, another
 pattern (`SimplifyCondBranchIdenticalSuccessors`) is applied that matches the
-`std.cond_br` and replaces it with a `std.br`.
+`cf.cond_br` and replaces it with a `cf.br`.
 
 ## Debugging
 

diff  --git a/mlir/docs/Rationale/Rationale.md b/mlir/docs/Rationale/Rationale.md
index 0f19c2a812ce0..90c9199692925 100644
--- a/mlir/docs/Rationale/Rationale.md
+++ b/mlir/docs/Rationale/Rationale.md
@@ -560,24 +560,24 @@ func @search(%A: memref<?x?xi32>, %S: <?xi32>, %key : i32) {
 
 func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32, %i : i32) {
   %nj = memref.dim %A, 1 : memref<?x?xi32>
-  br ^bb1(0)
+  cf.br ^bb1(0)
 
 ^bb1(%j: i32)
   %p1 = arith.cmpi "lt", %j, %nj : i32
-  cond_br %p1, ^bb2, ^bb5
+  cf.cond_br %p1, ^bb2, ^bb5
 
 ^bb2:
   %v = affine.load %A[%i, %j] : memref<?x?xi32>
   %p2 = arith.cmpi "eq", %v, %key : i32
-  cond_br %p2, ^bb3(%j), ^bb4
+  cf.cond_br %p2, ^bb3(%j), ^bb4
 
 ^bb3(%j: i32)
   affine.store %j, %S[%i] : memref<?xi32>
-  br ^bb5
+  cf.br ^bb5
 
 ^bb4:
   %jinc = arith.addi %j, 1 : i32
-  br ^bb1(%jinc)
+  cf.br ^bb1(%jinc)
 
 ^bb5:
   return

diff  --git a/mlir/docs/Tutorials/Toy/Ch-6.md b/mlir/docs/Tutorials/Toy/Ch-6.md
index 9a31c8d249fa7..06c0681f2294f 100644
--- a/mlir/docs/Tutorials/Toy/Ch-6.md
+++ b/mlir/docs/Tutorials/Toy/Ch-6.md
@@ -94,10 +94,11 @@ multiple stages by relying on
 ```c++
   mlir::RewritePatternSet patterns(&getContext());
   mlir::populateAffineToStdConversionPatterns(patterns, &getContext());
-  mlir::populateLoopToStdConversionPatterns(patterns, &getContext());
+  mlir::cf::populateSCFToControlFlowConversionPatterns(patterns, &getContext());
   mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter,
                                                           patterns);
   mlir::populateStdToLLVMConversionPatterns(typeConverter, patterns);
+  mlir::cf::populateControlFlowToLLVMConversionPatterns(patterns, &getContext());
 
   // The only remaining operation, to lower from the `toy` dialect, is the
   // PrintOp.
@@ -207,7 +208,7 @@ define void @main() {
   %109 = memref.load double, double* %108
   %110 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @frmt_spec, i64 0, i64 0), double %109)
   %111 = add i64 %100, 1
   br label %99
 
   ...
 

diff  --git a/mlir/docs/includes/img/branch_example_post_move.svg b/mlir/docs/includes/img/branch_example_post_move.svg
index 29d037a6eb361..870df495a13c6 100644
--- a/mlir/docs/includes/img/branch_example_post_move.svg
+++ b/mlir/docs/includes/img/branch_example_post_move.svg
@@ -361,7 +361,7 @@
 </tspan></tspan><tspan
          x="73.476562"
          y="88.293896"><tspan
-           style="font-size:5.64444px">br bb3(%0)</tspan></tspan></text>
+           style="font-size:5.64444px">cf.br bb3(%0)</tspan></tspan></text>
     <text
        xml:space="preserve"
        id="text1894"

diff  --git a/mlir/docs/includes/img/branch_example_pre_move.svg b/mlir/docs/includes/img/branch_example_pre_move.svg
index 5ce713ba8b0fc..5eb15fd13946e 100644
--- a/mlir/docs/includes/img/branch_example_pre_move.svg
+++ b/mlir/docs/includes/img/branch_example_pre_move.svg
@@ -362,7 +362,7 @@
 </tspan></tspan><tspan
          x="73.476562"
          y="88.293896"><tspan
-           style="font-size:5.64444px">br bb3(%0)</tspan></tspan></text>
+           style="font-size:5.64444px">cf.br bb3(%0)</tspan></tspan></text>
     <text
        xml:space="preserve"
        id="text1894"

diff  --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
index 648b9e87e1f8e..855566b3f46fb 100644
--- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
@@ -26,10 +26,11 @@
 
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -200,10 +201,11 @@ void ToyToLLVMLoweringPass::runOnOperation() {
   // set of legal ones.
   RewritePatternSet patterns(&getContext());
   populateAffineToStdConversionPatterns(patterns);
-  populateLoopToStdConversionPatterns(patterns);
+  populateSCFToControlFlowConversionPatterns(patterns);
   mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter,
                                                           patterns);
   populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
+  cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns);
   populateStdToLLVMConversionPatterns(typeConverter, patterns);
 
   // The only remaining operation to lower from the `toy` dialect, is the

diff  --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
index 648b9e87e1f8e..855566b3f46fb 100644
--- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
@@ -26,10 +26,11 @@
 
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -200,10 +201,11 @@ void ToyToLLVMLoweringPass::runOnOperation() {
   // set of legal ones.
   RewritePatternSet patterns(&getContext());
   populateAffineToStdConversionPatterns(patterns);
-  populateLoopToStdConversionPatterns(patterns);
+  populateSCFToControlFlowConversionPatterns(patterns);
   mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter,
                                                           patterns);
   populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
+  cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns);
   populateStdToLLVMConversionPatterns(typeConverter, patterns);
 
   // The only remaining operation to lower from the `toy` dialect, is the

diff  --git a/mlir/include/mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h b/mlir/include/mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h
new file mode 100644
index 0000000000000..92608c748f2d8
--- /dev/null
+++ b/mlir/include/mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h
@@ -0,0 +1,35 @@
+//===- ControlFlowToLLVM.h - ControlFlow to LLVM -----------*- C++ ------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Define conversions from the ControlFlow dialect to the LLVM IR dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_CONTROLFLOWTOLLVM_CONTROLFLOWTOLLVM_H
+#define MLIR_CONVERSION_CONTROLFLOWTOLLVM_CONTROLFLOWTOLLVM_H
+
+#include <memory>
+
+namespace mlir {
+class LLVMTypeConverter;
+class RewritePatternSet;
+class Pass;
+
+namespace cf {
+/// Collect the patterns to convert from the ControlFlow dialect to LLVM. The
+/// conversion patterns capture the LLVMTypeConverter by reference meaning the
+/// references have to remain alive during the entire pattern lifetime.
+void populateControlFlowToLLVMConversionPatterns(LLVMTypeConverter &converter,
+                                                 RewritePatternSet &patterns);
+
+/// Creates a pass to convert the ControlFlow dialect into the LLVMIR dialect.
+std::unique_ptr<Pass> createConvertControlFlowToLLVMPass();
+} // namespace cf
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_CONTROLFLOWTOLLVM_CONTROLFLOWTOLLVM_H

diff  --git a/mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h b/mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h
new file mode 100644
index 0000000000000..43578ffffae2d
--- /dev/null
+++ b/mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h
@@ -0,0 +1,28 @@
+//===- ControlFlowToSPIRV.h - CF to SPIR-V Patterns --------*- C++ ------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides patterns to convert ControlFlow dialect to SPIR-V dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_CONTROLFLOWTOSPIRV_CONTROLFLOWTOSPIRV_H
+#define MLIR_CONVERSION_CONTROLFLOWTOSPIRV_CONTROLFLOWTOSPIRV_H
+
+namespace mlir {
+class RewritePatternSet;
+class SPIRVTypeConverter;
+
+namespace cf {
+/// Appends to a pattern list additional patterns for translating ControlFlow
+/// ops to SPIR-V ops.
+void populateControlFlowToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
+                                        RewritePatternSet &patterns);
+} // namespace cf
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_CONTROLFLOWTOSPIRV_CONTROLFLOWTOSPIRV_H

diff  --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
index b39105aad3892..8fd3274dcf62e 100644
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -17,6 +17,8 @@
 #include "mlir/Conversion/BufferizationToMemRef/BufferizationToMemRef.h"
 #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h"
 #include "mlir/Conversion/ComplexToStandard/ComplexToStandard.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h"
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
@@ -35,10 +37,10 @@
 #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h"
 #include "mlir/Conversion/PDLToPDLInterp/PDLToPDLInterp.h"
 #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
 #include "mlir/Conversion/SCFToOpenMP/SCFToOpenMP.h"
 #include "mlir/Conversion/SCFToSPIRV/SCFToSPIRVPass.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
 #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h"
 #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"

diff  --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 1133db2b8bb24..adb97abf925da 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -181,6 +181,28 @@ def ConvertComplexToStandard : Pass<"convert-complex-to-standard", "FuncOp"> {
   let dependentDialects = ["math::MathDialect"];
 }
 
+//===----------------------------------------------------------------------===//
+// ControlFlowToLLVM
+//===----------------------------------------------------------------------===//
+
+def ConvertControlFlowToLLVM : Pass<"convert-cf-to-llvm", "ModuleOp"> {
+  let summary = "Convert ControlFlow operations to the LLVM dialect";
+  let description = [{
+    Convert ControlFlow operations into LLVM IR dialect operations.
+
+    If other operations are present and their results are required by the LLVM
+    IR dialect operations, the pass will fail.  Any LLVM IR operations or types
+    already present in the IR will be kept as is.
+  }];
+  let constructor = "mlir::cf::createConvertControlFlowToLLVMPass()";
+  let dependentDialects = ["LLVM::LLVMDialect"];
+  let options = [
+    Option<"indexBitwidth", "index-bitwidth", "unsigned",
+           /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
+           "Bitwidth of the index type, 0 to use size of machine word">,
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // GPUCommon
 //===----------------------------------------------------------------------===//
@@ -460,6 +482,17 @@ def ReconcileUnrealizedCasts : Pass<"reconcile-unrealized-casts"> {
   let constructor = "mlir::createReconcileUnrealizedCastsPass()";
 }
 
+//===----------------------------------------------------------------------===//
+// SCFToControlFlow
+//===----------------------------------------------------------------------===//
+
+def SCFToControlFlow : Pass<"convert-scf-to-cf"> {
+  let summary = "Convert SCF dialect to ControlFlow dialect, replacing structured"
+                " control flow with a CFG";
+  let constructor = "mlir::createConvertSCFToCFPass()";
+  let dependentDialects = ["cf::ControlFlowDialect"];
+}
+
 //===----------------------------------------------------------------------===//
 // SCFToOpenMP
 //===----------------------------------------------------------------------===//
@@ -488,17 +521,6 @@ def SCFToSPIRV : Pass<"convert-scf-to-spirv", "ModuleOp"> {
   let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
-//===----------------------------------------------------------------------===//
-// SCFToStandard
-//===----------------------------------------------------------------------===//
-
-def SCFToStandard : Pass<"convert-scf-to-std"> {
-  let summary = "Convert SCF dialect to Standard dialect, replacing structured"
-                " control flow with a CFG";
-  let constructor = "mlir::createLowerToCFGPass()";
-  let dependentDialects = ["StandardOpsDialect"];
-}
-
 //===----------------------------------------------------------------------===//
 // SCFToGPU
 //===----------------------------------------------------------------------===//
@@ -547,7 +569,7 @@ def ConvertShapeConstraints: Pass<"convert-shape-constraints", "FuncOp"> {
     computation lowering.
   }];
   let constructor = "mlir::createConvertShapeConstraintsPass()";
-  let dependentDialects = ["StandardOpsDialect", "scf::SCFDialect"];
+  let dependentDialects = ["cf::ControlFlowDialect", "scf::SCFDialect"];
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/include/mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h b/mlir/include/mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h
new file mode 100644
index 0000000000000..d26b0b2711b15
--- /dev/null
+++ b/mlir/include/mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h
@@ -0,0 +1,28 @@
+//===- ConvertSCFToControlFlow.h - Pass entrypoint --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_SCFTOCONTROLFLOW_SCFTOCONTROLFLOW_H_
+#define MLIR_CONVERSION_SCFTOCONTROLFLOW_SCFTOCONTROLFLOW_H_
+
+#include <memory>
+
+namespace mlir {
+class Pass;
+class RewritePatternSet;
+
+/// Collect a set of patterns to convert SCF operations to CFG branch-based
+/// operations within the ControlFlow dialect.
+void populateSCFToControlFlowConversionPatterns(RewritePatternSet &patterns);
+
+/// Creates a pass to convert SCF operations to CFG branch-based operation in
+/// the ControlFlow dialect.
+std::unique_ptr<Pass> createConvertSCFToCFPass();
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_SCFTOCONTROLFLOW_SCFTOCONTROLFLOW_H_

diff  --git a/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h b/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h
deleted file mode 100644
index b29fdb72f7ecc..0000000000000
--- a/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- ConvertSCFToStandard.h - Pass entrypoint -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_CONVERSION_SCFTOSTANDARD_SCFTOSTANDARD_H_
-#define MLIR_CONVERSION_SCFTOSTANDARD_SCFTOSTANDARD_H_
-
-#include <memory>
-#include <vector>
-
-namespace mlir {
-struct LogicalResult;
-class Pass;
-
-class RewritePatternSet;
-
-/// Collect a set of patterns to lower from scf.for, scf.if, and
-/// loop.terminator to CFG operations within the Standard dialect, in particular
-/// convert structured control flow into CFG branch-based control flow.
-void populateLoopToStdConversionPatterns(RewritePatternSet &patterns);
-
-/// Creates a pass to convert scf.for, scf.if and loop.terminator ops to CFG.
-std::unique_ptr<Pass> createLowerToCFGPass();
-
-} // namespace mlir
-
-#endif // MLIR_CONVERSION_SCFTOSTANDARD_SCFTOSTANDARD_H_

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index ad2062a45e776..dbee453bdec89 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -26,9 +26,9 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
     #map0 = affine_map<(d0) -> (d0)>
     module {
       func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-        cond_br %arg0, ^bb1, ^bb2
+        cf.cond_br %arg0, ^bb1, ^bb2
       ^bb1:
-        br ^bb3(%arg1 : memref<2xf32>)
+        cf.br ^bb3(%arg1 : memref<2xf32>)
       ^bb2:
         %0 = memref.alloc() : memref<2xf32>
         linalg.generic {
@@ -40,7 +40,7 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
           %tmp1 = exp %gen1_arg0 : f32
           linalg.yield %tmp1 : f32
         }: memref<2xf32>, memref<2xf32>
-        br ^bb3(%0 : memref<2xf32>)
+        cf.br ^bb3(%0 : memref<2xf32>)
       ^bb3(%1: memref<2xf32>):
         "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
         return
@@ -55,11 +55,11 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
     #map0 = affine_map<(d0) -> (d0)>
     module {
       func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-        cond_br %arg0, ^bb1, ^bb2
+        cf.cond_br %arg0, ^bb1, ^bb2
       ^bb1:  // pred: ^bb0
         %0 = memref.alloc() : memref<2xf32>
         memref.copy(%arg1, %0) : memref<2xf32>, memref<2xf32>
-        br ^bb3(%0 : memref<2xf32>)
+        cf.br ^bb3(%0 : memref<2xf32>)
       ^bb2:  // pred: ^bb0
         %1 = memref.alloc() : memref<2xf32>
         linalg.generic {
@@ -74,7 +74,7 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
         %2 = memref.alloc() : memref<2xf32>
         memref.copy(%1, %2) : memref<2xf32>, memref<2xf32>
         dealloc %1 : memref<2xf32>
-        br ^bb3(%2 : memref<2xf32>)
+        cf.br ^bb3(%2 : memref<2xf32>)
       ^bb3(%3: memref<2xf32>):  // 2 preds: ^bb1, ^bb2
         memref.copy(%3, %arg2) : memref<2xf32>, memref<2xf32>
         dealloc %3 : memref<2xf32>

diff  --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
index 40fc101862e0e..7dd5843288135 100644
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(ArmSVE)
 add_subdirectory(AMX)
 add_subdirectory(Bufferization)
 add_subdirectory(Complex)
+add_subdirectory(ControlFlow)
 add_subdirectory(DLTI)
 add_subdirectory(EmitC)
 add_subdirectory(GPU)

diff  --git a/mlir/include/mlir/Dialect/ControlFlow/CMakeLists.txt b/mlir/include/mlir/Dialect/ControlFlow/CMakeLists.txt
new file mode 100644
index 0000000000000..f33061b2d87cf
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ControlFlow/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(IR)

diff  --git a/mlir/include/mlir/Dialect/ControlFlow/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/ControlFlow/IR/CMakeLists.txt
new file mode 100644
index 0000000000000..65e40632f45d0
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ControlFlow/IR/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_mlir_dialect(ControlFlowOps cf ControlFlowOps)
+add_mlir_doc(ControlFlowOps ControlFlowDialect Dialects/ -gen-dialect-doc)

diff  --git a/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlow.h b/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlow.h
new file mode 100644
index 0000000000000..cd2269f490216
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlow.h
@@ -0,0 +1,21 @@
+//===- ControlFlow.h - ControlFlow Dialect ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ControlFlow dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_CONTROLFLOW_IR_CONTROLFLOW_H
+#define MLIR_DIALECT_CONTROLFLOW_IR_CONTROLFLOW_H
+
+#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/IR/Dialect.h"
+
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOpsDialect.h.inc"
+
+#endif // MLIR_DIALECT_CONTROLFLOW_IR_CONTROLFLOW_H

diff  --git a/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.h b/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.h
new file mode 100644
index 0000000000000..259be091490e6
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.h
@@ -0,0 +1,30 @@
+//===- ControlFlowOps.h - ControlFlow Operations ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the operations of the ControlFlow dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_CONTROLFLOW_IR_CONTROLFLOWOPS_H
+#define MLIR_DIALECT_CONTROLFLOW_IR_CONTROLFLOWOPS_H
+
+#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+
+namespace mlir {
+class PatternRewriter;
+} // namespace mlir
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h.inc"
+
+#endif // MLIR_DIALECT_CONTROLFLOW_IR_CONTROLFLOWOPS_H

diff  --git a/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.td b/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.td
new file mode 100644
index 0000000000000..ba0ed638b7df3
--- /dev/null
+++ b/mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.td
@@ -0,0 +1,313 @@
+//===- ControlFlowOps.td - ControlFlow operations ----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions for the operations within the ControlFlow
+// dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef STANDARD_OPS
+#define STANDARD_OPS
+
+include "mlir/IR/OpAsmInterface.td"
+include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+
+def ControlFlow_Dialect : Dialect {
+  let name = "cf";
+  let cppNamespace = "::mlir::cf";
+  let dependentDialects = ["arith::ArithmeticDialect"];
+  let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed;
+  let description = [{
+    This dialect contains low-level, i.e. non-region based, control flow
+    constructs. These constructs generally represent control flow directly
+    on SSA blocks of a control flow graph.
+  }];
+}
+
+class CF_Op<string mnemonic, list<Trait> traits = []> :
+    Op<ControlFlow_Dialect, mnemonic, traits>;
+
+//===----------------------------------------------------------------------===//
+// AssertOp
+//===----------------------------------------------------------------------===//
+
+def AssertOp : CF_Op<"assert"> {
+  let summary = "Assert operation with message attribute";
+  let description = [{
+    Assert operation with single boolean operand and an error message attribute.
+    If the argument is `true` this operation has no effect. Otherwise, the
+    program execution will abort. The provided error message may be used by a
+    runtime to propagate the error to the user.
+
+    Example:
+
+    ```mlir
+    assert %b, "Expected ... to be true"
+    ```
+  }];
+
+  let arguments = (ins I1:$arg, StrAttr:$msg);
+
+  let assemblyFormat = "$arg `,` $msg attr-dict";
+  let hasCanonicalizeMethod = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// BranchOp
+//===----------------------------------------------------------------------===//
+
+def BranchOp : CF_Op<"br", [
+    DeclareOpInterfaceMethods<BranchOpInterface, ["getSuccessorForOperands"]>,
+    NoSideEffect, Terminator
+  ]> {
+  let summary = "branch operation";
+  let description = [{
+    The `cf.br` operation represents a direct branch operation to a given
+    block. The operands of this operation are forwarded to the successor block,
+    and the number and type of the operands must match the arguments of the
+    target block.
+
+    Example:
+
+    ```mlir
+    ^bb2:
+      %2 = call @someFn()
+      cf.br ^bb3(%2 : tensor<*xf32>)
+    ^bb3(%3: tensor<*xf32>):
+    ```
+  }];
+
+  let arguments = (ins Variadic<AnyType>:$destOperands);
+  let successors = (successor AnySuccessor:$dest);
+
+  let builders = [
+    OpBuilder<(ins "Block *":$dest,
+                   CArg<"ValueRange", "{}">:$destOperands), [{
+      $_state.addSuccessors(dest);
+      $_state.addOperands(destOperands);
+    }]>];
+
+  let extraClassDeclaration = [{
+    void setDest(Block *block);
+
+    /// Erase the operand at 'index' from the operand list.
+    void eraseOperand(unsigned index);
+  }];
+
+  let hasCanonicalizeMethod = 1;
+  let assemblyFormat = [{
+    $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// CondBranchOp
+//===----------------------------------------------------------------------===//
+
+def CondBranchOp : CF_Op<"cond_br",
+    [AttrSizedOperandSegments,
+     DeclareOpInterfaceMethods<BranchOpInterface, ["getSuccessorForOperands"]>,
+     NoSideEffect, Terminator]> {
+  let summary = "conditional branch operation";
+  let description = [{
+    The `cond_br` terminator operation represents a conditional branch on a
+    boolean (1-bit integer) value. If the bit is set, then the first destination
+    is jumped to; if it is false, the second destination is chosen. The count
+    and types of operands must align with the arguments in the corresponding
+    target blocks.
+
+    The MLIR conditional branch operation is not allowed to target the entry
+    block for a region. The two destinations of the conditional branch operation
+    are allowed to be the same.
+
+    The following example illustrates a function with a conditional branch
+    operation that targets the same block.
+
+    Example:
+
+    ```mlir
+    func @select(%a: i32, %b: i32, %flag: i1) -> i32 {
+      // Both targets are the same, operands differ
+      cond_br %flag, ^bb1(%a : i32), ^bb1(%b : i32)
+
+    ^bb1(%x : i32) :
+      return %x : i32
+    }
+    ```
+  }];
+
+  let arguments = (ins I1:$condition,
+                       Variadic<AnyType>:$trueDestOperands,
+                       Variadic<AnyType>:$falseDestOperands);
+  let successors = (successor AnySuccessor:$trueDest, AnySuccessor:$falseDest);
+
+  let builders = [
+    OpBuilder<(ins "Value":$condition, "Block *":$trueDest,
+      "ValueRange":$trueOperands, "Block *":$falseDest,
+      "ValueRange":$falseOperands), [{
+      build($_builder, $_state, condition, trueOperands, falseOperands, trueDest,
+            falseDest);
+    }]>,
+    OpBuilder<(ins "Value":$condition, "Block *":$trueDest,
+      "Block *":$falseDest, CArg<"ValueRange", "{}">:$falseOperands), [{
+      build($_builder, $_state, condition, trueDest, ValueRange(), falseDest,
+            falseOperands);
+    }]>];
+
+  let extraClassDeclaration = [{
+    // These are the indices into the dests list.
+    enum { trueIndex = 0, falseIndex = 1 };
+
+    // Accessors for operands to the 'true' destination.
+    Value getTrueOperand(unsigned idx) {
+      assert(idx < getNumTrueOperands());
+      return getOperand(getTrueDestOperandIndex() + idx);
+    }
+
+    void setTrueOperand(unsigned idx, Value value) {
+      assert(idx < getNumTrueOperands());
+      setOperand(getTrueDestOperandIndex() + idx, value);
+    }
+
+    unsigned getNumTrueOperands()  { return getTrueOperands().size(); }
+
+    /// Erase the operand at 'index' from the true operand list.
+    void eraseTrueOperand(unsigned index)  {
+      getTrueDestOperandsMutable().erase(index);
+    }
+
+    // Accessors for operands to the 'false' destination.
+    Value getFalseOperand(unsigned idx) {
+      assert(idx < getNumFalseOperands());
+      return getOperand(getFalseDestOperandIndex() + idx);
+    }
+    void setFalseOperand(unsigned idx, Value value) {
+      assert(idx < getNumFalseOperands());
+      setOperand(getFalseDestOperandIndex() + idx, value);
+    }
+
+    operand_range getTrueOperands() { return getTrueDestOperands(); }
+    operand_range getFalseOperands() { return getFalseDestOperands(); }
+
+    unsigned getNumFalseOperands() { return getFalseOperands().size(); }
+
+    /// Erase the operand at 'index' from the false operand list.
+    void eraseFalseOperand(unsigned index) {
+      getFalseDestOperandsMutable().erase(index);
+    }
+
+  private:
+    /// Get the index of the first true destination operand.
+    unsigned getTrueDestOperandIndex() { return 1; }
+
+    /// Get the index of the first false destination operand.
+    unsigned getFalseDestOperandIndex() {
+      return getTrueDestOperandIndex() + getNumTrueOperands();
+    }
+  }];
+
+  let hasCanonicalizer = 1;
+  let assemblyFormat = [{
+    $condition `,`
+    $trueDest (`(` $trueDestOperands^ `:` type($trueDestOperands) `)`)? `,`
+    $falseDest (`(` $falseDestOperands^ `:` type($falseDestOperands) `)`)?
+    attr-dict
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// SwitchOp
+//===----------------------------------------------------------------------===//
+
+def SwitchOp : CF_Op<"switch",
+    [AttrSizedOperandSegments,
+     DeclareOpInterfaceMethods<BranchOpInterface, ["getSuccessorForOperands"]>,
+     NoSideEffect, Terminator]> {
+  let summary = "switch operation";
+  let description = [{
+    The `switch` terminator operation represents a switch on a signless integer
+    value. If the flag matches one of the specified cases, then the
+    corresponding destination is jumped to. If the flag does not match any of
+    the cases, the default destination is jumped to. The count and types of
+    operands must align with the arguments in the corresponding target blocks.
+
+    Example:
+
+    ```mlir
+    switch %flag : i32, [
+      default: ^bb1(%a : i32),
+      42: ^bb1(%b : i32),
+      43: ^bb3(%c : i32)
+    ]
+    ```
+  }];
+
+  let arguments = (ins
+    AnyInteger:$flag,
+    Variadic<AnyType>:$defaultOperands,
+    VariadicOfVariadic<AnyType, "case_operand_segments">:$caseOperands,
+    OptionalAttr<AnyIntElementsAttr>:$case_values,
+    I32ElementsAttr:$case_operand_segments
+  );
+  let successors = (successor
+    AnySuccessor:$defaultDestination,
+    VariadicSuccessor<AnySuccessor>:$caseDestinations
+  );
+  let builders = [
+    OpBuilder<(ins "Value":$flag,
+      "Block *":$defaultDestination,
+      "ValueRange":$defaultOperands,
+      CArg<"ArrayRef<APInt>", "{}">:$caseValues,
+      CArg<"BlockRange", "{}">:$caseDestinations,
+      CArg<"ArrayRef<ValueRange>", "{}">:$caseOperands)>,
+    OpBuilder<(ins "Value":$flag,
+      "Block *":$defaultDestination,
+      "ValueRange":$defaultOperands,
+      CArg<"ArrayRef<int32_t>", "{}">:$caseValues,
+      CArg<"BlockRange", "{}">:$caseDestinations,
+      CArg<"ArrayRef<ValueRange>", "{}">:$caseOperands)>,
+    OpBuilder<(ins "Value":$flag,
+      "Block *":$defaultDestination,
+      "ValueRange":$defaultOperands,
+      CArg<"DenseIntElementsAttr", "{}">:$caseValues,
+      CArg<"BlockRange", "{}">:$caseDestinations,
+      CArg<"ArrayRef<ValueRange>", "{}">:$caseOperands)>
+  ];
+
+  let assemblyFormat = [{
+    $flag `:` type($flag) `,` `[` `\n`
+      custom<SwitchOpCases>(ref(type($flag)),$defaultDestination,
+                            $defaultOperands,
+                            type($defaultOperands),
+                            $case_values,
+                            $caseDestinations,
+                            $caseOperands,
+                            type($caseOperands))
+   `]`
+    attr-dict
+  }];
+
+  let extraClassDeclaration = [{
+    /// Return the operands for the case destination block at the given index.
+    OperandRange getCaseOperands(unsigned index) {
+      return getCaseOperands()[index];
+    }
+
+    /// Return a mutable range of operands for the case destination block at the
+    /// given index.
+    MutableOperandRange getCaseOperandsMutable(unsigned index) {
+      return getCaseOperandsMutable()[index];
+    }
+  }];
+
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+}
+
+#endif // STANDARD_OPS

diff  --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td
index 8acda8d0e371b..b06c90093ff65 100644
--- a/mlir/include/mlir/Dialect/SCF/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td
@@ -84,15 +84,15 @@ def ExecuteRegionOp : SCF_Op<"execute_region"> {
     affine.for %i = 0 to 100 {
       "foo"() : () -> ()
       %v = scf.execute_region -> i64 {
-        cond_br %cond, ^bb1, ^bb2
+        cf.cond_br %cond, ^bb1, ^bb2
 
       ^bb1:
         %c1 = arith.constant 1 : i64
-        br ^bb3(%c1 : i64)
+        cf.br ^bb3(%c1 : i64)
 
       ^bb2:
         %c2 = arith.constant 2 : i64
-        br ^bb3(%c2 : i64)
+        cf.br ^bb3(%c2 : i64)
 
       ^bb3(%x : i64):
         scf.yield %x : i64

diff  --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
index 6b4486501be5c..6964621d67563 100644
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
@@ -14,7 +14,7 @@
 #ifndef MLIR_DIALECT_STANDARDOPS_IR_OPS_H
 #define MLIR_DIALECT_STANDARDOPS_IR_OPS_H
 
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
@@ -24,7 +24,6 @@
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Interfaces/VectorInterfaces.h"
 
 // Pull in all enum type definitions and utility function declarations.
 #include "mlir/Dialect/StandardOps/IR/OpsEnums.h.inc"

diff  --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
index 2aca33eda3c46..ffd16c16c09d2 100644
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -20,12 +20,11 @@ include "mlir/Interfaces/CastInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/VectorInterfaces.td"
 
 def StandardOps_Dialect : Dialect {
   let name = "std";
   let cppNamespace = "::mlir";
-  let dependentDialects = ["arith::ArithmeticDialect"];
+  let dependentDialects = ["cf::ControlFlowDialect"];
   let hasConstantMaterializer = 1;
   let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed;
 }
@@ -42,78 +41,6 @@ class Std_Op<string mnemonic, list<Trait> traits = []> :
   let parser = [{ return ::parse$cppClass(parser, result); }];
 }
 
-//===----------------------------------------------------------------------===//
-// AssertOp
-//===----------------------------------------------------------------------===//
-
-def AssertOp : Std_Op<"assert"> {
-  let summary = "Assert operation with message attribute";
-  let description = [{
-    Assert operation with single boolean operand and an error message attribute.
-    If the argument is `true` this operation has no effect. Otherwise, the
-    program execution will abort. The provided error message may be used by a
-    runtime to propagate the error to the user.
-
-    Example:
-
-    ```mlir
-    assert %b, "Expected ... to be true"
-    ```
-  }];
-
-  let arguments = (ins I1:$arg, StrAttr:$msg);
-
-  let assemblyFormat = "$arg `,` $msg attr-dict";
-  let hasCanonicalizeMethod = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// BranchOp
-//===----------------------------------------------------------------------===//
-
-def BranchOp : Std_Op<"br",
-    [DeclareOpInterfaceMethods<BranchOpInterface, ["getSuccessorForOperands"]>,
-     NoSideEffect, Terminator]> {
-  let summary = "branch operation";
-  let description = [{
-    The `br` operation represents a branch operation in a function.
-    The operation takes variable number of operands and produces no results.
-    The operand number and types for each successor must match the arguments of
-    the block successor.
-
-    Example:
-
-    ```mlir
-    ^bb2:
-      %2 = call @someFn()
-      br ^bb3(%2 : tensor<*xf32>)
-    ^bb3(%3: tensor<*xf32>):
-    ```
-  }];
-
-  let arguments = (ins Variadic<AnyType>:$destOperands);
-  let successors = (successor AnySuccessor:$dest);
-
-  let builders = [
-    OpBuilder<(ins "Block *":$dest,
-                  CArg<"ValueRange", "{}">:$destOperands), [{
-      $_state.addSuccessors(dest);
-      $_state.addOperands(destOperands);
-    }]>];
-
-  let extraClassDeclaration = [{
-    void setDest(Block *block);
-
-    /// Erase the operand at 'index' from the operand list.
-    void eraseOperand(unsigned index);
-  }];
-
-  let hasCanonicalizeMethod = 1;
-  let assemblyFormat = [{
-    $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict
-  }];
-}
-
 //===----------------------------------------------------------------------===//
 // CallOp
 //===----------------------------------------------------------------------===//
@@ -246,121 +173,6 @@ def CallIndirectOp : Std_Op<"call_indirect", [
     "$callee `(` $callee_operands `)` attr-dict `:` type($callee)";
 }
 
-//===----------------------------------------------------------------------===//
-// CondBranchOp
-//===----------------------------------------------------------------------===//
-
-def CondBranchOp : Std_Op<"cond_br",
-    [AttrSizedOperandSegments,
-     DeclareOpInterfaceMethods<BranchOpInterface, ["getSuccessorForOperands"]>,
-     NoSideEffect, Terminator]> {
-  let summary = "conditional branch operation";
-  let description = [{
-    The `cond_br` terminator operation represents a conditional branch on a
-    boolean (1-bit integer) value. If the bit is set, then the first destination
-    is jumped to; if it is false, the second destination is chosen. The count
-    and types of operands must align with the arguments in the corresponding
-    target blocks.
-
-    The MLIR conditional branch operation is not allowed to target the entry
-    block for a region. The two destinations of the conditional branch operation
-    are allowed to be the same.
-
-    The following example illustrates a function with a conditional branch
-    operation that targets the same block.
-
-    Example:
-
-    ```mlir
-    func @select(%a: i32, %b: i32, %flag: i1) -> i32 {
-      // Both targets are the same, operands differ
-      cond_br %flag, ^bb1(%a : i32), ^bb1(%b : i32)
-
-    ^bb1(%x : i32) :
-      return %x : i32
-    }
-    ```
-  }];
-
-  let arguments = (ins I1:$condition,
-                       Variadic<AnyType>:$trueDestOperands,
-                       Variadic<AnyType>:$falseDestOperands);
-  let successors = (successor AnySuccessor:$trueDest, AnySuccessor:$falseDest);
-
-  let builders = [
-    OpBuilder<(ins "Value":$condition, "Block *":$trueDest,
-      "ValueRange":$trueOperands, "Block *":$falseDest,
-      "ValueRange":$falseOperands), [{
-      build($_builder, $_state, condition, trueOperands, falseOperands, trueDest,
-            falseDest);
-    }]>,
-    OpBuilder<(ins "Value":$condition, "Block *":$trueDest,
-      "Block *":$falseDest, CArg<"ValueRange", "{}">:$falseOperands), [{
-      build($_builder, $_state, condition, trueDest, ValueRange(), falseDest,
-            falseOperands);
-    }]>];
-
-  let extraClassDeclaration = [{
-    // These are the indices into the dests list.
-    enum { trueIndex = 0, falseIndex = 1 };
-
-    // Accessors for operands to the 'true' destination.
-    Value getTrueOperand(unsigned idx) {
-      assert(idx < getNumTrueOperands());
-      return getOperand(getTrueDestOperandIndex() + idx);
-    }
-
-    void setTrueOperand(unsigned idx, Value value) {
-      assert(idx < getNumTrueOperands());
-      setOperand(getTrueDestOperandIndex() + idx, value);
-    }
-
-    unsigned getNumTrueOperands()  { return getTrueOperands().size(); }
-
-    /// Erase the operand at 'index' from the true operand list.
-    void eraseTrueOperand(unsigned index)  {
-      getTrueDestOperandsMutable().erase(index);
-    }
-
-    // Accessors for operands to the 'false' destination.
-    Value getFalseOperand(unsigned idx) {
-      assert(idx < getNumFalseOperands());
-      return getOperand(getFalseDestOperandIndex() + idx);
-    }
-    void setFalseOperand(unsigned idx, Value value) {
-      assert(idx < getNumFalseOperands());
-      setOperand(getFalseDestOperandIndex() + idx, value);
-    }
-
-    operand_range getTrueOperands() { return getTrueDestOperands(); }
-    operand_range getFalseOperands() { return getFalseDestOperands(); }
-
-    unsigned getNumFalseOperands() { return getFalseOperands().size(); }
-
-    /// Erase the operand at 'index' from the false operand list.
-    void eraseFalseOperand(unsigned index) {
-      getFalseDestOperandsMutable().erase(index);
-    }
-
-  private:
-    /// Get the index of the first true destination operand.
-    unsigned getTrueDestOperandIndex() { return 1; }
-
-    /// Get the index of the first false destination operand.
-    unsigned getFalseDestOperandIndex() {
-      return getTrueDestOperandIndex() + getNumTrueOperands();
-    }
-  }];
-
-  let hasCanonicalizer = 1;
-  let assemblyFormat = [{
-    $condition `,`
-    $trueDest (`(` $trueDestOperands^ `:` type($trueDestOperands) `)`)? `,`
-    $falseDest (`(` $falseDestOperands^ `:` type($falseDestOperands) `)`)?
-    attr-dict
-  }];
-}
-
 //===----------------------------------------------------------------------===//
 // ConstantOp
 //===----------------------------------------------------------------------===//
@@ -443,93 +255,4 @@ def ReturnOp : Std_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
   let hasVerifier = 1;
 }
 
-//===----------------------------------------------------------------------===//
-// SwitchOp
-//===----------------------------------------------------------------------===//
-
-def SwitchOp : Std_Op<"switch",
-    [AttrSizedOperandSegments,
-     DeclareOpInterfaceMethods<BranchOpInterface, ["getSuccessorForOperands"]>,
-     NoSideEffect, Terminator]> {
-  let summary = "switch operation";
-  let description = [{
-    The `switch` terminator operation represents a switch on a signless integer
-    value. If the flag matches one of the specified cases, then the
-    corresponding destination is jumped to. If the flag does not match any of
-    the cases, the default destination is jumped to. The count and types of
-    operands must align with the arguments in the corresponding target blocks.
-
-    Example:
-
-    ```mlir
-    switch %flag : i32, [
-      default: ^bb1(%a : i32),
-      42: ^bb1(%b : i32),
-      43: ^bb3(%c : i32)
-    ]
-    ```
-  }];
-
-  let arguments = (ins
-    AnyInteger:$flag,
-    Variadic<AnyType>:$defaultOperands,
-    VariadicOfVariadic<AnyType, "case_operand_segments">:$caseOperands,
-    OptionalAttr<AnyIntElementsAttr>:$case_values,
-    I32ElementsAttr:$case_operand_segments
-  );
-  let successors = (successor
-    AnySuccessor:$defaultDestination,
-    VariadicSuccessor<AnySuccessor>:$caseDestinations
-  );
-  let builders = [
-    OpBuilder<(ins "Value":$flag,
-      "Block *":$defaultDestination,
-      "ValueRange":$defaultOperands,
-      CArg<"ArrayRef<APInt>", "{}">:$caseValues,
-      CArg<"BlockRange", "{}">:$caseDestinations,
-      CArg<"ArrayRef<ValueRange>", "{}">:$caseOperands)>,
-    OpBuilder<(ins "Value":$flag,
-      "Block *":$defaultDestination,
-      "ValueRange":$defaultOperands,
-      CArg<"ArrayRef<int32_t>", "{}">:$caseValues,
-      CArg<"BlockRange", "{}">:$caseDestinations,
-      CArg<"ArrayRef<ValueRange>", "{}">:$caseOperands)>,
-    OpBuilder<(ins "Value":$flag,
-      "Block *":$defaultDestination,
-      "ValueRange":$defaultOperands,
-      CArg<"DenseIntElementsAttr", "{}">:$caseValues,
-      CArg<"BlockRange", "{}">:$caseDestinations,
-      CArg<"ArrayRef<ValueRange>", "{}">:$caseOperands)>
-  ];
-
-  let assemblyFormat = [{
-    $flag `:` type($flag) `,` `[` `\n`
-      custom<SwitchOpCases>(ref(type($flag)),$defaultDestination,
-                            $defaultOperands,
-                            type($defaultOperands),
-                            $case_values,
-                            $caseDestinations,
-                            $caseOperands,
-                            type($caseOperands))
-   `]`
-    attr-dict
-  }];
-
-  let extraClassDeclaration = [{
-    /// Return the operands for the case destination block at the given index.
-    OperandRange getCaseOperands(unsigned index) {
-      return getCaseOperands()[index];
-    }
-
-    /// Return a mutable range of operands for the case destination block at the
-    /// given index.
-    MutableOperandRange getCaseOperandsMutable(unsigned index) {
-      return getCaseOperandsMutable()[index];
-    }
-  }];
-
-  let hasCanonicalizer = 1;
-  let hasVerifier = 1;
-}
-
 #endif // STANDARD_OPS

diff  --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 9632658f3884e..980bed05b1dc0 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -22,6 +22,7 @@
 #include "mlir/Dialect/Async/IR/Async.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
@@ -61,6 +62,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
                   arm_neon::ArmNeonDialect,
                   async::AsyncDialect,
                   bufferization::BufferizationDialect,
+                  cf::ControlFlowDialect,
                   complex::ComplexDialect,
                   DLTIDialect,
                   emitc::EmitCDialect,

diff  --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index 602b9e72b330a..39ea2efc30015 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -6,6 +6,8 @@ add_subdirectory(AsyncToLLVM)
 add_subdirectory(BufferizationToMemRef)
 add_subdirectory(ComplexToLLVM)
 add_subdirectory(ComplexToStandard)
+add_subdirectory(ControlFlowToLLVM)
+add_subdirectory(ControlFlowToSPIRV)
 add_subdirectory(GPUCommon)
 add_subdirectory(GPUToNVVM)
 add_subdirectory(GPUToROCDL)
@@ -25,10 +27,10 @@ add_subdirectory(OpenACCToSCF)
 add_subdirectory(OpenMPToLLVM)
 add_subdirectory(PDLToPDLInterp)
 add_subdirectory(ReconcileUnrealizedCasts)
+add_subdirectory(SCFToControlFlow)
 add_subdirectory(SCFToGPU)
 add_subdirectory(SCFToOpenMP)
 add_subdirectory(SCFToSPIRV)
-add_subdirectory(SCFToStandard)
 add_subdirectory(ShapeToStandard)
 add_subdirectory(SPIRVToLLVM)
 add_subdirectory(StandardToLLVM)

diff  --git a/mlir/lib/Conversion/ControlFlowToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ControlFlowToLLVM/CMakeLists.txt
new file mode 100644
index 0000000000000..67d98dc1d3957
--- /dev/null
+++ b/mlir/lib/Conversion/ControlFlowToLLVM/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_mlir_conversion_library(MLIRControlFlowToLLVM
+  ControlFlowToLLVM.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ControlFlowToLLVM
+
+  DEPENDS
+  MLIRConversionPassIncGen
+  intrinsics_gen
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
+  MLIRAnalysis
+  MLIRControlFlow
+  MLIRLLVMCommonConversion
+  MLIRLLVMIR
+  MLIRPass
+  MLIRTransformUtils
+  )

diff  --git a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp
new file mode 100644
index 0000000000000..2ba829567600c
--- /dev/null
+++ b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp
@@ -0,0 +1,148 @@
+//===- ControlFlowToLLVM.cpp - ControlFlow to LLVM dialect conversion -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to convert MLIR standard and builtin dialects
+// into the LLVM IR dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "../PassDetail.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include <functional>
+
+using namespace mlir;
+
+#define PASS_NAME "convert-cf-to-llvm"
+
+namespace {
+/// Lower `std.assert`. The default lowering calls the `abort` function if the
+/// assertion is violated and has no effect otherwise. The failure message is
+/// ignored by the default lowering but should be propagated by any custom
+/// lowering.
+struct AssertOpLowering : public ConvertOpToLLVMPattern<cf::AssertOp> {
+  using ConvertOpToLLVMPattern<cf::AssertOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(cf::AssertOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+
+    // Insert the `abort` declaration if necessary.
+    auto module = op->getParentOfType<ModuleOp>();
+    auto abortFunc = module.lookupSymbol<LLVM::LLVMFuncOp>("abort");
+    if (!abortFunc) {
+      OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(module.getBody());
+      auto abortFuncTy = LLVM::LLVMFunctionType::get(getVoidType(), {});
+      abortFunc = rewriter.create<LLVM::LLVMFuncOp>(rewriter.getUnknownLoc(),
+                                                    "abort", abortFuncTy);
+    }
+
+    // Split block at `assert` operation.
+    Block *opBlock = rewriter.getInsertionBlock();
+    auto opPosition = rewriter.getInsertionPoint();
+    Block *continuationBlock = rewriter.splitBlock(opBlock, opPosition);
+
+    // Generate IR to call `abort`.
+    Block *failureBlock = rewriter.createBlock(opBlock->getParent());
+    rewriter.create<LLVM::CallOp>(loc, abortFunc, llvm::None);
+    rewriter.create<LLVM::UnreachableOp>(loc);
+
+    // Generate assertion test.
+    rewriter.setInsertionPointToEnd(opBlock);
+    rewriter.replaceOpWithNewOp<LLVM::CondBrOp>(
+        op, adaptor.getArg(), continuationBlock, failureBlock);
+
+    return success();
+  }
+};
+
+// Base class for LLVM IR lowering terminator operations with successors.
+template <typename SourceOp, typename TargetOp>
+struct OneToOneLLVMTerminatorLowering
+    : public ConvertOpToLLVMPattern<SourceOp> {
+  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
+  using Base = OneToOneLLVMTerminatorLowering<SourceOp, TargetOp>;
+
+  LogicalResult
+  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<TargetOp>(op, adaptor.getOperands(),
+                                          op->getSuccessors(), op->getAttrs());
+    return success();
+  }
+};
+
+// FIXME: this should be tablegen'ed as well.
+struct BranchOpLowering
+    : public OneToOneLLVMTerminatorLowering<cf::BranchOp, LLVM::BrOp> {
+  using Base::Base;
+};
+struct CondBranchOpLowering
+    : public OneToOneLLVMTerminatorLowering<cf::CondBranchOp, LLVM::CondBrOp> {
+  using Base::Base;
+};
+struct SwitchOpLowering
+    : public OneToOneLLVMTerminatorLowering<cf::SwitchOp, LLVM::SwitchOp> {
+  using Base::Base;
+};
+
+} // namespace
+
+void mlir::cf::populateControlFlowToLLVMConversionPatterns(
+    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
+  // clang-format off
+  patterns.add<
+      AssertOpLowering,
+      BranchOpLowering,
+      CondBranchOpLowering,
+      SwitchOpLowering>(converter);
+  // clang-format on
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Definition
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// A pass converting MLIR operations into the LLVM IR dialect.
+struct ConvertControlFlowToLLVM
+    : public ConvertControlFlowToLLVMBase<ConvertControlFlowToLLVM> {
+  ConvertControlFlowToLLVM() = default;
+
+  /// Run the dialect converter on the module.
+  void runOnOperation() override {
+    LLVMConversionTarget target(getContext());
+    RewritePatternSet patterns(&getContext());
+
+    LowerToLLVMOptions options(&getContext());
+    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
+      options.overrideIndexBitwidth(indexBitwidth);
+
+    LLVMTypeConverter converter(&getContext(), options);
+    mlir::cf::populateControlFlowToLLVMConversionPatterns(converter, patterns);
+
+    if (failed(applyPartialConversion(getOperation(), target,
+                                      std::move(patterns))))
+      signalPassFailure();
+  }
+};
+} // namespace
+
+std::unique_ptr<Pass> mlir::cf::createConvertControlFlowToLLVMPass() {
+  return std::make_unique<ConvertControlFlowToLLVM>();
+}

diff  --git a/mlir/lib/Conversion/ControlFlowToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/ControlFlowToSPIRV/CMakeLists.txt
new file mode 100644
index 0000000000000..48dcde97de831
--- /dev/null
+++ b/mlir/lib/Conversion/ControlFlowToSPIRV/CMakeLists.txt
@@ -0,0 +1,19 @@
+add_mlir_conversion_library(MLIRControlFlowToSPIRV
+  ControlFlowToSPIRV.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SPIRV
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR
+
+  DEPENDS
+  MLIRConversionPassIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRControlFlow
+  MLIRPass
+  MLIRSPIRV
+  MLIRSPIRVConversion
+  MLIRSupport
+  MLIRTransformUtils
+  )

diff  --git a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp
new file mode 100644
index 0000000000000..742542a0196f7
--- /dev/null
+++ b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp
@@ -0,0 +1,73 @@
+//===- ControlFlowToSPIRV.cpp - ControlFlow to SPIR-V Patterns ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements patterns to convert standard dialect to SPIR-V dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h"
+#include "../SPIRVCommon/Pattern.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
+#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
+#include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"
+#include "mlir/Dialect/SPIRV/Utils/LayoutUtils.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/Support/LogicalResult.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "cf-to-spirv-pattern"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Operation conversion
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Converts cf.br to spv.Branch.
+struct BranchOpPattern final : public OpConversionPattern<cf::BranchOp> {
+  using OpConversionPattern<cf::BranchOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(cf::BranchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<spirv::BranchOp>(op, op.getDest(),
+                                                 adaptor.getDestOperands());
+    return success();
+  }
+};
+
+/// Converts cf.cond_br to spv.BranchConditional.
+struct CondBranchOpPattern final
+    : public OpConversionPattern<cf::CondBranchOp> {
+  using OpConversionPattern<cf::CondBranchOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(cf::CondBranchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<spirv::BranchConditionalOp>(
+        op, op.getCondition(), op.getTrueDest(), adaptor.getTrueDestOperands(),
+        op.getFalseDest(), adaptor.getFalseDestOperands());
+    return success();
+  }
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Pattern population
+//===----------------------------------------------------------------------===//
+
+void mlir::cf::populateControlFlowToSPIRVPatterns(
+    SPIRVTypeConverter &typeConverter, RewritePatternSet &patterns) {
+  MLIRContext *context = patterns.getContext();
+
+  patterns.add<BranchOpPattern, CondBranchOpPattern>(typeConverter, context);
+}

diff  --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 9d564ee7d19d4..67cc76e8aab53 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
@@ -172,8 +173,8 @@ struct LowerGpuOpsToNVVMOpsPass
     populateGpuRewritePatterns(patterns);
     (void)applyPatternsAndFoldGreedily(m, std::move(patterns));
 
-    mlir::arith::populateArithmeticToLLVMConversionPatterns(converter,
-                                                            llvmPatterns);
+    arith::populateArithmeticToLLVMConversionPatterns(converter, llvmPatterns);
+    cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
     populateStdToLLVMConversionPatterns(converter, llvmPatterns);
     populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);
     populateGpuToNVVMConversionPatterns(converter, llvmPatterns);

diff  --git a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
index e731fea0d7779..3a5a94f48b6dc 100644
--- a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -19,7 +19,7 @@ add_mlir_conversion_library(MLIRLinalgToLLVM
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRMemRefToLLVM
-  MLIRSCFToStandard
+  MLIRSCFToControlFlow
   MLIRTransforms
   MLIRVectorToLLVM
   MLIRVectorToSCF

diff  --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
index cb5b67128c08d..a54489cc2af36 100644
--- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
+++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
@@ -14,7 +14,7 @@
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"

diff  --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index 288c252b81bbd..a4a82d66b0cb9 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -433,7 +433,7 @@ struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
 ///      +---------------------------------+
 ///      |   <code before the AtomicRMWOp> |
 ///      |   <compute initial %loaded>     |
-///      |   br loop(%loaded)              |
+///      |   cf.br loop(%loaded)              |
 ///      +---------------------------------+
 ///             |
 ///  -------|   |
@@ -444,7 +444,7 @@ struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
 ///  |   |   %pair = cmpxchg              |
 ///  |   |   %ok = %pair[0]               |
 ///  |   |   %new = %pair[1]              |
-///  |   |   cond_br %ok, end, loop(%new) |
+///  |   |   cf.cond_br %ok, end, loop(%new) |
 ///  |   +--------------------------------+
 ///  |          |        |
 ///  |-----------        |

diff  --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 3de90c9b92e4a..f2cb819d6646a 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -10,6 +10,7 @@
 
 #include "../PassDetail.h"
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
@@ -66,7 +67,8 @@ void ConvertOpenMPToLLVMPass::runOnOperation() {
   // Convert to OpenMP operations with LLVM IR dialect
   RewritePatternSet patterns(&getContext());
   LLVMTypeConverter converter(&getContext());
-  mlir::arith::populateArithmeticToLLVMConversionPatterns(converter, patterns);
+  arith::populateArithmeticToLLVMConversionPatterns(converter, patterns);
+  cf::populateControlFlowToLLVMConversionPatterns(converter, patterns);
   populateMemRefToLLVMConversionPatterns(converter, patterns);
   populateStdToLLVMConversionPatterns(converter, patterns);
   populateOpenMPToLLVMConversionPatterns(converter, patterns);

diff  --git a/mlir/lib/Conversion/PassDetail.h b/mlir/lib/Conversion/PassDetail.h
index 628d6995feeb3..99841f0ec34d5 100644
--- a/mlir/lib/Conversion/PassDetail.h
+++ b/mlir/lib/Conversion/PassDetail.h
@@ -29,6 +29,10 @@ namespace arith {
 class ArithmeticDialect;
 } // namespace arith
 
+namespace cf {
+class ControlFlowDialect;
+} // namespace cf
+
 namespace complex {
 class ComplexDialect;
 } // namespace complex

diff  --git a/mlir/lib/Conversion/SCFToStandard/CMakeLists.txt b/mlir/lib/Conversion/SCFToControlFlow/CMakeLists.txt
similarity index 51%
rename from mlir/lib/Conversion/SCFToStandard/CMakeLists.txt
rename to mlir/lib/Conversion/SCFToControlFlow/CMakeLists.txt
index 1a4e369fb4d3d..39ace3ae5c557 100644
--- a/mlir/lib/Conversion/SCFToStandard/CMakeLists.txt
+++ b/mlir/lib/Conversion/SCFToControlFlow/CMakeLists.txt
@@ -1,8 +1,8 @@
-add_mlir_conversion_library(MLIRSCFToStandard
-  SCFToStandard.cpp
+add_mlir_conversion_library(MLIRSCFToControlFlow
+  SCFToControlFlow.cpp
 
   ADDITIONAL_HEADER_DIRS
-  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/SCFToStandard
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/SCFToControlFlow
 
   DEPENDS
   MLIRConversionPassIncGen
@@ -12,6 +12,7 @@ add_mlir_conversion_library(MLIRSCFToStandard
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
+  MLIRControlFlow
   MLIRSCF
   MLIRTransforms
   )

diff  --git a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp b/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp
similarity index 89%
rename from mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
rename to mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp
index 27808a0a72c61..dc8e6e72522a4 100644
--- a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
+++ b/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp
@@ -1,4 +1,4 @@
-//===- SCFToStandard.cpp - ControlFlow to CFG conversion ------------------===//
+//===- SCFToControlFlow.cpp - SCF to CF conversion ------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "../PassDetail.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -29,7 +29,8 @@ using namespace mlir::scf;
 
 namespace {
 
-struct SCFToStandardPass : public SCFToStandardBase<SCFToStandardPass> {
+struct SCFToControlFlowPass
+    : public SCFToControlFlowBase<SCFToControlFlowPass> {
   void runOnOperation() override;
 };
 
@@ -57,7 +58,7 @@ struct SCFToStandardPass : public SCFToStandardBase<SCFToStandardPass> {
 //      |   <code before the ForOp>       |
 //      |   <definitions of %init...>     |
 //      |   <compute initial %iv value>   |
-//      |   br cond(%iv, %init...)        |
+//      |   cf.br cond(%iv, %init...)        |
 //      +---------------------------------+
 //             |
 //  -------|   |
@@ -65,7 +66,7 @@ struct SCFToStandardPass : public SCFToStandardBase<SCFToStandardPass> {
 //  |   +--------------------------------+
 //  |   | cond(%iv, %init...):           |
 //  |   |   <compare %iv to upper bound> |
-//  |   |   cond_br %r, body, end        |
+//  |   |   cf.cond_br %r, body, end        |
 //  |   +--------------------------------+
 //  |          |               |
 //  |          |               -------------|
@@ -83,7 +84,7 @@ struct SCFToStandardPass : public SCFToStandardBase<SCFToStandardPass> {
 //  |   |   <body contents>              |  |
 //  |   |   <operands of yield = %yields>|  |
 //  |   |   %new_iv =<add step to %iv>   |  |
-//  |   |   br cond(%new_iv, %yields)    |  |
+//  |   |   cf.br cond(%new_iv, %yields)    |  |
 //  |   +--------------------------------+  |
 //  |          |                            |
 //  |-----------        |--------------------
@@ -125,7 +126,7 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //
 //      +--------------------------------+
 //      | <code before the IfOp>         |
-//      | cond_br %cond, %then, %else    |
+//      | cf.cond_br %cond, %then, %else    |
 //      +--------------------------------+
 //             |              |
 //             |              --------------|
@@ -133,7 +134,7 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //      +--------------------------------+  |
 //      | then:                          |  |
 //      |   <then contents>              |  |
-//      |   br continue                  |  |
+//      |   cf.br continue                  |  |
 //      +--------------------------------+  |
 //             |                            |
 //   |----------               |-------------
@@ -141,7 +142,7 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //   |  +--------------------------------+
 //   |  | else:                          |
 //   |  |   <else contents>              |
-//   |  |   br continue                  |
+//   |  |   cf.br continue                  |
 //   |  +--------------------------------+
 //   |         |
 //   ------|   |
@@ -155,7 +156,7 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //
 //      +--------------------------------+
 //      | <code before the IfOp>         |
-//      | cond_br %cond, %then, %else    |
+//      | cf.cond_br %cond, %then, %else    |
 //      +--------------------------------+
 //             |              |
 //             |              --------------|
@@ -163,7 +164,7 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //      +--------------------------------+  |
 //      | then:                          |  |
 //      |   <then contents>              |  |
-//      |   br dom(%args...)             |  |
+//      |   cf.br dom(%args...)             |  |
 //      +--------------------------------+  |
 //             |                            |
 //   |----------               |-------------
@@ -171,14 +172,14 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //   |  +--------------------------------+
 //   |  | else:                          |
 //   |  |   <else contents>              |
-//   |  |   br dom(%args...)             |
+//   |  |   cf.br dom(%args...)             |
 //   |  +--------------------------------+
 //   |         |
 //   ------|   |
 //         v   v
 //      +--------------------------------+
 //      | dom(%args...):                 |
-//      |   br continue                  |
+//      |   cf.br continue                  |
 //      +--------------------------------+
 //             |
 //             v
@@ -218,7 +219,7 @@ struct ParallelLowering : public OpRewritePattern<mlir::scf::ParallelOp> {
 ///
 ///      +---------------------------------+
 ///      |   <code before the WhileOp>     |
-///      |   br ^before(%operands...)      |
+///      |   cf.br ^before(%operands...)      |
 ///      +---------------------------------+
 ///             |
 ///  -------|   |
@@ -233,7 +234,7 @@ struct ParallelLowering : public OpRewritePattern<mlir::scf::ParallelOp> {
 ///  |   +--------------------------------+
 ///  |   | ^before-last:
 ///  |   |   %cond = <compute condition>  |
-///  |   |   cond_br %cond,               |
+///  |   |   cf.cond_br %cond,               |
 ///  |   |        ^after(%vals...), ^cont |
 ///  |   +--------------------------------+
 ///  |          |               |
@@ -249,7 +250,7 @@ struct ParallelLowering : public OpRewritePattern<mlir::scf::ParallelOp> {
 ///  |   +--------------------------------+  |
 ///  |   | ^after-last:                   |  |
 ///  |   |   %yields... = <some payload>  |  |
-///  |   |   br ^before(%yields...)       |  |
+///  |   |   cf.br ^before(%yields...)       |  |
 ///  |   +--------------------------------+  |
 ///  |          |                            |
 ///  |-----------        |--------------------
@@ -321,7 +322,7 @@ LogicalResult ForLowering::matchAndRewrite(ForOp forOp,
   SmallVector<Value, 8> loopCarried;
   loopCarried.push_back(stepped);
   loopCarried.append(terminator->operand_begin(), terminator->operand_end());
-  rewriter.create<BranchOp>(loc, conditionBlock, loopCarried);
+  rewriter.create<cf::BranchOp>(loc, conditionBlock, loopCarried);
   rewriter.eraseOp(terminator);
 
   // Compute loop bounds before branching to the condition.
@@ -337,15 +338,16 @@ LogicalResult ForLowering::matchAndRewrite(ForOp forOp,
   destOperands.push_back(lowerBound);
   auto iterOperands = forOp.getIterOperands();
   destOperands.append(iterOperands.begin(), iterOperands.end());
-  rewriter.create<BranchOp>(loc, conditionBlock, destOperands);
+  rewriter.create<cf::BranchOp>(loc, conditionBlock, destOperands);
 
   // With the body block done, we can fill in the condition block.
   rewriter.setInsertionPointToEnd(conditionBlock);
   auto comparison = rewriter.create<arith::CmpIOp>(
       loc, arith::CmpIPredicate::slt, iv, upperBound);
 
-  rewriter.create<CondBranchOp>(loc, comparison, firstBodyBlock,
-                                ArrayRef<Value>(), endBlock, ArrayRef<Value>());
+  rewriter.create<cf::CondBranchOp>(loc, comparison, firstBodyBlock,
+                                    ArrayRef<Value>(), endBlock,
+                                    ArrayRef<Value>());
   // The result of the loop operation is the values of the condition block
   // arguments except the induction variable on the last iteration.
   rewriter.replaceOp(forOp, conditionBlock->getArguments().drop_front());
@@ -369,7 +371,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
     continueBlock =
         rewriter.createBlock(remainingOpsBlock, ifOp.getResultTypes(),
                              SmallVector<Location>(ifOp.getNumResults(), loc));
-    rewriter.create<BranchOp>(loc, remainingOpsBlock);
+    rewriter.create<cf::BranchOp>(loc, remainingOpsBlock);
   }
 
   // Move blocks from the "then" region to the region containing 'scf.if',
@@ -379,7 +381,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
   Operation *thenTerminator = thenRegion.back().getTerminator();
   ValueRange thenTerminatorOperands = thenTerminator->getOperands();
   rewriter.setInsertionPointToEnd(&thenRegion.back());
-  rewriter.create<BranchOp>(loc, continueBlock, thenTerminatorOperands);
+  rewriter.create<cf::BranchOp>(loc, continueBlock, thenTerminatorOperands);
   rewriter.eraseOp(thenTerminator);
   rewriter.inlineRegionBefore(thenRegion, continueBlock);
 
@@ -393,15 +395,15 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
     Operation *elseTerminator = elseRegion.back().getTerminator();
     ValueRange elseTerminatorOperands = elseTerminator->getOperands();
     rewriter.setInsertionPointToEnd(&elseRegion.back());
-    rewriter.create<BranchOp>(loc, continueBlock, elseTerminatorOperands);
+    rewriter.create<cf::BranchOp>(loc, continueBlock, elseTerminatorOperands);
     rewriter.eraseOp(elseTerminator);
     rewriter.inlineRegionBefore(elseRegion, continueBlock);
   }
 
   rewriter.setInsertionPointToEnd(condBlock);
-  rewriter.create<CondBranchOp>(loc, ifOp.getCondition(), thenBlock,
-                                /*trueArgs=*/ArrayRef<Value>(), elseBlock,
-                                /*falseArgs=*/ArrayRef<Value>());
+  rewriter.create<cf::CondBranchOp>(loc, ifOp.getCondition(), thenBlock,
+                                    /*trueArgs=*/ArrayRef<Value>(), elseBlock,
+                                    /*falseArgs=*/ArrayRef<Value>());
 
   // Ok, we're done!
   rewriter.replaceOp(ifOp, continueBlock->getArguments());
@@ -419,13 +421,13 @@ ExecuteRegionLowering::matchAndRewrite(ExecuteRegionOp op,
 
   auto &region = op.getRegion();
   rewriter.setInsertionPointToEnd(condBlock);
-  rewriter.create<BranchOp>(loc, &region.front());
+  rewriter.create<cf::BranchOp>(loc, &region.front());
 
   for (Block &block : region) {
     if (auto terminator = dyn_cast<scf::YieldOp>(block.getTerminator())) {
       ValueRange terminatorOperands = terminator->getOperands();
       rewriter.setInsertionPointToEnd(&block);
-      rewriter.create<BranchOp>(loc, remainingOpsBlock, terminatorOperands);
+      rewriter.create<cf::BranchOp>(loc, remainingOpsBlock, terminatorOperands);
       rewriter.eraseOp(terminator);
     }
   }
@@ -538,20 +540,21 @@ LogicalResult WhileLowering::matchAndRewrite(WhileOp whileOp,
 
   // Branch to the "before" region.
   rewriter.setInsertionPointToEnd(currentBlock);
-  rewriter.create<BranchOp>(loc, before, whileOp.getInits());
+  rewriter.create<cf::BranchOp>(loc, before, whileOp.getInits());
 
   // Replace terminators with branches. Assuming bodies are SESE, which holds
   // given only the patterns from this file, we only need to look at the last
   // block. This should be reconsidered if we allow break/continue in SCF.
   rewriter.setInsertionPointToEnd(beforeLast);
   auto condOp = cast<ConditionOp>(beforeLast->getTerminator());
-  rewriter.replaceOpWithNewOp<CondBranchOp>(condOp, condOp.getCondition(),
-                                            after, condOp.getArgs(),
-                                            continuation, ValueRange());
+  rewriter.replaceOpWithNewOp<cf::CondBranchOp>(condOp, condOp.getCondition(),
+                                                after, condOp.getArgs(),
+                                                continuation, ValueRange());
 
   rewriter.setInsertionPointToEnd(afterLast);
   auto yieldOp = cast<scf::YieldOp>(afterLast->getTerminator());
-  rewriter.replaceOpWithNewOp<BranchOp>(yieldOp, before, yieldOp.getResults());
+  rewriter.replaceOpWithNewOp<cf::BranchOp>(yieldOp, before,
+                                            yieldOp.getResults());
 
   // Replace the op with values "yielded" from the "before" region, which are
   // visible by dominance.
@@ -593,14 +596,14 @@ DoWhileLowering::matchAndRewrite(WhileOp whileOp,
 
   // Branch to the "before" region.
   rewriter.setInsertionPointToEnd(currentBlock);
-  rewriter.create<BranchOp>(whileOp.getLoc(), before, whileOp.getInits());
+  rewriter.create<cf::BranchOp>(whileOp.getLoc(), before, whileOp.getInits());
 
   // Loop around the "before" region based on condition.
   rewriter.setInsertionPointToEnd(beforeLast);
   auto condOp = cast<ConditionOp>(beforeLast->getTerminator());
-  rewriter.replaceOpWithNewOp<CondBranchOp>(condOp, condOp.getCondition(),
-                                            before, condOp.getArgs(),
-                                            continuation, ValueRange());
+  rewriter.replaceOpWithNewOp<cf::CondBranchOp>(condOp, condOp.getCondition(),
+                                                before, condOp.getArgs(),
+                                                continuation, ValueRange());
 
   // Replace the op with values "yielded" from the "before" region, which are
   // visible by dominance.
@@ -609,17 +612,18 @@ DoWhileLowering::matchAndRewrite(WhileOp whileOp,
   return success();
 }
 
-void mlir::populateLoopToStdConversionPatterns(RewritePatternSet &patterns) {
+void mlir::populateSCFToControlFlowConversionPatterns(
+    RewritePatternSet &patterns) {
   patterns.add<ForLowering, IfLowering, ParallelLowering, WhileLowering,
                ExecuteRegionLowering>(patterns.getContext());
   patterns.add<DoWhileLowering>(patterns.getContext(), /*benefit=*/2);
 }
 
-void SCFToStandardPass::runOnOperation() {
+void SCFToControlFlowPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
-  populateLoopToStdConversionPatterns(patterns);
-  // Configure conversion to lower out scf.for, scf.if, scf.parallel and
-  // scf.while. Anything else is fine.
+  populateSCFToControlFlowConversionPatterns(patterns);
+
+  // Configure conversion to lower out SCF operations.
   ConversionTarget target(getContext());
   target.addIllegalOp<scf::ForOp, scf::IfOp, scf::ParallelOp, scf::WhileOp,
                       scf::ExecuteRegionOp>();
@@ -629,6 +633,6 @@ void SCFToStandardPass::runOnOperation() {
     signalPassFailure();
 }
 
-std::unique_ptr<Pass> mlir::createLowerToCFGPass() {
-  return std::make_unique<SCFToStandardPass>();
+std::unique_ptr<Pass> mlir::createConvertSCFToCFPass() {
+  return std::make_unique<SCFToControlFlowPass>();
 }

diff  --git a/mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp b/mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp
index ccf6fd38100e5..f24ae5bbd6cf7 100644
--- a/mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp
+++ b/mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"
 
 #include "../PassDetail.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Shape/IR/Shape.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -29,7 +30,7 @@ class ConvertCstrRequireOp : public OpRewritePattern<shape::CstrRequireOp> {
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(shape::CstrRequireOp op,
                                 PatternRewriter &rewriter) const override {
-    rewriter.create<AssertOp>(op.getLoc(), op.getPred(), op.getMsgAttr());
+    rewriter.create<cf::AssertOp>(op.getLoc(), op.getPred(), op.getMsgAttr());
     rewriter.replaceOpWithNewOp<shape::ConstWitnessOp>(op, true);
     return success();
   }

diff  --git a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
index 4a637648ca2c0..ec31d8170ec05 100644
--- a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_conversion_library(MLIRStandardToLLVM
   LINK_LIBS PUBLIC
   MLIRAnalysis
   MLIRArithmeticToLLVM
+  MLIRControlFlowToLLVM
   MLIRDataLayoutInterfaces
   MLIRLLVMCommonConversion
   MLIRLLVMIR

diff  --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 04c51422ed115..6caaa0bfc993e 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -14,6 +14,7 @@
 #include "../PassDetail.h"
 #include "mlir/Analysis/DataLayoutAnalysis.h"
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/VectorPattern.h"
@@ -387,48 +388,6 @@ struct BarePtrFuncOpConversion : public FuncOpConversionBase {
   }
 };
 
-/// Lower `std.assert`. The default lowering calls the `abort` function if the
-/// assertion is violated and has no effect otherwise. The failure message is
-/// ignored by the default lowering but should be propagated by any custom
-/// lowering.
-struct AssertOpLowering : public ConvertOpToLLVMPattern<AssertOp> {
-  using ConvertOpToLLVMPattern<AssertOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(AssertOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto loc = op.getLoc();
-
-    // Insert the `abort` declaration if necessary.
-    auto module = op->getParentOfType<ModuleOp>();
-    auto abortFunc = module.lookupSymbol<LLVM::LLVMFuncOp>("abort");
-    if (!abortFunc) {
-      OpBuilder::InsertionGuard guard(rewriter);
-      rewriter.setInsertionPointToStart(module.getBody());
-      auto abortFuncTy = LLVM::LLVMFunctionType::get(getVoidType(), {});
-      abortFunc = rewriter.create<LLVM::LLVMFuncOp>(rewriter.getUnknownLoc(),
-                                                    "abort", abortFuncTy);
-    }
-
-    // Split block at `assert` operation.
-    Block *opBlock = rewriter.getInsertionBlock();
-    auto opPosition = rewriter.getInsertionPoint();
-    Block *continuationBlock = rewriter.splitBlock(opBlock, opPosition);
-
-    // Generate IR to call `abort`.
-    Block *failureBlock = rewriter.createBlock(opBlock->getParent());
-    rewriter.create<LLVM::CallOp>(loc, abortFunc, llvm::None);
-    rewriter.create<LLVM::UnreachableOp>(loc);
-
-    // Generate assertion test.
-    rewriter.setInsertionPointToEnd(opBlock);
-    rewriter.replaceOpWithNewOp<LLVM::CondBrOp>(
-        op, adaptor.getArg(), continuationBlock, failureBlock);
-
-    return success();
-  }
-};
-
 struct ConstantOpLowering : public ConvertOpToLLVMPattern<ConstantOp> {
   using ConvertOpToLLVMPattern<ConstantOp>::ConvertOpToLLVMPattern;
 
@@ -550,22 +509,6 @@ struct UnrealizedConversionCastOpLowering
   }
 };
 
-// Base class for LLVM IR lowering terminator operations with successors.
-template <typename SourceOp, typename TargetOp>
-struct OneToOneLLVMTerminatorLowering
-    : public ConvertOpToLLVMPattern<SourceOp> {
-  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
-  using Super = OneToOneLLVMTerminatorLowering<SourceOp, TargetOp>;
-
-  LogicalResult
-  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    rewriter.replaceOpWithNewOp<TargetOp>(op, adaptor.getOperands(),
-                                          op->getSuccessors(), op->getAttrs());
-    return success();
-  }
-};
-
 // Special lowering pattern for `ReturnOps`.  Unlike all other operations,
 // `ReturnOp` interacts with the function signature and must have as many
 // operands as the function has return values.  Because in LLVM IR, functions
@@ -633,21 +576,6 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern<ReturnOp> {
     return success();
   }
 };
-
-// FIXME: this should be tablegen'ed as well.
-struct BranchOpLowering
-    : public OneToOneLLVMTerminatorLowering<BranchOp, LLVM::BrOp> {
-  using Super::Super;
-};
-struct CondBranchOpLowering
-    : public OneToOneLLVMTerminatorLowering<CondBranchOp, LLVM::CondBrOp> {
-  using Super::Super;
-};
-struct SwitchOpLowering
-    : public OneToOneLLVMTerminatorLowering<SwitchOp, LLVM::SwitchOp> {
-  using Super::Super;
-};
-
 } // namespace
 
 void mlir::populateStdToLLVMFuncOpConversionPattern(
@@ -663,14 +591,10 @@ void mlir::populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
   populateStdToLLVMFuncOpConversionPattern(converter, patterns);
   // clang-format off
   patterns.add<
-      AssertOpLowering,
-      BranchOpLowering,
       CallIndirectOpLowering,
       CallOpLowering,
-      CondBranchOpLowering,
       ConstantOpLowering,
-      ReturnOpLowering,
-      SwitchOpLowering>(converter);
+      ReturnOpLowering>(converter);
   // clang-format on
 }
 
@@ -721,6 +645,7 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase<LLVMLoweringPass> {
     RewritePatternSet patterns(&getContext());
     populateStdToLLVMConversionPatterns(typeConverter, patterns);
     arith::populateArithmeticToLLVMConversionPatterns(typeConverter, patterns);
+    cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns);
 
     LLVMConversionTarget target(getContext());
     if (failed(applyPartialConversion(m, target, std::move(patterns))))

diff  --git a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
index 500f64b28ff5e..979780aea2284 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRStandardToSPIRV
 
   LINK_LIBS PUBLIC
   MLIRArithmeticToSPIRV
+  MLIRControlFlowToSPIRV
   MLIRIR
   MLIRMathToSPIRV
   MLIRMemRef

diff  --git a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp
index 7e1269013af58..36d39c85bfbd2 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp
+++ b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp
@@ -46,24 +46,6 @@ class ReturnOpPattern final : public OpConversionPattern<ReturnOp> {
                   ConversionPatternRewriter &rewriter) const override;
 };
 
-/// Converts std.br to spv.Branch.
-struct BranchOpPattern final : public OpConversionPattern<BranchOp> {
-  using OpConversionPattern<BranchOp>::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(BranchOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override;
-};
-
-/// Converts std.cond_br to spv.BranchConditional.
-struct CondBranchOpPattern final : public OpConversionPattern<CondBranchOp> {
-  using OpConversionPattern<CondBranchOp>::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(CondBranchOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override;
-};
-
 /// Converts tensor.extract into loading using access chains from SPIR-V local
 /// variables.
 class TensorExtractPattern final
@@ -146,31 +128,6 @@ ReturnOpPattern::matchAndRewrite(ReturnOp returnOp, OpAdaptor adaptor,
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// BranchOpPattern
-//===----------------------------------------------------------------------===//
-
-LogicalResult
-BranchOpPattern::matchAndRewrite(BranchOp op, OpAdaptor adaptor,
-                                 ConversionPatternRewriter &rewriter) const {
-  rewriter.replaceOpWithNewOp<spirv::BranchOp>(op, op.getDest(),
-                                               adaptor.getDestOperands());
-  return success();
-}
-
-//===----------------------------------------------------------------------===//
-// CondBranchOpPattern
-//===----------------------------------------------------------------------===//
-
-LogicalResult CondBranchOpPattern::matchAndRewrite(
-    CondBranchOp op, OpAdaptor adaptor,
-    ConversionPatternRewriter &rewriter) const {
-  rewriter.replaceOpWithNewOp<spirv::BranchConditionalOp>(
-      op, op.getCondition(), op.getTrueDest(), adaptor.getTrueDestOperands(),
-      op.getFalseDest(), adaptor.getFalseDestOperands());
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // Pattern population
 //===----------------------------------------------------------------------===//
@@ -189,8 +146,7 @@ void populateStandardToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
       spirv::ElementwiseOpPattern<arith::MinSIOp, spirv::GLSLSMinOp>,
       spirv::ElementwiseOpPattern<arith::MinUIOp, spirv::GLSLUMinOp>,
 
-      ReturnOpPattern, BranchOpPattern, CondBranchOpPattern>(typeConverter,
-                                                             context);
+      ReturnOpPattern>(typeConverter, context);
 }
 
 void populateTensorToSPIRVPatterns(SPIRVTypeConverter &typeConverter,

diff  --git a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp
index 4c868f76067cf..176664d6d6ed5 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Conversion/StandardToSPIRV/StandardToSPIRVPass.h"
 #include "../PassDetail.h"
 #include "mlir/Conversion/ArithmeticToSPIRV/ArithmeticToSPIRV.h"
+#include "mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h"
 #include "mlir/Conversion/MathToSPIRV/MathToSPIRV.h"
 #include "mlir/Conversion/StandardToSPIRV/StandardToSPIRV.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
@@ -40,9 +41,11 @@ void ConvertStandardToSPIRVPass::runOnOperation() {
   options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes;
   SPIRVTypeConverter typeConverter(targetAttr, options);
 
-  // TODO ArithmeticToSPIRV cannot be applied separately to StandardToSPIRV
+  // TODO ArithmeticToSPIRV/ControlFlowToSPIRV cannot be applied separately to
+  // StandardToSPIRV
   RewritePatternSet patterns(context);
   arith::populateArithmeticToSPIRVPatterns(typeConverter, patterns);
+  cf::populateControlFlowToSPIRVPatterns(typeConverter, patterns);
   populateMathToSPIRVPatterns(typeConverter, patterns);
   populateStandardToSPIRVPatterns(typeConverter, patterns);
   populateTensorToSPIRVPatterns(typeConverter, /*byteCountThreshold=*/64,

diff  --git a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp
index 81fa42438c322..3fd80ea9f9ee7 100644
--- a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp
+++ b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Analysis/Liveness.h"
 #include "mlir/Dialect/Async/IR/Async.h"
 #include "mlir/Dialect/Async/Passes.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/IR/PatternMatch.h"
@@ -169,11 +170,11 @@ class AsyncRuntimeRefCountingPass
   ///
   ///   ^entry:
   ///     %token = async.runtime.create : !async.token
-  ///     cond_br %cond, ^bb1, ^bb2
+  ///     cf.cond_br %cond, ^bb1, ^bb2
   ///   ^bb1:
   ///     async.runtime.await %token
   ///     async.runtime.drop_ref %token
-  ///     br ^bb2
+  ///     cf.br ^bb2
   ///   ^bb2:
   ///     return
   ///
@@ -185,14 +186,14 @@ class AsyncRuntimeRefCountingPass
   ///
   ///   ^entry:
   ///     %token = async.runtime.create : !async.token
-  ///     cond_br %cond, ^bb1, ^reference_counting
+  ///     cf.cond_br %cond, ^bb1, ^reference_counting
   ///   ^bb1:
   ///     async.runtime.await %token
   ///     async.runtime.drop_ref %token
-  ///     br ^bb2
+  ///     cf.br ^bb2
   ///   ^reference_counting:
   ///     async.runtime.drop_ref %token
-  ///     br ^bb2
+  ///     cf.br ^bb2
   ///   ^bb2:
   ///     return
   ///
@@ -208,7 +209,7 @@ class AsyncRuntimeRefCountingPass
   ///     async.coro.suspend %ret, ^suspend, ^resume, ^cleanup
   ///   ^resume:
   ///     %0 = async.runtime.load %value
-  ///     br ^cleanup
+  ///     cf.br ^cleanup
   ///   ^cleanup:
   ///     ...
   ///   ^suspend:
@@ -406,7 +407,7 @@ AsyncRuntimeRefCountingPass::addDropRefInDivergentLivenessSuccessor(
         refCountingBlock = &successor->getParent()->emplaceBlock();
         refCountingBlock->moveBefore(successor);
         OpBuilder builder = OpBuilder::atBlockEnd(refCountingBlock);
-        builder.create<BranchOp>(value.getLoc(), successor);
+        builder.create<cf::BranchOp>(value.getLoc(), successor);
       }
 
       OpBuilder builder = OpBuilder::atBlockBegin(refCountingBlock);

diff  --git a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp
index 8e9c083df0c2a..d1d3e4f5326a9 100644
--- a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp
+++ b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Async/IR/Async.h"
 #include "mlir/Dialect/Async/Passes.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -105,18 +106,18 @@ struct CoroMachinery {
 ///       %value = <async value> : !async.value<T> // create async value
 ///       %id = async.coro.id                      // create a coroutine id
 ///       %hdl = async.coro.begin %id              // create a coroutine handle
-///       br ^preexisting_entry_block
+///       cf.br ^preexisting_entry_block
 ///
 ///     /*  preexisting blocks modified to branch to the cleanup block */
 ///
 ///     ^set_error: // this block created lazily only if needed (see code below)
 ///       async.runtime.set_error %token : !async.token
 ///       async.runtime.set_error %value : !async.value<T>
-///       br ^cleanup
+///       cf.br ^cleanup
 ///
 ///     ^cleanup:
 ///       async.coro.free %hdl // delete the coroutine state
-///       br ^suspend
+///       cf.br ^suspend
 ///
 ///     ^suspend:
 ///       async.coro.end %hdl // marks the end of a coroutine
@@ -147,7 +148,7 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
   auto coroIdOp = builder.create<CoroIdOp>(CoroIdType::get(ctx));
   auto coroHdlOp =
       builder.create<CoroBeginOp>(CoroHandleType::get(ctx), coroIdOp.id());
-  builder.create<BranchOp>(originalEntryBlock);
+  builder.create<cf::BranchOp>(originalEntryBlock);
 
   Block *cleanupBlock = func.addBlock();
   Block *suspendBlock = func.addBlock();
@@ -159,7 +160,7 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
   builder.create<CoroFreeOp>(coroIdOp.id(), coroHdlOp.handle());
 
   // Branch into the suspend block.
-  builder.create<BranchOp>(suspendBlock);
+  builder.create<cf::BranchOp>(suspendBlock);
 
   // ------------------------------------------------------------------------ //
   // Coroutine suspend block: mark the end of a coroutine and return allocated
@@ -186,7 +187,7 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
     Operation *terminator = block.getTerminator();
     if (auto yield = dyn_cast<YieldOp>(terminator)) {
       builder.setInsertionPointToEnd(&block);
-      builder.create<BranchOp>(cleanupBlock);
+      builder.create<cf::BranchOp>(cleanupBlock);
     }
   }
 
@@ -227,7 +228,7 @@ static Block *setupSetErrorBlock(CoroMachinery &coro) {
     builder.create<RuntimeSetErrorOp>(retValue);
 
   // Branch into the cleanup block.
-  builder.create<BranchOp>(coro.cleanup);
+  builder.create<cf::BranchOp>(coro.cleanup);
 
   return coro.setError;
 }
@@ -305,7 +306,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) {
   // Async resume operation (execution will be resumed in a thread managed by
   // the async runtime).
   {
-    BranchOp branch = cast<BranchOp>(coro.entry->getTerminator());
+    cf::BranchOp branch = cast<cf::BranchOp>(coro.entry->getTerminator());
     builder.setInsertionPointToEnd(coro.entry);
 
     // Save the coroutine state: async.coro.save
@@ -419,8 +420,8 @@ class AwaitOpLoweringBase : public OpConversionPattern<AwaitType> {
           isError, builder.create<arith::ConstantOp>(
                        loc, i1, builder.getIntegerAttr(i1, 1)));
 
-      builder.create<AssertOp>(notError,
-                               "Awaited async operand is in error state");
+      builder.create<cf::AssertOp>(notError,
+                                   "Awaited async operand is in error state");
     }
 
     // Inside the coroutine we convert await operation into coroutine suspension
@@ -452,11 +453,11 @@ class AwaitOpLoweringBase : public OpConversionPattern<AwaitType> {
       // Check if the awaited value is in the error state.
       builder.setInsertionPointToStart(resume);
       auto isError = builder.create<RuntimeIsErrorOp>(loc, i1, operand);
-      builder.create<CondBranchOp>(isError,
-                                   /*trueDest=*/setupSetErrorBlock(coro),
-                                   /*trueArgs=*/ArrayRef<Value>(),
-                                   /*falseDest=*/continuation,
-                                   /*falseArgs=*/ArrayRef<Value>());
+      builder.create<cf::CondBranchOp>(isError,
+                                       /*trueDest=*/setupSetErrorBlock(coro),
+                                       /*trueArgs=*/ArrayRef<Value>(),
+                                       /*falseDest=*/continuation,
+                                       /*falseArgs=*/ArrayRef<Value>());
 
       // Make sure that replacement value will be constructed in the
       // continuation block.
@@ -560,18 +561,18 @@ class YieldOpLowering : public OpConversionPattern<async::YieldOp> {
 };
 
 //===----------------------------------------------------------------------===//
-// Convert std.assert operation to cond_br into `set_error` block.
+// Convert std.assert operation to cf.cond_br into `set_error` block.
 //===----------------------------------------------------------------------===//
 
-class AssertOpLowering : public OpConversionPattern<AssertOp> {
+class AssertOpLowering : public OpConversionPattern<cf::AssertOp> {
 public:
   AssertOpLowering(MLIRContext *ctx,
                    llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions)
-      : OpConversionPattern<AssertOp>(ctx),
+      : OpConversionPattern<cf::AssertOp>(ctx),
         outlinedFunctions(outlinedFunctions) {}
 
   LogicalResult
-  matchAndRewrite(AssertOp op, OpAdaptor adaptor,
+  matchAndRewrite(cf::AssertOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Check if assert operation is inside the async coroutine function.
     auto func = op->template getParentOfType<FuncOp>();
@@ -585,11 +586,11 @@ class AssertOpLowering : public OpConversionPattern<AssertOp> {
 
     Block *cont = rewriter.splitBlock(op->getBlock(), Block::iterator(op));
     rewriter.setInsertionPointToEnd(cont->getPrevNode());
-    rewriter.create<CondBranchOp>(loc, adaptor.getArg(),
-                                  /*trueDest=*/cont,
-                                  /*trueArgs=*/ArrayRef<Value>(),
-                                  /*falseDest=*/setupSetErrorBlock(coro),
-                                  /*falseArgs=*/ArrayRef<Value>());
+    rewriter.create<cf::CondBranchOp>(loc, adaptor.getArg(),
+                                      /*trueDest=*/cont,
+                                      /*trueArgs=*/ArrayRef<Value>(),
+                                      /*falseDest=*/setupSetErrorBlock(coro),
+                                      /*falseArgs=*/ArrayRef<Value>());
     rewriter.eraseOp(op);
 
     return success();
@@ -765,7 +766,7 @@ void AsyncToAsyncRuntimePass::runOnOperation() {
   // and we have to make sure that structured control flow operations with async
   // operations in nested regions will be converted to branch-based control flow
   // before we add the coroutine basic blocks.
-  populateLoopToStdConversionPatterns(asyncPatterns);
+  populateSCFToControlFlowConversionPatterns(asyncPatterns);
 
   // Async lowering does not use type converter because it must preserve all
   // types for async.runtime operations.
@@ -792,14 +793,15 @@ void AsyncToAsyncRuntimePass::runOnOperation() {
     });
     return !walkResult.wasInterrupted();
   });
-  runtimeTarget.addLegalOp<AssertOp, arith::XOrIOp, arith::ConstantOp,
-                           ConstantOp, BranchOp, CondBranchOp>();
+  runtimeTarget.addLegalOp<cf::AssertOp, arith::XOrIOp, arith::ConstantOp,
+                           ConstantOp, cf::BranchOp, cf::CondBranchOp>();
 
   // Assertions must be converted to runtime errors inside async functions.
-  runtimeTarget.addDynamicallyLegalOp<AssertOp>([&](AssertOp op) -> bool {
-    auto func = op->getParentOfType<FuncOp>();
-    return outlinedFunctions.find(func) == outlinedFunctions.end();
-  });
+  runtimeTarget.addDynamicallyLegalOp<cf::AssertOp>(
+      [&](cf::AssertOp op) -> bool {
+        auto func = op->getParentOfType<FuncOp>();
+        return outlinedFunctions.find(func) == outlinedFunctions.end();
+      });
 
   if (eliminateBlockingAwaitOps)
     runtimeTarget.addDynamicallyLegalOp<RuntimeAwaitOp>(

diff  --git a/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt
index 61ecbaacbea7a..e5f9c75af28ed 100644
--- a/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt
@@ -17,7 +17,7 @@ add_mlir_dialect_library(MLIRAsyncTransforms
   MLIRIR
   MLIRPass
   MLIRSCF
-  MLIRSCFToStandard
+  MLIRSCFToControlFlow
   MLIRStandard
   MLIRTransforms
   MLIRTransformUtils

diff  --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
index ed7bd5c20d58e..f3646806639e3 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
@@ -18,12 +18,12 @@
 // (using the BufferViewFlowAnalysis class). Consider the following example:
 //
 // ^bb0(%arg0):
-//   cond_br %cond, ^bb1, ^bb2
+//   cf.cond_br %cond, ^bb1, ^bb2
 // ^bb1:
-//   br ^exit(%arg0)
+//   cf.br ^exit(%arg0)
 // ^bb2:
 //   %new_value = ...
-//   br ^exit(%new_value)
+//   cf.br ^exit(%new_value)
 // ^exit(%arg1):
 //   return %arg1;
 //

diff  --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt
index da2be111e2083..9d6a30ca3d7f1 100644
--- a/mlir/lib/Dialect/CMakeLists.txt
+++ b/mlir/lib/Dialect/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(Async)
 add_subdirectory(AMX)
 add_subdirectory(Bufferization)
 add_subdirectory(Complex)
+add_subdirectory(ControlFlow)
 add_subdirectory(DLTI)
 add_subdirectory(EmitC)
 add_subdirectory(GPU)

diff  --git a/mlir/lib/Dialect/ControlFlow/CMakeLists.txt b/mlir/lib/Dialect/ControlFlow/CMakeLists.txt
new file mode 100644
index 0000000000000..f33061b2d87cf
--- /dev/null
+++ b/mlir/lib/Dialect/ControlFlow/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(IR)

diff  --git a/mlir/lib/Dialect/ControlFlow/IR/CMakeLists.txt b/mlir/lib/Dialect/ControlFlow/IR/CMakeLists.txt
new file mode 100644
index 0000000000000..2c32274092de0
--- /dev/null
+++ b/mlir/lib/Dialect/ControlFlow/IR/CMakeLists.txt
@@ -0,0 +1,15 @@
+add_mlir_dialect_library(MLIRControlFlow
+  ControlFlowOps.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/ControlFlow/IR
+
+  DEPENDS
+  MLIRControlFlowOpsIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRArithmetic
+  MLIRControlFlowInterfaces
+  MLIRIR
+  MLIRSideEffectInterfaces
+  )

diff  --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
new file mode 100644
index 0000000000000..11d719a1b5492
--- /dev/null
+++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
@@ -0,0 +1,891 @@
+//===- ControlFlowOps.cpp - ControlFlow Operations ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+
+#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/CommonFolders.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/BlockAndValueMapping.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Support/MathExtras.h"
+#include "mlir/Transforms/InliningUtils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOpsDialect.cpp.inc"
+
+using namespace mlir;
+using namespace mlir::cf;
+
+//===----------------------------------------------------------------------===//
+// ControlFlowDialect Interfaces
+//===----------------------------------------------------------------------===//
+namespace {
+/// This class defines the interface for handling inlining with control flow
+/// operations.
+struct ControlFlowInlinerInterface : public DialectInlinerInterface {
+  using DialectInlinerInterface::DialectInlinerInterface;
+  ~ControlFlowInlinerInterface() override = default;
+
+  /// All control flow operations can be inlined.
+  bool isLegalToInline(Operation *call, Operation *callable,
+                       bool wouldBeCloned) const final {
+    return true;
+  }
+  bool isLegalToInline(Operation *, Region *, bool,
+                       BlockAndValueMapping &) const final {
+    return true;
+  }
+
+  /// ControlFlow terminator operations don't really need any special handling.
+  void handleTerminator(Operation *op, Block *newDest) const final {}
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// ControlFlowDialect
+//===----------------------------------------------------------------------===//
+
+void ControlFlowDialect::initialize() {
+  addOperations<
+#define GET_OP_LIST
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.cpp.inc"
+      >();
+  addInterfaces<ControlFlowInlinerInterface>();
+}
+
+//===----------------------------------------------------------------------===//
+// AssertOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult AssertOp::canonicalize(AssertOp op, PatternRewriter &rewriter) {
+  // Erase assertion if argument is constant true.
+  if (matchPattern(op.getArg(), m_One())) {
+    rewriter.eraseOp(op);
+    return success();
+  }
+  return failure();
+}
+
+//===----------------------------------------------------------------------===//
+// BranchOp
+//===----------------------------------------------------------------------===//
+
+/// Given a successor, try to collapse it to a new destination if it only
+/// contains a passthrough unconditional branch. If the successor is
+/// collapsable, `successor` and `successorOperands` are updated to reference
+/// the new destination and values. `argStorage` is used as storage if operands
+/// to the collapsed successor need to be remapped. It must outlive uses of
+/// successorOperands.
+static LogicalResult collapseBranch(Block *&successor,
+                                    ValueRange &successorOperands,
+                                    SmallVectorImpl<Value> &argStorage) {
+  // Check that the successor only contains an unconditional branch.
+  if (std::next(successor->begin()) != successor->end())
+    return failure();
+  // Check that the terminator is an unconditional branch.
+  BranchOp successorBranch = dyn_cast<BranchOp>(successor->getTerminator());
+  if (!successorBranch)
+    return failure();
+  // Check that the arguments are only used within the terminator.
+  for (BlockArgument arg : successor->getArguments()) {
+    for (Operation *user : arg.getUsers())
+      if (user != successorBranch)
+        return failure();
+  }
+  // Don't try to collapse branches to infinite loops.
+  Block *successorDest = successorBranch.getDest();
+  if (successorDest == successor)
+    return failure();
+
+  // Update the operands to the successor. If the branch parent has no
+  // arguments, we can use the branch operands directly.
+  OperandRange operands = successorBranch.getOperands();
+  if (successor->args_empty()) {
+    successor = successorDest;
+    successorOperands = operands;
+    return success();
+  }
+
+  // Otherwise, we need to remap any argument operands.
+  for (Value operand : operands) {
+    BlockArgument argOperand = operand.dyn_cast<BlockArgument>();
+    if (argOperand && argOperand.getOwner() == successor)
+      argStorage.push_back(successorOperands[argOperand.getArgNumber()]);
+    else
+      argStorage.push_back(operand);
+  }
+  successor = successorDest;
+  successorOperands = argStorage;
+  return success();
+}
+
+/// Simplify a branch to a block that has a single predecessor. This effectively
+/// merges the two blocks.
+static LogicalResult
+simplifyBrToBlockWithSinglePred(BranchOp op, PatternRewriter &rewriter) {
+  // Check that the successor block has a single predecessor.
+  Block *succ = op.getDest();
+  Block *opParent = op->getBlock();
+  if (succ == opParent || !llvm::hasSingleElement(succ->getPredecessors()))
+    return failure();
+
+  // Merge the successor into the current block and erase the branch.
+  rewriter.mergeBlocks(succ, opParent, op.getOperands());
+  rewriter.eraseOp(op);
+  return success();
+}
+
+///   br ^bb1
+/// ^bb1
+///   br ^bbN(...)
+///
+///  -> br ^bbN(...)
+///
+static LogicalResult simplifyPassThroughBr(BranchOp op,
+                                           PatternRewriter &rewriter) {
+  Block *dest = op.getDest();
+  ValueRange destOperands = op.getOperands();
+  SmallVector<Value, 4> destOperandStorage;
+
+  // Try to collapse the successor if it points somewhere other than this
+  // block.
+  if (dest == op->getBlock() ||
+      failed(collapseBranch(dest, destOperands, destOperandStorage)))
+    return failure();
+
+  // Create a new branch with the collapsed successor.
+  rewriter.replaceOpWithNewOp<BranchOp>(op, dest, destOperands);
+  return success();
+}
+
+LogicalResult BranchOp::canonicalize(BranchOp op, PatternRewriter &rewriter) {
+  return success(succeeded(simplifyBrToBlockWithSinglePred(op, rewriter)) ||
+                 succeeded(simplifyPassThroughBr(op, rewriter)));
+}
+
+void BranchOp::setDest(Block *block) { return setSuccessor(block); }
+
+void BranchOp::eraseOperand(unsigned index) { (*this)->eraseOperand(index); }
+
+Optional<MutableOperandRange>
+BranchOp::getMutableSuccessorOperands(unsigned index) {
+  assert(index == 0 && "invalid successor index");
+  return getDestOperandsMutable();
+}
+
+Block *BranchOp::getSuccessorForOperands(ArrayRef<Attribute>) {
+  return getDest();
+}
+
+//===----------------------------------------------------------------------===//
+// CondBranchOp
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// cf.cond_br true, ^bb1, ^bb2
+///  -> br ^bb1
+/// cf.cond_br false, ^bb1, ^bb2
+///  -> br ^bb2
+///
+struct SimplifyConstCondBranchPred : public OpRewritePattern<CondBranchOp> {
+  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CondBranchOp condbr,
+                                PatternRewriter &rewriter) const override {
+    if (matchPattern(condbr.getCondition(), m_NonZero())) {
+      // True branch taken.
+      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getTrueDest(),
+                                            condbr.getTrueOperands());
+      return success();
+    }
+    if (matchPattern(condbr.getCondition(), m_Zero())) {
+      // False branch taken.
+      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getFalseDest(),
+                                            condbr.getFalseOperands());
+      return success();
+    }
+    return failure();
+  }
+};
+
+///   cf.cond_br %cond, ^bb1, ^bb2
+/// ^bb1
+///   br ^bbN(...)
+/// ^bb2
+///   br ^bbK(...)
+///
+///  -> cf.cond_br %cond, ^bbN(...), ^bbK(...)
+///
+struct SimplifyPassThroughCondBranch : public OpRewritePattern<CondBranchOp> {
+  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CondBranchOp condbr,
+                                PatternRewriter &rewriter) const override {
+    Block *trueDest = condbr.getTrueDest(), *falseDest = condbr.getFalseDest();
+    ValueRange trueDestOperands = condbr.getTrueOperands();
+    ValueRange falseDestOperands = condbr.getFalseOperands();
+    SmallVector<Value, 4> trueDestOperandStorage, falseDestOperandStorage;
+
+    // Try to collapse one of the current successors.
+    LogicalResult collapsedTrue =
+        collapseBranch(trueDest, trueDestOperands, trueDestOperandStorage);
+    LogicalResult collapsedFalse =
+        collapseBranch(falseDest, falseDestOperands, falseDestOperandStorage);
+    if (failed(collapsedTrue) && failed(collapsedFalse))
+      return failure();
+
+    // Create a new branch with the collapsed successors.
+    rewriter.replaceOpWithNewOp<CondBranchOp>(condbr, condbr.getCondition(),
+                                              trueDest, trueDestOperands,
+                                              falseDest, falseDestOperands);
+    return success();
+  }
+};
+
+/// cf.cond_br %cond, ^bb1(A, ..., N), ^bb1(A, ..., N)
+///  -> br ^bb1(A, ..., N)
+///
+/// cf.cond_br %cond, ^bb1(A), ^bb1(B)
+///  -> %select = arith.select %cond, A, B
+///     br ^bb1(%select)
+///
+struct SimplifyCondBranchIdenticalSuccessors
+    : public OpRewritePattern<CondBranchOp> {
+  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CondBranchOp condbr,
+                                PatternRewriter &rewriter) const override {
+    // Check that the true and false destinations are the same and have the same
+    // operands.
+    Block *trueDest = condbr.getTrueDest();
+    if (trueDest != condbr.getFalseDest())
+      return failure();
+
+    // If all of the operands match, no selects need to be generated.
+    OperandRange trueOperands = condbr.getTrueOperands();
+    OperandRange falseOperands = condbr.getFalseOperands();
+    if (trueOperands == falseOperands) {
+      rewriter.replaceOpWithNewOp<BranchOp>(condbr, trueDest, trueOperands);
+      return success();
+    }
+
+    // Otherwise, if the current block is the only predecessor insert selects
+    // for any mismatched branch operands.
+    if (trueDest->getUniquePredecessor() != condbr->getBlock())
+      return failure();
+
+    // Generate a select for any operands that differ between the two.
+    SmallVector<Value, 8> mergedOperands;
+    mergedOperands.reserve(trueOperands.size());
+    Value condition = condbr.getCondition();
+    for (auto it : llvm::zip(trueOperands, falseOperands)) {
+      if (std::get<0>(it) == std::get<1>(it))
+        mergedOperands.push_back(std::get<0>(it));
+      else
+        mergedOperands.push_back(rewriter.create<arith::SelectOp>(
+            condbr.getLoc(), condition, std::get<0>(it), std::get<1>(it)));
+    }
+
+    rewriter.replaceOpWithNewOp<BranchOp>(condbr, trueDest, mergedOperands);
+    return success();
+  }
+};
+
+///   ...
+///   cf.cond_br %cond, ^bb1(...), ^bb2(...)
+/// ...
+/// ^bb1: // has single predecessor
+///   ...
+///   cf.cond_br %cond, ^bb3(...), ^bb4(...)
+///
+/// ->
+///
+///   ...
+///   cf.cond_br %cond, ^bb1(...), ^bb2(...)
+/// ...
+/// ^bb1: // has single predecessor
+///   ...
+///   br ^bb3(...)
+///
+struct SimplifyCondBranchFromCondBranchOnSameCondition
+    : public OpRewritePattern<CondBranchOp> {
+  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CondBranchOp condbr,
+                                PatternRewriter &rewriter) const override {
+    // Check that we have a single distinct predecessor.
+    Block *currentBlock = condbr->getBlock();
+    Block *predecessor = currentBlock->getSinglePredecessor();
+    if (!predecessor)
+      return failure();
+
+    // Check that the predecessor terminates with a conditional branch to this
+    // block and that it branches on the same condition.
+    auto predBranch = dyn_cast<CondBranchOp>(predecessor->getTerminator());
+    if (!predBranch || condbr.getCondition() != predBranch.getCondition())
+      return failure();
+
+    // Fold this branch to an unconditional branch.
+    if (currentBlock == predBranch.getTrueDest())
+      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getTrueDest(),
+                                            condbr.getTrueDestOperands());
+    else
+      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getFalseDest(),
+                                            condbr.getFalseDestOperands());
+    return success();
+  }
+};
+
+///   cf.cond_br %arg0, ^trueB, ^falseB
+///
+/// ^trueB:
+///   "test.consumer1"(%arg0) : (i1) -> ()
+///    ...
+///
+/// ^falseB:
+///   "test.consumer2"(%arg0) : (i1) -> ()
+///   ...
+///
+/// ->
+///
+///   cf.cond_br %arg0, ^trueB, ^falseB
+/// ^trueB:
+///   "test.consumer1"(%true) : (i1) -> ()
+///   ...
+///
+/// ^falseB:
+///   "test.consumer2"(%false) : (i1) -> ()
+///   ...
+struct CondBranchTruthPropagation : public OpRewritePattern<CondBranchOp> {
+  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CondBranchOp condbr,
+                                PatternRewriter &rewriter) const override {
+    // Check that we have a single distinct predecessor.
+    bool replaced = false;
+    Type ty = rewriter.getI1Type();
+
+    // These variables serve to prevent creating duplicate constants
+    // and hold constant true or false values.
+    Value constantTrue = nullptr;
+    Value constantFalse = nullptr;
+
+    // TODO These checks can be expanded to encompass any use with only
+    // either the true or false edge as a predecessor. For now, we fall
+    // back to checking the single predecessor is given by the true/false
+    // destination, thereby ensuring that only that edge can reach the
+    // op.
+    if (condbr.getTrueDest()->getSinglePredecessor()) {
+      for (OpOperand &use :
+           llvm::make_early_inc_range(condbr.getCondition().getUses())) {
+        if (use.getOwner()->getBlock() == condbr.getTrueDest()) {
+          replaced = true;
+
+          if (!constantTrue)
+            constantTrue = rewriter.create<arith::ConstantOp>(
+                condbr.getLoc(), ty, rewriter.getBoolAttr(true));
+
+          rewriter.updateRootInPlace(use.getOwner(),
+                                     [&] { use.set(constantTrue); });
+        }
+      }
+    }
+    if (condbr.getFalseDest()->getSinglePredecessor()) {
+      for (OpOperand &use :
+           llvm::make_early_inc_range(condbr.getCondition().getUses())) {
+        if (use.getOwner()->getBlock() == condbr.getFalseDest()) {
+          replaced = true;
+
+          if (!constantFalse)
+            constantFalse = rewriter.create<arith::ConstantOp>(
+                condbr.getLoc(), ty, rewriter.getBoolAttr(false));
+
+          rewriter.updateRootInPlace(use.getOwner(),
+                                     [&] { use.set(constantFalse); });
+        }
+      }
+    }
+    return success(replaced);
+  }
+};
+} // namespace
+
+void CondBranchOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                               MLIRContext *context) {
+  results.add<SimplifyConstCondBranchPred, SimplifyPassThroughCondBranch,
+              SimplifyCondBranchIdenticalSuccessors,
+              SimplifyCondBranchFromCondBranchOnSameCondition,
+              CondBranchTruthPropagation>(context);
+}
+
+Optional<MutableOperandRange>
+CondBranchOp::getMutableSuccessorOperands(unsigned index) {
+  assert(index < getNumSuccessors() && "invalid successor index");
+  return index == trueIndex ? getTrueDestOperandsMutable()
+                            : getFalseDestOperandsMutable();
+}
+
+Block *CondBranchOp::getSuccessorForOperands(ArrayRef<Attribute> operands) {
+  if (IntegerAttr condAttr = operands.front().dyn_cast_or_null<IntegerAttr>())
+    return condAttr.getValue().isOneValue() ? getTrueDest() : getFalseDest();
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// SwitchOp
+//===----------------------------------------------------------------------===//
+
+void SwitchOp::build(OpBuilder &builder, OperationState &result, Value value,
+                     Block *defaultDestination, ValueRange defaultOperands,
+                     DenseIntElementsAttr caseValues,
+                     BlockRange caseDestinations,
+                     ArrayRef<ValueRange> caseOperands) {
+  build(builder, result, value, defaultOperands, caseOperands, caseValues,
+        defaultDestination, caseDestinations);
+}
+
+void SwitchOp::build(OpBuilder &builder, OperationState &result, Value value,
+                     Block *defaultDestination, ValueRange defaultOperands,
+                     ArrayRef<APInt> caseValues, BlockRange caseDestinations,
+                     ArrayRef<ValueRange> caseOperands) {
+  DenseIntElementsAttr caseValuesAttr;
+  if (!caseValues.empty()) {
+    ShapedType caseValueType = VectorType::get(
+        static_cast<int64_t>(caseValues.size()), value.getType());
+    caseValuesAttr = DenseIntElementsAttr::get(caseValueType, caseValues);
+  }
+  build(builder, result, value, defaultDestination, defaultOperands,
+        caseValuesAttr, caseDestinations, caseOperands);
+}
+
+/// <cases> ::= `default` `:` bb-id (`(` ssa-use-and-type-list `)`)?
+///             ( `,` integer `:` bb-id (`(` ssa-use-and-type-list `)`)? )*
+static ParseResult parseSwitchOpCases(
+    OpAsmParser &parser, Type &flagType, Block *&defaultDestination,
+    SmallVectorImpl<OpAsmParser::OperandType> &defaultOperands,
+    SmallVectorImpl<Type> &defaultOperandTypes,
+    DenseIntElementsAttr &caseValues,
+    SmallVectorImpl<Block *> &caseDestinations,
+    SmallVectorImpl<SmallVector<OpAsmParser::OperandType>> &caseOperands,
+    SmallVectorImpl<SmallVector<Type>> &caseOperandTypes) {
+  if (parser.parseKeyword("default") || parser.parseColon() ||
+      parser.parseSuccessor(defaultDestination))
+    return failure();
+  if (succeeded(parser.parseOptionalLParen())) {
+    if (parser.parseRegionArgumentList(defaultOperands) ||
+        parser.parseColonTypeList(defaultOperandTypes) || parser.parseRParen())
+      return failure();
+  }
+
+  SmallVector<APInt> values;
+  unsigned bitWidth = flagType.getIntOrFloatBitWidth();
+  while (succeeded(parser.parseOptionalComma())) {
+    int64_t value = 0;
+    if (failed(parser.parseInteger(value)))
+      return failure();
+    values.push_back(APInt(bitWidth, value));
+
+    Block *destination;
+    SmallVector<OpAsmParser::OperandType> operands;
+    SmallVector<Type> operandTypes;
+    if (failed(parser.parseColon()) ||
+        failed(parser.parseSuccessor(destination)))
+      return failure();
+    if (succeeded(parser.parseOptionalLParen())) {
+      if (failed(parser.parseRegionArgumentList(operands)) ||
+          failed(parser.parseColonTypeList(operandTypes)) ||
+          failed(parser.parseRParen()))
+        return failure();
+    }
+    caseDestinations.push_back(destination);
+    caseOperands.emplace_back(operands);
+    caseOperandTypes.emplace_back(operandTypes);
+  }
+
+  if (!values.empty()) {
+    ShapedType caseValueType =
+        VectorType::get(static_cast<int64_t>(values.size()), flagType);
+    caseValues = DenseIntElementsAttr::get(caseValueType, values);
+  }
+  return success();
+}
+
+static void printSwitchOpCases(
+    OpAsmPrinter &p, SwitchOp op, Type flagType, Block *defaultDestination,
+    OperandRange defaultOperands, TypeRange defaultOperandTypes,
+    DenseIntElementsAttr caseValues, SuccessorRange caseDestinations,
+    OperandRangeRange caseOperands, const TypeRangeRange &caseOperandTypes) {
+  p << "  default: ";
+  p.printSuccessorAndUseList(defaultDestination, defaultOperands);
+
+  if (!caseValues)
+    return;
+
+  for (const auto &it : llvm::enumerate(caseValues.getValues<APInt>())) {
+    p << ',';
+    p.printNewline();
+    p << "  ";
+    p << it.value().getLimitedValue();
+    p << ": ";
+    p.printSuccessorAndUseList(caseDestinations[it.index()],
+                               caseOperands[it.index()]);
+  }
+  p.printNewline();
+}
+
+LogicalResult SwitchOp::verify() {
+  auto caseValues = getCaseValues();
+  auto caseDestinations = getCaseDestinations();
+
+  if (!caseValues && caseDestinations.empty())
+    return success();
+
+  Type flagType = getFlag().getType();
+  Type caseValueType = caseValues->getType().getElementType();
+  if (caseValueType != flagType)
+    return emitOpError() << "'flag' type (" << flagType
+                         << ") should match case value type (" << caseValueType
+                         << ")";
+
+  if (caseValues &&
+      caseValues->size() != static_cast<int64_t>(caseDestinations.size()))
+    return emitOpError() << "number of case values (" << caseValues->size()
+                         << ") should match number of "
+                            "case destinations ("
+                         << caseDestinations.size() << ")";
+  return success();
+}
+
+Optional<MutableOperandRange>
+SwitchOp::getMutableSuccessorOperands(unsigned index) {
+  assert(index < getNumSuccessors() && "invalid successor index");
+  return index == 0 ? getDefaultOperandsMutable()
+                    : getCaseOperandsMutable(index - 1);
+}
+
+Block *SwitchOp::getSuccessorForOperands(ArrayRef<Attribute> operands) {
+  Optional<DenseIntElementsAttr> caseValues = getCaseValues();
+
+  if (!caseValues)
+    return getDefaultDestination();
+
+  SuccessorRange caseDests = getCaseDestinations();
+  if (auto value = operands.front().dyn_cast_or_null<IntegerAttr>()) {
+    for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>()))
+      if (it.value() == value.getValue())
+        return caseDests[it.index()];
+    return getDefaultDestination();
+  }
+  return nullptr;
+}
+
+/// switch %flag : i32, [
+///   default:  ^bb1
+/// ]
+///  -> br ^bb1
+static LogicalResult simplifySwitchWithOnlyDefault(SwitchOp op,
+                                                   PatternRewriter &rewriter) {
+  if (!op.getCaseDestinations().empty())
+    return failure();
+
+  rewriter.replaceOpWithNewOp<BranchOp>(op, op.getDefaultDestination(),
+                                        op.getDefaultOperands());
+  return success();
+}
+
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb1,
+///   43: ^bb2
+/// ]
+/// ->
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   43: ^bb2
+/// ]
+static LogicalResult
+dropSwitchCasesThatMatchDefault(SwitchOp op, PatternRewriter &rewriter) {
+  SmallVector<Block *> newCaseDestinations;
+  SmallVector<ValueRange> newCaseOperands;
+  SmallVector<APInt> newCaseValues;
+  bool requiresChange = false;
+  auto caseValues = op.getCaseValues();
+  auto caseDests = op.getCaseDestinations();
+
+  for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>())) {
+    if (caseDests[it.index()] == op.getDefaultDestination() &&
+        op.getCaseOperands(it.index()) == op.getDefaultOperands()) {
+      requiresChange = true;
+      continue;
+    }
+    newCaseDestinations.push_back(caseDests[it.index()]);
+    newCaseOperands.push_back(op.getCaseOperands(it.index()));
+    newCaseValues.push_back(it.value());
+  }
+
+  if (!requiresChange)
+    return failure();
+
+  rewriter.replaceOpWithNewOp<SwitchOp>(
+      op, op.getFlag(), op.getDefaultDestination(), op.getDefaultOperands(),
+      newCaseValues, newCaseDestinations, newCaseOperands);
+  return success();
+}
+
+/// Helper for folding a switch with a constant value.
+/// switch %c_42 : i32, [
+///   default: ^bb1 ,
+///   42: ^bb2,
+///   43: ^bb3
+/// ]
+/// -> br ^bb2
+static void foldSwitch(SwitchOp op, PatternRewriter &rewriter,
+                       const APInt &caseValue) {
+  auto caseValues = op.getCaseValues();
+  for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>())) {
+    if (it.value() == caseValue) {
+      rewriter.replaceOpWithNewOp<BranchOp>(
+          op, op.getCaseDestinations()[it.index()],
+          op.getCaseOperands(it.index()));
+      return;
+    }
+  }
+  rewriter.replaceOpWithNewOp<BranchOp>(op, op.getDefaultDestination(),
+                                        op.getDefaultOperands());
+}
+
+/// switch %c_42 : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+///   43: ^bb3
+/// ]
+/// -> br ^bb2
+static LogicalResult simplifyConstSwitchValue(SwitchOp op,
+                                              PatternRewriter &rewriter) {
+  APInt caseValue;
+  if (!matchPattern(op.getFlag(), m_ConstantInt(&caseValue)))
+    return failure();
+
+  foldSwitch(op, rewriter, caseValue);
+  return success();
+}
+
+/// switch %c_42 : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+/// ]
+/// ^bb2:
+///   br ^bb3
+/// ->
+/// switch %c_42 : i32, [
+///   default: ^bb1,
+///   42: ^bb3,
+/// ]
+static LogicalResult simplifyPassThroughSwitch(SwitchOp op,
+                                               PatternRewriter &rewriter) {
+  SmallVector<Block *> newCaseDests;
+  SmallVector<ValueRange> newCaseOperands;
+  SmallVector<SmallVector<Value>> argStorage;
+  auto caseValues = op.getCaseValues();
+  auto caseDests = op.getCaseDestinations();
+  bool requiresChange = false;
+  for (int64_t i = 0, size = caseValues->size(); i < size; ++i) {
+    Block *caseDest = caseDests[i];
+    ValueRange caseOperands = op.getCaseOperands(i);
+    argStorage.emplace_back();
+    if (succeeded(collapseBranch(caseDest, caseOperands, argStorage.back())))
+      requiresChange = true;
+
+    newCaseDests.push_back(caseDest);
+    newCaseOperands.push_back(caseOperands);
+  }
+
+  Block *defaultDest = op.getDefaultDestination();
+  ValueRange defaultOperands = op.getDefaultOperands();
+  argStorage.emplace_back();
+
+  if (succeeded(
+          collapseBranch(defaultDest, defaultOperands, argStorage.back())))
+    requiresChange = true;
+
+  if (!requiresChange)
+    return failure();
+
+  rewriter.replaceOpWithNewOp<SwitchOp>(op, op.getFlag(), defaultDest,
+                                        defaultOperands, caseValues.getValue(),
+                                        newCaseDests, newCaseOperands);
+  return success();
+}
+
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+/// ]
+/// ^bb2:
+///   switch %flag : i32, [
+///     default: ^bb3,
+///     42: ^bb4
+///   ]
+/// ->
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+/// ]
+/// ^bb2:
+///   br ^bb4
+///
+///  and
+///
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+/// ]
+/// ^bb2:
+///   switch %flag : i32, [
+///     default: ^bb3,
+///     43: ^bb4
+///   ]
+/// ->
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+/// ]
+/// ^bb2:
+///   br ^bb3
+static LogicalResult
+simplifySwitchFromSwitchOnSameCondition(SwitchOp op,
+                                        PatternRewriter &rewriter) {
+  // Check that we have a single distinct predecessor.
+  Block *currentBlock = op->getBlock();
+  Block *predecessor = currentBlock->getSinglePredecessor();
+  if (!predecessor)
+    return failure();
+
+  // Check that the predecessor terminates with a switch branch to this block
+  // and that it branches on the same condition and that this branch isn't the
+  // default destination.
+  auto predSwitch = dyn_cast<SwitchOp>(predecessor->getTerminator());
+  if (!predSwitch || op.getFlag() != predSwitch.getFlag() ||
+      predSwitch.getDefaultDestination() == currentBlock)
+    return failure();
+
+  // Fold this switch to an unconditional branch.
+  SuccessorRange predDests = predSwitch.getCaseDestinations();
+  auto it = llvm::find(predDests, currentBlock);
+  if (it != predDests.end()) {
+    Optional<DenseIntElementsAttr> predCaseValues = predSwitch.getCaseValues();
+    foldSwitch(op, rewriter,
+               predCaseValues->getValues<APInt>()[it - predDests.begin()]);
+  } else {
+    rewriter.replaceOpWithNewOp<BranchOp>(op, op.getDefaultDestination(),
+                                          op.getDefaultOperands());
+  }
+  return success();
+}
+
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb2
+/// ]
+/// ^bb1:
+///   switch %flag : i32, [
+///     default: ^bb3,
+///     42: ^bb4,
+///     43: ^bb5
+///   ]
+/// ->
+/// switch %flag : i32, [
+///   default: ^bb1,
+///   42: ^bb2,
+/// ]
+/// ^bb1:
+///   switch %flag : i32, [
+///     default: ^bb3,
+///     43: ^bb5
+///   ]
+static LogicalResult
+simplifySwitchFromDefaultSwitchOnSameCondition(SwitchOp op,
+                                               PatternRewriter &rewriter) {
+  // Check that we have a single distinct predecessor.
+  Block *currentBlock = op->getBlock();
+  Block *predecessor = currentBlock->getSinglePredecessor();
+  if (!predecessor)
+    return failure();
+
+  // Check that the predecessor terminates with a switch branch to this block
+  // and that it branches on the same condition and that this branch is the
+  // default destination.
+  auto predSwitch = dyn_cast<SwitchOp>(predecessor->getTerminator());
+  if (!predSwitch || op.getFlag() != predSwitch.getFlag() ||
+      predSwitch.getDefaultDestination() != currentBlock)
+    return failure();
+
+  // Delete case values that are not possible here.
+  DenseSet<APInt> caseValuesToRemove;
+  auto predDests = predSwitch.getCaseDestinations();
+  auto predCaseValues = predSwitch.getCaseValues();
+  for (int64_t i = 0, size = predCaseValues->size(); i < size; ++i)
+    if (currentBlock != predDests[i])
+      caseValuesToRemove.insert(predCaseValues->getValues<APInt>()[i]);
+
+  SmallVector<Block *> newCaseDestinations;
+  SmallVector<ValueRange> newCaseOperands;
+  SmallVector<APInt> newCaseValues;
+  bool requiresChange = false;
+
+  auto caseValues = op.getCaseValues();
+  auto caseDests = op.getCaseDestinations();
+  for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>())) {
+    if (caseValuesToRemove.contains(it.value())) {
+      requiresChange = true;
+      continue;
+    }
+    newCaseDestinations.push_back(caseDests[it.index()]);
+    newCaseOperands.push_back(op.getCaseOperands(it.index()));
+    newCaseValues.push_back(it.value());
+  }
+
+  if (!requiresChange)
+    return failure();
+
+  rewriter.replaceOpWithNewOp<SwitchOp>(
+      op, op.getFlag(), op.getDefaultDestination(), op.getDefaultOperands(),
+      newCaseValues, newCaseDestinations, newCaseOperands);
+  return success();
+}
+
+void SwitchOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                           MLIRContext *context) {
+  results.add(&simplifySwitchWithOnlyDefault)
+      .add(&dropSwitchCasesThatMatchDefault)
+      .add(&simplifyConstSwitchValue)
+      .add(&simplifyPassThroughSwitch)
+      .add(&simplifySwitchFromSwitchOnSameCondition)
+      .add(&simplifySwitchFromDefaultSwitchOnSameCondition);
+}
+
+//===----------------------------------------------------------------------===//
+// TableGen'd op method definitions
+//===----------------------------------------------------------------------===//
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.cpp.inc"

diff --git a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
index 4a7f80ee0d535..f8f51059d8e9a 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/PatternMatch.h"
@@ -44,14 +44,14 @@ struct GpuAllReduceRewriter {
   /// workgroup memory.
   ///
   ///     %subgroup_reduce = `createSubgroupReduce(%operand)`
-  ///     cond_br %is_first_lane, ^then1, ^continue1
+  ///     cf.cond_br %is_first_lane, ^then1, ^continue1
   ///   ^then1:
   ///     store %subgroup_reduce, %workgroup_buffer[%subgroup_id]
-  ///     br ^continue1
+  ///     cf.br ^continue1
   ///   ^continue1:
   ///     gpu.barrier
   ///     %is_valid_subgroup = arith.cmpi "slt" %invocation_idx, %num_subgroups
-  ///     cond_br %is_valid_subgroup, ^then2, ^continue2
+  ///     cf.cond_br %is_valid_subgroup, ^then2, ^continue2
   ///   ^then2:
   ///     %partial_reduce = load %workgroup_buffer[%invocation_idx]
   ///     %all_reduce = `createSubgroupReduce(%partial_reduce)`
@@ -194,7 +194,7 @@ struct GpuAllReduceRewriter {
 
       // Add branch before inserted body, into body.
       block = block->getNextNode();
-      create<BranchOp>(block, ValueRange());
+      create<cf::BranchOp>(block, ValueRange());
 
       // Replace all gpu.yield ops with branch out of body.
       for (; block != split; block = block->getNextNode()) {
@@ -202,7 +202,7 @@ struct GpuAllReduceRewriter {
         if (!isa<gpu::YieldOp>(terminator))
           continue;
         rewriter.setInsertionPointToEnd(block);
-        rewriter.replaceOpWithNewOp<BranchOp>(
+        rewriter.replaceOpWithNewOp<cf::BranchOp>(
             terminator, split, ValueRange(terminator->getOperand(0)));
       }
 
@@ -285,17 +285,17 @@ struct GpuAllReduceRewriter {
     Block *continueBlock = rewriter.splitBlock(elseBlock, elseBlock->begin());
 
     rewriter.setInsertionPointToEnd(currentBlock);
-    create<CondBranchOp>(condition, thenBlock,
-                         /*trueOperands=*/ArrayRef<Value>(), elseBlock,
-                         /*falseOperands=*/ArrayRef<Value>());
+    create<cf::CondBranchOp>(condition, thenBlock,
+                             /*trueOperands=*/ArrayRef<Value>(), elseBlock,
+                             /*falseOperands=*/ArrayRef<Value>());
 
     rewriter.setInsertionPointToStart(thenBlock);
     auto thenOperands = thenOpsFactory();
-    create<BranchOp>(continueBlock, thenOperands);
+    create<cf::BranchOp>(continueBlock, thenOperands);
 
     rewriter.setInsertionPointToStart(elseBlock);
     auto elseOperands = elseOpsFactory();
-    create<BranchOp>(continueBlock, elseOperands);
+    create<cf::BranchOp>(continueBlock, elseOperands);
 
     assert(thenOperands.size() == elseOperands.size());
     rewriter.setInsertionPointToStart(continueBlock);

diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 3f93b3af00260..d5731dfd06d4a 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -12,6 +12,7 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
@@ -186,7 +187,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
   Block &launchOpEntry = launchOpBody.front();
   Block *clonedLaunchOpEntry = map.lookup(&launchOpEntry);
   builder.setInsertionPointToEnd(&entryBlock);
-  builder.create<BranchOp>(loc, clonedLaunchOpEntry);
+  builder.create<cf::BranchOp>(loc, clonedLaunchOpEntry);
 
   outlinedFunc.walk([](gpu::TerminatorOp op) {
     OpBuilder replacer(op);

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
index 9ee17fada83c6..77e522ad280c0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h"
@@ -254,13 +255,13 @@ struct LinalgDetensorize : public LinalgDetensorizeBase<LinalgDetensorize> {
                  DenseSet<BlockArgument> &blockArgsToDetensor) override {
       SmallVector<Value> workList;
 
-      func->walk([&](CondBranchOp condBr) {
+      func->walk([&](cf::CondBranchOp condBr) {
         for (auto operand : condBr.getOperands()) {
           workList.push_back(operand);
         }
       });
 
-      func->walk([&](BranchOp br) {
+      func->walk([&](cf::BranchOp br) {
         for (auto operand : br.getOperands()) {
           workList.push_back(operand);
         }

diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index 0e2759fef3bb0..d8a76f835df9b 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Matchers.h"
@@ -165,13 +166,13 @@ struct SingleBlockExecuteInliner : public OpRewritePattern<ExecuteRegionOp> {
 //     "test.foo"() : () -> ()
 //     %v = scf.execute_region -> i64 {
 //       %c = "test.cmp"() : () -> i1
-//       cond_br %c, ^bb2, ^bb3
+//       cf.cond_br %c, ^bb2, ^bb3
 //     ^bb2:
 //       %x = "test.val1"() : () -> i64
-//       br ^bb4(%x : i64)
+//       cf.br ^bb4(%x : i64)
 //     ^bb3:
 //       %y = "test.val2"() : () -> i64
-//       br ^bb4(%y : i64)
+//       cf.br ^bb4(%y : i64)
 //     ^bb4(%z : i64):
 //       scf.yield %z : i64
 //     }
@@ -184,13 +185,13 @@ struct SingleBlockExecuteInliner : public OpRewritePattern<ExecuteRegionOp> {
 // func @func_execute_region_elim() {
 //    "test.foo"() : () -> ()
 //    %c = "test.cmp"() : () -> i1
-//    cond_br %c, ^bb1, ^bb2
+//    cf.cond_br %c, ^bb1, ^bb2
 //  ^bb1:  // pred: ^bb0
 //    %x = "test.val1"() : () -> i64
-//    br ^bb3(%x : i64)
+//    cf.br ^bb3(%x : i64)
 //  ^bb2:  // pred: ^bb0
 //    %y = "test.val2"() : () -> i64
-//    br ^bb3(%y : i64)
+//    cf.br ^bb3(%y : i64)
 //  ^bb3(%z: i64):  // 2 preds: ^bb1, ^bb2
 //    "test.bar"(%z) : (i64) -> ()
 //    return
@@ -208,13 +209,13 @@ struct MultiBlockExecuteInliner : public OpRewritePattern<ExecuteRegionOp> {
     Block *postBlock = rewriter.splitBlock(prevBlock, op->getIterator());
     rewriter.setInsertionPointToEnd(prevBlock);
 
-    rewriter.create<BranchOp>(op.getLoc(), &op.getRegion().front());
+    rewriter.create<cf::BranchOp>(op.getLoc(), &op.getRegion().front());
 
     for (Block &blk : op.getRegion()) {
       if (YieldOp yieldOp = dyn_cast<YieldOp>(blk.getTerminator())) {
         rewriter.setInsertionPoint(yieldOp);
-        rewriter.create<BranchOp>(yieldOp.getLoc(), postBlock,
-                                  yieldOp.getResults());
+        rewriter.create<cf::BranchOp>(yieldOp.getLoc(), postBlock,
+                                      yieldOp.getResults());
         rewriter.eraseOp(yieldOp);
       }
     }

diff  --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
index 909f3bc0d97e5..fc0f59b232922 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
@@ -13,7 +13,7 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines
   MLIRMemRefToLLVM
   MLIRPass
   MLIRReconcileUnrealizedCasts
-  MLIRSCFToStandard
+  MLIRSCFToControlFlow
   MLIRSparseTensor
   MLIRSparseTensorTransforms
   MLIRStandardOpsTransforms

diff  --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 145ea480bbc41..819ecc6e5c882 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -33,7 +33,7 @@ void mlir::sparse_tensor::buildSparseCompiler(
   pm.addNestedPass<FuncOp>(createLinalgBufferizePass());
   pm.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
   pm.addNestedPass<FuncOp>(createConvertVectorToSCFPass());
-  pm.addPass(createLowerToCFGPass()); // --convert-scf-to-std
+  pm.addNestedPass<FuncOp>(createConvertSCFToCFPass());
   pm.addPass(createFuncBufferizePass());
   pm.addPass(arith::createConstantBufferizePass());
   pm.addNestedPass<FuncOp>(createTensorBufferizePass());

diff  --git a/mlir/lib/Dialect/StandardOps/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/CMakeLists.txt
index fdfa44911e9a6..f1689abb6e8b9 100644
--- a/mlir/lib/Dialect/StandardOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/StandardOps/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRStandard
   MLIRArithmetic
   MLIRCallInterfaces
   MLIRCastInterfaces
+  MLIRControlFlow
   MLIRControlFlowInterfaces
   MLIRInferTypeOpInterface
   MLIRIR

diff  --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
index bf35625adb625..cc304511c9f7a 100644
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -8,9 +8,8 @@
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
 #include "mlir/Dialect/CommonFolders.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -77,7 +76,7 @@ struct StdInlinerInterface : public DialectInlinerInterface {
 
     // Replace the return with a branch to the dest.
     OpBuilder builder(op);
-    builder.create<BranchOp>(op->getLoc(), newDest, returnOp.getOperands());
+    builder.create<cf::BranchOp>(op->getLoc(), newDest, returnOp.getOperands());
     op->erase();
   }
 
@@ -121,130 +120,6 @@ Operation *StandardOpsDialect::materializeConstant(OpBuilder &builder,
   return nullptr;
 }
 
-//===----------------------------------------------------------------------===//
-// AssertOp
-//===----------------------------------------------------------------------===//
-
-LogicalResult AssertOp::canonicalize(AssertOp op, PatternRewriter &rewriter) {
-  // Erase assertion if argument is constant true.
-  if (matchPattern(op.getArg(), m_One())) {
-    rewriter.eraseOp(op);
-    return success();
-  }
-  return failure();
-}
-
-//===----------------------------------------------------------------------===//
-// BranchOp
-//===----------------------------------------------------------------------===//
-
-/// Given a successor, try to collapse it to a new destination if it only
-/// contains a passthrough unconditional branch. If the successor is
-/// collapsable, `successor` and `successorOperands` are updated to reference
-/// the new destination and values. `argStorage` is used as storage if operands
-/// to the collapsed successor need to be remapped. It must outlive uses of
-/// successorOperands.
-static LogicalResult collapseBranch(Block *&successor,
-                                    ValueRange &successorOperands,
-                                    SmallVectorImpl<Value> &argStorage) {
-  // Check that the successor only contains a unconditional branch.
-  if (std::next(successor->begin()) != successor->end())
-    return failure();
-  // Check that the terminator is an unconditional branch.
-  BranchOp successorBranch = dyn_cast<BranchOp>(successor->getTerminator());
-  if (!successorBranch)
-    return failure();
-  // Check that the arguments are only used within the terminator.
-  for (BlockArgument arg : successor->getArguments()) {
-    for (Operation *user : arg.getUsers())
-      if (user != successorBranch)
-        return failure();
-  }
-  // Don't try to collapse branches to infinite loops.
-  Block *successorDest = successorBranch.getDest();
-  if (successorDest == successor)
-    return failure();
-
-  // Update the operands to the successor. If the branch parent has no
-  // arguments, we can use the branch operands directly.
-  OperandRange operands = successorBranch.getOperands();
-  if (successor->args_empty()) {
-    successor = successorDest;
-    successorOperands = operands;
-    return success();
-  }
-
-  // Otherwise, we need to remap any argument operands.
-  for (Value operand : operands) {
-    BlockArgument argOperand = operand.dyn_cast<BlockArgument>();
-    if (argOperand && argOperand.getOwner() == successor)
-      argStorage.push_back(successorOperands[argOperand.getArgNumber()]);
-    else
-      argStorage.push_back(operand);
-  }
-  successor = successorDest;
-  successorOperands = argStorage;
-  return success();
-}
-
-/// Simplify a branch to a block that has a single predecessor. This effectively
-/// merges the two blocks.
-static LogicalResult
-simplifyBrToBlockWithSinglePred(BranchOp op, PatternRewriter &rewriter) {
-  // Check that the successor block has a single predecessor.
-  Block *succ = op.getDest();
-  Block *opParent = op->getBlock();
-  if (succ == opParent || !llvm::hasSingleElement(succ->getPredecessors()))
-    return failure();
-
-  // Merge the successor into the current block and erase the branch.
-  rewriter.mergeBlocks(succ, opParent, op.getOperands());
-  rewriter.eraseOp(op);
-  return success();
-}
-
-///   br ^bb1
-/// ^bb1
-///   br ^bbN(...)
-///
-///  -> br ^bbN(...)
-///
-static LogicalResult simplifyPassThroughBr(BranchOp op,
-                                           PatternRewriter &rewriter) {
-  Block *dest = op.getDest();
-  ValueRange destOperands = op.getOperands();
-  SmallVector<Value, 4> destOperandStorage;
-
-  // Try to collapse the successor if it points somewhere other than this
-  // block.
-  if (dest == op->getBlock() ||
-      failed(collapseBranch(dest, destOperands, destOperandStorage)))
-    return failure();
-
-  // Create a new branch with the collapsed successor.
-  rewriter.replaceOpWithNewOp<BranchOp>(op, dest, destOperands);
-  return success();
-}
-
-LogicalResult BranchOp::canonicalize(BranchOp op, PatternRewriter &rewriter) {
-  return success(succeeded(simplifyBrToBlockWithSinglePred(op, rewriter)) ||
-                 succeeded(simplifyPassThroughBr(op, rewriter)));
-}
-
-void BranchOp::setDest(Block *block) { return setSuccessor(block); }
-
-void BranchOp::eraseOperand(unsigned index) { (*this)->eraseOperand(index); }
-
-Optional<MutableOperandRange>
-BranchOp::getMutableSuccessorOperands(unsigned index) {
-  assert(index == 0 && "invalid successor index");
-  return getDestOperandsMutable();
-}
-
-Block *BranchOp::getSuccessorForOperands(ArrayRef<Attribute>) {
-  return getDest();
-}
-
 //===----------------------------------------------------------------------===//
 // CallOp
 //===----------------------------------------------------------------------===//
@@ -307,260 +182,6 @@ LogicalResult CallIndirectOp::canonicalize(CallIndirectOp indirectCall,
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// CondBranchOp
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// cond_br true, ^bb1, ^bb2
-///  -> br ^bb1
-/// cond_br false, ^bb1, ^bb2
-///  -> br ^bb2
-///
-struct SimplifyConstCondBranchPred : public OpRewritePattern<CondBranchOp> {
-  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CondBranchOp condbr,
-                                PatternRewriter &rewriter) const override {
-    if (matchPattern(condbr.getCondition(), m_NonZero())) {
-      // True branch taken.
-      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getTrueDest(),
-                                            condbr.getTrueOperands());
-      return success();
-    }
-    if (matchPattern(condbr.getCondition(), m_Zero())) {
-      // False branch taken.
-      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getFalseDest(),
-                                            condbr.getFalseOperands());
-      return success();
-    }
-    return failure();
-  }
-};
-
-///   cond_br %cond, ^bb1, ^bb2
-/// ^bb1
-///   br ^bbN(...)
-/// ^bb2
-///   br ^bbK(...)
-///
-///  -> cond_br %cond, ^bbN(...), ^bbK(...)
-///
-struct SimplifyPassThroughCondBranch : public OpRewritePattern<CondBranchOp> {
-  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CondBranchOp condbr,
-                                PatternRewriter &rewriter) const override {
-    Block *trueDest = condbr.getTrueDest(), *falseDest = condbr.getFalseDest();
-    ValueRange trueDestOperands = condbr.getTrueOperands();
-    ValueRange falseDestOperands = condbr.getFalseOperands();
-    SmallVector<Value, 4> trueDestOperandStorage, falseDestOperandStorage;
-
-    // Try to collapse one of the current successors.
-    LogicalResult collapsedTrue =
-        collapseBranch(trueDest, trueDestOperands, trueDestOperandStorage);
-    LogicalResult collapsedFalse =
-        collapseBranch(falseDest, falseDestOperands, falseDestOperandStorage);
-    if (failed(collapsedTrue) && failed(collapsedFalse))
-      return failure();
-
-    // Create a new branch with the collapsed successors.
-    rewriter.replaceOpWithNewOp<CondBranchOp>(condbr, condbr.getCondition(),
-                                              trueDest, trueDestOperands,
-                                              falseDest, falseDestOperands);
-    return success();
-  }
-};
-
-/// cond_br %cond, ^bb1(A, ..., N), ^bb1(A, ..., N)
-///  -> br ^bb1(A, ..., N)
-///
-/// cond_br %cond, ^bb1(A), ^bb1(B)
-///  -> %select = arith.select %cond, A, B
-///     br ^bb1(%select)
-///
-struct SimplifyCondBranchIdenticalSuccessors
-    : public OpRewritePattern<CondBranchOp> {
-  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CondBranchOp condbr,
-                                PatternRewriter &rewriter) const override {
-    // Check that the true and false destinations are the same and have the same
-    // operands.
-    Block *trueDest = condbr.getTrueDest();
-    if (trueDest != condbr.getFalseDest())
-      return failure();
-
-    // If all of the operands match, no selects need to be generated.
-    OperandRange trueOperands = condbr.getTrueOperands();
-    OperandRange falseOperands = condbr.getFalseOperands();
-    if (trueOperands == falseOperands) {
-      rewriter.replaceOpWithNewOp<BranchOp>(condbr, trueDest, trueOperands);
-      return success();
-    }
-
-    // Otherwise, if the current block is the only predecessor insert selects
-    // for any mismatched branch operands.
-    if (trueDest->getUniquePredecessor() != condbr->getBlock())
-      return failure();
-
-    // Generate a select for any operands that differ between the two.
-    SmallVector<Value, 8> mergedOperands;
-    mergedOperands.reserve(trueOperands.size());
-    Value condition = condbr.getCondition();
-    for (auto it : llvm::zip(trueOperands, falseOperands)) {
-      if (std::get<0>(it) == std::get<1>(it))
-        mergedOperands.push_back(std::get<0>(it));
-      else
-        mergedOperands.push_back(rewriter.create<arith::SelectOp>(
-            condbr.getLoc(), condition, std::get<0>(it), std::get<1>(it)));
-    }
-
-    rewriter.replaceOpWithNewOp<BranchOp>(condbr, trueDest, mergedOperands);
-    return success();
-  }
-};
-
-///   ...
-///   cond_br %cond, ^bb1(...), ^bb2(...)
-/// ...
-/// ^bb1: // has single predecessor
-///   ...
-///   cond_br %cond, ^bb3(...), ^bb4(...)
-///
-/// ->
-///
-///   ...
-///   cond_br %cond, ^bb1(...), ^bb2(...)
-/// ...
-/// ^bb1: // has single predecessor
-///   ...
-///   br ^bb3(...)
-///
-struct SimplifyCondBranchFromCondBranchOnSameCondition
-    : public OpRewritePattern<CondBranchOp> {
-  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CondBranchOp condbr,
-                                PatternRewriter &rewriter) const override {
-    // Check that we have a single distinct predecessor.
-    Block *currentBlock = condbr->getBlock();
-    Block *predecessor = currentBlock->getSinglePredecessor();
-    if (!predecessor)
-      return failure();
-
-    // Check that the predecessor terminates with a conditional branch to this
-    // block and that it branches on the same condition.
-    auto predBranch = dyn_cast<CondBranchOp>(predecessor->getTerminator());
-    if (!predBranch || condbr.getCondition() != predBranch.getCondition())
-      return failure();
-
-    // Fold this branch to an unconditional branch.
-    if (currentBlock == predBranch.getTrueDest())
-      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getTrueDest(),
-                                            condbr.getTrueDestOperands());
-    else
-      rewriter.replaceOpWithNewOp<BranchOp>(condbr, condbr.getFalseDest(),
-                                            condbr.getFalseDestOperands());
-    return success();
-  }
-};
-
-///   cond_br %arg0, ^trueB, ^falseB
-///
-/// ^trueB:
-///   "test.consumer1"(%arg0) : (i1) -> ()
-///    ...
-///
-/// ^falseB:
-///   "test.consumer2"(%arg0) : (i1) -> ()
-///   ...
-///
-/// ->
-///
-///   cond_br %arg0, ^trueB, ^falseB
-/// ^trueB:
-///   "test.consumer1"(%true) : (i1) -> ()
-///   ...
-///
-/// ^falseB:
-///   "test.consumer2"(%false) : (i1) -> ()
-///   ...
-struct CondBranchTruthPropagation : public OpRewritePattern<CondBranchOp> {
-  using OpRewritePattern<CondBranchOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(CondBranchOp condbr,
-                                PatternRewriter &rewriter) const override {
-    // Check that we have a single distinct predecessor.
-    bool replaced = false;
-    Type ty = rewriter.getI1Type();
-
-    // These variables serve to prevent creating duplicate constants
-    // and hold constant true or false values.
-    Value constantTrue = nullptr;
-    Value constantFalse = nullptr;
-
-    // TODO These checks can be expanded to encompas any use with only
-    // either the true of false edge as a predecessor. For now, we fall
-    // back to checking the single predecessor is given by the true/fasle
-    // destination, thereby ensuring that only that edge can reach the
-    // op.
-    if (condbr.getTrueDest()->getSinglePredecessor()) {
-      for (OpOperand &use :
-           llvm::make_early_inc_range(condbr.getCondition().getUses())) {
-        if (use.getOwner()->getBlock() == condbr.getTrueDest()) {
-          replaced = true;
-
-          if (!constantTrue)
-            constantTrue = rewriter.create<arith::ConstantOp>(
-                condbr.getLoc(), ty, rewriter.getBoolAttr(true));
-
-          rewriter.updateRootInPlace(use.getOwner(),
-                                     [&] { use.set(constantTrue); });
-        }
-      }
-    }
-    if (condbr.getFalseDest()->getSinglePredecessor()) {
-      for (OpOperand &use :
-           llvm::make_early_inc_range(condbr.getCondition().getUses())) {
-        if (use.getOwner()->getBlock() == condbr.getFalseDest()) {
-          replaced = true;
-
-          if (!constantFalse)
-            constantFalse = rewriter.create<arith::ConstantOp>(
-                condbr.getLoc(), ty, rewriter.getBoolAttr(false));
-
-          rewriter.updateRootInPlace(use.getOwner(),
-                                     [&] { use.set(constantFalse); });
-        }
-      }
-    }
-    return success(replaced);
-  }
-};
-} // namespace
-
-void CondBranchOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                               MLIRContext *context) {
-  results.add<SimplifyConstCondBranchPred, SimplifyPassThroughCondBranch,
-              SimplifyCondBranchIdenticalSuccessors,
-              SimplifyCondBranchFromCondBranchOnSameCondition,
-              CondBranchTruthPropagation>(context);
-}
-
-Optional<MutableOperandRange>
-CondBranchOp::getMutableSuccessorOperands(unsigned index) {
-  assert(index < getNumSuccessors() && "invalid successor index");
-  return index == trueIndex ? getTrueDestOperandsMutable()
-                            : getFalseDestOperandsMutable();
-}
-
-Block *CondBranchOp::getSuccessorForOperands(ArrayRef<Attribute> operands) {
-  if (IntegerAttr condAttr = operands.front().dyn_cast_or_null<IntegerAttr>())
-    return condAttr.getValue().isOneValue() ? getTrueDest() : getFalseDest();
-  return nullptr;
-}
-
 //===----------------------------------------------------------------------===//
 // ConstantOp
 //===----------------------------------------------------------------------===//
@@ -621,439 +242,6 @@ LogicalResult ReturnOp::verify() {
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// SwitchOp
-//===----------------------------------------------------------------------===//
-
-void SwitchOp::build(OpBuilder &builder, OperationState &result, Value value,
-                     Block *defaultDestination, ValueRange defaultOperands,
-                     DenseIntElementsAttr caseValues,
-                     BlockRange caseDestinations,
-                     ArrayRef<ValueRange> caseOperands) {
-  build(builder, result, value, defaultOperands, caseOperands, caseValues,
-        defaultDestination, caseDestinations);
-}
-
-void SwitchOp::build(OpBuilder &builder, OperationState &result, Value value,
-                     Block *defaultDestination, ValueRange defaultOperands,
-                     ArrayRef<APInt> caseValues, BlockRange caseDestinations,
-                     ArrayRef<ValueRange> caseOperands) {
-  DenseIntElementsAttr caseValuesAttr;
-  if (!caseValues.empty()) {
-    ShapedType caseValueType = VectorType::get(
-        static_cast<int64_t>(caseValues.size()), value.getType());
-    caseValuesAttr = DenseIntElementsAttr::get(caseValueType, caseValues);
-  }
-  build(builder, result, value, defaultDestination, defaultOperands,
-        caseValuesAttr, caseDestinations, caseOperands);
-}
-
-/// <cases> ::= `default` `:` bb-id (`(` ssa-use-and-type-list `)`)?
-///             ( `,` integer `:` bb-id (`(` ssa-use-and-type-list `)`)? )*
-static ParseResult parseSwitchOpCases(
-    OpAsmParser &parser, Type &flagType, Block *&defaultDestination,
-    SmallVectorImpl<OpAsmParser::OperandType> &defaultOperands,
-    SmallVectorImpl<Type> &defaultOperandTypes,
-    DenseIntElementsAttr &caseValues,
-    SmallVectorImpl<Block *> &caseDestinations,
-    SmallVectorImpl<SmallVector<OpAsmParser::OperandType>> &caseOperands,
-    SmallVectorImpl<SmallVector<Type>> &caseOperandTypes) {
-  if (parser.parseKeyword("default") || parser.parseColon() ||
-      parser.parseSuccessor(defaultDestination))
-    return failure();
-  if (succeeded(parser.parseOptionalLParen())) {
-    if (parser.parseRegionArgumentList(defaultOperands) ||
-        parser.parseColonTypeList(defaultOperandTypes) || parser.parseRParen())
-      return failure();
-  }
-
-  SmallVector<APInt> values;
-  unsigned bitWidth = flagType.getIntOrFloatBitWidth();
-  while (succeeded(parser.parseOptionalComma())) {
-    int64_t value = 0;
-    if (failed(parser.parseInteger(value)))
-      return failure();
-    values.push_back(APInt(bitWidth, value));
-
-    Block *destination;
-    SmallVector<OpAsmParser::OperandType> operands;
-    SmallVector<Type> operandTypes;
-    if (failed(parser.parseColon()) ||
-        failed(parser.parseSuccessor(destination)))
-      return failure();
-    if (succeeded(parser.parseOptionalLParen())) {
-      if (failed(parser.parseRegionArgumentList(operands)) ||
-          failed(parser.parseColonTypeList(operandTypes)) ||
-          failed(parser.parseRParen()))
-        return failure();
-    }
-    caseDestinations.push_back(destination);
-    caseOperands.emplace_back(operands);
-    caseOperandTypes.emplace_back(operandTypes);
-  }
-
-  if (!values.empty()) {
-    ShapedType caseValueType =
-        VectorType::get(static_cast<int64_t>(values.size()), flagType);
-    caseValues = DenseIntElementsAttr::get(caseValueType, values);
-  }
-  return success();
-}
-
-static void printSwitchOpCases(
-    OpAsmPrinter &p, SwitchOp op, Type flagType, Block *defaultDestination,
-    OperandRange defaultOperands, TypeRange defaultOperandTypes,
-    DenseIntElementsAttr caseValues, SuccessorRange caseDestinations,
-    OperandRangeRange caseOperands, const TypeRangeRange &caseOperandTypes) {
-  p << "  default: ";
-  p.printSuccessorAndUseList(defaultDestination, defaultOperands);
-
-  if (!caseValues)
-    return;
-
-  for (const auto &it : llvm::enumerate(caseValues.getValues<APInt>())) {
-    p << ',';
-    p.printNewline();
-    p << "  ";
-    p << it.value().getLimitedValue();
-    p << ": ";
-    p.printSuccessorAndUseList(caseDestinations[it.index()],
-                               caseOperands[it.index()]);
-  }
-  p.printNewline();
-}
-
-LogicalResult SwitchOp::verify() {
-  auto caseValues = getCaseValues();
-  auto caseDestinations = getCaseDestinations();
-
-  if (!caseValues && caseDestinations.empty())
-    return success();
-
-  Type flagType = getFlag().getType();
-  Type caseValueType = caseValues->getType().getElementType();
-  if (caseValueType != flagType)
-    return emitOpError() << "'flag' type (" << flagType
-                         << ") should match case value type (" << caseValueType
-                         << ")";
-
-  if (caseValues &&
-      caseValues->size() != static_cast<int64_t>(caseDestinations.size()))
-    return emitOpError() << "number of case values (" << caseValues->size()
-                         << ") should match number of "
-                            "case destinations ("
-                         << caseDestinations.size() << ")";
-  return success();
-}
-
-Optional<MutableOperandRange>
-SwitchOp::getMutableSuccessorOperands(unsigned index) {
-  assert(index < getNumSuccessors() && "invalid successor index");
-  return index == 0 ? getDefaultOperandsMutable()
-                    : getCaseOperandsMutable(index - 1);
-}
-
-Block *SwitchOp::getSuccessorForOperands(ArrayRef<Attribute> operands) {
-  Optional<DenseIntElementsAttr> caseValues = getCaseValues();
-
-  if (!caseValues)
-    return getDefaultDestination();
-
-  SuccessorRange caseDests = getCaseDestinations();
-  if (auto value = operands.front().dyn_cast_or_null<IntegerAttr>()) {
-    for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>()))
-      if (it.value() == value.getValue())
-        return caseDests[it.index()];
-    return getDefaultDestination();
-  }
-  return nullptr;
-}
-
-/// switch %flag : i32, [
-///   default:  ^bb1
-/// ]
-///  -> br ^bb1
-static LogicalResult simplifySwitchWithOnlyDefault(SwitchOp op,
-                                                   PatternRewriter &rewriter) {
-  if (!op.getCaseDestinations().empty())
-    return failure();
-
-  rewriter.replaceOpWithNewOp<BranchOp>(op, op.getDefaultDestination(),
-                                        op.getDefaultOperands());
-  return success();
-}
-
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb1,
-///   43: ^bb2
-/// ]
-/// ->
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   43: ^bb2
-/// ]
-static LogicalResult
-dropSwitchCasesThatMatchDefault(SwitchOp op, PatternRewriter &rewriter) {
-  SmallVector<Block *> newCaseDestinations;
-  SmallVector<ValueRange> newCaseOperands;
-  SmallVector<APInt> newCaseValues;
-  bool requiresChange = false;
-  auto caseValues = op.getCaseValues();
-  auto caseDests = op.getCaseDestinations();
-
-  for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>())) {
-    if (caseDests[it.index()] == op.getDefaultDestination() &&
-        op.getCaseOperands(it.index()) == op.getDefaultOperands()) {
-      requiresChange = true;
-      continue;
-    }
-    newCaseDestinations.push_back(caseDests[it.index()]);
-    newCaseOperands.push_back(op.getCaseOperands(it.index()));
-    newCaseValues.push_back(it.value());
-  }
-
-  if (!requiresChange)
-    return failure();
-
-  rewriter.replaceOpWithNewOp<SwitchOp>(
-      op, op.getFlag(), op.getDefaultDestination(), op.getDefaultOperands(),
-      newCaseValues, newCaseDestinations, newCaseOperands);
-  return success();
-}
-
-/// Helper for folding a switch with a constant value.
-/// switch %c_42 : i32, [
-///   default: ^bb1 ,
-///   42: ^bb2,
-///   43: ^bb3
-/// ]
-/// -> br ^bb2
-static void foldSwitch(SwitchOp op, PatternRewriter &rewriter,
-                       const APInt &caseValue) {
-  auto caseValues = op.getCaseValues();
-  for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>())) {
-    if (it.value() == caseValue) {
-      rewriter.replaceOpWithNewOp<BranchOp>(
-          op, op.getCaseDestinations()[it.index()],
-          op.getCaseOperands(it.index()));
-      return;
-    }
-  }
-  rewriter.replaceOpWithNewOp<BranchOp>(op, op.getDefaultDestination(),
-                                        op.getDefaultOperands());
-}
-
-/// switch %c_42 : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-///   43: ^bb3
-/// ]
-/// -> br ^bb2
-static LogicalResult simplifyConstSwitchValue(SwitchOp op,
-                                              PatternRewriter &rewriter) {
-  APInt caseValue;
-  if (!matchPattern(op.getFlag(), m_ConstantInt(&caseValue)))
-    return failure();
-
-  foldSwitch(op, rewriter, caseValue);
-  return success();
-}
-
-/// switch %c_42 : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-/// ]
-/// ^bb2:
-///   br ^bb3
-/// ->
-/// switch %c_42 : i32, [
-///   default: ^bb1,
-///   42: ^bb3,
-/// ]
-static LogicalResult simplifyPassThroughSwitch(SwitchOp op,
-                                               PatternRewriter &rewriter) {
-  SmallVector<Block *> newCaseDests;
-  SmallVector<ValueRange> newCaseOperands;
-  SmallVector<SmallVector<Value>> argStorage;
-  auto caseValues = op.getCaseValues();
-  auto caseDests = op.getCaseDestinations();
-  bool requiresChange = false;
-  for (int64_t i = 0, size = caseValues->size(); i < size; ++i) {
-    Block *caseDest = caseDests[i];
-    ValueRange caseOperands = op.getCaseOperands(i);
-    argStorage.emplace_back();
-    if (succeeded(collapseBranch(caseDest, caseOperands, argStorage.back())))
-      requiresChange = true;
-
-    newCaseDests.push_back(caseDest);
-    newCaseOperands.push_back(caseOperands);
-  }
-
-  Block *defaultDest = op.getDefaultDestination();
-  ValueRange defaultOperands = op.getDefaultOperands();
-  argStorage.emplace_back();
-
-  if (succeeded(
-          collapseBranch(defaultDest, defaultOperands, argStorage.back())))
-    requiresChange = true;
-
-  if (!requiresChange)
-    return failure();
-
-  rewriter.replaceOpWithNewOp<SwitchOp>(op, op.getFlag(), defaultDest,
-                                        defaultOperands, caseValues.getValue(),
-                                        newCaseDests, newCaseOperands);
-  return success();
-}
-
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-/// ]
-/// ^bb2:
-///   switch %flag : i32, [
-///     default: ^bb3,
-///     42: ^bb4
-///   ]
-/// ->
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-/// ]
-/// ^bb2:
-///   br ^bb4
-///
-///  and
-///
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-/// ]
-/// ^bb2:
-///   switch %flag : i32, [
-///     default: ^bb3,
-///     43: ^bb4
-///   ]
-/// ->
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-/// ]
-/// ^bb2:
-///   br ^bb3
-static LogicalResult
-simplifySwitchFromSwitchOnSameCondition(SwitchOp op,
-                                        PatternRewriter &rewriter) {
-  // Check that we have a single distinct predecessor.
-  Block *currentBlock = op->getBlock();
-  Block *predecessor = currentBlock->getSinglePredecessor();
-  if (!predecessor)
-    return failure();
-
-  // Check that the predecessor terminates with a switch branch to this block
-  // and that it branches on the same condition and that this branch isn't the
-  // default destination.
-  auto predSwitch = dyn_cast<SwitchOp>(predecessor->getTerminator());
-  if (!predSwitch || op.getFlag() != predSwitch.getFlag() ||
-      predSwitch.getDefaultDestination() == currentBlock)
-    return failure();
-
-  // Fold this switch to an unconditional branch.
-  SuccessorRange predDests = predSwitch.getCaseDestinations();
-  auto it = llvm::find(predDests, currentBlock);
-  if (it != predDests.end()) {
-    Optional<DenseIntElementsAttr> predCaseValues = predSwitch.getCaseValues();
-    foldSwitch(op, rewriter,
-               predCaseValues->getValues<APInt>()[it - predDests.begin()]);
-  } else {
-    rewriter.replaceOpWithNewOp<BranchOp>(op, op.getDefaultDestination(),
-                                          op.getDefaultOperands());
-  }
-  return success();
-}
-
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb2
-/// ]
-/// ^bb1:
-///   switch %flag : i32, [
-///     default: ^bb3,
-///     42: ^bb4,
-///     43: ^bb5
-///   ]
-/// ->
-/// switch %flag : i32, [
-///   default: ^bb1,
-///   42: ^bb2,
-/// ]
-/// ^bb1:
-///   switch %flag : i32, [
-///     default: ^bb3,
-///     43: ^bb5
-///   ]
-static LogicalResult
-simplifySwitchFromDefaultSwitchOnSameCondition(SwitchOp op,
-                                               PatternRewriter &rewriter) {
-  // Check that we have a single distinct predecessor.
-  Block *currentBlock = op->getBlock();
-  Block *predecessor = currentBlock->getSinglePredecessor();
-  if (!predecessor)
-    return failure();
-
-  // Check that the predecessor terminates with a switch branch to this block
-  // and that it branches on the same condition and that this branch is the
-  // default destination.
-  auto predSwitch = dyn_cast<SwitchOp>(predecessor->getTerminator());
-  if (!predSwitch || op.getFlag() != predSwitch.getFlag() ||
-      predSwitch.getDefaultDestination() != currentBlock)
-    return failure();
-
-  // Delete case values that are not possible here.
-  DenseSet<APInt> caseValuesToRemove;
-  auto predDests = predSwitch.getCaseDestinations();
-  auto predCaseValues = predSwitch.getCaseValues();
-  for (int64_t i = 0, size = predCaseValues->size(); i < size; ++i)
-    if (currentBlock != predDests[i])
-      caseValuesToRemove.insert(predCaseValues->getValues<APInt>()[i]);
-
-  SmallVector<Block *> newCaseDestinations;
-  SmallVector<ValueRange> newCaseOperands;
-  SmallVector<APInt> newCaseValues;
-  bool requiresChange = false;
-
-  auto caseValues = op.getCaseValues();
-  auto caseDests = op.getCaseDestinations();
-  for (const auto &it : llvm::enumerate(caseValues->getValues<APInt>())) {
-    if (caseValuesToRemove.contains(it.value())) {
-      requiresChange = true;
-      continue;
-    }
-    newCaseDestinations.push_back(caseDests[it.index()]);
-    newCaseOperands.push_back(op.getCaseOperands(it.index()));
-    newCaseValues.push_back(it.value());
-  }
-
-  if (!requiresChange)
-    return failure();
-
-  rewriter.replaceOpWithNewOp<SwitchOp>(
-      op, op.getFlag(), op.getDefaultDestination(), op.getDefaultOperands(),
-      newCaseValues, newCaseDestinations, newCaseOperands);
-  return success();
-}
-
-void SwitchOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                           MLIRContext *context) {
-  results.add(&simplifySwitchWithOnlyDefault)
-      .add(&dropSwitchCasesThatMatchDefault)
-      .add(&simplifyConstSwitchValue)
-      .add(&simplifyPassThroughSwitch)
-      .add(&simplifySwitchFromSwitchOnSameCondition)
-      .add(&simplifySwitchFromDefaultSwitchOnSameCondition);
-}
-
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Target/Cpp/TranslateRegistration.cpp b/mlir/lib/Target/Cpp/TranslateRegistration.cpp
index 7bdcd96b0407b..889637bec5506 100644
--- a/mlir/lib/Target/Cpp/TranslateRegistration.cpp
+++ b/mlir/lib/Target/Cpp/TranslateRegistration.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -41,6 +42,7 @@ void registerToCppTranslation() {
       [](DialectRegistry &registry) {
         // clang-format off
         registry.insert<arith::ArithmeticDialect,
+                        cf::ControlFlowDialect,
                         emitc::EmitCDialect,
                         math::MathDialect,
                         StandardOpsDialect,

diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 5d7ef65fcad2e..1215e4473f4ea 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <utility>
-
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
+#include <utility>
 
 #define DEBUG_TYPE "translate-to-cpp"
 
@@ -237,7 +237,8 @@ static LogicalResult printOperation(CppEmitter &emitter,
   return printConstantOp(emitter, operation, value);
 }
 
-static LogicalResult printOperation(CppEmitter &emitter, BranchOp branchOp) {
+static LogicalResult printOperation(CppEmitter &emitter,
+                                    cf::BranchOp branchOp) {
   raw_ostream &os = emitter.ostream();
   Block &successor = *branchOp.getSuccessor();
 
@@ -257,7 +258,7 @@ static LogicalResult printOperation(CppEmitter &emitter, BranchOp branchOp) {
 }
 
 static LogicalResult printOperation(CppEmitter &emitter,
-                                    CondBranchOp condBranchOp) {
+                                    cf::CondBranchOp condBranchOp) {
   raw_indented_ostream &os = emitter.ostream();
   Block &trueSuccessor = *condBranchOp.getTrueDest();
   Block &falseSuccessor = *condBranchOp.getFalseDest();
@@ -637,11 +638,12 @@ static LogicalResult printOperation(CppEmitter &emitter, FuncOp functionOp) {
         return failure();
     }
     for (Operation &op : block.getOperations()) {
-      // When generating code for an scf.if or std.cond_br op no semicolon needs
+      // When generating code for an scf.if or cf.cond_br op no semicolon needs
       // to be printed after the closing brace.
       // When generating code for an scf.for op, printing a trailing semicolon
       // is handled within the printOperation function.
-      bool trailingSemicolon = !isa<scf::IfOp, scf::ForOp, CondBranchOp>(op);
+      bool trailingSemicolon =
+          !isa<scf::IfOp, scf::ForOp, cf::CondBranchOp>(op);
 
       if (failed(emitter.emitOperation(
               op, /*trailingSemicolon=*/trailingSemicolon)))
@@ -907,8 +909,8 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
           .Case<scf::ForOp, scf::IfOp, scf::YieldOp>(
               [&](auto op) { return printOperation(*this, op); })
           // Standard ops.
-          .Case<BranchOp, mlir::CallOp, CondBranchOp, mlir::ConstantOp, FuncOp,
-                ModuleOp, ReturnOp>(
+          .Case<cf::BranchOp, mlir::CallOp, cf::CondBranchOp, mlir::ConstantOp,
+                FuncOp, ModuleOp, ReturnOp>(
               [&](auto op) { return printOperation(*this, op); })
           // Arithmetic ops.
           .Case<arith::ConstantOp>(

diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir
index ddd779c74509b..2f6db065b596e 100644
--- a/mlir/test/Analysis/test-alias-analysis.mlir
+++ b/mlir/test/Analysis/test-alias-analysis.mlir
@@ -52,10 +52,10 @@ func @control_flow(%arg: memref<2xf32>, %cond: i1) attributes {test.ptr = "func"
   %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32>
   %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32>
 
-  cond_br %cond, ^bb1(%0 : memref<8x64xf32>), ^bb2(%0 : memref<8x64xf32>)
+  cf.cond_br %cond, ^bb1(%0 : memref<8x64xf32>), ^bb2(%0 : memref<8x64xf32>)
 
 ^bb1(%arg1: memref<8x64xf32>):
-  br ^bb2(%arg1 : memref<8x64xf32>)
+  cf.br ^bb2(%arg1 : memref<8x64xf32>)
 
 ^bb2(%arg2: memref<8x64xf32>):
   return
@@ -85,10 +85,10 @@ func @control_flow_merge(%arg: memref<2xf32>, %cond: i1) attributes {test.ptr =
   %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32>
   %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32>
 
-  cond_br %cond, ^bb1(%0 : memref<8x64xf32>), ^bb2(%2 : memref<8x64xf32>)
+  cf.cond_br %cond, ^bb1(%0 : memref<8x64xf32>), ^bb2(%2 : memref<8x64xf32>)
 
 ^bb1(%arg1: memref<8x64xf32>):
-  br ^bb2(%arg1 : memref<8x64xf32>)
+  cf.br ^bb2(%arg1 : memref<8x64xf32>)
 
 ^bb2(%arg2: memref<8x64xf32>):
   return

diff --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir
index 8333064028fbd..88403c8a7754c 100644
--- a/mlir/test/Analysis/test-dominance.mlir
+++ b/mlir/test/Analysis/test-dominance.mlir
@@ -2,11 +2,11 @@
 
 // CHECK-LABEL: Testing : func_condBranch
 func @func_condBranch(%cond : i1) {
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
-  br ^exit
+  cf.br ^exit
 ^bb2:
-  br ^exit
+  cf.br ^exit
 ^exit:
   return
 }
@@ -49,14 +49,14 @@ func @func_condBranch(%cond : i1) {
 
 // CHECK-LABEL: Testing : func_loop
 func @func_loop(%arg0 : i32, %arg1 : i32) {
-  br ^loopHeader(%arg0 : i32)
+  cf.br ^loopHeader(%arg0 : i32)
 ^loopHeader(%counter : i32):
   %lessThan = arith.cmpi slt, %counter, %arg1 : i32
-  cond_br %lessThan, ^loopBody, ^exit
+  cf.cond_br %lessThan, ^loopBody, ^exit
 ^loopBody:
   %const0 = arith.constant 1 : i32
   %inc = arith.addi %counter, %const0 : i32
-  br ^loopHeader(%inc : i32)
+  cf.br ^loopHeader(%inc : i32)
 ^exit:
   return
 }
@@ -153,17 +153,17 @@ func @func_loop_nested_region(
   %arg2 : index,
   %arg3 : index,
   %arg4 : index) {
-  br ^loopHeader(%arg0 : i32)
+  cf.br ^loopHeader(%arg0 : i32)
 ^loopHeader(%counter : i32):
   %lessThan = arith.cmpi slt, %counter, %arg1 : i32
-  cond_br %lessThan, ^loopBody, ^exit
+  cf.cond_br %lessThan, ^loopBody, ^exit
 ^loopBody:
   %const0 = arith.constant 1 : i32
   %inc = arith.addi %counter, %const0 : i32
   scf.for %arg5 = %arg2 to %arg3 step %arg4 {
     scf.for %arg6 = %arg2 to %arg3 step %arg4 { }
   }
-  br ^loopHeader(%inc : i32)
+  cf.br ^loopHeader(%inc : i32)
 ^exit:
   return
 }

diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir
index 0561846e9dbcb..570ef1fbdab16 100644
--- a/mlir/test/Analysis/test-liveness.mlir
+++ b/mlir/test/Analysis/test-liveness.mlir
@@ -19,7 +19,7 @@ func @func_simpleBranch(%arg0: i32, %arg1 : i32) -> i32 {
   // CHECK-NEXT: LiveOut: arg0 at 0 arg1 at 0
   // CHECK-NEXT: BeginLiveness
   // CHECK-NEXT: EndLiveness
-  br ^exit
+  cf.br ^exit
 ^exit:
   // CHECK: Block: 1
   // CHECK-NEXT: LiveIn: arg0 at 0 arg1 at 0
@@ -42,17 +42,17 @@ func @func_condBranch(%cond : i1, %arg1: i32, %arg2 : i32) -> i32 {
   // CHECK-NEXT: LiveOut: arg1 at 0 arg2 at 0
   // CHECK-NEXT: BeginLiveness
   // CHECK-NEXT: EndLiveness
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
   // CHECK: Block: 1
   // CHECK-NEXT: LiveIn: arg1 at 0 arg2 at 0
   // CHECK-NEXT: LiveOut: arg1 at 0 arg2 at 0
-  br ^exit
+  cf.br ^exit
 ^bb2:
   // CHECK: Block: 2
   // CHECK-NEXT: LiveIn: arg1 at 0 arg2 at 0
   // CHECK-NEXT: LiveOut: arg1 at 0 arg2 at 0
-  br ^exit
+  cf.br ^exit
 ^exit:
   // CHECK: Block: 3
   // CHECK-NEXT: LiveIn: arg1 at 0 arg2 at 0
@@ -74,7 +74,7 @@ func @func_loop(%arg0 : i32, %arg1 : i32) -> i32 {
   // CHECK-NEXT: LiveIn:{{ *$}}
   // CHECK-NEXT: LiveOut: arg1 at 0
   %const0 = arith.constant 0 : i32
-  br ^loopHeader(%const0, %arg0 : i32, i32)
+  cf.br ^loopHeader(%const0, %arg0 : i32, i32)
 ^loopHeader(%counter : i32, %i : i32):
   // CHECK: Block: 1
   // CHECK-NEXT: LiveIn: arg1 at 0
@@ -82,10 +82,10 @@ func @func_loop(%arg0 : i32, %arg1 : i32) -> i32 {
   // CHECK-NEXT: BeginLiveness
   // CHECK-NEXT: val_5
   // CHECK-NEXT:     %2 = arith.cmpi
-  // CHECK-NEXT:     cond_br
+  // CHECK-NEXT:     cf.cond_br
   // CHECK-NEXT: EndLiveness
   %lessThan = arith.cmpi slt, %counter, %arg1 : i32
-  cond_br %lessThan, ^loopBody(%i : i32), ^exit(%i : i32)
+  cf.cond_br %lessThan, ^loopBody(%i : i32), ^exit(%i : i32)
 ^loopBody(%val : i32):
   // CHECK: Block: 2
   // CHECK-NEXT: LiveIn: arg1 at 0 arg0 at 1
@@ -98,12 +98,12 @@ func @func_loop(%arg0 : i32, %arg1 : i32) -> i32 {
   // CHECK-NEXT: val_8
   // CHECK-NEXT:     %4 = arith.addi
   // CHECK-NEXT:     %5 = arith.addi
-  // CHECK-NEXT:     br
+  // CHECK-NEXT:     cf.br
   // CHECK: EndLiveness
   %const1 = arith.constant 1 : i32
   %inc = arith.addi %val, %const1 : i32
   %inc2 = arith.addi %counter, %const1 : i32
-  br ^loopHeader(%inc, %inc2 : i32, i32)
+  cf.br ^loopHeader(%inc, %inc2 : i32, i32)
 ^exit(%sum : i32):
   // CHECK: Block: 3
   // CHECK-NEXT: LiveIn: arg1 at 0
@@ -147,14 +147,14 @@ func @func_ranges(%cond : i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 {
   // CHECK-NEXT: val_9
   // CHECK-NEXT:    %4 = arith.muli
   // CHECK-NEXT:    %5 = arith.addi
-  // CHECK-NEXT:    cond_br
+  // CHECK-NEXT:    cf.cond_br
   // CHECK-NEXT:    %c
   // CHECK-NEXT:    %6 = arith.muli
   // CHECK-NEXT:    %7 = arith.muli
   // CHECK-NEXT:    %8 = arith.addi
   // CHECK-NEXT: val_10
   // CHECK-NEXT:    %5 = arith.addi
-  // CHECK-NEXT:    cond_br
+  // CHECK-NEXT:    cf.cond_br
   // CHECK-NEXT:    %7
   // CHECK: EndLiveness
   %0 = arith.addi %arg1, %arg2 : i32
@@ -164,7 +164,7 @@ func @func_ranges(%cond : i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 {
   %3 = arith.muli %0, %1 : i32
   %4 = arith.muli %3, %2 : i32
   %5 = arith.addi %4, %const1 : i32
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   // CHECK: Block: 1
@@ -172,7 +172,7 @@ func @func_ranges(%cond : i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 {
   // CHECK-NEXT: LiveOut: arg2 at 0
   %const4 = arith.constant 4 : i32
   %6 = arith.muli %4, %const4 : i32
-  br ^exit(%6 : i32)
+  cf.br ^exit(%6 : i32)
 
 ^bb2:
   // CHECK: Block: 2
@@ -180,7 +180,7 @@ func @func_ranges(%cond : i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 {
   // CHECK-NEXT: LiveOut: arg2 at 0
   %7 = arith.muli %4, %5 : i32
   %8 = arith.addi %4, %arg2 : i32
-  br ^exit(%8 : i32)
+  cf.br ^exit(%8 : i32)
 
 ^exit(%sum : i32):
   // CHECK: Block: 3
@@ -284,7 +284,7 @@ func @nested_region3(
   // CHECK-NEXT:    %0 = arith.addi
   // CHECK-NEXT:    %1 = arith.addi
   // CHECK-NEXT:    scf.for
-  // CHECK:         // br ^bb1
+  // CHECK:         // cf.br ^bb1
   // CHECK-NEXT:    %2 = arith.addi
   // CHECK-NEXT:    scf.for
   // CHECK:         // %2 = arith.addi
@@ -301,7 +301,7 @@ func @nested_region3(
     %2 = arith.addi %0, %arg5 : i32
     memref.store %2, %buffer[] : memref<i32>
   }
-  br ^exit
+  cf.br ^exit
 
 ^exit:
   // CHECK: Block: 2

diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c
index 79fb3344d0819..7ac7a19a579d3 100644
--- a/mlir/test/CAPI/ir.c
+++ b/mlir/test/CAPI/ir.c
@@ -1531,10 +1531,10 @@ int registerOnlyStd() {
   fprintf(stderr, "@registration\n");
   // CHECK-LABEL: @registration
 
-  // CHECK: std.cond_br is_registered: 1
-  fprintf(stderr, "std.cond_br is_registered: %d\n",
+  // CHECK: cf.cond_br is_registered: 1
+  fprintf(stderr, "cf.cond_br is_registered: %d\n",
           mlirContextIsRegisteredOperation(
-              ctx, mlirStringRefCreateFromCString("std.cond_br")));
+              ctx, mlirStringRefCreateFromCString("cf.cond_br")));
 
   // CHECK: std.not_existing_op is_registered: 0
   fprintf(stderr, "std.not_existing_op is_registered: %d\n",

diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
index 46ff7501f4b89..73aa0d3d671b0 100644
--- a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
@@ -27,7 +27,7 @@ func @execute_no_async_args(%arg0: f32, %arg1: memref<1xf32>) {
   // CHECK: %[[IS_ERROR:.*]] = call @mlirAsyncRuntimeIsTokenError(%[[TOKEN]])
   // CHECK: %[[TRUE:.*]] = arith.constant true
   // CHECK: %[[NOT_ERROR:.*]] = arith.xori %[[IS_ERROR]], %[[TRUE]] : i1
-  // CHECK: assert %[[NOT_ERROR]]
+  // CHECK: cf.assert %[[NOT_ERROR]]
   // CHECK-NEXT: return
   async.await %token : !async.token
   return
@@ -90,7 +90,7 @@ func @nested_async_execute(%arg0: f32, %arg1: f32, %arg2: memref<1xf32>) {
   // CHECK: %[[IS_ERROR:.*]] = call @mlirAsyncRuntimeIsTokenError(%[[TOKEN]])
   // CHECK: %[[TRUE:.*]] = arith.constant true
   // CHECK: %[[NOT_ERROR:.*]] = arith.xori %[[IS_ERROR]], %[[TRUE]] : i1
-  // CHECK: assert %[[NOT_ERROR]]
+  // CHECK: cf.assert %[[NOT_ERROR]]
   async.await %token0 : !async.token
   return
 }

diff --git a/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir b/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir
new file mode 100644
index 0000000000000..712f2f16e968c
--- /dev/null
+++ b/mlir/test/Conversion/ControlFlowToSPIRV/cf-ops-to-spirv.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-opt -split-input-file -convert-std-to-spirv -verify-diagnostics %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// cf.br, cf.cond_br
+//===----------------------------------------------------------------------===//
+
+module attributes {
+  spv.target_env = #spv.target_env<#spv.vce<v1.0, [], []>, {}>
+} {
+
+// CHECK-LABEL: func @simple_loop
+func @simple_loop(index, index, index) {
+^bb0(%begin : index, %end : index, %step : index):
+// CHECK-NEXT:  spv.Branch ^bb1
+  cf.br ^bb1
+
+// CHECK-NEXT: ^bb1:    // pred: ^bb0
+// CHECK-NEXT:  spv.Branch ^bb2({{.*}} : i32)
+^bb1:   // pred: ^bb0
+  cf.br ^bb2(%begin : index)
+
+// CHECK:      ^bb2({{.*}}: i32):       // 2 preds: ^bb1, ^bb3
+// CHECK-NEXT:  {{.*}} = spv.SLessThan {{.*}}, {{.*}} : i32
+// CHECK-NEXT:  spv.BranchConditional {{.*}}, ^bb3, ^bb4
+^bb2(%0: index):        // 2 preds: ^bb1, ^bb3
+  %1 = arith.cmpi slt, %0, %end : index
+  cf.cond_br %1, ^bb3, ^bb4
+
+// CHECK:      ^bb3:    // pred: ^bb2
+// CHECK-NEXT:  {{.*}} = spv.IAdd {{.*}}, {{.*}} : i32
+// CHECK-NEXT:  spv.Branch ^bb2({{.*}} : i32)
+^bb3:   // pred: ^bb2
+  %2 = arith.addi %0, %step : index
+  cf.br ^bb2(%2 : index)
+
+// CHECK:      ^bb4:    // pred: ^bb2
+^bb4:   // pred: ^bb2
+  return
+}
+
+}

diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
index 1985638d1fed1..258d5cc41e7cb 100644
--- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
@@ -168,16 +168,16 @@ gpu.module @test_module {
       %c128 = arith.constant 128 : index
       %c32 = arith.constant 32 : index
       %0 = gpu.subgroup_mma_load_matrix %arg2[%c0, %c0] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "COp">
-      br ^bb1(%c0, %0 : index, !gpu.mma_matrix<16x16xf16, "COp">)
+      cf.br ^bb1(%c0, %0 : index, !gpu.mma_matrix<16x16xf16, "COp">)
     ^bb1(%1: index, %2: !gpu.mma_matrix<16x16xf16, "COp">):  // 2 preds: ^bb0, ^bb2
       %3 = arith.cmpi slt, %1, %c128 : index
-      cond_br %3, ^bb2, ^bb3
+      cf.cond_br %3, ^bb2, ^bb3
     ^bb2:  // pred: ^bb1
       %4 = gpu.subgroup_mma_load_matrix %arg0[%c0, %1] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "AOp">
       %5 = gpu.subgroup_mma_load_matrix %arg1[%1, %c0] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "BOp">
       %6 = gpu.subgroup_mma_compute %4, %5, %2 : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
       %7 = arith.addi %1, %c32 : index
-      br ^bb1(%7, %6 : index, !gpu.mma_matrix<16x16xf16, "COp">)
+      cf.br ^bb1(%7, %6 : index, !gpu.mma_matrix<16x16xf16, "COp">)
     ^bb3:  // pred: ^bb1
       gpu.subgroup_mma_store_matrix %2, %arg2[%c0, %c0] {leadDimension = 128 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<128x128xf16>
       return

diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index f229b8b9998f5..460ea9045ae07 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -22,17 +22,17 @@ func @branch_loop() {
   // CHECK: omp.parallel
   omp.parallel {
     // CHECK-NEXT: llvm.br ^[[BB1:.*]](%{{[0-9]+}}, %{{[0-9]+}} : i64, i64
-    br ^bb1(%start, %end : index, index)
+    cf.br ^bb1(%start, %end : index, index)
   // CHECK-NEXT: ^[[BB1]](%[[ARG1:[0-9]+]]: i64, %[[ARG2:[0-9]+]]: i64):{{.*}}
   ^bb1(%0: index, %1: index):
     // CHECK-NEXT: %[[CMP:[0-9]+]] = llvm.icmp "slt" %[[ARG1]], %[[ARG2]] : i64
     %2 = arith.cmpi slt, %0, %1 : index
     // CHECK-NEXT: llvm.cond_br %[[CMP]], ^[[BB2:.*]](%{{[0-9]+}}, %{{[0-9]+}} : i64, i64), ^[[BB3:.*]]
-    cond_br %2, ^bb2(%end, %end : index, index), ^bb3
+    cf.cond_br %2, ^bb2(%end, %end : index, index), ^bb3
   // CHECK-NEXT: ^[[BB2]](%[[ARG3:[0-9]+]]: i64, %[[ARG4:[0-9]+]]: i64):
   ^bb2(%3: index, %4: index):
     // CHECK-NEXT: llvm.br ^[[BB1]](%[[ARG3]], %[[ARG4]] : i64, i64)
-    br ^bb1(%3, %4 : index, index)
+    cf.br ^bb1(%3, %4 : index, index)
   // CHECK-NEXT: ^[[BB3]]:
   ^bb3:
     omp.flush

diff --git a/mlir/test/Conversion/SCFToStandard/convert-to-cfg.mlir b/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir
similarity index 77%
rename from mlir/test/Conversion/SCFToStandard/convert-to-cfg.mlir
rename to mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir
index dc10d9e2943df..159c557cac259 100644
--- a/mlir/test/Conversion/SCFToStandard/convert-to-cfg.mlir
+++ b/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir
@@ -1,14 +1,14 @@
-// RUN: mlir-opt -allow-unregistered-dialect -convert-scf-to-std %s | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect -convert-scf-to-cf %s | FileCheck %s
 
 // CHECK-LABEL: func @simple_std_for_loop(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-//  CHECK-NEXT:  br ^bb1(%{{.*}} : index)
+//  CHECK-NEXT:  cf.br ^bb1(%{{.*}} : index)
 //  CHECK-NEXT:  ^bb1(%{{.*}}: index):    // 2 preds: ^bb0, ^bb2
 //  CHECK-NEXT:    %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:    cond_br %{{.*}}, ^bb2, ^bb3
+//  CHECK-NEXT:    cf.cond_br %{{.*}}, ^bb2, ^bb3
 //  CHECK-NEXT:  ^bb2:   // pred: ^bb1
 //  CHECK-NEXT:    %{{.*}} = arith.constant 1 : index
 //  CHECK-NEXT:    %[[iv:.*]] = arith.addi %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:    br ^bb1(%[[iv]] : index)
+//  CHECK-NEXT:    cf.br ^bb1(%[[iv]] : index)
 //  CHECK-NEXT:  ^bb3:   // pred: ^bb1
 //  CHECK-NEXT:    return
 func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) {
@@ -19,23 +19,23 @@ func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) {
 }
 
 // CHECK-LABEL: func @simple_std_2_for_loops(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-//  CHECK-NEXT:    br ^bb1(%{{.*}} : index)
+//  CHECK-NEXT:    cf.br ^bb1(%{{.*}} : index)
 //  CHECK-NEXT:  ^bb1(%[[ub0:.*]]: index):    // 2 preds: ^bb0, ^bb5
 //  CHECK-NEXT:    %[[cond0:.*]] = arith.cmpi slt, %[[ub0]], %{{.*}} : index
-//  CHECK-NEXT:    cond_br %[[cond0]], ^bb2, ^bb6
+//  CHECK-NEXT:    cf.cond_br %[[cond0]], ^bb2, ^bb6
 //  CHECK-NEXT:  ^bb2:   // pred: ^bb1
 //  CHECK-NEXT:    %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:    br ^bb3(%{{.*}} : index)
+//  CHECK-NEXT:    cf.br ^bb3(%{{.*}} : index)
 //  CHECK-NEXT:  ^bb3(%[[ub1:.*]]: index):    // 2 preds: ^bb2, ^bb4
 //  CHECK-NEXT:    %[[cond1:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:    cond_br %[[cond1]], ^bb4, ^bb5
+//  CHECK-NEXT:    cf.cond_br %[[cond1]], ^bb4, ^bb5
 //  CHECK-NEXT:  ^bb4:   // pred: ^bb3
 //  CHECK-NEXT:    %{{.*}} = arith.constant 1 : index
 //  CHECK-NEXT:    %[[iv1:.*]] = arith.addi %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:    br ^bb3(%[[iv1]] : index)
+//  CHECK-NEXT:    cf.br ^bb3(%[[iv1]] : index)
 //  CHECK-NEXT:  ^bb5:   // pred: ^bb3
 //  CHECK-NEXT:    %[[iv0:.*]] = arith.addi %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:    br ^bb1(%[[iv0]] : index)
+//  CHECK-NEXT:    cf.br ^bb1(%[[iv0]] : index)
 //  CHECK-NEXT:  ^bb6:   // pred: ^bb1
 //  CHECK-NEXT:    return
 func @simple_std_2_for_loops(%arg0 : index, %arg1 : index, %arg2 : index) {
@@ -49,10 +49,10 @@ func @simple_std_2_for_loops(%arg0 : index, %arg1 : index, %arg2 : index) {
 }
 
 // CHECK-LABEL: func @simple_std_if(%{{.*}}: i1) {
-//  CHECK-NEXT:   cond_br %{{.*}}, ^bb1, ^bb2
+//  CHECK-NEXT:   cf.cond_br %{{.*}}, ^bb1, ^bb2
 //  CHECK-NEXT:   ^bb1:   // pred: ^bb0
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     br ^bb2
+//  CHECK-NEXT:     cf.br ^bb2
 //  CHECK-NEXT:   ^bb2:   // 2 preds: ^bb0, ^bb1
 //  CHECK-NEXT:     return
 func @simple_std_if(%arg0: i1) {
@@ -63,13 +63,13 @@ func @simple_std_if(%arg0: i1) {
 }
 
 // CHECK-LABEL: func @simple_std_if_else(%{{.*}}: i1) {
-//  CHECK-NEXT:   cond_br %{{.*}}, ^bb1, ^bb2
+//  CHECK-NEXT:   cf.cond_br %{{.*}}, ^bb1, ^bb2
 //  CHECK-NEXT:   ^bb1:   // pred: ^bb0
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     br ^bb3
+//  CHECK-NEXT:     cf.br ^bb3
 //  CHECK-NEXT:   ^bb2:   // pred: ^bb0
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     br ^bb3
+//  CHECK-NEXT:     cf.br ^bb3
 //  CHECK-NEXT:   ^bb3:   // 2 preds: ^bb1, ^bb2
 //  CHECK-NEXT:     return
 func @simple_std_if_else(%arg0: i1) {
@@ -82,18 +82,18 @@ func @simple_std_if_else(%arg0: i1) {
 }
 
 // CHECK-LABEL: func @simple_std_2_ifs(%{{.*}}: i1) {
-//  CHECK-NEXT:   cond_br %{{.*}}, ^bb1, ^bb5
+//  CHECK-NEXT:   cf.cond_br %{{.*}}, ^bb1, ^bb5
 //  CHECK-NEXT: ^bb1:   // pred: ^bb0
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:   cond_br %{{.*}}, ^bb2, ^bb3
+//  CHECK-NEXT:   cf.cond_br %{{.*}}, ^bb2, ^bb3
 //  CHECK-NEXT: ^bb2:   // pred: ^bb1
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:   br ^bb4
+//  CHECK-NEXT:   cf.br ^bb4
 //  CHECK-NEXT: ^bb3:   // pred: ^bb1
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:   br ^bb4
+//  CHECK-NEXT:   cf.br ^bb4
 //  CHECK-NEXT: ^bb4:   // 2 preds: ^bb2, ^bb3
-//  CHECK-NEXT:   br ^bb5
+//  CHECK-NEXT:   cf.br ^bb5
 //  CHECK-NEXT: ^bb5:   // 2 preds: ^bb0, ^bb4
 //  CHECK-NEXT:   return
 func @simple_std_2_ifs(%arg0: i1) {
@@ -109,27 +109,27 @@ func @simple_std_2_ifs(%arg0: i1) {
 }
 
 // CHECK-LABEL: func @simple_std_for_loop_with_2_ifs(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: i1) {
-//  CHECK-NEXT:   br ^bb1(%{{.*}} : index)
+//  CHECK-NEXT:   cf.br ^bb1(%{{.*}} : index)
 //  CHECK-NEXT:   ^bb1(%{{.*}}: index):    // 2 preds: ^bb0, ^bb7
 //  CHECK-NEXT:     %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:     cond_br %{{.*}}, ^bb2, ^bb8
+//  CHECK-NEXT:     cf.cond_br %{{.*}}, ^bb2, ^bb8
 //  CHECK-NEXT:   ^bb2:   // pred: ^bb1
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     cond_br %{{.*}}, ^bb3, ^bb7
+//  CHECK-NEXT:     cf.cond_br %{{.*}}, ^bb3, ^bb7
 //  CHECK-NEXT:   ^bb3:   // pred: ^bb2
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     cond_br %{{.*}}, ^bb4, ^bb5
+//  CHECK-NEXT:     cf.cond_br %{{.*}}, ^bb4, ^bb5
 //  CHECK-NEXT:   ^bb4:   // pred: ^bb3
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     br ^bb6
+//  CHECK-NEXT:     cf.br ^bb6
 //  CHECK-NEXT:   ^bb5:   // pred: ^bb3
 //  CHECK-NEXT:     %{{.*}} = arith.constant 1 : index
-//  CHECK-NEXT:     br ^bb6
+//  CHECK-NEXT:     cf.br ^bb6
 //  CHECK-NEXT:   ^bb6:   // 2 preds: ^bb4, ^bb5
-//  CHECK-NEXT:     br ^bb7
+//  CHECK-NEXT:     cf.br ^bb7
 //  CHECK-NEXT:   ^bb7:   // 2 preds: ^bb2, ^bb6
 //  CHECK-NEXT:     %[[iv0:.*]] = arith.addi %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:     br ^bb1(%[[iv0]] : index)
+//  CHECK-NEXT:     cf.br ^bb1(%[[iv0]] : index)
 //  CHECK-NEXT:   ^bb8:   // pred: ^bb1
 //  CHECK-NEXT:     return
 //  CHECK-NEXT: }
@@ -150,12 +150,12 @@ func @simple_std_for_loop_with_2_ifs(%arg0 : index, %arg1 : index, %arg2 : index
 
 // CHECK-LABEL: func @simple_if_yield
 func @simple_if_yield(%arg0: i1) -> (i1, i1) {
-// CHECK:   cond_br %{{.*}}, ^[[then:.*]], ^[[else:.*]]
+// CHECK:   cf.cond_br %{{.*}}, ^[[then:.*]], ^[[else:.*]]
   %0:2 = scf.if %arg0 -> (i1, i1) {
 // CHECK: ^[[then]]:
 // CHECK:   %[[v0:.*]] = arith.constant false
 // CHECK:   %[[v1:.*]] = arith.constant true
-// CHECK:   br ^[[dom:.*]](%[[v0]], %[[v1]] : i1, i1)
+// CHECK:   cf.br ^[[dom:.*]](%[[v0]], %[[v1]] : i1, i1)
     %c0 = arith.constant false
     %c1 = arith.constant true
     scf.yield %c0, %c1 : i1, i1
@@ -163,13 +163,13 @@ func @simple_if_yield(%arg0: i1) -> (i1, i1) {
 // CHECK: ^[[else]]:
 // CHECK:   %[[v2:.*]] = arith.constant false
 // CHECK:   %[[v3:.*]] = arith.constant true
-// CHECK:   br ^[[dom]](%[[v3]], %[[v2]] : i1, i1)
+// CHECK:   cf.br ^[[dom]](%[[v3]], %[[v2]] : i1, i1)
     %c0 = arith.constant false
     %c1 = arith.constant true
     scf.yield %c1, %c0 : i1, i1
   }
 // CHECK: ^[[dom]](%[[arg1:.*]]: i1, %[[arg2:.*]]: i1):
-// CHECK:   br ^[[cont:.*]]
+// CHECK:   cf.br ^[[cont:.*]]
 // CHECK: ^[[cont]]:
 // CHECK:   return %[[arg1]], %[[arg2]]
   return %0#0, %0#1 : i1, i1
@@ -177,49 +177,49 @@ func @simple_if_yield(%arg0: i1) -> (i1, i1) {
 
 // CHECK-LABEL: func @nested_if_yield
 func @nested_if_yield(%arg0: i1) -> (index) {
-// CHECK:   cond_br %{{.*}}, ^[[first_then:.*]], ^[[first_else:.*]]
+// CHECK:   cf.cond_br %{{.*}}, ^[[first_then:.*]], ^[[first_else:.*]]
   %0 = scf.if %arg0 -> i1 {
 // CHECK: ^[[first_then]]:
     %1 = arith.constant true
-// CHECK:   br ^[[first_dom:.*]]({{.*}})
+// CHECK:   cf.br ^[[first_dom:.*]]({{.*}})
     scf.yield %1 : i1
   } else {
 // CHECK: ^[[first_else]]:
     %2 = arith.constant false
-// CHECK:   br ^[[first_dom]]({{.*}})
+// CHECK:   cf.br ^[[first_dom]]({{.*}})
     scf.yield %2 : i1
   }
 // CHECK: ^[[first_dom]](%[[arg1:.*]]: i1):
-// CHECK:   br ^[[first_cont:.*]]
+// CHECK:   cf.br ^[[first_cont:.*]]
 // CHECK: ^[[first_cont]]:
-// CHECK:   cond_br %[[arg1]], ^[[second_outer_then:.*]], ^[[second_outer_else:.*]]
+// CHECK:   cf.cond_br %[[arg1]], ^[[second_outer_then:.*]], ^[[second_outer_else:.*]]
   %1 = scf.if %0 -> index {
 // CHECK: ^[[second_outer_then]]:
-// CHECK:   cond_br %arg0, ^[[second_inner_then:.*]], ^[[second_inner_else:.*]]
+// CHECK:   cf.cond_br %arg0, ^[[second_inner_then:.*]], ^[[second_inner_else:.*]]
     %3 = scf.if %arg0 -> index {
 // CHECK: ^[[second_inner_then]]:
       %4 = arith.constant 40 : index
-// CHECK:   br ^[[second_inner_dom:.*]]({{.*}})
+// CHECK:   cf.br ^[[second_inner_dom:.*]]({{.*}})
       scf.yield %4 : index
     } else {
 // CHECK: ^[[second_inner_else]]:
       %5 = arith.constant 41 : index
-// CHECK:   br ^[[second_inner_dom]]({{.*}})
+// CHECK:   cf.br ^[[second_inner_dom]]({{.*}})
       scf.yield %5 : index
     }
 // CHECK: ^[[second_inner_dom]](%[[arg2:.*]]: index):
-// CHECK:   br ^[[second_inner_cont:.*]]
+// CHECK:   cf.br ^[[second_inner_cont:.*]]
 // CHECK: ^[[second_inner_cont]]:
-// CHECK:   br ^[[second_outer_dom:.*]]({{.*}})
+// CHECK:   cf.br ^[[second_outer_dom:.*]]({{.*}})
     scf.yield %3 : index
   } else {
 // CHECK: ^[[second_outer_else]]:
     %6 = arith.constant 42 : index
-// CHECK:   br ^[[second_outer_dom]]({{.*}}
+// CHECK:   cf.br ^[[second_outer_dom]]({{.*}}
     scf.yield %6 : index
   }
 // CHECK: ^[[second_outer_dom]](%[[arg3:.*]]: index):
-// CHECK:   br ^[[second_outer_cont:.*]]
+// CHECK:   cf.br ^[[second_outer_cont:.*]]
 // CHECK: ^[[second_outer_cont]]:
 // CHECK:   return %[[arg3]] : index
   return %1 : index
@@ -228,22 +228,22 @@ func @nested_if_yield(%arg0: i1) -> (index) {
 // CHECK-LABEL:   func @parallel_loop(
 // CHECK-SAME:                        [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: index, [[VAL_3:%.*]]: index, [[VAL_4:%.*]]: index) {
 // CHECK:           [[VAL_5:%.*]] = arith.constant 1 : index
-// CHECK:           br ^bb1([[VAL_0]] : index)
+// CHECK:           cf.br ^bb1([[VAL_0]] : index)
 // CHECK:         ^bb1([[VAL_6:%.*]]: index):
 // CHECK:           [[VAL_7:%.*]] = arith.cmpi slt, [[VAL_6]], [[VAL_2]] : index
-// CHECK:           cond_br [[VAL_7]], ^bb2, ^bb6
+// CHECK:           cf.cond_br [[VAL_7]], ^bb2, ^bb6
 // CHECK:         ^bb2:
-// CHECK:           br ^bb3([[VAL_1]] : index)
+// CHECK:           cf.br ^bb3([[VAL_1]] : index)
 // CHECK:         ^bb3([[VAL_8:%.*]]: index):
 // CHECK:           [[VAL_9:%.*]] = arith.cmpi slt, [[VAL_8]], [[VAL_3]] : index
-// CHECK:           cond_br [[VAL_9]], ^bb4, ^bb5
+// CHECK:           cf.cond_br [[VAL_9]], ^bb4, ^bb5
 // CHECK:         ^bb4:
 // CHECK:           [[VAL_10:%.*]] = arith.constant 1 : index
 // CHECK:           [[VAL_11:%.*]] = arith.addi [[VAL_8]], [[VAL_5]] : index
-// CHECK:           br ^bb3([[VAL_11]] : index)
+// CHECK:           cf.br ^bb3([[VAL_11]] : index)
 // CHECK:         ^bb5:
 // CHECK:           [[VAL_12:%.*]] = arith.addi [[VAL_6]], [[VAL_4]] : index
-// CHECK:           br ^bb1([[VAL_12]] : index)
+// CHECK:           cf.br ^bb1([[VAL_12]] : index)
 // CHECK:         ^bb6:
 // CHECK:           return
 // CHECK:         }
@@ -262,16 +262,16 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 // CHECK-SAME: (%[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index)
 // CHECK:        %[[INIT0:.*]] = arith.constant 0
 // CHECK:        %[[INIT1:.*]] = arith.constant 1
-// CHECK:        br ^[[COND:.*]](%[[LB]], %[[INIT0]], %[[INIT1]] : index, f32, f32)
+// CHECK:        cf.br ^[[COND:.*]](%[[LB]], %[[INIT0]], %[[INIT1]] : index, f32, f32)
 //
 // CHECK:      ^[[COND]](%[[ITER:.*]]: index, %[[ITER_ARG0:.*]]: f32, %[[ITER_ARG1:.*]]: f32):
 // CHECK:        %[[CMP:.*]] = arith.cmpi slt, %[[ITER]], %[[UB]] : index
-// CHECK:        cond_br %[[CMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]]
+// CHECK:        cf.cond_br %[[CMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]]
 //
 // CHECK:      ^[[BODY]]:
 // CHECK:        %[[SUM:.*]] = arith.addf %[[ITER_ARG0]], %[[ITER_ARG1]] : f32
 // CHECK:        %[[STEPPED:.*]] = arith.addi %[[ITER]], %[[STEP]] : index
-// CHECK:        br ^[[COND]](%[[STEPPED]], %[[SUM]], %[[SUM]] : index, f32, f32)
+// CHECK:        cf.br ^[[COND]](%[[STEPPED]], %[[SUM]], %[[SUM]] : index, f32, f32)
 //
 // CHECK:      ^[[CONTINUE]]:
 // CHECK:        return %[[ITER_ARG0]], %[[ITER_ARG1]] : f32, f32
@@ -288,18 +288,18 @@ func @for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> (f32, f32) {
 // CHECK-LABEL: @nested_for_yield
 // CHECK-SAME: (%[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index)
 // CHECK:         %[[INIT:.*]] = arith.constant
-// CHECK:         br ^[[COND_OUT:.*]](%[[LB]], %[[INIT]] : index, f32)
+// CHECK:         cf.br ^[[COND_OUT:.*]](%[[LB]], %[[INIT]] : index, f32)
 // CHECK:       ^[[COND_OUT]](%[[ITER_OUT:.*]]: index, %[[ARG_OUT:.*]]: f32):
-// CHECK:         cond_br %{{.*}}, ^[[BODY_OUT:.*]], ^[[CONT_OUT:.*]]
+// CHECK:         cf.cond_br %{{.*}}, ^[[BODY_OUT:.*]], ^[[CONT_OUT:.*]]
 // CHECK:       ^[[BODY_OUT]]:
-// CHECK:         br ^[[COND_IN:.*]](%[[LB]], %[[ARG_OUT]] : index, f32)
+// CHECK:         cf.br ^[[COND_IN:.*]](%[[LB]], %[[ARG_OUT]] : index, f32)
 // CHECK:       ^[[COND_IN]](%[[ITER_IN:.*]]: index, %[[ARG_IN:.*]]: f32):
-// CHECK:         cond_br %{{.*}}, ^[[BODY_IN:.*]], ^[[CONT_IN:.*]]
+// CHECK:         cf.cond_br %{{.*}}, ^[[BODY_IN:.*]], ^[[CONT_IN:.*]]
 // CHECK:       ^[[BODY_IN]]
 // CHECK:         %[[RES:.*]] = arith.addf
-// CHECK:         br ^[[COND_IN]](%{{.*}}, %[[RES]] : index, f32)
+// CHECK:         cf.br ^[[COND_IN]](%{{.*}}, %[[RES]] : index, f32)
 // CHECK:       ^[[CONT_IN]]:
-// CHECK:         br ^[[COND_OUT]](%{{.*}}, %[[ARG_IN]] : index, f32)
+// CHECK:         cf.br ^[[COND_OUT]](%{{.*}}, %[[ARG_IN]] : index, f32)
 // CHECK:       ^[[CONT_OUT]]:
 // CHECK:         return %[[ARG_OUT]] : f32
 func @nested_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> f32 {
@@ -325,13 +325,13 @@ func @simple_parallel_reduce_loop(%arg0: index, %arg1: index,
   // passed across as a block argument.
 
   // Branch to the condition block passing in the initial reduction value.
-  // CHECK:   br ^[[COND:.*]](%[[LB]], %[[INIT]]
+  // CHECK:   cf.br ^[[COND:.*]](%[[LB]], %[[INIT]]
 
   // Condition branch takes as arguments the current value of the iteration
   // variable and the current partially reduced value.
   // CHECK: ^[[COND]](%[[ITER:.*]]: index, %[[ITER_ARG:.*]]: f32
   // CHECK:   %[[COMP:.*]] = arith.cmpi slt, %[[ITER]], %[[UB]]
-  // CHECK:   cond_br %[[COMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]]
+  // CHECK:   cf.cond_br %[[COMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]]
 
   // Bodies of scf.reduce operations are folded into the main loop body. The
   // result of this partial reduction is passed as argument to the condition
@@ -340,7 +340,7 @@ func @simple_parallel_reduce_loop(%arg0: index, %arg1: index,
   // CHECK:   %[[CST:.*]] = arith.constant 4.2
   // CHECK:   %[[PROD:.*]] = arith.mulf %[[ITER_ARG]], %[[CST]]
   // CHECK:   %[[INCR:.*]] = arith.addi %[[ITER]], %[[STEP]]
-  // CHECK:   br ^[[COND]](%[[INCR]], %[[PROD]]
+  // CHECK:   cf.br ^[[COND]](%[[INCR]], %[[PROD]]
 
   // The continuation block has access to the (last value of) reduction.
   // CHECK: ^[[CONTINUE]]:
@@ -363,19 +363,19 @@ func @parallel_reduce_loop(%arg0 : index, %arg1 : index, %arg2 : index,
   // Multiple reduction blocks should be folded in the same body, and the
   // reduction value must be forwarded through block structures.
   // CHECK:   %[[INIT2:.*]] = arith.constant 42
-  // CHECK:   br ^[[COND_OUT:.*]](%{{.*}}, %[[INIT1]], %[[INIT2]]
+  // CHECK:   cf.br ^[[COND_OUT:.*]](%{{.*}}, %[[INIT1]], %[[INIT2]]
   // CHECK: ^[[COND_OUT]](%{{.*}}: index, %[[ITER_ARG1_OUT:.*]]: f32, %[[ITER_ARG2_OUT:.*]]: i64
-  // CHECK:   cond_br %{{.*}}, ^[[BODY_OUT:.*]], ^[[CONT_OUT:.*]]
+  // CHECK:   cf.cond_br %{{.*}}, ^[[BODY_OUT:.*]], ^[[CONT_OUT:.*]]
   // CHECK: ^[[BODY_OUT]]:
-  // CHECK:   br ^[[COND_IN:.*]](%{{.*}}, %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]]
+  // CHECK:   cf.br ^[[COND_IN:.*]](%{{.*}}, %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]]
   // CHECK: ^[[COND_IN]](%{{.*}}: index, %[[ITER_ARG1_IN:.*]]: f32, %[[ITER_ARG2_IN:.*]]: i64
-  // CHECK:   cond_br %{{.*}}, ^[[BODY_IN:.*]], ^[[CONT_IN:.*]]
+  // CHECK:   cf.cond_br %{{.*}}, ^[[BODY_IN:.*]], ^[[CONT_IN:.*]]
   // CHECK: ^[[BODY_IN]]:
   // CHECK:   %[[REDUCE1:.*]] = arith.addf %[[ITER_ARG1_IN]], %{{.*}}
   // CHECK:   %[[REDUCE2:.*]] = arith.ori %[[ITER_ARG2_IN]], %{{.*}}
-  // CHECK:   br ^[[COND_IN]](%{{.*}}, %[[REDUCE1]], %[[REDUCE2]]
+  // CHECK:   cf.br ^[[COND_IN]](%{{.*}}, %[[REDUCE1]], %[[REDUCE2]]
   // CHECK: ^[[CONT_IN]]:
-  // CHECK:   br ^[[COND_OUT]](%{{.*}}, %[[ITER_ARG1_IN]], %[[ITER_ARG2_IN]]
+  // CHECK:   cf.br ^[[COND_OUT]](%{{.*}}, %[[ITER_ARG1_IN]], %[[ITER_ARG2_IN]]
   // CHECK: ^[[CONT_OUT]]:
   // CHECK:   return %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]]
   %step = arith.constant 1 : index
@@ -416,17 +416,17 @@ func @unknown_op_inside_loop(%arg0: index, %arg1: index, %arg2: index) {
 // CHECK-LABEL: @minimal_while
 func @minimal_while() {
   // CHECK:   %[[COND:.*]] = "test.make_condition"() : () -> i1
-  // CHECK:   br ^[[BEFORE:.*]]
+  // CHECK:   cf.br ^[[BEFORE:.*]]
   %0 = "test.make_condition"() : () -> i1
   scf.while : () -> () {
   // CHECK: ^[[BEFORE]]:
-  // CHECK:   cond_br %[[COND]], ^[[AFTER:.*]], ^[[CONT:.*]]
+  // CHECK:   cf.cond_br %[[COND]], ^[[AFTER:.*]], ^[[CONT:.*]]
     scf.condition(%0)
   } do {
   // CHECK: ^[[AFTER]]:
   // CHECK:   "test.some_payload"() : () -> ()
     "test.some_payload"() : () -> ()
-  // CHECK:   br ^[[BEFORE]]
+  // CHECK:   cf.br ^[[BEFORE]]
     scf.yield
   }
   // CHECK: ^[[CONT]]:
@@ -436,16 +436,16 @@ func @minimal_while() {
 
 // CHECK-LABEL: @do_while
 func @do_while(%arg0: f32) {
-  // CHECK:   br ^[[BEFORE:.*]]({{.*}}: f32)
+  // CHECK:   cf.br ^[[BEFORE:.*]]({{.*}}: f32)
   scf.while (%arg1 = %arg0) : (f32) -> (f32) {
   // CHECK: ^[[BEFORE]](%[[VAL:.*]]: f32):
     // CHECK:   %[[COND:.*]] = "test.make_condition"() : () -> i1
     %0 = "test.make_condition"() : () -> i1
-    // CHECK:   cond_br %[[COND]], ^[[BEFORE]](%[[VAL]] : f32), ^[[CONT:.*]]
+    // CHECK:   cf.cond_br %[[COND]], ^[[BEFORE]](%[[VAL]] : f32), ^[[CONT:.*]]
     scf.condition(%0) %arg1 : f32
   } do {
   ^bb0(%arg2: f32):
-    // CHECK-NOT: br ^[[BEFORE]]
+    // CHECK-NOT: cf.br ^[[BEFORE]]
     scf.yield %arg2 : f32
   }
   // CHECK: ^[[CONT]]:
@@ -460,21 +460,21 @@ func @while_values(%arg0: i32, %arg1: f32) {
   %0 = "test.make_condition"() : () -> i1
   %c0_i32 = arith.constant 0 : i32
   %cst = arith.constant 0.000000e+00 : f32
-  // CHECK:     br ^[[BEFORE:.*]](%[[ARG0]], %[[ARG1]] : i32, f32)
+  // CHECK:     cf.br ^[[BEFORE:.*]](%[[ARG0]], %[[ARG1]] : i32, f32)
   %1:2 = scf.while (%arg2 = %arg0, %arg3 = %arg1) : (i32, f32) -> (i64, f64) {
   // CHECK:   ^bb1(%[[ARG2:.*]]: i32, %[[ARG3:.]]: f32):
     // CHECK:   %[[VAL1:.*]] = arith.extui %[[ARG0]] : i32 to i64
     %2 = arith.extui %arg0 : i32 to i64
     // CHECK:   %[[VAL2:.*]] = arith.extf %[[ARG3]] : f32 to f64
     %3 = arith.extf %arg3 : f32 to f64
-    // CHECK:   cond_br %[[COND]],
+    // CHECK:   cf.cond_br %[[COND]],
     // CHECK:           ^[[AFTER:.*]](%[[VAL1]], %[[VAL2]] : i64, f64),
     // CHECK:           ^[[CONT:.*]]
     scf.condition(%0) %2, %3 : i64, f64
   } do {
   // CHECK:   ^[[AFTER]](%[[ARG4:.*]]: i64, %[[ARG5:.*]]: f64):
   ^bb0(%arg2: i64, %arg3: f64):  
-    // CHECK:   br ^[[BEFORE]](%{{.*}}, %{{.*}} : i32, f32)
+    // CHECK:   cf.br ^[[BEFORE]](%{{.*}}, %{{.*}} : i32, f32)
     scf.yield %c0_i32, %cst : i32, f32
   }
   // CHECK:   ^bb3:
@@ -484,17 +484,17 @@ func @while_values(%arg0: i32, %arg1: f32) {
 
 // CHECK-LABEL: @nested_while_ops
 func @nested_while_ops(%arg0: f32) -> i64 {
-  // CHECK:       br ^[[OUTER_BEFORE:.*]](%{{.*}} : f32)
+  // CHECK:       cf.br ^[[OUTER_BEFORE:.*]](%{{.*}} : f32)
   %0 = scf.while(%outer = %arg0) : (f32) -> i64 {
     // CHECK:   ^[[OUTER_BEFORE]](%{{.*}}: f32):
     // CHECK:     %[[OUTER_COND:.*]] = "test.outer_before_pre"() : () -> i1
     %cond = "test.outer_before_pre"() : () -> i1
-    // CHECK:     br ^[[INNER_BEFORE_BEFORE:.*]](%{{.*}} : f32)
+    // CHECK:     cf.br ^[[INNER_BEFORE_BEFORE:.*]](%{{.*}} : f32)
     %1 = scf.while(%inner = %outer) : (f32) -> i64 {
       // CHECK: ^[[INNER_BEFORE_BEFORE]](%{{.*}}: f32):
       // CHECK:   %[[INNER1:.*]]:2 = "test.inner_before"(%{{.*}}) : (f32) -> (i1, i64)
       %2:2 = "test.inner_before"(%inner) : (f32) -> (i1, i64)
-      // CHECK:   cond_br %[[INNER1]]#0,
+      // CHECK:   cf.cond_br %[[INNER1]]#0,
       // CHECK:           ^[[INNER_BEFORE_AFTER:.*]](%[[INNER1]]#1 : i64),
       // CHECK:           ^[[OUTER_BEFORE_LAST:.*]]
       scf.condition(%2#0) %2#1 : i64
@@ -503,13 +503,13 @@ func @nested_while_ops(%arg0: f32) -> i64 {
     ^bb0(%arg1: i64):
       // CHECK:   %[[INNER2:.*]] = "test.inner_after"(%{{.*}}) : (i64) -> f32
       %3 = "test.inner_after"(%arg1) : (i64) -> f32
-      // CHECK:   br ^[[INNER_BEFORE_BEFORE]](%[[INNER2]] : f32)
+      // CHECK:   cf.br ^[[INNER_BEFORE_BEFORE]](%[[INNER2]] : f32)
       scf.yield %3 : f32
     }
     // CHECK:   ^[[OUTER_BEFORE_LAST]]:
     // CHECK:     "test.outer_before_post"() : () -> ()
     "test.outer_before_post"() : () -> ()
-    // CHECK:     cond_br %[[OUTER_COND]],
+    // CHECK:     cf.cond_br %[[OUTER_COND]],
     // CHECK:             ^[[OUTER_AFTER:.*]](%[[INNER1]]#1 : i64),
     // CHECK:             ^[[CONTINUATION:.*]]
     scf.condition(%cond) %1 : i64
@@ -518,12 +518,12 @@ func @nested_while_ops(%arg0: f32) -> i64 {
   ^bb2(%arg2: i64):
     // CHECK:     "test.outer_after_pre"(%{{.*}}) : (i64) -> ()
     "test.outer_after_pre"(%arg2) : (i64) -> ()
-    // CHECK:     br ^[[INNER_AFTER_BEFORE:.*]](%{{.*}} : i64)
+    // CHECK:     cf.br ^[[INNER_AFTER_BEFORE:.*]](%{{.*}} : i64)
     %4 = scf.while(%inner = %arg2) : (i64) -> f32 {
       // CHECK: ^[[INNER_AFTER_BEFORE]](%{{.*}}: i64):
       // CHECK:   %[[INNER3:.*]]:2 = "test.inner2_before"(%{{.*}}) : (i64) -> (i1, f32)
       %5:2 = "test.inner2_before"(%inner) : (i64) -> (i1, f32)
-      // CHECK:   cond_br %[[INNER3]]#0,
+      // CHECK:   cf.cond_br %[[INNER3]]#0,
       // CHECK:           ^[[INNER_AFTER_AFTER:.*]](%[[INNER3]]#1 : f32),
       // CHECK:           ^[[OUTER_AFTER_LAST:.*]]
       scf.condition(%5#0) %5#1 : f32
@@ -532,13 +532,13 @@ func @nested_while_ops(%arg0: f32) -> i64 {
     ^bb3(%arg3: f32):
       // CHECK:   %{{.*}} = "test.inner2_after"(%{{.*}}) : (f32) -> i64
       %6 = "test.inner2_after"(%arg3) : (f32) -> i64
-      // CHECK:   br ^[[INNER_AFTER_BEFORE]](%{{.*}} : i64)
+      // CHECK:   cf.br ^[[INNER_AFTER_BEFORE]](%{{.*}} : i64)
       scf.yield %6 : i64
     }
     // CHECK:   ^[[OUTER_AFTER_LAST]]:
     // CHECK:     "test.outer_after_post"() : () -> ()
     "test.outer_after_post"() : () -> ()
-    // CHECK:     br ^[[OUTER_BEFORE]](%[[INNER3]]#1 : f32)
+    // CHECK:     cf.br ^[[OUTER_BEFORE]](%[[INNER3]]#1 : f32)
     scf.yield %4 : f32
   }
   // CHECK:     ^[[CONTINUATION]]:
@@ -549,27 +549,27 @@ func @nested_while_ops(%arg0: f32) -> i64 {
 // CHECK-LABEL: @ifs_in_parallel
 // CHECK: (%[[ARG0:.*]]: index, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: i1)
 func @ifs_in_parallel(%arg1: index, %arg2: index, %arg3: index, %arg4: i1, %arg5: i1) {
-  // CHECK:   br ^[[LOOP_LATCH:.*]](%[[ARG0]] : index)
+  // CHECK:   cf.br ^[[LOOP_LATCH:.*]](%[[ARG0]] : index)
   // CHECK: ^[[LOOP_LATCH]](%[[LOOP_IV:.*]]: index):
   // CHECK:   %[[LOOP_COND:.*]] = arith.cmpi slt, %[[LOOP_IV]], %[[ARG1]] : index
-  // CHECK:   cond_br %[[LOOP_COND]], ^[[LOOP_BODY:.*]], ^[[LOOP_CONT:.*]]
+  // CHECK:   cf.cond_br %[[LOOP_COND]], ^[[LOOP_BODY:.*]], ^[[LOOP_CONT:.*]]
   // CHECK: ^[[LOOP_BODY]]:
-  // CHECK:   cond_br %[[ARG3]], ^[[IF1_THEN:.*]], ^[[IF1_CONT:.*]]
+  // CHECK:   cf.cond_br %[[ARG3]], ^[[IF1_THEN:.*]], ^[[IF1_CONT:.*]]
   // CHECK: ^[[IF1_THEN]]:
-  // CHECK:   cond_br %[[ARG4]], ^[[IF2_THEN:.*]], ^[[IF2_ELSE:.*]]
+  // CHECK:   cf.cond_br %[[ARG4]], ^[[IF2_THEN:.*]], ^[[IF2_ELSE:.*]]
   // CHECK: ^[[IF2_THEN]]:
   // CHECK:   %{{.*}} = "test.if2"() : () -> index
-  // CHECK:   br ^[[IF2_MERGE:.*]](%{{.*}} : index)
+  // CHECK:   cf.br ^[[IF2_MERGE:.*]](%{{.*}} : index)
   // CHECK: ^[[IF2_ELSE]]:
   // CHECK:   %{{.*}} = "test.else2"() : () -> index
-  // CHECK:   br ^[[IF2_MERGE]](%{{.*}} : index)
+  // CHECK:   cf.br ^[[IF2_MERGE]](%{{.*}} : index)
   // CHECK: ^[[IF2_MERGE]](%{{.*}}: index):
-  // CHECK:   br ^[[IF2_CONT:.*]]
+  // CHECK:   cf.br ^[[IF2_CONT:.*]]
   // CHECK: ^[[IF2_CONT]]:
-  // CHECK:   br ^[[IF1_CONT]]
+  // CHECK:   cf.br ^[[IF1_CONT]]
   // CHECK: ^[[IF1_CONT]]:
   // CHECK:   %{{.*}} = arith.addi %[[LOOP_IV]], %[[ARG2]] : index
-  // CHECK:   br ^[[LOOP_LATCH]](%{{.*}} : index)
+  // CHECK:   cf.br ^[[LOOP_LATCH]](%{{.*}} : index)
   scf.parallel (%i) = (%arg1) to (%arg2) step (%arg3) {
     scf.if %arg4 {
       %0 = scf.if %arg5 -> (index) {
@@ -593,7 +593,7 @@ func @func_execute_region_elim_multi_yield() {
     "test.foo"() : () -> ()
     %v = scf.execute_region -> i64 {
       %c = "test.cmp"() : () -> i1
-      cond_br %c, ^bb2, ^bb3
+      cf.cond_br %c, ^bb2, ^bb3
     ^bb2:
       %x = "test.val1"() : () -> i64
       scf.yield %x : i64
@@ -607,16 +607,16 @@ func @func_execute_region_elim_multi_yield() {
 
 // CHECK-NOT: execute_region
 // CHECK:     "test.foo"
-// CHECK:     br ^[[rentry:.+]]
+// CHECK:     cf.br ^[[rentry:.+]]
 // CHECK:   ^[[rentry]]
 // CHECK:     %[[cmp:.+]] = "test.cmp"
-// CHECK:     cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]]
+// CHECK:     cf.cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]]
 // CHECK:   ^[[bb1]]:
 // CHECK:     %[[x:.+]] = "test.val1"
-// CHECK:     br ^[[bb3:.+]](%[[x]] : i64)
+// CHECK:     cf.br ^[[bb3:.+]](%[[x]] : i64)
 // CHECK:   ^[[bb2]]:
 // CHECK:     %[[y:.+]] = "test.val2"
-// CHECK:     br ^[[bb3]](%[[y:.+]] : i64)
+// CHECK:     cf.br ^[[bb3]](%[[y:.+]] : i64)
 // CHECK:   ^[[bb3]](%[[z:.+]]: i64):
 // CHECK:     "test.bar"(%[[z]])
 // CHECK:     return

diff  --git a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
index eff6c149bea07..4ac169e861292 100644
--- a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
+++ b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
@@ -6,7 +6,7 @@
 // CHECK-SAME:                             %[[RHS:.*]]: tensor<?xindex>) -> !shape.witness {
 // CHECK:           %[[RET:.*]] = shape.const_witness true
 // CHECK:           %[[BROADCAST_IS_VALID:.*]] = shape.is_broadcastable %[[LHS]], %[[RHS]]
-// CHECK:           assert %[[BROADCAST_IS_VALID]], "required broadcastable shapes"
+// CHECK:           cf.assert %[[BROADCAST_IS_VALID]], "required broadcastable shapes"
 // CHECK:           return %[[RET]] : !shape.witness
 // CHECK:         }
 func @cstr_broadcastable(%arg0: tensor<?xindex>, %arg1: tensor<?xindex>) -> !shape.witness {
@@ -19,7 +19,7 @@ func @cstr_broadcastable(%arg0: tensor<?xindex>, %arg1: tensor<?xindex>) -> !sha
 // CHECK-SAME:                             %[[RHS:.*]]: tensor<?xindex>) -> !shape.witness {
 // CHECK:           %[[RET:.*]] = shape.const_witness true
 // CHECK:           %[[EQUAL_IS_VALID:.*]] = shape.shape_eq %[[LHS]], %[[RHS]]
-// CHECK:           assert %[[EQUAL_IS_VALID]], "required equal shapes"
+// CHECK:           cf.assert %[[EQUAL_IS_VALID]], "required equal shapes"
 // CHECK:           return %[[RET]] : !shape.witness
 // CHECK:         }
 func @cstr_eq(%arg0: tensor<?xindex>, %arg1: tensor<?xindex>) -> !shape.witness {
@@ -30,7 +30,7 @@ func @cstr_eq(%arg0: tensor<?xindex>, %arg1: tensor<?xindex>) -> !shape.witness
 // CHECK-LABEL: func @cstr_require
 func @cstr_require(%arg0: i1) -> !shape.witness {
   // CHECK: %[[RET:.*]] = shape.const_witness true
-  // CHECK: assert %arg0, "msg"
+  // CHECK: cf.assert %arg0, "msg"
   // CHECK: return %[[RET]]
   %witness = shape.cstr_require %arg0, "msg"
   return %witness : !shape.witness

diff  --git a/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir b/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
index 51f4b9aa71fe0..6e871bf83a868 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
@@ -29,7 +29,7 @@ func private @memref_call_conv_nested(%arg0: (memref<?xf32>) -> ())
 //CHECK-LABEL: llvm.func @pass_through(%arg0: !llvm.ptr<func<void ()>>) -> !llvm.ptr<func<void ()>> {
 func @pass_through(%arg0: () -> ()) -> (() -> ()) {
 // CHECK-NEXT:  llvm.br ^bb1(%arg0 : !llvm.ptr<func<void ()>>)
-  br ^bb1(%arg0 : () -> ())
+  cf.br ^bb1(%arg0 : () -> ())
 
 //CHECK-NEXT: ^bb1(%0: !llvm.ptr<func<void ()>>):
 ^bb1(%bbarg: () -> ()):

diff  --git a/mlir/test/Conversion/StandardToLLVM/func-memref.mlir b/mlir/test/Conversion/StandardToLLVM/func-memref.mlir
index c01a9df2191f8..463f31d9cff5f 100644
--- a/mlir/test/Conversion/StandardToLLVM/func-memref.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/func-memref.mlir
@@ -109,17 +109,17 @@ func @loop_carried(%arg0 : index, %arg1 : index, %arg2 : index, %base0 : !base_t
   // This test checks that in the BAREPTR case, the branch arguments only forward the descriptor.
   // This test was lowered from a simple scf.for that swaps 2 memref iter_args.
   //      BAREPTR: llvm.br ^bb1(%{{.*}}, %{{.*}}, %{{.*}} : i64, !llvm.struct<(ptr<i32, 201>, ptr<i32, 201>, i64, array<1 x i64>, array<1 x i64>)>, !llvm.struct<(ptr<i32, 201>, ptr<i32, 201>, i64, array<1 x i64>, array<1 x i64>)>)
-  br ^bb1(%arg0, %base0, %base1 : index, memref<64xi32, 201>, memref<64xi32, 201>)
+  cf.br ^bb1(%arg0, %base0, %base1 : index, memref<64xi32, 201>, memref<64xi32, 201>)
 
   // BAREPTR-NEXT: ^bb1
   // BAREPTR-NEXT:   llvm.icmp
   // BAREPTR-NEXT:   llvm.cond_br %{{.*}}, ^bb2, ^bb3
   ^bb1(%0: index, %1: memref<64xi32, 201>, %2: memref<64xi32, 201>):  // 2 preds: ^bb0, ^bb2
     %3 = arith.cmpi slt, %0, %arg1 : index
-    cond_br %3, ^bb2, ^bb3
+    cf.cond_br %3, ^bb2, ^bb3
   ^bb2:  // pred: ^bb1
     %4 = arith.addi %0, %arg2 : index
-    br ^bb1(%4, %2, %1 : index, memref<64xi32, 201>, memref<64xi32, 201>)
+    cf.br ^bb1(%4, %2, %1 : index, memref<64xi32, 201>, memref<64xi32, 201>)
   ^bb3:  // pred: ^bb1
     return %1, %2 : memref<64xi32, 201>, memref<64xi32, 201>
 }

diff  --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
index e292d8dae100d..479ccaad2da6c 100644
--- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
@@ -18,7 +18,7 @@ func @simple_loop() {
 ^bb0:
 // CHECK-NEXT:  llvm.br ^bb1
 // CHECK32-NEXT:  llvm.br ^bb1
-  br ^bb1
+  cf.br ^bb1
 
 // CHECK-NEXT: ^bb1:	// pred: ^bb0
 // CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
@@ -31,7 +31,7 @@ func @simple_loop() {
 ^bb1:	// pred: ^bb0
   %c1 = arith.constant 1 : index
   %c42 = arith.constant 42 : index
-  br ^bb2(%c1 : index)
+  cf.br ^bb2(%c1 : index)
 
 // CHECK:      ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb3
 // CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
@@ -41,7 +41,7 @@ func @simple_loop() {
 // CHECK32-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
 ^bb2(%0: index):	// 2 preds: ^bb1, ^bb3
   %1 = arith.cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb4
+  cf.cond_br %1, ^bb3, ^bb4
 
 // CHECK:      ^bb3:	// pred: ^bb2
 // CHECK-NEXT:  llvm.call @body({{.*}}) : (i64) -> ()
@@ -57,7 +57,7 @@ func @simple_loop() {
   call @body(%0) : (index) -> ()
   %c1_0 = arith.constant 1 : index
   %2 = arith.addi %0, %c1_0 : index
-  br ^bb2(%2 : index)
+  cf.br ^bb2(%2 : index)
 
 // CHECK:      ^bb4:	// pred: ^bb2
 // CHECK-NEXT:  llvm.return
@@ -111,7 +111,7 @@ func private @other(index, i32) -> i32
 func @func_args(i32, i32) -> i32 {
 ^bb0(%arg0: i32, %arg1: i32):
   %c0_i32 = arith.constant 0 : i32
-  br ^bb1
+  cf.br ^bb1
 
 // CHECK-NEXT: ^bb1:	// pred: ^bb0
 // CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
@@ -124,7 +124,7 @@ func @func_args(i32, i32) -> i32 {
 ^bb1:	// pred: ^bb0
   %c0 = arith.constant 0 : index
   %c42 = arith.constant 42 : index
-  br ^bb2(%c0 : index)
+  cf.br ^bb2(%c0 : index)
 
 // CHECK-NEXT: ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb3
 // CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
@@ -134,7 +134,7 @@ func @func_args(i32, i32) -> i32 {
 // CHECK32-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
 ^bb2(%0: index):	// 2 preds: ^bb1, ^bb3
   %1 = arith.cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb4
+  cf.cond_br %1, ^bb3, ^bb4
 
 // CHECK-NEXT: ^bb3:	// pred: ^bb2
 // CHECK-NEXT:  {{.*}} = llvm.call @body_args({{.*}}) : (i64) -> i64
@@ -159,7 +159,7 @@ func @func_args(i32, i32) -> i32 {
   %5 = call @other(%2, %arg1) : (index, i32) -> i32
   %c1 = arith.constant 1 : index
   %6 = arith.addi %0, %c1 : index
-  br ^bb2(%6 : index)
+  cf.br ^bb2(%6 : index)
 
 // CHECK-NEXT: ^bb4:	// pred: ^bb2
 // CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
@@ -191,7 +191,7 @@ func private @post(index)
 // CHECK-NEXT:  llvm.br ^bb1
 func @imperfectly_nested_loops() {
 ^bb0:
-  br ^bb1
+  cf.br ^bb1
 
 // CHECK-NEXT: ^bb1:	// pred: ^bb0
 // CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
@@ -200,21 +200,21 @@ func @imperfectly_nested_loops() {
 ^bb1:	// pred: ^bb0
   %c0 = arith.constant 0 : index
   %c42 = arith.constant 42 : index
-  br ^bb2(%c0 : index)
+  cf.br ^bb2(%c0 : index)
 
 // CHECK-NEXT: ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb7
 // CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
 // CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb8
 ^bb2(%0: index):	// 2 preds: ^bb1, ^bb7
   %1 = arith.cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb8
+  cf.cond_br %1, ^bb3, ^bb8
 
 // CHECK-NEXT: ^bb3:
 // CHECK-NEXT:  llvm.call @pre({{.*}}) : (i64) -> ()
 // CHECK-NEXT:  llvm.br ^bb4
 ^bb3:	// pred: ^bb2
   call @pre(%0) : (index) -> ()
-  br ^bb4
+  cf.br ^bb4
 
 // CHECK-NEXT: ^bb4:	// pred: ^bb3
 // CHECK-NEXT:  {{.*}} = llvm.mlir.constant(7 : index) : i64
@@ -223,14 +223,14 @@ func @imperfectly_nested_loops() {
 ^bb4:	// pred: ^bb3
   %c7 = arith.constant 7 : index
   %c56 = arith.constant 56 : index
-  br ^bb5(%c7 : index)
+  cf.br ^bb5(%c7 : index)
 
 // CHECK-NEXT: ^bb5({{.*}}: i64):	// 2 preds: ^bb4, ^bb6
 // CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
 // CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb6, ^bb7
 ^bb5(%2: index):	// 2 preds: ^bb4, ^bb6
   %3 = arith.cmpi slt, %2, %c56 : index
-  cond_br %3, ^bb6, ^bb7
+  cf.cond_br %3, ^bb6, ^bb7
 
 // CHECK-NEXT: ^bb6:	// pred: ^bb5
 // CHECK-NEXT:  llvm.call @body2({{.*}}, {{.*}}) : (i64, i64) -> ()
@@ -241,7 +241,7 @@ func @imperfectly_nested_loops() {
   call @body2(%0, %2) : (index, index) -> ()
   %c2 = arith.constant 2 : index
   %4 = arith.addi %2, %c2 : index
-  br ^bb5(%4 : index)
+  cf.br ^bb5(%4 : index)
 
 // CHECK-NEXT: ^bb7:	// pred: ^bb5
 // CHECK-NEXT:  llvm.call @post({{.*}}) : (i64) -> ()
@@ -252,7 +252,7 @@ func @imperfectly_nested_loops() {
   call @post(%0) : (index) -> ()
   %c1 = arith.constant 1 : index
   %5 = arith.addi %0, %c1 : index
-  br ^bb2(%5 : index)
+  cf.br ^bb2(%5 : index)
 
 // CHECK-NEXT: ^bb8:	// pred: ^bb2
 // CHECK-NEXT:  llvm.return
@@ -316,49 +316,49 @@ func private @body3(index, index)
 // CHECK-NEXT: }
 func @more_imperfectly_nested_loops() {
 ^bb0:
-  br ^bb1
+  cf.br ^bb1
 ^bb1:	// pred: ^bb0
   %c0 = arith.constant 0 : index
   %c42 = arith.constant 42 : index
-  br ^bb2(%c0 : index)
+  cf.br ^bb2(%c0 : index)
 ^bb2(%0: index):	// 2 preds: ^bb1, ^bb11
   %1 = arith.cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb12
+  cf.cond_br %1, ^bb3, ^bb12
 ^bb3:	// pred: ^bb2
   call @pre(%0) : (index) -> ()
-  br ^bb4
+  cf.br ^bb4
 ^bb4:	// pred: ^bb3
   %c7 = arith.constant 7 : index
   %c56 = arith.constant 56 : index
-  br ^bb5(%c7 : index)
+  cf.br ^bb5(%c7 : index)
 ^bb5(%2: index):	// 2 preds: ^bb4, ^bb6
   %3 = arith.cmpi slt, %2, %c56 : index
-  cond_br %3, ^bb6, ^bb7
+  cf.cond_br %3, ^bb6, ^bb7
 ^bb6:	// pred: ^bb5
   call @body2(%0, %2) : (index, index) -> ()
   %c2 = arith.constant 2 : index
   %4 = arith.addi %2, %c2 : index
-  br ^bb5(%4 : index)
+  cf.br ^bb5(%4 : index)
 ^bb7:	// pred: ^bb5
   call @mid(%0) : (index) -> ()
-  br ^bb8
+  cf.br ^bb8
 ^bb8:	// pred: ^bb7
   %c18 = arith.constant 18 : index
   %c37 = arith.constant 37 : index
-  br ^bb9(%c18 : index)
+  cf.br ^bb9(%c18 : index)
 ^bb9(%5: index):	// 2 preds: ^bb8, ^bb10
   %6 = arith.cmpi slt, %5, %c37 : index
-  cond_br %6, ^bb10, ^bb11
+  cf.cond_br %6, ^bb10, ^bb11
 ^bb10:	// pred: ^bb9
   call @body3(%0, %5) : (index, index) -> ()
   %c3 = arith.constant 3 : index
   %7 = arith.addi %5, %c3 : index
-  br ^bb9(%7 : index)
+  cf.br ^bb9(%7 : index)
 ^bb11:	// pred: ^bb9
   call @post(%0) : (index) -> ()
   %c1 = arith.constant 1 : index
   %8 = arith.addi %0, %c1 : index
-  br ^bb2(%8 : index)
+  cf.br ^bb2(%8 : index)
 ^bb12:	// pred: ^bb2
   return
 }
@@ -432,7 +432,7 @@ func @dfs_block_order(%arg0: i32) -> (i32) {
 // CHECK-NEXT:  %[[CST:.*]] = llvm.mlir.constant(42 : i32) : i32
   %0 = arith.constant 42 : i32
 // CHECK-NEXT:  llvm.br ^bb2
-  br ^bb2
+  cf.br ^bb2
 
 // CHECK-NEXT: ^bb1:
 // CHECK-NEXT:  %[[ADD:.*]] = llvm.add %arg0, %[[CST]] : i32
@@ -444,7 +444,7 @@ func @dfs_block_order(%arg0: i32) -> (i32) {
 // CHECK-NEXT: ^bb2:
 ^bb2:
 // CHECK-NEXT:  llvm.br ^bb1
-  br ^bb1
+  cf.br ^bb1
 }
 
 // -----
@@ -469,7 +469,7 @@ func @floorf(%arg0 : f32) {
 
 // -----
 
-// Lowers `assert` to a function call to `abort` if the assertion is violated.
+// Lowers `cf.assert` to a function call to `abort` if the assertion is violated.
 // CHECK: llvm.func @abort()
 // CHECK-LABEL: @assert_test_function
 // CHECK-SAME:  (%[[ARG:.*]]: i1)
@@ -480,7 +480,7 @@ func @assert_test_function(%arg : i1) {
   // CHECK: ^[[FAILURE_BLOCK]]:
   // CHECK: llvm.call @abort() : () -> ()
   // CHECK: llvm.unreachable
-  assert %arg, "Computer says no"
+  cf.assert %arg, "Computer says no"
   return
 }
 
@@ -514,8 +514,8 @@ func @fmaf(%arg0: f32, %arg1: vector<4xf32>) {
 
 // CHECK-LABEL: func @switchi8(
 func @switchi8(%arg0 : i8) -> i32 {
-switch %arg0 : i8, [
-  default: ^bb1,
+  cf.switch %arg0 : i8, [
+    default: ^bb1,
     42: ^bb1,
     43: ^bb3
   ]

diff  --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir
index 8926d7c9838dd..4fc94af8f6a93 100644
--- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir
+++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir
@@ -900,45 +900,3 @@ func @tensor_extract_constant(%a : index, %b: index, %c: index) -> i32 {
   // CHECK: spv.ReturnValue %[[VAL]]
   return %extract : i32
 }
-
-// -----
-
-//===----------------------------------------------------------------------===//
-// std.br, std.cond_br
-//===----------------------------------------------------------------------===//
-
-module attributes {
-  spv.target_env = #spv.target_env<#spv.vce<v1.0, [], []>, {}>
-} {
-
-// CHECK-LABEL: func @simple_loop
-func @simple_loop(index, index, index) {
-^bb0(%begin : index, %end : index, %step : index):
-// CHECK-NEXT:  spv.Branch ^bb1
-  br ^bb1
-
-// CHECK-NEXT: ^bb1:    // pred: ^bb0
-// CHECK-NEXT:  spv.Branch ^bb2({{.*}} : i32)
-^bb1:   // pred: ^bb0
-  br ^bb2(%begin : index)
-
-// CHECK:      ^bb2({{.*}}: i32):       // 2 preds: ^bb1, ^bb3
-// CHECK-NEXT:  {{.*}} = spv.SLessThan {{.*}}, {{.*}} : i32
-// CHECK-NEXT:  spv.BranchConditional {{.*}}, ^bb3, ^bb4
-^bb2(%0: index):        // 2 preds: ^bb1, ^bb3
-  %1 = arith.cmpi slt, %0, %end : index
-  cond_br %1, ^bb3, ^bb4
-
-// CHECK:      ^bb3:    // pred: ^bb2
-// CHECK-NEXT:  {{.*}} = spv.IAdd {{.*}}, {{.*}} : i32
-// CHECK-NEXT:  spv.Branch ^bb2({{.*}} : i32)
-^bb3:   // pred: ^bb2
-  %2 = arith.addi %0, %step : index
-  br ^bb2(%2 : index)
-
-// CHECK:      ^bb4:    // pred: ^bb2
-^bb4:   // pred: ^bb2
-  return
-}
-
-}

diff  --git a/mlir/test/Dialect/Affine/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir
index a3a709231cd35..e1ff020b00d79 100644
--- a/mlir/test/Dialect/Affine/invalid.mlir
+++ b/mlir/test/Dialect/Affine/invalid.mlir
@@ -56,9 +56,9 @@ func @affine_load_invalid_dim(%M : memref<10xi32>) {
   ^bb0(%arg: index):
     affine.load %M[%arg] : memref<10xi32>
     // expected-error at -1 {{index must be a dimension or symbol identifier}}
-    br ^bb1
+    cf.br ^bb1
   ^bb1:
-    br ^bb1
+    cf.br ^bb1
   }) : () -> ()
   return
 }

diff  --git a/mlir/test/Dialect/Async/async-runtime-ref-counting.mlir b/mlir/test/Dialect/Async/async-runtime-ref-counting.mlir
index ba762dbcf3732..b49389cf61def 100644
--- a/mlir/test/Dialect/Async/async-runtime-ref-counting.mlir
+++ b/mlir/test/Dialect/Async/async-runtime-ref-counting.mlir
@@ -54,13 +54,13 @@ func @token_value_to_func() {
 // CHECK-LABEL: @token_arg_cond_br_await_with_fallthough
 // CHECK: %[[TOKEN:.*]]: !async.token
 func @token_arg_cond_br_await_with_fallthough(%arg0: !async.token, %arg1: i1) {
-  // CHECK: cond_br
+  // CHECK: cf.cond_br
   // CHECK-SAME: ^[[BB1:.*]], ^[[BB2:.*]]
-  cond_br %arg1, ^bb1, ^bb2
+  cf.cond_br %arg1, ^bb1, ^bb2
 ^bb1:
   // CHECK: ^[[BB1]]:
-  // CHECK:   br ^[[BB2]]
-  br ^bb2
+  // CHECK:   cf.br ^[[BB2]]
+  cf.br ^bb2
 ^bb2:
   // CHECK: ^[[BB2]]:
   // CHECK:   async.runtime.await %[[TOKEN]]
@@ -88,10 +88,10 @@ func @token_coro_return() -> !async.token {
   async.runtime.resume %hdl
   async.coro.suspend %saved, ^suspend, ^resume, ^cleanup
 ^resume:
-  br ^cleanup
+  cf.br ^cleanup
 ^cleanup:
   async.coro.free %id, %hdl
-  br ^suspend
+  cf.br ^suspend
 ^suspend:
   async.coro.end %hdl
   return %token : !async.token
@@ -109,10 +109,10 @@ func @token_coro_await_and_resume(%arg0: !async.token) -> !async.token {
   // CHECK-NEXT: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i64}
   async.coro.suspend %saved, ^suspend, ^resume, ^cleanup
 ^resume:
-  br ^cleanup
+  cf.br ^cleanup
 ^cleanup:
   async.coro.free %id, %hdl
-  br ^suspend
+  cf.br ^suspend
 ^suspend:
   async.coro.end %hdl
   return %token : !async.token
@@ -137,10 +137,10 @@ func @value_coro_await_and_resume(%arg0: !async.value<f32>) -> !async.token {
   %0 = async.runtime.load %arg0 : !async.value<f32>
   // CHECK:  arith.addf %[[LOADED]], %[[LOADED]]
   %1 = arith.addf %0, %0 : f32
-  br ^cleanup
+  cf.br ^cleanup
 ^cleanup:
   async.coro.free %id, %hdl
-  br ^suspend
+  cf.br ^suspend
 ^suspend:
   async.coro.end %hdl
   return %token : !async.token
@@ -167,12 +167,12 @@ func private @outlined_async_execute(%arg0: !async.token) -> !async.token {
   // CHECK: ^[[RESUME_1:.*]]:
   // CHECK:   async.runtime.set_available
   async.runtime.set_available %0 : !async.token
-  br ^cleanup
+  cf.br ^cleanup
 ^cleanup:
   // CHECK: ^[[CLEANUP:.*]]:
   // CHECK:   async.coro.free
   async.coro.free %1, %2
-  br ^suspend
+  cf.br ^suspend
 ^suspend:
   // CHECK: ^[[SUSPEND:.*]]:
   // CHECK:   async.coro.end
@@ -198,7 +198,7 @@ func @token_await_inside_nested_region(%arg0: i1) {
 
 // CHECK-LABEL: @token_defined_in_the_loop
 func @token_defined_in_the_loop() {
-  br ^bb1
+  cf.br ^bb1
 ^bb1:
   // CHECK: ^[[BB1:.*]]:
   // CHECK:   %[[TOKEN:.*]] = call @token()
@@ -207,7 +207,7 @@ func @token_defined_in_the_loop() {
   // CHECK:   async.runtime.drop_ref %[[TOKEN]] {count = 1 : i64}
   async.runtime.await %token : !async.token
   %0 = call @cond(): () -> (i1)
-  cond_br %0, ^bb1, ^bb2
+  cf.cond_br %0, ^bb1, ^bb2
 ^bb2:
   // CHECK: ^[[BB2:.*]]:
   // CHECK:   return
@@ -218,18 +218,18 @@ func @token_defined_in_the_loop() {
 func @divergent_liveness_one_token(%arg0 : i1) {
   // CHECK: %[[TOKEN:.*]] = call @token()
   %token = call @token() : () -> !async.token
-  // CHECK: cond_br %arg0, ^[[LIVE_IN:.*]], ^[[REF_COUNTING:.*]]
-  cond_br %arg0, ^bb1, ^bb2
+  // CHECK: cf.cond_br %arg0, ^[[LIVE_IN:.*]], ^[[REF_COUNTING:.*]]
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   // CHECK: ^[[LIVE_IN]]:
   // CHECK:   async.runtime.await %[[TOKEN]]
   // CHECK:   async.runtime.drop_ref %[[TOKEN]] {count = 1 : i64}
-  // CHECK:   br ^[[RETURN:.*]]
+  // CHECK:   cf.br ^[[RETURN:.*]]
   async.runtime.await %token : !async.token
-  br ^bb2
+  cf.br ^bb2
   // CHECK: ^[[REF_COUNTING:.*]]:
   // CHECK:   async.runtime.drop_ref %[[TOKEN]] {count = 1 : i64}
-  // CHECK:   br ^[[RETURN:.*]]
+  // CHECK:   cf.br ^[[RETURN:.*]]
 ^bb2:
   // CHECK: ^[[RETURN]]:
   // CHECK:   return
@@ -240,20 +240,20 @@ func @divergent_liveness_one_token(%arg0 : i1) {
 func @divergent_liveness_unique_predecessor(%arg0 : i1) {
   // CHECK: %[[TOKEN:.*]] = call @token()
   %token = call @token() : () -> !async.token
-  // CHECK: cond_br %arg0, ^[[LIVE_IN:.*]], ^[[NO_LIVE_IN:.*]]
-  cond_br %arg0, ^bb2, ^bb1
+  // CHECK: cf.cond_br %arg0, ^[[LIVE_IN:.*]], ^[[NO_LIVE_IN:.*]]
+  cf.cond_br %arg0, ^bb2, ^bb1
 ^bb1:
   // CHECK: ^[[NO_LIVE_IN]]:
   // CHECK:   async.runtime.drop_ref %[[TOKEN]] {count = 1 : i64}
-  // CHECK:   br ^[[RETURN:.*]]
-  br ^bb3
+  // CHECK:   cf.br ^[[RETURN:.*]]
+  cf.br ^bb3
 ^bb2:
   // CHECK: ^[[LIVE_IN]]:
   // CHECK:   async.runtime.await %[[TOKEN]]
   // CHECK:   async.runtime.drop_ref %[[TOKEN]] {count = 1 : i64}
-  // CHECK:   br ^[[RETURN]]
+  // CHECK:   cf.br ^[[RETURN]]
   async.runtime.await %token : !async.token
-  br ^bb3
+  cf.br ^bb3
 ^bb3:
   // CHECK: ^[[RETURN]]:
   // CHECK:  return
@@ -266,24 +266,24 @@ func @divergent_liveness_two_tokens(%arg0 : i1) {
   // CHECK: %[[TOKEN1:.*]] = call @token()
   %token0 = call @token() : () -> !async.token
   %token1 = call @token() : () -> !async.token
-  // CHECK: cond_br %arg0, ^[[AWAIT0:.*]], ^[[AWAIT1:.*]]
-  cond_br %arg0, ^await0, ^await1
+  // CHECK: cf.cond_br %arg0, ^[[AWAIT0:.*]], ^[[AWAIT1:.*]]
+  cf.cond_br %arg0, ^await0, ^await1
 ^await0:
   // CHECK: ^[[AWAIT0]]:
   // CHECK:   async.runtime.drop_ref %[[TOKEN1]] {count = 1 : i64}
   // CHECK:   async.runtime.await %[[TOKEN0]]
   // CHECK:   async.runtime.drop_ref %[[TOKEN0]] {count = 1 : i64}
-  // CHECK:   br ^[[RETURN:.*]]
+  // CHECK:   cf.br ^[[RETURN:.*]]
   async.runtime.await %token0 : !async.token
-  br ^ret
+  cf.br ^ret
 ^await1:
   // CHECK: ^[[AWAIT1]]:
   // CHECK:   async.runtime.drop_ref %[[TOKEN0]] {count = 1 : i64}
   // CHECK:   async.runtime.await %[[TOKEN1]]
   // CHECK:   async.runtime.drop_ref %[[TOKEN1]] {count = 1 : i64}
-  // CHECK:   br ^[[RETURN]]
+  // CHECK:   cf.br ^[[RETURN]]
   async.runtime.await %token1 : !async.token
-  br ^ret
+  cf.br ^ret
 ^ret:
   // CHECK: ^[[RETURN]]:
   // CHECK:   return

diff  --git a/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir b/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir
index c570c584cbe67..49f74c7fa1159 100644
--- a/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir
+++ b/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir
@@ -10,7 +10,7 @@ func @simple_callee(%arg0: f32) -> (f32 {builtin.foo = "bar"}) {
 // CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value<f32>
 // CHECK: %[[ID:.*]] = async.coro.id
 // CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]]
-// CHECK: br ^[[ORIGINAL_ENTRY:.*]]
+// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]]
 // CHECK  ^[[ORIGINAL_ENTRY]]:
 // CHECK:   %[[VAL:.*]] = arith.addf %[[ARG]], %[[ARG]] : f32
   %0 = arith.addf %arg0, %arg0 : f32
@@ -29,7 +29,7 @@ func @simple_callee(%arg0: f32) -> (f32 {builtin.foo = "bar"}) {
 
 // CHECK: ^[[RESUME]]:
 // CHECK:   %[[IS_ERROR:.*]] = async.runtime.is_error %[[VAL_STORAGE]] : !async.value<f32>
-// CHECK:   cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
+// CHECK:   cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
 
 // CHECK: ^[[BRANCH_OK]]:
 // CHECK:   %[[LOADED:.*]] = async.runtime.load %[[VAL_STORAGE]] : <f32>
@@ -37,19 +37,19 @@ func @simple_callee(%arg0: f32) -> (f32 {builtin.foo = "bar"}) {
 // CHECK:   async.runtime.store %[[RETURNED]], %[[RETURNED_STORAGE]] : <f32>
 // CHECK:   async.runtime.set_available %[[RETURNED_STORAGE]]
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
   %3 = arith.mulf %arg0, %2 : f32
   return %3: f32
 
 // CHECK: ^[[BRANCH_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
 // CHECK:   async.runtime.set_error %[[RETURNED_STORAGE]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 
 // CHECK: ^[[CLEANUP]]:
 // CHECK:   async.coro.free %[[ID]], %[[HDL]]
-// CHECK:   br ^[[SUSPEND]]
+// CHECK:   cf.br ^[[SUSPEND]]
 
 // CHECK: ^[[SUSPEND]]:
 // CHECK:   async.coro.end %[[HDL]]
@@ -63,7 +63,7 @@ func @simple_caller() -> f32 {
 // CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value<f32>
 // CHECK: %[[ID:.*]] = async.coro.id
 // CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]]
-// CHECK: br ^[[ORIGINAL_ENTRY:.*]]
+// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]]
 // CHECK  ^[[ORIGINAL_ENTRY]]:
 
 // CHECK:   %[[CONSTANT:.*]] = arith.constant
@@ -77,28 +77,28 @@ func @simple_caller() -> f32 {
 
 // CHECK: ^[[RESUME]]:
 // CHECK:   %[[IS_TOKEN_ERROR:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER]]#0 : !async.token
-// CHECK:   cond_br %[[IS_TOKEN_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK:.*]]
+// CHECK:   cf.cond_br %[[IS_TOKEN_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK:.*]]
 
 // CHECK: ^[[BRANCH_TOKEN_OK]]:
 // CHECK:   %[[IS_VALUE_ERROR:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER]]#1 : !async.value<f32>
-// CHECK:   cond_br %[[IS_VALUE_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK:.*]]
+// CHECK:   cf.cond_br %[[IS_VALUE_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK:.*]]
 
 // CHECK: ^[[BRANCH_VALUE_OK]]:
 // CHECK:   %[[LOADED:.*]] = async.runtime.load %[[RETURNED_TO_CALLER]]#1 : <f32>
 // CHECK:   async.runtime.store %[[LOADED]], %[[RETURNED_STORAGE]] : <f32>
 // CHECK:   async.runtime.set_available %[[RETURNED_STORAGE]]
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
   return %r: f32
 // CHECK: ^[[BRANCH_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
 // CHECK:   async.runtime.set_error %[[RETURNED_STORAGE]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 
 // CHECK: ^[[CLEANUP]]:
 // CHECK:   async.coro.free %[[ID]], %[[HDL]]
-// CHECK:   br ^[[SUSPEND]]
+// CHECK:   cf.br ^[[SUSPEND]]
 
 // CHECK: ^[[SUSPEND]]:
 // CHECK:   async.coro.end %[[HDL]]
@@ -112,7 +112,7 @@ func @double_caller() -> f32 {
 // CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value<f32>
 // CHECK: %[[ID:.*]] = async.coro.id
 // CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]]
-// CHECK: br ^[[ORIGINAL_ENTRY:.*]]
+// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]]
 // CHECK  ^[[ORIGINAL_ENTRY]]:
 
 // CHECK:   %[[CONSTANT:.*]] = arith.constant
@@ -126,11 +126,11 @@ func @double_caller() -> f32 {
 
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[IS_TOKEN_ERROR_1:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_1]]#0 : !async.token
-// CHECK:   cond_br %[[IS_TOKEN_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_1:.*]]
+// CHECK:   cf.cond_br %[[IS_TOKEN_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_1:.*]]
 
 // CHECK: ^[[BRANCH_TOKEN_OK_1]]:
 // CHECK:   %[[IS_VALUE_ERROR_1:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_1]]#1 : !async.value<f32>
-// CHECK:   cond_br %[[IS_VALUE_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_1:.*]]
+// CHECK:   cf.cond_br %[[IS_VALUE_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_1:.*]]
 
 // CHECK: ^[[BRANCH_VALUE_OK_1]]:
 // CHECK:   %[[LOADED_1:.*]] = async.runtime.load %[[RETURNED_TO_CALLER_1]]#1 : <f32>
@@ -143,27 +143,27 @@ func @double_caller() -> f32 {
 
 // CHECK: ^[[RESUME_2]]:
 // CHECK:   %[[IS_TOKEN_ERROR_2:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_2]]#0 : !async.token
-// CHECK:   cond_br %[[IS_TOKEN_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_2:.*]]
+// CHECK:   cf.cond_br %[[IS_TOKEN_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_2:.*]]
 
 // CHECK: ^[[BRANCH_TOKEN_OK_2]]:
 // CHECK:   %[[IS_VALUE_ERROR_2:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_2]]#1 : !async.value<f32>
-// CHECK:   cond_br %[[IS_VALUE_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_2:.*]]
+// CHECK:   cf.cond_br %[[IS_VALUE_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_2:.*]]
 
 // CHECK: ^[[BRANCH_VALUE_OK_2]]:
 // CHECK:   %[[LOADED_2:.*]] = async.runtime.load %[[RETURNED_TO_CALLER_2]]#1 : <f32>
 // CHECK:   async.runtime.store %[[LOADED_2]], %[[RETURNED_STORAGE]] : <f32>
 // CHECK:   async.runtime.set_available %[[RETURNED_STORAGE]]
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
   return %s: f32
 // CHECK: ^[[BRANCH_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
 // CHECK:   async.runtime.set_error %[[RETURNED_STORAGE]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 // CHECK: ^[[CLEANUP]]:
 // CHECK:   async.coro.free %[[ID]], %[[HDL]]
-// CHECK:   br ^[[SUSPEND]]
+// CHECK:   cf.br ^[[SUSPEND]]
 
 // CHECK: ^[[SUSPEND]]:
 // CHECK:   async.coro.end %[[HDL]]
@@ -184,7 +184,7 @@ func @recursive(%arg: !async.token) {
   async.await %arg : !async.token
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token
-// CHECK:   cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
+// CHECK:   cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
 
 // CHECK: ^[[BRANCH_OK]]:
 // CHECK:   %[[GIVEN:.*]] = async.runtime.create : !async.token
@@ -200,16 +200,16 @@ call @recursive(%r): (!async.token) -> ()
 
 // CHECK: ^[[RESUME_2]]:
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 // CHECK: ^[[BRANCH_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 return
 
 // CHECK: ^[[CLEANUP]]:
 // CHECK:   async.coro.free %[[ID]], %[[HDL]]
-// CHECK:   br ^[[SUSPEND]]
+// CHECK:   cf.br ^[[SUSPEND]]
 
 // CHECK: ^[[SUSPEND]]:
 // CHECK:   async.coro.end %[[HDL]]
@@ -230,7 +230,7 @@ func @corecursive1(%arg: !async.token) {
   async.await %arg : !async.token
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token
-// CHECK:   cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
+// CHECK:   cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
 
 // CHECK: ^[[BRANCH_OK]]:
 // CHECK:   %[[GIVEN:.*]] = async.runtime.create : !async.token
@@ -246,16 +246,16 @@ call @corecursive2(%r): (!async.token) -> ()
 
 // CHECK: ^[[RESUME_2]]:
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 // CHECK: ^[[BRANCH_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 return
 
 // CHECK: ^[[CLEANUP]]:
 // CHECK:   async.coro.free %[[ID]], %[[HDL]]
-// CHECK:   br ^[[SUSPEND]]
+// CHECK:   cf.br ^[[SUSPEND]]
 
 // CHECK: ^[[SUSPEND]]:
 // CHECK:   async.coro.end %[[HDL]]
@@ -276,7 +276,7 @@ func @corecursive2(%arg: !async.token) {
   async.await %arg : !async.token
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token
-// CHECK:   cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
+// CHECK:   cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]]
 
 // CHECK: ^[[BRANCH_OK]]:
 // CHECK:   %[[GIVEN:.*]] = async.runtime.create : !async.token
@@ -292,16 +292,16 @@ call @corecursive1(%r): (!async.token) -> ()
 
 // CHECK: ^[[RESUME_2]]:
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 // CHECK: ^[[BRANCH_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 return
 
 // CHECK: ^[[CLEANUP]]:
 // CHECK:   async.coro.free %[[ID]], %[[HDL]]
-// CHECK:   br ^[[SUSPEND]]
+// CHECK:   cf.br ^[[SUSPEND]]
 
 // CHECK: ^[[SUSPEND]]:
 // CHECK:   async.coro.end %[[HDL]]

diff  --git a/mlir/test/Dialect/Async/async-to-async-runtime.mlir b/mlir/test/Dialect/Async/async-to-async-runtime.mlir
index 34532e56db7b7..746b2341af846 100644
--- a/mlir/test/Dialect/Async/async-to-async-runtime.mlir
+++ b/mlir/test/Dialect/Async/async-to-async-runtime.mlir
@@ -63,7 +63,7 @@ func @nested_async_execute(%arg0: f32, %arg1: f32, %arg2: memref<1xf32>) {
   // CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[TOKEN]]
   // CHECK: %[[TRUE:.*]] = arith.constant true
   // CHECK: %[[NOT_ERROR:.*]] = arith.xori %[[IS_ERROR]], %[[TRUE]] : i1
-  // CHECK: assert %[[NOT_ERROR]]
+  // CHECK: cf.assert %[[NOT_ERROR]]
   // CHECK-NEXT: return
   async.await %token0 : !async.token
   return
@@ -109,7 +109,7 @@ func @nested_async_execute(%arg0: f32, %arg1: f32, %arg2: memref<1xf32>) {
 // Check the error of the awaited token after resumption.
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[ERR:.*]] = async.runtime.is_error %[[INNER_TOKEN]]
-// CHECK:   cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
+// CHECK:   cf.cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
 
 // Set token available if the token is not in the error state.
 // CHECK: ^[[CONTINUATION:.*]]:
@@ -169,7 +169,7 @@ func @async_execute_token_dependency(%arg0: f32, %arg1: memref<1xf32>) {
 // Check the error of the awaited token after resumption.
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[ERR:.*]] = async.runtime.is_error %[[ARG0]]
-// CHECK:   cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
+// CHECK:   cf.cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
 
 // Emplace result token after second resumption and error checking.
 // CHECK: ^[[CONTINUATION:.*]]:
@@ -225,7 +225,7 @@ func @async_group_await_all(%arg0: f32, %arg1: memref<1xf32>) {
 // Check the error of the awaited token after resumption.
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[ERR:.*]] = async.runtime.is_error %[[ARG]]
-// CHECK:   cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
+// CHECK:   cf.cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
 
 // Emplace result token after error checking.
 // CHECK: ^[[CONTINUATION:.*]]:
@@ -319,7 +319,7 @@ func @async_value_operands() {
 // Check the error of the awaited token after resumption.
 // CHECK: ^[[RESUME_1]]:
 // CHECK:   %[[ERR:.*]] = async.runtime.is_error %[[ARG]]
-// CHECK:   cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
+// CHECK:   cf.cond_br %[[ERR]], ^[[SET_ERROR:.*]], ^[[CONTINUATION:.*]]
 
 // // Load from the async.value argument after error checking.
 // CHECK: ^[[CONTINUATION:.*]]:
@@ -335,7 +335,7 @@ func @async_value_operands() {
 // CHECK-LABEL: @execute_assertion
 func @execute_assertion(%arg0: i1) {
   %token = async.execute {
-    assert %arg0, "error"
+    cf.assert %arg0, "error"
     async.yield
   }
   async.await %token : !async.token
@@ -358,17 +358,17 @@ func @execute_assertion(%arg0: i1) {
 
 // Resume coroutine after suspension.
 // CHECK: ^[[RESUME]]:
-// CHECK:   cond_br %[[ARG0]], ^[[SET_AVAILABLE:.*]], ^[[SET_ERROR:.*]]
+// CHECK:   cf.cond_br %[[ARG0]], ^[[SET_AVAILABLE:.*]], ^[[SET_ERROR:.*]]
 
 // Set coroutine completion token to available state.
 // CHECK: ^[[SET_AVAILABLE]]:
 // CHECK:   async.runtime.set_available %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 // Set coroutine completion token to error state.
 // CHECK: ^[[SET_ERROR]]:
 // CHECK:   async.runtime.set_error %[[TOKEN]]
-// CHECK:   br ^[[CLEANUP]]
+// CHECK:   cf.br ^[[CLEANUP]]
 
 // Delete coroutine.
 // CHECK: ^[[CLEANUP]]:
@@ -409,7 +409,7 @@ func @lower_scf_to_cfg(%arg0: f32, %arg1: memref<1xf32>, %arg2: i1) {
 
 // Check that structured control flow lowered to CFG.
 // CHECK-NOT: scf.if
-// CHECK: cond_br %[[FLAG]]
+// CHECK: cf.cond_br %[[FLAG]]
 
 // -----
 // Constants captured by the async.execute region should be cloned into the

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir b/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
index 9c21efba6998b..0a80265aba50f 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
@@ -17,26 +17,26 @@
 
 // CHECK-LABEL: func @condBranch
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: %[[ALLOC0:.*]] = bufferization.clone
-// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
+// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
 //      CHECK: %[[ALLOC1:.*]] = memref.alloc
 // CHECK-NEXT: test.buffer_based
 // CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
 // CHECK-NEXT: memref.dealloc %[[ALLOC1]]
-// CHECK-NEXT: br ^bb3(%[[ALLOC2]]
+// CHECK-NEXT: cf.br ^bb3(%[[ALLOC2]]
 //      CHECK: test.copy
 // CHECK-NEXT: memref.dealloc
 // CHECK-NEXT: return
@@ -62,27 +62,27 @@ func @condBranchDynamicType(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb3(%arg1 : memref<?xf32>)
+  cf.br ^bb3(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
-  br ^bb3(%1 : memref<?xf32>)
+  cf.br ^bb3(%1 : memref<?xf32>)
 ^bb3(%2: memref<?xf32>):
   test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: %[[ALLOC0:.*]] = bufferization.clone
-// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
+// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
 // CHECK-NEXT: test.buffer_based
 // CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
 // CHECK-NEXT: memref.dealloc %[[ALLOC1]]
-// CHECK-NEXT: br ^bb3
+// CHECK-NEXT: cf.br ^bb3
 // CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
 //      CHECK: test.copy(%[[ALLOC3]],
 // CHECK-NEXT: memref.dealloc %[[ALLOC3]]
@@ -98,28 +98,28 @@ func @condBranchUnrankedType(
   %arg1: memref<*xf32>,
   %arg2: memref<*xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb3(%arg1 : memref<*xf32>)
+  cf.br ^bb3(%arg1 : memref<*xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   %2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
   test.buffer_based in(%arg1: memref<*xf32>) out(%2: memref<*xf32>)
-  br ^bb3(%2 : memref<*xf32>)
+  cf.br ^bb3(%2 : memref<*xf32>)
 ^bb3(%3: memref<*xf32>):
   test.copy(%3, %arg2) : (memref<*xf32>, memref<*xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: %[[ALLOC0:.*]] = bufferization.clone
-// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
+// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
 //      CHECK: test.buffer_based
 // CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
 // CHECK-NEXT: memref.dealloc %[[ALLOC1]]
-// CHECK-NEXT: br ^bb3
+// CHECK-NEXT: cf.br ^bb3
 // CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
 //      CHECK: test.copy(%[[ALLOC3]],
 // CHECK-NEXT: memref.dealloc %[[ALLOC3]]
@@ -153,44 +153,44 @@ func @condBranchDynamicTypeNested(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb6(%arg1 : memref<?xf32>)
+  cf.br ^bb6(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
-  cond_br %arg0, ^bb3, ^bb4
+  cf.cond_br %arg0, ^bb3, ^bb4
 ^bb3:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb4:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb5(%2: memref<?xf32>):
-  br ^bb6(%2 : memref<?xf32>)
+  cf.br ^bb6(%2 : memref<?xf32>)
 ^bb6(%3: memref<?xf32>):
-  br ^bb7(%3 : memref<?xf32>)
+  cf.br ^bb7(%3 : memref<?xf32>)
 ^bb7(%4: memref<?xf32>):
   test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br{{.*}}
+// CHECK-NEXT: cf.cond_br{{.*}}
 // CHECK-NEXT: ^bb1
 // CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
-// CHECK-NEXT: br ^bb6(%[[ALLOC0]]
+// CHECK-NEXT: cf.br ^bb6(%[[ALLOC0]]
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
 // CHECK-NEXT: test.buffer_based
-//      CHECK: cond_br
+//      CHECK: cf.cond_br
 //      CHECK: ^bb3:
-// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
+// CHECK-NEXT: cf.br ^bb5(%[[ALLOC1]]{{.*}})
 //      CHECK: ^bb4:
-// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
+// CHECK-NEXT: cf.br ^bb5(%[[ALLOC1]]{{.*}})
 // CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
 // CHECK-NEXT: memref.dealloc %[[ALLOC1]]
-// CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}})
+// CHECK-NEXT: cf.br ^bb6(%[[ALLOC3]]{{.*}})
 // CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
-// CHECK-NEXT: br ^bb7(%[[ALLOC4]]{{.*}})
+// CHECK-NEXT: cf.br ^bb7(%[[ALLOC4]]{{.*}})
 // CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}})
 //      CHECK: test.copy(%[[ALLOC5]],
 // CHECK-NEXT: memref.dealloc %[[ALLOC4]]
@@ -225,18 +225,18 @@ func @emptyUsesValue(%arg0: memref<4xf32>) {
 
 // CHECK-LABEL: func @criticalEdge
 func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb2(%0 : memref<2xf32>)
+  cf.br ^bb2(%0 : memref<2xf32>)
 ^bb2(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
 // CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: %[[ALLOC1:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
 // CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
@@ -260,9 +260,9 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
-  br ^bb2(%0 : memref<2xf32>)
+  cf.br ^bb2(%0 : memref<2xf32>)
 ^bb2(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
@@ -288,13 +288,13 @@ func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   %7 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
@@ -326,13 +326,13 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
@@ -361,17 +361,17 @@ func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
 ^bb3(%5: memref<2xf32>):
-  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
 ^bb4(%6: memref<2xf32>):
-  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
 ^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
   %9 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
@@ -430,33 +430,33 @@ func @moving_alloc_and_inserting_missing_dealloc(
   %cond: i1,
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^exit(%0 : memref<2xf32>)
+  cf.br ^exit(%0 : memref<2xf32>)
 ^bb2:
   %1 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
-  br ^exit(%1 : memref<2xf32>)
+  cf.br ^exit(%1 : memref<2xf32>)
 ^exit(%arg2: memref<2xf32>):
   test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br{{.*}}
+// CHECK-NEXT: cf.cond_br{{.*}}
 // CHECK-NEXT: ^bb1
 //      CHECK: %[[ALLOC0:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
 // CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %[[ALLOC0]]
 // CHECK-NEXT: memref.dealloc %[[ALLOC0]]
-// CHECK-NEXT: br ^bb3(%[[ALLOC1]]
+// CHECK-NEXT: cf.br ^bb3(%[[ALLOC1]]
 // CHECK-NEXT: ^bb2
 // CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
 // CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
 // CHECK-NEXT: memref.dealloc %[[ALLOC2]]
-// CHECK-NEXT: br ^bb3(%[[ALLOC3]]
+// CHECK-NEXT: cf.br ^bb3(%[[ALLOC3]]
 // CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}})
 //      CHECK: test.copy
 // CHECK-NEXT: memref.dealloc %[[ALLOC4]]
@@ -480,20 +480,20 @@ func @moving_invalid_dealloc_op_complex(
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
   %1 = memref.alloc() : memref<2xf32>
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
-  br ^exit(%arg0 : memref<2xf32>)
+  cf.br ^exit(%arg0 : memref<2xf32>)
 ^bb2:
   test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
   memref.dealloc %1 : memref<2xf32>
-  br ^exit(%1 : memref<2xf32>)
+  cf.br ^exit(%1 : memref<2xf32>)
 ^exit(%arg2: memref<2xf32>):
   test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: test.copy
 // CHECK-NEXT: memref.dealloc %[[ALLOC0]]
 // CHECK-NEXT: return
@@ -548,9 +548,9 @@ func @nested_regions_and_cond_branch(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
@@ -560,13 +560,13 @@ func @nested_regions_and_cond_branch(
     %tmp1 = math.exp %gen1_arg0 : f32
     test.region_yield %tmp1 : f32
   }
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 //      CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
-// CHECK-NEXT:   cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
+// CHECK-NEXT:   cf.cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
 //      CHECK:   %[[ALLOC0:.*]] = bufferization.clone %[[ARG1]]
 //      CHECK: ^[[BB2]]:
 //      CHECK:   %[[ALLOC1:.*]] = memref.alloc()
@@ -728,21 +728,21 @@ func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
 
 // CHECK-LABEL: func @condBranchAlloca
 func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloca() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: %[[ALLOCA:.*]] = memref.alloca()
-//      CHECK: br ^bb3(%[[ALLOCA:.*]])
+//      CHECK: cf.br ^bb3(%[[ALLOCA:.*]])
 // CHECK-NEXT: ^bb3
 // CHECK-NEXT: test.copy
 // CHECK-NEXT: return
@@ -757,13 +757,13 @@ func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   %7 = memref.alloca() : memref<2xf32>
   test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
@@ -788,17 +788,17 @@ func @ifElseNestedAlloca(
   %arg2: memref<2xf32>) {
   %0 = memref.alloca() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
 ^bb3(%5: memref<2xf32>):
-  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
 ^bb4(%6: memref<2xf32>):
-  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
 ^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
   %9 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
@@ -821,9 +821,9 @@ func @nestedRegionsAndCondBranchAlloca(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
@@ -833,13 +833,13 @@ func @nestedRegionsAndCondBranchAlloca(
     %tmp1 = math.exp %gen1_arg0 : f32
     test.region_yield %tmp1 : f32
   }
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 //      CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
-// CHECK-NEXT:   cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
+// CHECK-NEXT:   cf.cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
 //      CHECK: ^[[BB1]]:
 //      CHECK: %[[ALLOC0:.*]] = bufferization.clone
 //      CHECK: ^[[BB2]]:
@@ -1103,11 +1103,11 @@ func @loop_dynalloc(
   %arg2: memref<?xf32>,
   %arg3: memref<?xf32>) {
   %const0 = arith.constant 0 : i32
-  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
+  cf.br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
 
 ^loopHeader(%i : i32, %buff : memref<?xf32>):
   %lessThan = arith.cmpi slt, %i, %arg1 : i32
-  cond_br %lessThan,
+  cf.cond_br %lessThan,
     ^loopBody(%i, %buff : i32, memref<?xf32>),
     ^exit(%buff : memref<?xf32>)
 
@@ -1116,7 +1116,7 @@ func @loop_dynalloc(
   %inc = arith.addi %val, %const1 : i32
   %size = arith.index_cast %inc : i32 to index
   %alloc1 = memref.alloc(%size) : memref<?xf32>
-  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
+  cf.br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
 
 ^exit(%buff3 : memref<?xf32>):
   test.copy(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>)
@@ -1136,17 +1136,17 @@ func @do_loop_alloc(
   %arg2: memref<2xf32>,
   %arg3: memref<2xf32>) {
   %const0 = arith.constant 0 : i32
-  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
+  cf.br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
 
 ^loopBody(%val : i32, %buff2: memref<2xf32>):
   %const1 = arith.constant 1 : i32
   %inc = arith.addi %val, %const1 : i32
   %alloc1 = memref.alloc() : memref<2xf32>
-  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
+  cf.br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
 
 ^loopHeader(%i : i32, %buff : memref<2xf32>):
   %lessThan = arith.cmpi slt, %i, %arg1 : i32
-  cond_br %lessThan,
+  cf.cond_br %lessThan,
     ^loopBody(%i, %buff : i32, memref<2xf32>),
     ^exit(%buff : memref<2xf32>)
 

diff  --git a/mlir/test/Dialect/Standard/canonicalize-cf.mlir b/mlir/test/Dialect/ControlFlow/canonicalize.mlir
similarity index 81%
rename from mlir/test/Dialect/Standard/canonicalize-cf.mlir
rename to mlir/test/Dialect/ControlFlow/canonicalize.mlir
index 61ebaf742374f..528fc2c16bc90 100644
--- a/mlir/test/Dialect/Standard/canonicalize-cf.mlir
+++ b/mlir/test/Dialect/ControlFlow/canonicalize.mlir
@@ -7,7 +7,7 @@ func @br_folding() -> i32 {
   // CHECK-NEXT: %[[CST:.*]] = arith.constant 0 : i32
   // CHECK-NEXT: return %[[CST]] : i32
   %c0_i32 = arith.constant 0 : i32
-  br ^bb1(%c0_i32 : i32)
+  cf.br ^bb1(%c0_i32 : i32)
 ^bb1(%x : i32):
   return %x : i32
 }
@@ -21,12 +21,12 @@ func @br_passthrough(%arg0 : i32, %arg1 : i32) -> (i32, i32) {
 
 ^bb1:
   // CHECK: ^bb1:
-  // CHECK-NEXT: br ^bb3(%[[ARG0]], %[[ARG1]] : i32, i32)
+  // CHECK-NEXT: cf.br ^bb3(%[[ARG0]], %[[ARG1]] : i32, i32)
 
-  br ^bb2(%arg0 : i32)
+  cf.br ^bb2(%arg0 : i32)
 
 ^bb2(%arg2 : i32):
-  br ^bb3(%arg2, %arg1 : i32, i32)
+  cf.br ^bb3(%arg2, %arg1 : i32, i32)
 
 ^bb3(%arg4 : i32, %arg5 : i32):
   return %arg4, %arg5 : i32, i32
@@ -40,13 +40,13 @@ func @cond_br_folding(%cond : i1, %a : i32) {
 
   %false_cond = arith.constant false
   %true_cond = arith.constant true
-  cond_br %cond, ^bb1, ^bb2(%a : i32)
+  cf.cond_br %cond, ^bb1, ^bb2(%a : i32)
 
 ^bb1:
-  cond_br %true_cond, ^bb3, ^bb2(%a : i32)
+  cf.cond_br %true_cond, ^bb3, ^bb2(%a : i32)
 
 ^bb2(%x : i32):
-  cond_br %false_cond, ^bb2(%x : i32), ^bb3
+  cf.cond_br %false_cond, ^bb2(%x : i32), ^bb3
 
 ^bb3:
   return
@@ -58,7 +58,7 @@ func @cond_br_folding(%cond : i1, %a : i32) {
 func @cond_br_same_successor(%cond : i1, %a : i32) {
   // CHECK-NEXT: return
 
-  cond_br %cond, ^bb1(%a : i32), ^bb1(%a : i32)
+  cf.cond_br %cond, ^bb1(%a : i32), ^bb1(%a : i32)
 
 ^bb1(%result : i32):
   return
@@ -77,7 +77,7 @@ func @cond_br_same_successor_insert_select(
   // CHECK: %[[RES2:.*]] = arith.select %[[COND]], %[[ARG2]], %[[ARG3]]
   // CHECK: return %[[RES]], %[[RES2]]
 
-  cond_br %cond, ^bb1(%a, %c : i32, tensor<2xi32>), ^bb1(%b, %d : i32, tensor<2xi32>)
+  cf.cond_br %cond, ^bb1(%a, %c : i32, tensor<2xi32>), ^bb1(%b, %d : i32, tensor<2xi32>)
 
 ^bb1(%result : i32, %result2 : tensor<2xi32>):
   return %result, %result2 : i32, tensor<2xi32>
@@ -91,10 +91,10 @@ func @cond_br_and_br_folding(%a : i32) {
 
   %false_cond = arith.constant false
   %true_cond = arith.constant true
-  cond_br %true_cond, ^bb2, ^bb1(%a : i32)
+  cf.cond_br %true_cond, ^bb2, ^bb1(%a : i32)
 
 ^bb1(%x : i32):
-  cond_br %false_cond, ^bb1(%x : i32), ^bb2
+  cf.cond_br %false_cond, ^bb1(%x : i32), ^bb2
 
 ^bb2:
   return
@@ -109,10 +109,10 @@ func @cond_br_passthrough(%arg0 : i32, %arg1 : i32, %arg2 : i32, %cond : i1) ->
   // CHECK: %[[RES2:.*]] = arith.select %[[COND]], %[[ARG1]], %[[ARG2]]
   // CHECK: return %[[RES]], %[[RES2]]
 
-  cond_br %cond, ^bb1(%arg0 : i32), ^bb2(%arg2, %arg2 : i32, i32)
+  cf.cond_br %cond, ^bb1(%arg0 : i32), ^bb2(%arg2, %arg2 : i32, i32)
 
 ^bb1(%arg3: i32):
-  br ^bb2(%arg3, %arg1 : i32, i32)
+  cf.br ^bb2(%arg3, %arg1 : i32, i32)
 
 ^bb2(%arg4: i32, %arg5: i32):
   return %arg4, %arg5 : i32, i32
@@ -122,18 +122,18 @@ func @cond_br_passthrough(%arg0 : i32, %arg1 : i32, %arg2 : i32, %cond : i1) ->
 
 // CHECK-LABEL: func @cond_br_pass_through_fail(
 func @cond_br_pass_through_fail(%cond : i1) {
-  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
 
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   // CHECK: ^bb1:
   // CHECK: "foo.op"
-  // CHECK: br ^bb2
+  // CHECK: cf.br ^bb2
 
   // Successors can't be collapsed if they contain other operations.
   "foo.op"() : () -> ()
-  br ^bb2
+  cf.br ^bb2
 
 ^bb2:
   return
@@ -149,9 +149,9 @@ func @switch_only_default(%flag : i32, %caseOperand0 : f32) {
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2] : () -> ()
   ^bb1:
-    // CHECK-NOT: switch
-    // CHECK: br ^[[BB2:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
-    switch %flag : i32, [
+    // CHECK-NOT: cf.switch
+    // CHECK: cf.br ^[[BB2:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
+    cf.switch %flag : i32, [
       default: ^bb2(%caseOperand0 : f32)
     ]
   // CHECK: ^[[BB2]]({{.*}}):
@@ -169,11 +169,11 @@ func @switch_case_matching_default(%flag : i32, %caseOperand0 : f32, %caseOperan
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb3] : () -> ()
   ^bb1:
-    // CHECK: switch %[[FLAG]]
+    // CHECK: cf.switch %[[FLAG]]
     // CHECK-NEXT:   default: ^[[BB1:.+]](%[[CASE_OPERAND_0]] : f32)
     // CHECK-NEXT:   10: ^[[BB2:.+]](%[[CASE_OPERAND_1]] : f32)
     // CHECK-NEXT: ]
-    switch %flag : i32, [
+    cf.switch %flag : i32, [
       default: ^bb2(%caseOperand0 : f32),
       42: ^bb2(%caseOperand0 : f32),
       10: ^bb3(%caseOperand1 : f32),
@@ -194,10 +194,10 @@ func @switch_on_const_no_match(%caseOperand0 : f32, %caseOperand1 : f32, %caseOp
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb3, ^bb4] : () -> ()
   ^bb1:
-    // CHECK-NOT: switch
-    // CHECK: br ^[[BB2:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
+    // CHECK-NOT: cf.switch
+    // CHECK: cf.br ^[[BB2:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
     %c0_i32 = arith.constant 0 : i32
-    switch %c0_i32 : i32, [
+    cf.switch %c0_i32 : i32, [
       default: ^bb2(%caseOperand0 : f32),
       -1: ^bb3(%caseOperand1 : f32),
       1: ^bb4(%caseOperand2 : f32)
@@ -220,10 +220,10 @@ func @switch_on_const_with_match(%caseOperand0 : f32, %caseOperand1 : f32, %case
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb3, ^bb4] : () -> ()
   ^bb1:
-    // CHECK-NOT: switch
-    // CHECK: br ^[[BB4:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_2]]
+    // CHECK-NOT: cf.switch
+    // CHECK: cf.br ^[[BB4:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_2]]
     %c0_i32 = arith.constant 1 : i32
-    switch %c0_i32 : i32, [
+    cf.switch %c0_i32 : i32, [
       default: ^bb2(%caseOperand0 : f32),
       -1: ^bb3(%caseOperand1 : f32),
       1: ^bb4(%caseOperand2 : f32)
@@ -253,20 +253,20 @@ func @switch_passthrough(%flag : i32,
   "foo.pred"() [^bb1, ^bb2, ^bb3, ^bb4, ^bb5, ^bb6] : () -> ()
 
   ^bb1:
-  //      CHECK: switch %[[FLAG]]
+  //      CHECK: cf.switch %[[FLAG]]
   // CHECK-NEXT:   default: ^[[BB5:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
   // CHECK-NEXT:   43: ^[[BB6:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_1]]
   // CHECK-NEXT:   44: ^[[BB4:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_2]]
   // CHECK-NEXT: ]
-    switch %flag : i32, [
+    cf.switch %flag : i32, [
       default: ^bb2(%caseOperand0 : f32),
       43: ^bb3(%caseOperand1 : f32),
       44: ^bb4(%caseOperand2 : f32)
     ]
   ^bb2(%bb2Arg : f32):
-    br ^bb5(%bb2Arg : f32)
+    cf.br ^bb5(%bb2Arg : f32)
   ^bb3(%bb3Arg : f32):
-    br ^bb6(%bb3Arg : f32)
+    cf.br ^bb6(%bb3Arg : f32)
   ^bb4(%bb4Arg : f32):
     "foo.bb4Terminator"(%bb4Arg) : (f32) -> ()
 
@@ -290,8 +290,8 @@ func @switch_from_switch_with_same_value_with_match(%flag : i32, %caseOperand0 :
   "foo.pred"() [^bb1, ^bb2, ^bb4, ^bb5] : () -> ()
 
   ^bb1:
-    // CHECK: switch %[[FLAG]]
-    switch %flag : i32, [
+    // CHECK: cf.switch %[[FLAG]]
+    cf.switch %flag : i32, [
       default: ^bb2,
       42: ^bb3
     ]
@@ -301,9 +301,9 @@ func @switch_from_switch_with_same_value_with_match(%flag : i32, %caseOperand0 :
   ^bb3:
     // prevent this block from being simplified away
     "foo.op"() : () -> ()
-    // CHECK-NOT: switch %[[FLAG]]
-    // CHECK: br ^[[BB5:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_1]]
-    switch %flag : i32, [
+    // CHECK-NOT: cf.switch %[[FLAG]]
+    // CHECK: cf.br ^[[BB5:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_1]]
+    cf.switch %flag : i32, [
       default: ^bb4(%caseOperand0 : f32),
       42: ^bb5(%caseOperand1 : f32)
     ]
@@ -327,8 +327,8 @@ func @switch_from_switch_with_same_value_no_match(%flag : i32, %caseOperand0 : f
   "foo.pred"() [^bb1, ^bb2, ^bb4, ^bb5, ^bb6] : () -> ()
 
   ^bb1:
-    // CHECK: switch %[[FLAG]]
-    switch %flag : i32, [
+    // CHECK: cf.switch %[[FLAG]]
+    cf.switch %flag : i32, [
       default: ^bb2,
       42: ^bb3
     ]
@@ -337,9 +337,9 @@ func @switch_from_switch_with_same_value_no_match(%flag : i32, %caseOperand0 : f
     "foo.bb2Terminator"() : () -> ()
   ^bb3:
     "foo.op"() : () -> ()
-    // CHECK-NOT: switch %[[FLAG]]
-    // CHECK: br ^[[BB4:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
-    switch %flag : i32, [
+    // CHECK-NOT: cf.switch %[[FLAG]]
+    // CHECK: cf.br ^[[BB4:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
+    cf.switch %flag : i32, [
       default: ^bb4(%caseOperand0 : f32),
       0: ^bb5(%caseOperand1 : f32),
       43: ^bb6(%caseOperand2 : f32)
@@ -367,8 +367,8 @@ func @switch_from_switch_default_with_same_value(%flag : i32, %caseOperand0 : f3
   "foo.pred"() [^bb1, ^bb2, ^bb4, ^bb5, ^bb6] : () -> ()
 
   ^bb1:
-    // CHECK: switch %[[FLAG]]
-    switch %flag : i32, [
+    // CHECK: cf.switch %[[FLAG]]
+    cf.switch %flag : i32, [
       default: ^bb3,
       42: ^bb2
     ]
@@ -377,11 +377,11 @@ func @switch_from_switch_default_with_same_value(%flag : i32, %caseOperand0 : f3
     "foo.bb2Terminator"() : () -> ()
   ^bb3:
     "foo.op"() : () -> ()
-    // CHECK: switch %[[FLAG]]
+    // CHECK: cf.switch %[[FLAG]]
     // CHECK-NEXT: default: ^[[BB4:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_0]]
     // CHECK-NEXT: 43: ^[[BB6:[a-zA-Z0-9_]+]](%[[CASE_OPERAND_2]]
     // CHECK-NOT: 42
-    switch %flag : i32, [
+    cf.switch %flag : i32, [
       default: ^bb4(%caseOperand0 : f32),
       42: ^bb5(%caseOperand1 : f32),
       43: ^bb6(%caseOperand2 : f32)
@@ -406,14 +406,14 @@ func @switch_from_switch_default_with_same_value(%flag : i32, %caseOperand0 : f3
 
 // CHECK-LABEL: func @cond_br_from_cond_br_with_same_condition
 func @cond_br_from_cond_br_with_same_condition(%cond : i1) {
-  // CHECK:   cond_br %{{.*}}, ^bb1, ^bb2
+  // CHECK:   cf.cond_br %{{.*}}, ^bb1, ^bb2
   // CHECK: ^bb1:
   // CHECK:   return
 
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
-  cond_br %cond, ^bb3, ^bb2
+  cf.cond_br %cond, ^bb3, ^bb2
 
 ^bb2:
   "foo.terminator"() : () -> ()
@@ -427,19 +427,41 @@ func @cond_br_from_cond_br_with_same_condition(%cond : i1) {
 // Erase assertion if condition is known to be true at compile time.
 // CHECK-LABEL: @assert_true
 func @assert_true() {
-  // CHECK-NOT: assert
+  // CHECK-NOT: cf.assert
   %true = arith.constant true
-  assert %true, "Computer says no"
+  cf.assert %true, "Computer says no"
   return
 }
 
 // -----
 
 // Keep assertion if condition unknown at compile time.
-// CHECK-LABEL: @assert
+// CHECK-LABEL: @cf.assert
 // CHECK-SAME:  (%[[ARG:.*]]: i1)
-func @assert(%arg : i1) {
-  // CHECK: assert %[[ARG]], "Computer says no"
-  assert %arg, "Computer says no"
+func @cf.assert(%arg : i1) {
+  // CHECK: cf.assert %[[ARG]], "Computer says no"
+  cf.assert %arg, "Computer says no"
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @branchCondProp
+//       CHECK:       %[[trueval:.+]] = arith.constant true
+//       CHECK:       %[[falseval:.+]] = arith.constant false
+//       CHECK:       "test.consumer1"(%[[trueval]]) : (i1) -> ()
+//       CHECK:       "test.consumer2"(%[[falseval]]) : (i1) -> ()
+func @branchCondProp(%arg0: i1) {
+  cf.cond_br %arg0, ^trueB, ^falseB
+
+^trueB:
+  "test.consumer1"(%arg0) : (i1) -> ()
+  cf.br ^exit
+
+^falseB:
+  "test.consumer2"(%arg0) : (i1) -> ()
+  cf.br ^exit
+
+^exit:
   return
 }

diff  --git a/mlir/test/Dialect/Standard/parser.mlir b/mlir/test/Dialect/ControlFlow/invalid.mlir
similarity index 92%
rename from mlir/test/Dialect/Standard/parser.mlir
rename to mlir/test/Dialect/ControlFlow/invalid.mlir
index 9fcf9529a4a78..b35c8fcce2734 100644
--- a/mlir/test/Dialect/Standard/parser.mlir
+++ b/mlir/test/Dialect/ControlFlow/invalid.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt -verify-diagnostics -split-input-file %s
 
 func @switch_missing_case_value(%flag : i32, %caseOperand : i32) {
-  switch %flag : i32, [
+  cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     45: ^bb2(%caseOperand : i32),
     // expected-error at +1 {{expected integer value}}
@@ -19,7 +19,7 @@ func @switch_missing_case_value(%flag : i32, %caseOperand : i32) {
 // -----
 
 func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
-  switch %flag : i32, [
+  cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     // expected-error at +1 {{expected integer value}}
     "hello": ^bb2(%caseOperand : i32)
@@ -36,7 +36,7 @@ func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
 // -----
 
 func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
-  switch %flag : i32, [
+  cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     45: ^bb2(%caseOperand : i32)
     // expected-error at +1 {{expected ']'}}
@@ -54,7 +54,7 @@ func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
 // -----
 
 func @switch_missing_default(%flag : i32, %caseOperand : i32) {
-  switch %flag : i32, [
+  cf.switch %flag : i32, [
     // expected-error at +1 {{expected 'default'}}
     45: ^bb2(%caseOperand : i32)
     43: ^bb3(%caseOperand : i32)

diff  --git a/mlir/test/Dialect/Standard/ops.mlir b/mlir/test/Dialect/ControlFlow/ops.mlir
similarity index 68%
rename from mlir/test/Dialect/Standard/ops.mlir
rename to mlir/test/Dialect/ControlFlow/ops.mlir
index cb71b495ad674..43f33feeb8a72 100644
--- a/mlir/test/Dialect/Standard/ops.mlir
+++ b/mlir/test/Dialect/ControlFlow/ops.mlir
@@ -3,25 +3,13 @@
 
 // CHECK-LABEL: @assert
 func @assert(%arg : i1) {
-  assert %arg, "Some message in case this assertion fails."
+  cf.assert %arg, "Some message in case this assertion fails."
   return
 }
 
-// CHECK-LABEL: @atan
-func @atan(%arg : f32) -> f32 {
-  %result = math.atan %arg : f32
-  return %result : f32
-}
-
-// CHECK-LABEL: @atan2
-func @atan2(%arg0 : f32, %arg1 : f32) -> f32 {
-  %result = math.atan2 %arg0, %arg1 : f32
-  return %result : f32
-}
-
 // CHECK-LABEL: func @switch(
 func @switch(%flag : i32, %caseOperand : i32) {
-  switch %flag : i32, [
+  cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     42: ^bb2(%caseOperand : i32),
     43: ^bb3(%caseOperand : i32)
@@ -37,7 +25,7 @@ func @switch(%flag : i32, %caseOperand : i32) {
 
 // CHECK-LABEL: func @switch_i64(
 func @switch_i64(%flag : i64, %caseOperand : i32) {
-  switch %flag : i64, [
+  cf.switch %flag : i64, [
     default: ^bb1(%caseOperand : i32),
     42: ^bb2(%caseOperand : i32),
     43: ^bb3(%caseOperand : i32)

diff  --git a/mlir/test/Dialect/GPU/all-reduce-max.mlir b/mlir/test/Dialect/GPU/all-reduce-max.mlir
index e8ea4fb6f5635..a1dcdb400ef54 100644
--- a/mlir/test/Dialect/GPU/all-reduce-max.mlir
+++ b/mlir/test/Dialect/GPU/all-reduce-max.mlir
@@ -39,54 +39,54 @@ gpu.module @kernels {
     // CHECK:   [[VAL_31:%.*]] = arith.subi [[VAL_27]], [[VAL_29]] : i32
     // CHECK:   [[VAL_32:%.*]] = arith.subi [[VAL_28]], [[VAL_31]] : i32
     // CHECK:   [[VAL_33:%.*]] = arith.cmpi slt, [[VAL_32]], [[VAL_5]] : i32
-    // CHECK:   cond_br [[VAL_33]], ^bb1, ^bb17
+    // CHECK:   cf.cond_br [[VAL_33]], ^bb1, ^bb17
     // CHECK: ^bb1:
     // CHECK:   [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_35]], ^bb2, ^bb3
+    // CHECK:   cf.cond_br [[VAL_35]], ^bb2, ^bb3
     // CHECK: ^bb2:
     // CHECK:   [[VAL_36:%.*]] = arith.cmpf ugt, [[VAL_0]], [[VAL_34]] : f32
     // CHECK:   [[VAL_37:%.*]] = arith.select [[VAL_36]], [[VAL_0]], [[VAL_34]] : f32
-    // CHECK:   br ^bb4([[VAL_37]] : f32)
+    // CHECK:   cf.br ^bb4([[VAL_37]] : f32)
     // CHECK: ^bb3:
-    // CHECK:   br ^bb4([[VAL_0]] : f32)
+    // CHECK:   cf.br ^bb4([[VAL_0]] : f32)
     // CHECK: ^bb4([[VAL_38:%.*]]: f32):
     // CHECK:   [[VAL_39:%.*]], [[VAL_40:%.*]] = gpu.shuffle xor [[VAL_38]], [[VAL_7]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_40]], ^bb5, ^bb6
+    // CHECK:   cf.cond_br [[VAL_40]], ^bb5, ^bb6
     // CHECK: ^bb5:
     // CHECK:   [[VAL_41:%.*]] = arith.cmpf ugt, [[VAL_38]], [[VAL_39]] : f32
     // CHECK:   [[VAL_42:%.*]] = arith.select [[VAL_41]], [[VAL_38]], [[VAL_39]] : f32
-    // CHECK:   br ^bb7([[VAL_42]] : f32)
+    // CHECK:   cf.br ^bb7([[VAL_42]] : f32)
     // CHECK: ^bb6:
-    // CHECK:   br ^bb7([[VAL_38]] : f32)
+    // CHECK:   cf.br ^bb7([[VAL_38]] : f32)
     // CHECK: ^bb7([[VAL_43:%.*]]: f32):
     // CHECK:   [[VAL_44:%.*]], [[VAL_45:%.*]] = gpu.shuffle xor [[VAL_43]], [[VAL_8]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_45]], ^bb8, ^bb9
+    // CHECK:   cf.cond_br [[VAL_45]], ^bb8, ^bb9
     // CHECK: ^bb8:
     // CHECK:   [[VAL_46:%.*]] = arith.cmpf ugt, [[VAL_43]], [[VAL_44]] : f32
     // CHECK:   [[VAL_47:%.*]] = arith.select [[VAL_46]], [[VAL_43]], [[VAL_44]] : f32
-    // CHECK:   br ^bb10([[VAL_47]] : f32)
+    // CHECK:   cf.br ^bb10([[VAL_47]] : f32)
     // CHECK: ^bb9:
-    // CHECK:   br ^bb10([[VAL_43]] : f32)
+    // CHECK:   cf.br ^bb10([[VAL_43]] : f32)
     // CHECK: ^bb10([[VAL_48:%.*]]: f32):
     // CHECK:   [[VAL_49:%.*]], [[VAL_50:%.*]] = gpu.shuffle xor [[VAL_48]], [[VAL_9]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_50]], ^bb11, ^bb12
+    // CHECK:   cf.cond_br [[VAL_50]], ^bb11, ^bb12
     // CHECK: ^bb11:
     // CHECK:   [[VAL_51:%.*]] = arith.cmpf ugt, [[VAL_48]], [[VAL_49]] : f32
     // CHECK:   [[VAL_52:%.*]] = arith.select [[VAL_51]], [[VAL_48]], [[VAL_49]] : f32
-    // CHECK:   br ^bb13([[VAL_52]] : f32)
+    // CHECK:   cf.br ^bb13([[VAL_52]] : f32)
     // CHECK: ^bb12:
-    // CHECK:   br ^bb13([[VAL_48]] : f32)
+    // CHECK:   cf.br ^bb13([[VAL_48]] : f32)
     // CHECK: ^bb13([[VAL_53:%.*]]: f32):
     // CHECK:   [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle xor [[VAL_53]], [[VAL_10]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_55]], ^bb14, ^bb15
+    // CHECK:   cf.cond_br [[VAL_55]], ^bb14, ^bb15
     // CHECK: ^bb14:
     // CHECK:   [[VAL_56:%.*]] = arith.cmpf ugt, [[VAL_53]], [[VAL_54]] : f32
     // CHECK:   [[VAL_57:%.*]] = arith.select [[VAL_56]], [[VAL_53]], [[VAL_54]] : f32
-    // CHECK:   br ^bb16([[VAL_57]] : f32)
+    // CHECK:   cf.br ^bb16([[VAL_57]] : f32)
     // CHECK: ^bb15:
-    // CHECK:   br ^bb16([[VAL_53]] : f32)
+    // CHECK:   cf.br ^bb16([[VAL_53]] : f32)
     // CHECK: ^bb16([[VAL_58:%.*]]: f32):
-    // CHECK:   br ^bb18([[VAL_58]] : f32)
+    // CHECK:   cf.br ^bb18([[VAL_58]] : f32)
     // CHECK: ^bb17:
     // CHECK:   [[VAL_59:%.*]], [[VAL_60:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_5]] : f32
     // CHECK:   [[VAL_61:%.*]] = arith.cmpf ugt, [[VAL_0]], [[VAL_59]] : f32
@@ -103,74 +103,74 @@ gpu.module @kernels {
     // CHECK:   [[VAL_75:%.*]], [[VAL_76:%.*]] = gpu.shuffle xor [[VAL_74]], [[VAL_10]], [[VAL_5]] : f32
     // CHECK:   [[VAL_77:%.*]] = arith.cmpf ugt, [[VAL_74]], [[VAL_75]] : f32
     // CHECK:   [[VAL_78:%.*]] = arith.select [[VAL_77]], [[VAL_74]], [[VAL_75]] : f32
-    // CHECK:   br ^bb18([[VAL_78]] : f32)
+    // CHECK:   cf.br ^bb18([[VAL_78]] : f32)
     // CHECK: ^bb18([[VAL_79:%.*]]: f32):
-    // CHECK:   cond_br [[VAL_30]], ^bb19, ^bb20
+    // CHECK:   cf.cond_br [[VAL_30]], ^bb19, ^bb20
     // CHECK: ^bb19:
     // CHECK:   [[VAL_80:%.*]] = arith.divsi [[VAL_27]], [[VAL_5]] : i32
     // CHECK:   [[VAL_81:%.*]] = arith.index_cast [[VAL_80]] : i32 to index
     // CHECK:   store [[VAL_79]], [[VAL_1]]{{\[}}[[VAL_81]]] : memref<32xf32, 3>
-    // CHECK:   br ^bb21
+    // CHECK:   cf.br ^bb21
     // CHECK: ^bb20:
-    // CHECK:   br ^bb21
+    // CHECK:   cf.br ^bb21
     // CHECK: ^bb21:
     // CHECK:   gpu.barrier
     // CHECK:   [[VAL_82:%.*]] = arith.addi [[VAL_28]], [[VAL_2]] : i32
     // CHECK:   [[VAL_83:%.*]] = arith.divsi [[VAL_82]], [[VAL_5]] : i32
     // CHECK:   [[VAL_84:%.*]] = arith.cmpi slt, [[VAL_27]], [[VAL_83]] : i32
-    // CHECK:   cond_br [[VAL_84]], ^bb22, ^bb41
+    // CHECK:   cf.cond_br [[VAL_84]], ^bb22, ^bb41
     // CHECK: ^bb22:
     // CHECK:   [[VAL_85:%.*]] = arith.index_cast [[VAL_27]] : i32 to index
     // CHECK:   [[VAL_86:%.*]] = memref.load [[VAL_1]]{{\[}}[[VAL_85]]] : memref<32xf32, 3>
     // CHECK:   [[VAL_87:%.*]] = arith.cmpi slt, [[VAL_83]], [[VAL_5]] : i32
-    // CHECK:   cond_br [[VAL_87]], ^bb23, ^bb39
+    // CHECK:   cf.cond_br [[VAL_87]], ^bb23, ^bb39
     // CHECK: ^bb23:
     // CHECK:   [[VAL_88:%.*]], [[VAL_89:%.*]] = gpu.shuffle xor [[VAL_86]], [[VAL_6]], [[VAL_83]] : f32
-    // CHECK:   cond_br [[VAL_89]], ^bb24, ^bb25
+    // CHECK:   cf.cond_br [[VAL_89]], ^bb24, ^bb25
     // CHECK: ^bb24:
     // CHECK:   [[VAL_90:%.*]] = arith.cmpf ugt, [[VAL_86]], [[VAL_88]] : f32
     // CHECK:   [[VAL_91:%.*]] = arith.select [[VAL_90]], [[VAL_86]], [[VAL_88]] : f32
-    // CHECK:   br ^bb26([[VAL_91]] : f32)
+    // CHECK:   cf.br ^bb26([[VAL_91]] : f32)
     // CHECK: ^bb25:
-    // CHECK:   br ^bb26([[VAL_86]] : f32)
+    // CHECK:   cf.br ^bb26([[VAL_86]] : f32)
     // CHECK: ^bb26([[VAL_92:%.*]]: f32):
     // CHECK:   [[VAL_93:%.*]], [[VAL_94:%.*]] = gpu.shuffle xor [[VAL_92]], [[VAL_7]], [[VAL_83]] : f32
-    // CHECK:   cond_br [[VAL_94]], ^bb27, ^bb28
+    // CHECK:   cf.cond_br [[VAL_94]], ^bb27, ^bb28
     // CHECK: ^bb27:
     // CHECK:   [[VAL_95:%.*]] = arith.cmpf ugt, [[VAL_92]], [[VAL_93]] : f32
     // CHECK:   [[VAL_96:%.*]] = arith.select [[VAL_95]], [[VAL_92]], [[VAL_93]] : f32
-    // CHECK:   br ^bb29([[VAL_96]] : f32)
+    // CHECK:   cf.br ^bb29([[VAL_96]] : f32)
     // CHECK: ^bb28:
-    // CHECK:   br ^bb29([[VAL_92]] : f32)
+    // CHECK:   cf.br ^bb29([[VAL_92]] : f32)
     // CHECK: ^bb29([[VAL_97:%.*]]: f32):
     // CHECK:   [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle xor [[VAL_97]], [[VAL_8]], [[VAL_83]] : f32
-    // CHECK:   cond_br [[VAL_99]], ^bb30, ^bb31
+    // CHECK:   cf.cond_br [[VAL_99]], ^bb30, ^bb31
     // CHECK: ^bb30:
     // CHECK:   [[VAL_100:%.*]] = arith.cmpf ugt, [[VAL_97]], [[VAL_98]] : f32
     // CHECK:   [[VAL_101:%.*]] = arith.select [[VAL_100]], [[VAL_97]], [[VAL_98]] : f32
-    // CHECK:   br ^bb32([[VAL_101]] : f32)
+    // CHECK:   cf.br ^bb32([[VAL_101]] : f32)
     // CHECK: ^bb31:
-    // CHECK:   br ^bb32([[VAL_97]] : f32)
+    // CHECK:   cf.br ^bb32([[VAL_97]] : f32)
     // CHECK: ^bb32([[VAL_102:%.*]]: f32):
     // CHECK:   [[VAL_103:%.*]], [[VAL_104:%.*]] = gpu.shuffle xor [[VAL_102]], [[VAL_9]], [[VAL_83]] : f32
-    // CHECK:   cond_br [[VAL_104]], ^bb33, ^bb34
+    // CHECK:   cf.cond_br [[VAL_104]], ^bb33, ^bb34
     // CHECK: ^bb33:
     // CHECK:   [[VAL_105:%.*]] = arith.cmpf ugt, [[VAL_102]], [[VAL_103]] : f32
     // CHECK:   [[VAL_106:%.*]] = arith.select [[VAL_105]], [[VAL_102]], [[VAL_103]] : f32
-    // CHECK:   br ^bb35([[VAL_106]] : f32)
+    // CHECK:   cf.br ^bb35([[VAL_106]] : f32)
     // CHECK: ^bb34:
-    // CHECK:   br ^bb35([[VAL_102]] : f32)
+    // CHECK:   cf.br ^bb35([[VAL_102]] : f32)
     // CHECK: ^bb35([[VAL_107:%.*]]: f32):
     // CHECK:   [[VAL_108:%.*]], [[VAL_109:%.*]] = gpu.shuffle xor [[VAL_107]], [[VAL_10]], [[VAL_83]] : f32
-    // CHECK:   cond_br [[VAL_109]], ^bb36, ^bb37
+    // CHECK:   cf.cond_br [[VAL_109]], ^bb36, ^bb37
     // CHECK: ^bb36:
     // CHECK:   [[VAL_110:%.*]] = arith.cmpf ugt, [[VAL_107]], [[VAL_108]] : f32
     // CHECK:   [[VAL_111:%.*]] = arith.select [[VAL_110]], [[VAL_107]], [[VAL_108]] : f32
-    // CHECK:   br ^bb38([[VAL_111]] : f32)
+    // CHECK:   cf.br ^bb38([[VAL_111]] : f32)
     // CHECK: ^bb37:
-    // CHECK:   br ^bb38([[VAL_107]] : f32)
+    // CHECK:   cf.br ^bb38([[VAL_107]] : f32)
     // CHECK: ^bb38([[VAL_112:%.*]]: f32):
-    // CHECK:   br ^bb40([[VAL_112]] : f32)
+    // CHECK:   cf.br ^bb40([[VAL_112]] : f32)
     // CHECK: ^bb39:
     // CHECK:   [[VAL_113:%.*]], [[VAL_114:%.*]] = gpu.shuffle xor [[VAL_86]], [[VAL_6]], [[VAL_5]] : f32
     // CHECK:   [[VAL_115:%.*]] = arith.cmpf ugt, [[VAL_86]], [[VAL_113]] : f32
@@ -187,12 +187,12 @@ gpu.module @kernels {
     // CHECK:   [[VAL_129:%.*]], [[VAL_130:%.*]] = gpu.shuffle xor [[VAL_128]], [[VAL_10]], [[VAL_5]] : f32
     // CHECK:   [[VAL_131:%.*]] = arith.cmpf ugt, [[VAL_128]], [[VAL_129]] : f32
     // CHECK:   [[VAL_132:%.*]] = arith.select [[VAL_131]], [[VAL_128]], [[VAL_129]] : f32
-    // CHECK:   br ^bb40([[VAL_132]] : f32)
+    // CHECK:   cf.br ^bb40([[VAL_132]] : f32)
     // CHECK: ^bb40([[VAL_133:%.*]]: f32):
     // CHECK:   store [[VAL_133]], [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
-    // CHECK:   br ^bb42
+    // CHECK:   cf.br ^bb42
     // CHECK: ^bb41:
-    // CHECK:   br ^bb42
+    // CHECK:   cf.br ^bb42
     // CHECK: ^bb42:
     // CHECK:   gpu.barrier
     %sum = gpu.all_reduce max %arg0 {} : (f32) -> (f32)

diff  --git a/mlir/test/Dialect/GPU/all-reduce.mlir b/mlir/test/Dialect/GPU/all-reduce.mlir
index 80f591b54de16..4d8654dece6fc 100644
--- a/mlir/test/Dialect/GPU/all-reduce.mlir
+++ b/mlir/test/Dialect/GPU/all-reduce.mlir
@@ -39,49 +39,49 @@ gpu.module @kernels {
     // CHECK:   [[VAL_31:%.*]] = arith.subi [[VAL_27]], [[VAL_29]] : i32
     // CHECK:   [[VAL_32:%.*]] = arith.subi [[VAL_28]], [[VAL_31]] : i32
     // CHECK:   [[VAL_33:%.*]] = arith.cmpi slt, [[VAL_32]], [[VAL_5]] : i32
-    // CHECK:   cond_br [[VAL_33]], ^bb1, ^bb17
+    // CHECK:   cf.cond_br [[VAL_33]], ^bb1, ^bb17
     // CHECK: ^bb1:
     // CHECK:   [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_35]], ^bb2, ^bb3
+    // CHECK:   cf.cond_br [[VAL_35]], ^bb2, ^bb3
     // CHECK: ^bb2:
     // CHECK:   [[VAL_36:%.*]] = arith.addf [[VAL_0]], [[VAL_34]] : f32
-    // CHECK:   br ^bb4([[VAL_36]] : f32)
+    // CHECK:   cf.br ^bb4([[VAL_36]] : f32)
     // CHECK: ^bb3:
-    // CHECK:   br ^bb4([[VAL_0]] : f32)
+    // CHECK:   cf.br ^bb4([[VAL_0]] : f32)
     // CHECK: ^bb4([[VAL_37:%.*]]: f32):
     // CHECK:   [[VAL_38:%.*]], [[VAL_39:%.*]] = gpu.shuffle xor [[VAL_37]], [[VAL_7]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_39]], ^bb5, ^bb6
+    // CHECK:   cf.cond_br [[VAL_39]], ^bb5, ^bb6
     // CHECK: ^bb5:
     // CHECK:   [[VAL_40:%.*]] = arith.addf [[VAL_37]], [[VAL_38]] : f32
-    // CHECK:   br ^bb7([[VAL_40]] : f32)
+    // CHECK:   cf.br ^bb7([[VAL_40]] : f32)
     // CHECK: ^bb6:
-    // CHECK:   br ^bb7([[VAL_37]] : f32)
+    // CHECK:   cf.br ^bb7([[VAL_37]] : f32)
     // CHECK: ^bb7([[VAL_41:%.*]]: f32):
     // CHECK:   [[VAL_42:%.*]], [[VAL_43:%.*]] = gpu.shuffle xor [[VAL_41]], [[VAL_8]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_43]], ^bb8, ^bb9
+    // CHECK:   cf.cond_br [[VAL_43]], ^bb8, ^bb9
     // CHECK: ^bb8:
     // CHECK:   [[VAL_44:%.*]] = arith.addf [[VAL_41]], [[VAL_42]] : f32
-    // CHECK:   br ^bb10([[VAL_44]] : f32)
+    // CHECK:   cf.br ^bb10([[VAL_44]] : f32)
     // CHECK: ^bb9:
-    // CHECK:   br ^bb10([[VAL_41]] : f32)
+    // CHECK:   cf.br ^bb10([[VAL_41]] : f32)
     // CHECK: ^bb10([[VAL_45:%.*]]: f32):
     // CHECK:   [[VAL_46:%.*]], [[VAL_47:%.*]] = gpu.shuffle xor [[VAL_45]], [[VAL_9]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_47]], ^bb11, ^bb12
+    // CHECK:   cf.cond_br [[VAL_47]], ^bb11, ^bb12
     // CHECK: ^bb11:
     // CHECK:   [[VAL_48:%.*]] = arith.addf [[VAL_45]], [[VAL_46]] : f32
-    // CHECK:   br ^bb13([[VAL_48]] : f32)
+    // CHECK:   cf.br ^bb13([[VAL_48]] : f32)
     // CHECK: ^bb12:
-    // CHECK:   br ^bb13([[VAL_45]] : f32)
+    // CHECK:   cf.br ^bb13([[VAL_45]] : f32)
     // CHECK: ^bb13([[VAL_49:%.*]]: f32):
     // CHECK:   [[VAL_50:%.*]], [[VAL_51:%.*]] = gpu.shuffle xor [[VAL_49]], [[VAL_10]], [[VAL_32]] : f32
-    // CHECK:   cond_br [[VAL_51]], ^bb14, ^bb15
+    // CHECK:   cf.cond_br [[VAL_51]], ^bb14, ^bb15
     // CHECK: ^bb14:
     // CHECK:   [[VAL_52:%.*]] = arith.addf [[VAL_49]], [[VAL_50]] : f32
-    // CHECK:   br ^bb16([[VAL_52]] : f32)
+    // CHECK:   cf.br ^bb16([[VAL_52]] : f32)
     // CHECK: ^bb15:
-    // CHECK:   br ^bb16([[VAL_49]] : f32)
+    // CHECK:   cf.br ^bb16([[VAL_49]] : f32)
     // CHECK: ^bb16([[VAL_53:%.*]]: f32):
-    // CHECK:   br ^bb18([[VAL_53]] : f32)
+    // CHECK:   cf.br ^bb18([[VAL_53]] : f32)
     // CHECK: ^bb17:
     // CHECK:   [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_5]] : f32
     // CHECK:   [[VAL_56:%.*]] = arith.addf [[VAL_0]], [[VAL_54]] : f32
@@ -93,69 +93,69 @@ gpu.module @kernels {
     // CHECK:   [[VAL_65:%.*]] = arith.addf [[VAL_62]], [[VAL_63]] : f32
     // CHECK:   [[VAL_66:%.*]], [[VAL_67:%.*]] = gpu.shuffle xor [[VAL_65]], [[VAL_10]], [[VAL_5]] : f32
     // CHECK:   [[VAL_68:%.*]] = arith.addf [[VAL_65]], [[VAL_66]] : f32
-    // CHECK:   br ^bb18([[VAL_68]] : f32)
+    // CHECK:   cf.br ^bb18([[VAL_68]] : f32)
     // CHECK: ^bb18([[VAL_69:%.*]]: f32):
-    // CHECK:   cond_br [[VAL_30]], ^bb19, ^bb20
+    // CHECK:   cf.cond_br [[VAL_30]], ^bb19, ^bb20
     // CHECK: ^bb19:
     // CHECK:   [[VAL_70:%.*]] = arith.divsi [[VAL_27]], [[VAL_5]] : i32
     // CHECK:   [[VAL_71:%.*]] = arith.index_cast [[VAL_70]] : i32 to index
     // CHECK:   store [[VAL_69]], [[VAL_1]]{{\[}}[[VAL_71]]] : memref<32xf32, 3>
-    // CHECK:   br ^bb21
+    // CHECK:   cf.br ^bb21
     // CHECK: ^bb20:
-    // CHECK:   br ^bb21
+    // CHECK:   cf.br ^bb21
     // CHECK: ^bb21:
     // CHECK:   gpu.barrier
     // CHECK:   [[VAL_72:%.*]] = arith.addi [[VAL_28]], [[VAL_2]] : i32
     // CHECK:   [[VAL_73:%.*]] = arith.divsi [[VAL_72]], [[VAL_5]] : i32
     // CHECK:   [[VAL_74:%.*]] = arith.cmpi slt, [[VAL_27]], [[VAL_73]] : i32
-    // CHECK:   cond_br [[VAL_74]], ^bb22, ^bb41
+    // CHECK:   cf.cond_br [[VAL_74]], ^bb22, ^bb41
     // CHECK: ^bb22:
     // CHECK:   [[VAL_75:%.*]] = arith.index_cast [[VAL_27]] : i32 to index
     // CHECK:   [[VAL_76:%.*]] = memref.load [[VAL_1]]{{\[}}[[VAL_75]]] : memref<32xf32, 3>
     // CHECK:   [[VAL_77:%.*]] = arith.cmpi slt, [[VAL_73]], [[VAL_5]] : i32
-    // CHECK:   cond_br [[VAL_77]], ^bb23, ^bb39
+    // CHECK:   cf.cond_br [[VAL_77]], ^bb23, ^bb39
     // CHECK: ^bb23:
     // CHECK:   [[VAL_78:%.*]], [[VAL_79:%.*]] = gpu.shuffle xor [[VAL_76]], [[VAL_6]], [[VAL_73]] : f32
-    // CHECK:   cond_br [[VAL_79]], ^bb24, ^bb25
+    // CHECK:   cf.cond_br [[VAL_79]], ^bb24, ^bb25
     // CHECK: ^bb24:
     // CHECK:   [[VAL_80:%.*]] = arith.addf [[VAL_76]], [[VAL_78]] : f32
-    // CHECK:   br ^bb26([[VAL_80]] : f32)
+    // CHECK:   cf.br ^bb26([[VAL_80]] : f32)
     // CHECK: ^bb25:
-    // CHECK:   br ^bb26([[VAL_76]] : f32)
+    // CHECK:   cf.br ^bb26([[VAL_76]] : f32)
     // CHECK: ^bb26([[VAL_81:%.*]]: f32):
     // CHECK:   [[VAL_82:%.*]], [[VAL_83:%.*]] = gpu.shuffle xor [[VAL_81]], [[VAL_7]], [[VAL_73]] : f32
-    // CHECK:   cond_br [[VAL_83]], ^bb27, ^bb28
+    // CHECK:   cf.cond_br [[VAL_83]], ^bb27, ^bb28
     // CHECK: ^bb27:
     // CHECK:   [[VAL_84:%.*]] = arith.addf [[VAL_81]], [[VAL_82]] : f32
-    // CHECK:   br ^bb29([[VAL_84]] : f32)
+    // CHECK:   cf.br ^bb29([[VAL_84]] : f32)
     // CHECK: ^bb28:
-    // CHECK:   br ^bb29([[VAL_81]] : f32)
+    // CHECK:   cf.br ^bb29([[VAL_81]] : f32)
     // CHECK: ^bb29([[VAL_85:%.*]]: f32):
     // CHECK:   [[VAL_86:%.*]], [[VAL_87:%.*]] = gpu.shuffle xor [[VAL_85]], [[VAL_8]], [[VAL_73]] : f32
-    // CHECK:   cond_br [[VAL_87]], ^bb30, ^bb31
+    // CHECK:   cf.cond_br [[VAL_87]], ^bb30, ^bb31
     // CHECK: ^bb30:
     // CHECK:   [[VAL_88:%.*]] = arith.addf [[VAL_85]], [[VAL_86]] : f32
-    // CHECK:   br ^bb32([[VAL_88]] : f32)
+    // CHECK:   cf.br ^bb32([[VAL_88]] : f32)
     // CHECK: ^bb31:
-    // CHECK:   br ^bb32([[VAL_85]] : f32)
+    // CHECK:   cf.br ^bb32([[VAL_85]] : f32)
     // CHECK: ^bb32([[VAL_89:%.*]]: f32):
     // CHECK:   [[VAL_90:%.*]], [[VAL_91:%.*]] = gpu.shuffle xor [[VAL_89]], [[VAL_9]], [[VAL_73]] : f32
-    // CHECK:   cond_br [[VAL_91]], ^bb33, ^bb34
+    // CHECK:   cf.cond_br [[VAL_91]], ^bb33, ^bb34
     // CHECK: ^bb33:
     // CHECK:   [[VAL_92:%.*]] = arith.addf [[VAL_89]], [[VAL_90]] : f32
-    // CHECK:   br ^bb35([[VAL_92]] : f32)
+    // CHECK:   cf.br ^bb35([[VAL_92]] : f32)
     // CHECK: ^bb34:
-    // CHECK:   br ^bb35([[VAL_89]] : f32)
+    // CHECK:   cf.br ^bb35([[VAL_89]] : f32)
     // CHECK: ^bb35([[VAL_93:%.*]]: f32):
     // CHECK:   [[VAL_94:%.*]], [[VAL_95:%.*]] = gpu.shuffle xor [[VAL_93]], [[VAL_10]], [[VAL_73]] : f32
-    // CHECK:   cond_br [[VAL_95]], ^bb36, ^bb37
+    // CHECK:   cf.cond_br [[VAL_95]], ^bb36, ^bb37
     // CHECK: ^bb36:
     // CHECK:   [[VAL_96:%.*]] = arith.addf [[VAL_93]], [[VAL_94]] : f32
-    // CHECK:   br ^bb38([[VAL_96]] : f32)
+    // CHECK:   cf.br ^bb38([[VAL_96]] : f32)
     // CHECK: ^bb37:
-    // CHECK:   br ^bb38([[VAL_93]] : f32)
+    // CHECK:   cf.br ^bb38([[VAL_93]] : f32)
     // CHECK: ^bb38([[VAL_97:%.*]]: f32):
-    // CHECK:   br ^bb40([[VAL_97]] : f32)
+    // CHECK:   cf.br ^bb40([[VAL_97]] : f32)
     // CHECK: ^bb39:
     // CHECK:   [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle xor [[VAL_76]], [[VAL_6]], [[VAL_5]] : f32
     // CHECK:   [[VAL_100:%.*]] = arith.addf [[VAL_76]], [[VAL_98]] : f32
@@ -167,12 +167,12 @@ gpu.module @kernels {
     // CHECK:   [[VAL_109:%.*]] = arith.addf [[VAL_106]], [[VAL_107]] : f32
     // CHECK:   [[VAL_110:%.*]], [[VAL_111:%.*]] = gpu.shuffle xor [[VAL_109]], [[VAL_10]], [[VAL_5]] : f32
     // CHECK:   [[VAL_112:%.*]] = arith.addf [[VAL_109]], [[VAL_110]] : f32
-    // CHECK:   br ^bb40([[VAL_112]] : f32)
+    // CHECK:   cf.br ^bb40([[VAL_112]] : f32)
     // CHECK: ^bb40([[VAL_113:%.*]]: f32):
     // CHECK:   store [[VAL_113]], [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
-    // CHECK:   br ^bb42
+    // CHECK:   cf.br ^bb42
     // CHECK: ^bb41:
-    // CHECK:   br ^bb42
+    // CHECK:   cf.br ^bb42
     // CHECK: ^bb42:
     // CHECK:   gpu.barrier
     %sum = gpu.all_reduce add %arg0 {} : (f32) -> (f32)

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 7b550d995edae..4b15e5b449066 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -53,7 +53,7 @@ func @launch() {
 // CHECK-NEXT: %[[BDIM:.*]] = gpu.block_dim x
 // CHECK-NEXT: = gpu.block_dim y
 // CHECK-NEXT: = gpu.block_dim z
-// CHECK-NEXT: br ^[[BLOCK:.*]]
+// CHECK-NEXT: cf.br ^[[BLOCK:.*]]
 // CHECK-NEXT: ^[[BLOCK]]:
 // CHECK-NEXT: "use"(%[[KERNEL_ARG0]]) : (f32) -> ()
 // CHECK-NEXT: "some_op"(%[[BID]], %[[BDIM]]) : (index, index) -> ()

diff  --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index b79fb56a04986..48f70c1404ad2 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -208,7 +208,7 @@ func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
   %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
-  br ^bb1(%cst : f32)
+  cf.br ^bb1(%cst : f32)
 
 ^bb1(%arg1 : f32):
   %3 = linalg.generic
@@ -234,7 +234,7 @@ func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
   %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
-  br ^bb1(%cst : f32)
+  cf.br ^bb1(%cst : f32)
 
 ^bb1(%arg2 : f32):
   %3:2 = linalg.generic

diff  --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
index 447d41bd0d4c2..d1791da1646bf 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
@@ -20,7 +20,7 @@ func private @foo() -> tensor<?xf32>
 func @swappy(%cond1 : i1, %cond2 : i1, %t1 : tensor<f32>, %t2 : tensor<f32>)
     -> (tensor<f32>, tensor<f32>)
 {
-  cond_br %cond1, ^bb1, ^bb2
+  cf.cond_br %cond1, ^bb1, ^bb2
 
   ^bb1:
     %T:2 = scf.if %cond2 -> (tensor<f32>, tensor<f32>) {

diff  --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
index 5109ba13fa21b..b68c05b16c2f3 100644
--- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
@@ -17,7 +17,7 @@ func @if_true_test(%arg0: i1, %arg1: i32) -> tensor<i32> attributes {} {
   } -> tensor<i8>
   %4 = tensor.extract %3[] : tensor<i8>
   %5 = arith.trunci %4 : i8 to i1
-  cond_br %5, ^bb1, ^bb2(%arg1_t : tensor<i32>)
+  cf.cond_br %5, ^bb1, ^bb2(%arg1_t : tensor<i32>)
 ^bb1:
   %6 = linalg.init_tensor [] : tensor<i32>
   %7 = linalg.generic
@@ -28,7 +28,7 @@ func @if_true_test(%arg0: i1, %arg1: i32) -> tensor<i32> attributes {} {
     %10 = arith.addi %arg2, %arg3 : i32
     linalg.yield %10 : i32
   } -> tensor<i32>
-  br ^bb2(%7 : tensor<i32>)
+  cf.br ^bb2(%7 : tensor<i32>)
 ^bb2(%8: tensor<i32>):
   return %8 : tensor<i32>
 }
@@ -36,10 +36,10 @@ func @if_true_test(%arg0: i1, %arg1: i32) -> tensor<i32> attributes {} {
 // CHECK-LABEL:  func @if_true_test
 // CHECK-SAME:     (%[[arg0:.*]]: i1, %[[arg1:.*]]: i32)
 // CHECK-NEXT:     arith.constant 10 : i32
-// CHECK-NEXT:     cond_br %[[arg0]], ^[[bb1:.*]], ^[[bb2:.*]](%[[arg1]] : i32)
+// CHECK-NEXT:     cf.cond_br %[[arg0]], ^[[bb1:.*]], ^[[bb2:.*]](%[[arg1]] : i32)
 // CHECK-NEXT:   ^[[bb1]]:
 // CHECK-NEXT:     %[[add_res:.*]] = arith.addi
-// CHECK-NEXT:     br ^[[bb2]](%[[add_res]] : i32)
+// CHECK-NEXT:     cf.br ^[[bb2]](%[[add_res]] : i32)
 // CHECK-NEXT:   ^[[bb2]]
 // CHECK-NEXT:     %[[func_res:.*]] = tensor.from_elements
 // CHECK-NEXT:     return %[[func_res]]

diff  --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir
index d1ffee5f91620..d53d1797ca67f 100644
--- a/mlir/test/Dialect/Linalg/detensorize_if.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir
@@ -12,7 +12,7 @@ func @main() -> (tensor<i32>) attributes {} {
   %0 = tensor.from_elements %c0 : tensor<i32>
   %c10 = arith.constant 10 : i32
   %1 = tensor.from_elements %c10 : tensor<i32>
-  br ^bb1(%0 : tensor<i32>)
+  cf.br ^bb1(%0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
   %3 = linalg.init_tensor [] : tensor<i1>
@@ -24,7 +24,7 @@ func @main() -> (tensor<i32>) attributes {} {
       linalg.yield %8 : i1
   } -> tensor<i1>
   %5 = tensor.extract %4[] : tensor<i1>
-  cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
+  cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
   %7 = linalg.init_tensor [] : tensor<i32>
@@ -35,7 +35,7 @@ func @main() -> (tensor<i32>) attributes {} {
       %9 = arith.addi %arg0, %arg1 : i32
       linalg.yield %9 : i32
   } -> tensor<i32>
-  br ^bb3(%8 : tensor<i32>)
+  cf.br ^bb3(%8 : tensor<i32>)
 
 ^bb3(%10: tensor<i32>):  // pred: ^bb1
   return %10 : tensor<i32>
@@ -44,13 +44,13 @@ func @main() -> (tensor<i32>) attributes {} {
 // CHECK-LABEL:  func @main()
 // CHECK-NEXT:     arith.constant 0
 // CHECK-NEXT:     arith.constant 10
-// CHECK-NEXT:     br ^[[bb1:.*]](%{{.*}}: i32)
+// CHECK-NEXT:     cf.br ^[[bb1:.*]](%{{.*}}: i32)
 // CHECK-NEXT:   ^[[bb1]](%{{.*}}: i32):
 // CHECK-NEXT:     arith.cmpi slt, %{{.*}}, %{{.*}}
-// CHECK-NEXT:     cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^bb3(%{{.*}} : i32)
+// CHECK-NEXT:     cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^bb3(%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb2]](%{{.*}}: i32)
 // CHECK-NEXT:     arith.addi %{{.*}}, %{{.*}}
-// CHECK-NEXT:     br ^[[bb3:.*]](%{{.*}} : i32)
+// CHECK-NEXT:     cf.br ^[[bb3:.*]](%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb3]](%{{.*}}: i32)
 // CHECK-NEXT:     tensor.from_elements %{{.*}} : tensor<i32>
 // CHECK-NEXT:     return %{{.*}}
@@ -73,7 +73,7 @@ func @main() -> (tensor<i32>) attributes {} {
   %0 = tensor.from_elements %c0 : tensor<i32>
   %c10 = arith.constant 10 : i32
   %1 = tensor.from_elements %c10 : tensor<i32>
-  br ^bb1(%0 : tensor<i32>)
+  cf.br ^bb1(%0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
   %3 = linalg.init_tensor [] : tensor<i1>
@@ -85,7 +85,7 @@ func @main() -> (tensor<i32>) attributes {} {
       linalg.yield %8 : i1
   } -> tensor<i1>
   %5 = tensor.extract %4[] : tensor<i1>
-  cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
+  cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
   %7 = linalg.init_tensor [] : tensor<i32>
@@ -96,10 +96,10 @@ func @main() -> (tensor<i32>) attributes {} {
       %9 = arith.addi %arg0, %arg1 : i32
       linalg.yield %9 : i32
   } -> tensor<i32>
-  br ^bb3(%8 : tensor<i32>)
+  cf.br ^bb3(%8 : tensor<i32>)
 
 ^bb3(%10: tensor<i32>):  // pred: ^bb1
-  br ^bb4(%10 : tensor<i32>)
+  cf.br ^bb4(%10 : tensor<i32>)
 
 ^bb4(%11: tensor<i32>):  // pred: ^bb1
   return %11 : tensor<i32>
@@ -108,15 +108,15 @@ func @main() -> (tensor<i32>) attributes {} {
 // CHECK-LABEL:  func @main()
 // CHECK-NEXT:     arith.constant 0
 // CHECK-NEXT:     arith.constant 10
-// CHECK-NEXT:     br ^[[bb1:.*]](%{{.*}}: i32)
+// CHECK-NEXT:     cf.br ^[[bb1:.*]](%{{.*}}: i32)
 // CHECK-NEXT:   ^[[bb1]](%{{.*}}: i32):
 // CHECK-NEXT:     arith.cmpi slt, %{{.*}}, %{{.*}}
-// CHECK-NEXT:     cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^bb3(%{{.*}} : i32)
+// CHECK-NEXT:     cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^bb3(%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb2]](%{{.*}}: i32)
 // CHECK-NEXT:     arith.addi %{{.*}}, %{{.*}}
-// CHECK-NEXT:     br ^[[bb3:.*]](%{{.*}} : i32)
+// CHECK-NEXT:     cf.br ^[[bb3:.*]](%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb3]](%{{.*}}: i32)
-// CHECK-NEXT:     br ^[[bb4:.*]](%{{.*}} : i32)
+// CHECK-NEXT:     cf.br ^[[bb4:.*]](%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb4]](%{{.*}}: i32)
 // CHECK-NEXT:     tensor.from_elements %{{.*}} : tensor<i32>
 // CHECK-NEXT:     return %{{.*}}
@@ -136,7 +136,7 @@ func @main() -> (tensor<i32>) attributes {} {
   %0 = tensor.from_elements %c0 : tensor<i32>
   %c10 = arith.constant 10 : i32
   %1 = tensor.from_elements %c10 : tensor<i32>
-  br ^bb1(%0 : tensor<i32>)
+  cf.br ^bb1(%0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
   %3 = linalg.init_tensor [] : tensor<i1>
@@ -148,11 +148,11 @@ func @main() -> (tensor<i32>) attributes {} {
       linalg.yield %8 : i1
   } -> tensor<i1>
   %5 = tensor.extract %4[] : tensor<i1>
-  // This cond_br intentionally has bb2 as it's target for both branches. This
+  // This cf.cond_br intentionally has bb2 as its target for both branches. This
   // is to make sure that the "forward phase" of the cost-model correctly adds
   // the users of a block argument (in this case bb2's argument) to the work
   // list.
-  cond_br %5, ^bb2(%2 : tensor<i32>), ^bb2(%2 : tensor<i32>)
+  cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb2(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
   %12 = tensor.from_elements %c10 : tensor<i32>
@@ -164,7 +164,7 @@ func @main() -> (tensor<i32>) attributes {} {
       %9 = arith.addi %arg0, %arg1 : i32
       linalg.yield %9 : i32
   } -> tensor<i32>
-  br ^bb3(%8 : tensor<i32>)
+  cf.br ^bb3(%8 : tensor<i32>)
 
 ^bb3(%10: tensor<i32>):  // pred: ^bb1
   return %10 : tensor<i32>
@@ -173,13 +173,13 @@ func @main() -> (tensor<i32>) attributes {} {
 // CHECK-LABEL:  func @main()
 // CHECK-NEXT:     arith.constant 0
 // CHECK-NEXT:     arith.constant 10
-// CHECK-NEXT:     br ^[[bb1:.*]](%{{.*}}: i32)
+// CHECK-NEXT:     cf.br ^[[bb1:.*]](%{{.*}}: i32)
 // CHECK-NEXT:   ^[[bb1]](%{{.*}}: i32):
 // CHECK-NEXT:     arith.cmpi slt, %{{.*}}, %{{.*}}
-// CHECK-NEXT:     cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^bb2(%{{.*}} : i32)
+// CHECK-NEXT:     cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^bb2(%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb2]](%{{.*}}: i32)
 // CHECK-NEXT:     arith.addi %{{.*}}, %{{.*}}
-// CHECK-NEXT:     br ^[[bb3:.*]](%{{.*}} : i32)
+// CHECK-NEXT:     cf.br ^[[bb3:.*]](%{{.*}} : i32)
 // CHECK-NEXT:   ^[[bb3]](%{{.*}}: i32)
 // CHECK-NEXT:     tensor.from_elements %{{.*}} : tensor<i32>
 // CHECK-NEXT:     return %{{.*}}

diff  --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir
index 4e7ad1475c153..b90e6926d68af 100644
--- a/mlir/test/Dialect/Linalg/detensorize_while.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir
@@ -9,7 +9,7 @@
 }
 
 func @main(%farg0: tensor<i32>, %farg1: tensor<i32>) -> tensor<i32> attributes {} {
-  br ^bb1(%farg0 : tensor<i32>)
+  cf.br ^bb1(%farg0 : tensor<i32>)
 
 ^bb1(%0: tensor<i32>):  // 2 preds: ^bb0, ^bb2
   %1 = linalg.init_tensor [] : tensor<i1>
@@ -21,7 +21,7 @@ func @main(%farg0: tensor<i32>, %farg1: tensor<i32>) -> tensor<i32> attributes {
       linalg.yield %8 : i1
   } -> tensor<i1>
   %3 = tensor.extract %2[] : tensor<i1>
-  cond_br %3, ^bb2(%0 : tensor<i32>), ^bb3(%0 : tensor<i32>)
+  cf.cond_br %3, ^bb2(%0 : tensor<i32>), ^bb3(%0 : tensor<i32>)
 
 ^bb2(%4: tensor<i32>):  // pred: ^bb1
   %5 = linalg.init_tensor [] : tensor<i32>
@@ -32,7 +32,7 @@ func @main(%farg0: tensor<i32>, %farg1: tensor<i32>) -> tensor<i32> attributes {
       %8 = arith.addi %arg0, %arg1 : i32
       linalg.yield %8 : i32
   } -> tensor<i32>
-  br ^bb1(%6 : tensor<i32>)
+  cf.br ^bb1(%6 : tensor<i32>)
 
 ^bb3(%7: tensor<i32>):  // pred: ^bb1
   return %7 : tensor<i32>
@@ -43,13 +43,13 @@ func @main(%farg0: tensor<i32>, %farg1: tensor<i32>) -> tensor<i32> attributes {
 // DET-ALL-LABEL: func @main
 // DET-ALL-SAME:    (%{{.*}}: tensor<i32>, %{{.*}}: tensor<i32>)
 // DET-ALL:         tensor.extract {{.*}}
-// DET-ALL:         br ^[[bb1:.*]](%{{.*}} : i32)
+// DET-ALL:         cf.br ^[[bb1:.*]](%{{.*}} : i32)
 // DET-ALL:       ^[[bb1]](%{{.*}}: i32)
 // DET-ALL:         arith.cmpi slt, {{.*}}
-// DET-ALL:         cond_br {{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
+// DET-ALL:         cf.cond_br {{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
 // DET-ALL:       ^[[bb2]](%{{.*}}: i32)
 // DET-ALL:         arith.addi {{.*}}
-// DET-ALL:         br ^[[bb1]](%{{.*}} : i32)
+// DET-ALL:         cf.br ^[[bb1]](%{{.*}} : i32)
 // DET-ALL:       ^[[bb3]](%{{.*}}: i32)
 // DET-ALL:         tensor.from_elements {{.*}}
 // DET-ALL:         return %{{.*}} : tensor<i32>
@@ -59,13 +59,13 @@ func @main(%farg0: tensor<i32>, %farg1: tensor<i32>) -> tensor<i32> attributes {
 // DET-CF-LABEL: func @main
 // DET-CF-SAME:    (%{{.*}}: tensor<i32>, %{{.*}}: tensor<i32>)
 // DET-CF:         tensor.extract {{.*}}
-// DET-CF:         br ^[[bb1:.*]](%{{.*}} : i32)
+// DET-CF:         cf.br ^[[bb1:.*]](%{{.*}} : i32)
 // DET-CF:       ^[[bb1]](%{{.*}}: i32)
 // DET-CF:         arith.cmpi slt, {{.*}}
-// DET-CF:         cond_br {{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
+// DET-CF:         cf.cond_br {{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
 // DET-CF:       ^[[bb2]](%{{.*}}: i32)
 // DET-CF:         arith.addi {{.*}}
-// DET-CF:         br ^[[bb1]](%{{.*}} : i32)
+// DET-CF:         cf.br ^[[bb1]](%{{.*}} : i32)
 // DET-CF:       ^[[bb3]](%{{.*}}: i32)
 // DET-CF:         tensor.from_elements %{{.*}} : tensor<i32>
 // DET-CF:         return %{{.*}} : tensor<i32>

diff  --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
index 3613852c4efa2..6200a13e3c7ce 100644
--- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
@@ -22,7 +22,7 @@
 }
 
 func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attributes {} {
-  br ^bb1(%farg0 : tensor<10xi32>)
+  cf.br ^bb1(%farg0 : tensor<10xi32>)
 
 ^bb1(%0: tensor<10xi32>):  // 2 preds: ^bb0, ^bb2
   %1 = linalg.init_tensor [] : tensor<i32>
@@ -43,7 +43,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attribute
       linalg.yield %8 : i1
   } -> tensor<i1>
   %5 = tensor.extract %4[] : tensor<i1>
-  cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
+  cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
   %7 = linalg.init_tensor [10] : tensor<10xi32>
@@ -54,7 +54,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attribute
       linalg.yield %a : i32
   } -> tensor<10xi32>
 
-  br ^bb1(%9 : tensor<10xi32>)
+  cf.br ^bb1(%9 : tensor<10xi32>)
 
 ^bb3(%10: tensor<i32>):  // pred: ^bb1
   return %10 : tensor<i32>
@@ -64,7 +64,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attribute
 //
 // DET-ALL-LABEL: func @main
 // DET-ALL-SAME:    (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
-// DET-ALL:         br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
+// DET-ALL:         cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-ALL:       ^[[bb1]](%{{.*}}: tensor<10xi32>)
 // DET-ALL:         linalg.init_tensor [] : tensor<i32>
 // DET-ALL:         linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
@@ -74,7 +74,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attribute
 // DET-ALL:         } -> tensor<i32>
 // DET-ALL:         tensor.extract %{{.*}}[] : tensor<i32>
 // DET-ALL:         cmpi slt, %{{.*}}, %{{.*}} : i32
-// DET-ALL:         cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
+// DET-ALL:         cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
 // DET-ALL:       ^[[bb2]](%{{.*}}: i32)
 // DET-ALL:         tensor.from_elements %{{.*}} : tensor<i32>
 // DET-ALL:         linalg.init_tensor [10] : tensor<10xi32>
@@ -82,7 +82,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attribute
 // DET-ALL:         ^bb0(%{{.*}}: i32, %{{.*}}: i32):
 // DET-ALL:           linalg.yield %{{.*}} : i32
 // DET-ALL:         } -> tensor<10xi32>
-// DET-ALL:         br ^[[bb1]](%{{.*}} : tensor<10xi32>)
+// DET-ALL:         cf.br ^[[bb1]](%{{.*}} : tensor<10xi32>)
 // DET-ALL:       ^[[bb3]](%{{.*}}: i32)
 // DET-ALL:         tensor.from_elements %{{.*}} : tensor<i32>
 // DET-ALL:         return %{{.*}} : tensor<i32>
@@ -90,15 +90,15 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attribute
 
 // DET-CF-LABEL: func @main
 // DET-CF-SAME:    (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
-// DET-CF:         br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
+// DET-CF:         cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-CF:       ^bb1(%{{.*}}: tensor<10xi32>)
 // DET-CF:         %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
 // DET-CF:         tensor.extract %{{.*}}[] : tensor<i32>
 // DET-CF:         cmpi slt, %{{.*}}, %{{.*}} : i32
-// DET-CF:         cond_br %{{.*}}, ^bb2(%{{.*}} : tensor<i32>), ^bb3(%{{.*}} : tensor<i32>)
+// DET-CF:         cf.cond_br %{{.*}}, ^bb2(%{{.*}} : tensor<i32>), ^bb3(%{{.*}} : tensor<i32>)
 // DET-CF:       ^bb2(%{{.*}}: tensor<i32>)
 // DET-CF:         %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
-// DET-CF:         br ^bb1(%{{.*}} : tensor<10xi32>)
+// DET-CF:         cf.br ^bb1(%{{.*}} : tensor<10xi32>)
 // DET-CF:       ^bb3(%{{.*}}: tensor<i32>)
 // DET-CF:         return %{{.*}} : tensor<i32>
 // DET-CF:       }

diff  --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
index 692e1c79e0cf2..d0c22f396275c 100644
--- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
@@ -14,7 +14,7 @@ func @main() -> () attributes {} {
   %c10 = arith.constant 10 : i32
   %1 = tensor.from_elements %c10 : tensor<1xi32>
   %reshaped1 = tensor.collapse_shape %1 [] : tensor<1xi32> into tensor<i32>
-  br ^bb1(%reshaped0 : tensor<i32>)
+  cf.br ^bb1(%reshaped0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
   %3 = linalg.init_tensor [] : tensor<i1>
@@ -26,7 +26,7 @@ func @main() -> () attributes {} {
       linalg.yield %8 : i1
   } -> tensor<i1>
   %5 = tensor.extract %4[] : tensor<i1>
-  cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3
+  cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
   %7 = linalg.init_tensor [] : tensor<i32>
@@ -37,7 +37,7 @@ func @main() -> () attributes {} {
       %9 = arith.addi %arg0, %arg1 : i32
       linalg.yield %9 : i32
   } -> tensor<i32>
-  br ^bb1(%8 : tensor<i32>)
+  cf.br ^bb1(%8 : tensor<i32>)
 
 ^bb3:  // pred: ^bb1
   return
@@ -46,13 +46,13 @@ func @main() -> () attributes {} {
 // CHECK-LABEL: func @main
 // CHECK-NEXT:    arith.constant 0 : i32
 // CHECK-NEXT:    arith.constant 10
-// CHECK-NEXT:    br ^[[bb1:.*]](%{{.*}} : i32)
+// CHECK-NEXT:    cf.br ^[[bb1:.*]](%{{.*}} : i32)
 // CHECK-NEXT:  ^[[bb1]](%{{.*}}: i32)
 // CHECK-NEXT:    %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}}
-// CHECK-NEXT:    cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]]
+// CHECK-NEXT:    cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]]
 // CHECK-NEXT:  ^[[bb2]](%{{.*}}: i32)
 // CHECK-NEXT:    %{{.*}} = arith.addi %{{.*}}, %{{.*}}
-// CHECK-NEXT:    br ^[[bb1]](%{{.*}} : i32)
+// CHECK-NEXT:    cf.br ^[[bb1]](%{{.*}} : i32)
 // CHECK-NEXT:  ^[[bb3]]:
 // CHECK-NEXT:    return
 // CHECK-NEXT:  }

diff  --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 3586acbd9dd2d..e8afc66f4e2e8 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -250,7 +250,7 @@ func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %da
   // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
   omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
     %1 = "test.payload"(%iv) : (index) -> (i32)
-    br ^bb1(%1: i32)
+    cf.br ^bb1(%1: i32)
   ^bb1(%arg: i32):
     memref.store %arg, %data1[%iv] : memref<?xi32>
     omp.yield
@@ -260,13 +260,13 @@ func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %da
   omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
     %c = "test.condition"(%iv) : (index) -> (i1)
     %v1 = "test.payload"(%iv) : (index) -> (i32)
-    cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
+    cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
   ^bb1(%arg0: i32):
     memref.store %arg0, %data1[%iv] : memref<?xi32>
-    br ^bb3
+    cf.br ^bb3
   ^bb2(%arg1: i32):
     memref.store %arg1, %data2[%iv] : memref<?xi32>
-    br ^bb3
+    cf.br ^bb3
   ^bb3:
     omp.yield
   }
@@ -275,7 +275,7 @@ func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %da
   omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
     %c = "test.condition"(%iv) : (index) -> (i1)
     %v1 = "test.payload"(%iv) : (index) -> (i32)
-    cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
+    cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
   ^bb1(%arg0: i32):
     memref.store %arg0, %data1[%iv] : memref<?xi32>
     omp.yield
@@ -294,7 +294,7 @@ func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i
   // CHECK: omp.wsloop (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
   omp.wsloop (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
     %1 = "test.payload"(%iv1) : (i32) -> (index)
-    br ^bb1(%1: index)
+    cf.br ^bb1(%1: index)
   ^bb1(%arg1: index):
     memref.store %iv1, %data1[%arg1] : memref<?xi32>
     omp.yield
@@ -303,7 +303,7 @@ func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i
   // CHECK: omp.wsloop (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
   omp.wsloop (%iv2) : i64 = (%lb2) to (%ub2) step (%step2) {
     %2 = "test.payload"(%iv2) : (i64) -> (index)
-    br ^bb1(%2: index)
+    cf.br ^bb1(%2: index)
   ^bb1(%arg2: index):
     memref.store %iv2, %data2[%arg2] : memref<?xi64>
     omp.yield

diff  --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index 1563349ac24bf..80af8174d2f59 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -1127,15 +1127,15 @@ func @propagate_into_execute_region() {
   affine.for %i = 0 to 100 {
     "test.foo"() : () -> ()
     %v = scf.execute_region -> i64 {
-      cond_br %cond, ^bb1, ^bb2
+      cf.cond_br %cond, ^bb1, ^bb2
 
     ^bb1:
       %c1 = arith.constant 1 : i64
-      br ^bb3(%c1 : i64)
+      cf.br ^bb3(%c1 : i64)
 
     ^bb2:
       %c2 = arith.constant 2 : i64
-      br ^bb3(%c2 : i64)
+      cf.br ^bb3(%c2 : i64)
 
     ^bb3(%x : i64):
       scf.yield %x : i64
@@ -1177,13 +1177,13 @@ func @func_execute_region_elim() {
     "test.foo"() : () -> ()
     %v = scf.execute_region -> i64 {
       %c = "test.cmp"() : () -> i1
-      cond_br %c, ^bb2, ^bb3
+      cf.cond_br %c, ^bb2, ^bb3
     ^bb2:
       %x = "test.val1"() : () -> i64
-      br ^bb4(%x : i64)
+      cf.br ^bb4(%x : i64)
     ^bb3:
       %y = "test.val2"() : () -> i64
-      br ^bb4(%y : i64)
+      cf.br ^bb4(%y : i64)
     ^bb4(%z : i64):
       scf.yield %z : i64
     }
@@ -1194,13 +1194,13 @@ func @func_execute_region_elim() {
 // CHECK-NOT: execute_region
 // CHECK:     "test.foo"
 // CHECK:     %[[cmp:.+]] = "test.cmp"
-// CHECK:     cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]]
+// CHECK:     cf.cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]]
 // CHECK:   ^[[bb1]]:
 // CHECK:     %[[x:.+]] = "test.val1"
-// CHECK:     br ^[[bb3:.+]](%[[x]] : i64)
+// CHECK:     cf.br ^[[bb3:.+]](%[[x]] : i64)
 // CHECK:   ^[[bb2]]:
 // CHECK:     %[[y:.+]] = "test.val2"
-// CHECK:     br ^[[bb3]](%[[y:.+]] : i64)
+// CHECK:     cf.br ^[[bb3]](%[[y:.+]] : i64)
 // CHECK:   ^[[bb3]](%[[z:.+]]: i64):
 // CHECK:     "test.bar"(%[[z]])
 // CHECK:     return
@@ -1213,7 +1213,7 @@ func @func_execute_region_elim_multi_yield() {
     "test.foo"() : () -> ()
     %v = scf.execute_region -> i64 {
       %c = "test.cmp"() : () -> i1
-      cond_br %c, ^bb2, ^bb3
+      cf.cond_br %c, ^bb2, ^bb3
     ^bb2:
       %x = "test.val1"() : () -> i64
       scf.yield %x : i64
@@ -1228,13 +1228,13 @@ func @func_execute_region_elim_multi_yield() {
 // CHECK-NOT: execute_region
 // CHECK:     "test.foo"
 // CHECK:     %[[cmp:.+]] = "test.cmp"
-// CHECK:     cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]]
+// CHECK:     cf.cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]]
 // CHECK:   ^[[bb1]]:
 // CHECK:     %[[x:.+]] = "test.val1"
-// CHECK:     br ^[[bb3:.+]](%[[x]] : i64)
+// CHECK:     cf.br ^[[bb3:.+]](%[[x]] : i64)
 // CHECK:   ^[[bb2]]:
 // CHECK:     %[[y:.+]] = "test.val2"
-// CHECK:     br ^[[bb3]](%[[y:.+]] : i64)
+// CHECK:     cf.br ^[[bb3]](%[[y:.+]] : i64)
 // CHECK:   ^[[bb3]](%[[z:.+]]: i64):
 // CHECK:     "test.bar"(%[[z]])
 // CHECK:     return

diff  --git a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
index 4b8b98768385b..be823defd6789 100644
--- a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
@@ -113,7 +113,7 @@ func @for_iter_args(%arg0 : index, %arg1: index, %arg2: index) -> f32 {
 // CHECK:             %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_4]] : index
 // CHECK:             %[[VAL_12:.*]] = scf.execute_region -> i32 {
 // CHECK:               %[[VAL_13:.*]] = arith.cmpi slt, %[[VAL_9]], %[[VAL_4]] : index
-// CHECK:               cond_br %[[VAL_13]], ^bb1, ^bb2
+// CHECK:               cf.cond_br %[[VAL_13]], ^bb1, ^bb2
 // CHECK:             ^bb1:
 // CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_10]], %[[VAL_0]] : i32
 // CHECK:               scf.yield %[[VAL_14]] : i32
@@ -134,7 +134,7 @@ func @exec_region_multiple_yields(%arg0: i32, %arg1: index, %arg2: i32) -> i32 {
   %0 = scf.for %i = %c0 to %arg1 step %c1 iter_args(%iarg0 = %arg0) -> i32 {
     %2 = scf.execute_region -> i32 {
       %1 = arith.cmpi slt, %i, %c1 : index
-      cond_br %1, ^bb1, ^bb2
+      cf.cond_br %1, ^bb1, ^bb2
     ^bb1:
       %2 = arith.subi %iarg0, %arg0 : i32
       scf.yield %2 : i32

diff  --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir
index 31bb1290bcb66..a3c3391c852ba 100644
--- a/mlir/test/Dialect/SCF/ops.mlir
+++ b/mlir/test/Dialect/SCF/ops.mlir
@@ -298,13 +298,13 @@ func @execute_region() -> i64 {
   }
 
   // CHECK:       scf.execute_region {
-  // CHECK-NEXT:    br ^bb1
+  // CHECK-NEXT:    cf.br ^bb1
   // CHECK-NEXT:  ^bb1:
   // CHECK-NEXT:    scf.yield
   // CHECK-NEXT:  }
   "scf.execute_region"() ({
   ^bb0:
-    br ^bb1
+    cf.br ^bb1
   ^bb1:
     scf.yield
   }) : () -> ()

diff --git a/mlir/test/Dialect/Standard/canonicalize.mlir b/mlir/test/Dialect/Standard/canonicalize.mlir
index 2a548774fd118..2c3f977b4cf7f 100644
--- a/mlir/test/Dialect/Standard/canonicalize.mlir
+++ b/mlir/test/Dialect/Standard/canonicalize.mlir
@@ -64,28 +64,6 @@ func @select_extui_i1(%arg0: i1) -> i1 {
 
 // -----
 
-// CHECK-LABEL: @branchCondProp
-//       CHECK:       %[[trueval:.+]] = arith.constant true
-//       CHECK:       %[[falseval:.+]] = arith.constant false
-//       CHECK:       "test.consumer1"(%[[trueval]]) : (i1) -> ()
-//       CHECK:       "test.consumer2"(%[[falseval]]) : (i1) -> ()
-func @branchCondProp(%arg0: i1) {
-  cond_br %arg0, ^trueB, ^falseB
-
-^trueB:
-  "test.consumer1"(%arg0) : (i1) -> ()
-  br ^exit
-
-^falseB:
-  "test.consumer2"(%arg0) : (i1) -> ()
-  br ^exit
-
-^exit:
-  return
-}
-
-// -----
-
 // CHECK-LABEL: @selToNot
 //       CHECK:       %[[trueval:.+]] = arith.constant true
 //       CHECK:       %[[res:.+]] = arith.xori %arg0, %[[trueval]] : i1

diff --git a/mlir/test/Dialect/Standard/func-bufferize.mlir b/mlir/test/Dialect/Standard/func-bufferize.mlir
index 2e37d70730bfe..6895e44c27913 100644
--- a/mlir/test/Dialect/Standard/func-bufferize.mlir
+++ b/mlir/test/Dialect/Standard/func-bufferize.mlir
@@ -9,11 +9,11 @@ func @identity(%arg0: tensor<f32>) -> tensor<f32> {
 
 // CHECK-LABEL:   func @block_arguments(
 // CHECK-SAME:        %[[ARG:.*]]: memref<f32>) -> memref<f32> {
-// CHECK:           br ^bb1(%[[ARG]] : memref<f32>)
+// CHECK:           cf.br ^bb1(%[[ARG]] : memref<f32>)
 // CHECK:         ^bb1(%[[BBARG:.*]]: memref<f32>):
 // CHECK:           return %[[BBARG]] : memref<f32>
 func @block_arguments(%arg0: tensor<f32>) -> tensor<f32> {
-  br ^bb1(%arg0: tensor<f32>)
+  cf.br ^bb1(%arg0: tensor<f32>)
 ^bb1(%bbarg: tensor<f32>):
   return %bbarg : tensor<f32>
 }
@@ -52,7 +52,7 @@ func @unconverted_op_in_body() -> tensor<f32> {
 // update all terminators and issue an error if that is not possible.
 func @unable_to_update_terminator(%arg0: tensor<f32>) -> tensor<f32> {
     %0 = arith.constant true
-    cond_br %0, ^bb1(%arg0: tensor<f32>), ^bb2(%arg0: tensor<f32>)
+    cf.cond_br %0, ^bb1(%arg0: tensor<f32>), ^bb2(%arg0: tensor<f32>)
   ^bb1(%bbarg0: tensor<f32>):
     // expected-error @+1 {{failed to legalize operation 'test.terminator'}}
     "test.terminator"() : () -> ()

diff --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir
index bfd655a5820d5..c510ed69645cf 100644
--- a/mlir/test/IR/invalid.mlir
+++ b/mlir/test/IR/invalid.mlir
@@ -111,7 +111,7 @@ func @tensor_encoding_mismatch(%arg0: tensor<8xi32, "enc">) -> (tensor<8xi32>) {
 
 func @bad_branch() {
 ^bb12:
-  br ^missing  // expected-error {{reference to an undefined block}}
+  cf.br ^missing  // expected-error {{reference to an undefined block}}
 }
 
 // -----
@@ -158,7 +158,7 @@ func @block_arg_no_type() {
 
 func @block_arg_no_close_paren() {
 ^bb42:
-  br ^bb2( // expected-error at +1 {{expected ':'}}
+  cf.br ^bb2( // expected-error at +1 {{expected ':'}}
   return
 }
 
@@ -167,9 +167,9 @@ func @block_arg_no_close_paren() {
 func @block_first_has_predecessor() {
 // expected-error at -1 {{entry block of region may not have predecessors}}
 ^bb42:
-  br ^bb43
+  cf.br ^bb43
 ^bb43:
-  br ^bb42
+  cf.br ^bb42
 }
 
 // -----
@@ -182,7 +182,7 @@ func @no_return() {
 // -----
 
 func @no_terminator() {
-  br ^bb1
+  cf.br ^bb1
 ^bb1:
   %x = arith.constant 0 : i32
   %y = arith.constant 1 : i32  // expected-error {{block with no terminator}}
@@ -368,7 +368,7 @@ func @func_resulterror() -> i32 {
 
 func @argError() {
 ^bb1(%a: i64):  // expected-note {{previously defined here}}
-  br ^bb2
+  cf.br ^bb2
 ^bb2(%a: i64):  // expected-error{{redefinition of SSA value '%a'}}
   return
 }
@@ -379,7 +379,7 @@ func @br_mismatch() {
 ^bb0:
   %0:2 = "foo"() : () -> (i1, i17)
   // expected-error @+1 {{branch has 2 operands for successor #0, but target block has 1}}
-  br ^bb1(%0#1, %0#0 : i17, i1)
+  cf.br ^bb1(%0#1, %0#0 : i17, i1)
 
 ^bb1(%x: i17):
   return
@@ -391,7 +391,7 @@ func @succ_arg_type_mismatch() {
 ^bb0:
   %0 = "getBool"() : () -> i1
   // expected-error @+1 {{type mismatch for bb argument #0 of successor #0}}
-  br ^bb1(%0 : i1)
+  cf.br ^bb1(%0 : i1)
 
 ^bb1(%x: i32):
   return
@@ -409,7 +409,7 @@ func @vectors(vector<1 x vector<1xi32>>, vector<2x4xf32>)
 func @condbr_notbool() {
 ^bb0:
   %a = "foo"() : () -> i32 // expected-note {{prior use here}}
-  cond_br %a, ^bb0, ^bb0 // expected-error {{use of value '%a' expects different type than prior uses: 'i1' vs 'i32'}}
+  cf.cond_br %a, ^bb0, ^bb0 // expected-error {{use of value '%a' expects different type than prior uses: 'i1' vs 'i32'}}
 }
 
 // -----
@@ -418,7 +418,7 @@ func @condbr_badtype() {
 ^bb0:
   %c = "foo"() : () -> i1
   %a = "foo"() : () -> i32
-  cond_br %c, ^bb0(%a, %a : i32, ^bb0) // expected-error {{expected non-function type}}
+  cf.cond_br %c, ^bb0(%a, %a : i32, ^bb0) // expected-error {{expected non-function type}}
 }
 
 // -----
@@ -427,7 +427,7 @@ func @condbr_a_bb_is_not_a_type() {
 ^bb0:
   %c = "foo"() : () -> i1
   %a = "foo"() : () -> i32
-  cond_br %c, ^bb0(%a, %a : i32, i32), i32 // expected-error {{expected block name}}
+  cf.cond_br %c, ^bb0(%a, %a : i32, i32), i32 // expected-error {{expected block name}}
 }
 
 // -----
@@ -477,7 +477,7 @@ func @name_scope_failure() {
 func @dominance_failure() {
 ^bb0:
   "foo"(%x) : (i32) -> ()    // expected-error {{operand #0 does not dominate this use}}
-  br ^bb1
+  cf.br ^bb1
 ^bb1:
   %x = "bar"() : () -> i32    // expected-note {{operand defined here (op in the same region)}}
   return
@@ -489,7 +489,7 @@ func @dominance_failure() {
 ^bb0:
   "foo"(%x) : (i32) -> ()    // expected-error {{operand #0 does not dominate this use}}
   %x = "bar"() : () -> i32    // expected-note {{operand defined here (op in the same block)}}
-  br ^bb1
+  cf.br ^bb1
 ^bb1:
   return
 }
@@ -508,7 +508,7 @@ func @dominance_failure() {
 
 func @dominance_failure() {  //  expected-note {{operand defined as a block argument (block #1 in the same region)}}
 ^bb0:
-  br ^bb1(%x : i32)    // expected-error {{operand #0 does not dominate this use}}
+  cf.br ^bb1(%x : i32)    // expected-error {{operand #0 does not dominate this use}}
 ^bb1(%x : i32):
   return
 }
@@ -520,7 +520,7 @@ func @dominance_failure() {  //  expected-note {{operand defined as a block argu
   %f = "foo"() ({
     "foo"(%x) : (i32) -> ()    // expected-error {{operand #0 does not dominate this use}}
   }) : () -> (i32)
-  br ^bb1(%f : i32)
+  cf.br ^bb1(%f : i32)
 ^bb1(%x : i32):
   return
 }
@@ -988,7 +988,7 @@ func @invalid_nested_dominance() {
   "test.ssacfg_region"() ({
     // expected-error @+1 {{operand #0 does not dominate this use}}
     "foo.use" (%1) : (i32) -> ()
-    br ^bb2
+    cf.br ^bb2
 
   ^bb2:
     // expected-note @+1 {{operand defined here}}
@@ -1588,7 +1588,7 @@ test.format_symbol_name_attr_op @name { attr = "xx" }
 // -----
 
 func @forward_reference_type_check() -> (i8) {
-  br ^bb2
+  cf.br ^bb2
 
 ^bb1:
   // expected-note @+1 {{previously used here with type 'i8'}}
@@ -1597,7 +1597,7 @@ func @forward_reference_type_check() -> (i8) {
 ^bb2:
   // expected-error @+1 {{definition of SSA value '%1#0' has type 'f32'}}
   %1 = "bar"() : () -> (f32)
-  br ^bb1
+  cf.br ^bb1
 }
 
 // -----
@@ -1610,9 +1610,9 @@ func @dominance_error_in_unreachable_op() -> i1 {
     ^bb1:
 // expected-error @+1 {{operand #0 does not dominate this use}}
       %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
-      br ^bb4
+      cf.br ^bb4
     ^bb2:
-      br ^bb2
+      cf.br ^bb2
     ^bb4:
       %1 = "foo"() : ()->i64   // expected-note {{operand defined here}}
   }) : () -> ()

diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir
index 0c7bdd1e08bae..5e47d37613859 100644
--- a/mlir/test/IR/parser.mlir
+++ b/mlir/test/IR/parser.mlir
@@ -185,9 +185,9 @@ func @simpleCFGUsingBBArgs(i32, i64) {
 func @multiblock() {
   return     // CHECK:   return
 ^bb1:         // CHECK: ^bb1:   // no predecessors
-  br ^bb4     // CHECK:   br ^bb3
+  cf.br ^bb4     // CHECK:   cf.br ^bb3
 ^bb2:         // CHECK: ^bb2:   // pred: ^bb2
-  br ^bb2     // CHECK:   br ^bb2
+  cf.br ^bb2     // CHECK:   cf.br ^bb2
 ^bb4:         // CHECK: ^bb3:   // pred: ^bb1
   return     // CHECK:   return
 }            // CHECK: }
@@ -416,7 +416,7 @@ func @attributes() {
 func @ssa_values() -> (i16, i8) {
   // CHECK: %{{.*}}:2 = "foo"() : () -> (i1, i17)
   %0:2 = "foo"() : () -> (i1, i17)
-  br ^bb2
+  cf.br ^bb2
 
 ^bb1:       // CHECK: ^bb1: // pred: ^bb2
   // CHECK: %{{.*}}:2 = "baz"(%{{.*}}#1, %{{.*}}#0, %{{.*}}#1) : (f32, i11, i17) -> (i16, i8)
@@ -428,14 +428,14 @@ func @ssa_values() -> (i16, i8) {
 ^bb2:       // CHECK: ^bb2:  // pred: ^bb0
   // CHECK: %{{.*}}:2 = "bar"(%{{.*}}#0, %{{.*}}#1) : (i1, i17) -> (i11, f32)
   %2:2 = "bar"(%0#0, %0#1) : (i1, i17) -> (i11, f32)
-  br ^bb1
+  cf.br ^bb1
 }
 
 // CHECK-LABEL: func @bbargs() -> (i16, i8) {
 func @bbargs() -> (i16, i8) {
   // CHECK: %{{.*}}:2 = "foo"() : () -> (i1, i17)
   %0:2 = "foo"() : () -> (i1, i17)
-  br ^bb1(%0#1, %0#0 : i17, i1)
+  cf.br ^bb1(%0#1, %0#0 : i17, i1)
 
 ^bb1(%x: i17, %y: i1):       // CHECK: ^bb1(%{{.*}}: i17, %{{.*}}: i1):
   // CHECK: %{{.*}}:2 = "baz"(%{{.*}}, %{{.*}}, %{{.*}}#1) : (i17, i1, i17) -> (i16, i8)
@@ -446,12 +446,12 @@ func @bbargs() -> (i16, i8) {
 // CHECK-LABEL: func @verbose_terminators() -> (i1, i17)
 func @verbose_terminators() -> (i1, i17) {
   %0:2 = "foo"() : () -> (i1, i17)
-// CHECK:  br ^bb1(%{{.*}}#0, %{{.*}}#1 : i1, i17)
-  "std.br"(%0#0, %0#1)[^bb1] : (i1, i17) -> ()
+// CHECK:  cf.br ^bb1(%{{.*}}#0, %{{.*}}#1 : i1, i17)
+  "cf.br"(%0#0, %0#1)[^bb1] : (i1, i17) -> ()
 
 ^bb1(%x : i1, %y : i17):
-// CHECK:  cond_br %{{.*}}, ^bb2(%{{.*}} : i17), ^bb3(%{{.*}}, %{{.*}} : i1, i17)
-  "std.cond_br"(%x, %y, %x, %y) [^bb2, ^bb3] {operand_segment_sizes = dense<[1, 1, 2]>: vector<3xi32>} : (i1, i17, i1, i17) -> ()
+// CHECK:  cf.cond_br %{{.*}}, ^bb2(%{{.*}} : i17), ^bb3(%{{.*}}, %{{.*}} : i1, i17)
+  "cf.cond_br"(%x, %y, %x, %y) [^bb2, ^bb3] {operand_segment_sizes = dense<[1, 1, 2]>: vector<3xi32>} : (i1, i17, i1, i17) -> ()
 
 ^bb2(%a : i17):
   %true = arith.constant true
@@ -468,12 +468,12 @@ func @condbr_simple() -> (i32) {
   %cond = "foo"() : () -> i1
   %a = "bar"() : () -> i32
   %b = "bar"() : () -> i64
-  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}} : i32), ^bb2(%{{.*}} : i64)
-  cond_br %cond, ^bb1(%a : i32), ^bb2(%b : i64)
+  // CHECK: cf.cond_br %{{.*}}, ^bb1(%{{.*}} : i32), ^bb2(%{{.*}} : i64)
+  cf.cond_br %cond, ^bb1(%a : i32), ^bb2(%b : i64)
 
 // CHECK: ^bb1({{.*}}: i32): // pred: ^bb0
 ^bb1(%x : i32):
-  br ^bb2(%b: i64)
+  cf.br ^bb2(%b: i64)
 
 // CHECK: ^bb2({{.*}}: i64): // 2 preds: ^bb0, ^bb1
 ^bb2(%y : i64):
@@ -486,8 +486,8 @@ func @condbr_moarargs() -> (i32) {
   %cond = "foo"() : () -> i1
   %a = "bar"() : () -> i32
   %b = "bar"() : () -> i64
-  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}, %{{.*}} : i32, i64), ^bb2(%{{.*}}, %{{.*}}, %{{.*}} : i64, i32, i32)
-  cond_br %cond, ^bb1(%a, %b : i32, i64), ^bb2(%b, %a, %a : i64, i32, i32)
+  // CHECK: cf.cond_br %{{.*}}, ^bb1(%{{.*}}, %{{.*}} : i32, i64), ^bb2(%{{.*}}, %{{.*}}, %{{.*}} : i64, i32, i32)
+  cf.cond_br %cond, ^bb1(%a, %b : i32, i64), ^bb2(%b, %a, %a : i64, i32, i32)
 
 ^bb1(%x : i32, %y : i64):
   return %x : i32
@@ -1279,15 +1279,14 @@ func @default_dialect(%bool : i1) {
 
     // TODO: remove this after removing the special casing for std in the printer.
     // Verify that operations in the standard dialect keep the `std.` prefix.
-    // CHECK: std.assert
-    assert %bool, "Assertion"
+    // CHECK: cf.assert
+    cf.assert %bool, "Assertion"
     "test.terminator"() : ()->()
   }
   // The same operation outside of the region does not have an std. prefix.
   // CHECK-NOT: std.assert
-  // CHECK: assert
-  assert %bool, "Assertion"
-  return
+  // CHECK: return
+  std.return
 }
 
 // CHECK-LABEL: func @unreachable_dominance_violation_ok
@@ -1296,9 +1295,9 @@ func @unreachable_dominance_violation_ok() -> i1 {
 // CHECK:   return [[VAL]] : i1
 // CHECK: ^bb1:   // no predecessors
 // CHECK:   [[VAL2:%.*]]:3 = "bar"([[VAL3:%.*]]) : (i64) -> (i1, i1, i1)
-// CHECK:   br ^bb3
+// CHECK:   cf.br ^bb3
 // CHECK: ^bb2:   // pred: ^bb2
-// CHECK:   br ^bb2
+// CHECK:   cf.br ^bb2
 // CHECK: ^bb3:   // pred: ^bb1
 // CHECK:   [[VAL3]] = "foo"() : () -> i64
 // CHECK:   return [[VAL2]]#1 : i1
@@ -1308,9 +1307,9 @@ func @unreachable_dominance_violation_ok() -> i1 {
 ^bb1:
   // %1 is not dominated by it's definition, but block is not reachable.
   %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
-  br ^bb3
+  cf.br ^bb3
 ^bb2:
-  br ^bb2
+  cf.br ^bb2
 ^bb3:
   %1 = "foo"() : ()->i64
   return %2#1 : i1
@@ -1318,28 +1317,28 @@ func @unreachable_dominance_violation_ok() -> i1 {
 
 // CHECK-LABEL: func @graph_region_in_hierarchy_ok
 func @graph_region_in_hierarchy_ok() -> i64 {
-// CHECK:   br ^bb2
+// CHECK:   cf.br ^bb2
 // CHECK: ^bb1:
 // CHECK:   test.graph_region {
 // CHECK:     [[VAL2:%.*]]:3 = "bar"([[VAL3:%.*]]) : (i64) -> (i1, i1, i1)
 // CHECK:   }
-// CHECK:   br ^bb3
+// CHECK:   cf.br ^bb3
 // CHECK: ^bb2:   // pred: ^bb0
 // CHECK:   [[VAL3]] = "foo"() : () -> i64
-// CHECK:   br ^bb1
+// CHECK:   cf.br ^bb1
 // CHECK: ^bb3:   // pred: ^bb1
 // CHECK:   return [[VAL3]] : i64
 // CHECK: }
-  br ^bb2
+  cf.br ^bb2
 ^bb1:
   test.graph_region {
     // %1 is well-defined here, since bb2 dominates bb1.
     %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
   }
-  br ^bb4
+  cf.br ^bb4
 ^bb2:
   %1 = "foo"() : ()->i64
-  br ^bb1
+  cf.br ^bb1
 ^bb4:
   return %1 : i64
 }

diff --git a/mlir/test/IR/region.mlir b/mlir/test/IR/region.mlir
index e3aec976a2fe2..028e0cd7b05ce 100644
--- a/mlir/test/IR/region.mlir
+++ b/mlir/test/IR/region.mlir
@@ -46,13 +46,13 @@ func @unnamed_region_has_wrong_number_of_blocks() {
     "test.sized_region_op"() (
     {
         "work"() : () -> ()
-        br ^next1
+        cf.br ^next1
       ^next1:
         "work"() : () -> ()
     },
     {
         "work"() : () -> ()
-        br ^next2
+        cf.br ^next2
       ^next2:
         "work"() : () -> ()
     }) : () -> ()

diff --git a/mlir/test/IR/traits.mlir b/mlir/test/IR/traits.mlir
index 4d15d70b21c02..c0fb012975bac 100644
--- a/mlir/test/IR/traits.mlir
+++ b/mlir/test/IR/traits.mlir
@@ -529,9 +529,9 @@ func @illegalCDFGInsideDominanceFreeScope() -> () {
     ^bb1:
       // expected-error @+1 {{operand #0 does not dominate this use}}
       %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
-      br ^bb4
+      cf.br ^bb4
     ^bb2:
-      br ^bb2
+      cf.br ^bb2
     ^bb4:
       %1 = "foo"() : ()->i64   // expected-note {{operand defined here}}
 		return %2#1 : i1
@@ -559,7 +559,7 @@ func @graph_region_cant_have_blocks() {
   test.graph_region {
     // expected-error at -1 {{'test.graph_region' op expects graph region #0 to have 0 or 1 blocks}}
   ^bb42:
-    br ^bb43
+    cf.br ^bb43
   ^bb43:
     "terminator"() : () -> ()
   }

diff --git a/mlir/test/IR/visitors.mlir b/mlir/test/IR/visitors.mlir
index 9a8332cd06614..23903b898357d 100644
--- a/mlir/test/IR/visitors.mlir
+++ b/mlir/test/IR/visitors.mlir
@@ -116,10 +116,10 @@ func @unstructured_cfg() {
   "regionOp0"() ({
     ^bb0:
       "op0"() : () -> ()
-      br ^bb2
+      cf.br ^bb2
     ^bb1:
       "op1"() : () -> ()
-      br ^bb2
+      cf.br ^bb2
     ^bb2:
       "op2"() : () -> ()
   }) : () -> ()
@@ -131,9 +131,9 @@ func @unstructured_cfg() {
 // CHECK:       Visiting op 'builtin.func'
 // CHECK:       Visiting op 'regionOp0'
 // CHECK:       Visiting op 'op0'
-// CHECK:       Visiting op 'std.br'
+// CHECK:       Visiting op 'cf.br'
 // CHECK:       Visiting op 'op1'
-// CHECK:       Visiting op 'std.br'
+// CHECK:       Visiting op 'cf.br'
 // CHECK:       Visiting op 'op2'
 // CHECK:       Visiting op 'std.return'
 
@@ -151,9 +151,9 @@ func @unstructured_cfg() {
 
 // CHECK-LABEL: Op post-order visits
 // CHECK:       Visiting op 'op0'
-// CHECK:       Visiting op 'std.br'
+// CHECK:       Visiting op 'cf.br'
 // CHECK:       Visiting op 'op1'
-// CHECK:       Visiting op 'std.br'
+// CHECK:       Visiting op 'cf.br'
 // CHECK:       Visiting op 'op2'
 // CHECK:       Visiting op 'regionOp0'
 // CHECK:       Visiting op 'std.return'
@@ -183,9 +183,9 @@ func @unstructured_cfg() {
 
 // CHECK-LABEL: Op post-order erasures (skip)
 // CHECK:       Erasing op 'op0'
-// CHECK:       Erasing op 'std.br'
+// CHECK:       Erasing op 'cf.br'
 // CHECK:       Erasing op 'op1'
-// CHECK:       Erasing op 'std.br'
+// CHECK:       Erasing op 'cf.br'
 // CHECK:       Erasing op 'op2'
 // CHECK:       Erasing op 'regionOp0'
 // CHECK:       Erasing op 'std.return'
@@ -197,9 +197,9 @@ func @unstructured_cfg() {
 
 // CHECK-LABEL: Op post-order erasures (no skip)
 // CHECK:       Erasing op 'op0'
-// CHECK:       Erasing op 'std.br'
+// CHECK:       Erasing op 'cf.br'
 // CHECK:       Erasing op 'op1'
-// CHECK:       Erasing op 'std.br'
+// CHECK:       Erasing op 'cf.br'
 // CHECK:       Erasing op 'op2'
 // CHECK:       Erasing op 'regionOp0'
 // CHECK:       Erasing op 'std.return'

diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
index b6ae33b0e2190..6b215e81d2e49 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
@@ -5,7 +5,7 @@
 // RUN:               -async-runtime-ref-counting                              \
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -arith-expand                                            \
 // RUN:               -memref-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
@@ -21,7 +21,7 @@
 
 // RUN:   mlir-opt %s                                                          \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-vector-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \

diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
index 4040e9db07efd..372e626d727b0 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
@@ -5,7 +5,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -arith-expand                                            \
 // RUN:               -memref-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
@@ -26,7 +26,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -arith-expand                                            \
 // RUN:               -memref-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
@@ -42,7 +42,7 @@
 
 // RUN:   mlir-opt %s                                                          \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-vector-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \

diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
index 4394aac514864..9c143a25bce61 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
@@ -3,7 +3,7 @@
 // RUN:               -async-runtime-ref-counting                              \
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -arith-expand                                            \
 // RUN:               -memref-expand                                              \
@@ -19,7 +19,7 @@
 // RUN:               -async-to-async-runtime                                  \
 // RUN:               -async-runtime-policy-based-ref-counting                 \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -arith-expand                                            \
 // RUN:               -memref-expand                                              \
@@ -38,7 +38,7 @@
 // RUN:               -async-runtime-ref-counting                              \
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -arith-expand                                            \
 // RUN:               -memref-expand                                              \
@@ -125,7 +125,7 @@ func @entry() {
 
   scf.parallel (%i) = (%lb1) to (%ub1) step (%c1) {
     %false = arith.constant 0 : i1
-    assert %false, "should never be executed"
+    cf.assert %false, "should never be executed"
   }
 
   memref.dealloc %A : memref<9xf32>

diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
index 444747369dd2a..bb4687f062552 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
@@ -4,7 +4,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -arith-expand                                            \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \
@@ -19,7 +19,7 @@
 // RUN:               -async-runtime-policy-based-ref-counting                 \
 // RUN:               -arith-expand                                            \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \
@@ -37,7 +37,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -arith-expand                                            \
 // RUN:               -convert-async-to-llvm                                   \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
index d7c10cb940fc9..92eb863951789 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
@@ -5,7 +5,7 @@
 // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=memref.copy register-tile-sizes=4,32 vectorize" | \
 
 // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // Activate to dump assembly
 // R_UN:   -dump-object-file -object-filename=/tmp/a.o \

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
index 5b183db5e55f8..11bb57b1a9d4f 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
@@ -56,7 +56,7 @@ func @main() {
       %e1 = memref.load %C1[%i, %j] : memref<?x?xf32>
       %e2 = memref.load %C2[%i, %j] : memref<?x?xf32>
       %c = arith.cmpf oeq, %e1, %e2 : f32
-      assert %c, "Matmul does not produce same output as matvec"
+      cf.assert %c, "Matmul does not produce same output as matvec"
     }
   }
   %C2_ = memref.cast %C2 : memref<?x?xf32> to memref<*xf32>

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
index 60add3f321688..b15809f875723 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
index d840491b89984..d4437a9675362 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -canonicalize -cse -linalg-comprehensive-module-bufferize |\
 // RUN: mlir-opt -buffer-deallocation -convert-vector-to-scf -lower-affine -convert-linalg-to-loops |\
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext |\

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
index fa50a7d914460..6355d4ea29280 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -linalg-tile="tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
index acef95a33ca6c..31761fe8552b8 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
index 1fa9b83700cf4..058376a1437ce 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
index 126d69ca13c95..5244dd3753e96 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
index 77418e45a4b4b..f4e0afb4b4dd6 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2,2" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
index 0458f26239899..8d0f81ddeb765 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
index 3734e9ce18d4e..32465294fa1e0 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize -buffer-deallocation \
-// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
index bb17ff2f07bda..bfd705364ffb4 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize -buffer-deallocation \
-// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
index fd9778cf3b79f..c4e3a7157a7de 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize -buffer-deallocation \
-// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
index 37023e1622fdd..7a4e3b4fbc4ab 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -1,7 +1,7 @@
 // UNSUPPORTED: asan
 // RUN: mlir-opt %s -linalg-bufferize -arith-bufferize \
-// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops -convert-scf-to-std \
-// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
@@ -9,8 +9,8 @@
 // RUN: mlir-opt %s  -linalg-tile="tile-sizes=1,2,3" -linalg-bufferize \
 // RUN: -scf-bufferize -arith-bufferize -tensor-bufferize \
 // RUN: -func-bufferize \
-// RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-std -convert-scf-to-std \
-// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-cf -convert-scf-to-cf \
+// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Memref/memref_abi.c b/mlir/test/Integration/Dialect/Memref/memref_abi.c
index 960d4e1526981..4955654170a8b 100644
--- a/mlir/test/Integration/Dialect/Memref/memref_abi.c
+++ b/mlir/test/Integration/Dialect/Memref/memref_abi.c
@@ -3,7 +3,7 @@
 
 // Compile the MLIR file to LLVM:
 // RUN: mlir-opt %t/input.mlir \
-// RUN:  -lower-affine  -convert-scf-to-std  -convert-memref-to-llvm \
+// RUN:  -lower-affine  -convert-scf-to-cf  -convert-memref-to-llvm \
 // RUN:  -convert-std-to-llvm -reconcile-unrealized-casts \
 // RUN: | mlir-translate --mlir-to-llvmir -o %t.ll
 

diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
index 90bef5dda1995..a719cd6d9f63f 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
@@ -158,7 +158,7 @@ def compile_and_build_engine(
       f"sparsification,"
       f"sparse-tensor-conversion,"
       f"builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),"
-      f"convert-scf-to-std,"
+      f"convert-scf-to-cf,"
       f"func-bufferize,"
       f"arith-bufferize,"
       f"builtin.func(tensor-bufferize,finalizing-bufferize),"

diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
index 6c32db1f6fcae..4ecd106423edb 100644
--- a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
+++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \
 // RUN:   -memref-expand -arith-expand -convert-vector-to-llvm \
 // RUN:   -convert-memref-to-llvm -convert-std-to-llvm \
 // RUN:   -reconcile-unrealized-casts | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
index 422b619145dee..5451e0d26302f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \
 // RUN:  -arith-bufferize -convert-vector-to-llvm="enable-amx" \
 // RUN:  -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
index c9a80e57e1ea3..92f8d9571e18c 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-ext.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-ext.mlir
index 3ba961f79daba..c5006bbb52081 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-ext.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-ext.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir
index 1517358c0f265..0fe686d8ba395 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \
 // RUN:  -arith-bufferize -convert-vector-to-llvm="enable-amx" \
 // RUN:  -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
index 7bc400b1abb33..01127cab5b6ad 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir
index 9bf7942e52e29..7c6533ed002da 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
index 0f2ef07d59026..7b41f2be8b048 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
index ddbc3babaed91..b69a75de009e9 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-dot.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm="enable-x86vector" -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
index fa2994c76b71b..866a960e78665 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm  -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -convert-arith-to-llvm -reconcile-unrealized-casts |\
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm  -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -convert-arith-to-llvm -reconcile-unrealized-casts |\
 // RUN: mlir-translate --mlir-to-llvmir |\
 // RUN: %lli --entry-function=entry --mattr="avx512f" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext |\
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
index b7a503d12dd19..ac7cb72298bba 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-mask-compress.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm="enable-x86vector" -convert-std-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-std-to-llvm -reconcile-unrealized-casts  | \
 // RUN: mlir-translate  --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx512bw" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-sparse-dot-product.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-sparse-dot-product.mlir
index 7cfbbbe56da21..9c78426ea4d9c 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-sparse-dot-product.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-sparse-dot-product.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm="enable-x86vector" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts  | \
 // RUN: mlir-translate  --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx512bw,avx512vp2intersect" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
index ab39552b48eb8..027aa5f391d78 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-vp2intersect-i32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm="enable-x86vector" -convert-std-to-llvm -reconcile-unrealized-casts  | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm="enable-x86vector" -convert-std-to-llvm -reconcile-unrealized-casts  | \
 // RUN: mlir-translate  --mlir-to-llvmir | \
 // RUN: %lli --entry-function=entry --mattr="avx512bw,avx512vp2intersect" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
index c7a81018c3a3a..19c099cebf948 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-0-d-vectors.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
index 1cf2254519008..195845532a8d3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-broadcast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
index b27bd9212f781..afc9e655314d0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
index 3820c5d5ded61..efcec59138814 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-constant-mask.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
index 4bf8f1b50e6f1..1976cdd5bf20a 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-contraction.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
index 4d89ac8e36245..96bee30c07d7f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask-v4i1.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
index 5834f14c6d22a..15bd627b3a575 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
index befad58e955dc..44b87cff33bd2 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
index c834b3dd3673f..477f3a149fb8e 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-extract-strided-slice.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
index fbd15524f9094..5d5ca67774410 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-col.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=column-major \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
index 4791b78f876a0..ad7e9c9779f14 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-flat-transpose-row.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=row-major \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
index 4f55c9d6ab631..3a5c0f2126335 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-fma.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
index e41c5a96b2310..4ae08567382b5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
index 6191443a19aaa..449d55a01e9d0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-insert-strided-slice.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
index 7f574758de88e..6133d6a74b3a0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
index 7efef146a9547..a9ace32d43a79 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
index 8e375c86a4cd3..7b433f26d5b8d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-col.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=column-major \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
index 4551508a18a62..213e2e6e942c2 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-matrix-multiply-row.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=row-major \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
index 901f030692104..ca9d06ea8a05e 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-f32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
index b6414b92595ac..807ef2162c343 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-outerproduct-i64.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
index f9c7edf52897c..84debe4c8eeb1 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-print-int.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
index 158714b6b7475..4e43a073b88e7 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32-reassoc.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std \
+// RUN: mlir-opt %s -convert-scf-to-cf \
 // RUN:             -convert-vector-to-llvm='reassociate-fp-reductions' \
 // RUN:             -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
index 9c01468a7d437..7cd1a1835ca38 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
index c2d759e852d33..2a350583059f0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64-reassoc.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std \
+// RUN: mlir-opt %s -convert-scf-to-cf \
 // RUN:             -convert-vector-to-llvm='reassociate-fp-reductions' \
 // RUN:             -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
index e43578abd4091..7d6d7509853a3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-f64.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
index 143f95504c584..23f3dbac69db6 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
index 10abaa7a83d3c..28f073b615c6f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i4.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
index 51136c5d91828..9b834565ba0a8 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-i64.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
index 66fbed253f779..e603befc8ab2e 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-si4.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
index 4a978e4514197..05da9e027af49 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-reductions-ui4.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
index f8b670172ce40..b3cd2aed29335 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-scan.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-vector-scan-lowering -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-vector-scan-lowering -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
index cec36d1d35caf..e013be760f6a8 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
index a51f01ed72df5..58cae1c899971 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-shape-cast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
index 40356267230cd..c8a748870aa5f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-shuffle.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
index 6df9a7f832cfd..cdc21f497683d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
index ebb441c8b2cbc..d21a7b5c27edb 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
index f1a77923c9eee..a69393245aeab 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
index b2f56525738d6..1d0f63a0c57be 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
index 3553d094324fd..515e72b7dc008 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
index 427e5101000b1..7b4141def89a6 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
index ea6b6d178d7c7..18d08ecb6beea 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
index b773f1f439127..14b9f7aa4286d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
index 1d6e61f15eb96..5b660960d227b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transpose.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-vector-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
index 0da3d853635f0..5f9c6d2a643ec 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
@@ -1,11 +1,11 @@
 // RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \
-// RUN:   -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN:   -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
 // RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \
-// RUN: -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e main \
+// RUN: -convert-scf-to-cf -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e main \
 // RUN: -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff  --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
index 52d5faf43f3e5..2ceddc1bf28f8 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN: -gpu-kernel-outlining \
 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \
-// RUN: --convert-scf-to-std -gpu-to-llvm \
+// RUN: --convert-scf-to-cf -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \

diff  --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
index a25f5d3408a39..b8aaca99e068e 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN: -gpu-kernel-outlining \
 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \
-// RUN: --convert-scf-to-std -gpu-to-llvm \
+// RUN: --convert-scf-to-cf -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \

diff  --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir
index 9d009abe3ef2d..292102410aafc 100644
--- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir
@@ -24,10 +24,10 @@ func @main() {
     %width = arith.index_cast %block_x : index to i32
     %offset = arith.constant 4 : i32
     %shfl, %valid = gpu.shuffle xor %val, %offset, %width : f32
-    cond_br %valid, ^bb1(%shfl : f32), ^bb0
+    cf.cond_br %valid, ^bb1(%shfl : f32), ^bb0
   ^bb0:
     %m1 = arith.constant -1.0 : f32
-    br ^bb1(%m1 : f32)
+    cf.br ^bb1(%m1 : f32)
   ^bb1(%value : f32):
     memref.store %value, %dst[%tx] : memref<?xf32>
     gpu.terminator

diff  --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
index 5546035d59bdb..dc079d4af3219 100644
--- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s \
-// RUN:   -convert-scf-to-std \
+// RUN:   -convert-scf-to-cf \
 // RUN:   -gpu-kernel-outlining \
 // RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \
 // RUN:   -gpu-to-llvm \

diff  --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
index db358f43dd9c8..e47bf1715abee 100644
--- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s \
-// RUN:   -convert-scf-to-std \
+// RUN:   -convert-scf-to-cf \
 // RUN:   -gpu-kernel-outlining \
 // RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \
 // RUN:   -gpu-to-llvm \

diff  --git a/mlir/test/Target/Cpp/control_flow.mlir b/mlir/test/Target/Cpp/control_flow.mlir
index d73299e0a9621..a7b10b57a8240 100644
--- a/mlir/test/Target/Cpp/control_flow.mlir
+++ b/mlir/test/Target/Cpp/control_flow.mlir
@@ -4,14 +4,14 @@
 // simple(10, false) -> 30
 func @simple(i64, i1) -> i64 {
 ^bb0(%a: i64, %cond: i1):
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%a: i64)
+  cf.br ^bb3(%a: i64)
 ^bb2:
   %b = emitc.call "add"(%a, %a) : (i64, i64) -> i64
-  br ^bb3(%b: i64)
+  cf.br ^bb3(%b: i64)
 ^bb3(%c: i64):
-  br ^bb4(%c, %a : i64, i64)
+  cf.br ^bb4(%c, %a : i64, i64)
 ^bb4(%d : i64, %e : i64):
   %0 = emitc.call "add"(%d, %e) : (i64, i64) -> i64
   return %0 : i64
@@ -45,7 +45,7 @@ func @simple(i64, i1) -> i64 {
 
 func @block_labels0() {
 ^bb1:
-    br ^bb2
+    cf.br ^bb2
 ^bb2:
     return
 }
@@ -59,7 +59,7 @@ func @block_labels0() {
 // Repeat the same function to make sure the names of the block labels get reset.
 func @block_labels1() {
 ^bb1:
-    br ^bb2
+    cf.br ^bb2
 ^bb2:
     return
 }

diff  --git a/mlir/test/Target/Cpp/invalid.mlir b/mlir/test/Target/Cpp/invalid.mlir
index d6b886840f0d8..2ada598969d5d 100644
--- a/mlir/test/Target/Cpp/invalid.mlir
+++ b/mlir/test/Target/Cpp/invalid.mlir
@@ -3,7 +3,7 @@
 // expected-error at +1 {{'builtin.func' op with multiple blocks needs variables declared at top}}
 func @multiple_blocks() {
 ^bb1:
-    br ^bb2
+    cf.br ^bb2
 ^bb2:
     return
 }

diff  --git a/mlir/test/Transforms/buffer-hoisting.mlir b/mlir/test/Transforms/buffer-hoisting.mlir
index e098ede39aceb..6183f4f09b1d3 100644
--- a/mlir/test/Transforms/buffer-hoisting.mlir
+++ b/mlir/test/Transforms/buffer-hoisting.mlir
@@ -14,20 +14,20 @@
 
 // CHECK-LABEL: func @condBranch
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
 // CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 
 // -----
 
@@ -47,19 +47,19 @@ func @condBranchDynamicType(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb3(%arg1 : memref<?xf32>)
+  cf.br ^bb3(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
-  br ^bb3(%1 : memref<?xf32>)
+  cf.br ^bb3(%1 : memref<?xf32>)
 ^bb3(%2: memref<?xf32>):
   test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: ^bb2
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]])
@@ -89,27 +89,27 @@ func @condBranchDynamicTypeNested(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb6(%arg1 : memref<?xf32>)
+  cf.br ^bb6(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
-  cond_br %arg0, ^bb3, ^bb4
+  cf.cond_br %arg0, ^bb3, ^bb4
 ^bb3:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb4:
-  br ^bb5(%1 : memref<?xf32>)
+  cf.br ^bb5(%1 : memref<?xf32>)
 ^bb5(%2: memref<?xf32>):
-  br ^bb6(%2 : memref<?xf32>)
+  cf.br ^bb6(%2 : memref<?xf32>)
 ^bb6(%3: memref<?xf32>):
-  br ^bb7(%3 : memref<?xf32>)
+  cf.br ^bb7(%3 : memref<?xf32>)
 ^bb7(%4: memref<?xf32>):
   test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: ^bb2
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]])
@@ -128,18 +128,18 @@ func @condBranchDynamicTypeNested(
 
 // CHECK-LABEL: func @criticalEdge
 func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb2(%0 : memref<2xf32>)
+  cf.br ^bb2(%0 : memref<2xf32>)
 ^bb2(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
 // CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 
 // -----
 
@@ -155,13 +155,13 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   %7 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%7: memref<2xf32>) out(%7: memref<2xf32>)
@@ -171,8 +171,8 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
-//      CHECK: br ^bb3
-//      CHECK: br ^bb3
+//      CHECK: cf.br ^bb3
+//      CHECK: cf.br ^bb3
 // CHECK-NEXT: ^bb3
 //      CHECK: %[[ALLOC1:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
@@ -193,13 +193,13 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
@@ -225,17 +225,17 @@ func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
 ^bb3(%5: memref<2xf32>):
-  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
 ^bb4(%6: memref<2xf32>):
-  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
 ^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
   %9 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
@@ -245,9 +245,9 @@ func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
-//      CHECK: br ^bb5
-//      CHECK: br ^bb5
-//      CHECK: br ^bb5
+//      CHECK: cf.br ^bb5
+//      CHECK: cf.br ^bb5
+//      CHECK: cf.br ^bb5
 // CHECK-NEXT: ^bb5
 //      CHECK: %[[ALLOC1:.*]] = memref.alloc()
 // CHECK-NEXT: test.buffer_based
@@ -287,15 +287,15 @@ func @moving_alloc_and_inserting_missing_dealloc(
   %cond: i1,
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^exit(%0 : memref<2xf32>)
+  cf.br ^exit(%0 : memref<2xf32>)
 ^bb2:
   %1 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
-  br ^exit(%1 : memref<2xf32>)
+  cf.br ^exit(%1 : memref<2xf32>)
 ^exit(%arg2: memref<2xf32>):
   test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
   return
@@ -303,7 +303,7 @@ func @moving_alloc_and_inserting_missing_dealloc(
 
 // CHECK-NEXT: %{{.*}} = memref.alloc()
 // CHECK-NEXT: %{{.*}} = memref.alloc()
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 
 // -----
 
@@ -322,21 +322,21 @@ func @moving_invalid_dealloc_op_complex(
   %cond: i1,
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
-  br ^exit(%arg0 : memref<2xf32>)
+  cf.br ^exit(%arg0 : memref<2xf32>)
 ^bb2:
   %1 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
   memref.dealloc %1 : memref<2xf32>
-  br ^exit(%1 : memref<2xf32>)
+  cf.br ^exit(%1 : memref<2xf32>)
 ^exit(%arg2: memref<2xf32>):
   test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
 // CHECK-NEXT: %{{.*}} = memref.alloc()
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 
 // -----
 
@@ -351,9 +351,9 @@ func @nested_regions_and_cond_branch(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
@@ -363,13 +363,13 @@ func @nested_regions_and_cond_branch(
     %tmp1 = math.exp %gen1_arg0 : f32
     test.region_yield %tmp1 : f32
   }
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 // CHECK-NEXT:   %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT:   cond_br
+// CHECK-NEXT:   cf.cond_br
 //      CHECK:   test.region_buffer_based
 //      CHECK:     %[[ALLOC1:.*]] = memref.alloc()
 // CHECK-NEXT:     test.buffer_based
@@ -556,19 +556,19 @@ func @inner_region_control_flow_div(
 
 // CHECK-LABEL: func @condBranchAlloca
 func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloca() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: ^bb2
 //      CHECK: ^bb2
 // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
@@ -586,17 +586,17 @@ func @ifElseNestedAlloca(
   %arg2: memref<2xf32>) {
   %0 = memref.alloca() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
 ^bb3(%5: memref<2xf32>):
-  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
 ^bb4(%6: memref<2xf32>):
-  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
 ^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
   %9 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
@@ -623,9 +623,9 @@ func @nestedRegionsAndCondBranchAlloca(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
@@ -635,13 +635,13 @@ func @nestedRegionsAndCondBranchAlloca(
     %tmp1 = math.exp %gen1_arg0 : f32
     test.region_yield %tmp1 : f32
   }
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 // CHECK-NEXT:   %[[ALLOC:.*]] = memref.alloc()
-// CHECK-NEXT:   cond_br
+// CHECK-NEXT:   cf.cond_br
 //      CHECK:   test.region_buffer_based
 //      CHECK:     %[[ALLOCA:.*]] = memref.alloca()
 // CHECK-NEXT:     test.buffer_based

diff  --git a/mlir/test/Transforms/buffer-loop-hoisting.mlir b/mlir/test/Transforms/buffer-loop-hoisting.mlir
index b4355add46618..1aab75f3a27f3 100644
--- a/mlir/test/Transforms/buffer-loop-hoisting.mlir
+++ b/mlir/test/Transforms/buffer-loop-hoisting.mlir
@@ -13,19 +13,19 @@
 
 // CHECK-LABEL: func @condBranch
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: %[[ALLOC:.*]] = memref.alloc()
 
 // -----
@@ -46,19 +46,19 @@ func @condBranchDynamicType(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb3(%arg1 : memref<?xf32>)
+  cf.br ^bb3(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
-  br ^bb3(%1 : memref<?xf32>)
+  cf.br ^bb3(%1 : memref<?xf32>)
 ^bb3(%2: memref<?xf32>):
   test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: ^bb2
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]])
@@ -77,9 +77,9 @@ func @nested_regions_and_cond_branch(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
@@ -89,12 +89,12 @@ func @nested_regions_and_cond_branch(
     %tmp1 = math.exp %gen1_arg0 : f32
     test.region_yield %tmp1 : f32
   }
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
-// CHECK-NEXT:   cond_br
+// CHECK-NEXT:   cf.cond_br
 //      CHECK:   %[[ALLOC0:.*]] = memref.alloc()
 //      CHECK:   test.region_buffer_based
 //      CHECK:     %[[ALLOC1:.*]] = memref.alloc()

diff  --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir
index acaf13957bafd..fdf300a0b1389 100644
--- a/mlir/test/Transforms/canonicalize-block-merge.mlir
+++ b/mlir/test/Transforms/canonicalize-block-merge.mlir
@@ -58,7 +58,7 @@ func @mismatch_operands(%cond : i1, %arg0 : i32, %arg1 : i32) -> i32 {
   // CHECK: %[[RES:.*]] = arith.select %[[COND]], %[[ARG0]], %[[ARG1]]
   // CHECK: return %[[RES]]
 
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   return %arg0 : i32
@@ -75,7 +75,7 @@ func @mismatch_operands_matching_arguments(%cond : i1, %arg0 : i32, %arg1 : i32)
   // CHECK: %[[RES1:.*]] = arith.select %[[COND]], %[[ARG0]], %[[ARG1]]
   // CHECK: return %[[RES1]], %[[RES0]]
 
-  cond_br %cond, ^bb1(%arg1 : i32), ^bb2(%arg0 : i32)
+  cf.cond_br %cond, ^bb1(%arg1 : i32), ^bb2(%arg0 : i32)
 
 ^bb1(%arg2 : i32):
   return %arg0, %arg2 : i32, i32
@@ -87,9 +87,9 @@ func @mismatch_operands_matching_arguments(%cond : i1, %arg0 : i32, %arg1 : i32)
 
 // CHECK-LABEL: func @mismatch_argument_uses(
 func @mismatch_argument_uses(%cond : i1, %arg0 : i32, %arg1 : i32) -> (i32, i32) {
-  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
 
-  cond_br %cond, ^bb1(%arg1 : i32), ^bb2(%arg0 : i32)
+  cf.cond_br %cond, ^bb1(%arg1 : i32), ^bb2(%arg0 : i32)
 
 ^bb1(%arg2 : i32):
   return %arg0, %arg2 : i32, i32
@@ -101,9 +101,9 @@ func @mismatch_argument_uses(%cond : i1, %arg0 : i32, %arg1 : i32) -> (i32, i32)
 
 // CHECK-LABEL: func @mismatch_argument_types(
 func @mismatch_argument_types(%cond : i1, %arg0 : i32, %arg1 : i16) {
-  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
 
-  cond_br %cond, ^bb1(%arg0 : i32), ^bb2(%arg1 : i16)
+  cf.cond_br %cond, ^bb1(%arg0 : i32), ^bb2(%arg1 : i16)
 
 ^bb1(%arg2 : i32):
   "foo.return"(%arg2) : (i32) -> ()
@@ -115,9 +115,9 @@ func @mismatch_argument_types(%cond : i1, %arg0 : i32, %arg1 : i16) {
 
 // CHECK-LABEL: func @mismatch_argument_count(
 func @mismatch_argument_count(%cond : i1, %arg0 : i32) {
-  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
 
-  cond_br %cond, ^bb1(%arg0 : i32), ^bb2
+  cf.cond_br %cond, ^bb1(%arg0 : i32), ^bb2
 
 ^bb1(%arg2 : i32):
   "foo.return"(%arg2) : (i32) -> ()
@@ -129,9 +129,9 @@ func @mismatch_argument_count(%cond : i1, %arg0 : i32) {
 
 // CHECK-LABEL: func @mismatch_operations(
 func @mismatch_operations(%cond : i1) {
-  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
 
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   "foo.return"() : () -> ()
@@ -143,9 +143,9 @@ func @mismatch_operations(%cond : i1) {
 
 // CHECK-LABEL: func @mismatch_operation_count(
 func @mismatch_operation_count(%cond : i1) {
-  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
 
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   "foo.op"() : () -> ()
@@ -158,9 +158,9 @@ func @mismatch_operation_count(%cond : i1) {
 
 // CHECK-LABEL: func @contains_regions(
 func @contains_regions(%cond : i1) {
-  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
 
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   scf.if %cond {
@@ -180,19 +180,19 @@ func @contains_regions(%cond : i1) {
 // CHECK-SAME: %[[ARG:.*]]: i1, %[[ARG2:.*]]: i1
 func @mismatch_loop(%cond : i1, %cond2 : i1) {
   // CHECK-NEXT: %[[LOOP_CARRY:.*]] = "foo.op"
-  // CHECK: cond_br %{{.*}}, ^bb1(%[[ARG2]] : i1), ^bb2
+  // CHECK: cf.cond_br %{{.*}}, ^bb1(%[[ARG2]] : i1), ^bb2
 
   %cond3 = "foo.op"() : () -> (i1)
-  cond_br %cond, ^bb2, ^bb3
+  cf.cond_br %cond, ^bb2, ^bb3
 
 ^bb1:
   // CHECK: ^bb1(%[[ARG3:.*]]: i1):
-  // CHECK-NEXT: cond_br %[[ARG3]], ^bb1(%[[LOOP_CARRY]] : i1), ^bb2
+  // CHECK-NEXT: cf.cond_br %[[ARG3]], ^bb1(%[[LOOP_CARRY]] : i1), ^bb2
 
-  cond_br %cond3, ^bb1, ^bb3
+  cf.cond_br %cond3, ^bb1, ^bb3
 
 ^bb2:
-  cond_br %cond2, ^bb1, ^bb3
+  cf.cond_br %cond2, ^bb1, ^bb3
 
 ^bb3:
   // CHECK: ^bb2:
@@ -207,20 +207,20 @@ func @mismatch_loop(%cond : i1, %cond2 : i1) {
 func @mismatch_operand_types(%arg0 : i1, %arg1 : memref<i32>, %arg2 : memref<i1>) {
   %c0_i32 = arith.constant 0 : i32
   %true = arith.constant true
-  br ^bb1
+  cf.br ^bb1
 
 ^bb1:
-  cond_br %arg0, ^bb2, ^bb3
+  cf.cond_br %arg0, ^bb2, ^bb3
 
 ^bb2:
   // CHECK: memref.store %{{.*}}, %{{.*}} : memref<i32>
   memref.store %c0_i32, %arg1[] : memref<i32>
-  br ^bb1
+  cf.br ^bb1
 
 ^bb3:
   // CHECK: memref.store %{{.*}}, %{{.*}} : memref<i1>
   memref.store %true, %arg2[] : memref<i1>
-  br ^bb1
+  cf.br ^bb1
 }
 
 // Check that it is illegal to merge blocks containing an operand
@@ -232,21 +232,21 @@ func private @print(%arg0: i32, %arg1: i32)
 func @nomerge(%arg0: i32, %i: i32) {
   %c1_i32 = arith.constant 1 : i32
   %icmp = arith.cmpi slt, %i, %arg0 : i32
-  cond_br %icmp, ^bb2, ^bb3
+  cf.cond_br %icmp, ^bb2, ^bb3
 
 ^bb2:  // pred: ^bb1
   %ip1 = arith.addi %i, %c1_i32 : i32
-  br ^bb4(%ip1 : i32)
+  cf.br ^bb4(%ip1 : i32)
 
 ^bb7:  // pred: ^bb5
   %jp1 = arith.addi %j, %c1_i32 : i32
-  br ^bb4(%jp1 : i32)
+  cf.br ^bb4(%jp1 : i32)
 
 ^bb4(%j: i32):  // 2 preds: ^bb2, ^bb7
   %jcmp = arith.cmpi slt, %j, %arg0 : i32
 // CHECK-NOT:  call @print(%[[arg1:.+]], %[[arg1]])
   call @print(%j, %ip1) : (i32, i32) -> ()
-  cond_br %jcmp, ^bb7, ^bb3
+  cf.cond_br %jcmp, ^bb7, ^bb3
 
 ^bb3:  // pred: ^bb1
   return

diff  --git a/mlir/test/Transforms/canonicalize-dce.mlir b/mlir/test/Transforms/canonicalize-dce.mlir
index 0aa684093072c..9850fdfc781b8 100644
--- a/mlir/test/Transforms/canonicalize-dce.mlir
+++ b/mlir/test/Transforms/canonicalize-dce.mlir
@@ -30,15 +30,15 @@ func @f(%arg0: f32) {
 // Test case: Deleting recursively dead block arguments.
 
 // CHECK:      func @f(%arg0: f32)
-// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT:   cf.br ^bb1
 // CHECK-NEXT: ^bb1:
-// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT:   cf.br ^bb1
 
 
 func @f(%arg0: f32) {
-  br ^loop(%arg0: f32)
+  cf.br ^loop(%arg0: f32)
 ^loop(%loop: f32):
-  br ^loop(%loop: f32)
+  cf.br ^loop(%loop: f32)
 }
 
 // -----
@@ -46,27 +46,27 @@ func @f(%arg0: f32) {
 // Test case: Deleting recursively dead block arguments with pure ops in between.
 
 // CHECK:      func @f(%arg0: f32)
-// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT:   cf.br ^bb1
 // CHECK-NEXT: ^bb1:
-// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT:   cf.br ^bb1
 
 func @f(%arg0: f32) {
-  br ^loop(%arg0: f32)
+  cf.br ^loop(%arg0: f32)
 ^loop(%0: f32):
   %1 = "math.exp"(%0) : (f32) -> f32
-  br ^loop(%1: f32)
+  cf.br ^loop(%1: f32)
 }
 
 // -----
 
-// Test case: Delete block arguments for cond_br.
+// Test case: Delete block arguments for cf.cond_br.
 
 // CHECK:      func @f(%arg0: f32, %arg1: i1)
 // CHECK-NEXT:   return
 
 func @f(%arg0: f32, %pred: i1) {
   %exp = "math.exp"(%arg0) : (f32) -> f32
-  cond_br %pred, ^true(%exp: f32), ^false(%exp: f32)
+  cf.cond_br %pred, ^true(%exp: f32), ^false(%exp: f32)
 ^true(%0: f32):
   return
 ^false(%1: f32):

diff  --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index d32ee1a16d941..37abd219b013d 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -389,7 +389,7 @@ func @dead_dealloc_fold() {
 func @dead_dealloc_fold_multi_use(%cond : i1) {
   // CHECK-NEXT: return
   %a = memref.alloc() : memref<4xf32>
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   memref.dealloc %a: memref<4xf32>

diff  --git a/mlir/test/Transforms/control-flow-sink.mlir b/mlir/test/Transforms/control-flow-sink.mlir
index ba895ebc493e5..1327e01ab1b19 100644
--- a/mlir/test/Transforms/control-flow-sink.mlir
+++ b/mlir/test/Transforms/control-flow-sink.mlir
@@ -117,7 +117,7 @@ func @test_subgraph_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
 // CHECK-NEXT: %[[V1:.*]] = "test.any_cond"() ({
 // CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V0]], %[[ARG2]]
 // CHECK-NEXT:   %[[V4:.*]] = arith.addi %[[V3]], %[[ARG1]]
-// CHECK-NEXT:   br ^bb1(%[[V4]] : i32)
+// CHECK-NEXT:   cf.br ^bb1(%[[V4]] : i32)
 // CHECK-NEXT: ^bb1(%[[V5:.*]]: i32):
 // CHECK-NEXT:   %[[V6:.*]] = arith.addi %[[V5]], %[[V4]]
 // CHECK-NEXT:   "test.yield"(%[[V6]])
@@ -129,7 +129,7 @@ func @test_multiblock_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   %1 = arith.addi %0, %arg2 : i32
   %2 = arith.addi %1, %arg1 : i32
   %3 = "test.any_cond"() ({
-    br ^bb1(%2 : i32)
+    cf.br ^bb1(%2 : i32)
   ^bb1(%5: i32):
     %6 = arith.addi %5, %2 : i32
     "test.yield"(%6) : (i32) -> ()
@@ -184,7 +184,7 @@ func @test_nested_region_sink(%arg0: i1, %arg1: i32) -> i32 {
 // CHECK-SAME:  (%[[ARG0:.*]]: i32) -> i32 {
 // CHECK-NEXT: %[[V0:.*]] = "test.any_cond"() ({
 // CHECK-NEXT:   %[[V1:.*]] = arith.addi %[[ARG0]], %[[ARG0]]
-// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT:   cf.br ^bb1
 // CHECK-NEXT: ^bb1:
 // CHECK-NEXT:   "test.yield"(%[[V1]]) : (i32) -> ()
 // CHECK-NEXT: })
@@ -192,7 +192,7 @@ func @test_nested_region_sink(%arg0: i1, %arg1: i32) -> i32 {
 func @test_not_sunk_deeply(%arg0: i32) -> i32 {
   %0 = arith.addi %arg0, %arg0 : i32
   %1 = "test.any_cond"() ({
-    br ^bb1
+    cf.br ^bb1
   ^bb1:
     "test.yield"(%0) : (i32) -> ()
   }) : () -> i32

diff  --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir
index a1211b943f4fa..eae8595f8af8b 100644
--- a/mlir/test/Transforms/cse.mlir
+++ b/mlir/test/Transforms/cse.mlir
@@ -130,13 +130,13 @@ func @down_propagate() -> i32 {
   // CHECK-NEXT: %true = arith.constant true
   %cond = arith.constant true
 
-  // CHECK-NEXT: cond_br %true, ^bb1, ^bb2(%c1_i32 : i32)
-  cond_br %cond, ^bb1, ^bb2(%0 : i32)
+  // CHECK-NEXT: cf.cond_br %true, ^bb1, ^bb2(%c1_i32 : i32)
+  cf.cond_br %cond, ^bb1, ^bb2(%0 : i32)
 
 ^bb1: // CHECK: ^bb1:
-  // CHECK-NEXT: br ^bb2(%c1_i32 : i32)
+  // CHECK-NEXT: cf.br ^bb2(%c1_i32 : i32)
   %1 = arith.constant 1 : i32
-  br ^bb2(%1 : i32)
+  cf.br ^bb2(%1 : i32)
 
 ^bb2(%arg : i32):
   return %arg : i32
@@ -167,15 +167,15 @@ func @up_propagate() -> i32 {
   // CHECK-NEXT: %true = arith.constant true
   %cond = arith.constant true
 
-  // CHECK-NEXT: cond_br %true, ^bb1, ^bb2(%c0_i32 : i32)
-  cond_br %cond, ^bb1, ^bb2(%0 : i32)
+  // CHECK-NEXT: cf.cond_br %true, ^bb1, ^bb2(%c0_i32 : i32)
+  cf.cond_br %cond, ^bb1, ^bb2(%0 : i32)
 
 ^bb1: // CHECK: ^bb1:
   // CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
   %1 = arith.constant 1 : i32
 
-  // CHECK-NEXT: br ^bb2(%c1_i32 : i32)
-  br ^bb2(%1 : i32)
+  // CHECK-NEXT: cf.br ^bb2(%c1_i32 : i32)
+  cf.br ^bb2(%1 : i32)
 
 ^bb2(%arg : i32): // CHECK: ^bb2
   // CHECK-NEXT: %c1_i32_0 = arith.constant 1 : i32
@@ -196,18 +196,18 @@ func @up_propagate_region() -> i32 {
   %0 = "foo.region"() ({
     // CHECK-NEXT:  %c0_i32 = arith.constant 0 : i32
     // CHECK-NEXT: %true = arith.constant true
-    // CHECK-NEXT: cond_br
+    // CHECK-NEXT: cf.cond_br
 
     %1 = arith.constant 0 : i32
     %true = arith.constant true
-    cond_br %true, ^bb1, ^bb2(%1 : i32)
+    cf.cond_br %true, ^bb1, ^bb2(%1 : i32)
 
   ^bb1: // CHECK: ^bb1:
     // CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
-    // CHECK-NEXT: br
+    // CHECK-NEXT: cf.br
 
     %c1_i32 = arith.constant 1 : i32
-    br ^bb2(%c1_i32 : i32)
+    cf.br ^bb2(%c1_i32 : i32)
 
   ^bb2(%arg : i32): // CHECK: ^bb2(%1: i32):
     // CHECK-NEXT: %c1_i32_0 = arith.constant 1 : i32

diff  --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir
index 4c894eb0a9e80..4d762edd58441 100644
--- a/mlir/test/Transforms/inlining.mlir
+++ b/mlir/test/Transforms/inlining.mlir
@@ -20,7 +20,7 @@ func @inline_with_arg(%arg0 : i32) -> i32 {
 
 // Inline a function that has multiple return operations.
 func @func_with_multi_return(%a : i1) -> (i32) {
-  cond_br %a, ^bb1, ^bb2
+  cf.cond_br %a, ^bb1, ^bb2
 
 ^bb1:
   %const_0 = arith.constant 0 : i32
@@ -34,13 +34,13 @@ func @func_with_multi_return(%a : i1) -> (i32) {
 // CHECK-LABEL: func @inline_with_multi_return() -> i32
 func @inline_with_multi_return() -> i32 {
 // CHECK-NEXT:    [[VAL_7:%.*]] = arith.constant false
-// CHECK-NEXT:    cond_br [[VAL_7]], ^bb1, ^bb2
+// CHECK-NEXT:    cf.cond_br [[VAL_7]], ^bb1, ^bb2
 // CHECK:       ^bb1:
 // CHECK-NEXT:    [[VAL_8:%.*]] = arith.constant 0 : i32
-// CHECK-NEXT:    br ^bb3([[VAL_8]] : i32)
+// CHECK-NEXT:    cf.br ^bb3([[VAL_8]] : i32)
 // CHECK:       ^bb2:
 // CHECK-NEXT:    [[VAL_9:%.*]] = arith.constant 55 : i32
-// CHECK-NEXT:    br ^bb3([[VAL_9]] : i32)
+// CHECK-NEXT:    cf.br ^bb3([[VAL_9]] : i32)
 // CHECK:       ^bb3([[VAL_10:%.*]]: i32):
 // CHECK-NEXT:    return [[VAL_10]] : i32
 
@@ -133,7 +133,7 @@ func @inline_convert_call() -> i16 {
 }
 
 func @convert_callee_fn_multiblock() -> i32 {
-  br ^bb0
+  cf.br ^bb0
 ^bb0:
   %0 = arith.constant 0 : i32
   return %0 : i32
@@ -141,10 +141,10 @@ func @convert_callee_fn_multiblock() -> i32 {
 
 // CHECK-LABEL: func @inline_convert_result_multiblock
 func @inline_convert_result_multiblock() -> i16 {
-// CHECK:   br ^bb1 {inlined_conversion}
+// CHECK:   cf.br ^bb1 {inlined_conversion}
 // CHECK: ^bb1:
 // CHECK:   %[[C:.+]] = arith.constant {inlined_conversion} 0 : i32
-// CHECK:   br ^bb2(%[[C]] : i32)
+// CHECK:   cf.br ^bb2(%[[C]] : i32)
 // CHECK: ^bb2(%[[BBARG:.+]]: i32):
 // CHECK:   %[[CAST_RESULT:.+]] = "test.cast"(%[[BBARG]]) : (i32) -> i16
 // CHECK:   return %[[CAST_RESULT]] : i16
@@ -206,14 +206,14 @@ func @inline_gpu_ops() -> memref<1024xf32> {
 // Test block arguments location propagation.
 // Use two call-sites to force cloning.
 func @func_with_block_args_location(%arg0 : i32) {
-  br ^bb1(%arg0 : i32)
+  cf.br ^bb1(%arg0 : i32)
 ^bb1(%x : i32 loc("foo")):
   "test.foo" (%x) : (i32) -> () loc("bar")
   return
 }
 
 // INLINE-LOC-LABEL: func @func_with_block_args_location_callee1
-// INLINE-LOC: br
+// INLINE-LOC: cf.br
 // INLINE-LOC: ^bb{{[0-9]+}}(%{{.*}}: i32 loc("foo")
 func @func_with_block_args_location_callee1(%arg0 : i32) {
   call @func_with_block_args_location(%arg0) : (i32) -> ()

diff  --git a/mlir/test/Transforms/normalize-memrefs.mlir b/mlir/test/Transforms/normalize-memrefs.mlir
index 9bc1353318668..95c867465aa30 100644
--- a/mlir/test/Transforms/normalize-memrefs.mlir
+++ b/mlir/test/Transforms/normalize-memrefs.mlir
@@ -206,7 +206,7 @@ func @ret_multiple_argument_type(%A: memref<16xf64, #tile>, %B: f64, %C: memref<
   %a = affine.load %A[0] : memref<16xf64, #tile>
   %p = arith.mulf %a, %a : f64
   %cond = arith.constant 1 : i1
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
   ^bb1:
     %res1, %res2 = call @ret_single_argument_type(%C) : (memref<8xf64, #tile>) -> (memref<16xf64, #tile>, memref<8xf64, #tile>)
     return %res2, %p: memref<8xf64, #tile>, f64
@@ -217,7 +217,7 @@ func @ret_multiple_argument_type(%A: memref<16xf64, #tile>, %B: f64, %C: memref<
 // CHECK:   %[[a:[0-9]+]] = affine.load %[[A]][0, 0] : memref<4x4xf64>
 // CHECK:   %[[p:[0-9]+]] = arith.mulf %[[a]], %[[a]] : f64
 // CHECK:   %true = arith.constant true
-// CHECK:   cond_br %true, ^bb1, ^bb2
+// CHECK:   cf.cond_br %true, ^bb1, ^bb2
 // CHECK: ^bb1:  // pred: ^bb0
 // CHECK:   %[[res:[0-9]+]]:2 = call @ret_single_argument_type(%[[C]]) : (memref<2x4xf64>) -> (memref<4x4xf64>, memref<2x4xf64>)
 // CHECK:   return %[[res]]#1, %[[p]] : memref<2x4xf64>, f64

diff  --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir
index 5dfe9ccf8fad7..31abdbfb794f0 100644
--- a/mlir/test/Transforms/promote-buffers-to-stack.mlir
+++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir
@@ -16,19 +16,19 @@
 
 // CHECK-LABEL: func @condBranch
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br {{.*}}
+// CHECK-NEXT: cf.cond_br {{.*}}
 //      CHECK: ^bb2
 // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
 //      CHECK: test.copy
@@ -51,24 +51,24 @@ func @condBranchDynamicType(
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
   %arg3: index) {
-  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
 ^bb1:
-  br ^bb3(%arg1 : memref<?xf32>)
+  cf.br ^bb3(%arg1 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
   test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
-  br ^bb3(%1 : memref<?xf32>)
+  cf.br ^bb3(%1 : memref<?xf32>)
 ^bb3(%2: memref<?xf32>):
   test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br
+// CHECK-NEXT: cf.cond_br
 //      CHECK: ^bb2
 //      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
 // CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]])
 // CHECK-NEXT: test.buffer_based
-//      CHECK: br ^bb3
+//      CHECK: cf.br ^bb3
 // CHECK-NEXT: ^bb3(%[[ALLOC0:.*]]:{{.*}})
 //      CHECK: test.copy(%[[ALLOC0]],
 // CHECK-NEXT: return
@@ -135,17 +135,17 @@ func @emptyUsesValue(%arg0: memref<4xf32>) {
 
 // CHECK-LABEL: func @criticalEdge
 func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^bb2(%0 : memref<2xf32>)
+  cf.br ^bb2(%0 : memref<2xf32>)
 ^bb2(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br {{.*}}
+// CHECK-NEXT: cf.cond_br {{.*}}
 //      CHECK: ^bb1
 // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
 //      CHECK: test.copy
@@ -165,16 +165,16 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
-  br ^bb2(%0 : memref<2xf32>)
+  cf.br ^bb2(%0 : memref<2xf32>)
 ^bb2(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
 // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
-//      CHECK: cond_br
+//      CHECK: cf.cond_br
 //      CHECK: test.copy
 // CHECK-NEXT: return
 
@@ -192,13 +192,13 @@ func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   %7 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
@@ -227,13 +227,13 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
@@ -260,17 +260,17 @@ func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
-  cond_br %arg0,
+  cf.cond_br %arg0,
     ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
     ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
-  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
-  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
 ^bb3(%5: memref<2xf32>):
-  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
 ^bb4(%6: memref<2xf32>):
-  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
 ^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
   %9 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
@@ -323,21 +323,21 @@ func @moving_alloc_and_inserting_missing_dealloc(
   %cond: i1,
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
-  br ^exit(%0 : memref<2xf32>)
+  cf.br ^exit(%0 : memref<2xf32>)
 ^bb2:
   %1 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
-  br ^exit(%1 : memref<2xf32>)
+  cf.br ^exit(%1 : memref<2xf32>)
 ^exit(%arg2: memref<2xf32>):
   test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT: cond_br {{.*}}
+// CHECK-NEXT: cf.cond_br {{.*}}
 //      CHECK: ^bb1
 // CHECK-NEXT: %{{.*}} = memref.alloca()
 //      CHECK: ^bb2
@@ -357,9 +357,9 @@ func @nested_regions_and_cond_branch(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
   test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
@@ -369,13 +369,13 @@ func @nested_regions_and_cond_branch(
     %tmp1 = math.exp %gen1_arg0 : f32
     test.region_yield %tmp1 : f32
   }
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
   return
 }
 
-// CHECK-NEXT:   cond_br {{.*}}
+// CHECK-NEXT:   cf.cond_br {{.*}}
 //      CHECK:   ^bb2
 // CHECK-NEXT:   %[[ALLOCA0:.*]] = memref.alloca()
 //      CHECK:   ^bb0

diff  --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir
index 7bec5dcb5d8a3..6df1a2e044a39 100644
--- a/mlir/test/Transforms/sccp-callgraph.mlir
+++ b/mlir/test/Transforms/sccp-callgraph.mlir
@@ -185,11 +185,11 @@ func @conflicting_constant(%arg0 : i32) -> (i32, i32) {
 func private @complex_inner_if(%arg0 : i32) -> i32 {
   // CHECK-DAG: %[[TRUE:.*]] = arith.constant true
   // CHECK-DAG: %[[CST:.*]] = arith.constant 1 : i32
-  // CHECK: cond_br %[[TRUE]], ^bb1
+  // CHECK: cf.cond_br %[[TRUE]], ^bb1
 
   %cst_20 = arith.constant 20 : i32
   %cond = arith.cmpi ult, %arg0, %cst_20 : i32
-  cond_br %cond, ^bb1, ^bb2
+  cf.cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
   // CHECK: ^bb1:
@@ -211,7 +211,7 @@ func private @complex_callee(%arg0 : i32) -> i32 {
   // CHECK: %[[CST:.*]] = arith.constant 1 : i32
 
   %loop_cond = call @complex_cond() : () -> i1
-  cond_br %loop_cond, ^bb1, ^bb2
+  cf.cond_br %loop_cond, ^bb1, ^bb2
 
 ^bb1:
   // CHECK: ^bb1:

diff  --git a/mlir/test/Transforms/sccp.mlir b/mlir/test/Transforms/sccp.mlir
index 3ff1b526fc525..73e321fce432f 100644
--- a/mlir/test/Transforms/sccp.mlir
+++ b/mlir/test/Transforms/sccp.mlir
@@ -22,10 +22,10 @@ func @simple_control_flow(%arg0 : i32) -> i32 {
 
   %cond = arith.constant true
   %1 = arith.constant 1 : i32
-  cond_br %cond, ^bb1, ^bb2(%arg0 : i32)
+  cf.cond_br %cond, ^bb1, ^bb2(%arg0 : i32)
 
 ^bb1:
-  br ^bb2(%1 : i32)
+  cf.br ^bb2(%1 : i32)
 
 ^bb2(%arg : i32):
   // CHECK: ^bb2(%{{.*}}: i32):
@@ -40,10 +40,10 @@ func @simple_control_flow(%arg0 : i32) -> i32 {
 // CHECK-LABEL: func @simple_control_flow_overdefined
 func @simple_control_flow_overdefined(%arg0 : i32, %arg1 : i1) -> i32 {
   %1 = arith.constant 1 : i32
-  cond_br %arg1, ^bb1, ^bb2(%arg0 : i32)
+  cf.cond_br %arg1, ^bb1, ^bb2(%arg0 : i32)
 
 ^bb1:
-  br ^bb2(%1 : i32)
+  cf.br ^bb2(%1 : i32)
 
 ^bb2(%arg : i32):
   // CHECK: ^bb2(%[[ARG:.*]]: i32):
@@ -59,10 +59,10 @@ func @simple_control_flow_overdefined(%arg0 : i32, %arg1 : i1) -> i32 {
 func @simple_control_flow_constant_overdefined(%arg0 : i32, %arg1 : i1) -> i32 {
   %1 = arith.constant 1 : i32
   %2 = arith.constant 2 : i32
-  cond_br %arg1, ^bb1, ^bb2(%arg0 : i32)
+  cf.cond_br %arg1, ^bb1, ^bb2(%arg0 : i32)
 
 ^bb1:
-  br ^bb2(%2 : i32)
+  cf.br ^bb2(%2 : i32)
 
 ^bb2(%arg : i32):
   // CHECK: ^bb2(%[[ARG:.*]]: i32):
@@ -79,7 +79,7 @@ func @unknown_terminator(%arg0 : i32, %arg1 : i1) -> i32 {
   "foo.cond_br"() [^bb1, ^bb2] : () -> ()
 
 ^bb1:
-  br ^bb2(%1 : i32)
+  cf.br ^bb2(%1 : i32)
 
 ^bb2(%arg : i32):
   // CHECK: ^bb2(%[[ARG:.*]]: i32):
@@ -97,17 +97,17 @@ func @simple_loop(%arg0 : i32, %cond1 : i1) -> i32 {
   // CHECK: %[[CST:.*]] = arith.constant 1 : i32
 
   %cst_1 = arith.constant 1 : i32
-  cond_br %cond1, ^bb1(%cst_1 : i32), ^bb2(%cst_1 : i32)
+  cf.cond_br %cond1, ^bb1(%cst_1 : i32), ^bb2(%cst_1 : i32)
 
 ^bb1(%iv: i32):
   // CHECK: ^bb1(%{{.*}}: i32):
   // CHECK-NEXT: %[[COND:.*]] = call @ext_cond_fn()
-  // CHECK-NEXT: cond_br %[[COND]], ^bb1(%[[CST]] : i32), ^bb2(%[[CST]] : i32)
+  // CHECK-NEXT: cf.cond_br %[[COND]], ^bb1(%[[CST]] : i32), ^bb2(%[[CST]] : i32)
 
   %cst_0 = arith.constant 0 : i32
   %res = arith.addi %iv, %cst_0 : i32
   %cond2 = call @ext_cond_fn() : () -> i1
-  cond_br %cond2, ^bb1(%res : i32), ^bb2(%res : i32)
+  cf.cond_br %cond2, ^bb1(%res : i32), ^bb2(%res : i32)
 
 ^bb2(%arg : i32):
   // CHECK: ^bb2(%{{.*}}: i32):
@@ -126,30 +126,30 @@ func @simple_loop_inner_control_flow(%arg0 : i32) -> i32 {
   // CHECK-DAG: %[[TRUE:.*]] = arith.constant true
 
   %cst_1 = arith.constant 1 : i32
-  br ^bb1(%cst_1 : i32)
+  cf.br ^bb1(%cst_1 : i32)
 
 ^bb1(%iv: i32):
   %cond2 = call @ext_cond_fn() : () -> i1
-  cond_br %cond2, ^bb5(%iv : i32), ^bb2
+  cf.cond_br %cond2, ^bb5(%iv : i32), ^bb2
 
 ^bb2:
   // CHECK: ^bb2:
-  // CHECK: cond_br %[[TRUE]], ^bb3, ^bb4
+  // CHECK: cf.cond_br %[[TRUE]], ^bb3, ^bb4
 
   %cst_20 = arith.constant 20 : i32
   %cond = arith.cmpi ult, %iv, %cst_20 : i32
-  cond_br %cond, ^bb3, ^bb4
+  cf.cond_br %cond, ^bb3, ^bb4
 
 ^bb3:
   // CHECK: ^bb3:
-  // CHECK: br ^bb1(%[[CST]] : i32)
+  // CHECK: cf.br ^bb1(%[[CST]] : i32)
 
   %cst_1_2 = arith.constant 1 : i32
-  br ^bb1(%cst_1_2 : i32)
+  cf.br ^bb1(%cst_1_2 : i32)
 
 ^bb4:
   %iv_inc = arith.addi %iv, %cst_1 : i32
-  br ^bb1(%iv_inc : i32)
+  cf.br ^bb1(%iv_inc : i32)
 
 ^bb5(%result: i32):
   // CHECK: ^bb5(%{{.*}}: i32):
@@ -166,11 +166,11 @@ func private @ext_cond_and_value_fn() -> (i1, i32)
 // CHECK-LABEL: func @simple_loop_overdefined
 func @simple_loop_overdefined(%arg0 : i32, %cond1 : i1) -> i32 {
   %cst_1 = arith.constant 1 : i32
-  cond_br %cond1, ^bb1(%cst_1 : i32), ^bb2(%cst_1 : i32)
+  cf.cond_br %cond1, ^bb1(%cst_1 : i32), ^bb2(%cst_1 : i32)
 
 ^bb1(%iv: i32):
   %cond2, %res = call @ext_cond_and_value_fn() : () -> (i1, i32)
-  cond_br %cond2, ^bb1(%res : i32), ^bb2(%res : i32)
+  cf.cond_br %cond2, ^bb1(%res : i32), ^bb2(%res : i32)
 
 ^bb2(%arg : i32):
   // CHECK: ^bb2(%[[ARG:.*]]: i32):
@@ -185,13 +185,13 @@ func @simple_loop_overdefined(%arg0 : i32, %cond1 : i1) -> i32 {
 func @recheck_executable_edge(%cond0: i1) -> (i1, i1) {
   %true = arith.constant true
   %false = arith.constant false
-  cond_br %cond0, ^bb_1a, ^bb2(%false : i1)
+  cf.cond_br %cond0, ^bb_1a, ^bb2(%false : i1)
 ^bb_1a:
-  br ^bb2(%true : i1)
+  cf.br ^bb2(%true : i1)
 
 ^bb2(%x: i1):
   // CHECK: ^bb2(%[[X:.*]]: i1):
-  br ^bb3(%x : i1)
+  cf.br ^bb3(%x : i1)
 
 ^bb3(%y: i1):
   // CHECK: ^bb3(%[[Y:.*]]: i1):

diff  --git a/mlir/test/Transforms/test-legalizer-full.mlir b/mlir/test/Transforms/test-legalizer-full.mlir
index 5480d3d3d7286..586635d4eb58a 100644
--- a/mlir/test/Transforms/test-legalizer-full.mlir
+++ b/mlir/test/Transforms/test-legalizer-full.mlir
@@ -89,8 +89,8 @@ builtin.module {
   func @test_undo_region_inline() {
     "test.region"() ({
       ^bb1(%i0: i64):
-        // expected-error at +1 {{failed to legalize operation 'std.br'}}
-        br ^bb2(%i0 : i64)
+        // expected-error at +1 {{failed to legalize operation 'cf.br'}}
+        cf.br ^bb2(%i0 : i64)
       ^bb2(%i1: i64):
         "test.invalid"(%i1) : (i64) -> ()
     }) {} : () -> ()
@@ -110,7 +110,7 @@ builtin.module {
     // expected-error at +1 {{failed to legalize operation 'test.region'}}
     "test.region"() ({
       ^bb1(%i0: i64):
-        br ^bb2(%i0 : i64)
+        cf.br ^bb2(%i0 : i64)
       ^bb2(%i1: i64):
         "test.invalid"(%i1) : (i64) -> ()
     }) {legalizer.should_clone, legalizer.erase_old_blocks} : () -> ()

diff  --git a/mlir/test/mlir-cpu-runner/async-error.mlir b/mlir/test/mlir-cpu-runner/async-error.mlir
index 63becf3aa6f6b..465af45859b00 100644
--- a/mlir/test/mlir-cpu-runner/async-error.mlir
+++ b/mlir/test/mlir-cpu-runner/async-error.mlir
@@ -3,7 +3,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-linalg-to-llvm                                  \
 // RUN:               -convert-vector-to-llvm                                  \
 // RUN:               -convert-arith-to-llvm                                   \
@@ -35,7 +35,7 @@ func @main() {
   // Check that assertion in the async region converted to async error.
   // ------------------------------------------------------------------------ //
   %token1 = async.execute {
-    assert %false, "error"
+    cf.assert %false, "error"
     async.yield
   }
   async.runtime.await %token1 : !async.token
@@ -49,7 +49,7 @@ func @main() {
   // ------------------------------------------------------------------------ //
   %token2 = async.execute {
     %token = async.execute {
-      assert %false, "error"
+      cf.assert %false, "error"
       async.yield
     }
     async.await %token : !async.token
@@ -66,7 +66,7 @@ func @main() {
   // ------------------------------------------------------------------------ //
   %token3, %value3 = async.execute -> !async.value<f32> {
     %token, %value = async.execute -> !async.value<f32> {
-      assert %false, "error"
+      cf.assert %false, "error"
       %0 = arith.constant 123.45 : f32
       async.yield %0 : f32
     }
@@ -95,7 +95,7 @@ func @main() {
   }
 
   %token5 = async.execute {
-    assert %false, "error"
+    cf.assert %false, "error"
     async.yield
   }
 

diff  --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
index f021c89209e37..c5fc875084ff7 100644
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -3,7 +3,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                      \
 // RUN:               -convert-linalg-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-arith-to-llvm                                   \

diff  --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
index 214e820cb0f4d..4e43350f396d4 100644
--- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
+++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
 
 // Verify bare pointer memref calling convention. `simple_add1_add2_test`
 // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second

diff  --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir
index 35cedc46e2b15..fcf8bcc1eb727 100644
--- a/mlir/test/mlir-cpu-runner/copy.mlir
+++ b/mlir/test/mlir-cpu-runner/copy.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff  --git a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
index c33f813957731..138724fc92bd3 100644
--- a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
+++ b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-memref-to-llvm -convert-arith-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
+// RUN: mlir-opt %s -convert-scf-to-cf -convert-memref-to-llvm -convert-arith-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff  --git a/mlir/test/mlir-cpu-runner/memref-reshape.mlir b/mlir/test/mlir-cpu-runner/memref-reshape.mlir
index e74d6219a1f33..4ceb55654c770 100644
--- a/mlir/test/mlir-cpu-runner/memref-reshape.mlir
+++ b/mlir/test/mlir-cpu-runner/memref-reshape.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -memref-expand -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
+// RUN: mlir-opt %s -convert-scf-to-cf -memref-expand -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff  --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
index 7187f176d0553..fbfa0cde032cb 100644
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-arith-to-llvm -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-cf -convert-arith-to-llvm -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 
 func @main() {
   %A = memref.alloc() : memref<16x16xf32>

diff  --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
index d7a60205fbe8d..5fc24d0f5125c 100644
--- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -convert-linalg-to-loops             \
-// RUN:             -convert-scf-to-std                  \
+// RUN:             -convert-scf-to-cf                  \
 // RUN:             -convert-arith-to-llvm               \
 // RUN:             -convert-linalg-to-llvm              \
 // RUN:             -convert-memref-to-llvm              \

diff  --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
index d6b4f472f1566..baf5ea52e7981 100644
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-arith-to-llvm -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
 
 func @print_0d() {
   %f = arith.constant 2.00000e+00 : f32

diff  --git a/mlir/test/mlir-lsp-server/hover.test b/mlir/test/mlir-lsp-server/hover.test
index 8467f24852f62..cf981100f3209 100644
--- a/mlir/test/mlir-lsp-server/hover.test
+++ b/mlir/test/mlir-lsp-server/hover.test
@@ -5,7 +5,7 @@
   "uri":"test:///foo.mlir",
   "languageId":"mlir",
   "version":1,
-  "text":"func @foo(%arg: i1) {\n%value = arith.constant true\nbr ^bb2\n^bb2:\nreturn\n}"
+  "text":"func @foo(%arg: i1) {\n%value = arith.constant true\ncf.br ^bb2\n^bb2:\nreturn\n}"
 }}}
 // -----
 // Hover on an operation.

diff  --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir
index 265666f21a076..f7dafeacd0258 100644
--- a/mlir/test/mlir-opt/async.mlir
+++ b/mlir/test/mlir-opt/async.mlir
@@ -5,7 +5,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-linalg-to-loops                                 \
-// RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-scf-to-cf                                       \
 // RUN:               -convert-linalg-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-arith-to-llvm                                   \

diff --git a/mlir/test/mlir-opt/commandline.mlir b/mlir/test/mlir-opt/commandline.mlir
index 4629240df3efc..baee3d7f0187e 100644
--- a/mlir/test/mlir-opt/commandline.mlir
+++ b/mlir/test/mlir-opt/commandline.mlir
@@ -9,6 +9,7 @@
 // CHECK-NEXT: async
 // CHECK-NEXT: bufferization
 // CHECK-NEXT: builtin
+// CHECK-NEXT: cf
 // CHECK-NEXT: complex
 // CHECK-NEXT: dlti
 // CHECK-NEXT: emitc

diff --git a/mlir/test/mlir-reduce/multiple-function.mlir b/mlir/test/mlir-reduce/multiple-function.mlir
index 22a444040fabe..98554c7afe474 100644
--- a/mlir/test/mlir-reduce/multiple-function.mlir
+++ b/mlir/test/mlir-reduce/multiple-function.mlir
@@ -22,12 +22,12 @@ func @simple3() {
 
 // CHECK-NOT: func @simple4() {
 func @simple4(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   "test.op_crash"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
   return

diff --git a/mlir/test/mlir-reduce/simple-test.mlir b/mlir/test/mlir-reduce/simple-test.mlir
index fd90da3e08392..b41726a5ae130 100644
--- a/mlir/test/mlir-reduce/simple-test.mlir
+++ b/mlir/test/mlir-reduce/simple-test.mlir
@@ -2,12 +2,12 @@
 // RUN: mlir-reduce %s -reduction-tree='traversal-mode=0 test=%S/test.sh'
 
 func @simple1(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
-  cond_br %arg0, ^bb1, ^bb2
+  cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
-  br ^bb3(%arg1 : memref<2xf32>)
+  cf.br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = memref.alloc() : memref<2xf32>
-  br ^bb3(%0 : memref<2xf32>)
+  cf.br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   return
 }

diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py
index c01614f686215..7e1fe9ab9f61e 100644
--- a/mlir/test/python/execution_engine.py
+++ b/mlir/test/python/execution_engine.py
@@ -276,28 +276,28 @@ def testDynamicMemrefAdd2D():
           %c0 = arith.constant 0 : index
           %c2 = arith.constant 2 : index
           %c1 = arith.constant 1 : index
-          br ^bb1(%c0 : index)
+          cf.br ^bb1(%c0 : index)
         ^bb1(%0: index):  // 2 preds: ^bb0, ^bb5
           %1 = arith.cmpi slt, %0, %c2 : index
-          cond_br %1, ^bb2, ^bb6
+          cf.cond_br %1, ^bb2, ^bb6
         ^bb2:  // pred: ^bb1
           %c0_0 = arith.constant 0 : index
           %c2_1 = arith.constant 2 : index
           %c1_2 = arith.constant 1 : index
-          br ^bb3(%c0_0 : index)
+          cf.br ^bb3(%c0_0 : index)
         ^bb3(%2: index):  // 2 preds: ^bb2, ^bb4
           %3 = arith.cmpi slt, %2, %c2_1 : index
-          cond_br %3, ^bb4, ^bb5
+          cf.cond_br %3, ^bb4, ^bb5
         ^bb4:  // pred: ^bb3
           %4 = memref.load %arg0[%0, %2] : memref<2x2xf32>
           %5 = memref.load %arg1[%0, %2] : memref<?x?xf32>
           %6 = arith.addf %4, %5 : f32
           memref.store %6, %arg2[%0, %2] : memref<2x2xf32>
           %7 = arith.addi %2, %c1_2 : index
-          br ^bb3(%7 : index)
+          cf.br ^bb3(%7 : index)
         ^bb5:  // pred: ^bb3
           %8 = arith.addi %0, %c1 : index
-          br ^bb1(%8 : index)
+          cf.br ^bb1(%8 : index)
         ^bb6:  // pred: ^bb1
           return
         }

diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
index e0f00168f1766..b75de12085501 100644
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -128,7 +128,7 @@ def transform(module, boilerplate):
       boilerplate)
   pm = PassManager.parse(
       "builtin.func(convert-linalg-to-loops, lower-affine, " +
-      "convert-scf-to-std, arith-expand, memref-expand), convert-vector-to-llvm," +
+      "convert-scf-to-cf, arith-expand, memref-expand), convert-vector-to-llvm," +
       "convert-memref-to-llvm, convert-std-to-llvm," +
       "reconcile-unrealized-casts")
   pm.run(mod)

diff --git a/mlir/test/python/ir/blocks.py b/mlir/test/python/ir/blocks.py
index 1bc38768949f4..811dcd7c2bdd4 100644
--- a/mlir/test/python/ir/blocks.py
+++ b/mlir/test/python/ir/blocks.py
@@ -19,9 +19,9 @@ def run(f):
 
 # CHECK-LABEL: TEST: testBlockCreation
 # CHECK: func @test(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i16)
-# CHECK:   br ^bb1(%[[ARG1]] : i16)
+# CHECK:   cf.br ^bb1(%[[ARG1]] : i16)
 # CHECK: ^bb1(%[[PHI0:.*]]: i16):
-# CHECK:   br ^bb2(%[[ARG0]] : i32)
+# CHECK:   cf.br ^bb2(%[[ARG0]] : i32)
 # CHECK: ^bb2(%[[PHI1:.*]]: i32):
 # CHECK:   return
 @run

diff --git a/mlir/test/python/ir/dialects.py b/mlir/test/python/ir/dialects.py
index 05e9222c3e318..10e1a67b931f8 100644
--- a/mlir/test/python/ir/dialects.py
+++ b/mlir/test/python/ir/dialects.py
@@ -100,7 +100,7 @@ def createInput():
 def testIsRegisteredOperation():
   ctx = Context()
 
-  # CHECK: std.cond_br: True
-  print(f"std.cond_br: {ctx.is_registered_operation('std.cond_br')}")
+  # CHECK: cf.cond_br: True
+  print(f"cf.cond_br: {ctx.is_registered_operation('cf.cond_br')}")
   # CHECK: std.not_existing: False
   print(f"std.not_existing: {ctx.is_registered_operation('std.not_existing')}")


        


More information about the flang-commits mailing list