[Mlir-commits] [mlir] 60f443b - [mlir] Change dialect namespace loop->scf

Alex Zinenko llvmlistbot at llvm.org
Wed May 13 10:20:31 PDT 2020


Author: Alex Zinenko
Date: 2020-05-13T19:20:21+02:00
New Revision: 60f443bb3b3df8ee1cb391544ad80417c9bb8293

URL: https://github.com/llvm/llvm-project/commit/60f443bb3b3df8ee1cb391544ad80417c9bb8293
DIFF: https://github.com/llvm/llvm-project/commit/60f443bb3b3df8ee1cb391544ad80417c9bb8293.diff

LOG: [mlir] Change dialect namespace loop->scf

All ops of the SCF dialect now use the `scf.` prefix instead of `loop.`. This
is part of the dialect renaming.
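
The rename changes only the dialect prefix; op syntax and semantics are
unchanged. As a minimal before/after sketch (adapted from the `scf.for`
reduction example in the ForOp documentation updated below, assuming the
surrounding definitions of %lb, %ub, %step, %sum_0 and %buffer from that
example):

```mlir
// Before this patch:
%sum = loop.for %iv = %lb to %ub step %step
    iter_args(%sum_iter = %sum_0) -> (f32) {
  %t = load %buffer[%iv] : memref<1024xf32>
  %sum_next = addf %sum_iter, %t : f32
  loop.yield %sum_next : f32
}

// After this patch, the same op spelled with the `scf.` prefix:
%sum = scf.for %iv = %lb to %ub step %step
    iter_args(%sum_iter = %sum_0) -> (f32) {
  %t = load %buffer[%iv] : memref<1024xf32>
  %sum_next = addf %sum_iter, %t : f32
  scf.yield %sum_next : f32
}
```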

Differential Revision: https://reviews.llvm.org/D79844

Added: 
    

Modified: 
    mlir/docs/EDSC.md
    mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h
    mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h
    mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h
    mlir/include/mlir/Conversion/Passes.td
    mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
    mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td
    mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
    mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
    mlir/include/mlir/Dialect/Linalg/Passes.h
    mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
    mlir/include/mlir/Dialect/SCF/CMakeLists.txt
    mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
    mlir/include/mlir/Dialect/SCF/SCF.h
    mlir/include/mlir/Dialect/SCF/SCFOps.td
    mlir/include/mlir/Dialect/SCF/Transforms.h
    mlir/include/mlir/Transforms/LoopUtils.h
    mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp
    mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
    mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
    mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
    mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
    mlir/lib/Dialect/SCF/SCF.cpp
    mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
    mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
    mlir/test/Analysis/test-dominance.mlir
    mlir/test/Analysis/test-liveness.mlir
    mlir/test/Conversion/GPUToSPIRV/if.mlir
    mlir/test/Conversion/GPUToSPIRV/loop.mlir
    mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir
    mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir
    mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir
    mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir
    mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir
    mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
    mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir
    mlir/test/Conversion/LoopsToGPU/step_one.mlir
    mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
    mlir/test/Conversion/convert-to-cfg.mlir
    mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
    mlir/test/Dialect/Affine/dma-generate.mlir
    mlir/test/Dialect/Affine/slicing-utils.mlir
    mlir/test/Dialect/Affine/unroll.mlir
    mlir/test/Dialect/GPU/mapping.mlir
    mlir/test/Dialect/GPU/promotion.mlir
    mlir/test/Dialect/Linalg/fusion-2-level.mlir
    mlir/test/Dialect/Linalg/fusion.mlir
    mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir
    mlir/test/Dialect/Linalg/llvm.mlir
    mlir/test/Dialect/Linalg/loops.mlir
    mlir/test/Dialect/Linalg/parallel_loops.mlir
    mlir/test/Dialect/Linalg/promote.mlir
    mlir/test/Dialect/Linalg/tile.mlir
    mlir/test/Dialect/Linalg/tile_conv.mlir
    mlir/test/Dialect/Linalg/tile_conv_padding.mlir
    mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
    mlir/test/Dialect/Linalg/tile_parallel.mlir
    mlir/test/Dialect/Linalg/transform-patterns.mlir
    mlir/test/Dialect/SCF/invalid.mlir
    mlir/test/Dialect/SCF/loop-unroll.mlir
    mlir/test/Dialect/SCF/ops.mlir
    mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
    mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
    mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
    mlir/test/EDSC/builder-api-test.cpp
    mlir/test/Transforms/canonicalize-block-merge.mlir
    mlir/test/Transforms/canonicalize.mlir
    mlir/test/Transforms/loop-coalescing.mlir
    mlir/test/Transforms/loop-fusion-slice-computation.mlir
    mlir/test/Transforms/loop-fusion.mlir
    mlir/test/Transforms/loop-invariant-code-motion.mlir
    mlir/test/Transforms/memref-dependence-check.mlir
    mlir/test/Transforms/parallel-loop-collapsing.mlir
    mlir/test/Transforms/parametric-mapping.mlir
    mlir/test/Transforms/parametric-tiling.mlir
    mlir/test/Transforms/sccp-structured.mlir
    mlir/test/Transforms/single-parallel-loop-collapsing.mlir
    mlir/test/lib/Transforms/TestLoopMapping.cpp
    mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
    mlir/test/mlir-opt/commandline.mlir

Removed: 
    


################################################################################
diff --git a/mlir/docs/EDSC.md b/mlir/docs/EDSC.md
index b31a3583a7fb..befbd44a127e 100644
--- a/mlir/docs/EDSC.md
+++ b/mlir/docs/EDSC.md
@@ -103,7 +103,7 @@ or the following, for a 0-D `memref<f32>`:
 //       CHECK: store {{.*}}, %arg2[] : memref<f32>
 ```
 
-Similar APIs are provided to emit the lower-level `loop.for` op with
+Similar APIs are provided to emit the lower-level `scf.for` op with
 `LoopNestBuilder`. See the `builder-api-test.cpp` test for more usage examples.
 
 Since the implementation of declarative builders is in C++, it is also available

diff --git a/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h b/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h
index c680b1eb3573..5e6d674922e8 100644
--- a/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h
+++ b/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h
@@ -21,13 +21,13 @@ class RewritePattern;
 // Owning list of rewriting patterns.
 class OwningRewritePatternList;
 
-/// Collect a set of patterns to lower from loop.for, loop.if, and
+/// Collect a set of patterns to lower from scf.for, scf.if, and
 /// loop.terminator to CFG operations within the Standard dialect, in particular
 /// convert structured control flow into CFG branch-based control flow.
 void populateLoopToStdConversionPatterns(OwningRewritePatternList &patterns,
                                          MLIRContext *ctx);
 
-/// Creates a pass to convert loop.for, loop.if and loop.terminator ops to CFG.
+/// Creates a pass to convert scf.for, scf.if and scf.yield ops to CFG.
 std::unique_ptr<Pass> createLowerToCFGPass();
 
 } // namespace mlir

diff --git a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h
index f87b21d13d93..a3be39b07c15 100644
--- a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h
+++ b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h
@@ -74,7 +74,7 @@ LogicalResult convertLoopToGPULaunch(scf::ForOp forOp,
                                      ArrayRef<Value> numWorkGroups,
                                      ArrayRef<Value> workGroupSizes);
 
-/// Adds the conversion pattern from `loop.parallel` to `gpu.launch` to the
+/// Adds the conversion pattern from `scf.parallel` to `gpu.launch` to the
 /// provided pattern list.
 void populateParallelLoopToGPUPatterns(OwningRewritePatternList &patterns,
                                        MLIRContext *ctx);

diff --git a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h
index 4488b071ea43..5e56bc876fcf 100644
--- a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h
+++ b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h
@@ -40,7 +40,7 @@ createLoopToGPUPass(ArrayRef<int64_t> numWorkGroups,
                     ArrayRef<int64_t> workGroupSize);
 std::unique_ptr<OperationPass<FuncOp>> createLoopToGPUPass();
 
-/// Creates a pass that converts loop.parallel operations into a gpu.launch
+/// Creates a pass that converts scf.parallel operations into a gpu.launch
 /// operation. The mapping of loop dimensions to launch dimensions is derived
 /// from mapping attributes. See ParallelToGpuLaunchLowering::matchAndRewrite
 /// for a description of the used attributes.

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index a0d405b5fb48..a14e8a43e2d9 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -16,16 +16,16 @@ include "mlir/Pass/PassBase.td"
 //===----------------------------------------------------------------------===//
 
 def ConvertAffineToStandard : FunctionPass<"lower-affine"> {
-  let summary = "Lower Affine operations to a combination of Standard and Loop "
+  let summary = "Lower Affine operations to a combination of Standard and SCF "
                 "operations";
   let description = [{
 
     Convert operations from the affine dialect into operations from the loop and
     standard dialects.
 
-    `affine.for` operations are converted to `loop.for` operations that are free
+    `affine.for` operations are converted to `scf.for` operations that are free
     of certain structural restrictions (on their bounds and step). `affine.if`
-    is similarly converted to the `loop.if` operation. `affine.apply` operations
+    is similarly converted to the `scf.if` operation. `affine.apply` operations
     are converted into sequences of primitive arithmetic operations from the
     standard dialect that have the same effect, using operands of the `index`
     type. Consequently, named maps and sets that are no longer in use may be
@@ -155,7 +155,7 @@ def ConvertLinalgToSPIRV : Pass<"convert-linalg-to-spirv", "ModuleOp"> {
 //===----------------------------------------------------------------------===//
 
 def ConvertLoopToStandard : Pass<"convert-loop-to-std"> {
-  let summary = "Convert Loop dialect to Standard dialect, replacing structured"
+  let summary = "Convert SCF dialect to Standard dialect, replacing structured"
                 " control flow with a CFG";
   let constructor = "mlir::createLowerToCFGPass()";
 }
@@ -189,7 +189,7 @@ def ConvertLoopsToGPU : FunctionPass<"convert-loop-op-to-gpu"> {
 }
 
 def ConvertParallelLoopToGpu : Pass<"convert-parallel-loops-to-gpu"> {
-  let summary = "Convert mapped loop.parallel ops to gpu launch operations";
+  let summary = "Convert mapped scf.parallel ops to gpu launch operations";
   let constructor = "mlir::createParallelLoopToGpuPass()";
 }
 

diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
index 92c2f279b5d9..8dd7c7f61212 100644
--- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
+++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
@@ -49,7 +49,7 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
                                                      AffineMap map,
                                                      AffineMap bound);
 
-/// Sets the mapping attribute of a loop.parallel operation. Verifies that the
+/// Sets the mapping attribute of a scf.parallel operation. Verifies that the
 /// mapping passed is valid.
 /// - the number of DimMapperAttr provided is same as the number of loops of
 ///   the `ploopOp`.

diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td
index 1bfdfe5ebcfc..c0380739d1f8 100644
--- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Defines the attribute used for driving conversion from loop.parallel to
+// Defines the attribute used for driving conversion from scf.parallel to
 // gpu.launch operations
 //
 //===----------------------------------------------------------------------===//
@@ -30,7 +30,7 @@ def ProcessorAttr : I64EnumAttr<"Processor", "processor for loop mapping", [
   let cppNamespace = "::mlir::gpu";
 }
 
-// Attribute that drives conversion of a loop.parallel to gpu.launch
+// Attribute that drives conversion of a scf.parallel to gpu.launch
 // operation.
 // processor: the hardware id to map to.
 // map : An affine map that is used to pre-process hardware ids before

diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
index 696e7c16b720..f580161c3ccb 100644
--- a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
@@ -34,13 +34,13 @@ namespace edsc {
 class AffineLoopNestBuilder;
 class ParallelLoopNestBuilder;
 
-/// A LoopRangeBuilder is a generic NestedBuilder for loop.for operations.
+/// A LoopRangeBuilder is a generic NestedBuilder for scf.for operations.
 /// More specifically it is meant to be used as a temporary object for
 /// representing any nested MLIR construct that is "related to" an mlir::Value
 /// (for now an induction variable).
 class LoopRangeBuilder : public NestedBuilder {
 public:
-  /// Constructs a new loop.for and captures the associated induction
+  /// Constructs a new scf.for and captures the associated induction
   /// variable. A Value pointer is passed as the first argument and is the
   /// *only* way to capture the loop induction variable.
   LoopRangeBuilder(Value *iv, Value range);
@@ -58,9 +58,9 @@ class LoopRangeBuilder : public NestedBuilder {
   Value operator()(std::function<void(void)> fun = nullptr);
 };
 
-/// Helper class to sugar building loop.for loop nests from ranges.
+/// Helper class to sugar building scf.for loop nests from ranges.
 /// This is similar to edsc::AffineLoopNestBuilder except it works on ranges
-/// directly. In the current implementation it produces loop.for operations.
+/// directly. In the current implementation it produces scf.for operations.
 class LoopNestRangeBuilder {
 public:
   LoopNestRangeBuilder(MutableArrayRef<Value> ivs, ArrayRef<Value> ranges);
@@ -72,7 +72,7 @@ class LoopNestRangeBuilder {
   SmallVector<LoopRangeBuilder, 4> loops;
 };
 
-/// Helper template class for building loop.for and affine.loop nests from
+/// Helper template class for building scf.for and affine.for nests from
 /// ranges.
 template <typename LoopTy> class GenericLoopNestRangeBuilder {
 public:

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index 074d659778d2..c2784d08b2d2 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -71,7 +71,7 @@ def CopyOp : LinalgStructured_Op<"copy", [NInputs<1>, NOutputs<1>]> {
 
     ```mlir
     %0 = linalg.dim %arg0, 0 : index
-    loop.for %i0 = %c0 to %0 step %c1 {
+    scf.for %i0 = %c0 to %0 step %c1 {
       %1 = load %arg0[%i0] : memref<?xf32, stride_specification>
       store %1, %arg1[%i0] : memref<?xf32, stride_specification>
     }
@@ -95,9 +95,9 @@ def CopyOp : LinalgStructured_Op<"copy", [NInputs<1>, NOutputs<1>]> {
     %0 = linalg.dim %arg0, 0
     %1 = linalg.dim %arg0, 1
     %2 = linalg.dim %arg0, 2
-    loop.for %i0 = %c0 to %{{.*}} step %c1 {
-      loop.for %i1 = %c0 to %{{.*}} step %c1 {
-        loop.for %i2 = %c0 to %{{.*}} step %c1 {
+    scf.for %i0 = %c0 to %{{.*}} step %c1 {
+      scf.for %i1 = %c0 to %{{.*}} step %c1 {
+        scf.for %i2 = %c0 to %{{.*}} step %c1 {
           %3 = load %arg0[%i0, %i2, %i1] :
                   memref<?x?x?xf32, stride_specification>
           store %3, %arg1[%i2, %i1, %i0] :
@@ -628,9 +628,9 @@ def GenericOp : GenericOpBase<"generic"> {
 
     or IR resembling:
     ```mlir
-    loop.for %m = %c0 to %M step %c1 {
-      loop.for %n = %c0 to %N step %c1 {
-        loop.for %k = %c0 to %K step %c1 {
+    scf.for %m = %c0 to %M step %c1 {
+      scf.for %n = %c0 to %N step %c1 {
+        scf.for %k = %c0 to %K step %c1 {
           %a = load %A[%m, %k] : memref<?x?xf32, stride_specification>
           %b = load %B[%k, %n] : memref<?x?xf32, stride_specification>
           %c = load %C[%m, %n] : memref<?x?xf32, stride_specification>
@@ -752,9 +752,9 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> {
     or IR resembling:
 
     ```mlir
-    loop.for %m = %c0 to %M step %c1 {
-      loop.for %n = %c0 to %N step %c1 {
-        loop.for %k = %c0 to %K step %c1 {
+    scf.for %m = %c0 to %M step %c1 {
+      scf.for %n = %c0 to %N step %c1 {
+        scf.for %k = %c0 to %K step %c1 {
           %a = load %A[%m, %k] : memref<?x?xf32, stride_specification>
           %b = load %B[%k, %n] : memref<?x?xf32, stride_specification>
           %c = load %C[%m, %n] : memref<?x?xf32, stride_specification>

diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
index 90f894fccced..d3bfa90e6bdb 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -37,11 +37,11 @@ std::unique_ptr<OperationPass<FuncOp>>
 createLinalgPromotionPass(bool dynamicBuffers);
 std::unique_ptr<OperationPass<FuncOp>> createLinalgPromotionPass();
 
-/// Create a pass to convert Linalg operations to loop.for loops and
+/// Create a pass to convert Linalg operations to scf.for loops and
 /// std.load/std.store accesses.
 std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToLoopsPass();
 
-/// Create a pass to convert Linalg operations to loop.parallel loops and
+/// Create a pass to convert Linalg operations to scf.parallel loops and
 /// std.load/std.store accesses.
 std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToParallelLoopsPass();
 

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index f5bf5892199f..70c3f00f5216 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -121,11 +121,11 @@ void vectorizeLinalgOp(OpBuilder &builder, Operation *op);
 template <typename LoopTy, typename ConcreteOp>
 Optional<LinalgLoops> linalgLowerOpToLoops(OpBuilder &builder, Operation *op);
 
-/// Emits a loop nest of `loop.for` with the proper body for `op`.
+/// Emits a loop nest of `scf.for` with the proper body for `op`.
 template <typename ConcreteOp>
 LogicalResult linalgOpToLoops(OpBuilder &builder, Operation *op);
 
-/// Emits a loop nest of `loop.parallel` with the proper body for `op`.
+/// Emits a loop nest of `scf.parallel` with the proper body for `op`.
 template <typename ConcreteOp>
 LogicalResult linalgOpToParallelLoops(OpBuilder &builder, Operation *op);
 
@@ -362,8 +362,8 @@ struct LinalgLoweringPattern : public RewritePattern {
 private:
   /// LinalgTransformMarker handles special attribute manipulations.
   LinalgMarker marker;
-  /// Controls whether the pattern lowers to library calls, loop.for, affine.for
-  /// or loop.parallel.
+  /// Controls whether the pattern lowers to library calls, scf.for, affine.for
+  /// or scf.parallel.
   LinalgLoweringType loweringType;
 };
 

diff --git a/mlir/include/mlir/Dialect/SCF/CMakeLists.txt b/mlir/include/mlir/Dialect/SCF/CMakeLists.txt
index b8c533ce7b98..9467b97b384b 100644
--- a/mlir/include/mlir/Dialect/SCF/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/SCF/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_mlir_dialect(SCFOps loop Ops)
+add_mlir_dialect(SCFOps scf Ops)
 add_mlir_doc(SCFOps -gen-dialect-doc SCFDialect Dialects/)
 
 set(LLVM_TARGET_DEFINITIONS Passes.td)

diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
index 0bfc8b9f64e4..439a25f3a094 100644
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
@@ -41,7 +41,7 @@ inline LoopBuilder makeLoopBuilder(Value *iv, Value lb, Value ub, Value step) {
   return makeLoopBuilder(iv, lb, ub, step, MutableArrayRef<Value>{}, {});
 }
 
-/// Helper class to sugar building loop.parallel loop nests from lower/upper
+/// Helper class to sugar building scf.parallel loop nests from lower/upper
 /// bounds and step sizes.
 class ParallelLoopNestBuilder {
 public:
@@ -54,9 +54,9 @@ class ParallelLoopNestBuilder {
   SmallVector<LoopBuilder, 4> loops;
 };
 
-/// Helper class to sugar building loop.for loop nests from ranges.
+/// Helper class to sugar building scf.for loop nests from ranges.
 /// This is similar to edsc::AffineLoopNestBuilder except it operates on
-/// loop.for.
+/// scf.for.
 class LoopNestBuilder {
 public:
   LoopNestBuilder(Value *iv, Value lb, Value ub, Value step);

diff --git a/mlir/include/mlir/Dialect/SCF/SCF.h b/mlir/include/mlir/Dialect/SCF/SCF.h
index 142df41d7ad6..db71a63af828 100644
--- a/mlir/include/mlir/Dialect/SCF/SCF.h
+++ b/mlir/include/mlir/Dialect/SCF/SCF.h
@@ -29,8 +29,8 @@ namespace scf {
 #define GET_OP_CLASSES
 #include "mlir/Dialect/SCF/SCFOps.h.inc"
 
-// Insert `loop.terminator` at the end of the only region's only block if it
-// does not have a terminator already.  If a new `loop.terminator` is inserted,
+// Insert `scf.yield` at the end of the only region's only block if it
+// does not have a terminator already.  If a new `scf.yield` is inserted,
 // the location is specified by `loc`. If the region is empty, insert a new
 // block first.
 void ensureLoopTerminator(Region &region, Builder &builder, Location loc);

diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td
index 62e72cf9594d..1f93e07bfc2a 100644
--- a/mlir/include/mlir/Dialect/SCF/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td
@@ -18,7 +18,7 @@ include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
 def SCF_Dialect : Dialect {
-  let name = "loop";
+  let name = "scf";
   let cppNamespace = "";
 }
 
@@ -43,7 +43,7 @@ def ForOp : SCF_Op<"for",
        RecursiveSideEffects]> {
   let summary = "for operation";
   let description = [{
-    The "loop.for" operation represents a loop taking 3 SSA value as operands
+    The "scf.for" operation represents a loop taking 3 SSA value as operands
     that represent the lower bound, upper bound and step respectively.  The
     operation defines an SSA value for its induction variable. It has one
     region capturing the loop body. The induction variable is represented as an
@@ -54,30 +54,30 @@ def ForOp : SCF_Op<"for",
     the lower bound but does not include the upper bound.
 
     The body region must contain exactly one block that terminates with
-    "loop.yield". Calling ForOp::build will create such a region and insert
+    "scf.yield". Calling ForOp::build will create such a region and insert
     the terminator implicitly if none is defined, so will the parsing even in
     cases when it is absent from the custom format. For example:
 
     ```mlir
-    loop.for %iv = %lb to %ub step %step {
+    scf.for %iv = %lb to %ub step %step {
       ... // body
     }
     ```
 
-    `loop.for` can also operate on loop-carried variables and returns the final
+    `scf.for` can also operate on loop-carried variables and returns the final
     values after loop termination. The initial values of the variables are
-    passed as additional SSA operands to the "loop.for" following the 3 loop
+    passed as additional SSA operands to the "scf.for" following the 3 loop
     control SSA values mentioned above (lower bound, upper bound and step). The
     operation region has equivalent arguments for each variable representing
     the value of the variable at the current iteration.
 
-    The region must terminate with a "loop.yield" that passes all the current
-    iteration variables to the next iteration, or to the "loop.for" result, if
+    The region must terminate with a "scf.yield" that passes all the current
+    iteration variables to the next iteration, or to the "scf.for" result, if
     at the last iteration. Note, that when the loop-carried variables are
     present, calling ForOp::build will not insert the terminator implicitly.
-    The caller must insert "loop.yield" in that case.
+    The caller must insert "scf.yield" in that case.
 
-    "loop.for" results hold the final values after the last iteration.
+    "scf.for" results hold the final values after the last iteration.
     For example, to sum-reduce a memref:
 
     ```mlir
@@ -86,23 +86,23 @@ def ForOp : SCF_Op<"for",
       // Initial sum set to 0.
       %sum_0 = constant 0.0 : f32
       // iter_args binds initial values to the loop's region arguments.
-      %sum = loop.for %iv = %lb to %ub step %step
+      %sum = scf.for %iv = %lb to %ub step %step
           iter_args(%sum_iter = %sum_0) -> (f32) {
         %t = load %buffer[%iv] : memref<1024xf32>
         %sum_next = addf %sum_iter, %t : f32
         // Yield current iteration sum to next iteration %sum_iter or to %sum
         // if final iteration.
-        loop.yield %sum_next : f32
+        scf.yield %sum_next : f32
       }
       return %sum : f32
     }
     ```
 
-    If the "loop.for" defines any values, a yield must be explicitly present.
-    The number and types of the "loop.for" results must match the initial
+    If the "scf.for" defines any values, a yield must be explicitly present.
+    The number and types of the "scf.for" results must match the initial
     values in the "iter_args" binding and the yield operands.
 
-    Another example with a nested "loop.if" (see "loop.if" for details) to
+    Another example with a nested "scf.if" (see "scf.if" for details) to
     perform conditional reduction:
 
     ```mlir
@@ -110,17 +110,17 @@ def ForOp : SCF_Op<"for",
                              %ub: index, %step: index) -> (f32) {
       %sum_0 = constant 0.0 : f32
       %c0 = constant 0.0 : f32
-      %sum = loop.for %iv = %lb to %ub step %step
+      %sum = scf.for %iv = %lb to %ub step %step
           iter_args(%sum_iter = %sum_0) -> (f32) {
         %t = load %buffer[%iv] : memref<1024xf32>
         %cond = cmpf "ugt", %t, %c0 : f32
-        %sum_next = loop.if %cond -> (f32) {
+        %sum_next = scf.if %cond -> (f32) {
           %new_sum = addf %sum_iter, %t : f32
-          loop.yield %new_sum : f32
+          scf.yield %new_sum : f32
         } else {
-          loop.yield %sum_iter : f32
+          scf.yield %sum_iter : f32
         }
-        loop.yield %sum_next : f32
+        scf.yield %sum_next : f32
       }
       return %sum : f32
     }
@@ -181,45 +181,45 @@ def IfOp : SCF_Op<"if",
        SingleBlockImplicitTerminator<"YieldOp">, RecursiveSideEffects]> {
   let summary = "if-then-else operation";
   let description = [{
-    The `loop.if` operation represents an if-then-else construct for
+    The `scf.if` operation represents an if-then-else construct for
     conditionally executing two regions of code. The operand to an if operation
     is a boolean value. For example:
 
     ```mlir
-    loop.if %b  {
+    scf.if %b  {
       ...
     } else {
       ...
     }
     ```
 
-    `loop.if` may also return results that are defined in its regions. The
+    `scf.if` may also return results that are defined in its regions. The
     values defined are determined by which execution path is taken.
 
     Example:
 
     ```mlir
-    %x, %y = loop.if %b -> (f32, f32) {
+    %x, %y = scf.if %b -> (f32, f32) {
       %x_true = ...
       %y_true = ...
-      loop.yield %x_true, %y_true : f32, f32
+      scf.yield %x_true, %y_true : f32, f32
     } else {
       %x_false = ...
       %y_false = ...
-      loop.yield %x_false, %y_false : f32, f32
+      scf.yield %x_false, %y_false : f32, f32
     }
     ```
 
-    `loop.if` regions are always terminated with "loop.yield". If "loop.if"
-    defines no values, the "loop.yield" can be left out, and will be inserted
+    `scf.if` regions are always terminated with "scf.yield". If "scf.if"
+    defines no values, the "scf.yield" can be left out, and will be inserted
     implicitly. Otherwise, it must be explicit.
-    Also, if "loop.if" defines one or more values, the 'else' block cannot be
+    Also, if "scf.if" defines one or more values, the 'else' block cannot be
     omitted.
 
     Example:
 
     ```mlir
-    loop.if %b  {
+    scf.if %b  {
       ...
     }
     ```
@@ -257,7 +257,7 @@ def ParallelOp : SCF_Op<"parallel",
      SingleBlockImplicitTerminator<"YieldOp">]> {
   let summary = "parallel for operation";
   let description = [{
-    The "loop.parallel" operation represents a loop nest taking 4 groups of SSA
+    The "scf.parallel" operation represents a loop nest taking 4 groups of SSA
     values as operands that represent the lower bounds, upper bounds, steps and
     initial values, respectively. The operation defines a variadic number of
     SSA values for its induction variables. It has one region capturing the
@@ -266,7 +266,7 @@ def ParallelOp : SCF_Op<"parallel",
     machine word. The steps are values of type index, required to be positive.
     The lower and upper bounds specify a half-open range: the range includes
     the lower bound but does not include the upper bound. The initial values
-    have the same types as results of "loop.parallel". If there are no results,
+    have the same types as results of "scf.parallel". If there are no results,
     the keyword `init` can be omitted.
 
     Semantically we require that the iteration space can be iterated in any
@@ -275,27 +275,27 @@ def ParallelOp : SCF_Op<"parallel",
 
     The parallel loop operation supports reduction of values produced by
     individual iterations into a single result. This is modeled using the
-    loop.reduce operation (see loop.reduce for details). Each result of a
-    loop.parallel operation is associated with an initial value operand and
+    scf.reduce operation (see scf.reduce for details). Each result of a
+    scf.parallel operation is associated with an initial value operand and
     reduce operation that is an immediate child. Reductions are matched to
     result and initial values in order of their appearance in the body.
     Consequently, we require that the body region has the same number of
     results and initial values as it has reduce operations.
 
     The body region must contain exactly one block that terminates with
-    "loop.yield" without operands. Parsing ParallelOp will create such a region
+    "scf.yield" without operands. Parsing ParallelOp will create such a region
     and insert the terminator when it is absent from the custom format.
 
     Example:
 
     ```mlir
     %init = constant 0.0 : f32
-    loop.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init) -> f32 {
+    scf.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init) -> f32 {
       %elem_to_reduce = load %buffer[%iv] : memref<100xf32>
-      loop.reduce(%elem_to_reduce) : f32 {
+      scf.reduce(%elem_to_reduce) : f32 {
         ^bb0(%lhs : f32, %rhs: f32):
           %res = addf %lhs, %rhs : f32
-          loop.reduce.return %res : f32
+          scf.reduce.return %res : f32
       }
     }
     ```
@@ -327,17 +327,17 @@ def ParallelOp : SCF_Op<"parallel",
 def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
   let summary = "reduce operation for parallel for";
   let description = [{
-    "loop.reduce" is an operation occurring inside "loop.parallel" operations.
+    "scf.reduce" is an operation occurring inside "scf.parallel" operations.
     It consists of one block with two arguments which have the same type as the
-    operand of "loop.reduce".
+    operand of "scf.reduce".
 
-    "loop.reduce" is used to model the value for reduction computations of a
-    "loop.parallel" operation. It has to appear as an immediate child of a
-    "loop.parallel" and is associated with a result value of its parent
+    "scf.reduce" is used to model the value for reduction computations of a
+    "scf.parallel" operation. It has to appear as an immediate child of a
+    "scf.parallel" and is associated with a result value of its parent
     operation.
 
     Association is in the order of appearance in the body where the first
-    result of a parallel loop operation corresponds to the first "loop.reduce"
+    result of a parallel loop operation corresponds to the first "scf.reduce"
     in the operation's body region. The reduce operation takes a single
     operand, which is the value to be used in the reduction.
 
@@ -353,10 +353,10 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
 
     ```mlir
     %operand = constant 1.0 : f32
-    loop.reduce(%operand) : f32 {
+    scf.reduce(%operand) : f32 {
       ^bb0(%lhs : f32, %rhs: f32):
         %res = addf %lhs, %rhs : f32
-        loop.reduce.return %res : f32
+        scf.reduce.return %res : f32
     }
     ```
   }];
@@ -376,12 +376,12 @@ def ReduceReturnOp :
                               Terminator]> {
   let summary = "terminator for reduce operation";
   let description = [{
-    "loop.reduce.return" is a special terminator operation for the block inside
-    "loop.reduce". It terminates the region. It should have the same type as
-    the operand of "loop.reduce". Example for the custom format:
+    "scf.reduce.return" is a special terminator operation for the block inside
+    "scf.reduce". It terminates the region. It should have the same type as
+    the operand of "scf.reduce". Example for the custom format:
 
     ```mlir
-    loop.reduce.return %res : f32
+    scf.reduce.return %res : f32
     ```
   }];
 
@@ -392,12 +392,12 @@ def ReduceReturnOp :
 def YieldOp : SCF_Op<"yield", [NoSideEffect, ReturnLike, Terminator]> {
   let summary = "loop yield and termination operation";
   let description = [{
-    "loop.yield" yields an SSA value from a loop dialect op region and
+    "scf.yield" yields an SSA value from the SCF dialect op region and
     terminates the regions. The semantics of how the values are yielded is
     defined by the parent operation.
-    If "loop.yield" has any operands, the operands must match the parent
+    If "scf.yield" has any operands, the operands must match the parent
     operation's results.
-    If the parent operation defines no values, then the "loop.yield" may be
+    If the parent operation defines no values, then the "scf.yield" may be
     left out in the custom syntax and the builders will insert one implicitly.
     Otherwise, it has to be present in the syntax to indicate which values are
     yielded.

diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h
index a3e1c0b03749..222ad6bf5584 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@@ -23,20 +23,20 @@ namespace scf {
 
 class ParallelOp;
 
-/// Fuses all adjacent loop.parallel operations with identical bounds and step
-/// into one loop.parallel operations. Uses a naive aliasing and dependency
+/// Fuses all adjacent scf.parallel operations with identical bounds and step
+/// into one scf.parallel operation. Uses a naive aliasing and dependency
 /// analysis.
 void naivelyFuseParallelOps(Region &region);
 
 /// Tile a parallel loop of the form
-///   loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+///   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
 ///                                             step (%arg4, %arg5)
 ///
 /// into
-///   loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+///   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
 ///                                             step (%arg4*tileSize[0],
 ///                                                   %arg5*tileSize[1])
-///     loop.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)
+///     scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)
 ///                                           min(tileSize[1], %arg3-%j1))
 ///                                        step (%arg4, %arg5)
 /// The old loop is replaced with the new one.

diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index 9a3a3c7f6ba2..b3f23aaa4397 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -251,7 +251,7 @@ void collapseParallelLoops(scf::ParallelOp loops,
 /// numProcessors = [gridDim.x, blockDim.x], the loop:
 ///
 /// ```
-///    loop.for %i = %lb to %ub step %step {
+///    scf.for %i = %lb to %ub step %step {
 ///      ...
 ///    }
 /// ```
@@ -259,7 +259,7 @@ void collapseParallelLoops(scf::ParallelOp loops,
 /// is rewritten into a version resembling the following pseudo-IR:
 ///
 /// ```
-///    loop.for %i = %lb + %step * (threadIdx.x + blockIdx.x * blockDim.x)
+///    scf.for %i = %lb + %step * (threadIdx.x + blockIdx.x * blockDim.x)
 ///       to %ub step %gridDim.x * blockDim.x * %step {
 ///      ...
 ///    }

diff --git a/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp b/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp
index 9a3c3ba7e175..d064ad24071f 100644
--- a/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp
+++ b/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a pass to convert loop.for, loop.if and loop.terminator
+// This file implements a pass to convert scf.for, scf.if and scf.yield
 // ops into standard CFG ops.
 //
 //===----------------------------------------------------------------------===//
@@ -41,7 +41,7 @@ struct LoopToStandardPass
 // first/last blocks in the parent region.  The original loop operation is
 // replaced by the initialization operations that set up the initial value of
 // the loop induction variable (%iv) and computes the loop bounds that are loop-
-// invariant for affine loops.  The operations following the original loop.for
+// invariant for affine loops.  The operations following the original scf.for
 // are split out into a separate continuation (exit) block. A condition block is
 // created before the continuation block. It checks the exit condition of the
 // loop and branches either to the continuation block, or to the first block of
@@ -102,27 +102,27 @@ struct ForLowering : public OpRewritePattern<ForOp> {
                                 PatternRewriter &rewriter) const override;
 };
 
-// Create a CFG subgraph for the loop.if operation (including its "then" and
+// Create a CFG subgraph for the scf.if operation (including its "then" and
 // optional "else" operation blocks).  We maintain the invariants that the
 // subgraph has a single entry and a single exit point, and that the entry/exit
 // blocks are respectively the first/last block of the enclosing region. The
-// operations following the loop.if are split into a continuation (subgraph
+// operations following the scf.if are split into a continuation (subgraph
 // exit) block. The condition is lowered to a chain of blocks that implement the
-// short-circuit scheme. The "loop.if" operation is replaced with a conditional
+// short-circuit scheme. The "scf.if" operation is replaced with a conditional
 // branch to either the first block of the "then" region, or to the first block
-// of the "else" region. In these blocks, "loop.yield" is unconditional branches
-// to the post-dominating block. When the "loop.if" does not return values, the
+// of the "else" region. In these blocks, "scf.yield" is unconditional branches
+// to the post-dominating block. When the "scf.if" does not return values, the
 // post-dominating block is the same as the continuation block. When it returns
 // values, the post-dominating block is a new block with arguments that
-// correspond to the values returned by the "loop.if" that unconditionally
+// correspond to the values returned by the "scf.if" that unconditionally
 // branches to the continuation block. This allows block arguments to dominate
-// any uses of the hitherto "loop.if" results that they replaced. (Inserting a
+// any uses of the hitherto "scf.if" results that they replaced. (Inserting a
 // new block allows us to avoid modifying the argument list of an existing
 // block, which is illegal in a conversion pattern). When the "else" region is
-// empty, which is only allowed for "loop.if"s that don't return values, the
+// empty, which is only allowed for "scf.if"s that don't return values, the
 // condition branches directly to the continuation block.
 //
-// CFG for a loop.if with else and without results.
+// CFG for a scf.if with else and without results.
 //
 //      +--------------------------------+
 //      | <code before the IfOp>         |
@@ -152,7 +152,7 @@ struct ForLowering : public OpRewritePattern<ForOp> {
 //      |   <code after the IfOp>        |
 //      +--------------------------------+
 //
-// CFG for a loop.if with results.
+// CFG for a scf.if with results.
 //
 //      +--------------------------------+
 //      | <code before the IfOp>         |
@@ -207,7 +207,7 @@ LogicalResult ForLowering::matchAndRewrite(ForOp forOp,
                                            PatternRewriter &rewriter) const {
   Location loc = forOp.getLoc();
 
-  // Start by splitting the block containing the 'loop.for' into two parts.
+  // Start by splitting the block containing the 'scf.for' into two parts.
   // The part before will get the init code, the part after will be the end
   // point.
   auto *initBlock = rewriter.getInsertionBlock();
@@ -273,7 +273,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
                                           PatternRewriter &rewriter) const {
   auto loc = ifOp.getLoc();
 
-  // Start by splitting the block containing the 'loop.if' into two parts.
+  // Start by splitting the block containing the 'scf.if' into two parts.
   // The part before will contain the condition, the part after will be the
   // continuation point.
   auto *condBlock = rewriter.getInsertionBlock();
@@ -288,7 +288,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
     rewriter.create<BranchOp>(loc, remainingOpsBlock);
   }
 
-  // Move blocks from the "then" region to the region containing 'loop.if',
+  // Move blocks from the "then" region to the region containing 'scf.if',
   // place it before the continuation block, and branch to it.
   auto &thenRegion = ifOp.thenRegion();
   auto *thenBlock = &thenRegion.front();
@@ -300,7 +300,7 @@ LogicalResult IfLowering::matchAndRewrite(IfOp ifOp,
   rewriter.inlineRegionBefore(thenRegion, continueBlock);
 
   // Move blocks from the "else" region (if present) to the region containing
-  // 'loop.if', place it before the continuation block and branch to it.  It
+  // 'scf.if', place it before the continuation block and branch to it.  It
   // will be placed after the "then" regions.
   auto *elseBlock = continueBlock;
   auto &elseRegion = ifOp.elseRegion();
@@ -331,7 +331,7 @@ ParallelLowering::matchAndRewrite(ParallelOp parallelOp,
   BlockAndValueMapping mapping;
 
   // For a parallel loop, we essentially need to create an n-dimensional loop
-  // nest. We do this by translating to loop.for ops and have those lowered in
+  // nest. We do this by translating to scf.for ops and have those lowered in
   // a further rewrite. If a parallel loop contains reductions (and thus returns
   // values), forward the initial values for the reductions down the loop
   // hierarchy and bubble up the results by modifying the "yield" terminator.
@@ -375,10 +375,10 @@ ParallelLowering::matchAndRewrite(ParallelOp parallelOp,
     }
 
     // Clone the body of the reduction operation into the body of the loop,
-    // using operands of "loop.reduce" and iteration arguments corresponding
+    // using operands of "scf.reduce" and iteration arguments corresponding
     // to the reduction value to replace arguments of the reduction block.
-    // Collect operands of "loop.reduce.return" to be returned by a final
-    // "loop.yield" instead.
+    // Collect operands of "scf.reduce.return" to be returned by a final
+    // "scf.yield" instead.
     Value arg = iterArgs[yieldOperands.size()];
     Block &reduceBlock = reduce.reductionOperator().front();
     mapping.map(reduceBlock.getArgument(0), mapping.lookupOrDefault(arg));

diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index 3821b4a2cf34..84cbf1bf7dde 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -563,7 +563,7 @@ static unsigned getLaunchOpArgumentNum(gpu::Processor processor) {
 }
 
 /// Modifies the current transformation state to capture the effect of the given
-/// `loop.parallel` operation on index substitutions and the operations to be
+/// `scf.parallel` operation on index substitutions and the operations to be
 /// inserted.
 /// Specifically, if a dimension of a parallel loop is mapped to a hardware id,
 /// this function will
@@ -734,11 +734,11 @@ static LogicalResult processParallelLoop(
   return success();
 }
 
-/// Lower a `loop.parallel` operation into a corresponding `gpu.launch`
+/// Lower a `scf.parallel` operation into a corresponding `gpu.launch`
 /// operation.
 ///
 /// This essentially transforms a loop nest into a corresponding SIMT function.
-/// The conversion is driven by mapping annotations on the `loop.parallel`
+/// The conversion is driven by mapping annotations on the `scf.parallel`
 /// operations. The mapping is provided via a `DictionaryAttribute` named
 /// `mapping`, which has three entries:
 ///  - processor: the hardware id to map to. 0-2 are block dimensions, 3-5 are
@@ -747,9 +747,9 @@ static LogicalResult processParallelLoop(
 ///          substitution.
 ///  - bound : An affine map that is used to compute the bound of the hardware
 ///            id based on an upper bound of the number of iterations.
-/// If the `loop.parallel` contains nested `loop.parallel` operations, those
+/// If the `scf.parallel` contains nested `scf.parallel` operations, those
 /// need to be annotated, as well. Structurally, the transformation works by
-/// splicing all operations from nested `loop.parallel` operations into a single
+/// splicing all operations from nested `scf.parallel` operations into a single
 /// sequence. Indices mapped to hardware ids are substituted with those ids,
 /// whereas sequential mappings result in a sequential for-loop. To have more
 /// flexibility when mapping code to hardware ids, the transform supports two
@@ -791,7 +791,7 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
   while (!worklist.empty()) {
     Operation *op = worklist.pop_back_val();
     // Now walk over the body and clone it.
-    // TODO: This is only correct if there either is no further loop.parallel
+    // TODO: This is only correct if there either is no further scf.parallel
     //       nested or this code is side-effect free. Otherwise we might need
     //       predication. We are overly conservative for now and only allow
     //       side-effects in the innermost scope.
@@ -800,7 +800,7 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
       // sideeffects until now.
       if (seenSideeffects)
         return failure();
-      // A nested loop.parallel needs insertion of code to compute indices.
+      // A nested scf.parallel needs insertion of code to compute indices.
       // Insert that now. This will also update the worklist with the loop's
       // body.
       if (failed(processParallelLoop(nestedParallel, launchOp, cloningMap,

diff --git a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
index dc929a2e6747..ad573559cb28 100644
--- a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
+++ b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
@@ -350,9 +350,9 @@ namespace {
 ///    // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
 ///    // vector<32x256xf32> and pad with %f0 to handle the boundary case:
 ///    %f0 = constant 0.0f : f32
-///    loop.for %i0 = 0 to %0 {
-///      loop.for %i1 = 0 to %1 step %c256 {
-///        loop.for %i2 = 0 to %2 step %c32 {
+///    scf.for %i0 = 0 to %0 {
+///      scf.for %i1 = 0 to %1 step %c256 {
+///        scf.for %i2 = 0 to %2 step %c32 {
 ///          %v = vector.transfer_read %A[%i0, %i1, %i2], %f0
 ///               {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 ///               memref<?x?x?xf32>, vector<32x256xf32>
@@ -364,8 +364,8 @@ namespace {
 /// abstraction):
 ///
 /// ```mlir
-///    loop.for %d2 = 0 to %c256 {
-///      loop.for %d1 = 0 to %c32 {
+///    scf.for %d2 = 0 to %c256 {
+///      scf.for %d1 = 0 to %c32 {
 ///        %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
 ///        %tmp[%d2, %d1] = %s
 ///      }

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
index a978143cd834..77947ba1101b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -134,9 +134,9 @@ namespace {
 /// An example output may resemble:
 ///
 /// ```
-///    loop.for %i = %c0 to %0 step %c1 {
-///      loop.for %j = %c0 to %1 step %c1 {
-///        loop.for %k = %c0 to %4 step %c1 {
+///    scf.for %i = %c0 to %0 step %c1 {
+///      scf.for %j = %c0 to %1 step %c1 {
+///        scf.for %k = %c0 to %4 step %c1 {
 ///          %11 = load %arg0[%i, %j] :
 ///            memref<?x?xf32, stride_specification>
 ///          %12 = load %arg1[%i, %j, %k] :
@@ -419,9 +419,9 @@ class LinalgScopedEmitter<IndexedValueType, PoolingSumOp> {
 /// An example output may resemble:
 ///
 /// ```
-///    loop.for %i = %c0 to %0 step %c1 {
-///      loop.for %j = %c0 to %1 step %c1 {
-///        loop.for %k = %c0 to %4 step %c1 {
+///    scf.for %i = %c0 to %0 step %c1 {
+///      scf.for %j = %c0 to %1 step %c1 {
+///        scf.for %k = %c0 to %4 step %c1 {
 ///          %11 = load %arg0[%i, %j] :
 ///            memref<?x?xf32, stride_specification>
 ///          %12 = load %arg1[%i, %j, %k] :
@@ -509,8 +509,8 @@ class GenerateLoopNest {
   }
 };
 
-/// Generates loop nest using loop.parallel. loop.parallel is only used for the
-/// outer parallel loops. All other loops are generated using loop.for
+/// Generates loop nest using scf.parallel. scf.parallel is only used for the
+/// outer parallel loops. All other loops are generated using scf.for
 /// operation.
 template <typename ConcreteOpTy>
 class GenerateLoopNest<scf::ParallelOp, ConcreteOpTy> {
@@ -519,9 +519,9 @@ class GenerateLoopNest<scf::ParallelOp, ConcreteOpTy> {
 
   static void doit(ConcreteOpTy linalgOp, ArrayRef<Value> loopRanges,
                    MutableArrayRef<Value> allIvs) {
-    // Only generate loop.parallel for outer consecutive "parallel"
+    // Only generate scf.parallel for outer consecutive "parallel"
     // iterator_types.
-    // TODO(ravishankarm): Generate loop.parallel for all "parallel" iterator
+    // TODO(ravishankarm): Generate scf.parallel for all "parallel" iterator
     // types, not just the outer most ones. Also handle "reduction" iterator
     // types.
     auto nOuterPar = linalgOp.iterator_types()
@@ -532,7 +532,7 @@ class GenerateLoopNest<scf::ParallelOp, ConcreteOpTy> {
                          })
                          .size();
     // If there are no outer parallel loops, then number of loop ops is same as
-    // the number of loops, and they are all loop.for ops.
+    // the number of loops, and they are all scf.for ops.
     if (nOuterPar) {
       GenericLoopNestRangeBuilder<scf::ParallelOp>(
           allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar))([&] {
@@ -545,7 +545,7 @@ class GenerateLoopNest<scf::ParallelOp, ConcreteOpTy> {
         });
       });
     } else {
-      // If there are no parallel loops then fallback to generating all loop.for
+      // If there are no parallel loops then fall back to generating all scf.for
       // operations.
       GenericLoopNestRangeBuilder<scf::ForOp>(allIvs, loopRanges)([&] {
         SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
@@ -595,7 +595,7 @@ Optional<LinalgLoops> linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) {
   assert(loopRanges.size() == allIvs.size());
   Impl::doit(linalgOp, loopRanges, allIvs);
   // Number of loop ops might be different from the number of ivs since some
-  // loops like affine.parallel and loop.parallel have multiple ivs.
+  // loops like affine.parallel and scf.parallel have multiple ivs.
   llvm::SetVector<Operation *> loopSet;
   for (Value iv : allIvs) {
     if (!iv)
@@ -747,7 +747,7 @@ Optional<LinalgLoops> mlir::linalg::linalgLowerOpToLoops(OpBuilder &builder,
   return linalgOpToLoopsImpl<LoopTy, ConcreteOp>(op, builder);
 }
 
-/// Emits a loop nest of `loop.for` with the proper body for `op`.
+/// Emits a loop nest of `scf.for` with the proper body for `op`.
 template <typename ConcreteOp>
 LogicalResult mlir::linalg::linalgOpToLoops(OpBuilder &builder, Operation *op) {
   Optional<LinalgLoops> loops =
@@ -764,7 +764,7 @@ LogicalResult mlir::linalg::linalgOpToAffineLoops(OpBuilder &builder,
   return loops ? success() : failure();
 }
 
-/// Emits a loop nest of `loop.parallel` with the proper body for `op`.
+/// Emits a loop nest of `scf.parallel` with the proper body for `op`.
 template <typename ConcreteOp>
 LogicalResult mlir::linalg::linalgOpToParallelLoops(OpBuilder &builder,
                                                     Operation *op) {

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 462c2ef0c9ba..4c460eaeaba0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -144,8 +144,8 @@ struct TileCheck : public AffineExprVisitor<TileCheck> {
 // %c10 = constant 10 : index
 // operand_dim_0 = dim %operand, 0 : memref<50x100xf32>
 // operand_dim_1 = dim %operand, 1 : memref<50x100xf32>
-// loop.for %k = %c0 to operand_dim_0 step %c10 {
-//   loop.for %l = %c0 to operand_dim_1 step %c25 {
+// scf.for %k = %c0 to operand_dim_0 step %c10 {
+//   scf.for %l = %c0 to operand_dim_1 step %c25 {
 //     %4 = std.subview %operand[%k, %l][%c10, %c25][%c1, %c1]
 //       : memref<50x100xf32> to memref<?x?xf32, #strided>
 //     %5 = std.subview %result[%k, %l][%c10, %c25][%c1, %c1]

diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index 591179455c94..ba2fead9bda0 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -582,7 +582,7 @@ static LogicalResult verify(ReduceOp op) {
   // Check that the block is terminated by a ReduceReturnOp.
   if (!isa<ReduceReturnOp>(block.getTerminator()))
     return op.emitOpError("the block inside reduce should be terminated with a "
-                          "'loop.reduce.return' op");
+                          "'scf.reduce.return' op");
 
   return success();
 }
@@ -649,7 +649,7 @@ static LogicalResult verify(YieldOp op) {
   } else if (isa<ParallelOp>(parentOp)) {
     if (op.getNumOperands() != 0)
       return op.emitOpError()
-             << "yield inside loop.parallel is not allowed to have operands";
+             << "yield inside scf.parallel is not allowed to have operands";
   } else {
     return op.emitOpError()
            << "yield only terminates If, For or Parallel regions";

diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
index 94dba40a6436..12c35b117488 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
@@ -1,4 +1,4 @@
-//===- ParallelLoopSpecialization.cpp - loop.parallel specialization ------===//
+//===- ParallelLoopSpecialization.cpp - scf.parallel specialization ------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
index 22da511c8b41..8e84566659f8 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
@@ -1,4 +1,4 @@
-//===- ParallelLoopTiling.cpp - Tiles loop.parallel ---------------===//
+//===- ParallelLoopTiling.cpp - Tiles scf.parallel ---------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -23,14 +23,14 @@ using namespace mlir;
 using namespace mlir::scf;
 
 /// Tile a parallel loop of the form
-///   loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+///   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
 ///                                             step (%arg4, %arg5)
 ///
 /// into
-///   loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+///   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
 ///                                             step (%arg4*tileSize[0],
 ///                                                   %arg5*tileSize[1])
-///     loop.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)
+///     scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%i0),
 ///                                           min(tileSize[1], %arg3-%i1))
 ///                                        step (%arg4, %arg5)
 /// The old loop is replaced with the new one.
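
Concretely, for tileSize = [4, 4] and unit original steps -- the configuration the updated parallel_loop_tiled test exercises -- this produces, in the same pseudo-notation (min is the comment's shorthand, not verbatim IR):

    scf.parallel (%i0, %i1) = (%lb0, %lb1) to (%ub0, %ub1) step (%c4, %c4) {
      scf.parallel (%j0, %j1) = (%c0, %c0)
          to (min(%c4, %ub0 - %i0), min(%c4, %ub1 - %i1)) step (%c1, %c1) {
        // original body, with indices rewritten to %i0 + %j0 and %i1 + %j1
      }
    }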

diff  --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir
index 3e3678b40468..6366a49a62e3 100644
--- a/mlir/test/Analysis/test-dominance.mlir
+++ b/mlir/test/Analysis/test-dominance.mlir
@@ -91,7 +91,7 @@ func @func_loop(%arg0 : i32, %arg1 : i32) {
 
 // CHECK-LABEL: Testing : nested_region
 func @nested_region(%arg0 : index, %arg1 : index, %arg2 : index) {
-  loop.for %arg3 = %arg0 to %arg1 step %arg2 { }
+  scf.for %arg3 = %arg0 to %arg1 step %arg2 { }
   return
 }
 
@@ -110,9 +110,9 @@ func @nested_region(%arg0 : index, %arg1 : index, %arg2 : index) {
 
 // CHECK-LABEL: Testing : nested_region2
 func @nested_region2(%arg0 : index, %arg1 : index, %arg2 : index) {
-  loop.for %arg3 = %arg0 to %arg1 step %arg2 {
-    loop.for %arg4 = %arg0 to %arg1 step %arg2 {
-      loop.for %arg5 = %arg0 to %arg1 step %arg2 { }
+  scf.for %arg3 = %arg0 to %arg1 step %arg2 {
+    scf.for %arg4 = %arg0 to %arg1 step %arg2 {
+      scf.for %arg5 = %arg0 to %arg1 step %arg2 { }
     }
   }
   return
@@ -160,8 +160,8 @@ func @func_loop_nested_region(
 ^loopBody:
   %const0 = constant 1 : i32
   %inc = addi %counter, %const0 : i32
-  loop.for %arg5 = %arg2 to %arg3 step %arg4 {
-    loop.for %arg6 = %arg2 to %arg3 step %arg4 { }
+  scf.for %arg5 = %arg2 to %arg3 step %arg4 {
+    scf.for %arg6 = %arg2 to %arg3 step %arg4 { }
   }
   br ^loopHeader(%inc : i32)
 ^exit:

diff  --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir
index bd3fb7b79f7b..9e1329f3609b 100644
--- a/mlir/test/Analysis/test-liveness.mlir
+++ b/mlir/test/Analysis/test-liveness.mlir
@@ -205,17 +205,17 @@ func @nested_region(
   // CHECK-NEXT: val_7
   // CHECK-NEXT:    %0 = addi
   // CHECK-NEXT:    %1 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // %2 = addi
   // CHECK-NEXT:    %3 = addi
   // CHECK-NEXT: val_8
   // CHECK-NEXT:    %1 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // return %1
   // CHECK: EndLiveness
   %0 = addi %arg3, %arg4 : i32
   %1 = addi %arg4, %arg5 : i32
-  loop.for %arg6 = %arg0 to %arg1 step %arg2 {
+  scf.for %arg6 = %arg0 to %arg1 step %arg2 {
     // CHECK: Block: 1
    // CHECK-NEXT: LiveIn: arg5@0 arg6@0 val_7
     // CHECK-NEXT: LiveOut:{{ *$}}
@@ -238,13 +238,13 @@ func @nested_region2(
   // CHECK-NEXT: val_7
   // CHECK-NEXT:    %0 = addi
   // CHECK-NEXT:    %1 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // %2 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // %3 = addi
   // CHECK-NEXT: val_8
   // CHECK-NEXT:    %1 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // return %1
   // CHECK: EndLiveness
   %arg0 : index, %arg1 : index, %arg2 : index,
@@ -252,18 +252,18 @@ func @nested_region2(
   %buffer : memref<i32>) -> i32 {
   %0 = addi %arg3, %arg4 : i32
   %1 = addi %arg4, %arg5 : i32
-  loop.for %arg6 = %arg0 to %arg1 step %arg2 {
+  scf.for %arg6 = %arg0 to %arg1 step %arg2 {
     // CHECK: Block: 1
    // CHECK-NEXT: LiveIn: arg0@0 arg1@0 arg2@0 arg5@0 arg6@0 val_7
     // CHECK-NEXT: LiveOut:{{ *$}}
     // CHECK-NEXT: BeginLiveness
     // CHECK-NEXT: val_10
     // CHECK-NEXT:    %2 = addi
-    // CHECK-NEXT:    loop.for
+    // CHECK-NEXT:    scf.for
     // CHECK:         // %3 = addi
     // CHECK: EndLiveness
     %2 = addi %0, %arg5 : i32
-    loop.for %arg7 = %arg0 to %arg1 step %arg2 {
+    scf.for %arg7 = %arg0 to %arg1 step %arg2 {
       %3 = addi %2, %0 : i32
       store %3, %buffer[] : memref<i32>
     }
@@ -283,10 +283,10 @@ func @nested_region3(
   // CHECK-NEXT: val_7
   // CHECK-NEXT:    %0 = addi
   // CHECK-NEXT:    %1 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // br ^bb1
   // CHECK-NEXT:    %2 = addi
-  // CHECK-NEXT:    loop.for
+  // CHECK-NEXT:    scf.for
   // CHECK:         // %2 = addi
   // CHECK: EndLiveness
   %arg0 : index, %arg1 : index, %arg2 : index,
@@ -294,7 +294,7 @@ func @nested_region3(
   %buffer : memref<i32>) -> i32 {
   %0 = addi %arg3, %arg4 : i32
   %1 = addi %arg4, %arg5 : i32
-  loop.for %arg6 = %arg0 to %arg1 step %arg2 {
+  scf.for %arg6 = %arg0 to %arg1 step %arg2 {
     // CHECK: Block: 1
    // CHECK-NEXT: LiveIn: arg5@0 arg6@0 val_7
     // CHECK-NEXT: LiveOut:{{ *$}}
@@ -307,7 +307,7 @@ func @nested_region3(
   // CHECK: Block: 2
  // CHECK-NEXT: LiveIn: arg0@0 arg1@0 arg2@0 arg6@0 val_7 val_8
   // CHECK-NEXT: LiveOut:{{ *$}}
-  loop.for %arg7 = %arg0 to %arg1 step %arg2 {
+  scf.for %arg7 = %arg0 to %arg1 step %arg2 {
     // CHECK: Block: 3
    // CHECK-NEXT: LiveIn: arg6@0 val_7 val_8
     // CHECK-NEXT: LiveOut:{{ *$}}

diff  --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir
index 8a637457884e..1e8164cb310d 100644
--- a/mlir/test/Conversion/GPUToSPIRV/if.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir
@@ -29,7 +29,7 @@ module attributes {
       // CHECK-NEXT:  }
       // CHECK-NEXT:  spv.Return
 
-      loop.if %arg3 {
+      scf.if %arg3 {
         store %value, %arg2[%i] : memref<10xf32>
       }
       gpu.return
@@ -70,8 +70,8 @@ module attributes {
       // CHECK-NEXT:  }
       // CHECK-NEXT:  spv.Return
 
-      loop.if %arg5 {
-        loop.if %arg6 {
+      scf.if %arg5 {
+        scf.if %arg6 {
           %value = load %arg3[%i] : memref<10xf32>
           store %value, %arg4[%i] : memref<10xf32>
         } else {
@@ -79,7 +79,7 @@ module attributes {
           store %value, %arg3[%i] : memref<10xf32>
         }
       } else {
-        loop.if %arg6 {
+        scf.if %arg6 {
           %value = load %arg3[%j] : memref<10xf32>
           store %value, %arg4[%j] : memref<10xf32>
         } else {

diff  --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
index 56bff8a3985b..00a13f610f10 100644
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -39,7 +39,7 @@ module attributes {
       // CHECK:      [[MERGE]]
       // CHECK:        spv._merge
       // CHECK:      }
-      loop.for %arg4 = %lb to %ub step %step {
+      scf.for %arg4 = %lb to %ub step %step {
         %1 = load %arg2[%arg4] : memref<10xf32>
         store %1, %arg3[%arg4] : memref<10xf32>
       }

diff  --git a/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir
index 49562a7f7840..d308a9e65e24 100644
--- a/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir
@@ -12,15 +12,15 @@ module {
     // CHECK: [[TEMP1:%.*]] = muli [[ARG3]], [[ARG6]] : index
     // CHECK: [[BLOCKLOOPYLB:%.*]] = addi {{%.*}}, [[TEMP1]] : index
     // CHECK: [[BLOCKLOOPYSTEP:%.*]] = muli [[ARG3]], [[ARG12]] : index
-    // CHECK: loop.for [[BLOCKLOOPYIV:%.*]] = [[BLOCKLOOPYLB]] to {{%.*}} step [[BLOCKLOOPYSTEP]]
-    loop.for %iv1 = %c0 to %0 step %arg3 {
+    // CHECK: scf.for [[BLOCKLOOPYIV:%.*]] = [[BLOCKLOOPYLB]] to {{%.*}} step [[BLOCKLOOPYSTEP]]
+    scf.for %iv1 = %c0 to %0 step %arg3 {
 
       // CHECK: [[TEMP2:%.*]] = muli [[ARG4]], [[ARG5]] : index
       // CHECK: [[BLOCKLOOPXLB:%.*]] = addi  {{%.*}}, [[TEMP2]] : index
       // CHECK: [[BLOCKLOOPXSTEP:%.*]] = muli [[ARG4]], [[ARG11]] : index
-      // CHECK: loop.for [[BLOCKLOOPXIV:%.*]] = [[BLOCKLOOPXLB]] to {{%.*}} step [[BLOCKLOOPXSTEP]]
+      // CHECK: scf.for [[BLOCKLOOPXIV:%.*]] = [[BLOCKLOOPXLB]] to {{%.*}} step [[BLOCKLOOPXSTEP]]
 
-      loop.for %iv2 = %c0 to %1 step %arg4 {
+      scf.for %iv2 = %c0 to %1 step %arg4 {
 
        // TODO: This is effectively shared memory. Lower it to
        // shared memory.
@@ -30,13 +30,13 @@ module {
         // CHECK: [[TEMP3:%.*]] = muli [[ARG20:%.*]], [[ARG9:%.*]] : index
         // CHECK: [[THREADLOOP1YLB:%.*]] = addi {{%.*}}, [[TEMP3]] : index
         // CHECK: [[THREADLOOP1YSTEP:%.*]] = muli [[ARG20]], [[ARG15]] : index
-        // CHECK: loop.for [[THREADLOOP1YIV:%.*]] = [[THREADLOOP1YLB]] to {{%.*}} step [[THREADLOOP1YSTEP]]
-        loop.for %iv3 = %c0 to %arg3 step %c1 {
+        // CHECK: scf.for [[THREADLOOP1YIV:%.*]] = [[THREADLOOP1YLB]] to {{%.*}} step [[THREADLOOP1YSTEP]]
+        scf.for %iv3 = %c0 to %arg3 step %c1 {
           // CHECK: [[TEMP4:%.*]] = muli [[ARG20]], [[ARG8]] : index
           // CHECK: [[THREADLOOP1XLB:%.*]] = addi {{%.*}}, [[TEMP4]] : index
           // CHECK: [[THREADLOOP1XSTEP:%.*]] = muli [[ARG20]], [[ARG14]] : index
-          // CHECK: loop.for [[THREADLOOP1XIV:%.*]] = [[THREADLOOP1XLB]] to {{%.*}} step [[THREADLOOP1XSTEP]]
-          loop.for %iv4 = %c1 to %arg4 step %c1 {
+          // CHECK: scf.for [[THREADLOOP1XIV:%.*]] = [[THREADLOOP1XLB]] to {{%.*}} step [[THREADLOOP1XSTEP]]
+          scf.for %iv4 = %c1 to %arg4 step %c1 {
             // CHECK: [[INDEX2:%.*]] = addi [[BLOCKLOOPYIV]], [[THREADLOOP1YIV]] : index
             %10 = addi %iv1, %iv3 : index
             // CHECK: [[INDEX1:%.*]] = addi [[BLOCKLOOPXIV]], [[THREADLOOP1XIV]] : index
@@ -54,13 +54,13 @@ module {
         // CHECK: [[TEMP5:%.*]] = muli [[ARG20]], [[ARG9]] : index
         // CHECK: [[THREADLOOP2YLB:%.*]] = addi {{%.*}}, [[TEMP5]] : index
         // CHECK: [[THREADLOOP2YSTEP:%.*]] = muli [[ARG20]], [[ARG15]] : index
-        // CHECK: loop.for [[THREADLOOP2YIV:%.*]] = [[THREADLOOP2YLB]] to {{%.*}} step [[THREADLOOP2YSTEP]]
-        loop.for %iv3 = %c0 to %arg3 step %c1 {
+        // CHECK: scf.for [[THREADLOOP2YIV:%.*]] = [[THREADLOOP2YLB]] to {{%.*}} step [[THREADLOOP2YSTEP]]
+        scf.for %iv3 = %c0 to %arg3 step %c1 {
           // CHECK: [[TEMP6:%.*]] = muli [[ARG20]], [[ARG8]] : index
           // CHECK: [[THREADLOOP2XLB:%.*]] = addi {{%.*}}, [[TEMP6]] : index
           // CHECK: [[THREADLOOP2XSTEP:%.*]] = muli [[ARG20]], [[ARG14]] : index
-          // CHECK: loop.for [[THREADLOOP2XIV:%.*]] = [[THREADLOOP2XLB]] to {{%.*}} step [[THREADLOOP2XSTEP]]
-          loop.for %iv4 = %c1 to %arg4 step %c1 {
+          // CHECK: scf.for [[THREADLOOP2XIV:%.*]] = [[THREADLOOP2XLB]] to {{%.*}} step [[THREADLOOP2XSTEP]]
+          scf.for %iv4 = %c1 to %arg4 step %c1 {
             // CHECK: [[INDEX3:%.*]] = addi [[BLOCKLOOPYIV]], [[THREADLOOP2YIV]] : index
             %13 = addi %iv1, %iv3 : index
             // CHECK: [[INDEX4:%.*]] = addi [[BLOCKLOOPXIV]], [[THREADLOOP2XIV]] : index

diff  --git a/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir
index f6cc5e2398b5..61f62a99cd97 100644
--- a/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir
@@ -7,13 +7,13 @@ module {
     %2 = dim %arg0, 2 : memref<?x?x?xf32>
     %c0 = constant 0 : index
     // CHECK: gpu.launch
-    // CHECK:   loop.for {{.*}} {
-    // CHECK:     loop.for {{.*}} {
-    // CHECK:       loop.for {{.*}} {
+    // CHECK:   scf.for {{.*}} {
+    // CHECK:     scf.for {{.*}} {
+    // CHECK:       scf.for {{.*}} {
     // CHECK:         alloc
-    // CHECK:         loop.for {{.*}} {
-    // CHECK:           loop.for {{.*}} {
-    // CHECK:             loop.for {{.*}} {
+    // CHECK:         scf.for {{.*}} {
+    // CHECK:           scf.for {{.*}} {
+    // CHECK:             scf.for {{.*}} {
     // CHECK:               load
     // CHECK:               load
     // CHECK:               addf
@@ -21,9 +21,9 @@ module {
     // CHECK:             }
     // CHECK-NEXT:      }
     // CHECK-NEXT:    }
-    // CHECK:         loop.for {{.*}} {
-    // CHECK:           loop.for {{.*}} {
-    // CHECK:             loop.for {{.*}} {
+    // CHECK:         scf.for {{.*}} {
+    // CHECK:           scf.for {{.*}} {
+    // CHECK:             scf.for {{.*}} {
     // CHECK:               load
     // CHECK:               load
     // CHECK:               mulf
@@ -32,9 +32,9 @@ module {
     // CHECK-NEXT:      }
     // CHECK-NEXT:    }
     // CHECK:         dealloc
-    loop.for %iv1 = %c0 to %0 step %t1 {
-      loop.for %iv2 = %c0 to %1 step %t2 {
-        loop.for %iv3 = %c0 to %2 step %t3 {
+    scf.for %iv1 = %c0 to %0 step %t1 {
+      scf.for %iv2 = %c0 to %1 step %t2 {
+        scf.for %iv3 = %c0 to %2 step %t3 {
           %6 = alloc(%t1, %t2, %t3) : memref<?x?x?xf32>
           %ubcmp1 = cmpi "slt", %0, %t1 : index
           %ub1 = select %ubcmp1, %0, %t1 : index
@@ -42,9 +42,9 @@ module {
           %ub2 = select %ubcmp2, %1, %t2 : index
           %ubcmp3 = cmpi "slt", %2, %t3 : index
           %ub3 = select %ubcmp3, %2, %t3 : index
-          loop.for %iv4 = %iv1 to %ub1 step %step1 {
-            loop.for %iv5 = %iv2 to %ub2 step %step2 {
-              loop.for %iv6 = %iv3 to %ub3 step %step3 {
+          scf.for %iv4 = %iv1 to %ub1 step %step1 {
+            scf.for %iv5 = %iv2 to %ub2 step %step2 {
+              scf.for %iv6 = %iv3 to %ub3 step %step3 {
                 %7 = load %arg0[%iv4, %iv5, %iv6] : memref<?x?x?xf32>
                 %8 = load %arg1[%iv4, %iv6, %iv5] : memref<?x?x?xf32>
                 %9 = addf %7, %8 : f32
@@ -58,9 +58,9 @@ module {
               }
             }
           }
-          loop.for %iv7 = %iv1 to %ub1 step %step1 {
-            loop.for %iv8 = %iv2 to %ub2 step %step2 {
-              loop.for %iv9 = %iv3 to %ub3 step %step3 {
+          scf.for %iv7 = %iv1 to %ub1 step %step1 {
+            scf.for %iv8 = %iv2 to %ub2 step %step2 {
+              scf.for %iv9 = %iv3 to %ub3 step %step3 {
                 %16 = subi %iv7, %iv1 : index
                 %17 = divi_signed %16, %step1 : index
                 %18 = subi %iv8, %iv2 : index

diff  --git a/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir
index 8858a3e5e631..636dce234145 100644
--- a/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir
@@ -8,30 +8,30 @@ module {
     %3 = dim %arg0, 3 : memref<?x?x?x?xf32>
     %c0 = constant 0 : index
     // CHECK: gpu.launch
-    // CHECK:   loop.for
-    // CHECK:     loop.for
-    // CHECK:       loop.for
+    // CHECK:   scf.for
+    // CHECK:     scf.for
+    // CHECK:       scf.for
     // CHECK:         alloc
-    // CHECK:         loop.for
-    // CHECK:           loop.for
-    // CHECK:             loop.for
-    // CHECK:               loop.for
+    // CHECK:         scf.for
+    // CHECK:           scf.for
+    // CHECK:             scf.for
+    // CHECK:               scf.for
     // CHECK:                 load
     // CHECK:                 load
     // CHECK:                 addf
     // CHECK:                 store
-    // CHECK:         loop.for
-    // CHECK:           loop.for
-    // CHECK:             loop.for
-    // CHECK:               loop.for
+    // CHECK:         scf.for
+    // CHECK:           scf.for
+    // CHECK:             scf.for
+    // CHECK:               scf.for
     // CHECK:                 load
     // CHECK:                 load
     // CHECK:                 mulf
     // CHECK:                 store
     // CHECK:         dealloc
-    loop.for %iv1 = %c0 to %0 step %t1 {
-      loop.for %iv2 = %c0 to %1 step %t2 {
-        loop.for %iv3 = %c0 to %2 step %t3 {
+    scf.for %iv1 = %c0 to %0 step %t1 {
+      scf.for %iv2 = %c0 to %1 step %t2 {
+        scf.for %iv3 = %c0 to %2 step %t3 {
           %6 = alloc(%t1, %t2, %t3, %3) : memref<?x?x?x?xf32>
           %ubcmp1 = cmpi "slt", %0, %t1 : index
           %ub1 = select %ubcmp1, %0, %t1 : index
@@ -41,10 +41,10 @@ module {
           %ub3 = select %ubcmp3, %2, %t3 : index
           %ubcmp4 = cmpi "slt", %3, %t4 : index
          %ub4 = select %ubcmp4, %3, %t4 : index
-          loop.for %iv5 = %iv1 to %ub1 step %step1 {
-            loop.for %iv6 = %iv2 to %ub2 step %step2 {
-              loop.for %iv7 = %iv3 to %ub3 step %step3 {
-                loop.for %iv8 = %c0 to %3 step %step4 {
+          scf.for %iv5 = %iv1 to %ub1 step %step1 {
+            scf.for %iv6 = %iv2 to %ub2 step %step2 {
+              scf.for %iv7 = %iv3 to %ub3 step %step3 {
+                scf.for %iv8 = %c0 to %3 step %step4 {
                   %7 = load %arg0[%iv5, %iv6, %iv7, %iv8] : memref<?x?x?x?xf32>
                   %8 = load %arg1[%iv5, %iv6, %iv7, %iv8] : memref<?x?x?x?xf32>
                   %9 = addf %7, %8 : f32
@@ -59,10 +59,10 @@ module {
               }
             }
           }
-          loop.for %iv9 = %iv1 to %ub1 step %step1 {
-            loop.for %iv10 = %iv2 to %ub2 step %step2 {
-              loop.for %iv11 = %iv3 to %ub3 step %step3 {
-                loop.for %iv12 = %c0 to %3 step %step4 {
+          scf.for %iv9 = %iv1 to %ub1 step %step1 {
+            scf.for %iv10 = %iv2 to %ub2 step %step2 {
+              scf.for %iv11 = %iv3 to %ub3 step %step3 {
+                scf.for %iv12 = %c0 to %3 step %step4 {
                   %18 = subi %iv9, %iv1 : index
                   %19 = divi_signed %18, %step1 : index
                   %20 = subi %iv10, %iv2 : index

diff  --git a/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir
index 4ffb8906d4d6..fe147c5929d0 100644
--- a/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir
@@ -8,24 +8,24 @@ module {
     %0 = dim %arg0, 0 : memref<?x?xf32>
     %1 = dim %arg0, 1 : memref<?x?xf32>
     // CHECK-LABEL: gpu.launch
-    // CHECK:   loop.for
-    // CHECK:     loop.for
-    // CHECK:       loop.for
-    // CHECK:         loop.for
+    // CHECK:   scf.for
+    // CHECK:     scf.for
+    // CHECK:       scf.for
+    // CHECK:         scf.for
     // CHECK:           load
     // CHECK:           load
     // CHECK:           load
     // CHECK:           mulf
     // CHECK:           store
-    loop.for %arg3 = %c0 to %0 step %c2 {
-      loop.for %arg4 = %c0 to %1 step %c2 {
+    scf.for %arg3 = %c0 to %0 step %c2 {
+      scf.for %arg4 = %c0 to %1 step %c2 {
         %4 = std.subview %arg0[%arg3, %arg4][%c2, %c2][%c1, %c1] : memref<?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %7 = std.subview %arg1[%arg3, %arg4][%c2, %c2][%c1, %c1] : memref<?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %10 = std.subview %arg2[%arg3, %arg4][%c2, %c2][%c1, %c1] : memref<?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %11 = dim %4, 0 : memref<?x?xf32, offset: ?, strides: [?, ?]>
         %12 = dim %4, 1 : memref<?x?xf32, offset: ?, strides: [?, ?]>
-        loop.for %arg5 = %c0 to %11 step %c1 {
-          loop.for %arg6 = %c0 to %12 step %c1 {
+        scf.for %arg5 = %c0 to %11 step %c1 {
+          scf.for %arg6 = %c0 to %12 step %c1 {
             %13 = load %4[%arg5, %arg6] : memref<?x?xf32, offset: ?, strides: [?, ?]>
             %14 = load %7[%arg5, %arg6] : memref<?x?xf32, offset: ?, strides: [?, ?]>
             %15 = load %10[%arg5, %arg6] : memref<?x?xf32, offset: ?, strides: [?, ?]>

diff  --git a/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir b/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir
index cda416242b84..71555fab685b 100644
--- a/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir
@@ -7,10 +7,10 @@ func @foo(%arg0: memref<?xf32>, %arg1 : index) {
   %c3 = constant 3 : index
   // CHECK:      subi %{{.*}}, %{{.*}} : index
   // CHECK-NEXT: %[[range_i:.*]] = divi_signed {{.*}}, %{{.*}} : index
-  loop.for %i0 = %c0 to %c42 step %c3 {
+  scf.for %i0 = %c0 to %c42 step %c3 {
     // CHECK:      subi %{{.*}}, %{{.*}} : index
     // CHECK-NEXT: %[[range_j:.*]] = divi_signed {{.*}}, %{{.*}} : index
-    loop.for %i1 = %c3 to %c42 step %arg1 {
+    scf.for %i1 = %c3 to %c42 step %arg1 {
       // CHECK:      gpu.launch
       // CHECK-SAME: blocks
       // CHECK-SAME: threads

diff  --git a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
index ca5fb36091d7..52ed94cae567 100644
--- a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir
@@ -7,7 +7,7 @@ func @parallel_loop_bidy_bidx(%arg0 : index, %arg1 : index, %arg2 : index,
                               %buf : memref<?x?xf32>,
                               %res : memref<?x?xf32>) {
   %step = constant 2 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%arg4, %step)  {
     %val = load %buf[%i0, %i1] : memref<?x?xf32>
     store %val, %res[%i1, %i0] : memref<?x?xf32>
@@ -47,9 +47,9 @@ func @parallel_loop_tiled(%arg0 : index, %arg1 : index, %arg2 : index,
   %zero = constant 0 : index
   %one = constant 1 : index
   %four = constant 4 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%four, %four)  {
-    loop.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
+    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                             step (%one, %one)  {
       %idx0 = addi %i0, %si0 : index
       %idx1 = addi %i1, %si1 : index
@@ -104,7 +104,7 @@ func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                              %buf : memref<?x?xf32>,
                              %res : memref<?x?xf32>) {
   %step = constant 2 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%arg4, %step)  {
     %val = load %buf[%i0, %i1] : memref<?x?xf32>
     store %val, %res[%i1, %i0] : memref<?x?xf32>
@@ -126,7 +126,7 @@ func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
 // CHECK:           [[VAL_68:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_61]], [[VAL_59]], [[VAL_63]]]
 // CHECK:           gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) {
 // CHECK:             [[VAL_81:%.*]] = affine.apply #[[MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]]
-// CHECK:             loop.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] {
+// CHECK:             scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] {
 // CHECK:               [[VAL_83:%.*]] = load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref<?x?xf32>
 // CHECK:               store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref<?x?xf32>
 // CHECK:             }
@@ -147,9 +147,9 @@ func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
   %zero = constant 0 : index
   %one = constant 1 : index
   %four = constant 4 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%four, %four)  {
-    loop.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
+    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                             step (%one, %one)  {
       %idx0 = addi %i0, %si0 : index
       %idx1 = addi %i1, %si1 : index
@@ -180,9 +180,9 @@ func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
 // CHECK:           [[VAL_95:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_92]], [[VAL_90]], [[VAL_91]]]
 // CHECK:           gpu.launch blocks([[VAL_96:%.*]], [[VAL_97:%.*]], [[VAL_98:%.*]]) in ([[VAL_99:%.*]] = [[VAL_93]], [[VAL_100:%.*]] = [[VAL_94]], [[VAL_101:%.*]] = [[VAL_93]]) threads([[VAL_102:%.*]], [[VAL_103:%.*]], [[VAL_104:%.*]]) in ([[VAL_105:%.*]] = [[VAL_93]], [[VAL_106:%.*]] = [[VAL_95]], [[VAL_107:%.*]] = [[VAL_93]]) {
 // CHECK:             [[VAL_108:%.*]] = affine.apply #[[MAP1]]([[VAL_97]]){{\[}}[[VAL_92]], [[VAL_84]]]
-// CHECK:             loop.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] {
+// CHECK:             scf.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] {
 // CHECK:               [[VAL_110:%.*]] = affine.apply #[[MAP1]]([[VAL_103]]){{\[}}[[VAL_91]], [[VAL_90]]]
-// CHECK:               loop.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] {
+// CHECK:               scf.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] {
 // CHECK:                 [[VAL_112:%.*]] = addi [[VAL_108]], [[VAL_110]] : index
 // CHECK:                 [[VAL_113:%.*]] = addi [[VAL_109]], [[VAL_111]] : index
 // CHECK:                 [[VAL_114:%.*]] = load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref<?x?xf32>
@@ -210,7 +210,7 @@ module {
     %c2 = constant 2 : index
     %0 = dim %arg0, 0 : memref<?x?xf32, #map0>
     %1 = dim %arg0, 1 : memref<?x?xf32, #map0>
-    loop.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) {
+    scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) {
       %2 = dim %arg0, 0 : memref<?x?xf32, #map0>
       %3 = affine.min #map1(%arg3)[%2]
       %squared_min = muli %3, %3 : index
@@ -227,15 +227,15 @@ module {
       %14 = dim %arg2, 1 : memref<?x?xf32, #map0>
       %15 = affine.min #map2(%arg4)[%14]
       %16 = std.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
-      loop.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) {
+      scf.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) {
         %17 = load %6[%arg5, %arg6] : memref<?x?xf32, #map3>
         %18 = load %11[%arg5, %arg6] : memref<?x?xf32, #map3>
         %19 = load %16[%arg5, %arg6] : memref<?x?xf32, #map3>
         %20 = addf %17, %18 : f32
         store %20, %16[%arg5, %arg6] : memref<?x?xf32, #map3>
-        loop.yield
+        scf.yield
       } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 4 : i64}]}
-      loop.yield
+      scf.yield
     } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 0 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 1 : i64}]}
     return
   }
@@ -285,10 +285,10 @@ module {
 // CHECK:             [[VAL_44:%.*]] = subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] {{\[}}[[VAL_3]], [[VAL_3]]] : memref<?x?xf32, #[[MAP0]]> to memref<?x?xf32, #[[MAP5]]>
 // CHECK:             [[VAL_45:%.*]] = affine.apply #[[MAP2]]([[VAL_22]]){{\[}}[[VAL_3]], [[VAL_4]]]
 // CHECK:             [[VAL_46:%.*]] = cmpi "slt", [[VAL_45]], [[VAL_31_SQUARED]] : index
-// CHECK:             loop.if [[VAL_46]] {
+// CHECK:             scf.if [[VAL_46]] {
 // CHECK:               [[VAL_47:%.*]] = affine.apply #[[MAP2]]([[VAL_23]]){{\[}}[[VAL_3]], [[VAL_4]]]
 // CHECK:               [[VAL_48:%.*]] = cmpi "slt", [[VAL_47]], [[VAL_33]] : index
-// CHECK:               loop.if [[VAL_48]] {
+// CHECK:               scf.if [[VAL_48]] {
 // CHECK:                 [[VAL_49:%.*]] = load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
 // CHECK:                 [[VAL_50:%.*]] = load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
 // CHECK:                 [[VAL_51:%.*]] = load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
@@ -312,8 +312,8 @@ func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
                           %res : memref<?x?xf32>) {
   %four = constant 4 : index
   // expected-error@+2 {{cannot redefine the bound for processor 1}}
-  // expected-error@+1 {{failed to legalize operation 'loop.parallel'}}
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%four, %four)  {
   } { mapping = [
       {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
@@ -333,11 +333,11 @@ func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : ind
   %zero = constant 0 : index
   %one = constant 1 : index
   %four = constant 4 : index
-  // expected-error@+1 {{failed to legalize operation 'loop.parallel'}}
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%four, %four)  {
     // expected-error@+1 {{cannot derive loop-invariant upper bound}}
-    loop.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
+    scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
                                             step (%one, %one)  {
       %idx0 = addi %i0, %si0 : index
       %idx1 = addi %i1, %si1 : index

diff  --git a/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir b/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir
index 2861b33c9e7b..e4ec4a14df08 100644
--- a/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir
@@ -7,14 +7,14 @@ module {
     %c0 = constant 0 : index
     %c1 = constant 1 : index
     // CHECK: gpu.launch
-    // CHECK:   loop.for
-    // CHECK:     loop.for
+    // CHECK:   scf.for
+    // CHECK:     scf.for
     // CHECK:       load
     // CHECK:       load
     // CHECK:       add
     // CHECK:       store
-    loop.for %iv1 = %c0 to %0 step %c1 {
-      loop.for %iv2 = %c0 to %1 step %c1 {
+    scf.for %iv1 = %c0 to %0 step %c1 {
+      scf.for %iv2 = %c0 to %1 step %c1 {
          %12 = load %arg0[%iv1, %iv2] : memref<?x?xf32>
          %13 = load %arg1[%iv2, %iv1] : memref<?x?xf32>
          %14 = addf %12, %13 : f32

diff  --git a/mlir/test/Conversion/LoopsToGPU/step_one.mlir b/mlir/test/Conversion/LoopsToGPU/step_one.mlir
index a088880e5821..d5fc395871e6 100644
--- a/mlir/test/Conversion/LoopsToGPU/step_one.mlir
+++ b/mlir/test/Conversion/LoopsToGPU/step_one.mlir
@@ -57,7 +57,7 @@ func @step_1(%A : memref<?x?x?x?xf32>, %B : memref<?x?x?x?xf32>) {
         // CHECK-22-SAME: blocks
         // CHECK-22-SAME: threads
 
-          // Remapping of the loop induction variables in the last mapped loop.
+          // Remapping of the loop induction variables in the last mapped loop.
           // CHECK-22:        %[[i:.*]] = addi %{{.*}}, %{{.*}} : index
           // CHECK-22-NEXT:   %[[j:.*]] = addi %{{.*}}, %{{.*}} : index
           // CHECK-22-NEXT:   %[[ii:.*]] = addi %{{.*}}, %{{.*}} : index

diff  --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
index 7b12254b6d35..13796c012991 100644
--- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
@@ -68,9 +68,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-NEXT:      affine.for %[[I2:.*]] = 0 to %{{.*}} {
   // CHECK-NEXT:        affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
   //      CHECK:          %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
-  // CHECK-NEXT:          loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
-  // CHECK-NEXT:            loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
-  // CHECK-NEXT:              loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
+  // CHECK-NEXT:          scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
+  // CHECK-NEXT:            scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
+  // CHECK-NEXT:              scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
   // CHECK-NEXT:                {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
   // CHECK-NEXT:                {{.*}} = affine.apply #[[SUB]]()[%{{.*}}]
   // CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}} : index
@@ -147,9 +147,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
   // CHECK:               %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
   // CHECK-NEXT:          %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32>
   //      CHECK:          store %{{.*}}, {{.*}} : memref<vector<5x4x3xf32>>
-  // CHECK-NEXT:          loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
-  // CHECK-NEXT:            loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
-  // CHECK-NEXT:              loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
+  // CHECK-NEXT:          scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
+  // CHECK-NEXT:            scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
+  // CHECK-NEXT:              scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
   // CHECK-NEXT:                {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
   // CHECK-NEXT:                {{.*}} = affine.apply #[[SUB]]()[%{{.*}}]
   // CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
@@ -228,7 +228,7 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<17
   // CHECK:   %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
   // CHECK:   %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index
   // CHECK:   %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1
-  // CHECK:   loop.if %[[cond1]] {
+  // CHECK:   scf.if %[[cond1]] {
   // CHECK:     %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]]  {permutation_map = #[[MAP1]]} : memref<?x?xf32>, vector<15xf32>
   // CHECK:     store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>>
   // CHECK:   } else {
@@ -262,7 +262,7 @@ func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vecto
   // CHECK:   %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
   // CHECK:   %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index
   // CHECK:   %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1
-  // CHECK:   loop.if %[[cond1]] {
+  // CHECK:   scf.if %[[cond1]] {
   // CHECK:     %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>>
   // CHECK:     vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {permutation_map = #[[MAP1]]} : vector<15xf32>, memref<?x?xf32>
   // CHECK:   }

diff  --git a/mlir/test/Conversion/convert-to-cfg.mlir b/mlir/test/Conversion/convert-to-cfg.mlir
index 74ae6aeffd9c..5ebfbc409e74 100644
--- a/mlir/test/Conversion/convert-to-cfg.mlir
+++ b/mlir/test/Conversion/convert-to-cfg.mlir
@@ -12,7 +12,7 @@
 //  CHECK-NEXT:  ^bb3:   // pred: ^bb1
 //  CHECK-NEXT:    return
 func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) {
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
     %c1 = constant 1 : index
   }
   return
@@ -39,9 +39,9 @@ func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) {
 //  CHECK-NEXT:  ^bb6:   // pred: ^bb1
 //  CHECK-NEXT:    return
 func @simple_std_2_for_loops(%arg0 : index, %arg1 : index, %arg2 : index) {
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
     %c1 = constant 1 : index
-    loop.for %i1 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg0 to %arg1 step %arg2 {
       %c1_0 = constant 1 : index
     }
   }
@@ -56,7 +56,7 @@ func @simple_std_2_for_loops(%arg0 : index, %arg1 : index, %arg2 : index) {
 //  CHECK-NEXT:   ^bb2:   // 2 preds: ^bb0, ^bb1
 //  CHECK-NEXT:     return
 func @simple_std_if(%arg0: i1) {
-  loop.if %arg0 {
+  scf.if %arg0 {
     %c1 = constant 1 : index
   }
   return
@@ -73,7 +73,7 @@ func @simple_std_if(%arg0: i1) {
 //  CHECK-NEXT:   ^bb3:   // 2 preds: ^bb1, ^bb2
 //  CHECK-NEXT:     return
 func @simple_std_if_else(%arg0: i1) {
-  loop.if %arg0 {
+  scf.if %arg0 {
     %c1 = constant 1 : index
   } else {
     %c1_0 = constant 1 : index
@@ -97,9 +97,9 @@ func @simple_std_if_else(%arg0: i1) {
 //  CHECK-NEXT: ^bb5:   // 2 preds: ^bb0, ^bb4
 //  CHECK-NEXT:   return
 func @simple_std_2_ifs(%arg0: i1) {
-  loop.if %arg0 {
+  scf.if %arg0 {
     %c1 = constant 1 : index
-    loop.if %arg0 {
+    scf.if %arg0 {
       %c1_0 = constant 1 : index
     } else {
       %c1_1 = constant 1 : index
@@ -134,11 +134,11 @@ func @simple_std_2_ifs(%arg0: i1) {
 //  CHECK-NEXT:     return
 //  CHECK-NEXT: }
 func @simple_std_for_loop_with_2_ifs(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i1) {
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
     %c1 = constant 1 : index
-    loop.if %arg3 {
+    scf.if %arg3 {
       %c1_0 = constant 1 : index
-      loop.if %arg3 {
+      scf.if %arg3 {
         %c1_1 = constant 1 : index
       } else {
         %c1_2 = constant 1 : index
@@ -151,14 +151,14 @@ func @simple_std_for_loop_with_2_ifs(%arg0 : index, %arg1 : index, %arg2 : index
 // CHECK-LABEL: func @simple_if_yield
 func @simple_if_yield(%arg0: i1) -> (i1, i1) {
 // CHECK:   cond_br %{{.*}}, ^[[then:.*]], ^[[else:.*]]
-  %0:2 = loop.if %arg0 -> (i1, i1) {
+  %0:2 = scf.if %arg0 -> (i1, i1) {
 // CHECK: ^[[then]]:
 // CHECK:   %[[v0:.*]] = constant 0
 // CHECK:   %[[v1:.*]] = constant 1
 // CHECK:   br ^[[dom:.*]](%[[v0]], %[[v1]] : i1, i1)
     %c0 = constant 0 : i1
     %c1 = constant 1 : i1
-    loop.yield %c0, %c1 : i1, i1
+    scf.yield %c0, %c1 : i1, i1
   } else {
 // CHECK: ^[[else]]:
 // CHECK:   %[[v2:.*]] = constant 0
@@ -166,7 +166,7 @@ func @simple_if_yield(%arg0: i1) -> (i1, i1) {
 // CHECK:   br ^[[dom]](%[[v3]], %[[v2]] : i1, i1)
     %c0 = constant 0 : i1
     %c1 = constant 1 : i1
-    loop.yield %c1, %c0 : i1, i1
+    scf.yield %c1, %c0 : i1, i1
   }
 // CHECK: ^[[dom]](%[[arg1:.*]]: i1, %[[arg2:.*]]: i1):
 // CHECK:   br ^[[cont:.*]]
@@ -178,45 +178,45 @@ func @simple_if_yield(%arg0: i1) -> (i1, i1) {
 // CHECK-LABEL: func @nested_if_yield
 func @nested_if_yield(%arg0: i1) -> (index) {
 // CHECK:   cond_br %{{.*}}, ^[[first_then:.*]], ^[[first_else:.*]]
-  %0 = loop.if %arg0 -> i1 {
+  %0 = scf.if %arg0 -> i1 {
 // CHECK: ^[[first_then]]:
     %1 = constant 1 : i1
 // CHECK:   br ^[[first_dom:.*]]({{.*}})
-    loop.yield %1 : i1
+    scf.yield %1 : i1
   } else {
 // CHECK: ^[[first_else]]:
     %2 = constant 0 : i1
 // CHECK:   br ^[[first_dom]]({{.*}})
-    loop.yield %2 : i1
+    scf.yield %2 : i1
   }
 // CHECK: ^[[first_dom]](%[[arg1:.*]]: i1):
 // CHECK:   br ^[[first_cont:.*]]
 // CHECK: ^[[first_cont]]:
 // CHECK:   cond_br %[[arg1]], ^[[second_outer_then:.*]], ^[[second_outer_else:.*]]
-  %1 = loop.if %0 -> index {
+  %1 = scf.if %0 -> index {
 // CHECK: ^[[second_outer_then]]:
 // CHECK:   cond_br %arg0, ^[[second_inner_then:.*]], ^[[second_inner_else:.*]]
-    %3 = loop.if %arg0 -> index {
+    %3 = scf.if %arg0 -> index {
 // CHECK: ^[[second_inner_then]]:
       %4 = constant 40 : index
 // CHECK:   br ^[[second_inner_dom:.*]]({{.*}})
-      loop.yield %4 : index
+      scf.yield %4 : index
     } else {
 // CHECK: ^[[second_inner_else]]:
       %5 = constant 41 : index
 // CHECK:   br ^[[second_inner_dom]]({{.*}})
-      loop.yield %5 : index
+      scf.yield %5 : index
     }
 // CHECK: ^[[second_inner_dom]](%[[arg2:.*]]: index):
 // CHECK:   br ^[[second_inner_cont:.*]]
 // CHECK: ^[[second_inner_cont]]:
 // CHECK:   br ^[[second_outer_dom:.*]]({{.*}})
-    loop.yield %3 : index
+    scf.yield %3 : index
   } else {
 // CHECK: ^[[second_outer_else]]:
     %6 = constant 42 : index
 // CHECK:   br ^[[second_outer_dom]]({{.*}}
-    loop.yield %6 : index
+    scf.yield %6 : index
   }
 // CHECK: ^[[second_outer_dom]](%[[arg3:.*]]: index):
 // CHECK:   br ^[[second_outer_cont:.*]]
@@ -251,7 +251,7 @@ func @nested_if_yield(%arg0: i1) -> (index) {
 func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                         %arg3 : index, %arg4 : index) {
   %step = constant 1 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%arg4, %step) {
     %c1 = constant 1 : index
   }
@@ -278,9 +278,9 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 func @for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> (f32, f32) {
   %s0 = constant 0.0 : f32
   %s1 = constant 1.0 : f32
-  %result:2 = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %sj = %s1) -> (f32, f32) {
+  %result:2 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %sj = %s1) -> (f32, f32) {
     %sn = addf %si, %sj : f32
-    loop.yield %sn, %sn : f32, f32
+    scf.yield %sn, %sn : f32, f32
   }
   return %result#0, %result#1 : f32, f32
 }
@@ -304,12 +304,12 @@ func @for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> (f32, f32) {
 // CHECK:         return %[[ARG_OUT]] : f32
 func @nested_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> f32 {
   %s0 = constant 1.0 : f32
-  %r = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%iter = %s0) -> (f32) {
-    %result = loop.for %i1 = %arg0 to %arg1 step %arg2 iter_args(%si = %iter) -> (f32) {
+  %r = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%iter = %s0) -> (f32) {
+    %result = scf.for %i1 = %arg0 to %arg1 step %arg2 iter_args(%si = %iter) -> (f32) {
       %sn = addf %si, %si : f32
-      loop.yield %sn : f32
+      scf.yield %sn : f32
     }
-    loop.yield %result : f32
+    scf.yield %result : f32
   }
   return %r : f32
 }
@@ -333,7 +333,7 @@ func @simple_parallel_reduce_loop(%arg0: index, %arg1: index,
   // CHECK:   %[[COMP:.*]] = cmpi "slt", %[[ITER]], %[[UB]]
   // CHECK:   cond_br %[[COMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]]
 
-  // Bodies of loop.reduce operations are folded into the main loop body. The
+  // Bodies of scf.reduce operations are folded into the main loop body. The
   // result of this partial reduction is passed as argument to the condition
   // block.
   // CHECK: ^[[BODY]]:
@@ -345,12 +345,12 @@ func @simple_parallel_reduce_loop(%arg0: index, %arg1: index,
   // The continuation block has access to the (last value of) reduction.
   // CHECK: ^[[CONTINUE]]:
   // CHECK:   return %[[ITER_ARG]]
-  %0 = loop.parallel (%i) = (%arg0) to (%arg1) step (%arg2) init(%arg3) -> f32 {
+  %0 = scf.parallel (%i) = (%arg0) to (%arg1) step (%arg2) init(%arg3) -> f32 {
     %cst = constant 42.0 : f32
-    loop.reduce(%cst) : f32 {
+    scf.reduce(%cst) : f32 {
     ^bb0(%lhs: f32, %rhs: f32):
       %1 = mulf %lhs, %rhs : f32
-      loop.reduce.return %1 : f32
+      scf.reduce.return %1 : f32
     }
   }
   return %0 : f32
@@ -380,20 +380,20 @@ func @parallel_reduce_loop(%arg0 : index, %arg1 : index, %arg2 : index,
   // CHECK:   return %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]]
   %step = constant 1 : index
   %init = constant 42 : i64
-  %0:2 = loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  %0:2 = scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                        step (%arg4, %step) init(%arg5, %init) -> (f32, i64) {
     %cf = constant 42.0 : f32
-    loop.reduce(%cf) : f32 {
+    scf.reduce(%cf) : f32 {
     ^bb0(%lhs: f32, %rhs: f32):
       %1 = addf %lhs, %rhs : f32
-      loop.reduce.return %1 : f32
+      scf.reduce.return %1 : f32
     }
 
     %2 = call @generate() : () -> i64
-    loop.reduce(%2) : i64 {
+    scf.reduce(%2) : i64 {
     ^bb0(%lhs: i64, %rhs: i64):
       %3 = or %lhs, %rhs : i64
-      loop.reduce.return %3 : i64
+      scf.reduce.return %3 : i64
     }
   }
   return %0#0, %0#1 : f32, i64
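
Schematically, the CHECK lines for simple_parallel_reduce_loop above pin down the following CFG shape -- a sketch with illustrative block and value names, not the literal pass output:

    br ^cond(%lb, %init : index, f32)
  ^cond(%iv: index, %acc: f32):       // the condition block threads the partial result
    %cmp = cmpi "slt", %iv, %ub : index
    cond_br %cmp, ^body, ^cont
  ^body:
    %cst = constant 42.0 : f32
    %red = mulf %acc, %cst : f32      // the scf.reduce body, folded into the loop body
    %ivn = addi %iv, %step : index
    br ^cond(%ivn, %red : index, f32)
  ^cont:
    return %acc : f32                 // the last value of the reduction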

diff  --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
index 94943df75bd1..b577e229ba76 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
@@ -363,7 +363,7 @@ func @vec_rejected_11(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
   return
 }
 
-// This should not vectorize due to the sequential dependence in the loop.
+// This should not vectorize due to the sequential dependence in the loop.
 // CHECK-LABEL: @vec_rejected_sequential
 func @vec_rejected_sequential(%A : memref<?xf32>) {
   %N = dim %A, 0 : memref<?xf32>

diff  --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir
index 3572b0a12e20..9995c1908d5b 100644
--- a/mlir/test/Dialect/Affine/dma-generate.mlir
+++ b/mlir/test/Dialect/Affine/dma-generate.mlir
@@ -375,7 +375,7 @@ func @dma_loop_straightline_interspersed() {
   return
 }
 // There are three regions here - the 'load' preceding the loop, the loop
-// itself, and the operations appearing after the loop.
+// itself, and the operations appearing after the loop.
 // CHECK:       alloc() : memref<256xf32>
 // CHECK-NEXT:  alloc() : memref<1xf32, 2>
 // CHECK-NEXT:  alloc() : memref<1xi32>
@@ -583,7 +583,7 @@ func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
 // With fast mem capacity set to 16 KB, the DMAs if placed under %k will fit.
 // However, the region of arg2 accessed is invariant w.r.t the %k loop unlike
 // %arg0 and %arg1. So, its DMA can be hoisted one level up and placed under
-// %j, while the DMAs for arg0 and arg1 appear right under the %k loop.
+// %j, while the DMAs for arg0 and arg1 appear right under the %k loop.
 
 #map0 = affine_map<(d0) -> (d0)>
 #map1 = affine_map<(d0) -> (d0 + 4)>

diff  --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir
index defb49a538d0..5cc0c3ddcdfb 100644
--- a/mlir/test/Dialect/Affine/slicing-utils.mlir
+++ b/mlir/test/Dialect/Affine/slicing-utils.mlir
@@ -229,7 +229,7 @@ func @slicing_test_2() {
       // BWD: matched: %[[b:.*]] {{.*}} backward static slice:
       // BWD: affine.for {{.*}}
 
-      // affine.for appears in the body of loop.for
+      // affine.for appears in the body of scf.for
       // BWD: affine.for {{.*}}
 
       // affine.for appears as a proper op in the backward slice
@@ -239,10 +239,10 @@ func @slicing_test_2() {
       // BWD: matched: %[[c:.*]] {{.*}} backward static slice:
       // BWD: affine.for {{.*}}
 
-      // affine.for appears in the body of loop.for
+      // affine.for appears in the body of scf.for
       // BWD-NEXT: affine.for {{.*}}
 
-      // affine.for only appears in the body of loop.for
+      // affine.for only appears in the body of scf.for
       // BWD-NOT: affine.for {{.*}}
       %c = "slicing-test-op"(%i0): (index) -> index
     }
@@ -257,9 +257,9 @@ func @slicing_test_3() {
   %f = constant 1.0 : f32
   %c = "slicing-test-op"(%f): (f32) -> index
   // FWD: matched: {{.*}} (f32) -> index forward static slice:
-  // FWD: loop.for {{.*}}
+  // FWD: scf.for {{.*}}
   // FWD: matched: {{.*}} (index, index) -> index forward static slice:
-  loop.for %i2 = %c to %c step %c {
+  scf.for %i2 = %c to %c step %c {
     %d = "slicing-test-op"(%c, %i2): (index, index) -> index
   }
   return

diff  --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index cd9fb63f1585..a7d65d154544 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -550,7 +550,7 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
 // CHECK-NEXT:  return
 
 // The trip count here is a multiple of four, but this can be inferred only
-// through composition. Check for no cleanup loop.
+// through composition. Check for no cleanup loop.
 // UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound
 func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
   %T = affine.apply affine_map<(d0) -> (4*d0 + 1)>(%M)

diff  --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir
index 6721738a1048..cdbda37a17bc 100644
--- a/mlir/test/Dialect/GPU/mapping.mlir
+++ b/mlir/test/Dialect/GPU/mapping.mlir
@@ -5,9 +5,9 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
   %zero = constant 0 : index
   %one = constant 1 : index
   %four = constant 4 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%four, %four)  {
-    loop.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
+    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                             step (%one, %one)  {
     }
   }
@@ -15,8 +15,8 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 }
 
 // CHECK-LABEL:   func @parallel_loop(
-// CHECK:           loop.parallel 
-// CHECK:             loop.parallel 
+// CHECK:           scf.parallel 
+// CHECK:             scf.parallel 
 // CHECK:      {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64},
 // CHECK-SAME:             {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 4 : i64}]}
 // CHECK:      {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 0 : i64},
@@ -30,11 +30,11 @@ func @parallel_loop_4d(%arg0 : index, %arg1 : index, %arg2 : index,
   %zero = constant 0 : index
   %one = constant 1 : index
   %four = constant 4 : index
-  loop.parallel (%i0, %i1, %i2, %i3) = (%zero, %zero, %zero, %zero) to (%arg0, %arg1, %arg2, %arg3)
+  scf.parallel (%i0, %i1, %i2, %i3) = (%zero, %zero, %zero, %zero) to (%arg0, %arg1, %arg2, %arg3)
                                        step (%four, %four, %four, %four)  {
-    loop.parallel (%si0, %si1, %si2, %si3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
+    scf.parallel (%si0, %si1, %si2, %si3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
                                              step (%one, %one, %one, %one)  {
-      loop.parallel (%ti0, %ti1, %ti2, %ti3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
+      scf.parallel (%ti0, %ti1, %ti2, %ti3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
                                                step (%one, %one, %one, %one)  {
       }
     }
@@ -43,9 +43,9 @@ func @parallel_loop_4d(%arg0 : index, %arg1 : index, %arg2 : index,
 }
 
 // CHECK-LABEL:   func @parallel_loop_4d(
-// CHECK:           loop.parallel 
-// CHECK:             loop.parallel 
-// CHECK:               loop.parallel
+// CHECK:           scf.parallel 
+// CHECK:             scf.parallel 
+// CHECK:               scf.parallel
 // CHECK:      {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 6 : i64},
 // CHECK-SAME:             {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 6 : i64},
 // CHECK-SAME:             {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 6 : i64},

diff  --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir
index 7a51048c858f..9df7bf19dff6 100644
--- a/mlir/test/Dialect/GPU/promotion.mlir
+++ b/mlir/test/Dialect/GPU/promotion.mlir
@@ -21,9 +21,9 @@ gpu.module @foo {
     // Verify that loops for the copy are emitted. We only check the number of
     // loops here since their bounds are produced by mapLoopToProcessorIds,
     // tested separately.
-    // CHECK: loop.for %[[i0:.*]] =
-    // CHECK:   loop.for %[[i1:.*]] =
-    // CHECK:     loop.for %[[i2:.*]] =
+    // CHECK: scf.for %[[i0:.*]] =
+    // CHECK:   scf.for %[[i1:.*]] =
+    // CHECK:     scf.for %[[i2:.*]] =
 
     // Verify that the copy is emitted and uses only the last two loops.
     // CHECK:       %[[v:.*]] = load %[[arg]][%[[i1]], %[[i2]]]
@@ -37,9 +37,9 @@ gpu.module @foo {
     // Verify that loops for the copy are emitted. We only check the number of
     // loops here since their bounds are produced by mapLoopToProcessorIds,
     // tested separately.
-    // CHECK: loop.for %[[i0:.*]] =
-    // CHECK:   loop.for %[[i1:.*]] =
-    // CHECK:     loop.for %[[i2:.*]] =
+    // CHECK: scf.for %[[i0:.*]] =
+    // CHECK:   scf.for %[[i1:.*]] =
+    // CHECK:     scf.for %[[i2:.*]] =
 
     // Verify that the copy is emitted and uses only the last two loops.
     // CHECK:       %[[v:.*]] = load %[[promoted]][%[[i1]], %[[i2]]]
@@ -73,11 +73,11 @@ gpu.module @foo {
     // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}
 
     // Verify that loops for the copy are emitted.
-    // CHECK: loop.for %[[i0:.*]] =
-    // CHECK:   loop.for %[[i1:.*]] =
-    // CHECK:     loop.for %[[i2:.*]] =
-    // CHECK:       loop.for %[[i3:.*]] =
-    // CHECK:         loop.for %[[i4:.*]] =
+    // CHECK: scf.for %[[i0:.*]] =
+    // CHECK:   scf.for %[[i1:.*]] =
+    // CHECK:     scf.for %[[i2:.*]] =
+    // CHECK:       scf.for %[[i3:.*]] =
+    // CHECK:         scf.for %[[i4:.*]] =
 
     // Verify that the copy is emitted.
     // CHECK:           %[[v:.*]] = load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
@@ -88,11 +88,11 @@ gpu.module @foo {
     "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()
 
    // Verify that loops for the copy are emitted.
-    // CHECK: loop.for %[[i0:.*]] =
-    // CHECK:   loop.for %[[i1:.*]] =
-    // CHECK:     loop.for %[[i2:.*]] =
-    // CHECK:       loop.for %[[i3:.*]] =
-    // CHECK:         loop.for %[[i4:.*]] =
+    // CHECK: scf.for %[[i0:.*]] =
+    // CHECK:   scf.for %[[i1:.*]] =
+    // CHECK:     scf.for %[[i2:.*]] =
+    // CHECK:       scf.for %[[i3:.*]] =
+    // CHECK:         scf.for %[[i4:.*]] =
 
     // Verify that the copy is emitted.
     // CHECK:           %[[v:.*]] = load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

diff  --git a/mlir/test/Dialect/Linalg/fusion-2-level.mlir b/mlir/test/Dialect/Linalg/fusion-2-level.mlir
index b6a0cabf57f7..f4864945f5d1 100644
--- a/mlir/test/Dialect/Linalg/fusion-2-level.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-2-level.mlir
@@ -13,18 +13,18 @@ func @f1(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>, %B: memref<?x?xf32, of
   %1 = dim %C, 1 : memref<?x?xf32, offset: ?, strides: [?, 1]>
   %2 = dim %D, 1 : memref<?x?xf32, offset: ?, strides: [?, 1]>
   linalg.matmul(%A, %B, %C) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>
-  loop.for %arg5 = %c0 to %0 step %c20 {
-    loop.for %arg6 = %c0 to %2 step %c30 {
-      loop.for %arg7 = %c0 to %1 step %c40 {
+  scf.for %arg5 = %c0 to %0 step %c20 {
+    scf.for %arg6 = %c0 to %2 step %c30 {
+      scf.for %arg7 = %c0 to %1 step %c40 {
         %5 = std.subview %C[%arg5, %arg7][%c20, %c40][%c1, %c1] : memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %7 = std.subview %D[%arg7, %arg6][%c40, %c30][%c1, %c1]: memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %8 = std.subview %E[%arg5, %arg6][%c20, %c40][%c1, %c1] : memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %9 = dim %5, 0 : memref<?x?xf32, offset: ?, strides: [?, ?]>
         %10 = dim %5, 1 : memref<?x?xf32, offset: ?, strides: [?, ?]>
         %11 = dim %7, 1 : memref<?x?xf32, offset: ?, strides: [?, ?]>
-        loop.for %arg8 = %c0 to %9 step %c2 {
-          loop.for %arg9 = %c0 to %11 step %c3 {
-            loop.for %arg10 = %c0 to %10 step %c4 {
+        scf.for %arg8 = %c0 to %9 step %c2 {
+          scf.for %arg9 = %c0 to %11 step %c3 {
+            scf.for %arg10 = %c0 to %10 step %c4 {
               %14 = std.subview %5[%arg8, %arg10][%c2, %c4][%c1, %c1] : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
               %16 = std.subview %7[%arg10, %arg9][%c4, %c3][%c1, %c1]: memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
               %17 = std.subview %8[%arg8, %arg9][%c2, %c4][%c1, %c1] : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -39,11 +39,11 @@ func @f1(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>, %B: memref<?x?xf32, of
 }
 // CHECK-LABEL: func @f1
 //       CHECK:   (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}})
-//      CHECK: loop.for
-//      CHECK:   loop.for
-//      CHECK:     loop.for
-//      CHECK:      loop.for
-//      CHECK:        loop.for
-//      CHECK:          loop.for
+//      CHECK: scf.for
+//      CHECK:   scf.for
+//      CHECK:     scf.for
+//      CHECK:      scf.for
+//      CHECK:        scf.for
+//      CHECK:          scf.for
 //      CHECK:            linalg.matmul
 //      CHECK:            linalg.matmul

diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir
index 3f20fd842675..2f472aa6aaf2 100644
--- a/mlir/test/Dialect/Linalg/fusion.mlir
+++ b/mlir/test/Dialect/Linalg/fusion.mlir
@@ -18,9 +18,9 @@ func @f1(%A: memref<?x?xf32, offset: 0, strides: [?, 1]>,
     memref<?x?xf32, offset: 0, strides: [?, 1]>,
     memref<?x?xf32, offset: 0, strides: [?, 1]>
   %c1 = constant 1 : index
-  loop.for %arg5 = %c0 to %0 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %1 step %c4 {
+  scf.for %arg5 = %c0 to %0 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %1 step %c4 {
         %5 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, 1]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -41,9 +41,9 @@ func @f1(%A: memref<?x?xf32, offset: 0, strides: [?, 1]>,
 }
 // CHECK-LABEL: func @f1
 // CHECK:   (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}})
-// CHECK: loop.for
-// CHECK:   loop.for
-// CHECK:     loop.for
+// CHECK: scf.for
+// CHECK:   scf.for
+// CHECK:     scf.for
 // CHECK:       linalg.matmul
 // CHECK:       linalg.matmul
 
@@ -68,9 +68,9 @@ func @f2(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
   %0 = dim %C, 0 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %1 = dim %C, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %2 = dim %D, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %0 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %1 step %c4 {
+  scf.for %arg5 = %c0 to %0 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %1 step %c4 {
         %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -94,9 +94,9 @@ func @f2(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK-DAG:  %[[C_0:.*]] = dim %[[C]], 0 : memref<?x?xf32, #[[strided2D]]>
 // CHECK-DAG:  %[[C_1:.*]] = dim %[[C]], 1 : memref<?x?xf32, #[[strided2D]]>
 // CHECK-DAG:  %[[D_1:.*]] = dim %[[D]], 1 : memref<?x?xf32, #[[strided2D]]>
-// CHECK:  loop.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} {
-// CHECK:    loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
-// CHECK:      loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
+// CHECK:  scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} {
+// CHECK:    scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
+// CHECK:      scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
 // CHECK:        linalg.matmul
 // CHECK:        linalg.matmul
 
@@ -120,9 +120,9 @@ func @f3(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
   %0 = dim %D, 0 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %1 = dim %D, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %2 = dim %C, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %0 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %1 step %c4 {
+  scf.for %arg5 = %c0 to %0 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %1 step %c4 {
         %5 = std.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -146,9 +146,9 @@ func @f3(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK:  %[[D_0:.*]] = dim %[[D]], 0 : memref<?x?xf32, #[[strided2D]]>
 // CHECK:  %[[D_1:.*]] = dim %[[D]], 1 : memref<?x?xf32, #[[strided2D]]>
 // CHECK:  %[[C_1:.*]] = dim %[[C]], 1 : memref<?x?xf32, #[[strided2D]]>
-// CHECK:  loop.for %{{.*}} = %{{.*}} to %[[D_0]] step %{{.*}} {
-// CHECK:    loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
-// CHECK:      loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
+// CHECK:  scf.for %{{.*}} = %{{.*}} to %[[D_0]] step %{{.*}} {
+// CHECK:    scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
+// CHECK:      scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
 // CHECK:        linalg.matmul
 // CHECK:        linalg.matmul
 
@@ -176,9 +176,9 @@ func @f4(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
   %0 = dim %C, 0 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %1 = dim %C, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %2 = dim %D, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %0 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %1 step %c4 {
+  scf.for %arg5 = %c0 to %0 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %1 step %c4 {
         %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -202,9 +202,9 @@ func @f4(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK:  %[[C_0:.*]] = dim %[[C]], 0 : memref<?x?xf32, #[[strided2D]]>
 // CHECK:  %[[C_1:.*]] = dim %[[C]], 1 : memref<?x?xf32, #[[strided2D]]>
 // CHECK:  %[[D_1:.*]] = dim %[[D]], 1 : memref<?x?xf32, #[[strided2D]]>
-// CHECK:  loop.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} {
-// CHECK:    loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
-// CHECK:      loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
+// CHECK:  scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} {
+// CHECK:    scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
+// CHECK:      scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
 // Fuse D then fuse C, no false dependence prevents it.
 // CHECK:        linalg.matmul
 // CHECK:        linalg.matmul
@@ -235,9 +235,9 @@ func @f5(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %1 step %c2 {
-    loop.for %arg6 = %c0 to %0 step %c3 {
-      loop.for %arg7 = %c0 to %2 step %c4 {
+  scf.for %arg5 = %c0 to %1 step %c2 {
+    scf.for %arg6 = %c0 to %0 step %c3 {
+      scf.for %arg7 = %c0 to %2 step %c4 {
         %5 = std.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -261,9 +261,9 @@ func @f5(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK-DAG:  %[[B_1:.*]] = dim %[[B]], 1 : memref<?x?xf32, #[[strided2D]]>
 // CHECK-DAG:  %[[D_0:.*]] = dim %[[D]], 0 : memref<?x?xf32, #[[strided2D]]>
 // CHECK-DAG:  %[[D_1:.*]] = dim %[[D]], 1 : memref<?x?xf32, #[[strided2D]]>
-// CHECK:  loop.for %[[I:.*]] = %{{.*}} to %[[D_0]] step %{{.*}} {
-// CHECK:    loop.for %[[J:.*]] = %{{.*}} to %[[B_1]] step %{{.*}} {
-// CHECK:      loop.for %[[K:.*]] = %{{.*}} to %[[D_1]] step %{{.*}} {
+// CHECK:  scf.for %[[I:.*]] = %{{.*}} to %[[D_0]] step %{{.*}} {
+// CHECK:    scf.for %[[J:.*]] = %{{.*}} to %[[B_1]] step %{{.*}} {
+// CHECK:      scf.for %[[K:.*]] = %{{.*}} to %[[D_1]] step %{{.*}} {
 // CHECK-DAG:    %[[D_IK:.*]] = subview %[[D]][%[[I]], %[[K]]]
 // CHECK-DAG:    %[[B_KJ:.*]] = subview %[[B]][%[[K]], %[[J]]]
 // CHECK-DAG:    %[[E_IJ:.*]] = subview %[[E]][%[[I]], %[[J]]]
@@ -307,9 +307,9 @@ func @f6(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>
   %1 = dim %C, 0 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %2 = dim %D, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %1 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %0 step %c4 {
+  scf.for %arg5 = %c0 to %1 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %0 step %c4 {
         %3 = affine.apply #map0(%arg5)
         %4 = affine.apply #map1(%arg7)
         %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] :
@@ -334,9 +334,9 @@ func @f6(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK-LABEL: func @f6
 // CHECK:  (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}})
 // Fuse the producer of E (WAW) then the producer of C (WAR).
-// CHECK:  loop.for
-// CHECK:    loop.for
-// CHECK:      loop.for
+// CHECK:  scf.for
+// CHECK:    scf.for
+// CHECK:      scf.for
 // CHECK:        linalg.matmul
 // CHECK:        linalg.matmul
 // CHECK:        linalg.matmul
@@ -367,9 +367,9 @@ func @f7(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %0 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %1 step %c4 {
+  scf.for %arg5 = %c0 to %0 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %1 step %c4 {
         %7 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -386,9 +386,9 @@ func @f7(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
       }
     }
   }
-  loop.for %arg5 = %c0 to %3 step %c2 {
-    loop.for %arg6 = %c0 to %4 step %c3 {
-      loop.for %arg7 = %c0 to %2 step %c4 {
+  scf.for %arg5 = %c0 to %3 step %c2 {
+    scf.for %arg6 = %c0 to %4 step %c3 {
+      scf.for %arg7 = %c0 to %2 step %c4 {
         %7 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -415,14 +415,14 @@ func @f7(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK:  %[[C_0:.*]] = dim %[[C]], 0 : memref<?x?xf32, #[[strided2D]]>
 // CHECK:  %[[D_1:.*]] = dim %[[D]], 1 : memref<?x?xf32, #[[strided2D]]>
 // CHECK:  linalg.matmul(%[[A]], %[[C]], %[[E]])
-// CHECK:  loop.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} {
-// CHECK:    loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
-// CHECK:      loop.for %{{.*}} = %{{.*}} to %[[A_1]] step %{{.*}} {
+// CHECK:  scf.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} {
+// CHECK:    scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
+// CHECK:      scf.for %{{.*}} = %{{.*}} to %[[A_1]] step %{{.*}} {
 // CHECK:        linalg.matmul
 // CHECK:        linalg.matmul
-// CHECK:  loop.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} {
-// CHECK:    loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
-// CHECK:      loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
+// CHECK:  scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} {
+// CHECK:    scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} {
+// CHECK:      scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} {
 // CHECK:        linalg.matmul
 // CHECK-NOT:      linalg.matmul
 
@@ -454,9 +454,9 @@ func @f8(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>
   %2 = dim %D, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg5 = %c0 to %0 step %c2 {
-    loop.for %arg6 = %c0 to %2 step %c3 {
-      loop.for %arg7 = %c0 to %1 step %c4 {
+  scf.for %arg5 = %c0 to %0 step %c2 {
+    scf.for %arg6 = %c0 to %2 step %c3 {
+      scf.for %arg7 = %c0 to %1 step %c4 {
         %3 = affine.apply #map0(%arg5)
         %4 = affine.apply #map1(%arg7)
         %5 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] :
@@ -482,9 +482,9 @@ func @f8(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
 // CHECK:  (%[[A:.*]]: memref{{.*}}, %[[B:.*]]: memref{{.*}}, %[[C:.*]]: memref{{.*}}, %[[D:.*]]: memref{{.*}}, %[[E:.*]]: memref{{.*}})
 // CHECK:  linalg.matmul
 // CHECK:  linalg.matmul
-// CHECK:  loop.for
-// CHECK:    loop.for
-// CHECK:      loop.for
+// CHECK:  scf.for
+// CHECK:    scf.for
+// CHECK:      scf.for
 // CHECK:        linalg.matmul
 // CHECK-NOT:      linalg.matmul
 
@@ -514,8 +514,8 @@ func @pointwise(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
      memref<?x?xf32, offset: 0, strides: [?, ?]>
   %0 = dim %B, 0 : memref<?x?xf32, offset: 0, strides: [?, ?]>
   %1 = dim %B, 1 : memref<?x?xf32, offset: 0, strides: [?, ?]>
-  loop.for %arg4 = %c0 to %0 step %c2 {
-    loop.for %arg5 = %c0 to %1 step %c3 {
+  scf.for %arg4 = %c0 to %0 step %c2 {
+    scf.for %arg5 = %c0 to %1 step %c3 {
       %4 = std.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] :
         memref<?x?xf32, offset: 0, strides: [?, ?]> to
         memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -537,9 +537,9 @@ func @pointwise(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
   return
 }
 // CHECK-LABEL: func @pointwise
-// CHECK:  loop.for
-// CHECK:    loop.for
-// CHECK-NOT:  loop.for
+// CHECK:  scf.for
+// CHECK:    scf.for
+// CHECK-NOT:  scf.for
 // CHECK:      linalg.generic
 // CHECK:        addf
 // CHECK:      linalg.generic
@@ -573,8 +573,8 @@ func @pointwise_no_view(%M: index, %N: index) {
      memref<?x?xf32>
   %0 = dim %B, 0 : memref<?x?xf32>
   %1 = dim %B, 1 : memref<?x?xf32>
-  loop.for %arg4 = %c0 to %0 step %c2 {
-    loop.for %arg5 = %c0 to %1 step %c3 {
+  scf.for %arg4 = %c0 to %0 step %c2 {
+    scf.for %arg5 = %c0 to %1 step %c3 {
       %4 = std.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] :
         memref<?x?xf32> to
         memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -596,9 +596,9 @@ func @pointwise_no_view(%M: index, %N: index) {
   return
 }
 // CHECK-LABEL: func @pointwise_no_view
-// CHECK:  loop.for
-// CHECK:    loop.for
-// CHECK-NOT:  loop.for
+// CHECK:  scf.for
+// CHECK:    scf.for
+// CHECK-NOT:  scf.for
 // CHECK:      linalg.generic
 // CHECK:        addf
 // CHECK:      linalg.generic
@@ -642,8 +642,8 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
   %3 = dim %1, 1 : memref<100x10xf32>
   %4 = dim %arg2, 0 : memref<100x10xf32>
   %5 = dim %arg2, 1 : memref<100x10xf32>
-  loop.for %i = %c0 to %2 step %c1 {
-    loop.for %j = %c0 to %3 step %c1 {
+  scf.for %i = %c0 to %2 step %c1 {
+    scf.for %j = %c0 to %3 step %c1 {
       %6 = std.subview %1[%i, %j][%c1, %c1][%c1, %c1] :
       memref<100x10xf32> to memref<?x?xf32, #map2>
       %7 = std.subview %arg2[%i, %j][%c1, %c1][%c1, %c1] :
@@ -666,9 +666,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 }
 // CHECK-LABEL: func @fusion
 // CHECK-NOT: linalg.generic
-// CHECK:     loop.for
-// CHECK:       loop.for
-// CHECK-NOT: loop.for
+// CHECK:     scf.for
+// CHECK:       scf.for
+// CHECK-NOT: scf.for
 // CHECK:       linalg.generic
 // CHECK:         linalg.yield
 // CHECK:       linalg.generic
@@ -704,8 +704,8 @@ func @fill_and_conv(%arg0: memref<1x4x5x1xf32>, %arg1: memref<2x3x1x1xf32>, %arg
   %12 = dim %arg2, 3 : memref<1x4x5x1xf32>
   %13 = linalg.range %c0 : %6 : %c2 : !linalg.range
   %14 = linalg.range %c0 : %10 : %c3 : !linalg.range
-  loop.for %arg3 = %c0 to %6 step %c2 {
-    loop.for %arg4 = %c0 to %10 step %c3 {
+  scf.for %arg3 = %c0 to %6 step %c2 {
+    scf.for %arg4 = %c0 to %10 step %c3 {
       %15 = affine.min #map0(%c2, %c1, %arg3)
       %16 = affine.apply #map2()[%7]
       %17 = affine.min #map0(%16, %c4, %arg4)
@@ -723,8 +723,8 @@ func @fill_and_conv(%arg0: memref<1x4x5x1xf32>, %arg1: memref<2x3x1x1xf32>, %arg
   return
 }
 // CHECK-LABEL: func @fill_and_conv
-// CHECK: loop.for
-// CHECK:   loop.for
+// CHECK: scf.for
+// CHECK:   scf.for
 // CHECK:     linalg.fill
 // CHECK:     linalg.conv
 
@@ -747,9 +747,9 @@ func @accept_different_alloc_ops(%dim: index, %s0 : index, %s1: index) {
     memref<?x?xf32, offset: 0, strides: [?, ?]>,
     memref<?x?xf32, offset: 0, strides: [?, ?]>
 
-  loop.for %i = %c0 to %dim step %c2 {
-    loop.for %j = %c0 to %dim step %c3 {
-      loop.for %k = %c0 to %dim step %c4 {
+  scf.for %i = %c0 to %dim step %c2 {
+    scf.for %j = %c0 to %dim step %c3 {
+      scf.for %k = %c0 to %dim step %c4 {
         %0 = std.subview %A[%i, %k][%c2, %c4][%c1, %c1] :
           memref<?x?xf32, offset: 0, strides: [?, ?]> to
           memref<?x?xf32, offset: ?, strides: [?, ?]>
@@ -770,5 +770,5 @@ func @accept_different_alloc_ops(%dim: index, %s0 : index, %s1: index) {
 }
 
 // CHECK-LABEL: func @accept_different_alloc_ops
-// CHECK-COUNT-3: loop.for
+// CHECK-COUNT-3: scf.for
 // CHECK-COUNT-2:   linalg.matmul

diff --git a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir
index eaef27b2f3de..de16e4b50f33 100644
--- a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir
+++ b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir
@@ -25,8 +25,8 @@ func @fuse_indexed_generic_consumer(%A: memref<?x?xf32>,
   %1 = dim %C, 1 : memref<?x?xf32>
   %2 = dim %D, 0 : memref<?x?xf32>
   %3 = dim %D, 1 : memref<?x?xf32>
-  loop.for %arg2 = %c0 to %0 step %c10 {
-    loop.for %arg3 = %c0 to %1 step %c25 {
+  scf.for %arg2 = %c0 to %0 step %c10 {
+    scf.for %arg3 = %c0 to %1 step %c25 {
       %4 = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] :
           memref<?x?xf32> to memref<?x?xf32, #map>
       %5 = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] :
@@ -52,9 +52,9 @@ func @fuse_indexed_generic_consumer(%A: memref<?x?xf32>,
   return
 }
 // CHECK-LABEL: func @fuse_indexed_generic_consumer
-// CHECK:  loop.for
-// CHECK:    loop.for
-// CHECK-NOT:  loop.for
+// CHECK:  scf.for
+// CHECK:    scf.for
+// CHECK-NOT:  scf.for
 // CHECK:      linalg.generic
 // CHECK-NOT:    addi
 // CHECK:        addf
@@ -91,7 +91,7 @@ func @fuse_indexed_generic_producer(%A: memref<?x?xf32>,
   %C_Y = dim %C, 1 : memref<?x?xf32>
   %D_X = dim %D, 0 : memref<?x?xf32>
   %D_Y = dim %D, 1 : memref<?x?xf32>
-  loop.parallel (%arg2, %arg3) = (%c0, %c0) to (%C_X, %C_Y) step (%c10, %c25) {
+  scf.parallel (%arg2, %arg3) = (%c0, %c0) to (%C_X, %C_Y) step (%c10, %c25) {
     %C_view = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] :
         memref<?x?xf32> to memref<?x?xf32, #map>
     %D_view = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] :
@@ -110,8 +110,8 @@ func @fuse_indexed_generic_producer(%A: memref<?x?xf32>,
   return
 }
 // CHECK-LABEL: func @fuse_indexed_generic_producer
-// CHECK:  loop.parallel ([[I:%.*]], [[J:%.*]]) =
-// CHECK-NOT:  loop.parallel
+// CHECK:  scf.parallel ([[I:%.*]], [[J:%.*]]) =
+// CHECK-NOT:  scf.parallel
 // CHECK:      linalg.indexed_generic
 // CHECK:        ^bb0([[i:%.*]]: index, [[j:%.*]]: index
 // CHECK:          [[i_new:%.*]] = addi [[i]], [[I]] : index
@@ -150,7 +150,7 @@ func @fuse_indexed_generic_producer_tile_second_dim_only(%A: memref<?x?xf32>,
   %D_X = dim %D, 0 : memref<?x?xf32>
   %D_Y = dim %D, 1 : memref<?x?xf32>
   %3 = linalg.range %c0 : %C_Y : %c3 : !linalg.range
-  loop.parallel (%j) = (%c0) to (%C_Y) step (%c3) {
+  scf.parallel (%j) = (%c0) to (%C_Y) step (%c3) {
     %0 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)>(%c3, %C_Y, %j)
     %C_view = subview %C[%c0, %j] [%C_X, %0] [%c1, %c1] :
       memref<?x?xf32> to memref<?x?xf32, #map>
@@ -169,14 +169,14 @@ func @fuse_indexed_generic_producer_tile_second_dim_only(%A: memref<?x?xf32>,
       %ab = addf %a, %b : f32
       linalg.yield %ab : f32
     }: memref<?x?xf32, #map>, memref<?x?xf32, #map>
-    loop.yield
+    scf.yield
   }
   return
 }
 // CHECK-LABEL: func @fuse_indexed_generic_producer_tile_second_dim_only
 // CHECK:  [[C0:%.*]] = constant 0 : index
-// CHECK:  loop.parallel ([[J:%.*]]) =
-// CHECK-NOT:  loop.parallel
+// CHECK:  scf.parallel ([[J:%.*]]) =
+// CHECK-NOT:  scf.parallel
 // CHECK:      linalg.indexed_generic
 // CHECK:        ^bb0([[i:%.*]]: index, [[j:%.*]]: index
 // CHECK:          [[i_new:%.*]] = addi [[i]], [[C0]] : index

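The same rename applies to parallel loops and their terminator: loop.parallel
becomes scf.parallel and loop.yield becomes scf.yield, as in the hunks above.
A minimal sketch of the renamed pair (hypothetical bounds; not from the tests):

    scf.parallel (%i, %j) = (%c0, %c0) to (%c10, %c25) step (%c1, %c1) {
      // ... body, e.g. subviews and a fused linalg op, as in the fusion tests ...
      scf.yield
    }
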
diff --git a/mlir/test/Dialect/Linalg/llvm.mlir b/mlir/test/Dialect/Linalg/llvm.mlir
index e158e70caec8..377775cd9dc6 100644
--- a/mlir/test/Dialect/Linalg/llvm.mlir
+++ b/mlir/test/Dialect/Linalg/llvm.mlir
@@ -62,7 +62,7 @@ func @slice_with_range_and_index(%arg0: memref<?x?xf64, offset: ?, strides: [?,
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %R = linalg.range %c0:%c1:%c1 : !linalg.range
-  loop.for %i0 = %c0 to %c1 step %c1 {
+  scf.for %i0 = %c0 to %c1 step %c1 {
     %1 = linalg.slice %arg0[%i0, %R] : memref<?x?xf64, offset: ?, strides: [?, 1]>, index, !linalg.range, memref<?xf64, offset: ?, strides: [1]>
   }
   return
@@ -180,9 +180,9 @@ func @matmul_vec_impl(%A: !matrix_type_A, %B: !matrix_type_B, %C: !matrix_type_C
 // LLVM-LOOPS: %[[T0:.*]] = dim %[[A]], 0 : memref<?x?xvector<4xf32>>
 // LLVM-LOOPS: %[[T1:.*]] = dim %[[A]], 1 : memref<?x?xvector<4xf32>>
 // LLVM-LOOPS: %[[T2:.*]] = dim %[[B]], 1 : memref<?x?xvector<4xf32>>
-// LLVM-LOOPS: loop.for %[[I:.*]] = %[[C0]] to %[[T0]] step %[[C1]] {
-// LLVM-LOOPS: loop.for %[[J:.*]] = %[[C0]] to %[[T2]] step %[[C1]] {
-// LLVM-LOOPS: loop.for %[[K:.*]] = %[[C0]] to %[[T1]] step %[[C1]] {
+// LLVM-LOOPS: scf.for %[[I:.*]] = %[[C0]] to %[[T0]] step %[[C1]] {
+// LLVM-LOOPS: scf.for %[[J:.*]] = %[[C0]] to %[[T2]] step %[[C1]] {
+// LLVM-LOOPS: scf.for %[[K:.*]] = %[[C0]] to %[[T1]] step %[[C1]] {
 // LLVM-LOOPS:   %[[T3:.*]] = load %[[A]][%[[I]], %[[K]]] : memref<?x?xvector<4xf32>>
 // LLVM-LOOPS:   %[[T4:.*]] = load %[[B]][%[[K]], %[[J]]] : memref<?x?xvector<4xf32>>
 // LLVM-LOOPS:   %[[T5:.*]] = load %[[C]][%[[I]], %[[J]]] : memref<?x?xvector<4x4xf32>>

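The next file checks the same Linalg inputs under two FileCheck prefixes:
CHECKLOOP for the sequential lowering and CHECKPARALLEL for the parallel-loop
lowering, so each rename shows up once per prefix. The RUN lines have roughly
the following shape (paraphrased; the exact flags live in the test file):

    // RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s --check-prefix=CHECKLOOP
    // RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck %s --check-prefix=CHECKPARALLEL
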
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index 427d44168ec5..72457bc6034e 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -43,9 +43,9 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
 //       CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
 //       CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
 //       CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-//       CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKLOOP-DAG:       %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
 //   CHECKLOOP-DAG:       %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
 //   CHECKLOOP-DAG:       %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -60,8 +60,8 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
 //       CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
 //       CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
 //       CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-//       CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} {
-//       CHECKPARALLEL:   loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} {
+//       CHECKPARALLEL:   scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKPARALLEL-DAG:     %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
 //   CHECKPARALLEL-DAG:     %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
 //   CHECKPARALLEL-DAG:     %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -86,8 +86,8 @@ func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) {
 //       CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
 //       CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
 //       CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
-//       CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKLOOP-DAG:     %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
 //   CHECKLOOP-DAG:     %[[b:.*]] = load %[[B]][%{{.*}}] : memref<?xf32>
 //   CHECKLOOP-DAG:     %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -101,8 +101,8 @@ func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) {
 //       CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
 //       CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
 //       CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
-//       CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) {
-//       CHECKPARALLEL:   loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) {
+//       CHECKPARALLEL:   scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKPARALLEL-DAG:     %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
 //   CHECKPARALLEL-DAG:     %[[b:.*]] = load %[[B]][%{{.*}}] : memref<?xf32>
 //   CHECKPARALLEL-DAG:     %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -125,7 +125,7 @@ func @dot(%arg0: memref<?xi8>, %M: index) {
 //       CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
 //       CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
 //       CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
-//       CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKLOOP-DAG:   %[[a:.*]] = load %[[A]][%{{.*}}] : memref<?xf32>
 //   CHECKLOOP-DAG:   %[[b:.*]] = load %[[B]][%{{.*}}] : memref<?xf32>
 //   CHECKLOOP-DAG:   %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -138,7 +138,7 @@ func @dot(%arg0: memref<?xi8>, %M: index) {
 //       CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
 //       CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
 //       CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
-//       CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKPARALLEL-DAG:   %[[a:.*]] = load %[[A]][%{{.*}}] : memref<?xf32>
 //   CHECKPARALLEL-DAG:   %[[b:.*]] = load %[[B]][%{{.*}}] : memref<?xf32>
 //   CHECKPARALLEL-DAG:   %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -154,7 +154,7 @@ func @dot_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
 // CHECKLOOP-LABEL: func @dot_view(
 //       CHECKLOOP:   %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<f32>) {
 //       CHECKLOOP: %[[K:.*]] = dim %arg0, 0 : memref<?xf32, #[[strided1D]]>
-//       CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKLOOP-DAG:   %[[a:.*]] = load %arg0[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 //   CHECKLOOP-DAG:   %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 //   CHECKLOOP-DAG:   %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -165,7 +165,7 @@ func @dot_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
 // CHECKPARALLEL-LABEL: func @dot_view(
 //       CHECKPARALLEL:   %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<f32>) {
 //       CHECKPARALLEL: %[[K:.*]] = dim %arg0, 0 : memref<?xf32, #[[strided1D]]>
-//       CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
 //   CHECKPARALLEL-DAG:   %[[a:.*]] = load %arg0[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 //   CHECKPARALLEL-DAG:   %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 //   CHECKPARALLEL-DAG:   %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -179,12 +179,12 @@ func @fill_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: f32) {
 }
 // CHECKLOOP-LABEL: func @fill_view(
 //       CHECKLOOP: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: f32) {
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECKLOOP:     store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 
 // CHECKPARALLEL-LABEL: func @fill_view(
 //       CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: f32) {
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
 //       CHECKPARALLEL:     store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 
 func @fill_view0(%arg0: memref<f32>, %arg1: f32) {
@@ -203,14 +203,14 @@ func @fill_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
 }
 // CHECKLOOP-LABEL: func @fill_view3(
 //       CHECKLOOP: %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: f32) {
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECKLOOP:         store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 
 // CHECKPARALLEL-LABEL: func @fill_view3(
 //       CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: f32) {
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
 //       CHECKPARALLEL:     store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 
 func @copy_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>) {
@@ -219,13 +219,13 @@ func @copy_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf
 }
 // CHECKLOOP-LABEL: func @copy_view(
 //       CHECKLOOP: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<?xf32, #[[strided1D]]>) {
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECKLOOP:     %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 //       CHECKLOOP:     store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 
 // CHECKPARALLEL-LABEL: func @copy_view(
 //       CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<?xf32, #[[strided1D]]>) {
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
 //       CHECKPARALLEL:     %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 //       CHECKPARALLEL:     store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
 
@@ -249,15 +249,15 @@ func @copy_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
 }
 // CHECKLOOP-LABEL: func @copy_view3
 //       CHECKLOOP: (%{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>) {
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECKLOOP:         %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKLOOP:         store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 
 // CHECKPARALLEL-LABEL: func @copy_view3
 //       CHECKPARALLEL: (%{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>) {
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
 //       CHECKPARALLEL:     %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKPARALLEL:     store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 
@@ -272,11 +272,11 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
 //       CHECKLOOP:   %[[K:.*]] = dim %arg0, 2 : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKLOOP:   %[[B:.*]] = dim %arg1, 0 : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKLOOP:   %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?xf32, #[[strided3D]]>
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-//       CHECKLOOP:         loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-//       CHECKLOOP:           loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP:         scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+//       CHECKLOOP:           scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
 //       CHECKLOOP:             %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:             %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKLOOP:             %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
@@ -292,9 +292,9 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
 //       CHECKPARALLEL:   %[[K:.*]] = dim %arg0, 2 : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKPARALLEL:   %[[B:.*]] = dim %arg1, 0 : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKPARALLEL:   %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?xf32, #[[strided3D]]>
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) {
-//       CHECKPARALLEL:     loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-//       CHECKPARALLEL:       loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:     scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+//       CHECKPARALLEL:       scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
 //       CHECKPARALLEL:         %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKPARALLEL:         %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
@@ -316,13 +316,13 @@ func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %
 //       CHECKLOOP:   %[[B:.*]] = dim %arg1, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       CHECKLOOP:   %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       CHECKLOOP:   %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-//       CHECKLOOP:         loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-//       CHECKLOOP:           loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-//       CHECKLOOP:             loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-//       CHECKLOOP:               loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
+//       CHECKLOOP:         scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP:           scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+//       CHECKLOOP:             scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+//       CHECKLOOP:               scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
 //       CHECKLOOP:                 %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:                 %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:                 %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
@@ -341,10 +341,10 @@ func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %
 //       CHECKPARALLEL:   %[[B:.*]] = dim %arg1, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       CHECKPARALLEL:   %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       CHECKPARALLEL:   %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-//       CHECKPARALLEL:     loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-//       CHECKPARALLEL:       loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-//       CHECKPARALLEL:         loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:     scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+//       CHECKPARALLEL:       scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+//       CHECKPARALLEL:         scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
 //       CHECKPARALLEL:           %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:           %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:           %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
@@ -373,13 +373,13 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
 //       CHECKLOOP:   %[[B:.*]] =  dim %arg1, 0 : memref<?x?x?x?xf32>
 //       CHECKLOOP:   %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32>
 //       CHECKLOOP:   %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32>
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-//       CHECKLOOP:         loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-//       CHECKLOOP:           loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-//       CHECKLOOP:             loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-//       CHECKLOOP:               loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
+//       CHECKLOOP:         scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP:           scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+//       CHECKLOOP:             scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+//       CHECKLOOP:               scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
 //       CHECKLOOP:                 %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
 //       CHECKLOOP:                 %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
 //       CHECKLOOP:                 %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]])
@@ -402,10 +402,10 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
 //       CHECKPARALLEL:   %[[B:.*]] =  dim %arg1, 0 : memref<?x?x?x?xf32>
 //       CHECKPARALLEL:   %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32>
 //       CHECKPARALLEL:   %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32>
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-//       CHECKPARALLEL:     loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-//       CHECKPARALLEL:       loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-//       CHECKPARALLEL:         loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:     scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+//       CHECKPARALLEL:       scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+//       CHECKPARALLEL:         scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
 //       CHECKPARALLEL:           %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:           %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:           %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]])
@@ -430,10 +430,10 @@ func @pooling_max(%arg0: memref<?x?xf32>,
 //       CHECKLOOP:   %[[WY:.*]] = dim %arg1, 1 : memref<?x?xi32>
 //       CHECKLOOP:   %[[OX:.*]] = dim %arg2, 0 : memref<?x?xf32>
 //       CHECKLOOP:   %[[OY:.*]] = dim %arg2, 1 : memref<?x?xf32>
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-//       CHECKLOOP:         loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+//       CHECKLOOP:         scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
 //       CHECKLOOP:           %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:           %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:           %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
@@ -446,9 +446,9 @@ func @pooling_max(%arg0: memref<?x?xf32>,
 //       CHECKPARALLEL:   %[[WY:.*]] = dim %arg1, 1 : memref<?x?xi32>
 //       CHECKPARALLEL:   %[[OX:.*]] = dim %arg2, 0 : memref<?x?xf32>
 //       CHECKPARALLEL:   %[[OY:.*]] = dim %arg2, 1 : memref<?x?xf32>
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-//       CHECKPARALLEL:     loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-//       CHECKPARALLEL:       loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:     scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+//       CHECKPARALLEL:       scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
 //       CHECKPARALLEL:         %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
@@ -468,10 +468,10 @@ func @pooling_min(%arg0: memref<?x?xf32>,
 //       CHECKLOOP:   %[[WY:.*]] = dim %arg1, 1 : memref<?x?xi32>
 //       CHECKLOOP:   %[[OX:.*]] = dim %arg2, 0 : memref<?x?xf32>
 //       CHECKLOOP:   %[[OY:.*]] = dim %arg2, 1 : memref<?x?xf32>
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-//       CHECKLOOP:         loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+//       CHECKLOOP:         scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
 //       CHECKLOOP:           %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:           %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:           %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
@@ -484,9 +484,9 @@ func @pooling_min(%arg0: memref<?x?xf32>,
 //       CHECKPARALLEL:   %[[WY:.*]] = dim %arg1, 1 : memref<?x?xi32>
 //       CHECKPARALLEL:   %[[OX:.*]] = dim %arg2, 0 : memref<?x?xf32>
 //       CHECKPARALLEL:   %[[OY:.*]] = dim %arg2, 1 : memref<?x?xf32>
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-//       CHECKPARALLEL:     loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-//       CHECKPARALLEL:       loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:     scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+//       CHECKPARALLEL:       scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
 //       CHECKPARALLEL:         %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
@@ -506,10 +506,10 @@ func @pooling_sum(%arg0: memref<?x?xf32>,
 //       CHECKLOOP:   %[[WY:.*]] = dim %arg1, 1 : memref<?x?xi32>
 //       CHECKLOOP:   %[[OX:.*]] = dim %arg2, 0 : memref<?x?xf32>
 //       CHECKLOOP:   %[[OY:.*]] = dim %arg2, 1 : memref<?x?xf32>
-//       CHECKLOOP:   loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-//       CHECKLOOP:         loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+//       CHECKLOOP:         scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
 //       CHECKLOOP:           %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:           %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKLOOP:           %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
@@ -522,9 +522,9 @@ func @pooling_sum(%arg0: memref<?x?xf32>,
 //       CHECKPARALLEL:   %[[WY:.*]] = dim %arg1, 1 : memref<?x?xi32>
 //       CHECKPARALLEL:   %[[OX:.*]] = dim %arg2, 0 : memref<?x?xf32>
 //       CHECKPARALLEL:   %[[OY:.*]] = dim %arg2, 1 : memref<?x?xf32>
-//       CHECKPARALLEL:   loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-//       CHECKPARALLEL:     loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-//       CHECKPARALLEL:       loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
+//       CHECKPARALLEL:     scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+//       CHECKPARALLEL:       scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
 //       CHECKPARALLEL:         %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
 //       CHECKPARALLEL:         %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
@@ -555,9 +555,9 @@ func @generic_region(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1:
   return
 }
 // CHECKLOOP-LABEL: @generic_region
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-//       CHECKLOOP:   loop.for %[[j:.*]] = {{.*}}
-//       CHECKLOOP:     loop.for %[[k:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP:   scf.for %[[j:.*]] = {{.*}}
+//       CHECKLOOP:     scf.for %[[k:.*]] = {{.*}}
 //       CHECKLOOP:       %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[strided2D]]>
 //       CHECKLOOP:       %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKLOOP:       %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
@@ -567,7 +567,7 @@ func @generic_region(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1:
 //       CHECKLOOP:       store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
 
 // CHECKPARALLEL-LABEL: @generic_region
-//       CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
+//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
 //       CHECKPARALLEL:   %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[strided2D]]>
 //       CHECKPARALLEL:   %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
 //       CHECKPARALLEL:   %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
@@ -606,9 +606,9 @@ func @indexed_generic_region(
 }
 
 // CHECKLOOP-LABEL: @indexed_generic_region
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-//       CHECKLOOP:   loop.for %[[j:.*]] = {{.*}}
-//       CHECKLOOP:     loop.for %[[k:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP:   scf.for %[[j:.*]] = {{.*}}
+//       CHECKLOOP:     scf.for %[[k:.*]] = {{.*}}
 //       CHECKLOOP:       %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]]
 //       CHECKLOOP:       %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]]
 //       CHECKLOOP:       %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]]
@@ -622,7 +622,7 @@ func @indexed_generic_region(
 //       CHECKLOOP:       store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
 
 // CHECKPARALLEL-LABEL: @indexed_generic_region
-//       CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
+//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
 //       CHECKPARALLEL:   %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]]
 //       CHECKPARALLEL:   %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]]
 //       CHECKPARALLEL:   %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]]
@@ -662,15 +662,15 @@ func @generic_op_zero_rank(%arg0: memref<f32>, %arg1: memref<3x4xf32>)
 // CHECKLOOP-LABEL: @generic_op_zero_rank
 //  CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
 //  CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-//       CHECKLOOP:   loop.for %[[j:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP:   scf.for %[[j:.*]] = {{.*}}
 //       CHECKLOOP:     %[[a:.*]] = load %[[ARG0]][]
 //       CHECKLOOP:     store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
 
 // CHECKPARALLEL-LABEL: @generic_op_zero_rank
 //  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
 //  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
-//       CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
+//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
 //       CHECKPARALLEL:   %[[a:.*]] = load %[[ARG0]][]
 //       CHECKPARALLEL:   store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
 
@@ -689,8 +689,8 @@ func @indexed_generic_op_zero_rank(%arg0: memref<i32>, %arg1: memref<3x4xi32>)
 // CHECKLOOP-LABEL: @indexed_generic_op_zero_rank
 //  CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
 //  CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-//       CHECKLOOP:   loop.for %[[j:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP:   scf.for %[[j:.*]] = {{.*}}
 //       CHECKLOOP:     %[[a:.*]] = load %[[ARG0]][
 //       CHECKLOOP:     %[[ij:.*]] = addi %[[i]], %[[j]] : index
 //       CHECKLOOP:     %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
@@ -700,7 +700,7 @@ func @indexed_generic_op_zero_rank(%arg0: memref<i32>, %arg1: memref<3x4xi32>)
 // CHECKPARALLEL-LABEL: @indexed_generic_op_zero_rank
 //  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
 //  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-//       CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
+//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
 //       CHECKPARALLEL:   %[[a:.*]] = load %[[ARG0]][
 //       CHECKPARALLEL:   %[[ij:.*]] = addi %[[i]], %[[j]] : index
 //       CHECKPARALLEL:   %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
@@ -732,7 +732,7 @@ func @generic_op_1D_reduce(%arg0: memref<?xf32>, %arg1: memref<f32>)
 // CHECKLOOP-LABEL: @generic_op_1D_reduce
 //  CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
 //  CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
 //       CHECKLOOP:   %[[a:.*]] = load %[[ARG0]][%[[i]]]
 //       CHECKLOOP:   %[[b:.*]] = load %[[ARG1]][]
 //       CHECKLOOP:   %[[c:.*]] = addf %[[a]], %[[b]] : f32
@@ -741,7 +741,7 @@ func @generic_op_1D_reduce(%arg0: memref<?xf32>, %arg1: memref<f32>)
 // CHECKPARALLEL-LABEL: @generic_op_1D_reduce
 //  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
 //  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-//       CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}}
+//       CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
 //       CHECKPARALLEL:   %[[a:.*]] = load %[[ARG0]][%[[i]]]
 //       CHECKPARALLEL:   %[[b:.*]] = load %[[ARG1]][]
 //       CHECKPARALLEL:   %[[c:.*]] = addf %[[a]], %[[b]] : f32
@@ -780,7 +780,7 @@ func @indexed_generic_op_1D_reduce(%arg0: memref<?xf32>,
 //  CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
 //  CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
 //  CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
 //       CHECKLOOP:   %[[a:.*]] = load %[[ARG0]][%[[i]]]
 //       CHECKLOOP:   %[[b:.*]] = load %[[ARG1]][]
 //       CHECKLOOP:   %[[c:.*]] = load %[[ARG2]][]
@@ -792,7 +792,7 @@ func @indexed_generic_op_1D_reduce(%arg0: memref<?xf32>,
 //  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
 //  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
 //  CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
-//       CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}}
+//       CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
 //       CHECKPARALLEL:   %[[a:.*]] = load %[[ARG0]][%[[i]]]
 //       CHECKPARALLEL:   %[[b:.*]] = load %[[ARG1]][]
 //       CHECKPARALLEL:   %[[c:.*]] = load %[[ARG2]][]
@@ -818,13 +818,13 @@ func @generic_const_init(%arg0: memref<?xf32>) {
 // CHECKLOOP-LABEL: @generic_const_init
 //  CHECKLOOP-SAME: %[[ARG0:.*]]: memref<?xf32>
 //       CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32
-//       CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+//       CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
 //       CHECKLOOP:   store %[[CONST]], %[[ARG0]]
 
 // CHECKPARALLEL-LABEL: @generic_const_init
 //  CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref<?xf32>
 //       CHECKPARALLEL: %[[CONST:.*]] = constant 1.000000e+00 : f32
-//       CHECKPARALLEL: loop.parallel (%[[i:.*]])
+//       CHECKPARALLEL: scf.parallel (%[[i:.*]])
 //       CHECKPARALLEL:   store %[[CONST]], %[[ARG0]]
 
 #scalar_access = [
@@ -852,7 +852,7 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
 //  CHECKLOOP-SAME: %[[ARG0]]: memref<f32>
 //  CHECKLOOP-SAME: %[[ARG1]]: memref<f32>
 //  CHECKLOOP-SAME: %[[ARG2]]: memref<f32>
-//   CHECKLOOP-NOT: loop.for
+//   CHECKLOOP-NOT: scf.for
 //       CHECKLOOP: load %[[ARG0]][]
 //       CHECKLOOP: load %[[ARG1]][]
 //       CHECKLOOP: addf
@@ -862,7 +862,7 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
 //  CHECKPARALLEL-SAME: %[[ARG0]]: memref<f32>
 //  CHECKPARALLEL-SAME: %[[ARG1]]: memref<f32>
 //  CHECKPARALLEL-SAME: %[[ARG2]]: memref<f32>
-//   CHECKPARALLEL-NOT: loop.for
+//   CHECKPARALLEL-NOT: scf.for
 //       CHECKPARALLEL: load %[[ARG0]][]
 //       CHECKPARALLEL: load %[[ARG1]][]
 //       CHECKPARALLEL: addf
@@ -883,10 +883,10 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
 //       CHECKLOOP: %[[M:.*]] = dim %[[mA]], 1 : memref<?x?x?xf32>
 //       CHECKLOOP: %[[K:.*]] = dim %[[mA]], 2 : memref<?x?x?xf32>
 //       CHECKLOOP: %[[N:.*]] = dim %[[mB]], 2 : memref<?x?x?xf32>
-//       CHECKLOOP: loop.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
-//       CHECKLOOP:   loop.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
-//       CHECKLOOP:     loop.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-//       CHECKLOOP:       loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
+//       CHECKLOOP:   scf.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
+//       CHECKLOOP:     scf.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
+//       CHECKLOOP:       scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
 //       CHECKLOOP:       %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
 //       CHECKLOOP:       %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
 //       CHECKLOOP:       %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
@@ -902,8 +902,8 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
 //       CHECKPARALLEL: %[[M:.*]] = dim %[[mA]], 1 : memref<?x?x?xf32>
 //       CHECKPARALLEL: %[[K:.*]] = dim %[[mA]], 2 : memref<?x?x?xf32>
 //       CHECKPARALLEL: %[[N:.*]] = dim %[[mB]], 2 : memref<?x?x?xf32>
-//       CHECKPARALLEL: loop.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
-//       CHECKPARALLEL:   loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
+//       CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
+//       CHECKPARALLEL:   scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
 //       CHECKPARALLEL:       %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
 //       CHECKPARALLEL:       %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
 //       CHECKPARALLEL:       %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>

diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir
index 15a3ed210d9d..abe9cccc8b75 100644
--- a/mlir/test/Dialect/Linalg/parallel_loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir
@@ -21,12 +21,12 @@ func @linalg_generic_sum(%lhs: memref<2x2xf32>,
 // CHECK-DAG: %[[C2:.*]] = constant 2
 // CHECK-DAG: %[[C0:.*]] = constant 0
 // CHECK-DAG: %[[C1:.*]] = constant 1
-// CHECK: loop.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
+// CHECK: scf.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
 // CHECK:   %[[LHS_ELEM:.*]] = load %[[LHS]][%[[I]], %[[J]]]
 // CHECK:   %[[RHS_ELEM:.*]] = load %[[RHS]][%[[I]], %[[J]]]
 // CHECK:   %[[SUM:.*]] = addf %[[LHS_ELEM]], %[[RHS_ELEM]] : f32
 // CHECK:   store %[[SUM]], %{{.*}}[%[[I]], %[[J]]]
-// CHECK:   loop.yield
+// CHECK:   scf.yield
 
 // -----
 
@@ -55,8 +55,8 @@ func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {
 //   CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, 1
 //   CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, 2
 //   CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3
-//       CHECK: loop.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
-//       CHECK:   loop.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
-//       CHECK:     loop.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
+//       CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
+//       CHECK:   scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
+//       CHECK:     scf.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
 //       CHECK:       load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 //       CHECK:       store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]

diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir
index bd6a3e7d7033..64534733846a 100644
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -20,9 +20,9 @@ func @matmul_f32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
   %6 = dim %3, 0 : memref<?x?xf32>
   %7 = dim %3, 1 : memref<?x?xf32>
   %8 = dim %4, 1 : memref<?x?xf32>
-  loop.for %arg4 = %c0 to %6 step %c2 {
-    loop.for %arg5 = %c0 to %8 step %c3 {
-      loop.for %arg6 = %c0 to %7 step %c4 {
+  scf.for %arg4 = %c0 to %6 step %c2 {
+    scf.for %arg5 = %c0 to %8 step %c3 {
+      scf.for %arg6 = %c0 to %7 step %c4 {
         %11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref<?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, 1]>
         %14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref<?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, 1]>
         %17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref<?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, 1]>
@@ -34,9 +34,9 @@ func @matmul_f32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
 }
 
 // CHECK-LABEL: func @matmul_f32(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-//       CHECK:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECK:     loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECK:       loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:     scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECK:         %[[vA:.*]] = subview {{.*}} : memref<?x?xf32>
 //       CHECK:         %[[vB:.*]] = subview {{.*}} : memref<?x?xf32>
 //       CHECK:         %[[vC:.*]] = subview {{.*}} : memref<?x?xf32>
@@ -85,9 +85,9 @@ func @matmul_f64(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
   %6 = dim %3, 0 : memref<?x?xf64>
   %7 = dim %3, 1 : memref<?x?xf64>
   %8 = dim %4, 1 : memref<?x?xf64>
-  loop.for %arg4 = %c0 to %6 step %c2 {
-    loop.for %arg5 = %c0 to %8 step %c3 {
-      loop.for %arg6 = %c0 to %7 step %c4 {
+  scf.for %arg4 = %c0 to %6 step %c2 {
+    scf.for %arg5 = %c0 to %8 step %c3 {
+      scf.for %arg6 = %c0 to %7 step %c4 {
         %11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref<?x?xf64> to memref<?x?xf64, offset: ?, strides: [?, 1]>
         %14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref<?x?xf64> to memref<?x?xf64, offset: ?, strides: [?, 1]>
         %17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref<?x?xf64> to memref<?x?xf64, offset: ?, strides: [?, 1]>
@@ -99,9 +99,9 @@ func @matmul_f64(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
 }
 
 // CHECK-LABEL: func @matmul_f64(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-//       CHECK:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECK:     loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECK:       loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:     scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECK:         %[[vA_f64:.*]] = subview {{.*}} : memref<?x?xf64>
 //       CHECK:         %[[vB_f64:.*]] = subview {{.*}} : memref<?x?xf64>
 //       CHECK:         %[[vC_f64:.*]] = subview {{.*}} : memref<?x?xf64>
@@ -150,9 +150,9 @@ func @matmul_i32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
   %6 = dim %3, 0 : memref<?x?xi32>
   %7 = dim %3, 1 : memref<?x?xi32>
   %8 = dim %4, 1 : memref<?x?xi32>
-  loop.for %arg4 = %c0 to %6 step %c2 {
-    loop.for %arg5 = %c0 to %8 step %c3 {
-      loop.for %arg6 = %c0 to %7 step %c4 {
+  scf.for %arg4 = %c0 to %6 step %c2 {
+    scf.for %arg5 = %c0 to %8 step %c3 {
+      scf.for %arg6 = %c0 to %7 step %c4 {
         %11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref<?x?xi32> to memref<?x?xi32, offset: ?, strides: [?, 1]>
         %14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref<?x?xi32> to memref<?x?xi32, offset: ?, strides: [?, 1]>
         %17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref<?x?xi32> to memref<?x?xi32, offset: ?, strides: [?, 1]>
@@ -164,9 +164,9 @@ func @matmul_i32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
 }
 
 // CHECK-LABEL: func @matmul_i32(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-//       CHECK:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECK:     loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//       CHECK:       loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:     scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//       CHECK:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //       CHECK:         %[[vA_i32:.*]] = subview {{.*}} : memref<?x?xi32>
 //       CHECK:         %[[vB_i32:.*]] = subview {{.*}} : memref<?x?xi32>
 //       CHECK:         %[[vC_i32:.*]] = subview {{.*}} : memref<?x?xi32>

diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
index 19aa26a83637..47fac4c8bf4b 100644
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ b/mlir/test/Dialect/Linalg/tile.mlir
@@ -43,7 +43,7 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+//       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
 //       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
 //       TILE-2:   %[[K:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
@@ -59,7 +59,7 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-02-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-02-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-02: %[[N:.*]] = dim %arg1, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-02: loop.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
+//       TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
 //       TILE-02:   %[[K:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[localN:.*]] = dim %{{.*}}, 1
 //       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
@@ -75,7 +75,7 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-002-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-002-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-002: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-002: loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+//       TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-002:   %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-002:   %[[localK:.*]] = dim %{{.*}}, 1
 //       TILE-002:   %[[szK:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localK]], %[[K]])
@@ -95,9 +95,9 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-234: %[[ubM:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-234: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-234: %[[ubN:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-234:  loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-//       TILE-234:    loop.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-//       TILE-234:      loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+//       TILE-234:  scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
+//       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
+//       TILE-234:      scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-234:        %[[localM:.*]] = dim %{{.*}}, 0
 //       TILE-234:        %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
 //       TILE-234:        %[[localK:.*]] = dim %{{.*}}, 1
@@ -129,7 +129,7 @@ func @matmul_static(%arg0: memref<10x16xf32, offset: ?, strides: [?, 1]>, %arg1:
 //       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+//       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-2:   %[[K:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
 //       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[C2]], %[[K]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
 //       TILE-2:   %[[N:.*]] = dim %{{.*}}, 1 : memref<10x12xf32, #[[strided2D]]>
@@ -141,7 +141,7 @@ func @matmul_static(%arg0: memref<10x16xf32, offset: ?, strides: [?, 1]>, %arg1:
 //       TILE-02-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-02-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-02: %[[N:.*]] = dim %arg1, 1 : memref<16x12xf32, #[[strided2D]]>
-//       TILE-02: loop.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
+//       TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
 //       TILE-02:   %[[K:.*]] = dim %{{.*}}, 0 : memref<16x12xf32, #[[strided2D]]>
 //   TILE-02-NOT:   affine.min
 //       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[K]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<16x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
@@ -155,7 +155,7 @@ func @matmul_static(%arg0: memref<10x16xf32, offset: ?, strides: [?, 1]>, %arg1:
 //       TILE-002-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-002-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-002: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-002: loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+//       TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-002:   %[[M:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
 //   TILE-002-NOT:   affine.min
 //       TILE-002:   %[[sAj:.*]] = subview %{{.*}}[%[[C0]], %[[K]]] [%[[M]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
@@ -173,9 +173,9 @@ func @matmul_static(%arg0: memref<10x16xf32, offset: ?, strides: [?, 1]>, %arg1:
 //       TILE-234: %[[ubM:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
 //       TILE-234: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
 //       TILE-234: %[[ubN:.*]] = dim %{{.*}}, 1 : memref<16x12xf32, #[[strided2D]]>
-//       TILE-234:  loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-//       TILE-234:    loop.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-//       TILE-234:      loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+//       TILE-234:  scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
+//       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
+//       TILE-234:      scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 //   TILE-234-NOT:   affine.min
 //       TILE-234:        %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%[[C2]], %[[C4]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
 //   TILE-234-NOT:   affine.min
@@ -194,7 +194,7 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+//       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
 //       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
 //       TILE-2:   %[[N:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
@@ -209,7 +209,7 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-02-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-02-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-02: %[[K:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-02: loop.for %[[J]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
+//       TILE-02: scf.for %[[J]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
 //       TILE-02:   %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[localN:.*]] = dim %{{.*}}, 1
 //       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
@@ -220,7 +220,7 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-02:   linalg.matvec(%[[sAj]], %[[sBj]], %{{.*}}) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>, memref<?xf32, #[[strided1D]]>
 
 // TILE-002-LABEL: func @matvec(
-//   TILE-002-NOT: loop.for
+//   TILE-002-NOT: scf.for
 
 // TILE-234-LABEL: func @matvec(
 //       TILE-234-DAG: %[[C0:.*]] = constant 0 : index
@@ -229,8 +229,8 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-234-DAG: %[[C3:.*]] = constant 3 : index
 //       TILE-234: %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-234: %[[K:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-234:  loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-//       TILE-234:    loop.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
+//       TILE-234:  scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+//       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
 //       TILE-234:      %[[localM:.*]] = dim %{{.*}}, 0
 //       TILE-234:      %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
 //       TILE-234:      %[[localN:.*]] = dim %{{.*}}, 1
@@ -254,7 +254,7 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
 //       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<?xf32, #[[strided1D]]>
-//       TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+//       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
 //       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
 //       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
@@ -264,17 +264,17 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
 //       TILE-2:   linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : memref<?xf32, #[[strided1D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>, memref<f32>
 
 // TILE-02-LABEL: func @dot(
-//   TILE-02-NOT: loop.for
+//   TILE-02-NOT: scf.for
 
 // TILE-002-LABEL: func @dot(
-//   TILE-002-NOT: loop.for
+//   TILE-002-NOT: scf.for
 
 // TILE-234-LABEL: func @dot(
 //       TILE-234-DAG: %[[C0:.*]] = constant 0 : index
 //       TILE-234-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-234-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-234:  %[[ubK:.*]] = dim %{{.*}}, 0 : memref<?xf32, #[[strided1D]]>
-//       TILE-234:  loop.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
+//       TILE-234:  scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-234:    %[[localM:.*]] = dim %{{.*}}, 0
 //       TILE-234:    %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
 //       TILE-234:    %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>

diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir
index 713c4d4b1c14..e1f03fa9cf49 100644
--- a/mlir/test/Dialect/Linalg/tile_conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv.mlir
@@ -21,9 +21,9 @@ func @conv(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1:
 //       TILE-23004:   %[[B:.*]] = dim %{{.*}}, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:   %[[PaddedInput0:.*]] = dim %{{.*}}, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:   %[[X0:.*]] = dim %{{.*}}, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
-//       TILE-23004:   loop.for %[[ivI:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
-//       TILE-23004:     loop.for %[[ivJ:.*]] = %{{.*}} to %[[X0]] step %{{.*}} {
-//       TILE-23004:       loop.for %[[ivK:.*]] = %{{.*}} to %[[Q]] step %{{.*}} {
+//       TILE-23004:   scf.for %[[ivI:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
+//       TILE-23004:     scf.for %[[ivJ:.*]] = %{{.*}} to %[[X0]] step %{{.*}} {
+//       TILE-23004:       scf.for %[[ivK:.*]] = %{{.*}} to %[[Q]] step %{{.*}} {
 //       TILE-23004:         %[[Z0:.*]] = dim %{{.*}}, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:         %[[Z1:.*]] = dim %{{.*}}, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:         %[[Z2:.*]] = dim %{{.*}}, 2 : memref<?x?x?x?xf32, #[[strided4D]]>

diff --git a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
index af3e9af5068a..4ce27b379b19 100644
--- a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
@@ -24,7 +24,7 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>,
 //   TILE-20000-DAG:   %[[C1:.*]] = constant 1 : index
 //   TILE-20000-DAG:   %[[C2:.*]] = constant 2 : index
 //       TILE-20000:   %[[B:.*]] = dim %[[ARG1]], 0
-//       TILE-20000:   loop.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] {
+//       TILE-20000:   scf.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] {
 //       TILE-20000:     %[[DIM10:.*]] = dim %[[ARG1]], 0
 //       TILE-20000:     %[[EXTENT:.*]] = affine.min #[[minmap]](%[[C2]], %[[DIM10]], %[[ivI]])
 //       TILE-20000:     %[[DIM11:.*]] = dim %[[ARG1]], 1

diff --git a/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir b/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
index fc1d27a5a268..93e1189cad35 100644
--- a/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
+++ b/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
@@ -21,7 +21,7 @@ func @indexed_generic_vector(%operand: memref<50xf32>, %result: memref<50xf32>)
 }
 // TILE-10n25-LABEL: func @indexed_generic_vector
 // TILE-10n25: %[[C10:.*]] = constant 10 : index
-// TILE-10n25: loop.for %[[J:.*]] = {{.*}} step %[[C10]]
+// TILE-10n25: scf.for %[[J:.*]] = {{.*}} step %[[C10]]
 // TILE-10n25:   linalg.indexed_generic
 // TILE-10n25:   ^bb0(%[[I:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32)
 // TILE-10n25:     %[[NEW_I:.*]] = addi %[[I]], %[[J]] : index
@@ -31,7 +31,7 @@ func @indexed_generic_vector(%operand: memref<50xf32>, %result: memref<50xf32>)
 
 // TILE-25n0-LABEL: func @indexed_generic_vector
 // TILE-25n0: %[[C25:.*]] = constant 25 : index
-// TILE-25n0: loop.for %[[J:.*]] = {{.*}} step %[[C25]]
+// TILE-25n0: scf.for %[[J:.*]] = {{.*}} step %[[C25]]
 // TILE-25n0:   linalg.indexed_generic
 // TILE-25n0:   ^bb0(%[[I:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32)
 // TILE-25n0:     %[[NEW_I:.*]] = addi %[[I]], %[[J]] : index
@@ -40,7 +40,7 @@ func @indexed_generic_vector(%operand: memref<50xf32>, %result: memref<50xf32>)
 // TILE-25n0:     %[[OUT:.*]] = addf %[[IN]], %[[NEW_I_FLOAT]] : f32
 
 // TILE-0n25-LABEL: func @indexed_generic_vector
-// TILE-0n25-NOT: loop.for %[[J:.*]] = {{.*}} step %[[C25]]
+// TILE-0n25-NOT: scf.for %[[J:.*]] = {{.*}} step %[[C25]]
 // TILE-0n25: linalg.indexed_generic
 
 #combined_indices_trait = {
@@ -67,8 +67,8 @@ func @indexed_generic_matrix(%operand: memref<50x100xf32>, %result: memref<50x10
 // TILE-10n25-LABEL: func @indexed_generic_matrix
 // TILE-10n25: %[[C25:.*]] = constant 25 : index
 // TILE-10n25: %[[C10:.*]] = constant 10 : index
-// TILE-10n25: loop.for %[[K:.*]] = {{.*}} step %[[C10]]
-// TILE-10n25:   loop.for %[[L:.*]] = {{.*}} step %[[C25]]
+// TILE-10n25: scf.for %[[K:.*]] = {{.*}} step %[[C10]]
+// TILE-10n25:   scf.for %[[L:.*]] = {{.*}} step %[[C25]]
 // TILE-10n25:     linalg.indexed_generic
 // TILE-10n25:     ^bb0(%[[I:.*]]: index, %[[J:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32):
 // TILE-10n25:       %[[NEW_I:.*]] = addi %[[I]], %[[K]] : index
@@ -81,7 +81,7 @@ func @indexed_generic_matrix(%operand: memref<50x100xf32>, %result: memref<50x10
 
 // TILE-25n0-LABEL: func @indexed_generic_matrix
 // TILE-25n0: %[[C25:.*]] = constant 25 : index
-// TILE-25n0: loop.for %[[L:.*]] = {{.*}} step %[[C25]]
+// TILE-25n0: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
 // TILE-25n0:   linalg.indexed_generic
 // TILE-25n0:   ^bb0(%[[I:.*]]: index, %[[J:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32):
 // TILE-25n0:     %[[NEW_I:.*]] = addi %[[I]], %[[L]] : index
@@ -93,7 +93,7 @@ func @indexed_generic_matrix(%operand: memref<50x100xf32>, %result: memref<50x10
 
 // TILE-0n25-LABEL: func @indexed_generic_matrix
 // TILE-0n25: %[[C25:.*]] = constant 25 : index
-// TILE-0n25: loop.for %[[L:.*]] = {{.*}} step %[[C25]]
+// TILE-0n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
 // TILE-0n25:   linalg.indexed_generic
 // TILE-0n25:   ^bb0(%[[I:.*]]: index, %[[J:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32):
 // TILE-0n25:     %[[NEW_J:.*]] = addi %[[J]], %[[L]] : index

diff --git a/mlir/test/Dialect/Linalg/tile_parallel.mlir b/mlir/test/Dialect/Linalg/tile_parallel.mlir
index caca3a0e795e..ef21ef0301d2 100644
--- a/mlir/test/Dialect/Linalg/tile_parallel.mlir
+++ b/mlir/test/Dialect/Linalg/tile_parallel.mlir
@@ -29,8 +29,8 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-2-DAG: [[C1:%.*]] = constant 1 : index
 // TILE-2-DAG: [[C2:%.*]] = constant 2 : index
 // TILE-2: [[LHS_ROWS:%.*]] = dim [[LHS]], 0
-// TILE-2: loop.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) {
-// TILE-2-NO: loop.parallel
+// TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) {
+// TILE-2-NO: scf.parallel
 // TILE-2:   [[LHS_SUBVIEW:%.*]] = subview [[LHS]]
 // TILE-2:   [[RHS_SUBVIEW:%.*]] = subview [[RHS]]
 // TILE-2:   [[SUM_SUBVIEW:%.*]] = subview [[SUM]]
@@ -42,8 +42,8 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-02-DAG: [[C1:%.*]] = constant 1 : index
 // TILE-02-DAG: [[C2:%.*]] = constant 2 : index
 // TILE-02: [[LHS_COLS:%.*]] = dim [[LHS]], 1
-// TILE-02: loop.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) {
-// TILE-02-NO: loop.parallel
+// TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) {
+// TILE-02-NO: scf.parallel
 // TILE-02:   [[LHS_SUBVIEW:%.*]] = subview [[LHS]]
 // TILE-02:   [[RHS_SUBVIEW:%.*]] = subview [[RHS]]
 // TILE-02:   [[SUM_SUBVIEW:%.*]] = subview [[SUM]]
@@ -51,7 +51,7 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 
 // TILE-002-LABEL: func @sum(
 // TILE-002-SAME:    [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) {
-// TILE-002-NO: loop.parallel
+// TILE-002-NO: scf.parallel
 // TILE-002:   linalg.generic {{.*}} [[LHS]], [[RHS]], [[SUM]] {
 
 // TILE-234-LABEL: func @sum(
@@ -62,8 +62,8 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-234-DAG: [[C3:%.*]] = constant 3 : index
 // TILE-234: [[LHS_ROWS:%.*]] = dim [[LHS]], 0
 // TILE-234: [[LHS_COLS:%.*]] = dim [[LHS]], 1
-// TILE-234: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) {
-// TILE-234-NO: loop.parallel
+// TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) {
+// TILE-234-NO: scf.parallel
 // TILE-234:   [[LHS_SUBVIEW:%.*]] = subview [[LHS]]
 // TILE-234:   [[RHS_SUBVIEW:%.*]] = subview [[RHS]]
 // TILE-234:   [[SUM_SUBVIEW:%.*]] = subview [[SUM]]

diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
index f55233628672..ce868d156f6d 100644
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -23,8 +23,8 @@ func @dot(%x: memref<?xf32, offset: ?, strides: [1]>,
 // CHECK-DAG:     %[[c0:.*]] = constant 0 : index
 // CHECK-DAG:     %[[c1:.*]] = constant 1 : index
 // CHECK-DAG:     %[[c8000:.*]] = constant 8000 : index
-// CHECK:         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c8000]] {
-// CHECK:             loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c1]] {
+// CHECK:         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c8000]] {
+// CHECK:             scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c1]] {
 // CHECK:               load
 // CHECK:               load
 // CHECK:               mulf
@@ -44,7 +44,7 @@ func @matvec(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // CHECK-DAG:     %[[c0:.*]] = constant 0 : index
 // CHECK-DAG:     %[[c5:.*]] = constant 5 : index
 // CHECK-DAG:     %[[c6:.*]] = constant 6 : index
-// CHECK:         loop.parallel {{.*}} step (%[[c5]], %[[c6]])
+// CHECK:         scf.parallel {{.*}} step (%[[c5]], %[[c6]])
 // CHECK:             linalg.matvec({{.*}}, {{.*}}, {{.*}}) : memref<?x?xf32, #[[STRIDED_2D]]>, memref<?xf32, #[[STRIDED_1D]]>, memref<?xf32, #[[STRIDED_1D]]>
 
 func @matmul(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>,
@@ -69,18 +69,18 @@ func @matmul(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // CHECK-DAG:     %[[c2000:.*]] = constant 2000 : index
 // CHECK-DAG:     %[[c3000:.*]] = constant 3000 : index
 // CHECK-DAG:     %[[c4000:.*]] = constant 4000 : index
-// CHECK:         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK:           loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK:             loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
-// CHECK:               loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
-// CHECK:                 loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
-// CHECK:                   loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
-// CHECK:                     loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
-// CHECK:                       loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
-// CHECK:                         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
-// CHECK:                           loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2]] {
-// CHECK:                             loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3]] {
-// CHECK:                               loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] {
+// CHECK:         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK:           scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK:             scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK:               scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
+// CHECK:                 scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
+// CHECK:                   scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
+// CHECK:                     scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
+// CHECK:                       scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
+// CHECK:                         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
+// CHECK:                           scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2]] {
+// CHECK:                             scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3]] {
+// CHECK:                               scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] {
 // CHECK:                                 linalg.matmul({{.*}}, {{.*}}, {{.*}}) : memref<?x?xf32, #[[STRIDED_2D]]>, memref<?x?xf32, #[[STRIDED_2D]]>, memref<?x?xf32, #[[STRIDED_2D]]>
 
 #matmul_trait = {
@@ -208,8 +208,8 @@ func @matvec_perm(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // CHECK-DAG:     %[[c0:.*]] = constant 0 : index
 // CHECK-DAG:     %[[c5:.*]] = constant 5 : index
 // CHECK-DAG:     %[[c6:.*]] = constant 6 : index
-// CHECK:         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c6]]
-// CHECK:           loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]]
+// CHECK:         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c6]]
+// CHECK:           scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]]
 // CHECK:             linalg.matvec({{.*}}, {{.*}}, {{.*}}) : memref<?x?xf32, #[[STRIDED_2D]]>, memref<?xf32, #[[STRIDED_1D]]>, memref<?xf32, #[[STRIDED_1D]]>
 
 func @matmul_perm(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>,
@@ -232,15 +232,15 @@ func @matmul_perm(%A: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // CHECK-DAG:     %[[c2000:.*]] = constant 2000 : index
 // CHECK-DAG:     %[[c3000:.*]] = constant 3000 : index
 // CHECK-DAG:     %[[c4000:.*]] = constant 4000 : index
-// CHECK:         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK:           loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
-// CHECK:             loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK:               loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
-// CHECK:                 loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
-// CHECK:                   loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
-// CHECK:                     loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
-// CHECK:                       loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
-// CHECK:                         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
+// CHECK:         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK:           scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK:             scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK:               scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
+// CHECK:                 scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
+// CHECK:                   scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
+// CHECK:                     scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
+// CHECK:                       scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
+// CHECK:                         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
 // CHECK:                                 linalg.matmul({{.*}}, {{.*}}, {{.*}}) : memref<?x?xf32, #[[STRIDED_2D]]>, memref<?x?xf32, #[[STRIDED_2D]]>, memref<?x?xf32, #[[STRIDED_2D]]>
 
 func @promote_subview_matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
@@ -254,9 +254,9 @@ func @promote_subview_matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
   %0 = dim %arg0, 0 : memref<?x?xf32, offset: ?, strides: [?, 1]>
   %1 = dim %arg0, 1 : memref<?x?xf32, offset: ?, strides: [?, 1]>
   %2 = dim %arg1, 1 : memref<?x?xf32, offset: ?, strides: [?, 1]>
-  loop.for %arg3 = %c0 to %0 step %c2000 {
-    loop.for %arg4 = %c0 to %2 step %c3000 {
-      loop.for %arg5 = %c0 to %1 step %c4000 {
+  scf.for %arg3 = %c0 to %0 step %c2000 {
+    scf.for %arg4 = %c0 to %2 step %c3000 {
+      scf.for %arg5 = %c0 to %1 step %c4000 {
         %3 = subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] :
              memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %4 = subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] :
@@ -273,9 +273,9 @@ func @promote_subview_matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
   return
 }
 // CHECK-LABEL: func @promote_subview_matmul
-// CHECK:         loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK:           loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK:             loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK:         scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK:           scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK:             scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
 // CHECK:               %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
 // CHECK:               %[[s1:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
 // CHECK:               %[[s2:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
@@ -304,9 +304,9 @@ func @promote_first_subview_matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?
   %0 = dim %arg0, 0 : memref<?x?xf32, offset: ?, strides: [?, 1]>
   %1 = dim %arg0, 1 : memref<?x?xf32, offset: ?, strides: [?, 1]>
   %2 = dim %arg1, 1 : memref<?x?xf32, offset: ?, strides: [?, 1]>
-  loop.for %arg3 = %c0 to %0 step %c2000 {
-    loop.for %arg4 = %c0 to %2 step %c3000 {
-      loop.for %arg5 = %c0 to %1 step %c4000 {
+  scf.for %arg3 = %c0 to %0 step %c2000 {
+    scf.for %arg4 = %c0 to %2 step %c3000 {
+      scf.for %arg5 = %c0 to %1 step %c4000 {
         %3 = std.subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] :
              memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
         %4 = std.subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] :
@@ -323,9 +323,9 @@ func @promote_first_subview_matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?
   return
 }
 // CHECK-LABEL: func @promote_first_subview_matmul
-// CHECK:   loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK:     loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK:       loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK:   scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK:     scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK:       scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
 // CHECK:         %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
 // CHECK:         %[[s1:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
 // CHECK:         %[[s2:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>

diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir
index 562e6031e9b7..96813e5bc711 100644
--- a/mlir/test/Dialect/SCF/invalid.mlir
+++ b/mlir/test/Dialect/SCF/invalid.mlir
@@ -2,7 +2,7 @@
 
 func @loop_for_lb(%arg0: f32, %arg1: index) {
   // expected-error @+1 {{operand #0 must be index}}
-  "loop.for"(%arg0, %arg1, %arg1) ({}) : (f32, index, index) -> ()
+  "scf.for"(%arg0, %arg1, %arg1) ({}) : (f32, index, index) -> ()
   return
 }
 
@@ -10,7 +10,7 @@ func @loop_for_lb(%arg0: f32, %arg1: index) {
 
 func @loop_for_ub(%arg0: f32, %arg1: index) {
   // expected-error @+1 {{operand #1 must be index}}
-  "loop.for"(%arg1, %arg0, %arg1) ({}) : (index, f32, index) -> ()
+  "scf.for"(%arg1, %arg0, %arg1) ({}) : (index, f32, index) -> ()
   return
 }
 
@@ -18,7 +18,7 @@ func @loop_for_ub(%arg0: f32, %arg1: index) {
 
 func @loop_for_step(%arg0: f32, %arg1: index) {
   // expected-error @+1 {{operand #2 must be index}}
-  "loop.for"(%arg1, %arg1, %arg0) ({}) : (index, index, f32) -> ()
+  "scf.for"(%arg1, %arg1, %arg0) ({}) : (index, index, f32) -> ()
   return
 }
 
@@ -27,9 +27,9 @@ func @loop_for_step(%arg0: f32, %arg1: index) {
 func @loop_for_step_positive(%arg0: index) {
   // expected-error @+2 {{constant step operand must be positive}}
   %c0 = constant 0 : index
-  "loop.for"(%arg0, %arg0, %c0) ({
+  "scf.for"(%arg0, %arg0, %c0) ({
     ^bb0(%arg1: index):
-      loop.yield
+      scf.yield
   }) : (index, index, index) -> ()
   return
 }
@@ -38,9 +38,9 @@ func @loop_for_step_positive(%arg0: index) {
 
 func @loop_for_one_region(%arg0: index) {
   // expected-error @+1 {{requires one region}}
-  "loop.for"(%arg0, %arg0, %arg0) (
-    {loop.yield},
-    {loop.yield}
+  "scf.for"(%arg0, %arg0, %arg0) (
+    {scf.yield},
+    {scf.yield}
   ) : (index, index, index) -> ()
   return
 }
@@ -49,12 +49,12 @@ func @loop_for_one_region(%arg0: index) {
 
 func @loop_for_single_block(%arg0: index) {
   // expected-error @+1 {{expects region #0 to have 0 or 1 blocks}}
-  "loop.for"(%arg0, %arg0, %arg0) (
+  "scf.for"(%arg0, %arg0, %arg0) (
     {
     ^bb1:
-      loop.yield
+      scf.yield
     ^bb2:
-      loop.yield
+      scf.yield
     }
   ) : (index, index, index) -> ()
   return
@@ -64,10 +64,10 @@ func @loop_for_single_block(%arg0: index) {
 
 func @loop_for_single_index_argument(%arg0: index) {
   // expected-error @+1 {{op expected body first argument to be an index argument for the induction variable}}
-  "loop.for"(%arg0, %arg0, %arg0) (
+  "scf.for"(%arg0, %arg0, %arg0) (
     {
     ^bb0(%i0 : f32):
-      loop.yield
+      scf.yield
     }
   ) : (index, index, index) -> ()
   return
@@ -77,7 +77,7 @@ func @loop_for_single_index_argument(%arg0: index) {
 
 func @loop_if_not_i1(%arg0: index) {
   // expected-error @+1 {{operand #0 must be 1-bit signless integer}}
-  "loop.if"(%arg0) ({}, {}) : (index) -> ()
+  "scf.if"(%arg0) ({}, {}) : (index) -> ()
   return
 }
 
@@ -85,7 +85,7 @@ func @loop_if_not_i1(%arg0: index) {
 
 func @loop_if_more_than_2_regions(%arg0: i1) {
   // expected-error @+1 {{expected 2 regions}}
-  "loop.if"(%arg0) ({}, {}, {}): (i1) -> ()
+  "scf.if"(%arg0) ({}, {}, {}): (i1) -> ()
   return
 }
 
@@ -93,11 +93,11 @@ func @loop_if_more_than_2_regions(%arg0: i1) {
 
 func @loop_if_not_one_block_per_region(%arg0: i1) {
   // expected-error @+1 {{expects region #0 to have 0 or 1 blocks}}
-  "loop.if"(%arg0) ({
+  "scf.if"(%arg0) ({
     ^bb0:
-      loop.yield
+      scf.yield
     ^bb1:
-      loop.yield
+      scf.yield
   }, {}): (i1) -> ()
   return
 }
@@ -106,9 +106,9 @@ func @loop_if_not_one_block_per_region(%arg0: i1) {
 
 func @loop_if_illegal_block_argument(%arg0: i1) {
   // expected-error @+1 {{requires that child entry blocks have no arguments}}
-  "loop.if"(%arg0) ({
+  "scf.if"(%arg0) ({
     ^bb0(%0 : index):
-      loop.yield
+      scf.yield
   }, {}): (i1) -> ()
   return
 }
@@ -117,8 +117,8 @@ func @loop_if_illegal_block_argument(%arg0: i1) {
 
 func @parallel_arguments_different_tuple_size(
     %arg0: index, %arg1: index, %arg2: index) {
-  // expected-error @+1 {{custom op 'loop.parallel' expected 1 operands}}
-  loop.parallel (%i0) = (%arg0) to (%arg1, %arg2) step () {
+  // expected-error @+1 {{custom op 'scf.parallel' expected 1 operands}}
+  scf.parallel (%i0) = (%arg0) to (%arg1, %arg2) step () {
   }
   return
 }
@@ -127,10 +127,10 @@ func @parallel_arguments_different_tuple_size(
 
 func @parallel_body_arguments_wrong_type(
     %arg0: index, %arg1: index, %arg2: index) {
-  // expected-error @+1 {{'loop.parallel' op expects arguments for the induction variable to be of index type}}
-  "loop.parallel"(%arg0, %arg1, %arg2) ({
+  // expected-error @+1 {{'scf.parallel' op expects arguments for the induction variable to be of index type}}
+  "scf.parallel"(%arg0, %arg1, %arg2) ({
     ^bb0(%i0: f32):
-      loop.yield
+      scf.yield
   }) {operand_segment_sizes = dense<[1, 1, 1, 0]>: vector<4xi32>}: (index, index, index) -> ()
   return
 }
@@ -139,10 +139,10 @@ func @parallel_body_arguments_wrong_type(
 
 func @parallel_body_wrong_number_of_arguments(
     %arg0: index, %arg1: index, %arg2: index) {
-  // expected-error @+1 {{'loop.parallel' op expects the same number of induction variables: 2 as bound and step values: 1}}
-  "loop.parallel"(%arg0, %arg1, %arg2) ({
+  // expected-error @+1 {{'scf.parallel' op expects the same number of induction variables: 2 as bound and step values: 1}}
+  "scf.parallel"(%arg0, %arg1, %arg2) ({
     ^bb0(%i0: index, %i1: index):
-      loop.yield
+      scf.yield
   }) {operand_segment_sizes = dense<[1, 1, 1, 0]>: vector<4xi32>}: (index, index, index) -> ()
   return
 }
@@ -150,8 +150,8 @@ func @parallel_body_wrong_number_of_arguments(
 // -----
 
 func @parallel_no_tuple_elements() {
-  // expected-error @+1 {{'loop.parallel' op needs at least one tuple element for lowerBound, upperBound and step}}
-  loop.parallel () = () to () step () {
+  // expected-error @+1 {{'scf.parallel' op needs at least one tuple element for lowerBound, upperBound and step}}
+  scf.parallel () = () to () step () {
   }
   return
 }
@@ -163,7 +163,7 @@ func @parallel_step_not_positive(
   // expected-error @+3 {{constant step operand must be positive}}
   %c0 = constant 1 : index
   %c1 = constant 0 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%c0, %c1) {
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%c0, %c1) {
   }
   return
 }
@@ -173,11 +173,11 @@ func @parallel_step_not_positive(
 func @parallel_fewer_results_than_reduces(
     %arg0 : index, %arg1: index, %arg2: index) {
   // expected-error @+1 {{expects number of results: 0 to be the same as number of reductions: 1}}
-  loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+  scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
     %c0 = constant 1.0 : f32
-    loop.reduce(%c0) : f32 {
+    scf.reduce(%c0) : f32 {
       ^bb0(%lhs: f32, %rhs: f32):
-        loop.reduce.return %lhs : f32
+        scf.reduce.return %lhs : f32
     }
   }
   return
@@ -189,7 +189,7 @@ func @parallel_more_results_than_reduces(
     %arg0 : index, %arg1 : index, %arg2 : index) {
   // expected-error @+2 {{expects number of results: 1 to be the same as number of reductions: 0}}
   %zero = constant 1.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) init (%zero) -> f32 {
+  %res = scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) init (%zero) -> f32 {
   }
 
   return
@@ -200,10 +200,10 @@ func @parallel_more_results_than_reduces(
 func @parallel_more_results_than_initial_values(
     %arg0 : index, %arg1: index, %arg2: index) {
   // expected-error @+1 {{expects number of results: 1 to be the same as number of initial values: 0}}
-  %res = loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) -> f32 {
-    loop.reduce(%arg0) : index {
+  %res = scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) -> f32 {
+    scf.reduce(%arg0) : index {
       ^bb0(%lhs: index, %rhs: index):
-        loop.reduce.return %lhs : index
+        scf.reduce.return %lhs : index
     }
   }
 }
@@ -213,12 +213,12 @@ func @parallel_more_results_than_initial_values(
 func @parallel_different_types_of_results_and_reduces(
     %arg0 : index, %arg1: index, %arg2: index) {
   %zero = constant 0.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg1)
+  %res = scf.parallel (%i0) = (%arg0) to (%arg1)
                                        step (%arg2) init (%zero) -> f32 {
     // expected-error @+1 {{expects type of reduce: 'index' to be the same as result type: 'f32'}}
-    loop.reduce(%arg0) : index {
+    scf.reduce(%arg0) : index {
       ^bb0(%lhs: index, %rhs: index):
-        loop.reduce.return %lhs : index
+        scf.reduce.return %lhs : index
     }
   }
   return
@@ -227,10 +227,10 @@ func @parallel_different_types_of_results_and_reduces(
 // -----
 
 func @top_level_reduce(%arg0 : f32) {
-  // expected-error @+1 {{expects parent op 'loop.parallel'}}
-  loop.reduce(%arg0) : f32 {
+  // expected-error @+1 {{expects parent op 'scf.parallel'}}
+  scf.reduce(%arg0) : f32 {
     ^bb0(%lhs : f32, %rhs : f32):
-      loop.reduce.return %lhs : f32
+      scf.reduce.return %lhs : f32
   }
   return
 }
@@ -239,10 +239,10 @@ func @top_level_reduce(%arg0 : f32) {
 
 func @reduce_empty_block(%arg0 : index, %arg1 : f32) {
   %zero = constant 0.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+  %res = scf.parallel (%i0) = (%arg0) to (%arg0)
                                        step (%arg0) init (%zero) -> f32 {
     // expected-error @+1 {{the block inside reduce should not be empty}}
-    loop.reduce(%arg1) : f32 {
+    scf.reduce(%arg1) : f32 {
       ^bb0(%lhs : f32, %rhs : f32):
     }
   }
@@ -253,12 +253,12 @@ func @reduce_empty_block(%arg0 : index, %arg1 : f32) {
 
 func @reduce_too_many_args(%arg0 : index, %arg1 : f32) {
   %zero = constant 0.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+  %res = scf.parallel (%i0) = (%arg0) to (%arg0)
                                        step (%arg0) init (%zero) -> f32 {
     // expected-error @+1 {{expects two arguments to reduce block of type 'f32'}}
-    loop.reduce(%arg1) : f32 {
+    scf.reduce(%arg1) : f32 {
       ^bb0(%lhs : f32, %rhs : f32, %other : f32):
-        loop.reduce.return %lhs : f32
+        scf.reduce.return %lhs : f32
     }
   }
   return
@@ -268,12 +268,12 @@ func @reduce_too_many_args(%arg0 : index, %arg1 : f32) {
 
 func @reduce_wrong_args(%arg0 : index, %arg1 : f32) {
   %zero = constant 0.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+  %res = scf.parallel (%i0) = (%arg0) to (%arg0)
                                        step (%arg0) init (%zero) -> f32 {
     // expected-error @+1 {{expects two arguments to reduce block of type 'f32'}}
-    loop.reduce(%arg1) : f32 {
+    scf.reduce(%arg1) : f32 {
       ^bb0(%lhs : f32, %rhs : i32):
-        loop.reduce.return %lhs : f32
+        scf.reduce.return %lhs : f32
     }
   }
   return
@@ -284,12 +284,12 @@ func @reduce_wrong_args(%arg0 : index, %arg1 : f32) {
 
 func @reduce_wrong_terminator(%arg0 : index, %arg1 : f32) {
   %zero = constant 0.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+  %res = scf.parallel (%i0) = (%arg0) to (%arg0)
                                        step (%arg0) init (%zero) -> f32 {
-    // expected-error @+1 {{the block inside reduce should be terminated with a 'loop.reduce.return' op}}
-    loop.reduce(%arg1) : f32 {
+    // expected-error @+1 {{the block inside reduce should be terminated with a 'scf.reduce.return' op}}
+    scf.reduce(%arg1) : f32 {
       ^bb0(%lhs : f32, %rhs : f32):
-        loop.yield
+        scf.yield
     }
   }
   return
@@ -299,13 +299,13 @@ func @reduce_wrong_terminator(%arg0 : index, %arg1 : f32) {
 
 func @reduceReturn_wrong_type(%arg0 : index, %arg1: f32) {
   %zero = constant 0.0 : f32
-  %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+  %res = scf.parallel (%i0) = (%arg0) to (%arg0)
                                        step (%arg0) init (%zero) -> f32 {
-    loop.reduce(%arg1) : f32 {
+    scf.reduce(%arg1) : f32 {
       ^bb0(%lhs : f32, %rhs : f32):
         %c0 = constant 1 : index
         // expected-error @+1 {{needs to have type 'f32' (the type of the enclosing ReduceOp)}}
-        loop.reduce.return %c0 : index
+        scf.reduce.return %c0 : index
     }
   }
   return
@@ -315,8 +315,8 @@ func @reduceReturn_wrong_type(%arg0 : index, %arg1: f32) {
 
 func @reduceReturn_not_inside_reduce(%arg0 : f32) {
   "foo.region"() ({
-    // expected-error @+1 {{expects parent op 'loop.reduce'}}
-    loop.reduce.return %arg0 : f32
+    // expected-error @+1 {{expects parent op 'scf.reduce'}}
+    scf.reduce.return %arg0 : f32
   }): () -> ()
   return
 }
@@ -325,13 +325,13 @@ func @reduceReturn_not_inside_reduce(%arg0 : f32) {
 
 func @std_if_incorrect_yield(%arg0: i1, %arg1: f32)
 {
-  %x, %y = loop.if %arg0 -> (f32, f32) {
+  %x, %y = scf.if %arg0 -> (f32, f32) {
     %0 = addf %arg1, %arg1 : f32
     // expected-error @+1 {{parent of yield must have same number of results as the yield operands}}
-    loop.yield %0 : f32
+    scf.yield %0 : f32
   } else {
     %0 = subf %arg1, %arg1 : f32
-    loop.yield %0 : f32
+    scf.yield %0 : f32
   }
   return
 }
@@ -341,9 +341,9 @@ func @std_if_incorrect_yield(%arg0: i1, %arg1: f32)
 func @std_if_missing_else(%arg0: i1, %arg1: f32)
 {
   // expected-error @+1 {{must have an else block if defining values}}
-  %x = loop.if %arg0 -> (f32) {
+  %x = scf.if %arg0 -> (f32) {
     %0 = addf %arg1, %arg1 : f32
-    loop.yield %0 : f32
+    scf.yield %0 : f32
   }
   return
 }
@@ -354,11 +354,11 @@ func @std_for_operands_mismatch(%arg0 : index, %arg1 : index, %arg2 : index) {
   %s0 = constant 0.0 : f32
   %t0 = constant 1 : i32
   // expected-error @+1 {{mismatch in number of loop-carried values and defined values}}
-  %result1:3 = loop.for %i0 = %arg0 to %arg1 step %arg2
+  %result1:3 = scf.for %i0 = %arg0 to %arg1 step %arg2
                     iter_args(%si = %s0, %ti = %t0) -> (f32, i32, f32) {
     %sn = addf %si, %si : f32
     %tn = addi %ti, %ti : i32
-    loop.yield %sn, %tn, %sn : f32, i32, f32
+    scf.yield %sn, %tn, %sn : f32, i32, f32
   }
   return
 }
@@ -370,12 +370,12 @@ func @std_for_operands_mismatch_2(%arg0 : index, %arg1 : index, %arg2 : index) {
   %t0 = constant 1 : i32
   %u0 = constant 1.0 : f32
   // expected-error @+1 {{mismatch in number of loop-carried values and defined values}}
-  %result1:2 = loop.for %i0 = %arg0 to %arg1 step %arg2
+  %result1:2 = scf.for %i0 = %arg0 to %arg1 step %arg2
                     iter_args(%si = %s0, %ti = %t0, %ui = %u0) -> (f32, i32) {
     %sn = addf %si, %si : f32
     %tn = addi %ti, %ti : i32
     %un = subf %ui, %ui : f32
-    loop.yield %sn, %tn, %un : f32, i32, f32
+    scf.yield %sn, %tn, %un : f32, i32, f32
   }
   return
 }
@@ -387,11 +387,11 @@ func @std_for_operands_mismatch_3(%arg0 : index, %arg1 : index, %arg2 : index) {
   %s0 = constant 0.0 : f32
   %t0 = constant 1.0 : f32
  // expected-error at +2 {{expects different type than prior uses: 'i32' vs 'f32'}}
-  %result1:2 = loop.for %i0 = %arg0 to %arg1 step %arg2
+  %result1:2 = scf.for %i0 = %arg0 to %arg1 step %arg2
                     iter_args(%si = %s0, %ti = %t0) -> (i32, i32) {
     %sn = addf %si, %si : i32
     %tn = addf %ti, %ti : i32
-    loop.yield %sn, %tn : i32, i32
+    scf.yield %sn, %tn : i32, i32
   }
   return
 }
@@ -400,10 +400,10 @@ func @std_for_operands_mismatch_3(%arg0 : index, %arg1 : index, %arg2 : index) {
 
 func @parallel_invalid_yield(
     %arg0: index, %arg1: index, %arg2: index) {
-  loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+  scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
     %c0 = constant 1.0 : f32
-    // expected-error at +1 {{yield inside loop.parallel is not allowed to have operands}}
-    loop.yield %c0 : f32
+    // expected-error at +1 {{yield inside scf.parallel is not allowed to have operands}}
+    scf.yield %c0 : f32
   }
   return
 }
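
 For contrast with the invalid cases exercised above, a minimal well-formed reduction
 loop in the renamed dialect looks like the following sketch (function and value names
 are hypothetical; the op forms are the ones these tests verify):

  func @valid_reduce_sketch(%lb : index, %ub : index, %step : index, %operand : f32) {
    %zero = constant 0.0 : f32
    // One f32 reduction value, seeded by the init operand.
    %res = scf.parallel (%i0) = (%lb) to (%ub) step (%step) init (%zero) -> f32 {
      // scf.reduce carries the per-iteration value into a combiner region...
      scf.reduce(%operand) : f32 {
      ^bb0(%lhs : f32, %rhs : f32):
        %sum = addf %lhs, %rhs : f32
        // ...which must terminate in scf.reduce.return of the enclosing op's type.
        scf.reduce.return %sum : f32
      }
    }
    return
  }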

diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index fa3ebc173e51..775188bf0ed9 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -6,7 +6,7 @@
 func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
                           %arg3: memref<?xf32>) {
   %0 = constant 7.0 : f32
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
     store %0, %arg3[%i0] : memref<?xf32>
   }
   return
@@ -32,14 +32,14 @@ func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
 //   UNROLL-BY-2-DAG:  %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
 //       Compute step of unrolled loop in V8.
 //   UNROLL-BY-2-DAG:  %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index
-//       UNROLL-BY-2:  loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+//       UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
 //  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = constant 1 : index
 //  UNROLL-BY-2-NEXT:    %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
 //  UNROLL-BY-2-NEXT:    %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
 //  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
 //  UNROLL-BY-2-NEXT:  }
-//  UNROLL-BY-2-NEXT:  loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+//  UNROLL-BY-2-NEXT:  scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
 //  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-2-NEXT:  }
 //  UNROLL-BY-2-NEXT:  return
@@ -65,7 +65,7 @@ func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
 //   UNROLL-BY-3-DAG:  %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
 //       Compute step of unrolled loop in V8.
 //   UNROLL-BY-3-DAG:  %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index
-//       UNROLL-BY-3:  loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+//       UNROLL-BY-3:  scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
 //  UNROLL-BY-3-NEXT:    %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
@@ -76,7 +76,7 @@ func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
 //  UNROLL-BY-3-NEXT:    %[[V12:.*]] = addi %[[IV]], %[[V11]] : index
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V12]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:  }
-//  UNROLL-BY-3-NEXT:  loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+//  UNROLL-BY-3-NEXT:  scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:  }
 //  UNROLL-BY-3-NEXT:  return
@@ -85,8 +85,8 @@ func @dynamic_loop_unroll_outer_by_2(
   %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
   %arg5 : index, %arg6: memref<?xf32>) {
   %0 = constant 7.0 : f32
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
-    loop.for %i1 = %arg3 to %arg4 step %arg5 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg3 to %arg4 step %arg5 {
      store %0, %arg6[%i1] : memref<?xf32>
     }
   }
@@ -101,16 +101,16 @@ func @dynamic_loop_unroll_outer_by_2(
 //  UNROLL-OUTER-BY-2-SAME:  %[[STEP1:.*5]]: index,
 //  UNROLL-OUTER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
 //
-//       UNROLL-OUTER-BY-2:  loop.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
-//  UNROLL-OUTER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+//       UNROLL-OUTER-BY-2:  scf.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
+//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
 //  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
 //  UNROLL-OUTER-BY-2-NEXT:    }
-//  UNROLL-OUTER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
 //  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
 //  UNROLL-OUTER-BY-2-NEXT:    }
 //  UNROLL-OUTER-BY-2-NEXT:  }
-//  UNROLL-OUTER-BY-2-NEXT:  loop.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
-//  UNROLL-OUTER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-OUTER-BY-2-NEXT:  scf.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
+//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
 //  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
 //  UNROLL-OUTER-BY-2-NEXT:    }
 //  UNROLL-OUTER-BY-2-NEXT:  }
@@ -120,8 +120,8 @@ func @dynamic_loop_unroll_inner_by_2(
   %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
   %arg5 : index, %arg6: memref<?xf32>) {
   %0 = constant 7.0 : f32
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
-    loop.for %i1 = %arg3 to %arg4 step %arg5 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg3 to %arg4 step %arg5 {
      store %0, %arg6[%i1] : memref<?xf32>
     }
   }
@@ -136,15 +136,15 @@ func @dynamic_loop_unroll_inner_by_2(
 //  UNROLL-INNER-BY-2-SAME:  %[[STEP1:.*5]]: index,
 //  UNROLL-INNER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
 //
-//       UNROLL-INNER-BY-2:  loop.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
-//       UNROLL-INNER-BY-2:    loop.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
+//       UNROLL-INNER-BY-2:  scf.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+//       UNROLL-INNER-BY-2:    scf.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
 //  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
 //  UNROLL-INNER-BY-2-NEXT:      %[[C1_IV:.*]] = constant 1 : index
 //  UNROLL-INNER-BY-2-NEXT:      %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index
 //  UNROLL-INNER-BY-2-NEXT:      %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index
 //  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
 //  UNROLL-INNER-BY-2-NEXT:    }
-//  UNROLL-INNER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-INNER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
 //  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
 //  UNROLL-INNER-BY-2-NEXT:    }
 //  UNROLL-INNER-BY-2-NEXT:  }
@@ -157,7 +157,7 @@ func @static_loop_unroll_by_2(%arg0 : memref<?xf32>) {
   %lb = constant 0 : index
   %ub = constant 20 : index
   %step = constant 1 : index
-  loop.for %i0 = %lb to %ub step %step {
+  scf.for %i0 = %lb to %ub step %step {
     store %0, %arg0[%i0] : memref<?xf32>
   }
   return
@@ -169,7 +169,7 @@ func @static_loop_unroll_by_2(%arg0 : memref<?xf32>) {
 //   UNROLL-BY-2-DAG:  %[[C1:.*]] = constant 1 : index
 //   UNROLL-BY-2-DAG:  %[[C20:.*]] = constant 20 : index
 //   UNROLL-BY-2-DAG:  %[[C2:.*]] = constant 2 : index
-//   UNROLL-BY-2:  loop.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
+//   UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
 //  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = constant 1 : index
 //  UNROLL-BY-2-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
@@ -185,7 +185,7 @@ func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
   %lb = constant 0 : index
   %ub = constant 20 : index
   %step = constant 1 : index
-  loop.for %i0 = %lb to %ub step %step {
+  scf.for %i0 = %lb to %ub step %step {
     store %0, %arg0[%i0] : memref<?xf32>
   }
   return
@@ -199,7 +199,7 @@ func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
 //   UNROLL-BY-3-DAG:  %[[C20:.*]] = constant 20 : index
 //   UNROLL-BY-3-DAG:  %[[C18:.*]] = constant 18 : index
 //   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
-//       UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
+//       UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
 //  UNROLL-BY-3-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
@@ -210,7 +210,7 @@ func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
 //  UNROLL-BY-3-NEXT:    %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:  }
-//  UNROLL-BY-3-NEXT:  loop.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
+//  UNROLL-BY-3-NEXT:  scf.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:  }
 //  UNROLL-BY-3-NEXT:  return
@@ -222,7 +222,7 @@ func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
   %lb = constant 0 : index
   %ub = constant 10 : index
   %step = constant 1 : index
-  loop.for %i0 = %lb to %ub step %step {
+  scf.for %i0 = %lb to %ub step %step {
     store %0, %arg0[%i0] : memref<?xf32>
   }
   return
@@ -235,7 +235,7 @@ func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
 //   UNROLL-BY-3-DAG:  %[[C10:.*]] = constant 10 : index
 //   UNROLL-BY-3-DAG:  %[[C9:.*]] = constant 9 : index
 //   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
-//       UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
+//       UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
 //  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
 //  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
 //  UNROLL-BY-3-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
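
 Taken together, these CHECK blocks encode the unrolling recipe: scale the step by the
 unroll factor, clone the body with shifted induction variables, and emit a clean-up
 loop for any leftover iterations. Written out directly for factor 2 and trip count 20,
 the result has roughly this shape (a hand-written sketch, not captured pass output):

  func @unrolled_by_2_sketch(%mem : memref<?xf32>) {
    %cst = constant 7.0 : f32
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %c2 = constant 2 : index
    %c20 = constant 20 : index
    // Trip count 20 divides evenly by 2, so no epilogue loop is needed.
    scf.for %i = %c0 to %c20 step %c2 {
      store %cst, %mem[%i] : memref<?xf32>
      // Second copy of the body, offset by the original step.
      %i1 = addi %i, %c1 : index
      store %cst, %mem[%i1] : memref<?xf32>
    }
    return
  }

 When the factor does not divide the trip count, as in @static_loop_unroll_by_3 above,
 a second scf.for over the remaining iterations follows the main loop.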

diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir
index 881feb46ead4..c21451d8cf7f 100644
--- a/mlir/test/Dialect/SCF/ops.mlir
+++ b/mlir/test/Dialect/SCF/ops.mlir
@@ -5,39 +5,39 @@
 // RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure
 
 func @std_for(%arg0 : index, %arg1 : index, %arg2 : index) {
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
-    loop.for %i1 = %arg0 to %arg1 step %arg2 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg0 to %arg1 step %arg2 {
       %min_cmp = cmpi "slt", %i0, %i1 : index
       %min = select %min_cmp, %i0, %i1 : index
       %max_cmp = cmpi "sge", %i0, %i1 : index
       %max = select %max_cmp, %i0, %i1 : index
-      loop.for %i2 = %min to %max step %i1 {
+      scf.for %i2 = %min to %max step %i1 {
       }
     }
   }
   return
 }
 // CHECK-LABEL: func @std_for(
-//  CHECK-NEXT:   loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:     loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//  CHECK-NEXT:   scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//  CHECK-NEXT:     scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //  CHECK-NEXT:       %{{.*}} = cmpi "slt", %{{.*}}, %{{.*}} : index
 //  CHECK-NEXT:       %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : index
 //  CHECK-NEXT:       %{{.*}} = cmpi "sge", %{{.*}}, %{{.*}} : index
 //  CHECK-NEXT:       %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : index
-//  CHECK-NEXT:       loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//  CHECK-NEXT:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 
 func @std_if(%arg0: i1, %arg1: f32) {
-  loop.if %arg0 {
+  scf.if %arg0 {
     %0 = addf %arg1, %arg1 : f32
   }
   return
 }
 // CHECK-LABEL: func @std_if(
-//  CHECK-NEXT:   loop.if %{{.*}} {
+//  CHECK-NEXT:   scf.if %{{.*}} {
 //  CHECK-NEXT:     %{{.*}} = addf %{{.*}}, %{{.*}} : f32
 
 func @std_if_else(%arg0: i1, %arg1: f32) {
-  loop.if %arg0 {
+  scf.if %arg0 {
     %0 = addf %arg1, %arg1 : f32
   } else {
     %1 = addf %arg1, %arg1 : f32
@@ -45,7 +45,7 @@ func @std_if_else(%arg0: i1, %arg1: f32) {
   return
 }
 // CHECK-LABEL: func @std_if_else(
-//  CHECK-NEXT:   loop.if %{{.*}} {
+//  CHECK-NEXT:   scf.if %{{.*}} {
 //  CHECK-NEXT:     %{{.*}} = addf %{{.*}}, %{{.*}} : f32
 //  CHECK-NEXT:   } else {
 //  CHECK-NEXT:     %{{.*}} = addf %{{.*}}, %{{.*}} : f32
@@ -53,7 +53,7 @@ func @std_if_else(%arg0: i1, %arg1: f32) {
 func @std_parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                         %arg3 : index, %arg4 : index) {
   %step = constant 1 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                           step (%arg4, %step) {
     %min_cmp = cmpi "slt", %i0, %i1 : index
     %min = select %min_cmp, %i0, %i1 : index
@@ -61,19 +61,19 @@ func @std_parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
     %max = select %max_cmp, %i0, %i1 : index
     %zero = constant 0.0 : f32
     %int_zero = constant 0 : i32
-    %red:2 = loop.parallel (%i2) = (%min) to (%max) step (%i1)
+    %red:2 = scf.parallel (%i2) = (%min) to (%max) step (%i1)
                                       init (%zero, %int_zero) -> (f32, i32) {
       %one = constant 1.0 : f32
-      loop.reduce(%one) : f32 {
+      scf.reduce(%one) : f32 {
         ^bb0(%lhs : f32, %rhs: f32):
           %res = addf %lhs, %rhs : f32
-          loop.reduce.return %res : f32
+          scf.reduce.return %res : f32
       }
       %int_one = constant 1 : i32
-      loop.reduce(%int_one) : i32 {
+      scf.reduce(%int_one) : i32 {
         ^bb0(%lhs : i32, %rhs: i32):
           %res = muli %lhs, %rhs : i32
-          loop.reduce.return %res : i32
+          scf.reduce.return %res : i32
       }
     }
   }
@@ -86,7 +86,7 @@ func @std_parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 //  CHECK-SAME: %[[ARG3:[A-Za-z0-9]+]]:
 //  CHECK-SAME: %[[ARG4:[A-Za-z0-9]+]]:
 //       CHECK:   %[[STEP:.*]] = constant 1 : index
-//  CHECK-NEXT:   loop.parallel (%[[I0:.*]], %[[I1:.*]]) = (%[[ARG0]], %[[ARG1]]) to
+//  CHECK-NEXT:   scf.parallel (%[[I0:.*]], %[[I1:.*]]) = (%[[ARG0]], %[[ARG1]]) to
 //       CHECK:   (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[STEP]]) {
 //  CHECK-NEXT:     %[[MIN_CMP:.*]] = cmpi "slt", %[[I0]], %[[I1]] : index
 //  CHECK-NEXT:     %[[MIN:.*]] = select %[[MIN_CMP]], %[[I0]], %[[I1]] : index
@@ -94,29 +94,29 @@ func @std_parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 //  CHECK-NEXT:     %[[MAX:.*]] = select %[[MAX_CMP]], %[[I0]], %[[I1]] : index
 //  CHECK-NEXT:     %[[ZERO:.*]] = constant 0.000000e+00 : f32
 //  CHECK-NEXT:     %[[INT_ZERO:.*]] = constant 0 : i32
-//  CHECK-NEXT:     loop.parallel (%{{.*}}) = (%[[MIN]]) to (%[[MAX]])
+//  CHECK-NEXT:     scf.parallel (%{{.*}}) = (%[[MIN]]) to (%[[MAX]])
 //  CHECK-SAME:          step (%[[I1]])
 //  CHECK-SAME:          init (%[[ZERO]], %[[INT_ZERO]]) -> (f32, i32) {
 //  CHECK-NEXT:       %[[ONE:.*]] = constant 1.000000e+00 : f32
-//  CHECK-NEXT:       loop.reduce(%[[ONE]]) : f32 {
+//  CHECK-NEXT:       scf.reduce(%[[ONE]]) : f32 {
 //  CHECK-NEXT:       ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32):
 //  CHECK-NEXT:         %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
-//  CHECK-NEXT:         loop.reduce.return %[[RES]] : f32
+//  CHECK-NEXT:         scf.reduce.return %[[RES]] : f32
 //  CHECK-NEXT:       }
 //  CHECK-NEXT:       %[[INT_ONE:.*]] = constant 1 : i32
-//  CHECK-NEXT:       loop.reduce(%[[INT_ONE]]) : i32 {
+//  CHECK-NEXT:       scf.reduce(%[[INT_ONE]]) : i32 {
 //  CHECK-NEXT:       ^bb0(%[[LHS:.*]]: i32, %[[RHS:.*]]: i32):
 //  CHECK-NEXT:         %[[RES:.*]] = muli %[[LHS]], %[[RHS]] : i32
-//  CHECK-NEXT:         loop.reduce.return %[[RES]] : i32
+//  CHECK-NEXT:         scf.reduce.return %[[RES]] : i32
 //  CHECK-NEXT:       }
-//  CHECK-NEXT:       loop.yield
+//  CHECK-NEXT:       scf.yield
 //  CHECK-NEXT:     }
-//  CHECK-NEXT:     loop.yield
+//  CHECK-NEXT:     scf.yield
 
 func @parallel_explicit_yield(
     %arg0: index, %arg1: index, %arg2: index) {
-  loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
-    loop.yield
+  scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+    scf.yield
   }
   return
 }
@@ -125,43 +125,43 @@ func @parallel_explicit_yield(
 //  CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]:
 //  CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
 //  CHECK-SAME: %[[ARG2:[A-Za-z0-9]+]]:
-//  CHECK-NEXT: loop.parallel (%{{.*}}) = (%[[ARG0]]) to (%[[ARG1]]) step (%[[ARG2]])
-//  CHECK-NEXT: loop.yield
+//  CHECK-NEXT: scf.parallel (%{{.*}}) = (%[[ARG0]]) to (%[[ARG1]]) step (%[[ARG2]])
+//  CHECK-NEXT: scf.yield
 //  CHECK-NEXT: }
 //  CHECK-NEXT: return
 //  CHECK-NEXT: }
 
 func @std_if_yield(%arg0: i1, %arg1: f32)
 {
-  %x, %y = loop.if %arg0 -> (f32, f32) {
+  %x, %y = scf.if %arg0 -> (f32, f32) {
     %0 = addf %arg1, %arg1 : f32
     %1 = subf %arg1, %arg1 : f32
-    loop.yield %0, %1 : f32, f32
+    scf.yield %0, %1 : f32, f32
   } else {
     %0 = subf %arg1, %arg1 : f32
     %1 = addf %arg1, %arg1 : f32
-    loop.yield %0, %1 : f32, f32
+    scf.yield %0, %1 : f32, f32
   }
   return
 }
 // CHECK-LABEL: func @std_if_yield(
 //  CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]:
 //  CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
-//  CHECK-NEXT: %{{.*}}:2 = loop.if %[[ARG0]] -> (f32, f32) {
+//  CHECK-NEXT: %{{.*}}:2 = scf.if %[[ARG0]] -> (f32, f32) {
 //  CHECK-NEXT: %[[T1:.*]] = addf %[[ARG1]], %[[ARG1]]
 //  CHECK-NEXT: %[[T2:.*]] = subf %[[ARG1]], %[[ARG1]]
-//  CHECK-NEXT: loop.yield %[[T1]], %[[T2]] : f32, f32
+//  CHECK-NEXT: scf.yield %[[T1]], %[[T2]] : f32, f32
 //  CHECK-NEXT: } else {
 //  CHECK-NEXT: %[[T3:.*]] = subf %[[ARG1]], %[[ARG1]]
 //  CHECK-NEXT: %[[T4:.*]] = addf %[[ARG1]], %[[ARG1]]
-//  CHECK-NEXT: loop.yield %[[T3]], %[[T4]] : f32, f32
+//  CHECK-NEXT: scf.yield %[[T3]], %[[T4]] : f32, f32
 //  CHECK-NEXT: }
 
 func @std_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) {
   %s0 = constant 0.0 : f32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (f32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (f32) {
     %sn = addf %si, %si : f32
-    loop.yield %sn : f32
+    scf.yield %sn : f32
   }
   return
 }
@@ -170,10 +170,10 @@ func @std_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) {
 // CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
 // CHECK-SAME: %[[ARG2:[A-Za-z0-9]+]]:
 // CHECK-NEXT: %[[INIT:.*]] = constant
-// CHECK-NEXT: %{{.*}} = loop.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
+// CHECK-NEXT: %{{.*}} = scf.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
 // CHECK-SAME: iter_args(%[[ITER:.*]] = %[[INIT]]) -> (f32) {
 // CHECK-NEXT: %[[NEXT:.*]] = addf %[[ITER]], %[[ITER]] : f32
-// CHECK-NEXT: loop.yield %[[NEXT]] : f32
+// CHECK-NEXT: scf.yield %[[NEXT]] : f32
 // CHECK-NEXT: }
 
 
@@ -181,11 +181,11 @@ func @std_for_yield_multi(%arg0 : index, %arg1 : index, %arg2 : index) {
   %s0 = constant 0.0 : f32
   %t0 = constant 1 : i32
   %u0 = constant 1.0 : f32
-  %result1:3 = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %ti = %t0, %ui = %u0) -> (f32, i32, f32) {
+  %result1:3 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %ti = %t0, %ui = %u0) -> (f32, i32, f32) {
     %sn = addf %si, %si : f32
     %tn = addi %ti, %ti : i32
     %un = subf %ui, %ui : f32
-    loop.yield %sn, %tn, %un : f32, i32, f32
+    scf.yield %sn, %tn, %un : f32, i32, f32
   }
   return
 }
@@ -196,27 +196,27 @@ func @std_for_yield_multi(%arg0 : index, %arg1 : index, %arg2 : index) {
 // CHECK-NEXT: %[[INIT1:.*]] = constant
 // CHECK-NEXT: %[[INIT2:.*]] = constant
 // CHECK-NEXT: %[[INIT3:.*]] = constant
-// CHECK-NEXT: %{{.*}}:3 = loop.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
+// CHECK-NEXT: %{{.*}}:3 = scf.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
 // CHECK-SAME: iter_args(%[[ITER1:.*]] = %[[INIT1]], %[[ITER2:.*]] = %[[INIT2]], %[[ITER3:.*]] = %[[INIT3]]) -> (f32, i32, f32) {
 // CHECK-NEXT: %[[NEXT1:.*]] = addf %[[ITER1]], %[[ITER1]] : f32
 // CHECK-NEXT: %[[NEXT2:.*]] = addi %[[ITER2]], %[[ITER2]] : i32
 // CHECK-NEXT: %[[NEXT3:.*]] = subf %[[ITER3]], %[[ITER3]] : f32
-// CHECK-NEXT: loop.yield %[[NEXT1]], %[[NEXT2]], %[[NEXT3]] : f32, i32, f32
+// CHECK-NEXT: scf.yield %[[NEXT1]], %[[NEXT2]], %[[NEXT3]] : f32, i32, f32
 
 
 func @conditional_reduce(%buffer: memref<1024xf32>, %lb: index, %ub: index, %step: index) -> (f32) {
   %sum_0 = constant 0.0 : f32
   %c0 = constant 0.0 : f32
-  %sum = loop.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) {
+  %sum = scf.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) {
 	  %t = load %buffer[%iv] : memref<1024xf32>
 	  %cond = cmpf "ugt", %t, %c0 : f32
-	  %sum_next = loop.if %cond -> (f32) {
+	  %sum_next = scf.if %cond -> (f32) {
 	    %new_sum = addf %sum_iter, %t : f32
-      loop.yield %new_sum : f32
+      scf.yield %new_sum : f32
 	  } else {
-  		loop.yield %sum_iter : f32
+  		scf.yield %sum_iter : f32
 	  }
-    loop.yield %sum_next : f32
+    scf.yield %sum_next : f32
   }
   return %sum : f32
 }
@@ -227,16 +227,16 @@ func @conditional_reduce(%buffer: memref<1024xf32>, %lb: index, %ub: index, %ste
 //  CHECK-SAME: %[[ARG3:[A-Za-z0-9]+]]
 //  CHECK-NEXT: %[[INIT:.*]] = constant
 //  CHECK-NEXT: %[[ZERO:.*]] = constant
-//  CHECK-NEXT: %[[RESULT:.*]] = loop.for %[[IV:.*]] = %[[ARG1]] to %[[ARG2]] step %[[ARG3]]
+//  CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[ARG1]] to %[[ARG2]] step %[[ARG3]]
 //  CHECK-SAME: iter_args(%[[ITER:.*]] = %[[INIT]]) -> (f32) {
 //  CHECK-NEXT: %[[T:.*]] = load %[[ARG0]][%[[IV]]]
 //  CHECK-NEXT: %[[COND:.*]] = cmpf "ugt", %[[T]], %[[ZERO]]
-//  CHECK-NEXT: %[[IFRES:.*]] = loop.if %[[COND]] -> (f32) {
+//  CHECK-NEXT: %[[IFRES:.*]] = scf.if %[[COND]] -> (f32) {
 //  CHECK-NEXT: %[[THENRES:.*]] = addf %[[ITER]], %[[T]]
-//  CHECK-NEXT: loop.yield %[[THENRES]] : f32
+//  CHECK-NEXT: scf.yield %[[THENRES]] : f32
 //  CHECK-NEXT: } else {
-//  CHECK-NEXT: loop.yield %[[ITER]] : f32
+//  CHECK-NEXT: scf.yield %[[ITER]] : f32
 //  CHECK-NEXT: }
-//  CHECK-NEXT: loop.yield %[[IFRES]] : f32
+//  CHECK-NEXT: scf.yield %[[IFRES]] : f32
 //  CHECK-NEXT: }
 //  CHECK-NEXT: return %[[RESULT]]
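
 The loops above that declare `-> (...)` result types return their final iter_args
 values as SSA results; a `%name:N` binding captures all of them and `#k` selects one.
 A small consumer sketch (hypothetical function, same iter_args form as the tests above):

  func @use_loop_results(%lb : index, %ub : index, %step : index) -> f32 {
    %s0 = constant 0.0 : f32
    %t0 = constant 1.0 : f32
    %res:2 = scf.for %i = %lb to %ub step %step
                 iter_args(%si = %s0, %ti = %t0) -> (f32, f32) {
      %sn = addf %si, %si : f32
      %tn = addf %ti, %ti : f32
      scf.yield %sn, %tn : f32, f32
    }
    // Combine the two loop-carried results after the loop.
    %sum = addf %res#0, %res#1 : f32
    return %sum : f32
  }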

diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
index f9c59b44354a..6ed5ad36819e 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
@@ -4,11 +4,11 @@ func @fuse_empty_loops() {
   %c2 = constant 2 : index
   %c0 = constant 0 : index
   %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
@@ -16,11 +16,11 @@ func @fuse_empty_loops() {
 // CHECK:        [[C2:%.*]] = constant 2 : index
 // CHECK:        [[C0:%.*]] = constant 0 : index
 // CHECK:        [[C1:%.*]] = constant 1 : index
-// CHECK:        loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK:        scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME:       to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
-// CHECK:          loop.yield
+// CHECK:          scf.yield
 // CHECK:        }
-// CHECK-NOT:    loop.parallel
+// CHECK-NOT:    scf.parallel
 
 // -----
 
@@ -30,19 +30,19 @@ func @fuse_two(%A: memref<2x2xf32>, %B: memref<2x2xf32>,
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %sum = alloc()  : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %B_elem = load %B[%i, %j] : memref<2x2xf32>
     %C_elem = load %C[%i, %j] : memref<2x2xf32>
     %sum_elem = addf %B_elem, %C_elem : f32
     store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %sum_elem = load %sum[%i, %j] : memref<2x2xf32>
     %A_elem = load %A[%i, %j] : memref<2x2xf32>
     %product_elem = mulf %sum_elem, %A_elem : f32
     store %product_elem, %result[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
   }
   dealloc %sum : memref<2x2xf32>
   return
@@ -54,7 +54,7 @@ func @fuse_two(%A: memref<2x2xf32>, %B: memref<2x2xf32>,
 // CHECK:      [[C0:%.*]] = constant 0 : index
 // CHECK:      [[C1:%.*]] = constant 1 : index
 // CHECK:      [[SUM:%.*]] = alloc()
-// CHECK:      loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK:      scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME:     to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
 // CHECK:        [[B_ELEM:%.*]] = load [[B]]{{\[}}[[I]], [[J]]]
 // CHECK:        [[C_ELEM:%.*]] = load [[C]]{{\[}}[[I]], [[J]]]
@@ -64,7 +64,7 @@ func @fuse_two(%A: memref<2x2xf32>, %B: memref<2x2xf32>,
 // CHECK:        [[A_ELEM:%.*]] = load [[A]]{{\[}}[[I]], [[J]]]
 // CHECK:        [[PRODUCT_ELEM:%.*]] = mulf [[SUM_ELEM_]], [[A_ELEM]]
 // CHECK:        store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]]
-// CHECK:        loop.yield
+// CHECK:        scf.yield
 // CHECK:      }
 // CHECK:      dealloc [[SUM]]
 
@@ -78,23 +78,23 @@ func @fuse_three(%lhs: memref<100x10xf32>, %rhs: memref<100xf32>,
   %c1 = constant 1 : index
   %broadcast_rhs = alloc() : memref<100x10xf32>
  %diff = alloc() : memref<100x10xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
     %rhs_elem = load %rhs[%i] : memref<100xf32>
     store %rhs_elem, %broadcast_rhs[%i, %j] : memref<100x10xf32>
-    loop.yield
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
     %lhs_elem = load %lhs[%i, %j] : memref<100x10xf32>
     %broadcast_rhs_elem = load %broadcast_rhs[%i, %j] : memref<100x10xf32>
    %diff_elem = subf %lhs_elem, %broadcast_rhs_elem : f32
    store %diff_elem, %diff[%i, %j] : memref<100x10xf32>
-    loop.yield
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
    %diff_elem = load %diff[%i, %j] : memref<100x10xf32>
    %exp_elem = exp %diff_elem : f32
     store %exp_elem, %result[%i, %j] : memref<100x10xf32>
-    loop.yield
+    scf.yield
   }
   dealloc %broadcast_rhs : memref<100x10xf32>
  dealloc %diff : memref<100x10xf32>
@@ -109,7 +109,7 @@ func @fuse_three(%lhs: memref<100x10xf32>, %rhs: memref<100xf32>,
 // CHECK:      [[C1:%.*]] = constant 1 : index
 // CHECK:      [[BROADCAST_RHS:%.*]] = alloc()
 // CHECK:      [[DIFF:%.*]] = alloc()
-// CHECK:      loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK:      scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME:     to ([[C100]], [[C10]]) step ([[C1]], [[C1]]) {
 // CHECK:        [[RHS_ELEM:%.*]] = load [[RHS]]{{\[}}[[I]]]
 // CHECK:        store [[RHS_ELEM]], [[BROADCAST_RHS]]{{\[}}[[I]], [[J]]]
@@ -120,7 +120,7 @@ func @fuse_three(%lhs: memref<100x10xf32>, %rhs: memref<100xf32>,
 // CHECK:        [[DIFF_ELEM_:%.*]] = load [[DIFF]]{{\[}}[[I]], [[J]]]
 // CHECK:        [[EXP_ELEM:%.*]] = exp [[DIFF_ELEM_]]
 // CHECK:        store [[EXP_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]]
-// CHECK:        loop.yield
+// CHECK:        scf.yield
 // CHECK:      }
 // CHECK:      dealloc [[BROADCAST_RHS]]
 // CHECK:      dealloc [[DIFF]]
@@ -131,21 +131,21 @@ func @do_not_fuse_nested_ploop1() {
   %c2 = constant 2 : index
   %c0 = constant 0 : index
   %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-      loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+      scf.yield
     }
-    loop.yield
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
 // CHECK-LABEL: func @do_not_fuse_nested_ploop1
-// CHECK:        loop.parallel
-// CHECK:          loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:          scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -153,21 +153,21 @@ func @do_not_fuse_nested_ploop2() {
   %c2 = constant 2 : index
   %c0 = constant 0 : index
   %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-      loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+      scf.yield
     }
-    loop.yield
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
 // CHECK-LABEL: func @do_not_fuse_nested_ploop2
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
-// CHECK:          loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
+// CHECK:          scf.parallel
 
 // -----
 
@@ -175,17 +175,17 @@ func @do_not_fuse_loops_unmatching_num_loops() {
   %c2 = constant 2 : index
   %c0 = constant 0 : index
   %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
-  loop.parallel (%i) = (%c0) to (%c2) step (%c1) {
-    loop.yield
+  scf.parallel (%i) = (%c0) to (%c2) step (%c1) {
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
 // CHECK-LABEL: func @do_not_fuse_loops_unmatching_num_loops
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -193,18 +193,18 @@ func @do_not_fuse_loops_with_side_effecting_ops_in_between() {
   %c2 = constant 2 : index
   %c0 = constant 0 : index
   %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
   %buffer  = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
 // CHECK-LABEL: func @do_not_fuse_loops_with_side_effecting_ops_in_between
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -213,17 +213,17 @@ func @do_not_fuse_loops_unmatching_iteration_space() {
   %c1 = constant 1 : index
   %c2 = constant 2 : index
   %c4 = constant 4 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c4, %c4) step (%c2, %c2) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c4, %c4) step (%c2, %c2) {
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
 // CHECK-LABEL: func @do_not_fuse_loops_unmatching_iteration_space
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -234,27 +234,27 @@ func @do_not_fuse_unmatching_write_read_patterns(
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %common_buf = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %B_elem = load %B[%i, %j] : memref<2x2xf32>
     %C_elem = load %C[%i, %j] : memref<2x2xf32>
     %sum_elem = addf %B_elem, %C_elem : f32
     store %sum_elem, %common_buf[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %k = addi %i, %c1 : index
     %sum_elem = load %common_buf[%k, %j] : memref<2x2xf32>
     %A_elem = load %A[%i, %j] : memref<2x2xf32>
     %product_elem = mulf %sum_elem, %A_elem : f32
     store %product_elem, %result[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
   }
   dealloc %common_buf : memref<2x2xf32>
   return
 }
 // CHECK-LABEL: func @do_not_fuse_unmatching_write_read_patterns
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -264,27 +264,27 @@ func @do_not_fuse_unmatching_read_write_patterns(
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %sum = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %B_elem = load %B[%i, %j] : memref<2x2xf32>
     %C_elem = load %common_buf[%i, %j] : memref<2x2xf32>
     %sum_elem = addf %B_elem, %C_elem : f32
     store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %k = addi %i, %c1 : index
     %sum_elem = load %sum[%k, %j] : memref<2x2xf32>
     %A_elem = load %A[%i, %j] : memref<2x2xf32>
     %product_elem = mulf %sum_elem, %A_elem : f32
     store %product_elem, %common_buf[%j, %i] : memref<2x2xf32>
-    loop.yield
+    scf.yield
   }
   dealloc %sum : memref<2x2xf32>
   return
 }
 // CHECK-LABEL: func @do_not_fuse_unmatching_read_write_patterns
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -293,20 +293,20 @@ func @do_not_fuse_loops_with_memref_defined_in_loop_bodies() {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %buffer  = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     %A = subview %buffer[%c0, %c0][%c2, %c2][%c1, %c1]
       : memref<2x2xf32> to memref<?x?xf32, offset: ?, strides:[?, ?]>
     %A_elem = load %A[%i, %j] : memref<?x?xf32, offset: ?, strides:[?, ?]>
-    loop.yield
+    scf.yield
   }
   "xla_lhlo.terminator"() : () -> ()
 }
 // CHECK-LABEL: func @do_not_fuse_loops_with_memref_defined_in_loop_bodies
-// CHECK:        loop.parallel
-// CHECK:        loop.parallel
+// CHECK:        scf.parallel
+// CHECK:        scf.parallel
 
 // -----
 
@@ -316,20 +316,20 @@ func @nested_fuse(%A: memref<2x2xf32>, %B: memref<2x2xf32>,
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %sum = alloc()  : memref<2x2xf32>
-  loop.parallel (%k) = (%c0) to (%c2) step (%c1) {
-    loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%k) = (%c0) to (%c2) step (%c1) {
+    scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
       %B_elem = load %B[%i, %j] : memref<2x2xf32>
       %C_elem = load %C[%i, %j] : memref<2x2xf32>
       %sum_elem = addf %B_elem, %C_elem : f32
       store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
-      loop.yield
+      scf.yield
     }
-    loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
       %sum_elem = load %sum[%i, %j] : memref<2x2xf32>
       %A_elem = load %A[%i, %j] : memref<2x2xf32>
       %product_elem = mulf %sum_elem, %A_elem : f32
       store %product_elem, %result[%i, %j] : memref<2x2xf32>
-      loop.yield
+      scf.yield
     }
   }
   dealloc %sum : memref<2x2xf32>
@@ -342,8 +342,8 @@ func @nested_fuse(%A: memref<2x2xf32>, %B: memref<2x2xf32>,
 // CHECK:      [[C0:%.*]] = constant 0 : index
 // CHECK:      [[C1:%.*]] = constant 1 : index
 // CHECK:      [[SUM:%.*]] = alloc()
-// CHECK:      loop.parallel
-// CHECK:        loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK:      scf.parallel
+// CHECK:        scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME:       to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
 // CHECK:          [[B_ELEM:%.*]] = load [[B]]{{\[}}[[I]], [[J]]]
 // CHECK:          [[C_ELEM:%.*]] = load [[C]]{{\[}}[[I]], [[J]]]
@@ -353,7 +353,7 @@ func @nested_fuse(%A: memref<2x2xf32>, %B: memref<2x2xf32>,
 // CHECK:          [[A_ELEM:%.*]] = load [[A]]{{\[}}[[I]], [[J]]]
 // CHECK:          [[PRODUCT_ELEM:%.*]] = mulf [[SUM_ELEM_]], [[A_ELEM]]
 // CHECK:          store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]]
-// CHECK:          loop.yield
+// CHECK:          scf.yield
 // CHECK:        }
 // CHECK:      }
 // CHECK:      dealloc [[SUM]]
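
 The @fuse_two CHECK lines above describe a single loop whose body performs both the
 addition and the multiplication. Spelled out as plain IR, the fused result has roughly
 this shape (a sketch of the post-fusion form, not captured output):

  func @fused_sketch(%A : memref<2x2xf32>, %B : memref<2x2xf32>,
                     %C : memref<2x2xf32>, %result : memref<2x2xf32>) {
    %c2 = constant 2 : index
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %sum = alloc() : memref<2x2xf32>
    scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
      %B_elem = load %B[%i, %j] : memref<2x2xf32>
      %C_elem = load %C[%i, %j] : memref<2x2xf32>
      %sum_elem = addf %B_elem, %C_elem : f32
      store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
      // Body of the second loop, now reading the value written just above.
      %sum_elem_ = load %sum[%i, %j] : memref<2x2xf32>
      %A_elem = load %A[%i, %j] : memref<2x2xf32>
      %product_elem = mulf %sum_elem_, %A_elem : f32
      store %product_elem, %result[%i, %j] : memref<2x2xf32>
      scf.yield
    }
    dealloc %sum : memref<2x2xf32>
    return
  }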

diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
index ab736c985986..5843eb6d4134 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
@@ -11,7 +11,7 @@ func @parallel_loop(%outer_i0: index, %outer_i1: index, %A: memref<?x?xf32>, %B:
   %d1 = dim %A, 1 : memref<?x?xf32>
   %b0 = affine.min #map0()[%d0, %outer_i0]
   %b1 = affine.min #map1()[%d1, %outer_i1]
-  loop.parallel (%i0, %i1) = (%c0, %c0) to (%b0, %b1) step (%c1, %c1) {
+  scf.parallel (%i0, %i1) = (%c0, %c0) to (%b0, %b1) step (%c1, %c1) {
     %B_elem = load %B[%i0, %i1] : memref<?x?xf32>
     %C_elem = load %C[%i0, %i1] : memref<?x?xf32>
     %sum_elem = addf %B_elem, %C_elem : f32
@@ -33,12 +33,12 @@ func @parallel_loop(%outer_i0: index, %outer_i1: index, %A: memref<?x?xf32>, %B:
 // CHECK:           [[VAL_14:%.*]] = constant 64 : index
 // CHECK:           [[VAL_15:%.*]] = cmpi "eq", [[VAL_11]], [[VAL_14]] : index
 // CHECK:           [[VAL_16:%.*]] = and [[VAL_13]], [[VAL_15]] : i1
-// CHECK:           loop.if [[VAL_16]] {
-// CHECK:             loop.parallel ([[VAL_17:%.*]], [[VAL_18:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_12]], [[VAL_14]]) step ([[VAL_7]], [[VAL_7]]) {
+// CHECK:           scf.if [[VAL_16]] {
+// CHECK:             scf.parallel ([[VAL_17:%.*]], [[VAL_18:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_12]], [[VAL_14]]) step ([[VAL_7]], [[VAL_7]]) {
 // CHECK:               store
 // CHECK:             }
 // CHECK:           } else {
-// CHECK:             loop.parallel ([[VAL_22:%.*]], [[VAL_23:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_10]], [[VAL_11]]) step ([[VAL_7]], [[VAL_7]]) {
+// CHECK:             scf.parallel ([[VAL_22:%.*]], [[VAL_23:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_10]], [[VAL_11]]) step ([[VAL_7]], [[VAL_7]]) {
 // CHECK:               store
 // CHECK:             }
 // CHECK:           }
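
 The specialization pass versions a parallel loop on whether its dynamic bounds equal a
 known constant, so the hot path runs with static bounds. The cmpi/and/scf.if structure
 the CHECK lines describe, written directly (a sketch; names and the "use" op are
 hypothetical placeholders):

  func @specialized_sketch(%ub0 : index, %ub1 : index) {
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %c64 = constant 64 : index
    %eq0 = cmpi "eq", %ub0, %c64 : index
    %eq1 = cmpi "eq", %ub1, %c64 : index
    %both = and %eq0, %eq1 : i1
    scf.if %both {
      // Fast version: bounds are the constant 64.
      scf.parallel (%i, %j) = (%c0, %c0) to (%c64, %c64) step (%c1, %c1) {
        "use"(%i, %j) : (index, index) -> ()
      }
    } else {
      // Fallback with the original dynamic bounds.
      scf.parallel (%i, %j) = (%c0, %c0) to (%ub0, %ub1) step (%c1, %c1) {
        "use"(%i, %j) : (index, index) -> ()
      }
    }
    return
  }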

diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
index b9c67a62842b..7b37830e8c5d 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
@@ -4,7 +4,7 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                     %arg3 : index, %arg4 : index, %arg5 : index,
 		    %A: memref<?x?xf32>, %B: memref<?x?xf32>,
                     %C: memref<?x?xf32>, %result: memref<?x?xf32>) {
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
     %B_elem = load %B[%i0, %i1] : memref<?x?xf32>
     %C_elem = load %C[%i0, %i1] : memref<?x?xf32>
     %sum_elem = addf %B_elem, %C_elem : f32
@@ -21,10 +21,10 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 // CHECK:           [[VAL_12:%.*]] = constant 4 : index
 // CHECK:           [[VAL_13:%.*]] = muli [[VAL_4]], [[VAL_11]] : index
 // CHECK:           [[VAL_14:%.*]] = muli [[VAL_5]], [[VAL_12]] : index
-// CHECK:           loop.parallel ([[VAL_15:%.*]], [[VAL_16:%.*]]) = ([[VAL_0]], [[VAL_1]]) to ([[VAL_2]], [[VAL_3]]) step ([[VAL_13]], [[VAL_14]]) {
+// CHECK:           scf.parallel ([[VAL_15:%.*]], [[VAL_16:%.*]]) = ([[VAL_0]], [[VAL_1]]) to ([[VAL_2]], [[VAL_3]]) step ([[VAL_13]], [[VAL_14]]) {
 // CHECK:             [[VAL_17:%.*]] = affine.min #map0([[VAL_11]], [[VAL_2]], [[VAL_15]])
 // CHECK:             [[VAL_18:%.*]] = affine.min #map0([[VAL_12]], [[VAL_3]], [[VAL_16]])
-// CHECK:             loop.parallel ([[VAL_19:%.*]], [[VAL_20:%.*]]) = ([[VAL_10]], [[VAL_10]]) to ([[VAL_17]], [[VAL_18]]) step ([[VAL_4]], [[VAL_5]]) {
+// CHECK:             scf.parallel ([[VAL_19:%.*]], [[VAL_20:%.*]]) = ([[VAL_10]], [[VAL_10]]) to ([[VAL_17]], [[VAL_18]]) step ([[VAL_4]], [[VAL_5]]) {
 // CHECK:               [[VAL_21:%.*]] = load [[VAL_7]]{{\[}}[[VAL_19]], [[VAL_20]]] : memref<?x?xf32>
 // CHECK:               [[VAL_22:%.*]] = load [[VAL_8]]{{\[}}[[VAL_19]], [[VAL_20]]] : memref<?x?xf32>
 // CHECK:               [[VAL_23:%.*]] = addf [[VAL_21]], [[VAL_22]] : f32
@@ -39,11 +39,11 @@ func @tile_nested_innermost() {
   %c2 = constant 2 : index
   %c0 = constant 0 : index
   %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
     }
   }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
   }
   return
 }
@@ -52,16 +52,16 @@ func @tile_nested_innermost() {
 // CHECK:           [[VAL_24:%.*]] = constant 2 : index
 // CHECK:           [[VAL_25:%.*]] = constant 0 : index
 // CHECK:           [[VAL_26:%.*]] = constant 1 : index
-// CHECK:           loop.parallel ([[VAL_27:%.*]], [[VAL_28:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_26]], [[VAL_26]]) {
+// CHECK:           scf.parallel ([[VAL_27:%.*]], [[VAL_28:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_26]], [[VAL_26]]) {
 // CHECK:             [[VAL_29:%.*]] = constant 0 : index
 // CHECK:             [[VAL_30:%.*]] = constant 1 : index
 // CHECK:             [[VAL_31:%.*]] = constant 4 : index
 // CHECK:             [[VAL_32:%.*]] = muli [[VAL_26]], [[VAL_30]] : index
 // CHECK:             [[VAL_33:%.*]] = muli [[VAL_26]], [[VAL_31]] : index
-// CHECK:             loop.parallel ([[VAL_34:%.*]], [[VAL_35:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_32]], [[VAL_33]]) {
+// CHECK:             scf.parallel ([[VAL_34:%.*]], [[VAL_35:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_32]], [[VAL_33]]) {
 // CHECK:               [[VAL_36:%.*]] = affine.min #map0([[VAL_30]], [[VAL_24]], [[VAL_34]])
 // CHECK:               [[VAL_37:%.*]] = affine.min #map0([[VAL_31]], [[VAL_24]], [[VAL_35]])
-// CHECK:               loop.parallel ([[VAL_38:%.*]], [[VAL_39:%.*]]) = ([[VAL_29]], [[VAL_29]]) to ([[VAL_36]], [[VAL_37]]) step ([[VAL_26]], [[VAL_26]]) {
+// CHECK:               scf.parallel ([[VAL_38:%.*]], [[VAL_39:%.*]]) = ([[VAL_29]], [[VAL_29]]) to ([[VAL_36]], [[VAL_37]]) step ([[VAL_26]], [[VAL_26]]) {
 // CHECK:               }
 // CHECK:             }
 // CHECK:           }
@@ -70,10 +70,10 @@ func @tile_nested_innermost() {
 // CHECK:           [[VAL_42:%.*]] = constant 4 : index
 // CHECK:           [[VAL_43:%.*]] = muli [[VAL_26]], [[VAL_41]] : index
 // CHECK:           [[VAL_44:%.*]] = muli [[VAL_26]], [[VAL_42]] : index
-// CHECK:           loop.parallel ([[VAL_45:%.*]], [[VAL_46:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_43]], [[VAL_44]]) {
+// CHECK:           scf.parallel ([[VAL_45:%.*]], [[VAL_46:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_43]], [[VAL_44]]) {
 // CHECK:             [[VAL_47:%.*]] = affine.min #map0([[VAL_41]], [[VAL_24]], [[VAL_45]])
 // CHECK:             [[VAL_48:%.*]] = affine.min #map0([[VAL_42]], [[VAL_24]], [[VAL_46]])
-// CHECK:             loop.parallel ([[VAL_49:%.*]], [[VAL_50:%.*]]) = ([[VAL_40]], [[VAL_40]]) to ([[VAL_47]], [[VAL_48]]) step ([[VAL_26]], [[VAL_26]]) {
+// CHECK:             scf.parallel ([[VAL_49:%.*]], [[VAL_50:%.*]]) = ([[VAL_40]], [[VAL_40]]) to ([[VAL_47]], [[VAL_48]]) step ([[VAL_26]], [[VAL_26]]) {
 // CHECK:             }
 // CHECK:           }
 // CHECK:           return

diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
index a4c98e6cc413..ce55187b2888 100644
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -148,7 +148,7 @@ TEST_FUNC(builder_loop_for) {
   // CHECK-LABEL: func @builder_loop_for(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
   // CHECK-DAG:    [[r0:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%{{.*}}, %{{.*}}]
   // CHECK-DAG:    [[r1:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%{{.*}}, %{{.*}}]
-  // CHECK-NEXT:   loop.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} {
+  // CHECK-NEXT:   scf.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} {
   // clang-format on
   f.print(llvm::outs());
   f.erase();
@@ -1094,9 +1094,9 @@ TEST_FUNC(builder_loop_for_yield) {
   // CHECK:     [[init1:%.*]] = constant
   // CHECK-DAG:    [[r0:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%{{.*}}, %{{.*}}]
   // CHECK-DAG:    [[r1:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%{{.*}}, %{{.*}}]
-  // CHECK-NEXT: [[res:%[0-9]+]]:2 = loop.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} iter_args([[arg0:%.*]] = [[init0]], [[arg1:%.*]] = [[init1]]) -> (f32, f32) {
+  // CHECK-NEXT: [[res:%[0-9]+]]:2 = scf.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} iter_args([[arg0:%.*]] = [[init0]], [[arg1:%.*]] = [[init1]]) -> (f32, f32) {
   // CHECK:     [[sum:%[0-9]+]] = addf [[arg0]], [[arg1]] : f32
-  // CHECK:     loop.yield [[arg1]], [[sum]] : f32, f32
+  // CHECK:     scf.yield [[arg1]], [[sum]] : f32, f32
   // CHECK:     addf [[res]]#0, [[res]]#1 : f32
   // clang-format on
 

diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir
index 86cac9dddbb3..fe028b8af1fc 100644
--- a/mlir/test/Transforms/canonicalize-block-merge.mlir
+++ b/mlir/test/Transforms/canonicalize-block-merge.mlir
@@ -163,12 +163,12 @@ func @contains_regions(%cond : i1) {
   cond_br %cond, ^bb1, ^bb2
 
 ^bb1:
-  loop.if %cond {
+  scf.if %cond {
     "foo.op"() : () -> ()
   }
   return
 ^bb2:
-  loop.if %cond {
+  scf.if %cond {
     "foo.op"() : () -> ()
   }
   return

diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index 76bd6b48f543..eb768e1e3b4b 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -469,9 +469,9 @@ func @dim_op_fold(%arg0: index, %arg1: index, %arg2: index, %BUF: memref<?xi8>,
   %M_ = dim %A, 0 : memref<?x?xf32>
   %K_ = dim %A, 1 : memref<?x?xf32>
   %N_ = dim %C, 1 : memref<?x?xf32>
-  loop.for %i = %c0 to %M_ step %c1 {
-    loop.for %j = %c0 to %N_ step %c1 {
-      loop.for %k = %c0 to %K_ step %c1 {
+  scf.for %i = %c0 to %M_ step %c1 {
+    scf.for %j = %c0 to %N_ step %c1 {
+      scf.for %k = %c0 to %K_ step %c1 {
       }
     }
   }

diff --git a/mlir/test/Transforms/loop-coalescing.mlir b/mlir/test/Transforms/loop-coalescing.mlir
index b25f91214c95..fc219d4e186a 100644
--- a/mlir/test/Transforms/loop-coalescing.mlir
+++ b/mlir/test/Transforms/loop-coalescing.mlir
@@ -15,23 +15,23 @@ func @one_3d_nest() {
   %c3 = constant 3 : index
   %c42 = constant 42 : index
   %c56 = constant 56 : index
  // The range of the new loop.
   // CHECK:     %[[partial_range:.*]] = muli %[[orig_ub_i]], %[[orig_ub_j]]
   // CHECK-NEXT:%[[range:.*]] = muli %[[partial_range]], %[[orig_ub_k]]
 
   // Updated loop bounds.
-  // CHECK: loop.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
-  loop.for %i = %c0 to %c42 step %c1 {
+  // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
+  scf.for %i = %c0 to %c42 step %c1 {
     // Inner loops must have been removed.
-    // CHECK-NOT: loop.for
+    // CHECK-NOT: scf.for
 
     // Reconstruct original IVs from the linearized one.
     // CHECK: %[[orig_k:.*]] = remi_signed %[[i]], %[[orig_ub_k]]
     // CHECK: %[[div:.*]] = divi_signed %[[i]], %[[orig_ub_k]]
     // CHECK: %[[orig_j:.*]] = remi_signed %[[div]], %[[orig_ub_j]]
     // CHECK: %[[orig_i:.*]] = divi_signed %[[div]], %[[orig_ub_j]]
-    loop.for %j = %c0 to %c56 step %c1 {
-      loop.for %k = %c0 to %c3 step %c1 {
+    scf.for %j = %c0 to %c56 step %c1 {
+      scf.for %k = %c0 to %c3 step %c1 {
         // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
         "use"(%i, %j, %k) : (index, index, index) -> ()
       }
@@ -48,10 +48,10 @@ func @multi_use() {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c10 = constant 10 : index
-  // CHECK: loop.for %[[iv:.*]] =
-  loop.for %i = %c1 to %c10 step %c1 {
-    loop.for %j = %c1 to %c10 step %c1 {
-      loop.for %k = %c1 to %c10 step %c1 {
+  // CHECK: scf.for %[[iv:.*]] =
+  scf.for %i = %c1 to %c10 step %c1 {
+    scf.for %j = %c1 to %c10 step %c1 {
+      scf.for %k = %c1 to %c10 step %c1 {
         // CHECK: %[[k_unshifted:.*]] = remi_signed %[[iv]], %[[k_extent:.*]]
         // CHECK: %[[ij:.*]] = divi_signed %[[iv]], %[[k_extent]]
         // CHECK: %[[j_unshifted:.*]] = remi_signed %[[ij]], %[[j_extent:.*]]
@@ -86,14 +86,14 @@ func @unnormalized_loops() {
   %c10 = constant 10 : index
   %c17 = constant 17 : index
 
  // Number of iterations in the outer loop.
  // CHECK: %[[diff_i:.*]] = subi %[[orig_ub_i]], %[[orig_lb_i]]
   // CHECK: %[[c1:.*]] = constant 1
   // CHECK: %[[step_minus_c1:.*]] = subi %[[orig_step_i]], %[[c1]]
  // CHECK: %[[dividend:.*]] = addi %[[diff_i]], %[[step_minus_c1]]
   // CHECK: %[[numiter_i:.*]] = divi_signed %[[dividend]], %[[orig_step_i]]
 
  // Normalized lower bound and step for the outer loop.
   // CHECK: %[[lb_i:.*]] = constant 0
   // CHECK: %[[step_i:.*]] = constant 1
 
@@ -101,13 +101,13 @@ func @unnormalized_loops() {
   // only capture the final result.
   // CHECK: %[[numiter_j:.*]] = divi_signed {{.*}}, %[[orig_step_j]]
 
  // New bounds of the outer loop.
   // CHECK: %[[range:.*]] = muli %[[numiter_i]], %[[numiter_j]]
-  // CHECK: loop.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
-  loop.for %i = %c5 to %c10 step %c2 {
+  // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
+  scf.for %i = %c5 to %c10 step %c2 {
     // The inner loop has been removed.
-    // CHECK-NOT: loop.for
-    loop.for %j = %c7 to %c17 step %c3 {
+    // CHECK-NOT: scf.for
+    scf.for %j = %c7 to %c17 step %c3 {
       // The IVs are rewritten.
       // CHECK: %[[normalized_j:.*]] = remi_signed %[[i]], %[[numiter_j]]
       // CHECK: %[[normalized_i:.*]] = divi_signed %[[i]], %[[numiter_j]]
@@ -145,11 +145,11 @@ func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
   // CHECK: %[[range:.*]] = muli %[[numiter1]], %[[numiter2]] : index
 
   // Check that the outer loop is updated.
-  // CHECK: loop.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
-  loop.for %i = %lb1 to %ub1 step %step1 {
+  // CHECK: scf.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
+  scf.for %i = %lb1 to %ub1 step %step1 {
     // Check that the inner loop is removed.
-    // CHECK-NOT: loop.for
-    loop.for %j = %lb2 to %ub2 step %step2 {
+    // CHECK-NOT: scf.for
+    scf.for %j = %lb2 to %ub2 step %step2 {
       // Remapping of the induction variables.
       // CHECK: %[[normalized_j:.*]] = remi_signed %[[i]], %[[numiter2]] : index
       // CHECK: %[[normalized_i:.*]] = divi_signed %[[i]], %[[numiter2]] : index
@@ -171,19 +171,19 @@ func @two_bands() {
   %c1 = constant 1 : index
   %c10 = constant 10 : index
   // CHECK: %[[outer_range:.*]] = muli
-  // CHECK: loop.for %{{.*}} = %{{.*}} to %[[outer_range]]
-  loop.for %i = %c0 to %c10 step %c1 {
+  // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]]
+  scf.for %i = %c0 to %c10 step %c1 {
     // Check that the "j" loop was removed and that the inner loops were
     // coalesced as well.  The preparation step for coalescing will inject the
     // subtraction operation unlike the IV remapping.
-    // CHECK-NOT: loop.for
+    // CHECK-NOT: scf.for
     // CHECK: subi
-    loop.for %j = %c0 to %c10 step %c1 {
+    scf.for %j = %c0 to %c10 step %c1 {
       // The inner pair of loops is coalesced separately.
-      // CHECK: loop.for
-      loop.for %k = %i to %j step %c1 {
-        // CHECK_NOT: loop.for
-        loop.for %l = %i to %j step %c1 {
+      // CHECK: scf.for
+      scf.for %k = %i to %j step %c1 {
+        // CHECK-NOT: scf.for
+        scf.for %l = %i to %j step %c1 {
           "foo"() : () -> ()
         }
       }
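
 The coalescing transformation exercised throughout this file replaces a nest with one
 loop over the product of the trip counts and rebuilds each original IV with a
 remainder/quotient pair. For two already-normalized loops of extents %ni and %nj the
 rewrite is, schematically (a sketch, not pass output):

  func @coalesced_sketch(%ni : index, %nj : index) {
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    // Single loop over the linearized space of ni * nj points.
    %range = muli %ni, %nj : index
    scf.for %iv = %c0 to %range step %c1 {
      // Recover (i, j): j = iv mod nj, i = iv div nj.
      %j = remi_signed %iv, %nj : index
      %i = divi_signed %iv, %nj : index
      "use"(%i, %j) : (index, index) -> ()
    }
    return
  }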

diff --git a/mlir/test/Transforms/loop-fusion-slice-computation.mlir b/mlir/test/Transforms/loop-fusion-slice-computation.mlir
index dd1a8a339caf..cc54a09ce0c7 100644
--- a/mlir/test/Transforms/loop-fusion-slice-computation.mlir
+++ b/mlir/test/Transforms/loop-fusion-slice-computation.mlir
@@ -41,7 +41,7 @@ func @slice_depth1_loop_nest_with_offsets() {
 
 // -----
 
 // Slices at loop depth 1 should only slice the loop bounds of the first loop.
 // Slices at loop depth 2 should slice loop bounds of both loops.
 // CHECK-LABEL: func @slice_depth2_loop_nest() {
 func @slice_depth2_loop_nest() {
@@ -121,7 +121,7 @@ func @slice_depth2_loop_nest_two_stores() {
 
 // -----
 
 // Test loop nest which has a smaller outer trip count than its inner loop.
 // CHECK-LABEL: func @slice_loop_nest_with_smaller_outer_trip_count() {
 func @slice_loop_nest_with_smaller_outer_trip_count() {
   %0 = alloc() : memref<100x100xf32>

diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir
index 866925b4a9f7..e0eee71e89d9 100644
--- a/mlir/test/Transforms/loop-fusion.mlir
+++ b/mlir/test/Transforms/loop-fusion.mlir
@@ -242,7 +242,7 @@ func @should_fuse_first_and_second_loops() {
   }
 
   // Should fuse first loop into the second (last loop should not be fused).
-  // Should create private memref '%2' for fused loop.
+  // Should create private memref '%2' for fused loop.
   // CHECK:      affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
   // CHECK-NEXT:   affine.load %{{.*}}[0] : memref<1xf32>

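On the private memref these checks mention: once the producer is fused in, every value it writes is consumed within the same iteration, so the full-size buffer can be replaced with a one-element buffer private to the fused nest. Roughly (an illustrative sketch shaped like the CHECK lines above, not copied from the test):

  %p = alloc() : memref<1xf32>
  %cst = constant 7.000000e+00 : f32
  affine.for %i = 0 to 10 {
    affine.store %cst, %p[0] : memref<1xf32>
    %v = affine.load %p[0] : memref<1xf32>
  }
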
diff  --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir
index 494bfa45345c..d6af573d4e97 100644
--- a/mlir/test/Transforms/loop-invariant-code-motion.mlir
+++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir
@@ -228,8 +228,8 @@ func @invariant_loop_dialect() {
   %m = alloc() : memref<10xf32>
   %cf7 = constant 7.0 : f32
   %cf8 = constant 8.0 : f32
-  loop.for %arg0 = %ci0 to %ci10 step %ci1 {
-    loop.for %arg1 = %ci0 to %ci10 step %ci1 {
+  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
+    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
       %v0 = addf %cf7, %cf8 : f32
     }
   }
@@ -249,15 +249,15 @@ func @variant_loop_dialect() {
   %ci10 = constant 10 : index
   %ci1 = constant 1 : index
   %m = alloc() : memref<10xf32>
-  loop.for %arg0 = %ci0 to %ci10 step %ci1 {
-    loop.for %arg1 = %ci0 to %ci10 step %ci1 {
+  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
+    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
       %v0 = addi %arg0, %arg1 : index
     }
   }
 
   // CHECK: %0 = alloc() : memref<10xf32>
-  // CHECK-NEXT: loop.for
-  // CHECK-NEXT: loop.for
+  // CHECK-NEXT: scf.for
+  // CHECK-NEXT: scf.for
   // CHECK-NEXT: addi
 
   return
@@ -271,7 +271,7 @@ func @parallel_loop_with_invariant() {
   %c1 = constant 1 : index
   %c7 = constant 7 : i32
   %c8 = constant 8 : i32
-  loop.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
       %v0 = addi %c7, %c8 : i32
       %v3 = addi %arg0, %arg1 : index
   }
@@ -283,7 +283,7 @@ func @parallel_loop_with_invariant() {
   // CHECK-NEXT: %c7_i32 = constant 7 : i32
   // CHECK-NEXT: %c8_i32 = constant 8 : i32
   // CHECK-NEXT: addi %c7_i32, %c8_i32 : i32
-  // CHECK-NEXT: loop.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1)
+  // CHECK-NEXT: scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1)
   // CHECK-NEXT:   addi %arg0, %arg1 : index
   // CHECK-NEXT:   yield
   // CHECK-NEXT: }

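The LICM hunks above only retarget op names; the behavior under test is unchanged: an op whose operands are all defined above the loop is hoisted, while anything using an induction variable stays put. Schematically, with illustrative constants:

  // Before LICM:
  scf.for %i = %c0 to %c10 step %c1 {
    %inv = addf %cf7, %cf8 : f32   // operands defined above: hoistable
    %var = addi %i, %c1 : index    // uses %i: must stay in the loop
  }
  // After LICM:
  %inv = addf %cf7, %cf8 : f32
  scf.for %i = %c0 to %c10 step %c1 {
    %var = addi %i, %c1 : index
  }
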
diff  --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir
index f48a63ef984b..65af899c726c 100644
--- a/mlir/test/Transforms/memref-dependence-check.mlir
+++ b/mlir/test/Transforms/memref-dependence-check.mlir
@@ -36,7 +36,7 @@ func @dependent_loops() {
   %0 = alloc() : memref<10xf32>
   %cst = constant 7.000000e+00 : f32
   // There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
-  // because the first loop with the store dominates the second loop.
+  // because the first loop with the store dominates the second loop.
   affine.for %i0 = 0 to 10 {
     affine.store %cst, %0[%i0] : memref<10xf32>
     // expected-remark at above {{dependence from 0 to 0 at depth 1 = false}}
@@ -332,7 +332,7 @@ func @store_range_load_first_in_range() {
     %a0 = affine.apply affine_map<(d0) -> (d0)> (%i0)
     // Dependence from 0 to 1 at depth 1 is a range because all loads at
     // constant index zero are reads after first store at index zero during
-    // first iteration of the loop.
+    // first iteration of the loop.
     affine.store %c7, %m[%a0] : memref<100xf32>
     // expected-remark at above {{dependence from 0 to 0 at depth 1 = false}}
     // expected-remark at above {{dependence from 0 to 0 at depth 2 = false}}
@@ -785,7 +785,7 @@ func @delinearize_mod_floordiv() {
 
 // -----
 
-// Load and store ops access the same elements in strided loop.
+// Load and store ops access the same elements in the strided loop.
 // CHECK-LABEL: func @strided_loop_with_dependence_at_depth2
 func @strided_loop_with_dependence_at_depth2() {
   %0 = alloc() : memref<10xf32>

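A reading aid for the depth numbers in these dependence tests: depth 1 means no common surrounding loop (the dependence is carried at the function level, as between the two sibling loops above), and each additional depth corresponds to one shared surrounding loop, so accesses nested in a common loop can also carry dependences at depth 2 and beyond.
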
diff  --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir
index 55c851dce913..a124d180e42c 100644
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -17,7 +17,7 @@ func @parallel_many_dims() {
   %c13 = constant 13 : index
   %c14 = constant 14 : index
 
-  loop.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14)
+  scf.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14)
                                           step (%c1, %c4, %c7, %c10, %c13) {
     %result = "magic.op"(%i0, %i1, %i2, %i3, %i4): (index, index, index, index, index) -> index
   }
@@ -35,7 +35,7 @@ func @parallel_many_dims() {
 // CHECK:         [[C0:%.*]] = constant 0 : index
 // CHECK:         [[C1:%.*]] = constant 1 : index
 // CHECK:         [[C2:%.*]] = constant 2 : index
-// CHECK:         loop.parallel ([[NEW_I0:%.*]], [[NEW_I1:%.*]], [[NEW_I2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C2]], [[C1]], [[C1]]) step ([[C1]], [[C1]], [[C1]]) {
+// CHECK:         scf.parallel ([[NEW_I0:%.*]], [[NEW_I1:%.*]], [[NEW_I2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C2]], [[C1]], [[C1]]) step ([[C1]], [[C1]], [[C1]]) {
 // CHECK:           [[I0:%.*]] = remi_signed [[NEW_I0]], [[C2]] : index
 // CHECK:           [[VAL_16:%.*]] = muli [[NEW_I1]], [[C13]] : index
 // CHECK:           [[I4:%.*]] = addi [[VAL_16]], [[C12]] : index
@@ -44,6 +44,6 @@ func @parallel_many_dims() {
 // CHECK:           [[VAL_20:%.*]] = muli [[NEW_I2]], [[C7]] : index
 // CHECK:           [[I2:%.*]] = addi [[VAL_20]], [[C6]] : index
 // CHECK:           "magic.op"([[I0]], [[C3]], [[I2]], [[I3]], [[I4]]) : (index, index, index, index, index) -> index
-// CHECK:           loop.yield
+// CHECK:           scf.yield
 // CHECK-NEXT:    }
 // CHECK-NEXT:    return

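The collapsing checks follow a fixed recipe: each dimension is normalized to run from 0 with step 1, the normalized trip counts of a collapsed group are multiplied into one range, and in the body the per-dimension counts are rebuilt with remi_signed/divi_signed and rescaled with muli/addi by the original step and lower bound. A minimal two-dimensional sketch with illustrative constants (trip counts 2 and 3; %j originally ran from 4 with step 5):

  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c3 = constant 3 : index
  %c4 = constant 4 : index
  %c5 = constant 5 : index
  %c6 = constant 6 : index
  scf.parallel (%iv) = (%c0) to (%c6) step (%c1) {
    %jn = remi_signed %iv, %c3 : index
    %in = divi_signed %iv, %c3 : index
    %js = muli %jn, %c5 : index
    %j = addi %js, %c4 : index
    %r = "magic.op"(%in, %j) : (index, index) -> index
  }
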
diff  --git a/mlir/test/Transforms/parametric-mapping.mlir b/mlir/test/Transforms/parametric-mapping.mlir
index a7981e5beddf..2ad24e1ae6b8 100644
--- a/mlir/test/Transforms/parametric-mapping.mlir
+++ b/mlir/test/Transforms/parametric-mapping.mlir
@@ -9,8 +9,8 @@ func @map1d(%lb: index, %ub: index, %step: index) {
   // CHECK: %[[thread_offset:.*]] = muli %[[step]], %[[threads]]#0
   // CHECK: %[[new_lb:.*]] = addi %[[lb]], %[[thread_offset]]
   // CHECK: %[[new_step:.*]] = muli %[[step]], %[[threads]]#1
-  // CHECK: loop.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
-  loop.for %i = %lb to %ub step %step {}
+  // CHECK: scf.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
+  scf.for %i = %lb to %ub step %step {}
   return
 }
 
@@ -41,7 +41,7 @@ func @map2d(%lb : index, %ub : index, %step : index) {
   // new_step = step * gridDim.x * blockDim.x
   // CHECK: %[[new_step:.*]] = muli %[[stepXgdimx]], %[[threads]]#1 : index
   //
-  // CHECK: loop.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
-  loop.for %i = %lb to %ub step %step {}
+  // CHECK: scf.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
+  scf.for %i = %lb to %ub step %step {}
   return
 }

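The mapping rewrite itself is three ops per loop: offset the lower bound by id * step and scale the step by the processor count, so each processor strides over its own subset of iterations. Sketched with a hypothetical id %pid and count %n passed in as arguments (in the real tests these come from gpu thread/block id and dimension ops):

  func @map_sketch(%lb: index, %ub: index, %step: index,
                   %pid: index, %n: index) {
    %offset = muli %step, %pid : index
    %new_lb = addi %lb, %offset : index
    %new_step = muli %step, %n : index
    scf.for %i = %new_lb to %ub step %new_step {
      // original body, unchanged
    }
    return
  }
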
diff  --git a/mlir/test/Transforms/parametric-tiling.mlir b/mlir/test/Transforms/parametric-tiling.mlir
index afa33cb07c16..13ea6a969e4f 100644
--- a/mlir/test/Transforms/parametric-tiling.mlir
+++ b/mlir/test/Transforms/parametric-tiling.mlir
@@ -28,16 +28,16 @@ func @rectangular(%arg0: memref<?x?xf32>) {
   // TILE_74-NEXT: %[[diff2_adj:.*]] = addi %[[diff2]], %[[adjustment2]]
   // TILE_74-NEXT: %[[range2:.*]] = divi_signed %[[diff2_adj]], %c2
 
-  // Ceildiv to get the parametric tile size for the second original loop.
+  // Ceildiv to get the parametric tile size for the second original loop.
   // TILE_74:      %[[sum2:.*]] = addi %[[range2]], %c3
   // TILE_74-NEXT: %[[size2:.*]] = divi_signed %[[sum2]], %c4
   // New inner step (original is %c2).
   // TILE_74-NEXT:     %[[step2:.*]] = muli %c2, %[[size2]]
 
   // Updated outer loop(s) use new steps.
-  // COMMON: loop.for %[[i:.*]] = %c2 to %c44 step %[[step]]
-  // TILE_74:loop.for %[[j:.*]] = %c1 to %c44 step %[[step2]]
- loop.for %i = %c2 to %c44 step %c1 {
+  // COMMON: scf.for %[[i:.*]] = %c2 to %c44 step %[[step]]
+  // TILE_74:scf.for %[[j:.*]] = %c1 to %c44 step %[[step2]]
+ scf.for %i = %c2 to %c44 step %c1 {
     // Upper bound for the inner loop: min(%i + %step, %c44).
     // COMMON:      %[[stepped:.*]] = addi %[[i]], %[[step]]
     // COMMON-NEXT: cmpi "slt", %c44, %[[stepped]]
@@ -47,15 +47,15 @@ func @rectangular(%arg0: memref<?x?xf32>) {
     // TILE_74-NEXT: cmpi "slt", %c44, %[[stepped2]]
     // TILE_74-NEXT: %[[ub2:.*]] = select {{.*}}, %c44, %[[stepped2]]
 
-    // Created inner loop.
-    // COMMON:loop.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
+    // Created inner loop.
+    // COMMON:scf.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
 
     // This loop is not modified in the TILE_7 case.
-    // TILE_7: loop.for %[[j:.*]] = %c1 to %c44 step %c2
+    // TILE_7: scf.for %[[j:.*]] = %c1 to %c44 step %c2
     //
     // But it is modified in the TILE_74 case.
-    // TILE_74:loop.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
-   loop.for %j = %c1 to %c44 step %c2 {
+    // TILE_74:scf.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
+   scf.for %j = %c1 to %c44 step %c2 {
       // The right iterators are used.
       // TILE_7:  load %arg0[%[[ii]], %[[j]]]
       // TILE_74: load %arg0[%[[ii]], %[[jj]]]
@@ -87,8 +87,8 @@ func @triangular(%arg0: memref<?x?xf32>) {
   // Constant adjustment for inner loop has been hoisted out.
   // TILE_74:      %[[adjustment2:.*]] = subi %c2, %c1_{{.*}}
 
-  // New outer loop.
-  // COMMON: loop.for %[[i:.*]] = %c2 to %c44 step %[[step]]
+  // New outer loop.
+  // COMMON: scf.for %[[i:.*]] = %c2 to %c44 step %[[step]]
 
   // Range of the original inner loop
   //   (upper - lower + step - 1) / step
@@ -97,15 +97,15 @@ func @triangular(%arg0: memref<?x?xf32>) {
   // TILE_74-NEXT: %[[diff2_adj:.*]] = addi %[[diff2]], %[[adjustment2]]
   // TILE_74-NEXT: %[[range2:.*]] = divi_signed %[[diff2_adj]], %c2
 
-  // Ceildiv to get the parametric tile size for the second original loop.
+  // Ceildiv to get the parametric tile size for the second original loop.
   // TILE_74:      %[[sum2:.*]] = addi %[[range2]], %c3
   // TILE_74-NEXT: %[[size2:.*]] = divi_signed %[[sum2]], %c4
   // New inner step (original is %c2).
   // TILE_74-NEXT:     %[[step2:.*]] = muli %c2, %[[size2]]
 
-  // New inner loop.
-  // TILE_74:loop.for %[[j:.*]] = %c1 to %[[i]] step %[[step2]]
- loop.for %i = %c2 to %c44 step %c1 {
+  // New inner loop.
+  // TILE_74:scf.for %[[j:.*]] = %c1 to %[[i]] step %[[step2]]
+ scf.for %i = %c2 to %c44 step %c1 {
     // Upper bound for the inner loop: min(%i + %step, %c44).
     // COMMON:      %[[stepped:.*]] = addi %[[i]], %[[step]]
     // COMMON-NEXT: cmpi "slt", %c44, %[[stepped]]
@@ -114,15 +114,15 @@ func @triangular(%arg0: memref<?x?xf32>) {
     // TILE_74-NEXT: cmpi "slt", %[[i]], %[[stepped2]]
     // TILE_74-NEXT: %[[ub2:.*]] = select {{.*}}, %[[i]], %[[stepped2]]
     //
-    // Created inner loop.
-    // COMMON:loop.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
+    // Created inner loop.
+    // COMMON:scf.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
 
     // This loop is not modified in the TILE_7 case.
-    // TILE_7: loop.for %[[j:.*]] = %c1 to %[[ii]] step %c2
+    // TILE_7: scf.for %[[j:.*]] = %c1 to %[[ii]] step %c2
     //
     // But it is modified in the TILE_74 case.
-    // TILE_74:loop.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
-   loop.for %j = %c1 to %i step %c2 {
+    // TILE_74:scf.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
+   scf.for %j = %c1 to %i step %c2 {
       // The right iterators are used.
       // TILE_7:  load %arg0[%[[ii]], %[[j]]]
       // TILE_74: load %arg0[%[[ii]], %[[jj]]]

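To make the tile-size arithmetic concrete: for the outer loop from %c2 to %c44 with step %c1 and a requested tile count of 7, range = (44 - 2 + 1 - 1) / 1 = 42, tile size = ceildiv(42, 7) = 6, and the new outer step is 1 * 6 = 6; the min against the original upper bound guards the final, possibly partial, tile. The resulting shape, sketched with the step folded to a constant for readability (the pass keeps it as an SSA value):

  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %c6 = constant 6 : index
  %c44 = constant 44 : index
  scf.for %i = %c2 to %c44 step %c6 {     // tile loop
    %stepped = addi %i, %c6 : index
    %cond = cmpi "slt", %c44, %stepped : index
    %ub = select %cond, %c44, %stepped : index
    scf.for %ii = %i to %ub step %c1 {    // intra-tile loop
      // original body, now indexed by %ii
    }
  }
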
diff  --git a/mlir/test/Transforms/sccp-structured.mlir b/mlir/test/Transforms/sccp-structured.mlir
index 4acb6f9c99f2..0aaa9da4507f 100644
--- a/mlir/test/Transforms/sccp-structured.mlir
+++ b/mlir/test/Transforms/sccp-structured.mlir
@@ -5,15 +5,15 @@
 // CHECK-LABEL: func @simple(
 func @simple(%arg0 : i32) -> i32 {
   // CHECK: %[[CST:.*]] = constant 1 : i32
-  // CHECK-NOT: loop.if
+  // CHECK-NOT: scf.if
   // CHECK: return %[[CST]] : i32
 
   %cond = constant true
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
-    loop.yield %arg0 : i32
+    scf.yield %arg0 : i32
   }
   return %res : i32
 }
@@ -24,15 +24,15 @@ func @simple(%arg0 : i32) -> i32 {
 // CHECK-LABEL: func @simple_both_same(
 func @simple_both_same(%cond : i1) -> i32 {
   // CHECK: %[[CST:.*]] = constant 1 : i32
-  // CHECK-NOT: loop.if
+  // CHECK-NOT: scf.if
   // CHECK: return %[[CST]] : i32
 
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
     %2 = constant 1 : i32
-    loop.yield %2 : i32
+    scf.yield %2 : i32
   }
   return %res : i32
 }
@@ -42,14 +42,14 @@ func @simple_both_same(%cond : i1) -> i32 {
 
 // CHECK-LABEL: func @overdefined_unknown_condition(
 func @overdefined_unknown_condition(%cond : i1, %arg0 : i32) -> i32 {
-  // CHECK: %[[RES:.*]] = loop.if
+  // CHECK: %[[RES:.*]] = scf.if
   // CHECK: return %[[RES]] : i32
 
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
-    loop.yield %arg0 : i32
+    scf.yield %arg0 : i32
   }
   return %res : i32
 }
@@ -59,15 +59,15 @@ func @overdefined_unknown_condition(%cond : i1, %arg0 : i32) -> i32 {
 
 // CHECK-LABEL: func @overdefined_different_constants(
 func @overdefined_different_constants(%cond : i1) -> i32 {
-  // CHECK: %[[RES:.*]] = loop.if
+  // CHECK: %[[RES:.*]] = scf.if
   // CHECK: return %[[RES]] : i32
 
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
     %2 = constant 2 : i32
-    loop.yield %2 : i32
+    scf.yield %2 : i32
   }
   return %res : i32
 }
@@ -77,13 +77,13 @@ func @overdefined_different_constants(%cond : i1) -> i32 {
 // CHECK-LABEL: func @simple_loop(
 func @simple_loop(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
   // CHECK: %[[CST:.*]] = constant 0 : i32
-  // CHECK-NOT: loop.for
+  // CHECK-NOT: scf.for
   // CHECK: return %[[CST]] : i32
 
   %s0 = constant 0 : i32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
     %sn = addi %si, %si : i32
-    loop.yield %sn : i32
+    scf.yield %sn : i32
   }
   return %result : i32
 }
@@ -93,13 +93,13 @@ func @simple_loop(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
 
 // CHECK-LABEL: func @loop_overdefined(
 func @loop_overdefined(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
-  // CHECK: %[[RES:.*]] = loop.for
+  // CHECK: %[[RES:.*]] = scf.for
   // CHECK: return %[[RES]] : i32
 
   %s0 = constant 1 : i32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
     %sn = addi %si, %si : i32
-    loop.yield %sn : i32
+    scf.yield %sn : i32
   }
   return %result : i32
 }
@@ -111,22 +111,22 @@ func @loop_overdefined(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
 // CHECK-LABEL: func @loop_inner_control_flow(
 func @loop_inner_control_flow(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
   // CHECK: %[[CST:.*]] = constant 1 : i32
-  // CHECK-NOT: loop.for
-  // CHECK-NOT: loop.if
+  // CHECK-NOT: scf.for
+  // CHECK-NOT: scf.if
   // CHECK: return %[[CST]] : i32
 
   %cst_1 = constant 1 : i32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %cst_1) -> (i32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %cst_1) -> (i32) {
     %cst_20 = constant 20 : i32
     %cond = cmpi "ult", %si, %cst_20 : i32
-    %inner_res = loop.if %cond -> (i32) {
+    %inner_res = scf.if %cond -> (i32) {
       %1 = constant 1 : i32
-      loop.yield %1 : i32
+      scf.yield %1 : i32
     } else {
       %si_inc = addi %si, %cst_1 : i32
-      loop.yield %si_inc : i32
+      scf.yield %si_inc : i32
     }
-    loop.yield %inner_res : i32
+    scf.yield %inner_res : i32
   }
   return %result : i32
 }

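The most instructive case above is @simple_loop: SCCP folds it because the body's transfer function has a fixpoint at the initial lattice value. The iter_arg starts at the constant 0, and addi %si, %si maps 0 back to 0, so the loop-carried value is 0 on every iteration and the scf.for result becomes the constant. By contrast, @loop_overdefined starts at 1, which doubles every iteration, so the carried value goes to overdefined and the loop must stay.

  // @simple_loop, informally:
  //   %si = 0  ==>  %sn = addi %si, %si = 0   (fixpoint)
  // so the result folds to the constant 0.
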
diff  --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
index 6662411eaf14..4cc354bca0b2 100644
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -7,7 +7,7 @@ func @collapse_to_single() {
   %c3 = constant 29 : index
   %c4 = constant 3 : index
   %c5 = constant 4 : index
-  loop.parallel (%i0, %i1) = (%c0, %c1) to (%c2, %c3) step (%c4, %c5) {
+  scf.parallel (%i0, %i1) = (%c0, %c1) to (%c2, %c3) step (%c4, %c5) {
     %result = "magic.op"(%i0, %i1): (index, index) -> index
   }
   return
@@ -21,7 +21,7 @@ func @collapse_to_single() {
 // CHECK:         [[C6:%.*]] = constant 6 : index
 // CHECK:         [[C0:%.*]] = constant 0 : index
 // CHECK:         [[C1:%.*]] = constant 1 : index
-// CHECK:         loop.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) {
+// CHECK:         scf.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) {
 // CHECK:           [[I0_COUNT:%.*]] = remi_signed [[NEW_I]], [[C3]] : index
 // CHECK:           [[I1_COUNT:%.*]] = divi_signed [[NEW_I]], [[C6]] : index
 // CHECK:           [[VAL_10:%.*]] = muli [[I1_COUNT]], [[C4]] : index
@@ -29,6 +29,6 @@ func @collapse_to_single() {
 // CHECK:           [[VAL_12:%.*]] = muli [[I0_COUNT]], [[C3]] : index
 // CHECK:           [[I0:%.*]] = addi [[VAL_12]], [[C3]] : index
 // CHECK:           "magic.op"([[I0]], [[I1]]) : (index, index) -> index
-// CHECK:           loop.yield
+// CHECK:           scf.yield
 // CHECK-NEXT:    }
 // CHECK-NEXT:    return

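Same recipe in the single-group case: the normalized trip counts multiply into the collapsed range of 18 iterations visible as [[C18]] above, remi_signed/divi_signed split the new IV into per-dimension counts, and the muli/addi pairs rescale each count by its original step and lower bound, after which the implicit terminator prints as scf.yield.
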
diff  --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Transforms/TestLoopMapping.cpp
index 98fe5b539f5a..b8039b221153 100644
--- a/mlir/test/lib/Transforms/TestLoopMapping.cpp
+++ b/mlir/test/lib/Transforms/TestLoopMapping.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a pass to parametrically map loop.for loops to virtual
+// This file implements a pass to parametrically map scf.for loops to virtual
 // processing element dimensions.
 //
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
index 1760feb365f0..61b98b1c4839 100644
--- a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
+++ b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
@@ -11,7 +11,7 @@ func @simple_add1_add2_test(%arg0: memref<2xf32>, %arg1: memref<2xf32>) {
   %c1 = constant 1 : index
   %cst = constant 1.000000e+00 : f32
   %cst_0 = constant 2.000000e+00 : f32
-  loop.for %arg2 = %c0 to %c2 step %c1 {
+  scf.for %arg2 = %c0 to %c2 step %c1 {
     %0 = load %arg0[%arg2] : memref<2xf32>
     %1 = addf %0, %cst : f32
     store %1, %arg0[%arg2] : memref<2xf32>
@@ -46,7 +46,7 @@ func @print_newline()
 //  %cst_0 = constant 2.000000e+00 : f32
 //  %a = alloc() : memref<2xf32>
 //  %b = alloc() : memref<2xf32>
-//  loop.for %i = %c0 to %c2 step %c1 {
+//  scf.for %i = %c0 to %c2 step %c1 {
 //    store %cst, %a[%i] : memref<2xf32>
 //    store %cst, %b[%i] : memref<2xf32>
 //  }

diff  --git a/mlir/test/mlir-opt/commandline.mlir b/mlir/test/mlir-opt/commandline.mlir
index cce9a72c94c6..f99a68d6303c 100644
--- a/mlir/test/mlir-opt/commandline.mlir
+++ b/mlir/test/mlir-opt/commandline.mlir
@@ -4,11 +4,11 @@
 // CHECK: gpu
 // CHECK: linalg
 // CHECK: llvm
-// CHECK: loop
 // CHECK: nvvm
 // CHECK: omp
 // CHECK: quant
 // CHECK: rocdl
+// CHECK: scf
 // CHECK: sdbm
 // CHECK: spv
 // CHECK: std

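One last detail worth noting: commandline.mlir checks the registered-dialect list, which mlir-opt prints in alphabetical order, so the rename cannot be a one-line substitution here; the `loop` entry disappears from between llvm and nvvm, and `scf` reappears between rocdl and sdbm.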