[Mlir-commits] [mlir] 412b885 - [mlir][NFC] Update textual references of `func` to `func.func` in Bufferization/Complex/EmitC/CF/Func/GPU tests

River Riddle llvmlistbot at llvm.org
Wed Apr 20 22:24:29 PDT 2022


Author: River Riddle
Date: 2022-04-20T22:17:28-07:00
New Revision: 412b8850f64c631a9e49f7a163e0cbfdfa071499

URL: https://github.com/llvm/llvm-project/commit/412b8850f64c631a9e49f7a163e0cbfdfa071499
DIFF: https://github.com/llvm/llvm-project/commit/412b8850f64c631a9e49f7a163e0cbfdfa071499.diff

LOG: [mlir][NFC] Update textual references of `func` to `func.func` in Bufferization/Complex/EmitC/CF/Func/GPU tests

The special case parsing of `func` operations is being removed.

Added: 
    

Modified: 
    mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
    mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
    mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
    mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
    mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
    mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
    mlir/test/Dialect/Bufferization/canonicalize.mlir
    mlir/test/Dialect/Bufferization/inlining.mlir
    mlir/test/Dialect/Bufferization/ops.mlir
    mlir/test/Dialect/Builtin/canonicalize.mlir
    mlir/test/Dialect/Builtin/invalid.mlir
    mlir/test/Dialect/Complex/canonicalize.mlir
    mlir/test/Dialect/Complex/invalid.mlir
    mlir/test/Dialect/Complex/ops.mlir
    mlir/test/Dialect/ControlFlow/canonicalize.mlir
    mlir/test/Dialect/ControlFlow/invalid.mlir
    mlir/test/Dialect/ControlFlow/ops.mlir
    mlir/test/Dialect/EmitC/attrs.mlir
    mlir/test/Dialect/EmitC/invalid_ops.mlir
    mlir/test/Dialect/EmitC/invalid_types.mlir
    mlir/test/Dialect/EmitC/ops.mlir
    mlir/test/Dialect/EmitC/types.mlir
    mlir/test/Dialect/Func/func-bufferize.mlir
    mlir/test/Dialect/Func/invalid.mlir
    mlir/test/Dialect/GPU/async-region.mlir
    mlir/test/Dialect/GPU/canonicalize.mlir
    mlir/test/Dialect/GPU/invalid.mlir
    mlir/test/Dialect/GPU/mapping.mlir
    mlir/test/Dialect/GPU/multiple-all-reduce.mlir
    mlir/test/Dialect/GPU/ops.mlir
    mlir/test/Dialect/GPU/outlining.mlir
    mlir/test/Dialect/GPU/sink-ops.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir b/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
index c50b053b25bab..240cc2a60681f 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/buffer-deallocation.mlir
@@ -16,7 +16,7 @@
 // moved to bb0, we need to insert allocs and copies.
 
 // CHECK-LABEL: func @condBranch
-func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   cf.br ^bb3(%arg1 : memref<2xf32>)
@@ -57,7 +57,7 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // to %2 in block bb3.
 
 // CHECK-LABEL: func @condBranchDynamicType
-func @condBranchDynamicType(
+func.func @condBranchDynamicType(
   %arg0: i1,
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
@@ -93,7 +93,7 @@ func @condBranchDynamicType(
 // Test case: See above.
 
 // CHECK-LABEL: func @condBranchUnrankedType
-func @condBranchUnrankedType(
+func.func @condBranchUnrankedType(
   %arg0: i1,
   %arg1: memref<*xf32>,
   %arg2: memref<*xf32>,
@@ -148,7 +148,7 @@ func @condBranchUnrankedType(
 // bb6. Furthermore, there should be no copy inserted for %4.
 
 // CHECK-LABEL: func @condBranchDynamicTypeNested
-func @condBranchDynamicTypeNested(
+func.func @condBranchDynamicTypeNested(
   %arg0: i1,
   %arg1: memref<?xf32>,
   %arg2: memref<?xf32>,
@@ -203,7 +203,7 @@ func @condBranchDynamicTypeNested(
 // before ReturnOp.
 
 // CHECK-LABEL: func @emptyUsesValue
-func @emptyUsesValue(%arg0: memref<4xf32>) {
+func.func @emptyUsesValue(%arg0: memref<4xf32>) {
   %0 = memref.alloc() : memref<4xf32>
   return
 }
@@ -224,7 +224,7 @@ func @emptyUsesValue(%arg0: memref<4xf32>) {
 // we have to insert a copy and an alloc in the beginning of the function.
 
 // CHECK-LABEL: func @criticalEdge
-func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
   %0 = memref.alloc() : memref<2xf32>
@@ -257,7 +257,7 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // exit block after CopyOp since %1 is an alias for %0 and %arg1.
 
 // CHECK-LABEL: func @invCriticalEdge
-func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
   cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
@@ -285,7 +285,7 @@ func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // %7 should happen after CopyOp.
 
 // CHECK-LABEL: func @ifElse
-func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
   cf.cond_br %arg0,
@@ -323,7 +323,7 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // in the exit block since %5 or %6 are the latest aliases of %0.
 
 // CHECK-LABEL: func @ifElseNoUsers
-func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
   cf.cond_br %arg0,
@@ -358,7 +358,7 @@ func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // inserted in the exit block.
 
 // CHECK-LABEL: func @ifElseNested
-func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
   cf.cond_br %arg0,
@@ -395,7 +395,7 @@ func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // DeallocOps after the last BufferBasedOp.
 
 // CHECK-LABEL: func @redundantOperations
-func @redundantOperations(%arg0: memref<2xf32>) {
+func.func @redundantOperations(%arg0: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
   %1 = memref.alloc() : memref<2xf32>
@@ -426,7 +426,7 @@ func @redundantOperations(%arg0: memref<2xf32>) {
 // block.
 
 // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
-func @moving_alloc_and_inserting_missing_dealloc(
+func.func @moving_alloc_and_inserting_missing_dealloc(
   %cond: i1,
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
@@ -475,7 +475,7 @@ func @moving_alloc_and_inserting_missing_dealloc(
 // moved to exit block.
 
 // CHECK-LABEL: func @moving_invalid_dealloc_op_complex
-func @moving_invalid_dealloc_op_complex(
+func.func @moving_invalid_dealloc_op_complex(
   %cond: i1,
     %arg0: memref<2xf32>,
     %arg1: memref<2xf32>) {
@@ -503,7 +503,7 @@ func @moving_invalid_dealloc_op_complex(
 // Test Case: Inserting missing DeallocOp in a single block.
 
 // CHECK-LABEL: func @inserting_missing_dealloc_simple
-func @inserting_missing_dealloc_simple(
+func.func @inserting_missing_dealloc_simple(
   %arg0 : memref<2xf32>,
   %arg1: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
@@ -522,7 +522,7 @@ func @inserting_missing_dealloc_simple(
 // single block.
 
 // CHECK-LABEL: func @moving_invalid_dealloc_op
-func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
+func.func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
   memref.dealloc %0 : memref<2xf32>
@@ -544,7 +544,7 @@ func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
 // inserted after CopyOp.
 
 // CHECK-LABEL: func @nested_regions_and_cond_branch
-func @nested_regions_and_cond_branch(
+func.func @nested_regions_and_cond_branch(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
@@ -589,7 +589,7 @@ func @nested_regions_and_cond_branch(
 // deallocating. It should dealloc %y after CopyOp.
 
 // CHECK-LABEL: func @memref_in_function_results
-func @memref_in_function_results(
+func.func @memref_in_function_results(
   %arg0: memref<5xf32>,
   %arg1: memref<10xf32>,
   %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
@@ -615,7 +615,7 @@ func @memref_in_function_results(
 // requires a dealloc.
 
 // CHECK-LABEL: func @nested_region_control_flow
-func @nested_region_control_flow(
+func.func @nested_region_control_flow(
   %arg0 : index,
   %arg1 : index) -> memref<?x?xf32> {
   %0 = arith.cmpi eq, %arg0, %arg1 : index
@@ -645,7 +645,7 @@ func @nested_region_control_flow(
 // returned in the end.
 
 // CHECK-LABEL: func @nested_region_control_flow_div
-func @nested_region_control_flow_div(
+func.func @nested_region_control_flow_div(
   %arg0 : index,
   %arg1 : index) -> memref<?x?xf32> {
   %0 = arith.cmpi eq, %arg0, %arg1 : index
@@ -677,7 +677,7 @@ func @nested_region_control_flow_div(
 // the method.
 
 // CHECK-LABEL: func @inner_region_control_flow
-func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
+func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
   %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
   %1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
     ^bb0(%arg1 : memref<?x?xf32>):
@@ -705,7 +705,7 @@ func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
 // -----
 
 // CHECK-LABEL: func @subview
-func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
+func.func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
   %0 = memref.alloc() : memref<64x4xf32, offset: 0, strides: [4, 1]>
   %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
     memref<64x4xf32, offset: 0, strides: [4, 1]>
@@ -727,7 +727,7 @@ func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
 // Therefore, all allocas are not handled.
 
 // CHECK-LABEL: func @condBranchAlloca
-func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cf.cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   cf.br ^bb3(%arg1 : memref<2xf32>)
@@ -754,7 +754,7 @@ func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // dealloc.
 
 // CHECK-LABEL: func @ifElseAlloca
-func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+func.func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = memref.alloc() : memref<2xf32>
   test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
   cf.cond_br %arg0,
@@ -782,7 +782,7 @@ func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // -----
 
 // CHECK-LABEL: func @ifElseNestedAlloca
-func @ifElseNestedAlloca(
+func.func @ifElseNestedAlloca(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
@@ -817,7 +817,7 @@ func @ifElseNestedAlloca(
 // -----
 
 // CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
-func @nestedRegionsAndCondBranchAlloca(
+func.func @nestedRegionsAndCondBranchAlloca(
   %arg0: i1,
   %arg1: memref<2xf32>,
   %arg2: memref<2xf32>) {
@@ -857,7 +857,7 @@ func @nestedRegionsAndCondBranchAlloca(
 // -----
 
 // CHECK-LABEL: func @nestedRegionControlFlowAlloca
-func @nestedRegionControlFlowAlloca(
+func.func @nestedRegionControlFlowAlloca(
   %arg0 : index,
   %arg1 : index) -> memref<?x?xf32> {
   %0 = arith.cmpi eq, %arg0, %arg1 : index
@@ -885,7 +885,7 @@ func @nestedRegionControlFlowAlloca(
 // memory leaks.
 
 // CHECK-LABEL: func @loop_alloc
-func @loop_alloc(
+func.func @loop_alloc(
   %lb: index,
   %ub: index,
   %step: index,
@@ -926,7 +926,7 @@ func @loop_alloc(
 // that are passed via the backedges.
 
 // CHECK-LABEL: func @loop_nested_if_no_alloc
-func @loop_nested_if_no_alloc(
+func.func @loop_nested_if_no_alloc(
   %lb: index,
   %ub: index,
   %step: index,
@@ -967,7 +967,7 @@ func @loop_nested_if_no_alloc(
 // "returning" %3.
 
 // CHECK-LABEL: func @loop_nested_if_alloc
-func @loop_nested_if_alloc(
+func.func @loop_nested_if_alloc(
   %lb: index,
   %ub: index,
   %step: index,
@@ -1017,7 +1017,7 @@ func @loop_nested_if_alloc(
 // before each yield in all loops recursively.
 
 // CHECK-LABEL: func @loop_nested_alloc
-func @loop_nested_alloc(
+func.func @loop_nested_alloc(
   %lb: index,
   %ub: index,
   %step: index,
@@ -1093,7 +1093,7 @@ func @loop_nested_alloc(
 // -----
 
 // CHECK-LABEL: func @affine_loop
-func @affine_loop() {
+func.func @affine_loop() {
   %buffer = memref.alloc() : memref<1024xf32>
   %sum_init_0 = arith.constant 0.0 : f32
   %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
@@ -1115,7 +1115,7 @@ func @affine_loop() {
 // control-flow loop since they are not supported.
 
 // expected-error @+1 {{Only structured control-flow loops are supported}}
-func @loop_dynalloc(
+func.func @loop_dynalloc(
   %arg0 : i32,
   %arg1 : i32,
   %arg2: memref<?xf32>,
@@ -1148,7 +1148,7 @@ func @loop_dynalloc(
 // control-flow loop since they are not supported.
 
 // expected-error @+1 {{Only structured control-flow loops are supported}}
-func @do_loop_alloc(
+func.func @do_loop_alloc(
   %arg0 : i32,
   %arg1 : i32,
   %arg2: memref<2xf32>,
@@ -1176,7 +1176,7 @@ func @do_loop_alloc(
 // -----
 
 // CHECK-LABEL: func @assumingOp(
-func @assumingOp(
+func.func @assumingOp(
   %arg0: !shape.witness,
   %arg2: memref<2xf32>,
   %arg3: memref<2xf32>) {
@@ -1214,7 +1214,7 @@ func @assumingOp(
 // Test Case: The op "test.bar" does not implement the RegionBranchOpInterface.
 // This is not allowed in buffer deallocation.
 
-func @noRegionBranchOpInterface() {
+func.func @noRegionBranchOpInterface() {
 // expected-error @+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
   %0 = "test.bar"() ({
 // expected-error @+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
@@ -1235,7 +1235,7 @@ func @noRegionBranchOpInterface() {
 // CHECK-NOT: memref.dealloc %[[RES0]]
 // CHECK: memref.dealloc %[[RES1]]
 // CHECK: return %[[RES0]]
-func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64>) -> memref<?x?xf64> {
+func.func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64>) -> memref<?x?xf64> {
   %0 = bufferization.clone %arg0 : memref<?x?xf64> to memref<?x?xf64>
   %1 = bufferization.clone %arg1 : memref<?x?xf64> to memref<?x?xf64>
   return %0 : memref<?x?xf64>
@@ -1244,7 +1244,7 @@ func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64>) ->
 // -----
 
 // CHECK-LABEL: func @while_two_arg
-func @while_two_arg(%arg0: index) {
+func.func @while_two_arg(%arg0: index) {
   %a = memref.alloc(%arg0) : memref<?xf32>
 // CHECK: %[[WHILE:.*]]:2 = scf.while (%[[ARG1:.*]] = %[[ALLOC:.*]], %[[ARG2:.*]] = %[[CLONE:.*]])
   scf.while (%arg1 = %a, %arg2 = %a) : (memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
@@ -1271,7 +1271,7 @@ func @while_two_arg(%arg0: index) {
 
 // -----
 
-func @while_three_arg(%arg0: index) {
+func.func @while_three_arg(%arg0: index) {
 // CHECK: %[[ALLOC:.*]] = memref.alloc
   %a = memref.alloc(%arg0) : memref<?xf32>
 // CHECK-NEXT: %[[CLONE1:.*]] = bufferization.clone %[[ALLOC]]

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
index 5d70e90b75402..61e7973f4260c 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL:   func @eliminate_materializations(
 // CHECK-SAME:                                     %[[ARG:.*]]: memref<f32>) -> memref<f32> {
 // CHECK:           return %[[ARG]] : memref<f32>
-func @eliminate_materializations(%arg0: memref<f32>) -> memref<f32> {
+func.func @eliminate_materializations(%arg0: memref<f32>) -> memref<f32> {
   %0 = bufferization.to_tensor %arg0 : memref<f32>
   %1 = bufferization.to_memref %0 : memref<f32>
   return %1 : memref<f32>
@@ -11,7 +11,7 @@ func @eliminate_materializations(%arg0: memref<f32>) -> memref<f32> {
 
 // -----
 
-func @unable_to_convert_lone_buffer_cast() -> memref<f32> {
+func.func @unable_to_convert_lone_buffer_cast() -> memref<f32> {
   // expected-error @+1 {{failed to legalize operation 'test.source'}}
   %0 = "test.source"() : () -> tensor<f32>
   %1 = bufferization.to_memref %0 : memref<f32>
@@ -20,7 +20,7 @@ func @unable_to_convert_lone_buffer_cast() -> memref<f32> {
 
 // -----
 
-func @unable_to_convert_lone_tensor_load(%arg0: memref<f32>) {
+func.func @unable_to_convert_lone_tensor_load(%arg0: memref<f32>) {
   %0 = bufferization.to_tensor %arg0 : memref<f32>
   // expected-error @+1 {{failed to legalize operation 'test.sink'}}
   "test.sink"(%0) : (tensor<f32>) -> ()
@@ -38,7 +38,7 @@ func @unable_to_convert_lone_tensor_load(%arg0: memref<f32>) {
 //       CHECK:   memref.copy %[[arg]], %[[alloc]]
 //       CHECK:   return %[[alloc]]
 #map1 = affine_map<(d0)[s0] -> (d0 + s0)>
-func @dyn_layout_to_no_layout_cast(%m: memref<?xf32, #map1>) -> memref<?xf32> {
+func.func @dyn_layout_to_no_layout_cast(%m: memref<?xf32, #map1>) -> memref<?xf32> {
   %0 = bufferization.to_tensor %m : memref<?xf32, #map1>
   %1 = bufferization.to_memref %0 : memref<?xf32>
   return %1 : memref<?xf32>
@@ -55,7 +55,7 @@ func @dyn_layout_to_no_layout_cast(%m: memref<?xf32, #map1>) -> memref<?xf32> {
 //       CHECK:   memref.copy %[[arg]], %[[alloc]]
 //       CHECK:   return %[[alloc]]
 #map2 = affine_map<(d0)[s0] -> (d0 * 100 + s0)>
-func @fancy_layout_to_no_layout_cast(%m: memref<?xf32, #map2>) -> memref<?xf32> {
+func.func @fancy_layout_to_no_layout_cast(%m: memref<?xf32, #map2>) -> memref<?xf32> {
   %0 = bufferization.to_tensor %m : memref<?xf32, #map2>
   %1 = bufferization.to_memref %0 : memref<?xf32>
   return %1 : memref<?xf32>
@@ -72,7 +72,7 @@ func @fancy_layout_to_no_layout_cast(%m: memref<?xf32, #map2>) -> memref<?xf32>
 //       CHECK:   memref.copy %[[arg]], %[[alloc]]
 //       CHECK:   return %[[alloc]]
 #map3 = affine_map<(d0)[s0] -> (d0 + 25)>
-func @static_layout_to_no_layout_cast(%m: memref<?xf32, #map3>) -> memref<?xf32> {
+func.func @static_layout_to_no_layout_cast(%m: memref<?xf32, #map3>) -> memref<?xf32> {
   %0 = bufferization.to_tensor %m : memref<?xf32, #map3>
   %1 = bufferization.to_memref %0 : memref<?xf32>
   return %1 : memref<?xf32>
@@ -83,7 +83,7 @@ func @static_layout_to_no_layout_cast(%m: memref<?xf32, #map3>) -> memref<?xf32>
 // TODO: to_memref with layout maps not supported yet. This should fold to a
 // memref.cast.
 #map4 = affine_map<(d0)[s0] -> (d0 + s0)>
-func @no_layout_to_dyn_layout_cast(%m: memref<?xf32>) -> memref<?xf32, #map4> {
+func.func @no_layout_to_dyn_layout_cast(%m: memref<?xf32>) -> memref<?xf32, #map4> {
   %0 = bufferization.to_tensor %m : memref<?xf32>
   // expected-error @+1 {{failed to materialize conversion for result #0 of operation 'bufferization.to_memref' that remained live after conversion}}
   %1 = bufferization.to_memref %0 : memref<?xf32, #map4>
@@ -93,7 +93,7 @@ func @no_layout_to_dyn_layout_cast(%m: memref<?xf32>) -> memref<?xf32, #map4> {
 
 // -----
 
-func @illegal_unranked_to_rank(%m: memref<*xf32>) -> memref<?xf32> {
+func.func @illegal_unranked_to_rank(%m: memref<*xf32>) -> memref<?xf32> {
   // expected-note @+1 {{prior use here}}
   %0 = bufferization.to_tensor %m : memref<*xf32>
   // expected-error @+1 {{expects different type than prior uses: 'tensor<?xf32>' vs 'tensor<*xf32>'}}

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
index df03c75190dd7..ae097ca47b428 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
@@ -7,7 +7,7 @@
 
 // CHECK-LABEL: func @buffer_not_deallocated(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
-func @buffer_not_deallocated(%t : tensor<?xf32>, %c : i1) -> tensor<?xf32> {
+func.func @buffer_not_deallocated(%t : tensor<?xf32>, %c : i1) -> tensor<?xf32> {
   // CHECK: %[[r:.*]] = scf.if %{{.*}} {
   %r = scf.if %c -> tensor<?xf32> {
     // CHECK: %[[some_op:.*]] = "test.some_op"

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
index a42e1d8c055e4..06c79d450cea7 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
@@ -10,7 +10,7 @@
 
 // CHECK-NODEALLOC-LABEL: func @out_of_place_bufferization
 // CHECK-BUFFERDEALLOC-LABEL: func @out_of_place_bufferization
-func @out_of_place_bufferization(%t1 : tensor<?xf32>) -> (f32, f32) {
+func.func @out_of_place_bufferization(%t1 : tensor<?xf32>) -> (f32, f32) {
   //     CHECK-NODEALLOC: memref.alloc
   //     CHECK-NODEALLOC: memref.copy
   // CHECK-NODEALLOC-NOT: memref.dealloc

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
index efc3038820ace..7774d96a83ee4 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
@@ -17,7 +17,7 @@
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
 // CHECK-NO-LAYOUT-MAP-LABEL: func @use_of_unknown_op_1(
 //  CHECK-NO-LAYOUT-MAP-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @use_of_unknown_op_1(%t1: tensor<?xf32>)
+func.func @use_of_unknown_op_1(%t1: tensor<?xf32>)
     -> vector<5xf32> {
   // ToTensorOp is generated because the function is bufferized and has a
   // memref block argument.
@@ -39,7 +39,7 @@ func @use_of_unknown_op_1(%t1: tensor<?xf32>)
 
 // CHECK-LABEL: func @use_of_unknown_op_2(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @use_of_unknown_op_2(%t1: tensor<?xf32>) -> tensor<?xf32> {
+func.func @use_of_unknown_op_2(%t1: tensor<?xf32>) -> tensor<?xf32> {
   // CHECK: %[[dummy1:.*]] = "test.dummy_op"(%[[t1]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>
   // CHECK: %[[dummy2:.*]] = "test.another_dummy_op"(%[[dummy1]])
@@ -55,7 +55,7 @@ func @use_of_unknown_op_2(%t1: tensor<?xf32>) -> tensor<?xf32> {
 
 // CHECK-LABEL: func @use_of_unknown_op_3(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @use_of_unknown_op_3(%t1: tensor<?xf32>)
+func.func @use_of_unknown_op_3(%t1: tensor<?xf32>)
     -> (vector<5xf32>, vector<5xf32>) {
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
@@ -77,7 +77,7 @@ func @use_of_unknown_op_3(%t1: tensor<?xf32>)
 
 // CHECK-LABEL: func @use_of_unknown_op_4(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @use_of_unknown_op_4(%t1: tensor<?xf32>)
+func.func @use_of_unknown_op_4(%t1: tensor<?xf32>)
     -> (vector<5xf32>, tensor<?xf32>) {
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
@@ -100,7 +100,7 @@ func @use_of_unknown_op_4(%t1: tensor<?xf32>)
 
 // CHECK-LABEL: func @use_of_bufferizable_op_in_unbufferizable_op
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @use_of_bufferizable_op_in_unbufferizable_op(
+func.func @use_of_bufferizable_op_in_unbufferizable_op(
     %t1: tensor<?xf32>, %o: index, %s: index) -> (tensor<?xf32>, tensor<?xf32>) {
   // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]]
   // CHECK: %[[subview:.*]] = memref.subview %[[m1]]
@@ -116,7 +116,7 @@ func @use_of_bufferizable_op_in_unbufferizable_op(
 
 // CHECK-LABEL: func @unused_unknown_op(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @unused_unknown_op(%t1 : tensor<?xf32>) -> vector<5xf32> {
+func.func @unused_unknown_op(%t1 : tensor<?xf32>) -> vector<5xf32> {
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
 
@@ -133,7 +133,7 @@ func @unused_unknown_op(%t1 : tensor<?xf32>) -> vector<5xf32> {
 // -----
 
 // CHECK-LABEL: func @unknown_op_may_read(
-func @unknown_op_may_read(%v: vector<5xf32>)
+func.func @unknown_op_may_read(%v: vector<5xf32>)
     -> (tensor<10xf32>, tensor<10xf32>) {
   %idx = arith.constant 0 : index
   %cst = arith.constant 5.0 : f32
@@ -167,7 +167,7 @@ func @unknown_op_may_read(%v: vector<5xf32>)
 
 // CHECK-LABEL: func @unknown_op_not_writable
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @unknown_op_not_writable(
+func.func @unknown_op_not_writable(
     %t1 : tensor<?xf32>, %v :  vector<5xf32>, %idx : index) -> tensor<?xf32> {
   // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
   // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
@@ -189,7 +189,7 @@ func @unknown_op_not_writable(
 
 // CHECK-TENSOR-LABEL: func @simple_tensor_test(
 //  CHECK-TENSOR-SAME:     %[[t1:.*]]: tensor<?xf32>
-func @simple_tensor_test(%t1 : tensor<?xf32>, %f : f32) -> tensor<?xf32> {
+func.func @simple_tensor_test(%t1 : tensor<?xf32>, %f : f32) -> tensor<?xf32> {
   // CHECK-TENSOR: %[[t1_memref:.*]] = bufferization.to_memref %[[t1]]
   %c0 = arith.constant 0 : index
   // CHECK-TENSOR: %[[alloc:.*]] = memref.alloc
@@ -205,7 +205,7 @@ func @simple_tensor_test(%t1 : tensor<?xf32>, %f : f32) -> tensor<?xf32> {
 
 // CHECK-SCF-LABEL: func @simple_scf_if(
 //  CHECK-SCF-SAME:     %[[t1:.*]]: tensor<?xf32> {linalg.inplaceable = true}, %[[c:.*]]: i1, %[[pos:.*]]: index
-func @simple_scf_if(%t1: tensor<?xf32> {linalg.inplaceable = true}, %c: i1, %pos: index, %f: f32)
+func.func @simple_scf_if(%t1: tensor<?xf32> {linalg.inplaceable = true}, %c: i1, %pos: index, %f: f32)
     -> (tensor<?xf32>, index) {
   // CHECK-SCF: %[[r:.*]] = scf.if %[[c]] -> (memref<?xf32, #{{.*}}>) {
   %r1, %r2 = scf.if %c -> (tensor<?xf32>, index) {

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
index 3d8d09460484a..f9ea4dce5294d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -7,7 +7,7 @@
 
 // CHECK-LABEL: func @use_tensor_func_arg(
 //  CHECK-SAME:     %[[A:.*]]: tensor<?xf32>
-func @use_tensor_func_arg(%A : tensor<?xf32>) -> (vector<4xf32>) {
+func.func @use_tensor_func_arg(%A : tensor<?xf32>) -> (vector<4xf32>) {
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0.0 : f32
 
@@ -23,7 +23,7 @@ func @use_tensor_func_arg(%A : tensor<?xf32>) -> (vector<4xf32>) {
 
 // CHECK-LABEL: func @return_tensor(
 //  CHECK-SAME:     %[[A:.*]]: tensor<?xf32>
-func @return_tensor(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf32>) {
+func.func @return_tensor(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf32>) {
   %c0 = arith.constant 0 : index
 
   // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]]
@@ -42,7 +42,7 @@ func @return_tensor(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf32>) {
 // -----
 
 // CHECK-LABEL: func @func_without_tensor_args
-func @func_without_tensor_args(%v : vector<10xf32>) -> () {
+func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
   // CHECK: %[[alloc:.*]] = memref.alloc()
   %0 = linalg.init_tensor[10] : tensor<10xf32>
 
@@ -61,17 +61,17 @@ func @func_without_tensor_args(%v : vector<10xf32>) -> () {
 // -----
 
 // CHECK-LABEL: func private @private_func
-func private @private_func(tensor<?xf32>) -> ()
+func.func private @private_func(tensor<?xf32>) -> ()
 
 // CHECK-LABEL: func @empty_func()
-func @empty_func() -> () {
+func.func @empty_func() -> () {
   return
 }
 
 // -----
 
 // CHECK-LABEL: func @read_after_write_conflict(
-func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index)
+func.func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index)
     -> (f32, f32) {
   // CHECK-DAG: %[[alloc:.*]] = memref.alloc
   // CHECK-DAG: %[[dummy:.*]] = "test.dummy_op"
@@ -95,7 +95,7 @@ func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index)
 // -----
 
 // CHECK-LABEL: func @copy_deallocated(
-func @copy_deallocated() -> tensor<10xf32> {
+func.func @copy_deallocated() -> tensor<10xf32> {
   // CHECK: %[[alloc:.*]] = memref.alloc()
   %0 = linalg.init_tensor[10] : tensor<10xf32>
   // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]]
@@ -108,7 +108,7 @@ func @copy_deallocated() -> tensor<10xf32> {
 
 // CHECK-LABEL: func @select_different_tensors(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
-func @select_different_tensors(%t: tensor<?xf32>, %sz: index, %c: i1) -> tensor<?xf32> {
+func.func @select_different_tensors(%t: tensor<?xf32>, %sz: index, %c: i1) -> tensor<?xf32> {
   // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : memref<?xf32, #{{.*}}>
   // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref<?xf32>
   %0 = linalg.init_tensor [%sz] : tensor<?xf32>

diff  --git a/mlir/test/Dialect/Bufferization/canonicalize.mlir b/mlir/test/Dialect/Bufferization/canonicalize.mlir
index 7a4468ea767ce..89d4bb5988ae2 100644
--- a/mlir/test/Dialect/Bufferization/canonicalize.mlir
+++ b/mlir/test/Dialect/Bufferization/canonicalize.mlir
@@ -4,7 +4,7 @@
 
 // Basic folding of to_tensor(to_memref(t)) -> t
 // CHECK-LABEL: func @tensor_load_of_buffer_cast(
-func @tensor_load_of_buffer_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
+func.func @tensor_load_of_buffer_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
   %0 = bufferization.to_memref %arg0 : memref<?xf32>
   %1 = bufferization.to_tensor %0 : memref<?xf32>
   return %1 : tensor<?xf32>
@@ -16,7 +16,7 @@ func @tensor_load_of_buffer_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
 
 // Basic folding of to_memref(to_tensor(m)) -> m
 // CHECK-LABEL: func @buffer_cast_of_tensor_load(
-func @buffer_cast_of_tensor_load(%arg0: memref<?xf32>) -> memref<?xf32> {
+func.func @buffer_cast_of_tensor_load(%arg0: memref<?xf32>) -> memref<?xf32> {
   %0 = bufferization.to_tensor %arg0 : memref<?xf32>
   %1 = bufferization.to_memref %0 : memref<?xf32>
   return %1 : memref<?xf32>
@@ -37,7 +37,7 @@ func @buffer_cast_of_tensor_load(%arg0: memref<?xf32>) -> memref<?xf32> {
 //       CHECK: %[[MEMREF_ADDRSPACE7:.*]] = bufferization.to_memref
 //  CHECK-SAME:   %[[TENSOR]] : memref<?xf32, 7>
 //       CHECK: return %[[MEMREF_ADDRSPACE7]]
-func @no_fold_buffer_cast_of_tensor_load(%arg0: memref<?xf32, 2>)
+func.func @no_fold_buffer_cast_of_tensor_load(%arg0: memref<?xf32, 2>)
     -> memref<?xf32, 7> {
   %0 = bufferization.to_tensor %arg0 : memref<?xf32, 2>
   %1 = bufferization.to_memref %0 : memref<?xf32, 7>
@@ -59,7 +59,7 @@ func @no_fold_buffer_cast_of_tensor_load(%arg0: memref<?xf32, 2>)
 //       CHECK: %[[R:.*]] = memref.cast %[[M]]
 //  CHECK-SAME:   memref<?xf32, #[[$OFF_3]]> to memref<?xf32, #[[$OFF_UNK]]>
 //       CHECK: return %[[R]]
-func @canonicalize_buffer_cast_of_tensor_load(
+func.func @canonicalize_buffer_cast_of_tensor_load(
   %arg0: memref<?xf32, offset: 3, strides: [1]>)
   -> memref<?xf32, offset: ?, strides: [1]>
 {
@@ -76,7 +76,7 @@ func @canonicalize_buffer_cast_of_tensor_load(
 // If the memrefs are potentially cast-compatible, canonicalize to
 //            copy.
 // CHECK-LABEL: func @canonicalize_buffer_cast_of_tensor_load_to_copy(
-func @canonicalize_buffer_cast_of_tensor_load_to_copy(
+func.func @canonicalize_buffer_cast_of_tensor_load_to_copy(
   %arg0: memref<?xf32, offset: ?, strides: [1]>)
   -> memref<?xf32, offset: 3, strides: [1]> {
   %0 = bufferization.to_tensor %arg0 : memref<?xf32, offset: ?, strides: [1]>
@@ -103,7 +103,7 @@ func @canonicalize_buffer_cast_of_tensor_load_to_copy(
 //       CHECK:   %[[C0:.*]] = arith.constant 0
 //       CHECK:   %[[D:.*]] = memref.dim %[[MEMREF]], %[[C0]]
 //       CHECK:   return %[[D]] : index
-func @dim_of_tensor_load(%arg0: memref<?xf32>) -> index {
+func.func @dim_of_tensor_load(%arg0: memref<?xf32>) -> index {
   %c0 = arith.constant 0 : index
   %0 = bufferization.to_tensor %arg0 : memref<?xf32>
   %1 = tensor.dim %0, %c0 : tensor<?xf32>
@@ -113,7 +113,7 @@ func @dim_of_tensor_load(%arg0: memref<?xf32>) -> index {
 // -----
 
 // CHECK-LABEL: @clone_before_dealloc
-func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
+func.func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
   %0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
   memref.dealloc %arg0 : memref<?xf32>
   return %0 : memref<?xf32>
@@ -124,7 +124,7 @@ func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
 // -----
 
 // CHECK-LABEL: @clone_before_dealloc
-func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
+func.func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
   %0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
   "use"(%0) : (memref<?xf32>) -> ()
   memref.dealloc %0 : memref<?xf32>
@@ -137,7 +137,7 @@ func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
 // -----
 
 // CHECK-LABEL: @clone_after_cast
-func @clone_after_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
+func.func @clone_after_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
   %0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
   %1 = bufferization.clone %0 : memref<32xf32> to memref<32xf32>
   return %1 : memref<32xf32>
@@ -149,7 +149,7 @@ func @clone_after_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
 // -----
 
 // CHECK-LABEL: @clone_and_cast
-func @clone_and_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
+func.func @clone_and_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
   %0 = bufferization.clone %arg0 : memref<?xf32> to memref<32xf32>
   memref.dealloc %arg0 : memref<?xf32>
   return %0 : memref<32xf32>
@@ -162,7 +162,7 @@ func @clone_and_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
 // -----
 
 // CHECK-LABEL: @alias_is_freed
-func @alias_is_freed(%arg0 : memref<?xf32>) {
+func.func @alias_is_freed(%arg0 : memref<?xf32>) {
   %0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
   %1 = bufferization.clone %0 : memref<32xf32> to memref<32xf32>
   memref.dealloc %arg0 : memref<?xf32>
@@ -178,7 +178,7 @@ func @alias_is_freed(%arg0 : memref<?xf32>) {
 
 // Verify SimplifyClones skips clones with multiple deallocations.
 // CHECK-LABEL: @clone_multiple_dealloc_of_source
-func @clone_multiple_dealloc_of_source(%arg0: memref<?xf32>) -> memref<?xf32> {
+func.func @clone_multiple_dealloc_of_source(%arg0: memref<?xf32>) -> memref<?xf32> {
   %0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
   "if_else"() ({
     memref.dealloc %arg0 : memref<?xf32>
@@ -197,7 +197,7 @@ func @clone_multiple_dealloc_of_source(%arg0: memref<?xf32>) -> memref<?xf32> {
 
 // CHECK-LABEL: @clone_multiple_dealloc_of_clone
 // CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
-func @clone_multiple_dealloc_of_clone(%arg0: memref<?xf32>) -> memref<?xf32> {
+func.func @clone_multiple_dealloc_of_clone(%arg0: memref<?xf32>) -> memref<?xf32> {
   // CHECK-NEXT: %[[CLONE:.*]] = bufferization.clone %[[ARG]]
   // CHECK: memref.dealloc %[[CLONE]]
   // CHECK: memref.dealloc %[[CLONE]]
@@ -217,7 +217,7 @@ func @clone_multiple_dealloc_of_clone(%arg0: memref<?xf32>) -> memref<?xf32> {
 
 // CHECK-LABEL: func @tensor_cast_to_memref
 //  CHECK-SAME:   %[[ARG0:.+]]: tensor<4x6x16x32xi8>
-func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) ->
+func.func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) ->
   memref<?x?x16x32xi8> {
   %0 = tensor.cast %arg0 : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
   %1 = bufferization.to_memref %0 : memref<?x?x16x32xi8>
@@ -232,7 +232,7 @@ func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) ->
 
 // Folding of memref.load(to_memref(%v, %idxs)) -> tensor.extract(%v, %idx)
 // CHECK-LABEL: func @load_from_buffer_cast(
-func @load_from_buffer_cast(%arg0: index, %arg1: index,
+func.func @load_from_buffer_cast(%arg0: index, %arg1: index,
                             %arg2: tensor<?x?xf32>) -> f32 {
   %0 = bufferization.to_memref %arg2 : memref<?x?xf32>
   %1 = memref.load %0[%arg0, %arg1] : memref<?x?xf32>

diff --git a/mlir/test/Dialect/Bufferization/inlining.mlir b/mlir/test/Dialect/Bufferization/inlining.mlir
index 6974f01420321..62efb3801c871 100644
--- a/mlir/test/Dialect/Bufferization/inlining.mlir
+++ b/mlir/test/Dialect/Bufferization/inlining.mlir
@@ -5,12 +5,12 @@
 // CHECK-NOT: call
 // CHECK: %[[RES:.*]] = bufferization.clone %[[ARG]]
 // CHECK: return %[[RES]]
-func @test_inline(%buf : memref<*xf32>) -> memref<*xf32> {
+func.func @test_inline(%buf : memref<*xf32>) -> memref<*xf32> {
   %0 = call @inner_func(%buf) : (memref<*xf32>) -> memref<*xf32>
   return %0 : memref<*xf32>
 }
 
-func @inner_func(%buf : memref<*xf32>) -> memref<*xf32> {
+func.func @inner_func(%buf : memref<*xf32>) -> memref<*xf32> {
   %clone = bufferization.clone %buf : memref<*xf32> to memref<*xf32>
   return %clone : memref<*xf32>
 }

diff --git a/mlir/test/Dialect/Bufferization/ops.mlir b/mlir/test/Dialect/Bufferization/ops.mlir
index b70bce1b8b2d8..23ec897df9d45 100644
--- a/mlir/test/Dialect/Bufferization/ops.mlir
+++ b/mlir/test/Dialect/Bufferization/ops.mlir
@@ -2,13 +2,13 @@
 // RUN: mlir-opt %s --mlir-print-op-generic | mlir-opt | FileCheck %s
 
 // CHECK-LABEL: func @test_clone
-func @test_clone(%buf : memref<*xf32>) -> memref<*xf32> {
+func.func @test_clone(%buf : memref<*xf32>) -> memref<*xf32> {
   %clone = bufferization.clone %buf : memref<*xf32> to memref<*xf32>
   return %clone : memref<*xf32>
 }
 
 // CHECK-LABEL: test_to_memref
-func @test_to_memref(%arg0: tensor<?xi64>, %arg1: tensor<*xi64>)
+func.func @test_to_memref(%arg0: tensor<?xi64>, %arg1: tensor<*xi64>)
     -> (memref<?xi64, affine_map<(d0) -> (d0 + 7)>>, memref<*xi64, 1>) {
   %0 = bufferization.to_memref %arg0
     : memref<?xi64, affine_map<(d0) -> (d0 + 7)>>
@@ -18,7 +18,7 @@ func @test_to_memref(%arg0: tensor<?xi64>, %arg1: tensor<*xi64>)
 }
 
 // CHECK-LABEL: func @test_to_tensor
-func @test_to_tensor(%buf : memref<2xf32>) -> tensor<2xf32> {
+func.func @test_to_tensor(%buf : memref<2xf32>) -> tensor<2xf32> {
   %tensor = bufferization.to_tensor %buf : memref<2xf32>
   return %tensor : tensor<2xf32>
 }

diff --git a/mlir/test/Dialect/Builtin/canonicalize.mlir b/mlir/test/Dialect/Builtin/canonicalize.mlir
index 57a79d02138c6..6e29429b9d501 100644
--- a/mlir/test/Dialect/Builtin/canonicalize.mlir
+++ b/mlir/test/Dialect/Builtin/canonicalize.mlir
@@ -7,7 +7,7 @@
 // Test folding conversion casts feeding into other casts.
 // CHECK-LABEL: func @multiple_conversion_casts
 // CHECK-SAME: %[[ARG0:.*]]: i32, %[[ARG1:.*]]:
-func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
+func.func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
   // CHECK-NOT: unrealized_conversion_cast
   // CHECK: return %[[ARG0]], %[[ARG1]]
   %inputs:2 = builtin.unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
@@ -16,7 +16,7 @@ func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
 }
 
 // CHECK-LABEL: func @multiple_conversion_casts
-func @multiple_conversion_casts_failure(%arg0: i32, %arg1: i32, %arg2: i64) -> (i32, i32) {
+func.func @multiple_conversion_casts_failure(%arg0: i32, %arg1: i32, %arg2: i64) -> (i32, i32) {
   // CHECK: unrealized_conversion_cast
   // CHECK: unrealized_conversion_cast
   %inputs:2 = builtin.unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64

diff --git a/mlir/test/Dialect/Builtin/invalid.mlir b/mlir/test/Dialect/Builtin/invalid.mlir
index f0177c4e6ee50..79c8b8337af9d 100644
--- a/mlir/test/Dialect/Builtin/invalid.mlir
+++ b/mlir/test/Dialect/Builtin/invalid.mlir
@@ -14,6 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 // expected-error at +1 {{missing ']' closing set of scalable dimensions}}
-func @scalable_vector_arg(%arg0: vector<[4xf32>) { }
+func.func @scalable_vector_arg(%arg0: vector<[4xf32>) { }
 
 // -----

diff --git a/mlir/test/Dialect/Complex/canonicalize.mlir b/mlir/test/Dialect/Complex/canonicalize.mlir
index c68d87e8c0773..2d492a223d4c7 100644
--- a/mlir/test/Dialect/Complex/canonicalize.mlir
+++ b/mlir/test/Dialect/Complex/canonicalize.mlir
@@ -2,7 +2,7 @@
 
 // CHECK-LABEL: func @create_of_real_and_imag
 // CHECK-SAME: (%[[CPLX:.*]]: complex<f32>)
-func @create_of_real_and_imag(%cplx: complex<f32>) -> complex<f32> {
+func.func @create_of_real_and_imag(%cplx: complex<f32>) -> complex<f32> {
   // CHECK-NEXT: return %[[CPLX]] : complex<f32>
   %real = complex.re %cplx : complex<f32>
   %imag = complex.im %cplx : complex<f32>
@@ -12,7 +12,7 @@ func @create_of_real_and_imag(%cplx: complex<f32>) -> complex<f32> {
 
 // CHECK-LABEL: func @create_of_real_and_imag_different_operand
 // CHECK-SAME: (%[[CPLX:.*]]: complex<f32>, %[[CPLX2:.*]]: complex<f32>)
-func @create_of_real_and_imag_different_operand(
+func.func @create_of_real_and_imag_different_operand(
     %cplx: complex<f32>, %cplx2 : complex<f32>) -> complex<f32> {
   // CHECK-NEXT: %[[REAL:.*]] = complex.re %[[CPLX]] : complex<f32>
   // CHECK-NEXT: %[[IMAG:.*]] = complex.im %[[CPLX2]] : complex<f32>
@@ -24,7 +24,7 @@ func @create_of_real_and_imag_different_operand(
 }
 
 // CHECK-LABEL: func @real_of_const(
-func @real_of_const() -> f32 {
+func.func @real_of_const() -> f32 {
   // CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32
   // CHECK-NEXT: return %[[CST]] : f32
   %complex = complex.constant [1.0 : f32, 0.0 : f32] : complex<f32>
@@ -33,7 +33,7 @@ func @real_of_const() -> f32 {
 }
 
 // CHECK-LABEL: func @real_of_create_op(
-func @real_of_create_op() -> f32 {
+func.func @real_of_create_op() -> f32 {
   // CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32
   // CHECK-NEXT: return %[[CST]] : f32
   %real = arith.constant 1.0 : f32
@@ -44,7 +44,7 @@ func @real_of_create_op() -> f32 {
 }
 
 // CHECK-LABEL: func @imag_of_const(
-func @imag_of_const() -> f32 {
+func.func @imag_of_const() -> f32 {
   // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
   // CHECK-NEXT: return %[[CST]] : f32
   %complex = complex.constant [1.0 : f32, 0.0 : f32] : complex<f32>
@@ -53,7 +53,7 @@ func @imag_of_const() -> f32 {
 }
 
 // CHECK-LABEL: func @imag_of_create_op(
-func @imag_of_create_op() -> f32 {
+func.func @imag_of_create_op() -> f32 {
   // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
   // CHECK-NEXT: return %[[CST]] : f32
   %real = arith.constant 1.0 : f32

diff --git a/mlir/test/Dialect/Complex/invalid.mlir b/mlir/test/Dialect/Complex/invalid.mlir
index ec046effacf8c..591ebe79f7b77 100644
--- a/mlir/test/Dialect/Complex/invalid.mlir
+++ b/mlir/test/Dialect/Complex/invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt -split-input-file %s -verify-diagnostics
 
-func @complex_constant_wrong_array_attribute_length() {
+func.func @complex_constant_wrong_array_attribute_length() {
   // expected-error @+1 {{requires 'value' to be a complex constant, represented as array of two values}}
   %0 = complex.constant [1.0 : f32] : complex<f32>
   return
@@ -8,7 +8,7 @@ func @complex_constant_wrong_array_attribute_length() {
 
 // -----
 
-func @complex_constant_wrong_element_types() {
+func.func @complex_constant_wrong_element_types() {
   // expected-error @+1 {{requires attribute's element types ('f32', 'f32') to match the element type of the op's return type ('f64')}}
   %0 = complex.constant [1.0 : f32, -1.0 : f32] : complex<f64>
   return
@@ -16,7 +16,7 @@ func @complex_constant_wrong_element_types() {
 
 // -----
 
-func @complex_constant_two_different_element_types() {
+func.func @complex_constant_two_different_element_types() {
   // expected-error @+1 {{requires attribute's element types ('f32', 'f64') to match the element type of the op's return type ('f64')}}
   %0 = complex.constant [1.0 : f32, -1.0 : f64] : complex<f64>
   return

diff --git a/mlir/test/Dialect/Complex/ops.mlir b/mlir/test/Dialect/Complex/ops.mlir
index 75bb082efb2ab..a24d780d05687 100644
--- a/mlir/test/Dialect/Complex/ops.mlir
+++ b/mlir/test/Dialect/Complex/ops.mlir
@@ -4,7 +4,7 @@
 
 // CHECK-LABEL: func @ops(
 // CHECK-SAME:            %[[F:.*]]: f32) {
-func @ops(%f: f32) {
+func.func @ops(%f: f32) {
   // CHECK: complex.constant [1.{{.*}}, -1.{{.*}}] : complex<f64>
   %cst_f64 = complex.constant [0.1, -1.0] : complex<f64>
 

diff --git a/mlir/test/Dialect/ControlFlow/canonicalize.mlir b/mlir/test/Dialect/ControlFlow/canonicalize.mlir
index 2e6a6b8e4a9eb..9ad790af8f499 100644
--- a/mlir/test/Dialect/ControlFlow/canonicalize.mlir
+++ b/mlir/test/Dialect/ControlFlow/canonicalize.mlir
@@ -3,7 +3,7 @@
 /// Test the folding of BranchOp.
 
 // CHECK-LABEL: func @br_folding(
-func @br_folding() -> i32 {
+func.func @br_folding() -> i32 {
   // CHECK-NEXT: %[[CST:.*]] = arith.constant 0 : i32
   // CHECK-NEXT: return %[[CST]] : i32
   %c0_i32 = arith.constant 0 : i32
@@ -16,7 +16,7 @@ func @br_folding() -> i32 {
 
 // CHECK-LABEL: func @br_passthrough(
 // CHECK-SAME: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32
-func @br_passthrough(%arg0 : i32, %arg1 : i32) -> (i32, i32) {
+func.func @br_passthrough(%arg0 : i32, %arg1 : i32) -> (i32, i32) {
   "foo.switch"() [^bb1, ^bb2, ^bb3] : () -> ()
 
 ^bb1:
@@ -35,7 +35,7 @@ func @br_passthrough(%arg0 : i32, %arg1 : i32) -> (i32, i32) {
 /// Test the folding of CondBranchOp with a constant condition.
 
 // CHECK-LABEL: func @cond_br_folding(
-func @cond_br_folding(%cond : i1, %a : i32) {
+func.func @cond_br_folding(%cond : i1, %a : i32) {
   // CHECK-NEXT: return
 
   %false_cond = arith.constant false
@@ -55,7 +55,7 @@ func @cond_br_folding(%cond : i1, %a : i32) {
 /// Test the folding of CondBranchOp when the successors are identical.
 
 // CHECK-LABEL: func @cond_br_same_successor(
-func @cond_br_same_successor(%cond : i1, %a : i32) {
+func.func @cond_br_same_successor(%cond : i1, %a : i32) {
   // CHECK-NEXT: return
 
   cf.cond_br %cond, ^bb1(%a : i32), ^bb1(%a : i32)
@@ -70,7 +70,7 @@ func @cond_br_same_successor(%cond : i1, %a : i32) {
 // CHECK-LABEL: func @cond_br_same_successor_insert_select(
 // CHECK-SAME: %[[COND:.*]]: i1, %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32
 // CHECK-SAME: %[[ARG2:.*]]: tensor<2xi32>, %[[ARG3:.*]]: tensor<2xi32>
-func @cond_br_same_successor_insert_select(
+func.func @cond_br_same_successor_insert_select(
       %cond : i1, %a : i32, %b : i32, %c : tensor<2xi32>, %d : tensor<2xi32>
     ) -> (i32, tensor<2xi32>)  {
   // CHECK: %[[RES:.*]] = arith.select %[[COND]], %[[ARG0]], %[[ARG1]]
@@ -86,7 +86,7 @@ func @cond_br_same_successor_insert_select(
 /// Test the compound folding of BranchOp and CondBranchOp.
 
 // CHECK-LABEL: func @cond_br_and_br_folding(
-func @cond_br_and_br_folding(%a : i32) {
+func.func @cond_br_and_br_folding(%a : i32) {
   // CHECK-NEXT: return
 
   %false_cond = arith.constant false
@@ -104,7 +104,7 @@ func @cond_br_and_br_folding(%a : i32) {
 
 // CHECK-LABEL: func @cond_br_passthrough(
 // CHECK-SAME: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[COND:.*]]: i1
-func @cond_br_passthrough(%arg0 : i32, %arg1 : i32, %arg2 : i32, %cond : i1) -> (i32, i32) {
+func.func @cond_br_passthrough(%arg0 : i32, %arg1 : i32, %arg2 : i32, %cond : i1) -> (i32, i32) {
   // CHECK: %[[RES:.*]] = arith.select %[[COND]], %[[ARG0]], %[[ARG2]]
   // CHECK: %[[RES2:.*]] = arith.select %[[COND]], %[[ARG1]], %[[ARG2]]
   // CHECK: return %[[RES]], %[[RES2]]
@@ -121,7 +121,7 @@ func @cond_br_passthrough(%arg0 : i32, %arg1 : i32, %arg2 : i32, %cond : i1) ->
 /// Test the failure modes of collapsing CondBranchOp pass-throughs successors.
 
 // CHECK-LABEL: func @cond_br_pass_through_fail(
-func @cond_br_pass_through_fail(%cond : i1) {
+func.func @cond_br_pass_through_fail(%cond : i1) {
   // CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
 
   cf.cond_br %cond, ^bb1, ^bb2
@@ -145,7 +145,7 @@ func @cond_br_pass_through_fail(%cond : i1) {
 // CHECK-LABEL: func @switch_only_default(
 // CHECK-SAME: %[[FLAG:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
-func @switch_only_default(%flag : i32, %caseOperand0 : f32) {
+func.func @switch_only_default(%flag : i32, %caseOperand0 : f32) {
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2] : () -> ()
   ^bb1:
@@ -165,7 +165,7 @@ func @switch_only_default(%flag : i32, %caseOperand0 : f32) {
 // CHECK-SAME: %[[FLAG:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
-func @switch_case_matching_default(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32) {
+func.func @switch_case_matching_default(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32) {
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb3] : () -> ()
   ^bb1:
@@ -190,7 +190,7 @@ func @switch_case_matching_default(%flag : i32, %caseOperand0 : f32, %caseOperan
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_2:[a-zA-Z0-9_]+]]
-func @switch_on_const_no_match(%caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
+func.func @switch_on_const_no_match(%caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb3, ^bb4] : () -> ()
   ^bb1:
@@ -216,7 +216,7 @@ func @switch_on_const_no_match(%caseOperand0 : f32, %caseOperand1 : f32, %caseOp
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_2:[a-zA-Z0-9_]+]]
-func @switch_on_const_with_match(%caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
+func.func @switch_on_const_with_match(%caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
   // add predecessors for all blocks to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb3, ^bb4] : () -> ()
   ^bb1:
@@ -244,7 +244,7 @@ func @switch_on_const_with_match(%caseOperand0 : f32, %caseOperand1 : f32, %case
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_2:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_3:[a-zA-Z0-9_]+]]
-func @switch_passthrough(%flag : i32,
+func.func @switch_passthrough(%flag : i32,
                          %caseOperand0 : f32,
                          %caseOperand1 : f32,
                          %caseOperand2 : f32,
@@ -285,7 +285,7 @@ func @switch_passthrough(%flag : i32,
 // CHECK-SAME: %[[FLAG:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
-func @switch_from_switch_with_same_value_with_match(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32) {
+func.func @switch_from_switch_with_same_value_with_match(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32) {
   // add predecessors for all blocks except ^bb3 to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb4, ^bb5] : () -> ()
 
@@ -322,7 +322,7 @@ func @switch_from_switch_with_same_value_with_match(%flag : i32, %caseOperand0 :
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_2:[a-zA-Z0-9_]+]]
-func @switch_from_switch_with_same_value_no_match(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
+func.func @switch_from_switch_with_same_value_no_match(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
   // add predecessors for all blocks except ^bb3 to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb4, ^bb5, ^bb6] : () -> ()
 
@@ -362,7 +362,7 @@ func @switch_from_switch_with_same_value_no_match(%flag : i32, %caseOperand0 : f
 // CHECK-SAME: %[[CASE_OPERAND_0:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_1:[a-zA-Z0-9_]+]]
 // CHECK-SAME: %[[CASE_OPERAND_2:[a-zA-Z0-9_]+]]
-func @switch_from_switch_default_with_same_value(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
+func.func @switch_from_switch_default_with_same_value(%flag : i32, %caseOperand0 : f32, %caseOperand1 : f32, %caseOperand2 : f32) {
   // add predecessors for all blocks except ^bb3 to avoid other canonicalizations.
   "foo.pred"() [^bb1, ^bb2, ^bb4, ^bb5, ^bb6] : () -> ()
 
@@ -405,7 +405,7 @@ func @switch_from_switch_default_with_same_value(%flag : i32, %caseOperand0 : f3
 /// branches with the same condition.
 
 // CHECK-LABEL: func @cond_br_from_cond_br_with_same_condition
-func @cond_br_from_cond_br_with_same_condition(%cond : i1) {
+func.func @cond_br_from_cond_br_with_same_condition(%cond : i1) {
   // CHECK:   cf.cond_br %{{.*}}, ^bb1, ^bb2
   // CHECK: ^bb1:
   // CHECK:   return
@@ -426,7 +426,7 @@ func @cond_br_from_cond_br_with_same_condition(%cond : i1) {
 
 // Erase assertion if condition is known to be true at compile time.
 // CHECK-LABEL: @assert_true
-func @assert_true() {
+func.func @assert_true() {
   // CHECK-NOT: cf.assert
   %true = arith.constant true
   cf.assert %true, "Computer says no"
@@ -438,7 +438,7 @@ func @assert_true() {
 // Keep assertion if condition unknown at compile time.
 // CHECK-LABEL: @cf.assert
 // CHECK-SAME:  (%[[ARG:.*]]: i1)
-func @cf.assert(%arg : i1) {
+func.func @cf.assert(%arg : i1) {
   // CHECK: cf.assert %[[ARG]], "Computer says no"
   cf.assert %arg, "Computer says no"
   return
@@ -451,7 +451,7 @@ func @cf.assert(%arg : i1) {
 //       CHECK:       %[[falseval:.+]] = arith.constant false
 //       CHECK:       "test.consumer1"(%[[trueval]]) : (i1) -> ()
 //       CHECK:       "test.consumer2"(%[[falseval]]) : (i1) -> ()
-func @branchCondProp(%arg0: i1) {
+func.func @branchCondProp(%arg0: i1) {
   cf.cond_br %arg0, ^trueB, ^falseB
 
 ^trueB:

diff --git a/mlir/test/Dialect/ControlFlow/invalid.mlir b/mlir/test/Dialect/ControlFlow/invalid.mlir
index b35c8fcce2734..fd95e97e208cf 100644
--- a/mlir/test/Dialect/ControlFlow/invalid.mlir
+++ b/mlir/test/Dialect/ControlFlow/invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt -verify-diagnostics -split-input-file %s
 
-func @switch_missing_case_value(%flag : i32, %caseOperand : i32) {
+func.func @switch_missing_case_value(%flag : i32, %caseOperand : i32) {
   cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     45: ^bb2(%caseOperand : i32),
@@ -18,7 +18,7 @@ func @switch_missing_case_value(%flag : i32, %caseOperand : i32) {
 
 // -----
 
-func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
+func.func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
   cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     // expected-error at +1 {{expected integer value}}
@@ -35,7 +35,7 @@ func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
 
 // -----
 
-func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
+func.func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
   cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     45: ^bb2(%caseOperand : i32)
@@ -53,7 +53,7 @@ func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
 
 // -----
 
-func @switch_missing_default(%flag : i32, %caseOperand : i32) {
+func.func @switch_missing_default(%flag : i32, %caseOperand : i32) {
   cf.switch %flag : i32, [
     // expected-error at +1 {{expected 'default'}}
     45: ^bb2(%caseOperand : i32)

diff --git a/mlir/test/Dialect/ControlFlow/ops.mlir b/mlir/test/Dialect/ControlFlow/ops.mlir
index 43f33feeb8a72..8453c2b7038f1 100644
--- a/mlir/test/Dialect/ControlFlow/ops.mlir
+++ b/mlir/test/Dialect/ControlFlow/ops.mlir
@@ -2,13 +2,13 @@
 // RUN: mlir-opt %s --mlir-print-op-generic | mlir-opt | FileCheck %s
 
 // CHECK-LABEL: @assert
-func @assert(%arg : i1) {
+func.func @assert(%arg : i1) {
   cf.assert %arg, "Some message in case this assertion fails."
   return
 }
 
 // CHECK-LABEL: func @switch(
-func @switch(%flag : i32, %caseOperand : i32) {
+func.func @switch(%flag : i32, %caseOperand : i32) {
   cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
     42: ^bb2(%caseOperand : i32),
@@ -24,7 +24,7 @@ func @switch(%flag : i32, %caseOperand : i32) {
 }
 
 // CHECK-LABEL: func @switch_i64(
-func @switch_i64(%flag : i64, %caseOperand : i32) {
+func.func @switch_i64(%flag : i64, %caseOperand : i32) {
   cf.switch %flag : i64, [
     default: ^bb1(%caseOperand : i32),
     42: ^bb2(%caseOperand : i32),

diff --git a/mlir/test/Dialect/EmitC/attrs.mlir b/mlir/test/Dialect/EmitC/attrs.mlir
index 804f16bc1ed08..8bf196297176e 100644
--- a/mlir/test/Dialect/EmitC/attrs.mlir
+++ b/mlir/test/Dialect/EmitC/attrs.mlir
@@ -3,7 +3,7 @@
 // RUN: mlir-opt -verify-diagnostics %s | mlir-opt -verify-diagnostics | FileCheck %s
 
 // CHECK-LABEL: func @opaque_attrs() {
-func @opaque_attrs() {
+func.func @opaque_attrs() {
   // CHECK-NEXT: #emitc.opaque<"attr">
   emitc.call "f"() {args = [#emitc.opaque<"attr">]} : () -> ()
   // CHECK-NEXT: #emitc.opaque<"\22quoted_attr\22">

diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir
index 865e2dcf52724..f6e4701d24466 100644
--- a/mlir/test/Dialect/EmitC/invalid_ops.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -split-input-file -verify-diagnostics
 
-func @const_attribute_return_type_1() {
+func.func @const_attribute_return_type_1() {
     // expected-error @+1 {{'emitc.constant' op requires attribute's type ('i64') to match op's return type ('i32')}}
     %c0 = "emitc.constant"(){value = 42: i64} : () -> i32
     return
@@ -8,7 +8,7 @@ func @const_attribute_return_type_1() {
 
 // -----
 
-func @const_attribute_return_type_2() {
+func.func @const_attribute_return_type_2() {
     // expected-error @+1 {{'emitc.constant' op requires attribute's type ('!emitc.opaque<"char">') to match op's return type ('!emitc.opaque<"mychar">')}}
     %c0 = "emitc.constant"(){value = "CHAR_MIN" : !emitc.opaque<"char">} : () -> !emitc.opaque<"mychar">
     return
@@ -16,7 +16,7 @@ func @const_attribute_return_type_2() {
 
 // -----
 
-func @index_args_out_of_range_1() {
+func.func @index_args_out_of_range_1() {
     // expected-error @+1 {{'emitc.call' op index argument is out of range}}
     emitc.call "test" () {args = [0 : index]} : () -> ()
     return
@@ -24,7 +24,7 @@ func @index_args_out_of_range_1() {
 
 // -----
 
-func @index_args_out_of_range_2(%arg : i32) {
+func.func @index_args_out_of_range_2(%arg : i32) {
     // expected-error @+1 {{'emitc.call' op index argument is out of range}}
     emitc.call "test" (%arg, %arg) {args = [2 : index]} : (i32, i32) -> ()
     return
@@ -32,7 +32,7 @@ func @index_args_out_of_range_2(%arg : i32) {
 
 // -----
 
-func @empty_callee() {
+func.func @empty_callee() {
     // expected-error @+1 {{'emitc.call' op callee must not be empty}}
     emitc.call "" () : () -> ()
     return
@@ -40,7 +40,7 @@ func @empty_callee() {
 
 // -----
 
-func @nonetype_arg(%arg : i32) {
+func.func @nonetype_arg(%arg : i32) {
     // expected-error @+1 {{'emitc.call' op array argument has no type}}
     emitc.call "nonetype_arg"(%arg) {args = [0 : index, [0, 1, 2]]} : (i32) -> i32
     return
@@ -48,7 +48,7 @@ func @nonetype_arg(%arg : i32) {
 
 // -----
 
-func @array_template_arg(%arg : i32) {
+func.func @array_template_arg(%arg : i32) {
     // expected-error @+1 {{'emitc.call' op template argument has invalid type}}
     emitc.call "nonetype_template_arg"(%arg) {template_args = [[0, 1, 2]]} : (i32) -> i32
     return
@@ -56,7 +56,7 @@ func @array_template_arg(%arg : i32) {
 
 // -----
 
-func @dense_template_argument(%arg : i32) {
+func.func @dense_template_argument(%arg : i32) {
     // expected-error @+1 {{'emitc.call' op template argument has invalid type}}
     emitc.call "dense_template_argument"(%arg) {template_args = [dense<[1.0, 1.0]> : tensor<2xf32>]} : (i32) -> i32
     return
@@ -64,7 +64,7 @@ func @dense_template_argument(%arg : i32) {
 
 // -----
 
-func @empty_operator(%arg : i32) {
+func.func @empty_operator(%arg : i32) {
     // expected-error @+1 {{'emitc.apply' op applicable operator must not be empty}}
     %2 = emitc.apply ""(%arg) : (i32) -> !emitc.ptr<i32>
     return
@@ -72,7 +72,7 @@ func @empty_operator(%arg : i32) {
 
 // -----
 
-func @illegal_operator(%arg : i32) {
+func.func @illegal_operator(%arg : i32) {
     // expected-error @+1 {{'emitc.apply' op applicable operator is illegal}}
     %2 = emitc.apply "+"(%arg) : (i32) -> !emitc.ptr<i32>
     return
@@ -80,7 +80,7 @@ func @illegal_operator(%arg : i32) {
 
 // -----
 
-func @var_attribute_return_type_1() {
+func.func @var_attribute_return_type_1() {
     // expected-error @+1 {{'emitc.variable' op requires attribute's type ('i64') to match op's return type ('i32')}}
     %c0 = "emitc.variable"(){value = 42: i64} : () -> i32
     return
@@ -88,7 +88,7 @@ func @var_attribute_return_type_1() {
 
 // -----
 
-func @var_attribute_return_type_2() {
+func.func @var_attribute_return_type_2() {
     // expected-error @+1 {{'emitc.variable' op requires attribute's type ('!emitc.ptr<i64>') to match op's return type ('!emitc.ptr<i32>')}}
     %c0 = "emitc.variable"(){value = "nullptr" : !emitc.ptr<i64>} : () -> !emitc.ptr<i32>
     return

diff --git a/mlir/test/Dialect/EmitC/invalid_types.mlir b/mlir/test/Dialect/EmitC/invalid_types.mlir
index 5e05b56fa5e83..f47361378a79e 100644
--- a/mlir/test/Dialect/EmitC/invalid_types.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_types.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -split-input-file -verify-diagnostics
 
-func @illegal_opaque_type_1() {
+func.func @illegal_opaque_type_1() {
     // expected-error @+1 {{expected non empty string in !emitc.opaque type}}
     %1 = "emitc.variable"(){value = "42" : !emitc.opaque<"">} : () -> !emitc.opaque<"mytype">
 }

diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir
index 98bd919cbf35e..cce9c281b0265 100644
--- a/mlir/test/Dialect/EmitC/ops.mlir
+++ b/mlir/test/Dialect/EmitC/ops.mlir
@@ -4,7 +4,7 @@ emitc.include <"test.h">
 emitc.include "test.h"
 
 // CHECK-LABEL: func @f(%{{.*}}: i32, %{{.*}}: !emitc.opaque<"int32_t">) {
-func @f(%arg0: i32, %f: !emitc.opaque<"int32_t">) {
+func.func @f(%arg0: i32, %f: !emitc.opaque<"int32_t">) {
   %1 = "emitc.call"() {callee = "blah"} : () -> i64
   emitc.call "foo" (%1) {args = [
     0 : index, dense<[0, 1]> : tensor<2xi32>, 0 : index
@@ -12,12 +12,12 @@ func @f(%arg0: i32, %f: !emitc.opaque<"int32_t">) {
   return
 }
 
-func @c() {
+func.func @c() {
   %1 = "emitc.constant"(){value = 42 : i32} : () -> i32
   return
 }
 
-func @a(%arg0: i32, %arg1: i32) {
+func.func @a(%arg0: i32, %arg1: i32) {
   %1 = "emitc.apply"(%arg0) {applicableOperator = "&"} : (i32) -> !emitc.ptr<i32>
   %2 = emitc.apply "&"(%arg1) : (i32) -> !emitc.ptr<i32>
   return

diff --git a/mlir/test/Dialect/EmitC/types.mlir b/mlir/test/Dialect/EmitC/types.mlir
index a8aebf058f890..480bf1dd444ed 100644
--- a/mlir/test/Dialect/EmitC/types.mlir
+++ b/mlir/test/Dialect/EmitC/types.mlir
@@ -3,7 +3,7 @@
 // RUN: mlir-opt -verify-diagnostics %s | mlir-opt -verify-diagnostics | FileCheck %s
 
 // CHECK-LABEL: func @opaque_types() {
-func @opaque_types() {
+func.func @opaque_types() {
   // CHECK-NEXT: !emitc.opaque<"int">
   emitc.call "f"() {template_args = [!emitc<"opaque<\"int\">">]} : () -> ()
   // CHECK-NEXT: !emitc.opaque<"byte">
@@ -19,7 +19,7 @@ func @opaque_types() {
 }
 
 // CHECK-LABEL: func @pointer_types() {
-func @pointer_types() {
+func.func @pointer_types() {
   // CHECK-NEXT: !emitc.ptr<i32>
   emitc.call "f"() {template_args = [!emitc<"ptr<i32>">]} : () -> ()
   // CHECK-NEXT: !emitc.ptr<i64>

diff --git a/mlir/test/Dialect/Func/func-bufferize.mlir b/mlir/test/Dialect/Func/func-bufferize.mlir
index 6895e44c27913..22986bbc76010 100644
--- a/mlir/test/Dialect/Func/func-bufferize.mlir
+++ b/mlir/test/Dialect/Func/func-bufferize.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL:   func @identity(
 // CHECK-SAME:                   %[[ARG:.*]]: memref<f32>) -> memref<f32> {
 // CHECK:           return %[[ARG]] : memref<f32>
-func @identity(%arg0: tensor<f32>) -> tensor<f32> {
+func.func @identity(%arg0: tensor<f32>) -> tensor<f32> {
   return %arg0 : tensor<f32>
 }
 
@@ -12,7 +12,7 @@ func @identity(%arg0: tensor<f32>) -> tensor<f32> {
 // CHECK:           cf.br ^bb1(%[[ARG]] : memref<f32>)
 // CHECK:         ^bb1(%[[BBARG:.*]]: memref<f32>):
 // CHECK:           return %[[BBARG]] : memref<f32>
-func @block_arguments(%arg0: tensor<f32>) -> tensor<f32> {
+func.func @block_arguments(%arg0: tensor<f32>) -> tensor<f32> {
   cf.br ^bb1(%arg0: tensor<f32>)
 ^bb1(%bbarg: tensor<f32>):
   return %bbarg : tensor<f32>
@@ -22,8 +22,8 @@ func @block_arguments(%arg0: tensor<f32>) -> tensor<f32> {
 // CHECK-LABEL:   func @call_source() -> memref<f32> {
 // CHECK:           %[[RET:.*]] = call @source() : () -> memref<f32>
 // CHECK:           return %[[RET]] : memref<f32>
-func private @source() -> tensor<f32>
-func @call_source() -> tensor<f32> {
+func.func private @source() -> tensor<f32>
+func.func @call_source() -> tensor<f32> {
   %0 = call @source() : () -> tensor<f32>
   return %0 : tensor<f32>
 }
@@ -31,8 +31,8 @@ func @call_source() -> tensor<f32> {
 // CHECK-SAME:                    %[[ARG:.*]]: memref<f32>) {
 // CHECK:           call @sink(%[[ARG]]) : (memref<f32>) -> ()
 // CHECK:           return
-func private @sink(tensor<f32>)
-func @call_sink(%arg0: tensor<f32>) {
+func.func private @sink(tensor<f32>)
+func.func @call_sink(%arg0: tensor<f32>) {
   call @sink(%arg0) : (tensor<f32>) -> ()
   return
 }
@@ -41,7 +41,7 @@ func @call_sink(%arg0: tensor<f32>) {
 // CHECK:           %[[TENSOR:.*]] = "test.source"() : () -> tensor<f32>
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
 // CHECK:           return %[[MEMREF]] : memref<f32>
-func @unconverted_op_in_body() -> tensor<f32> {
+func.func @unconverted_op_in_body() -> tensor<f32> {
   %0 = "test.source"() : () -> tensor<f32>
   return %0 : tensor<f32>
 }
@@ -50,7 +50,7 @@ func @unconverted_op_in_body() -> tensor<f32> {
 
 // Because this pass updates block arguments, it needs to also atomically
 // update all terminators and issue an error if that is not possible.
-func @unable_to_update_terminator(%arg0: tensor<f32>) -> tensor<f32> {
+func.func @unable_to_update_terminator(%arg0: tensor<f32>) -> tensor<f32> {
     %0 = arith.constant true
     cf.cond_br %0, ^bb1(%arg0: tensor<f32>), ^bb2(%arg0: tensor<f32>)
   ^bb1(%bbarg0: tensor<f32>):
@@ -69,7 +69,7 @@ func @unable_to_update_terminator(%arg0: tensor<f32>) -> tensor<f32> {
 // CHECK: bufferize_while
 // CHECK: scf.while
 // CHECK: scf.condition
-func @bufferize_while(%arg0: i64, %arg1: i64) -> i64 {
+func.func @bufferize_while(%arg0: i64, %arg1: i64) -> i64 {
   %c2_i64 = arith.constant 2 : i64
   %0:2 = scf.while (%arg2 = %arg0) : (i64) -> (i64, i64) {
     %1 = arith.cmpi slt, %arg2, %arg1 : i64

diff --git a/mlir/test/Dialect/Func/invalid.mlir b/mlir/test/Dialect/Func/invalid.mlir
index f2f13d562da3c..d9d67f5944a31 100644
--- a/mlir/test/Dialect/Func/invalid.mlir
+++ b/mlir/test/Dialect/Func/invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt -split-input-file %s -verify-diagnostics
 
-func @unsupported_attribute() {
+func.func @unsupported_attribute() {
   // expected-error @+1 {{invalid kind of attribute specified}}
   %0 = constant "" : index
   return
@@ -8,9 +8,9 @@ func @unsupported_attribute() {
 
 // -----
 
-func private @return_i32_f32() -> (i32, f32)
+func.func private @return_i32_f32() -> (i32, f32)
 
-func @call() {
+func.func @call() {
   // expected-error @+3 {{op result type mismatch at index 0}}
   // expected-note @+2 {{op result types: 'f32', 'i32'}}
   // expected-note @+1 {{function result types: 'i32', 'f32'}}

diff --git a/mlir/test/Dialect/GPU/async-region.mlir b/mlir/test/Dialect/GPU/async-region.mlir
index d9ba9ce338806..9275ff4a6e25f 100644
--- a/mlir/test/Dialect/GPU/async-region.mlir
+++ b/mlir/test/Dialect/GPU/async-region.mlir
@@ -7,10 +7,10 @@ module attributes {gpu.container_module} {
     gpu.func @kernel() kernel { gpu.return }
   }
 
-  func private @foo() -> ()
+  func.func private @foo() -> ()
 
   // CHECK-LABEL:func @async(%{{.*}}: index)
-  func @async(%sz : index) {
+  func.func @async(%sz : index) {
     // CHECK: %[[t0:.*]] = gpu.wait async
     // CHECK: %[[t1:.*]] = gpu.launch_func async [%[[t0]]]
     gpu.launch_func @kernels::@kernel
@@ -29,7 +29,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @defer_wait(%{{.*}}: index)
-  func @defer_wait(%sz : index) {
+  func.func @defer_wait(%sz : index) {
     // CHECK: %[[a0:.*]], %[[f0:.*]] = async.execute
     %a0 = async.execute {
       // CHECK: %[[t:.*]] = gpu.launch_func async
@@ -59,7 +59,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @defer_wait_blocked_by_side_effect(%{{.*}}: index)
-  func @defer_wait_blocked_by_side_effect(%sz : index) {
+  func.func @defer_wait_blocked_by_side_effect(%sz : index) {
     // CHECK: %[[a:.*]] = async.execute
     %a = async.execute {
       // CHECK: %[[t:.*]] = gpu.launch_func async
@@ -77,7 +77,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @defer_wait_pass_through(%{{.*}}: index)
-  func @defer_wait_pass_through(%sz : index) {
+  func.func @defer_wait_pass_through(%sz : index) {
     // CHECK: %[[a0:.*]], %[[f0:.*]] = async.execute
     %a0 = async.execute {
       // CHECK: %[[t:.*]] = gpu.launch_func async
@@ -104,7 +104,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @async_execute_with_result(%{{.*}}: index)
-  func @async_execute_with_result(%sz : index) -> index {
+  func.func @async_execute_with_result(%sz : index) -> index {
     // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute
     // CHECK-SAME: -> (!async.value<index>, !async.value<!gpu.async.token>)
     %a0, %f0 = async.execute -> !async.value<index> {
@@ -127,7 +127,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @async_execute_no_use(%{{.*}}: index)
-  func @async_execute_no_use(%sz : index) {
+  func.func @async_execute_no_use(%sz : index) {
     // CHECK: async.execute {
     %a0 = async.execute {
       // CHECK: %[[t:.*]] = gpu.launch_func async
@@ -140,7 +140,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @async_execute_fork(%{{.*}}: index)
-  func @async_execute_fork(%sz : index) {
+  func.func @async_execute_fork(%sz : index) {
     // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute
     // CHECK-SAME: -> (!async.value<!gpu.async.token>, !async.value<!gpu.async.token>)
     %a0 = async.execute {
@@ -171,7 +171,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @existing_tokens()
-  func @existing_tokens() {
+  func.func @existing_tokens() {
     // CHECK: %[[t0:.*]] = gpu.wait async
     // CHECK-NOT: [{{.*}}]
     %t0 = gpu.wait async

diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index 979095b7f41e2..db577f0558aa7 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -2,7 +2,7 @@
 
 // Fold all the gpu.wait ops as they are redundant.
 // CHECK-LABEL: func @fold_wait_op_test1
-func @fold_wait_op_test1() {
+func.func @fold_wait_op_test1() {
   %1 = gpu.wait async
   gpu.wait []
   %3 = gpu.wait async
@@ -13,7 +13,7 @@ func @fold_wait_op_test1() {
 
 // Replace uses of gpu.wait op with its async dependency.
 // CHECK-LABEL: func @fold_wait_op_test2
-func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
+func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
   %0 = gpu.wait async
   %memref, %asyncToken = gpu.alloc async [%0] () : memref<5xf16>
   gpu.wait [%0]
@@ -29,7 +29,7 @@ func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
 // CHECK-NEXT: return
 
 // CHECK-LABEL: func @fold_memcpy_op
-func @fold_memcpy_op(%arg0: i1) {
+func.func @fold_memcpy_op(%arg0: i1) {
     %cst = arith.constant 0.000000e+00 : f16
     %1 = memref.alloc() : memref<2xf16>
     %2 = gpu.wait async
@@ -52,7 +52,7 @@ func @fold_memcpy_op(%arg0: i1) {
 
 // We cannot fold memcpy here as dest is a block argument.
 // CHECK-LABEL: func @do_not_fold_memcpy_op1
-func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
+func.func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
     %cst = arith.constant 0.000000e+00 : f16
     %2 = gpu.wait async
     %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
@@ -67,7 +67,7 @@ func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
 
 // We cannot fold gpu.memcpy as it is used by an op having read effect on dest.
 // CHECK-LABEL: func @do_not_fold_memcpy_op2
-func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
+func.func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
     %cst = arith.constant 0.000000e+00 : f16
     %1 = memref.alloc() : memref<2xf16>
     %2 = gpu.wait async
@@ -83,7 +83,7 @@ func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
 // CHECK: gpu.memcpy
 
 // CHECK-LABEL: @memcpy_after_cast
-func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+func.func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
   // CHECK-NOT: memref.cast
   // CHECK: gpu.memcpy
   %0 = memref.cast %arg0 : memref<10xf32> to memref<?xf32>
@@ -93,7 +93,7 @@ func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
 }
 
 // CHECK-LABEL: @memset_after_cast
-func @memset_after_cast(%arg0: memref<10xf32>, %arg1: f32) {
+func.func @memset_after_cast(%arg0: memref<10xf32>, %arg1: f32) {
   // CHECK-NOT: memref.cast
   // CHECK: gpu.memset
   %0 = memref.cast %arg0 : memref<10xf32> to memref<?xf32>
@@ -107,7 +107,7 @@ func @memset_after_cast(%arg0: memref<10xf32>, %arg1: f32) {
 // CHECK-LABEL: func @gpu_dim_of_alloc(
 //  CHECK-SAME:     %[[SIZE:[0-9a-z]+]]: index
 //  CHECK-NEXT:   return %[[SIZE]] : index
-func @gpu_dim_of_alloc(%size: index) -> index {
+func.func @gpu_dim_of_alloc(%size: index) -> index {
   %0 = gpu.alloc(%size) : memref<?xindex>
   %c0 = arith.constant 0 : index
   %1 = memref.dim %0, %c0 : memref<?xindex>
@@ -117,7 +117,7 @@ func @gpu_dim_of_alloc(%size: index) -> index {
 // -----
 
 // CHECK-LABEL: func @simplify_gpu_launch
-func @simplify_gpu_launch() attributes {llvm.emit_c_interface} {
+func.func @simplify_gpu_launch() attributes {llvm.emit_c_interface} {
   %cst = arith.constant 0.000000e+00 : f32
   %c1 = arith.constant 1 : index
   %c32 = arith.constant 32 : index

diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index a6fd6dbb775d9..5360e8f7f8ced 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt -split-input-file -verify-diagnostics %s
 
-func @not_enough_sizes(%sz : index) {
+func.func @not_enough_sizes(%sz : index) {
  // expected-error@+1 {{expected 6 or more operands, but found 5}}
   "gpu.launch"(%sz, %sz, %sz, %sz, %sz) ({
     gpu.return
@@ -10,7 +10,7 @@ func @not_enough_sizes(%sz : index) {
 
 // -----
 
-func @no_region_attrs(%sz : index) {
+func.func @no_region_attrs(%sz : index) {
  // expected-error@+1 {{unexpected number of region arguments}}
  "gpu.launch"(%sz, %sz, %sz, %sz, %sz, %sz) ({
   ^bb1(%bx: index, %by: index, %bz: index,
@@ -22,7 +22,7 @@ func @no_region_attrs(%sz : index) {
 
 // -----
 
-func @launch_requires_gpu_return(%sz : index) {
+func.func @launch_requires_gpu_return(%sz : index) {
  // @expected-note@+1 {{in 'gpu.launch' body region}}
   gpu.launch blocks(%bx, %by, %bz) in (%sbx = %sz, %sby = %sz, %sbz = %sz)
              threads(%tx, %ty, %tz) in (%stx = %sz, %sty = %sz, %stz = %sz) {
@@ -35,7 +35,7 @@ func @launch_requires_gpu_return(%sz : index) {
 
 // -----
 
-func @launch_func_too_few_operands(%sz : index) {
+func.func @launch_func_too_few_operands(%sz : index) {
  // expected-error@+1 {{expected 6 or more operands}}
   "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
       {operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 0, 0]> : vector<8xi32>}
@@ -45,7 +45,7 @@ func @launch_func_too_few_operands(%sz : index) {
 
 // -----
 
-func @launch_func_missing_parent_module_attribute(%sz : index) {
+func.func @launch_func_missing_parent_module_attribute(%sz : index) {
  // expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
   gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
   return
@@ -54,7 +54,7 @@ func @launch_func_missing_parent_module_attribute(%sz : index) {
 // -----
 
 module attributes {gpu.container_module} {
-  func @launch_func_missing_callee_attribute(%sz : index) {
+  func.func @launch_func_missing_callee_attribute(%sz : index) {
    // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
         {operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 1, 0, 0]> : vector<9xi32>}
@@ -66,7 +66,7 @@ module attributes {gpu.container_module} {
 // -----
 
 module attributes {gpu.container_module} {
-  func @launch_func_no_function_attribute(%sz : index) {
+  func.func @launch_func_no_function_attribute(%sz : index) {
    // expected-error@+1 {{custom op 'gpu.launch_func' invalid kind of attribute specified}}
     gpu.launch_func "foo" blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
@@ -76,7 +76,7 @@ module attributes {gpu.container_module} {
 // -----
 
 module attributes {gpu.container_module} {
-  func @launch_func_undefined_module(%sz : index) {
+  func.func @launch_func_undefined_module(%sz : index) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
@@ -100,7 +100,7 @@ module attributes {gpu.container_module} {
   module @kernels {
   }
 
-  func @launch_func_missing_module_attribute(%sz : index) {
+  func.func @launch_func_missing_module_attribute(%sz : index) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
@@ -112,7 +112,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   gpu.module @kernels { }
 
-  func @launch_func_undefined_function(%sz : index) {
+  func.func @launch_func_undefined_function(%sz : index) {
    // expected-error@+1 {{kernel function '@kernels::@kernel_1' is undefined}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
@@ -127,7 +127,7 @@ module attributes {gpu.container_module} {
     memref.global "private" @kernel_1 : memref<4xi32>
   }
 
-  func @launch_func_undefined_function(%sz : index) {
+  func.func @launch_func_undefined_function(%sz : index) {
    // expected-error@+1 {{referenced kernel '@kernels::@kernel_1' is not a function}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
@@ -143,7 +143,7 @@ module attributes {gpu.container_module} {
     }
   }
 
-  func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<f32>) {
+  func.func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<f32>) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<f32>)
     return
@@ -159,7 +159,7 @@ module attributes {gpu.container_module} {
     }
   }
 
-  func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<f32>) {
+  func.func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<f32>) {
    // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<f32>)
     return
@@ -175,7 +175,7 @@ module attributes {gpu.container_module} {
     }
   }
 
-  func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr<f32>) {
+  func.func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr<f32>) {
    // expected-error@+1 {{got 2 kernel operands but expected 1}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<f32>, %arg : !llvm.ptr<f32>)
     return
@@ -191,7 +191,7 @@ module attributes {gpu.container_module} {
     }
   }
 
-  func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
+  func.func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
    // expected-err@+1 {{type of function argument 0 does not match}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : f32)
     return
@@ -201,7 +201,7 @@ module attributes {gpu.container_module} {
 // -----
 
 module attributes {gpu.container_module} {
-  func @launch_func_kernel_operand_attr(%sz : index) {
+  func.func @launch_func_kernel_operand_attr(%sz : index) {
    // expected-error@+1 {{expected arguments without attributes}}
     gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%sz : index {foo})
     return
@@ -210,7 +210,7 @@ module attributes {gpu.container_module} {
 
 // -----
 
-func @reduce_no_op_no_body(%arg0 : f32) {
+func.func @reduce_no_op_no_body(%arg0 : f32) {
  // expected-error@+1 {{expected either an op attribute or a non-empty body}}
   %res = "gpu.all_reduce"(%arg0) ({}) : (f32) -> (f32)
   return
@@ -218,7 +218,7 @@ func @reduce_no_op_no_body(%arg0 : f32) {
 
 // -----
 
-func @reduce_op_and_body(%arg0 : f32) {
+func.func @reduce_op_and_body(%arg0 : f32) {
  // expected-error@+1 {{expected either an op attribute or a non-empty body}}
   %res = "gpu.all_reduce"(%arg0) ({
   ^bb(%lhs : f32, %rhs : f32):
@@ -229,7 +229,7 @@ func @reduce_op_and_body(%arg0 : f32) {
 
 // -----
 
-func @reduce_invalid_op(%arg0 : f32) {
+func.func @reduce_invalid_op(%arg0 : f32) {
  // expected-error@+1 {{invalid op kind}}
   %res = gpu.all_reduce foo %arg0 {} : (f32) -> (f32)
   return
@@ -237,7 +237,7 @@ func @reduce_invalid_op(%arg0 : f32) {
 
 // -----
 
-func @reduce_invalid_op_type(%arg0 : f32) {
+func.func @reduce_invalid_op_type(%arg0 : f32) {
  // expected-error@+1 {{`and` accumulator is only compatible with Integer type}}
   %res = gpu.all_reduce and %arg0 {} : (f32) -> (f32)
   return
@@ -245,7 +245,7 @@ func @reduce_invalid_op_type(%arg0 : f32) {
 
 // -----
 
-func @reduce_incorrect_region_arguments(%arg0 : f32) {
+func.func @reduce_incorrect_region_arguments(%arg0 : f32) {
  // expected-error@+1 {{expected two region arguments}}
   %res = gpu.all_reduce %arg0 {
   ^bb(%lhs : f32):
@@ -256,7 +256,7 @@ func @reduce_incorrect_region_arguments(%arg0 : f32) {
 
 // -----
 
-func @reduce_incorrect_region_arguments(%arg0 : f32) {
+func.func @reduce_incorrect_region_arguments(%arg0 : f32) {
  // expected-error@+1 {{incorrect region argument type}}
   %res = gpu.all_reduce %arg0 {
   ^bb(%lhs : f32, %rhs : i32):
@@ -267,7 +267,7 @@ func @reduce_incorrect_region_arguments(%arg0 : f32) {
 
 // -----
 
-func @reduce_incorrect_yield(%arg0 : f32) {
+func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{expected one gpu.yield operand}}
   %res = gpu.all_reduce %arg0 {
   ^bb(%lhs : f32, %rhs : f32):
@@ -278,7 +278,7 @@ func @reduce_incorrect_yield(%arg0 : f32) {
 
 // -----
 
-func @reduce_incorrect_yield(%arg0 : f32) {
+func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{incorrect gpu.yield type}}
   %res = gpu.all_reduce %arg0 {
   ^bb(%lhs : f32, %rhs : f32):
@@ -290,7 +290,7 @@ func @reduce_incorrect_yield(%arg0 : f32) {
 
 // -----
 
-func @reduce_incorrect_yield(%arg0 : f32) {
+func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{expected gpu.yield op in region}}
   %res = gpu.all_reduce %arg0 {
   ^bb(%lhs : f32, %rhs : f32):
@@ -301,7 +301,7 @@ func @reduce_incorrect_yield(%arg0 : f32) {
 
 // -----
 
-func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
+func.func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
  // expected-error@+1 {{op failed to verify that all of {value, result} have same type}}
   %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = #gpu<"shuffle_mode xor"> } : (f32, i32, i32) -> (i32, i1)
   return
@@ -309,7 +309,7 @@ func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
 
 // -----
 
-func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
+func.func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
  // expected-error@+1 {{operand #0 must be i32 or f32}}
   %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index
   return
@@ -421,42 +421,42 @@ module {
 
 // -----
 
-func @sync_wait_with_result() {
+func.func @sync_wait_with_result() {
   // expected-error @+1 {{cannot name an operation with no results}}
   %t = gpu.wait
 }
 
 // -----
 
-func @async_wait_without_result() {
+func.func @async_wait_without_result() {
   // expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}}
   gpu.wait async
 }
 
 // -----
 
-func @memcpy_incompatible_type(%dst : memref<?xf32>, %src : memref<?xi32>) {
+func.func @memcpy_incompatible_type(%dst : memref<?xf32>, %src : memref<?xi32>) {
   // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible element type}}
   gpu.memcpy %dst, %src  : memref<?xf32>, memref<?xi32>
 }
 
 // -----
 
-func @memcpy_incompatible_shape(%dst : memref<7xf32>, %src : memref<9xf32>) {
+func.func @memcpy_incompatible_shape(%dst : memref<7xf32>, %src : memref<9xf32>) {
   // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible shape}}
   gpu.memcpy %dst, %src  : memref<7xf32>, memref<9xf32>
 }
 
 // -----
 
-func @memset_incompatible_shape(%dst : memref<?xf32>, %value : i32) {
+func.func @memset_incompatible_shape(%dst : memref<?xf32>, %value : i32) {
   // expected-error @+1 {{'gpu.memset' op failed to verify that all of {dst, value} have same element type}}
   gpu.memset %dst, %value  : memref<?xf32>, i32
 }
 
 // -----
 
-func @mmamatrix_invalid_shape(){
+func.func @mmamatrix_invalid_shape(){
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
     %i = arith.constant 16 : index
     // expected-error @+1 {{MMAMatrixType must have exactly two dimensions}}
@@ -466,7 +466,7 @@ func @mmamatrix_invalid_shape(){
 
 // -----
 
-func @mmamatrix_operand_type(){
+func.func @mmamatrix_operand_type(){
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
     %i = arith.constant 16 : index
     // expected-error @+1 {{operand expected to be one of AOp, BOp or COp}}
@@ -476,7 +476,7 @@ func @mmamatrix_operand_type(){
 
 // -----
 
-func @mmamatrix_invalid_element_type(){
+func.func @mmamatrix_invalid_element_type(){
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
     %i = arith.constant 16 : index
     // expected-error @+1 {{MMAMatrixType elements must be F16 or F32}}
@@ -488,7 +488,7 @@ func @mmamatrix_invalid_element_type(){
 
 #layout_map_col_major = affine_map<(i, j) -> (j, i)>
 
-func @mmaLoadOp_identity_layout(){
+func.func @mmaLoadOp_identity_layout(){
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, #layout_map_col_major, 3>
     %i = arith.constant 16 : index
     // expected-error @+1 {{expected source memref most minor dim must have unit stride}}
@@ -498,7 +498,7 @@ func @mmaLoadOp_identity_layout(){
 
 // -----
 
-func @mmaLoadOp_invalid_mem_space(){
+func.func @mmaLoadOp_invalid_mem_space(){
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 5>
     %i = arith.constant 16 : index
     // expected-error @+1 {{source memorySpace kGenericMemorySpace, kSharedMemorySpace or kGlobalMemorySpace only allowed}}
@@ -510,7 +510,7 @@ func @mmaLoadOp_invalid_mem_space(){
 
 #layout_map_col_major = affine_map<(i, j) -> (j, i)>
 
-func @wmmaStoreOp_invalid_map(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
+func.func @wmmaStoreOp_invalid_map(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
     %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, #layout_map_col_major, 3>
     %i = arith.constant 16 : index
     %j = arith.constant 16 : index
@@ -521,7 +521,7 @@ func @wmmaStoreOp_invalid_map(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
 
 // -----
 
-func @wmmaStoreOp_invalid_mem_space(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
+func.func @wmmaStoreOp_invalid_mem_space(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
     %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 5>
     %i = arith.constant 16 : index
     %j = arith.constant 16 : index
@@ -532,7 +532,7 @@ func @wmmaStoreOp_invalid_mem_space(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -
 
 // -----
 
-func @wmmaStoreOp_invalid_store_operand(%arg0 : !gpu.mma_matrix<16x16xf16, "AOp">) -> () {
+func.func @wmmaStoreOp_invalid_store_operand(%arg0 : !gpu.mma_matrix<16x16xf16, "AOp">) -> () {
     %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
     %i = arith.constant 16 : index
     %j = arith.constant 16 : index
@@ -543,7 +543,7 @@ func @wmmaStoreOp_invalid_store_operand(%arg0 : !gpu.mma_matrix<16x16xf16, "AOp"
 
 // -----
 
-func @wmmaMmaOp_invalid_operand_order(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
+func.func @wmmaMmaOp_invalid_operand_order(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
     // expected-error @+1 {{operands must be in the order AOp, BOp, COp}}
     %D = gpu.subgroup_mma_compute %B, %A, %C : !gpu.mma_matrix<16x16xf16, "BOp">, !gpu.mma_matrix<16x16xf16, "AOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
     return
@@ -551,7 +551,7 @@ func @wmmaMmaOp_invalid_operand_order(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B
 
 // -----
 
-func @wmmaMmaOp_invalid_operand_shapes(%A : !gpu.mma_matrix<16x32xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
+func.func @wmmaMmaOp_invalid_operand_shapes(%A : !gpu.mma_matrix<16x32xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
     // expected-error @+1 {{operand shapes do not satisfy matmul constraints}}
     %D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x32xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
     return
@@ -559,7 +559,7 @@ func @wmmaMmaOp_invalid_operand_shapes(%A : !gpu.mma_matrix<16x32xf16, "AOp">, %
 
 // -----
 
-func @async_cp_memory_space(%dst : memref<16xf32>, %src : memref<16xf32>, %i : index) -> () {
+func.func @async_cp_memory_space(%dst : memref<16xf32>, %src : memref<16xf32>, %i : index) -> () {
   // expected-error @+1 {{destination memref must have memory space 3}}
   gpu.device_async_copy %src[%i], %dst[%i], 16 : memref<16xf32> to memref<16xf32>
   return
@@ -567,7 +567,7 @@ func @async_cp_memory_space(%dst : memref<16xf32>, %src : memref<16xf32>, %i : i
 
 // -----
 
-func @async_cp_memref_type(%dst : memref<16xi32, 3>, %src : memref<16xf32>, %i : index) -> () {
+func.func @async_cp_memref_type(%dst : memref<16xi32, 3>, %src : memref<16xf32>, %i : index) -> () {
   // expected-error @+1 {{source and destination must have the same element type}}
   gpu.device_async_copy %src[%i], %dst[%i], 16 : memref<16xf32> to memref<16xi32, 3>
   return
@@ -575,7 +575,7 @@ func @async_cp_memref_type(%dst : memref<16xi32, 3>, %src : memref<16xf32>, %i :
 
 // -----
 
-func @async_cp_num_src_indices(%dst : memref<16xf32, 3>, %src : memref<16x16xf32>, %i : index) -> () {
+func.func @async_cp_num_src_indices(%dst : memref<16xf32, 3>, %src : memref<16x16xf32>, %i : index) -> () {
   // expected-error @+1 {{expected 2 source indices, got 1}}
   gpu.device_async_copy %src[%i], %dst[%i], 16 : memref<16x16xf32> to memref<16xf32, 3>
   return
@@ -583,7 +583,7 @@ func @async_cp_num_src_indices(%dst : memref<16xf32, 3>, %src : memref<16x16xf32
 
 // -----
 
-func @async_cp_num_dst_indices(%dst : memref<16x16xf32, 3>, %src : memref<16xf32>, %i : index) -> () {
+func.func @async_cp_num_dst_indices(%dst : memref<16x16xf32, 3>, %src : memref<16xf32>, %i : index) -> () {
   // expected-error @+1 {{expected 2 destination indices, got 1}}
   gpu.device_async_copy %src[%i], %dst[%i], 16 : memref<16xf32> to memref<16x16xf32, 3>
   return
@@ -591,7 +591,7 @@ func @async_cp_num_dst_indices(%dst : memref<16x16xf32, 3>, %src : memref<16xf32
 
 // -----
 
-func @async_cp_num_src_stride(
+func.func @async_cp_num_src_stride(
   %dst : memref<200x100xf32, 3>,
   %src : memref<200x100xf32, affine_map<(d0, d1) -> (200*d0 + 2*d1)>>,
   %i : index) -> () {
@@ -603,7 +603,7 @@ func @async_cp_num_src_stride(
 
 // -----
 
-func @async_cp_num_dst_stride(
+func.func @async_cp_num_dst_stride(
   %dst : memref<200x100xf32, affine_map<(d0, d1) -> (200*d0 + 2*d1)>, 3>,
   %src : memref<200x100xf32>,
   %i : index) -> () {
@@ -616,7 +616,7 @@ func @async_cp_num_dst_stride(
 // -----
 
 // Number of symbol operand count less than memref symbol count.
-func @alloc() {
+func.func @alloc() {
   // expected-error@+1 {{symbol operand count does not equal memref symbol count}}
    %1 = gpu.alloc() : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
    return
@@ -625,7 +625,7 @@ func @alloc() {
 // -----
 
 // Number of symbol operand count greater than memref symbol count.
-func @alloc() {
+func.func @alloc() {
    %0 = arith.constant 7 : index
   // expected-error@+1 {{symbol operand count does not equal memref symbol count}}
    %1 = gpu.alloc()[%0] : memref<2x4xf32, 1>
@@ -635,7 +635,7 @@ func @alloc() {
 // -----
 
 // Number of dynamic dimension operand count greater than memref dynamic dimension count.
-func @alloc() {
+func.func @alloc() {
    %0 = arith.constant 7 : index
   // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}}
    %1 = gpu.alloc(%0, %0) : memref<2x?xf32, 1>
@@ -645,7 +645,7 @@ func @alloc() {
 // -----
 
 // Number of dynamic dimension operand count less than memref dynamic dimension count.
-func @alloc() {
+func.func @alloc() {
    %0 = arith.constant 7 : index
   // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}}
    %1 = gpu.alloc(%0) : memref<2x?x?xf32, 1>

diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir
index 28b26d823da55..ff5b07f1844c5 100644
--- a/mlir/test/Dialect/GPU/mapping.mlir
+++ b/mlir/test/Dialect/GPU/mapping.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s
 
-func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
+func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                     %arg3 : index) {
   %zero = arith.constant 0 : index
   %one = arith.constant 1 : index
@@ -26,7 +26,7 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
 
 // -----
 
-func @parallel_loop_4d(%arg0 : index, %arg1 : index, %arg2 : index,
+func.func @parallel_loop_4d(%arg0 : index, %arg1 : index, %arg2 : index,
                        %arg3 : index) {
   %zero = arith.constant 0 : index
   %one = arith.constant 1 : index

diff  --git a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir
index 2a52e18d2e8d5..9b8d1c9c1785d 100644
--- a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir
+++ b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt --gpu-kernel-outlining --convert-gpu-to-nvvm %s | FileCheck %s
 
-func @main() {
+func.func @main() {
   %data = memref.alloc() : memref<2x6xf32>
   %sum = memref.alloc() : memref<2xf32>
   %mul = memref.alloc() : memref<2xf32>

diff  --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 140a806cfc92e..541607ecbbf1c 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -3,7 +3,7 @@
 module attributes {gpu.container_module} {
 
   // CHECK-LABEL:func @no_args(%{{.*}}: index)
-  func @no_args(%sz : index) {
+  func.func @no_args(%sz : index) {
     // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
     gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
                threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
@@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL:func @args(%{{.*}}: index, %{{.*}}: index, %{{.*}}: f32, %{{.*}}: memref<?xf32, 1>) {
-  func @args(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
+  func.func @args(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
     // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
     gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
                threads(%tx, %ty, %tz) in (%block_x = %thrd, %block_y = %thrd, %block_z = %thrd) {
@@ -78,7 +78,7 @@ module attributes {gpu.container_module} {
     }
   }
 
-  func @foo() {
+  func.func @foo() {
     %0 = "op"() : () -> (f32)
     %1 = "op"() : () -> (memref<?xf32, 1>)
     // CHECK: %{{.*}} = arith.constant 8
@@ -165,7 +165,7 @@ module attributes {gpu.container_module} {
     } ) {function_type = (f32, memref<?xf32>) -> (), gpu.kernel, sym_name = "kernel_1", workgroup_attributions = 1: i64} : () -> ()
   }
 
-  func @alloc() {
+  func.func @alloc() {
     // CHECK-LABEL: func @alloc()
 
     // CHECK: %[[m0:.*]] = gpu.alloc () : memref<13xf32, 1>
@@ -182,13 +182,13 @@ module attributes {gpu.container_module} {
     return
   }
 
-  func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token {
+  func.func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token {
     // CHECK-LABEL: func @async_token({{.*}}: !gpu.async.token)
     // CHECK: return {{.*}} : !gpu.async.token
     return %arg0 : !gpu.async.token
   }
 
-  func @async_wait() {
+  func.func @async_wait() {
     // CHECK-LABEL: func @async_wait
     // CHECK: %[[t0:.*]] = gpu.wait async
     %0 = gpu.wait async
@@ -205,7 +205,7 @@ module attributes {gpu.container_module} {
     return
   }
 
-  func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) {
+  func.func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) {
     // CHECK-LABEL: func @memcpy
     // CHECK: gpu.memcpy {{.*}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1>
     gpu.memcpy %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1>
@@ -216,7 +216,7 @@ module attributes {gpu.container_module} {
     return
   }
 
-  func @memset(%dst : memref<3x7xf32>, %value : f32) {
+  func.func @memset(%dst : memref<3x7xf32>, %value : f32) {
     // CHECK-LABEL: func @memset
     // CHECK: gpu.memset {{.*}}, {{.*}} : memref<3x7xf32>, f32
     gpu.memset %dst, %value : memref<3x7xf32>, f32
@@ -227,7 +227,7 @@ module attributes {gpu.container_module} {
     return
   }
 
-  func @mmamatrix_valid_element_type(%src : memref<32x32xf16, affine_map<(d0, d1) -> (d0 * 64 + d1)>>){
+  func.func @mmamatrix_valid_element_type(%src : memref<32x32xf16, affine_map<(d0, d1) -> (d0 * 64 + d1)>>){
     // CHECK-LABEL: func @mmamatrix_valid_element_type
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
     // CHECK: %[[wg:.*]] = memref.alloca()
@@ -247,7 +247,7 @@ module attributes {gpu.container_module} {
     return
   }
 
-  func @async_cp(%dst : memref<2x7x5xf32, 3>, %src : memref<4x5xf32>){
+  func.func @async_cp(%dst : memref<2x7x5xf32, 3>, %src : memref<4x5xf32>){
     // CHECK-LABEL: func @async_cp
     %c0 = arith.constant 0 : index
     // CHECK: gpu.device_async_copy %{{.*}}[{{.*}}, {{.*}}], %{{.*}}[{{.*}}, {{.*}}, {{.*}}], 4 : memref<4x5xf32> to memref<2x7x5xf32, 3>
@@ -260,7 +260,7 @@ module attributes {gpu.container_module} {
   }
 
   // CHECK-LABEL: func @set_default_device
-  func @set_default_device(%arg0: i32) {
+  func.func @set_default_device(%arg0: i32) {
     // CHECK: gpu.set_default_device
     gpu.set_default_device %arg0
     return

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index fc418ca442c46..c43ba3e19c6e7 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -4,7 +4,7 @@
 // CHECK: module attributes {gpu.container_module}
 
 // CHECK-LABEL: func @launch()
-func @launch() {
+func.func @launch() {
   // CHECK: %[[ARG0:.*]] = "op"() : () -> f32
   %0 = "op"() : () -> (f32)
   // CHECK: %[[ARG1:.*]] = "op"() : () -> memref<?xf32, 1>
@@ -63,7 +63,7 @@ func @launch() {
 
 // CHECK: module attributes {gpu.container_module}
 // CHECK-LABEL: @multiple_launches
-func @multiple_launches() {
+func.func @multiple_launches() {
   // CHECK: %[[CST:.*]] = arith.constant 8 : index
   %cst = arith.constant 8 : index
   // CHECK: gpu.launch_func @multiple_launches_kernel::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
@@ -94,7 +94,7 @@ func @multiple_launches() {
 // -----
 
 // CHECK-LABEL: @extra_constants_not_inlined
-func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
+func.func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
   // CHECK: %[[CST:.*]] = arith.constant 8 : index
   %cst = arith.constant 8 : index
   %cst2 = arith.constant 2 : index
@@ -120,7 +120,7 @@ func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
 
 // CHECK-LABEL: @extra_constants
 // CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
-func @extra_constants(%arg0: memref<?xf32>) {
+func.func @extra_constants(%arg0: memref<?xf32>) {
   // CHECK: %[[CST:.*]] = arith.constant 8 : index
   %cst = arith.constant 8 : index
   %cst2 = arith.constant 2 : index
@@ -149,7 +149,7 @@ func @extra_constants(%arg0: memref<?xf32>) {
 
 // CHECK-LABEL: @extra_constants_noarg
 // CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>
-func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
+func.func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
   // CHECK: %[[CST:.*]] = arith.constant 8 : index
   %cst = arith.constant 8 : index
   %cst2 = arith.constant 2 : index
@@ -177,7 +177,7 @@ func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
 // -----
 
 // CHECK-LABEL: @multiple_uses
-func @multiple_uses(%arg0 : memref<?xf32>) {
+func.func @multiple_uses(%arg0 : memref<?xf32>) {
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   // CHECK: gpu.func {{.*}} {
@@ -202,7 +202,7 @@ func @multiple_uses(%arg0 : memref<?xf32>) {
 // -----
 
 // CHECK-LABEL: @multiple_uses2
-func @multiple_uses2(%arg0 : memref<*xf32>) {
+func.func @multiple_uses2(%arg0 : memref<*xf32>) {
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   %d = memref.dim %arg0, %c2 : memref<*xf32>
@@ -233,7 +233,7 @@ func @multiple_uses2(%arg0 : memref<*xf32>) {
 llvm.mlir.global internal @global(42 : i64) : i64
 
 //CHECK-LABEL: @function_call
-func @function_call(%arg0 : memref<?xf32>) {
+func.func @function_call(%arg0 : memref<?xf32>) {
   %cst = arith.constant 8 : index
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
@@ -247,12 +247,12 @@ func @function_call(%arg0 : memref<?xf32>) {
   return
 }
 
-func @device_function() {
+func.func @device_function() {
   call @recursive_device_function() : () -> ()
   return
 }
 
-func @recursive_device_function() {
+func.func @recursive_device_function() {
   call @recursive_device_function() : () -> ()
   return
 }

diff  --git a/mlir/test/Dialect/GPU/sink-ops.mlir b/mlir/test/Dialect/GPU/sink-ops.mlir
index e2b4c238b9ce2..8f5924f2e3265 100644
--- a/mlir/test/Dialect/GPU/sink-ops.mlir
+++ b/mlir/test/Dialect/GPU/sink-ops.mlir
@@ -3,7 +3,7 @@
 
 // CHECK-LABEL: @extra_constants
 // CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
-func @extra_constants(%arg0: memref<?xf32>) {
+func.func @extra_constants(%arg0: memref<?xf32>) {
   %cst = arith.constant 8 : index
   %cst2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
@@ -29,7 +29,7 @@ func @extra_constants(%arg0: memref<?xf32>) {
 
 // CHECK-LABEL: @extra_constants_not_inlined
 // CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
-func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
+func.func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
   %cst = arith.constant 8 : index
   %cst2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
@@ -54,7 +54,7 @@ func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
 // -----
 
 // CHECK-LABEL: @multiple_uses
-func @multiple_uses(%arg0 : memref<?xf32>) {
+func.func @multiple_uses(%arg0 : memref<?xf32>) {
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   // CHECK: gpu.launch blocks
@@ -76,7 +76,7 @@ func @multiple_uses(%arg0 : memref<?xf32>) {
 // -----
 
 // CHECK-LABEL: @multiple_uses2
-func @multiple_uses2(%arg0 : memref<*xf32>) {
+func.func @multiple_uses2(%arg0 : memref<*xf32>) {
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   %d = memref.dim %arg0, %c2 : memref<*xf32>


        


More information about the Mlir-commits mailing list