[Mlir-commits] [mlir] 3655069 - [mlir] Move the Builtin FuncOp to the Func dialect

River Riddle llvmlistbot at llvm.org
Wed Mar 16 17:15:26 PDT 2022


Author: River Riddle
Date: 2022-03-16T17:07:03-07:00
New Revision: 36550692340e4235550f3b9808b5e406821cc979

URL: https://github.com/llvm/llvm-project/commit/36550692340e4235550f3b9808b5e406821cc979
DIFF: https://github.com/llvm/llvm-project/commit/36550692340e4235550f3b9808b5e406821cc979.diff

LOG: [mlir] Move the Builtin FuncOp to the Func dialect

This commit moves FuncOp out of the builtin dialect, and into the Func
dialect. This move has been planned in some capacity from the moment
we made FuncOp an operation (years ago). This commit handles the
functional aspects of the move, but various aspects are left untouched
to ease migration: func::FuncOp is re-exported into mlir to reduce
the actual API churn, the assembly format still accepts the unqualified
`func`. These temporary measures will remain for a little while to
simplify migration before being removed.

Differential Revision: https://reviews.llvm.org/D121266

Added: 
    

Modified: 
    flang/include/flang/Lower/CallInterface.h
    flang/include/flang/Optimizer/Builder/FIRBuilder.h
    flang/include/flang/Optimizer/Builder/LowLevelIntrinsics.h
    flang/test/Fir/Todo/boxproc_host.fir
    flang/test/Fir/Todo/unboxproc.fir
    flang/unittests/Optimizer/Builder/CharacterTest.cpp
    flang/unittests/Optimizer/Builder/ComplexTest.cpp
    flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
    flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
    mlir/benchmark/python/benchmark_sparse.py
    mlir/benchmark/python/common.py
    mlir/docs/Bindings/Python.md
    mlir/docs/Dialects/ShapeDialect.md
    mlir/docs/PassManagement.md
    mlir/docs/SymbolsAndSymbolTables.md
    mlir/docs/TargetLLVMIR.md
    mlir/include/mlir/Dialect/Affine/LoopUtils.h
    mlir/include/mlir/Dialect/Affine/Passes.h
    mlir/include/mlir/Dialect/Affine/Utils.h
    mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
    mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
    mlir/include/mlir/Dialect/Func/IR/FuncOps.h
    mlir/include/mlir/Dialect/Func/IR/FuncOps.td
    mlir/include/mlir/Dialect/Func/Transforms/Passes.td
    mlir/include/mlir/Dialect/GPU/Passes.h
    mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h
    mlir/include/mlir/Dialect/Linalg/Passes.h
    mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
    mlir/include/mlir/Dialect/Quant/Passes.h
    mlir/include/mlir/Dialect/SCF/Utils/Utils.h
    mlir/include/mlir/Dialect/Shape/IR/Shape.h
    mlir/include/mlir/IR/BuiltinOps.h
    mlir/include/mlir/IR/BuiltinOps.td
    mlir/include/mlir/IR/FunctionInterfaces.td
    mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
    mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
    mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp
    mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
    mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
    mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
    mlir/lib/Dialect/Affine/Transforms/PassDetail.h
    mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
    mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
    mlir/lib/Dialect/Affine/Utils/Utils.cpp
    mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
    mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
    mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h
    mlir/lib/Dialect/Func/IR/FuncOps.cpp
    mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp
    mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
    mlir/lib/Dialect/GPU/Transforms/PassDetail.h
    mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp
    mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
    mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
    mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
    mlir/lib/Dialect/Linalg/Transforms/PassDetail.h
    mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
    mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt
    mlir/lib/Dialect/Quant/Transforms/PassDetail.h
    mlir/lib/Dialect/SCF/Transforms/PassDetail.h
    mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt
    mlir/lib/Dialect/SPIRV/Transforms/DecorateCompositeTypeLayoutPass.cpp
    mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
    mlir/lib/Dialect/Shape/IR/CMakeLists.txt
    mlir/lib/Dialect/Shape/Transforms/PassDetail.h
    mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
    mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
    mlir/lib/Dialect/Tensor/Transforms/PassDetail.h
    mlir/lib/Dialect/Vector/Transforms/PassDetail.h
    mlir/lib/IR/BuiltinDialect.cpp
    mlir/lib/Pass/PassRegistry.cpp
    mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp
    mlir/lib/Transforms/Utils/InliningUtils.cpp
    mlir/python/mlir/dialects/_builtin_ops_ext.py
    mlir/python/mlir/dialects/_func_ops_ext.py
    mlir/test/Analysis/test-alias-analysis-modref.mlir
    mlir/test/Analysis/test-alias-analysis.mlir
    mlir/test/Analysis/test-dominance.mlir
    mlir/test/Analysis/test-liveness.mlir
    mlir/test/Analysis/test-match-reduction.mlir
    mlir/test/Analysis/test-shape-fn-report.mlir
    mlir/test/Analysis/test-topoligical-sort.mlir
    mlir/test/CAPI/execution_engine.c
    mlir/test/CAPI/ir.c
    mlir/test/CAPI/pass.c
    mlir/test/Conversion/ArithmeticToLLVM/arith-to-llvm.mlir
    mlir/test/Conversion/ArithmeticToLLVM/convert-nd-vector-to-llvmir.mlir
    mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
    mlir/test/Conversion/ComplexToStandard/full-conversion.mlir
    mlir/test/Conversion/FuncToLLVM/func-memref.mlir
    mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir
    mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
    mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
    mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
    mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
    mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir
    mlir/test/Conversion/SCFToGPU/step_one.mlir
    mlir/test/Conversion/SCFToGPU/step_positive.mlir
    mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
    mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
    mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
    mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
    mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
    mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir
    mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir
    mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
    mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir
    mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
    mlir/test/Dialect/Affine/canonicalize.mlir
    mlir/test/Dialect/Affine/loop-unswitch.mlir
    mlir/test/Dialect/Affine/memref-stride-calculation.mlir
    mlir/test/Dialect/ControlFlow/canonicalize.mlir
    mlir/test/Dialect/LLVMIR/terminator.mlir
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
    mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
    mlir/test/Dialect/Linalg/detensorize_0d.mlir
    mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
    mlir/test/Dialect/Linalg/detensorize_if.mlir
    mlir/test/Dialect/Linalg/detensorize_trivial.mlir
    mlir/test/Dialect/Linalg/detensorize_while.mlir
    mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
    mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
    mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
    mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir
    mlir/test/Dialect/Linalg/fusion-sequence.mlir
    mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir
    mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
    mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir
    mlir/test/Dialect/Quant/canonicalize.mlir
    mlir/test/Dialect/SCF/canonicalize.mlir
    mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
    mlir/test/Dialect/SCF/loop-range.mlir
    mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
    mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir
    mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
    mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
    mlir/test/Dialect/Shape/invalid.mlir
    mlir/test/Dialect/Tensor/invalid.mlir
    mlir/test/Dialect/Vector/canonicalize.mlir
    mlir/test/IR/diagnostic-handler-filter.mlir
    mlir/test/IR/generic-visitors-interrupt.mlir
    mlir/test/IR/generic-visitors.mlir
    mlir/test/IR/invalid-func-op.mlir
    mlir/test/IR/invalid-ops.mlir
    mlir/test/IR/invalid.mlir
    mlir/test/IR/print-ir-invalid.mlir
    mlir/test/IR/test-matchers.mlir
    mlir/test/IR/traits.mlir
    mlir/test/IR/visitors.mlir
    mlir/test/IR/wrapping_op.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
    mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py
    mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
    mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
    mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
    mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
    mlir/test/Pass/dynamic-pipeline.mlir
    mlir/test/Pass/interface-pass.mlir
    mlir/test/Pass/invalid-parent.mlir
    mlir/test/Pass/ir-printing.mlir
    mlir/test/Pass/pass-timing.mlir
    mlir/test/Pass/pipeline-options-parsing.mlir
    mlir/test/Pass/pipeline-parsing.mlir
    mlir/test/Pass/pipeline-stats.mlir
    mlir/test/Pass/run-reproducer.mlir
    mlir/test/Target/Cpp/invalid.mlir
    mlir/test/Target/LLVMIR/arm-neon-2d.mlir
    mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir
    mlir/test/Transforms/canonicalize-block-merge.mlir
    mlir/test/Transforms/canonicalize-dce.mlir
    mlir/test/Transforms/canonicalize-td.mlir
    mlir/test/Transforms/canonicalize.mlir
    mlir/test/Transforms/constant-fold.mlir
    mlir/test/Transforms/cse.mlir
    mlir/test/Transforms/parallel-loop-collapsing.mlir
    mlir/test/Transforms/parametric-mapping.mlir
    mlir/test/Transforms/print-op-graph.mlir
    mlir/test/Transforms/sccp-structured.mlir
    mlir/test/Transforms/sccp.mlir
    mlir/test/Transforms/single-parallel-loop-collapsing.mlir
    mlir/test/Transforms/test-canonicalize-filter.mlir
    mlir/test/Transforms/test-canonicalize.mlir
    mlir/test/Transforms/test-legalizer-analysis.mlir
    mlir/test/Transforms/test-legalizer-full.mlir
    mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
    mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
    mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
    mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
    mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
    mlir/test/lib/Dialect/Linalg/TestLinalgElementwiseFusion.cpp
    mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp
    mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp
    mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
    mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp
    mlir/test/lib/Dialect/SPIRV/TestAvailability.cpp
    mlir/test/lib/Dialect/Test/TestDialect.h
    mlir/test/lib/Dialect/Tosa/CMakeLists.txt
    mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
    mlir/test/lib/Pass/TestPassManager.cpp
    mlir/test/mlir-cpu-runner/async-error.mlir
    mlir/test/mlir-cpu-runner/async-group.mlir
    mlir/test/mlir-cpu-runner/async-value.mlir
    mlir/test/mlir-cpu-runner/async.mlir
    mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
    mlir/test/mlir-cpu-runner/copy.mlir
    mlir/test/mlir-cpu-runner/global-memref.mlir
    mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
    mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
    mlir/test/mlir-cpu-runner/memref-reshape.mlir
    mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
    mlir/test/mlir-cpu-runner/unranked-memref.mlir
    mlir/test/mlir-cpu-runner/utils.mlir
    mlir/test/mlir-lsp-server/diagnostics.test
    mlir/test/mlir-lsp-server/hover.test
    mlir/test/mlir-opt/async.mlir
    mlir/test/mlir-pdll/Parser/expr-failure.pdll
    mlir/test/mlir-pdll/Parser/stmt-failure.pdll
    mlir/test/python/dialects/builtin.py
    mlir/test/python/dialects/func.py
    mlir/test/python/dialects/linalg/opdsl/emit_convolution.py
    mlir/test/python/dialects/linalg/opdsl/emit_fill.py
    mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
    mlir/test/python/dialects/linalg/opdsl/emit_misc.py
    mlir/test/python/dialects/linalg/opdsl/emit_pooling.py
    mlir/test/python/dialects/linalg/ops.py
    mlir/test/python/dialects/math_dialect.py
    mlir/test/python/dialects/scf.py
    mlir/test/python/dialects/shape.py
    mlir/test/python/dialects/tensor.py
    mlir/test/python/dialects/vector.py
    mlir/test/python/integration/dialects/linalg/opsrun.py
    mlir/test/python/ir/blocks.py
    mlir/test/python/ir/operation.py
    mlir/test/python/pass_manager.py
    mlir/unittests/ExecutionEngine/Invoke.cpp
    mlir/unittests/IR/InterfaceAttachmentTest.cpp
    mlir/unittests/Pass/AnalysisManagerTest.cpp
    mlir/unittests/Pass/CMakeLists.txt
    mlir/unittests/Pass/PassManagerTest.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Lower/CallInterface.h b/flang/include/flang/Lower/CallInterface.h
index e2dba7c8d0eeb..a1ec396d59bcd 100644
--- a/flang/include/flang/Lower/CallInterface.h
+++ b/flang/include/flang/Lower/CallInterface.h
@@ -29,6 +29,7 @@
 
 #include "flang/Common/reference.h"
 #include "flang/Evaluate/characteristics.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/BuiltinOps.h"
 #include <memory>
 #include <optional>

diff  --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 032c881fb195c..9c7761d503dc8 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -19,6 +19,7 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Support/KindMapping.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 

diff  --git a/flang/include/flang/Optimizer/Builder/LowLevelIntrinsics.h b/flang/include/flang/Optimizer/Builder/LowLevelIntrinsics.h
index 7ef5ff18c86d5..edfb1e8e48ed9 100644
--- a/flang/include/flang/Optimizer/Builder/LowLevelIntrinsics.h
+++ b/flang/include/flang/Optimizer/Builder/LowLevelIntrinsics.h
@@ -14,8 +14,10 @@
 #define FLANG_OPTIMIZER_BUILDER_LOWLEVELINTRINSICS_H
 
 namespace mlir {
+namespace func {
 class FuncOp;
-}
+} // namespace func
+} // namespace mlir
 namespace fir {
 class FirOpBuilder;
 }
@@ -23,10 +25,10 @@ class FirOpBuilder;
 namespace fir::factory {
 
 /// Get the `llvm.stacksave` intrinsic.
-mlir::FuncOp getLlvmStackSave(FirOpBuilder &builder);
+mlir::func::FuncOp getLlvmStackSave(FirOpBuilder &builder);
 
 /// Get the `llvm.stackrestore` intrinsic.
-mlir::FuncOp getLlvmStackRestore(FirOpBuilder &builder);
+mlir::func::FuncOp getLlvmStackRestore(FirOpBuilder &builder);
 
 } // namespace fir::factory
 

diff  --git a/flang/test/Fir/Todo/boxproc_host.fir b/flang/test/Fir/Todo/boxproc_host.fir
index a4cba400d1b57..392cbc0d1f71f 100644
--- a/flang/test/Fir/Todo/boxproc_host.fir
+++ b/flang/test/Fir/Todo/boxproc_host.fir
@@ -3,7 +3,7 @@
 // Test that `fir.boxproc_host` fails conversion to llvm.
 // At the moment this test fails since `fir.boxproc` type does not have a conversion.
 
-// CHECK: failed to legalize operation 'builtin.func'
+// CHECK: failed to legalize operation 'func.func'
 func @test(%bproc: !fir.boxproc<(i32) -> ()>) {
   %tuple = fir.boxproc_host %bproc : (!fir.boxproc<(i32) -> ()>) -> (!fir.ref<tuple<i32,f64>>)
   return

diff  --git a/flang/test/Fir/Todo/unboxproc.fir b/flang/test/Fir/Todo/unboxproc.fir
index e272f4dab2087..849f9b10acc0c 100644
--- a/flang/test/Fir/Todo/unboxproc.fir
+++ b/flang/test/Fir/Todo/unboxproc.fir
@@ -4,7 +4,7 @@
 // Not implemented yet.
 // Currently fails since coversion for boxproc type is not implemented.
 
-// CHECK: failed to legalize operation 'builtin.func'
+// CHECK: failed to legalize operation 'func.func'
 func @boxing_match(%bproc: !fir.boxproc<(i32) -> ()>) {
   %ubproc:2 = fir.unboxproc %bproc : (!fir.boxproc<(i32) -> ()>) -> ((i32) -> (), !fir.ref<tuple<i32,f64>>)
   return

diff  --git a/flang/unittests/Optimizer/Builder/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/CharacterTest.cpp
index 563ee275bac1f..85ca91a686e71 100644
--- a/flang/unittests/Optimizer/Builder/CharacterTest.cpp
+++ b/flang/unittests/Optimizer/Builder/CharacterTest.cpp
@@ -16,6 +16,8 @@
 struct CharacterTest : public testing::Test {
 public:
   void SetUp() override {
+    fir::support::loadDialects(context);
+
     kindMap = std::make_unique<fir::KindMapping>(&context,
         "i10:80,l3:24,a1:8,r54:Double,c20:X86_FP80,r11:PPC_FP128,"
         "r12:FP128,r13:X86_FP80,r14:Double,r15:Float,r16:Half,r23:BFloat");
@@ -31,7 +33,6 @@ struct CharacterTest : public testing::Test {
     mod.push_back(mod);
     builder.setInsertionPointToStart(entryBlock);
 
-    fir::support::loadDialects(context);
     firBuilder = std::make_unique<fir::FirOpBuilder>(mod, *kindMap);
   }
 

diff  --git a/flang/unittests/Optimizer/Builder/ComplexTest.cpp b/flang/unittests/Optimizer/Builder/ComplexTest.cpp
index 54e335e7c031b..b44ee6a3320d9 100644
--- a/flang/unittests/Optimizer/Builder/ComplexTest.cpp
+++ b/flang/unittests/Optimizer/Builder/ComplexTest.cpp
@@ -15,6 +15,8 @@
 struct ComplexTest : public testing::Test {
 public:
   void SetUp() override {
+    fir::support::loadDialects(context);
+
     mlir::OpBuilder builder(&context);
     auto loc = builder.getUnknownLoc();
 
@@ -27,7 +29,6 @@ struct ComplexTest : public testing::Test {
     mod.push_back(mod);
     builder.setInsertionPointToStart(entryBlock);
 
-    fir::support::loadDialects(context);
     kindMap = std::make_unique<fir::KindMapping>(&context);
     firBuilder = std::make_unique<fir::FirOpBuilder>(mod, *kindMap);
     helper = std::make_unique<fir::factory::Complex>(*firBuilder, loc);

diff  --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
index bb8577a87fe50..925df97c1d0c6 100644
--- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
+++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
@@ -17,6 +17,8 @@ using namespace mlir;
 struct FIRBuilderTest : public testing::Test {
 public:
   void SetUp() override {
+    fir::support::loadDialects(context);
+
     llvm::ArrayRef<fir::KindTy> defs;
     fir::KindMapping kindMap(&context, defs);
     mlir::OpBuilder builder(&context);
@@ -31,7 +33,6 @@ struct FIRBuilderTest : public testing::Test {
     mod.push_back(mod);
     builder.setInsertionPointToStart(entryBlock);
 
-    fir::support::loadDialects(context);
     firBuilder = std::make_unique<fir::FirOpBuilder>(mod, kindMap);
   }
 

diff  --git a/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h b/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
index 6ce204d35e0c6..42f9683fffe36 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
+++ b/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
@@ -17,6 +17,8 @@
 struct RuntimeCallTest : public testing::Test {
 public:
   void SetUp() override {
+    fir::support::loadDialects(context);
+
     mlir::OpBuilder builder(&context);
     auto loc = builder.getUnknownLoc();
 
@@ -29,7 +31,6 @@ struct RuntimeCallTest : public testing::Test {
     mod.push_back(mod);
     builder.setInsertionPointToStart(entryBlock);
 
-    fir::support::loadDialects(context);
     kindMap = std::make_unique<fir::KindMapping>(&context);
     firBuilder = std::make_unique<fir::FirOpBuilder>(mod, *kindMap);
 

diff  --git a/mlir/benchmark/python/benchmark_sparse.py b/mlir/benchmark/python/benchmark_sparse.py
index bfcff3ed459cf..a96dbb2be8ae0 100644
--- a/mlir/benchmark/python/benchmark_sparse.py
+++ b/mlir/benchmark/python/benchmark_sparse.py
@@ -43,7 +43,7 @@ def benchmark_sparse_mlir_multiplication():
         param2_type = ir.RankedTensorType.get([1500, 2000], f64)
         result_type = ir.RankedTensorType.get([1000, 2000], f64)
         with ir.InsertionPoint(module.body):
-            @builtin.FuncOp.from_py_func(param1_type, param2_type, result_type)
+            @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
             def sparse_kernel(x, y, z):
                 return matmul_dsl(x, y, outs=[z])
 

diff  --git a/mlir/benchmark/python/common.py b/mlir/benchmark/python/common.py
index 9ab6b1aaea0dc..da0ef20a18296 100644
--- a/mlir/benchmark/python/common.py
+++ b/mlir/benchmark/python/common.py
@@ -41,7 +41,7 @@ def create_sparse_np_tensor(dimensions, number_of_elements):
     return tensor
 
 
-def get_kernel_func_from_module(module: ir.Module) -> builtin.FuncOp:
+def get_kernel_func_from_module(module: ir.Module) -> func.FuncOp:
     """Takes an mlir module object and extracts the function object out of it.
     This function only works for a module with one region, one block, and one
     operation.
@@ -55,12 +55,12 @@ def get_kernel_func_from_module(module: ir.Module) -> builtin.FuncOp:
     return module.operation.regions[0].blocks[0].operations[0]
 
 
-def emit_timer_func() -> builtin.FuncOp:
+def emit_timer_func() -> func.FuncOp:
     """Returns the declaration of nano_time function. If nano_time function is
     used, the `MLIR_RUNNER_UTILS` and `MLIR_C_RUNNER_UTILS` must be included.
     """
     i64_type = ir.IntegerType.get_signless(64)
-    nano_time = builtin.FuncOp(
+    nano_time = func.FuncOp(
         "nano_time", ([], [i64_type]), visibility="private")
     nano_time.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
     return nano_time
@@ -76,7 +76,7 @@ def emit_benchmark_wrapped_main_func(func, timer_func):
     """
     i64_type = ir.IntegerType.get_signless(64)
     memref_of_i64_type = ir.MemRefType.get([-1], i64_type)
-    wrapped_func = builtin.FuncOp(
+    wrapped_func = func.FuncOp(
         # Same signature and an extra buffer of indices to save timings.
         "main",
         (func.arguments.types + [memref_of_i64_type], func.type.results),

diff  --git a/mlir/docs/Bindings/Python.md b/mlir/docs/Bindings/Python.md
index f867948cc0078..cb4f3b1bf0d09 100644
--- a/mlir/docs/Bindings/Python.md
+++ b/mlir/docs/Bindings/Python.md
@@ -639,7 +639,7 @@ from mlir.dialects import builtin
 with Context():
   module = Module.create()
   with InsertionPoint(module.body), Location.unknown():
-    func = builtin.FuncOp("main", ([], []))
+    func = func.FuncOp("main", ([], []))
 ```
 
 Also see below for constructors generated from ODS.
@@ -660,12 +660,12 @@ with Context():
   with InsertionPoint(module.body), Location.unknown():
     # Operations can be created in a generic way.
     func = Operation.create(
-        "builtin.func", results=[], operands=[],
+        "func.func", results=[], operands=[],
         attributes={"type":TypeAttr.get(FunctionType.get([], []))},
         successors=None, regions=1)
     # The result will be downcasted to the concrete `OpView` subclass if
     # available.
-    assert isinstance(func, builtin.FuncOp)
+    assert isinstance(func, func.FuncOp)
 ```
 
 Regions are created for an operation when constructing it on the C++ side. They

diff  --git a/mlir/docs/Dialects/ShapeDialect.md b/mlir/docs/Dialects/ShapeDialect.md
index b38f2afd51c37..a86988122bbcb 100644
--- a/mlir/docs/Dialects/ShapeDialect.md
+++ b/mlir/docs/Dialects/ShapeDialect.md
@@ -34,7 +34,7 @@ below[^wip_form1]:
 ```mlir
 shape.function_library @shplib {
 
-builtin.func @matmul(%lhs: !shape.value_shape, %rhs: !shape.value_shape) -> !shape.shape {
+func.func @matmul(%lhs: !shape.value_shape, %rhs: !shape.value_shape) -> !shape.shape {
   %c1 = shape.const_size 1
   %c2 = shape.const_size 2
   // We could also allow rank etc operations directly on value_shape too, that

diff  --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md
index db9c383e1a5b2..61418d438f2f8 100644
--- a/mlir/docs/PassManagement.md
+++ b/mlir/docs/PassManagement.md
@@ -532,12 +532,12 @@ A pipeline view that models the structure of the pass manager, this is the
 default view:
 
 ```shell
-$ mlir-opt -pass-pipeline='builtin.func(my-pass,my-pass)' foo.mlir -pass-statistics
+$ mlir-opt -pass-pipeline='func.func(my-pass,my-pass)' foo.mlir -pass-statistics
 
 ===-------------------------------------------------------------------------===
                          ... Pass statistics report ...
 ===-------------------------------------------------------------------------===
-'builtin.func' Pipeline
+'func.func' Pipeline
   MyPass
     (S) 15 exampleStat - An example statistic
   VerifierPass
@@ -551,7 +551,7 @@ A list view that aggregates the statistics of all instances of a specific pass
 together:
 
 ```shell
-$ mlir-opt -pass-pipeline='builtin.func(my-pass, my-pass)' foo.mlir -pass-statistics -pass-statistics-display=list
+$ mlir-opt -pass-pipeline='func.func(my-pass, my-pass)' foo.mlir -pass-statistics -pass-statistics-display=list
 
 ===-------------------------------------------------------------------------===
                          ... Pass statistics report ...
@@ -657,7 +657,7 @@ options           ::= '{' (key ('=' value)?)+ '}'
 
 *   `op-name`
     *   This corresponds to the mnemonic name of an operation to run passes on,
-        e.g. `builtin.func` or `builtin.module`.
+        e.g. `func.func` or `builtin.module`.
 *   `pass-name` | `pass-pipeline-name`
     *   This corresponds to the argument of a registered pass or pass pipeline,
         e.g. `cse` or `canonicalize`.
@@ -676,7 +676,7 @@ $ mlir-opt foo.mlir -cse -canonicalize -convert-func-to-llvm='use-bare-ptr-memre
 Can also be specified as (via the `-pass-pipeline` flag):
 
 ```shell
-$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}'
+$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}'
 ```
 
 In order to support round-tripping a pass to the textual representation using
@@ -997,7 +997,7 @@ pipeline. This display mode is available in mlir-opt via
 `-mlir-timing-display=list`.
 
 ```shell
-$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing -mlir-timing-display=list
+$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing -mlir-timing-display=list
 
 ===-------------------------------------------------------------------------===
                       ... Pass execution timing report ...
@@ -1022,7 +1022,7 @@ the most time, and can also be used to identify when analyses are being
 invalidated and recomputed. This is the default display mode.
 
 ```shell
-$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
+$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
 
 ===-------------------------------------------------------------------------===
                       ... Pass execution timing report ...
@@ -1030,7 +1030,7 @@ $ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse,can
   Total Execution Time: 0.0249 seconds
 
    ---Wall Time---  --- Name ---
-   0.0058 ( 70.8%)  'builtin.func' Pipeline
+   0.0058 ( 70.8%)  'func.func' Pipeline
    0.0004 (  4.3%)    CSE
    0.0002 (  2.6%)      (A) DominanceInfo
    0.0004 (  4.8%)    VerifierPass
@@ -1053,7 +1053,7 @@ perceived time, or clock time, whereas the `User Time` will display the total
 cpu time.
 
 ```shell
-$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
+$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
 
 ===-------------------------------------------------------------------------===
                       ... Pass execution timing report ...
@@ -1061,7 +1061,7 @@ $ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,canonicalize)' -convert-fun
   Total Execution Time: 0.0078 seconds
 
    ---User Time---   ---Wall Time---  --- Name ---
-   0.0177 ( 88.5%)     0.0057 ( 71.3%)  'builtin.func' Pipeline
+   0.0177 ( 88.5%)     0.0057 ( 71.3%)  'func.func' Pipeline
    0.0044 ( 22.0%)     0.0015 ( 18.9%)    CSE
    0.0029 ( 14.5%)     0.0012 ( 15.2%)      (A) DominanceInfo
    0.0038 ( 18.9%)     0.0015 ( 18.7%)    VerifierPass
@@ -1089,7 +1089,7 @@ this instrumentation:
     *   Print the IR before every pass in the pipeline.
 
 ```shell
-$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse)' -print-ir-before=cse
+$ mlir-opt foo.mlir -pass-pipeline='func.func(cse)' -print-ir-before=cse
 
 *** IR Dump Before CSE ***
 func @simple_constant() -> (i32, i32) {
@@ -1105,7 +1105,7 @@ func @simple_constant() -> (i32, i32) {
     *   Print the IR after every pass in the pipeline.
 
 ```shell
-$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse)' -print-ir-after=cse
+$ mlir-opt foo.mlir -pass-pipeline='func.func(cse)' -print-ir-after=cse
 
 *** IR Dump After CSE ***
 func @simple_constant() -> (i32, i32) {
@@ -1126,7 +1126,7 @@ func @simple_constant() -> (i32, i32) {
         printing.
 
 ```shell
-$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,cse)' -print-ir-after=cse -print-ir-after-change
+$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,cse)' -print-ir-after=cse -print-ir-after-change
 
 *** IR Dump After CSE ***
 func @simple_constant() -> (i32, i32) {
@@ -1141,7 +1141,7 @@ func @simple_constant() -> (i32, i32) {
         above.
 
 ```shell
-$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,bad-pass)' -print-ir-failure
+$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,bad-pass)' -print-ir-failure
 
 *** IR Dump After BadPass Failed ***
 func @simple_constant() -> (i32, i32) {
@@ -1157,9 +1157,9 @@ func @simple_constant() -> (i32, i32) {
         is disabled(`-mlir-disable-threading`)
 
 ```shell
-$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse)' -print-ir-after=cse -print-ir-module-scope
+$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse)' -print-ir-after=cse -print-ir-module-scope
 
-*** IR Dump After CSE ***  ('builtin.func' operation: @bar)
+*** IR Dump After CSE ***  ('func.func' operation: @bar)
 func @bar(%arg0: f32, %arg1: f32) -> f32 {
   ...
 }
@@ -1170,7 +1170,7 @@ func @simple_constant() -> (i32, i32) {
   return %c1_i32, %c1_i32_0 : i32, i32
 }
 
-*** IR Dump After CSE ***  ('builtin.func' operation: @simple_constant)
+*** IR Dump After CSE ***  ('func.func' operation: @simple_constant)
 func @bar(%arg0: f32, %arg1: f32) -> f32 {
   ...
 }
@@ -1194,7 +1194,7 @@ was executing, as well as the initial IR before any passes were run. A potential
 reproducible may have the form:
 
 ```mlir
-// configuration: -pass-pipeline='builtin.func(cse,canonicalize),inline' -verify-each
+// configuration: -pass-pipeline='func.func(cse,canonicalize),inline' -verify-each
 
 module {
   func @foo() {
@@ -1229,7 +1229,7 @@ For example, if the failure in the previous example came from `canonicalize`,
 the following reproducer will be generated:
 
 ```mlir
-// configuration: -pass-pipeline='builtin.func(canonicalize)' -verify-each -mlir-disable-threading
+// configuration: -pass-pipeline='func.func(canonicalize)' -verify-each -mlir-disable-threading
 
 module {
   func @foo() {

diff  --git a/mlir/docs/SymbolsAndSymbolTables.md b/mlir/docs/SymbolsAndSymbolTables.md
index 3786682f23697..d9c30b8d735c5 100644
--- a/mlir/docs/SymbolsAndSymbolTables.md
+++ b/mlir/docs/SymbolsAndSymbolTables.md
@@ -31,7 +31,7 @@ defines a [`SymbolTable`](#symbol-table). The name of a symbol *must* be unique
 within the parent `SymbolTable`. This name is semantically similar to an SSA
 result value, and may be referred to by other operations to provide a symbolic
 link, or use, to the symbol. An example of a `Symbol` operation is
-[`builtin.func`](Dialects/Builtin.md/#func-mlirfuncop). `builtin.func` defines a
+[`func.func`](Dialects/Func.md/#funcfunc-funcop). `func.func` defines a
 symbol name, which is [referred to](#referencing-a-symbol) by operations like
 [`func.call`](Dialects/Func.md/#funccall-callop).
 
@@ -77,7 +77,7 @@ operation that is also a [symbol table](#symbol-table).
 Below is an example of how an operation can reference a symbol operation:
 
 ```mlir
-// This `builtin.func` operation defines a symbol named `symbol`.
+// This `func.func` operation defines a symbol named `symbol`.
 func @symbol()
 
 // Our `foo.user` operation contains a SymbolRefAttr with the name of the
@@ -106,7 +106,7 @@ module {
 // Here we define another nested symbol table, except this time it also defines
 // a symbol.
 module @module_symbol {
-  // This `builtin.func` operation defines a symbol named `nested_symbol`.
+  // This `func.func` operation defines a symbol named `nested_symbol`.
   func @nested_symbol()
 }
 

diff  --git a/mlir/docs/TargetLLVMIR.md b/mlir/docs/TargetLLVMIR.md
index e3ebe9973ba23..b313af6671ed9 100644
--- a/mlir/docs/TargetLLVMIR.md
+++ b/mlir/docs/TargetLLVMIR.md
@@ -348,7 +348,7 @@ individual scalar arguments.
 
 Examples:
 
-This convention is implemented in the conversion of `builtin.func` and `func.call` to
+This convention is implemented in the conversion of `func.func` and `func.call` to
 the LLVM dialect, with the former unpacking the descriptor into a set of
 individual values and the latter packing those values back into a descriptor so
 as to make it transparently usable by other operations. Conversions from other

diff  --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
index bcaf864a43331..09b7717b39131 100644
--- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -22,13 +22,16 @@
 namespace mlir {
 class AffineForOp;
 class AffineMap;
-class FuncOp;
 class LoopLikeOpInterface;
 struct MemRefRegion;
 class OpBuilder;
 class Value;
 class ValueRange;
 
+namespace func {
+class FuncOp;
+} // namespace func
+
 namespace scf {
 class ForOp;
 class ParallelOp;
@@ -79,7 +82,7 @@ LogicalResult promoteIfSingleIteration(AffineForOp forOp);
 
 /// Promotes all single iteration AffineForOp's in the Function, i.e., moves
 /// their body into the containing Block.
-void promoteSingleIterationLoops(FuncOp f);
+void promoteSingleIterationLoops(func::FuncOp f);
 
 /// Skew the operations in an affine.for's body with the specified
 /// operation-wise shifts. The shifts are with respect to the original execution
@@ -92,7 +95,7 @@ LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
 /// Identify valid and profitable bands of loops to tile. This is currently just
 /// a temporary placeholder to test the mechanics of tiled code generation.
 /// Returns all maximal outermost perfect loop nests to tile.
-void getTileableBands(FuncOp f,
+void getTileableBands(func::FuncOp f,
                       std::vector<SmallVector<AffineForOp, 6>> *bands);
 
 /// Tiles the specified band of perfectly nested loops creating tile-space loops
@@ -259,8 +262,8 @@ LogicalResult coalesceLoops(MutableArrayRef<AffineForOp> loops);
 void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef<Value> processorId,
                            ArrayRef<Value> numProcessors);
 
-/// Gathers all AffineForOps in 'builtin.func' grouped by loop depth.
-void gatherLoops(FuncOp func,
+/// Gathers all AffineForOps in 'func.func' grouped by loop depth.
+void gatherLoops(func::FuncOp func,
                  std::vector<SmallVector<AffineForOp, 2>> &depthToLoops);
 
 /// Creates an AffineForOp while ensuring that the lower and upper bounds are

diff  --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 8a94262a298b2..2e18a6fb7f3a1 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -18,6 +18,9 @@
 #include <limits>
 
 namespace mlir {
+namespace func {
+class FuncOp;
+} // namespace func
 
 class AffineForOp;
 
@@ -28,53 +31,56 @@ enum FusionMode { Greedy, ProducerConsumer, Sibling };
 /// Creates a simplification pass for affine structures (maps and sets). In
 /// addition, this pass also normalizes memrefs to have the trivial (identity)
 /// layout map.
-std::unique_ptr<OperationPass<FuncOp>> createSimplifyAffineStructuresPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createSimplifyAffineStructuresPass();
 
 /// Creates a loop invariant code motion pass that hoists loop invariant
 /// operations out of affine loops.
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createAffineLoopInvariantCodeMotionPass();
 
 /// Creates a pass to convert all parallel affine.for's into 1-d affine.parallel
 /// ops.
-std::unique_ptr<OperationPass<FuncOp>> createAffineParallelizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> createAffineParallelizePass();
 
 /// Apply normalization transformations to affine loop-like ops.
-std::unique_ptr<OperationPass<FuncOp>> createAffineLoopNormalizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> createAffineLoopNormalizePass();
 
 /// Performs packing (or explicit copying) of accessed memref regions into
 /// buffers in the specified faster memory space through either pointwise copies
 /// or DMA operations.
-std::unique_ptr<OperationPass<FuncOp>> createAffineDataCopyGenerationPass(
+std::unique_ptr<OperationPass<func::FuncOp>> createAffineDataCopyGenerationPass(
     unsigned slowMemorySpace, unsigned fastMemorySpace,
     unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,
     uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());
 /// Overload relying on pass options for initialization.
-std::unique_ptr<OperationPass<FuncOp>> createAffineDataCopyGenerationPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createAffineDataCopyGenerationPass();
 
 /// Creates a pass to replace affine memref accesses by scalars using store to
 /// load forwarding and redundant load elimination; consequently also eliminate
 /// dead allocs.
-std::unique_ptr<OperationPass<FuncOp>> createAffineScalarReplacementPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createAffineScalarReplacementPass();
 
 /// Creates a pass that transforms perfectly nested loops with independent
 /// bounds into a single loop.
-std::unique_ptr<OperationPass<FuncOp>> createLoopCoalescingPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createLoopCoalescingPass();
 
 /// Creates a loop fusion pass which fuses loops according to type of fusion
 /// specified in `fusionMode`. Buffers of size less than or equal to
 /// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createLoopFusionPass(unsigned fastMemorySpace = 0,
                      uint64_t localBufSizeThreshold = 0,
                      bool maximalFusion = false,
                      enum FusionMode fusionMode = FusionMode::Greedy);
 
 /// Creates a pass to perform tiling on loop nests.
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createLoopTilingPass(uint64_t cacheSizeBytes);
 /// Overload relying on pass options for initialization.
-std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
 
 /// Creates a loop unrolling pass with the provided parameters.
 /// 'getUnrollFactor' is a function callback for clients to supply a function
@@ -82,7 +88,7 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
 /// factors supplied through other means. If -1 is passed as the unrollFactor
 /// and no callback is provided, anything passed from the command-line (if at
 /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
-std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLoopUnrollPass(
     int unrollFactor = -1, bool unrollUpToFactor = false,
     bool unrollFull = false,
     const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
@@ -90,19 +96,19 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
 /// Creates a loop unroll jam pass to unroll jam by the specified factor. A
 /// factor of -1 lets the pass use the default factor or the one on the command
 /// line if provided.
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createLoopUnrollAndJamPass(int unrollJamFactor = -1);
 
 /// Creates a pass to pipeline explicit movement of data across levels of the
 /// memory hierarchy.
-std::unique_ptr<OperationPass<FuncOp>> createPipelineDataTransferPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createPipelineDataTransferPass();
 
 /// Creates a pass to vectorize loops, operations and data types using a
 /// target-independent, n-D super-vector abstraction.
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createSuperVectorizePass(ArrayRef<int64_t> virtualVectorSize);
 /// Overload relying on pass options for initialization.
-std::unique_ptr<OperationPass<FuncOp>> createSuperVectorizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> createSuperVectorizePass();
 
 //===----------------------------------------------------------------------===//
 // Registration

diff  --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
index 90d8363fc0614..345f955e2061c 100644
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -21,10 +21,13 @@ class AffineForOp;
 class AffineIfOp;
 class AffineParallelOp;
 class DominanceInfo;
-class FuncOp;
 class Operation;
 class PostDominanceInfo;
 
+namespace func {
+class FuncOp;
+} // namespace func
+
 namespace memref {
 class AllocOp;
 } // namespace memref
@@ -96,7 +99,7 @@ struct VectorizationStrategy {
 /// Replace affine store and load accesses by scalars by forwarding stores to
 /// loads and eliminate invariant affine loads; consequently, eliminate dead
 /// allocs.
-void affineScalarReplace(FuncOp f, DominanceInfo &domInfo,
+void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
                          PostDominanceInfo &postDomInfo);
 
 /// Vectorizes affine loops in 'loops' using the n-D vectorization factors in

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 164c8106909c5..4a72934d4f506 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -4,6 +4,10 @@
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
+namespace func {
+class FuncOp;
+} // namespace func
+
 namespace bufferization {
 struct OneShotBufferizationOptions;
 
@@ -31,7 +35,7 @@ std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
 
 /// Creates a pass that finalizes a partial bufferization by removing remaining
 /// bufferization.to_tensor and bufferization.to_memref operations.
-std::unique_ptr<OperationPass<FuncOp>> createFinalizingBufferizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> createFinalizingBufferizePass();
 
 /// Create a pass that bufferizes all ops that implement BufferizableOpInterface
 /// with One-Shot Bufferize.

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index a5970ee7b5a01..ee7579b1a3edb 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -11,7 +11,7 @@
 
 include "mlir/Pass/PassBase.td"
 
-def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
+def BufferDeallocation : Pass<"buffer-deallocation", "func::FuncOp"> {
   let summary = "Adds all required dealloc operations for all allocations in "
                 "the input program";
   let description = [{
@@ -88,7 +88,7 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
   let constructor = "mlir::bufferization::createBufferDeallocationPass()";
 }
 
-def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
+def BufferHoisting : Pass<"buffer-hoisting", "func::FuncOp"> {
   let summary = "Optimizes placement of allocation operations by moving them "
                 "into common dominators and out of nested regions";
   let description = [{
@@ -98,7 +98,7 @@ def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
   let constructor = "mlir::bufferization::createBufferHoistingPass()";
 }
 
-def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "FuncOp"> {
+def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "func::FuncOp"> {
   let summary = "Optimizes placement of allocation operations by moving them "
                 "out of loop nests";
   let description = [{
@@ -133,7 +133,7 @@ def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">
   let dependentDialects = ["memref::MemRefDialect"];
 }
 
-def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
+def FinalizingBufferize : Pass<"finalizing-bufferize", "func::FuncOp"> {
   let summary = "Finalize a partial bufferization";
   let description = [{
     A bufferize pass that finalizes a partial bufferization by removing
@@ -231,7 +231,7 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
   let constructor = "mlir::bufferization::createOneShotBufferizePass()";
 }
 
-def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
+def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "func::FuncOp"> {
   let summary = "Promotes heap-based allocations to automatically managed "
                 "stack-based allocations";
   let description = [{

diff  --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
index 8e1bf85bda115..987fe67234bc7 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
@@ -11,10 +11,11 @@
 
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
+#include "mlir/IR/FunctionInterfaces.h"
 #include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/SymbolTable.h"
 #include "mlir/Interfaces/CallInterfaces.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
@@ -29,4 +30,25 @@ class PatternRewriter;
 
 #include "mlir/Dialect/Func/IR/FuncOpsDialect.h.inc"
 
+namespace mlir {
+/// FIXME: This is a temporary using directive to ease the transition of FuncOp
+/// to the Func dialect. This will be removed after all uses are updated.
+using FuncOp = func::FuncOp;
+} // namespace mlir
+
+namespace llvm {
+
+/// Allow stealing the low bits of FuncOp.
+template <>
+struct PointerLikeTypeTraits<mlir::func::FuncOp> {
+  static inline void *getAsVoidPointer(mlir::func::FuncOp val) {
+    return const_cast<void *>(val.getAsOpaquePointer());
+  }
+  static inline mlir::func::FuncOp getFromVoidPointer(void *p) {
+    return mlir::func::FuncOp::getFromOpaquePointer(p);
+  }
+  static constexpr int numLowBitsAvailable = 3;
+};
+} // namespace llvm
+
 #endif // MLIR_DIALECT_FUNC_IR_OPS_H

diff  --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
index a2fd77305daa9..e405271aa62ec 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
@@ -13,6 +13,7 @@ include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/CallInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/IR/FunctionInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
@@ -201,6 +202,120 @@ def ConstantOp : Func_Op<"constant",
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// FuncOp
+//===----------------------------------------------------------------------===//
+
+def FuncOp : Func_Op<"func", [
+  AffineScope, AutomaticAllocationScope, CallableOpInterface,
+  FunctionOpInterface, IsolatedFromAbove, Symbol
+]> {
+  let summary = "An operation with a name containing a single `SSACFG` region";
+  let description = [{
+    Operations within the function cannot implicitly capture values defined
+    outside of the function, i.e. Functions are `IsolatedFromAbove`. All
+    external references must use function arguments or attributes that establish
+    a symbolic connection (e.g. symbols referenced by name via a string
+    attribute like SymbolRefAttr). An external function declaration (used when
+    referring to a function declared in some other module) has no body. While
+    the MLIR textual form provides a nice inline syntax for function arguments,
+    they are internally represented as “block arguments” to the first block in
+    the region.
+
+    Only dialect attribute names may be specified in the attribute dictionaries
+    for function arguments, results, or the function itself.
+
+    Example:
+
+    ```mlir
+    // External function definitions.
+    func.func @abort()
+    func.func @scribble(i32, i64, memref<? x 128 x f32, #layout_map0>) -> f64
+
+    // A function that returns its argument twice:
+    func.func @count(%x: i64) -> (i64, i64)
+      attributes {fruit: "banana"} {
+      return %x, %x: i64, i64
+    }
+
+    // A function with an argument attribute
+    func.func @example_fn_arg(%x: i32 {swift.self = unit})
+
+    // A function with a result attribute
+    func.func @example_fn_result() -> (f64 {dialectName.attrName = 0 : i64})
+
+    // A function with an attribute
+    func.func @example_fn_attr() attributes {dialectName.attrName = false}
+    ```
+  }];
+
+  let arguments = (ins SymbolNameAttr:$sym_name,
+                       TypeAttrOf<FunctionType>:$type,
+                       OptionalAttr<StrAttr>:$sym_visibility);
+  let regions = (region AnyRegion:$body);
+
+  let builders = [OpBuilder<(ins
+    "StringRef":$name, "FunctionType":$type,
+    CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs,
+    CArg<"ArrayRef<DictionaryAttr>", "{}">:$argAttrs)
+  >];
+  let extraClassDeclaration = [{
+    static FuncOp create(Location location, StringRef name, FunctionType type,
+                         ArrayRef<NamedAttribute> attrs = {});
+    static FuncOp create(Location location, StringRef name, FunctionType type,
+                         Operation::dialect_attr_range attrs);
+    static FuncOp create(Location location, StringRef name, FunctionType type,
+                         ArrayRef<NamedAttribute> attrs,
+                         ArrayRef<DictionaryAttr> argAttrs);
+
+    /// Create a deep copy of this function and all of its blocks, remapping any
+    /// operands that use values outside of the function using the map that is
+    /// provided (leaving them alone if no entry is present). If the mapper
+    /// contains entries for function arguments, these arguments are not
+    /// included in the new function. Replaces references to cloned sub-values
+    /// with the corresponding value that is copied, and adds those mappings to
+    /// the mapper.
+    FuncOp clone(BlockAndValueMapping &mapper);
+    FuncOp clone();
+
+    /// Clone the internal blocks and attributes from this function into dest.
+    /// Any cloned blocks are appended to the back of dest. This function
+    /// asserts that the attributes of the current function and dest are
+    /// compatible.
+    void cloneInto(FuncOp dest, BlockAndValueMapping &mapper);
+
+    //===------------------------------------------------------------------===//
+    // CallableOpInterface
+    //===------------------------------------------------------------------===//
+
+    /// Returns the region on the current operation that is callable. This may
+    /// return null in the case of an external callable object, e.g. an external
+    /// function.
+    ::mlir::Region *getCallableRegion() { return isExternal() ? nullptr : &getBody(); }
+
+    /// Returns the results types that the callable region produces when
+    /// executed.
+    ArrayRef<Type> getCallableResults() { return getType().getResults(); }
+
+    //===------------------------------------------------------------------===//
+    // FunctionOpInterface Methods
+    //===------------------------------------------------------------------===//
+
+    /// Returns the argument types of this function.
+    ArrayRef<Type> getArgumentTypes() { return getType().getInputs(); }
+
+    /// Returns the result types of this function.
+    ArrayRef<Type> getResultTypes() { return getType().getResults(); }
+
+    //===------------------------------------------------------------------===//
+    // SymbolOpInterface Methods
+    //===------------------------------------------------------------------===//
+
+    bool isDeclaration() { return isExternal(); }
+  }];
+  let hasCustomAssemblyFormat = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // ReturnOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/include/mlir/Dialect/Func/Transforms/Passes.td b/mlir/include/mlir/Dialect/Func/Transforms/Passes.td
index d928e2f069458..54fe4fdd6bbe6 100644
--- a/mlir/include/mlir/Dialect/Func/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Func/Transforms/Passes.td
@@ -14,9 +14,9 @@ include "mlir/Pass/PassBase.td"
 def FuncBufferize : Pass<"func-bufferize", "ModuleOp"> {
   let summary = "Bufferize func/call/return ops";
   let description = [{
-    A bufferize pass that bufferizes builtin.func and func.call ops.
+    A bufferize pass that bufferizes func.func and func.call ops.
 
-    Because this pass updates builtin.func ops, it must be a module pass. It is
+    Because this pass updates func.func ops, it must be a module pass. It is
     useful to keep this pass separate from other bufferizations so that the
     other ones can be run at function-level in parallel.
 

diff  --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h
index 729363ace255b..b9b127c6f5dbf 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -23,6 +23,10 @@ class Module;
 } // namespace llvm
 
 namespace mlir {
+namespace func {
+class FuncOp;
+} // namespace func
+
 /// Pass that moves ops which are likely an index computation into gpu.launch
 /// body.
 std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass();
@@ -33,7 +37,7 @@ std::unique_ptr<OperationPass<ModuleOp>>
 createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
 
 /// Rewrites a function region so that GPU ops execute asynchronously.
-std::unique_ptr<OperationPass<FuncOp>> createGpuAsyncRegionPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
 
 /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
 void populateGpuAllReducePatterns(RewritePatternSet &patterns);

diff  --git a/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h b/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h
index 9b35bb3496b0d..350b41ac62535 100644
--- a/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h
+++ b/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h
@@ -14,7 +14,9 @@
 #include "mlir/IR/OpDefinition.h"
 
 namespace mlir {
+namespace func {
 class FuncOp;
+} // namespace func
 
 namespace linalg {
 
@@ -155,7 +157,8 @@ class LinalgDependenceGraph {
   static StringRef getDependenceTypeStr(DependenceType depType);
 
   // Builds a linalg dependence graph for the ops of type LinalgOp under `f`.
-  static LinalgDependenceGraph buildDependenceGraph(Aliases &aliases, FuncOp f);
+  static LinalgDependenceGraph buildDependenceGraph(Aliases &aliases,
+                                                    func::FuncOp f);
   LinalgDependenceGraph(Aliases &aliases, ArrayRef<LinalgOp> ops);
 
   /// Returns the X such that op -> X is a dependence of type dt.

diff  --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
index ac9b3b2ace240..fadfdb438f0f5 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -18,6 +18,13 @@
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
+namespace func {
+class FuncOp;
+} // namespace func
+
+// TODO: Remove when all references have been updated.
+using FuncOp = func::FuncOp;
+
 namespace bufferization {
 struct OneShotBufferizationOptions;
 } // namespace bufferization
@@ -31,29 +38,32 @@ std::unique_ptr<Pass> createFoldReshapeOpsByLinearizationPass();
 
 std::unique_ptr<Pass> createLinalgNamedOpConversionPass();
 
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {},
                        linalg::LinalgTilingLoopType loopType =
                            linalg::LinalgTilingLoopType::Loops);
 
-std::unique_ptr<OperationPass<FuncOp>>
+std::unique_ptr<OperationPass<func::FuncOp>>
 createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca);
-std::unique_ptr<OperationPass<FuncOp>> createLinalgPromotionPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgPromotionPass();
 
-std::unique_ptr<OperationPass<FuncOp>> createLinalgInlineScalarOperandsPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLinalgInlineScalarOperandsPass();
 
 /// Create a pass to convert Linalg operations to scf.for loops and
 /// memref.load/memref.store accesses.
-std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToLoopsPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createConvertLinalgToLoopsPass();
 
 /// Create a pass to convert Linalg operations to scf.parallel loops and
 /// memref.load/memref.store accesses.
-std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToParallelLoopsPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createConvertLinalgToParallelLoopsPass();
 
 /// Create a pass to convert Linalg operations to affine.for loops and
 /// affine_load/affine_store accesses.
 /// Placeholder for now, this is NYI.
-std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createConvertLinalgToAffineLoopsPass();
 
 /// This pass implements a cross-dialect bufferization approach and performs an
 /// analysis to determine which op operands and results may be bufferized in the
@@ -68,11 +78,11 @@ std::unique_ptr<Pass> createLinalgComprehensiveModuleBufferizePass(
 
 /// Create a pass to convert Linalg operations which work on tensors to use
 /// buffers instead.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgBufferizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgBufferizePass();
 
 /// Create a pass to convert named Linalg operations to Linalg generic
 /// operations.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgGeneralizationPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgGeneralizationPass();
 
 /// Create a pass to convert Linalg operations to equivalent operations that
 /// work on primitive types, if possible.
@@ -82,27 +92,28 @@ std::unique_ptr<Pass> createLinalgDetensorizePass();
 /// Linalg strategy passes.
 //===----------------------------------------------------------------------===//
 /// Create a LinalgStrategyTileAndFusePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyTileAndFusePass(
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLinalgStrategyTileAndFusePass(
     StringRef opName = "", const linalg::LinalgTilingAndFusionOptions &opt = {},
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyTilePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyTilePass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyTilePass(
     StringRef opName = "",
     const linalg::LinalgTilingOptions &opt = linalg::LinalgTilingOptions(),
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyPadPass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyPadPass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyPadPass(
     StringRef opName = "",
     const linalg::LinalgPaddingOptions &opt = linalg::LinalgPaddingOptions(),
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyPromotePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyPromotePass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyPromotePass(
     StringRef opName = "",
     const linalg::LinalgPromotionOptions &opt =
         linalg::LinalgPromotionOptions(),
@@ -110,24 +121,25 @@ std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyPromotePass(
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyGeneralizePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyGeneralizePass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyGeneralizePass(
     StringRef opName = "", const linalg::LinalgTransformationFilter &filter =
                                linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyDecomposePass.
 // TODO: if/when we need finer control add an `opName` parameter.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyDecomposePass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyDecomposePass(
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyInterchangePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyInterchangePass(
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLinalgStrategyInterchangePass(
     ArrayRef<int64_t> iteratorInterchange = {},
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyVectorizePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyVectorizePass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyVectorizePass(
     StringRef opName = "",
     linalg::LinalgVectorizationOptions opt =
         linalg::LinalgVectorizationOptions(),
@@ -136,20 +148,22 @@ std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyVectorizePass(
     bool padVectorize = false);
 
 /// Create a LinalgStrategyEnablePass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyEnablePass(
+std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyEnablePass(
     linalg::LinalgEnablingOptions opt = linalg::LinalgEnablingOptions(),
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyLowerVectorsPass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyLowerVectorsPass(
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLinalgStrategyLowerVectorsPass(
     linalg::LinalgVectorLoweringOptions opt =
         linalg::LinalgVectorLoweringOptions(),
     const linalg::LinalgTransformationFilter &filter =
         linalg::LinalgTransformationFilter());
 
 /// Create a LinalgStrategyRemoveMarkersPass.
-std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyRemoveMarkersPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLinalgStrategyRemoveMarkersPass();
 
 //===----------------------------------------------------------------------===//
 // Registration

diff  --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
index 0c77b330b1a7e..355106ddd9175 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
@@ -10,7 +10,9 @@
 #define MLIR_DIALECT_LINALG_TRANSFORMS_HOISTING_H_
 
 namespace mlir {
+namespace func {
 class FuncOp;
+} // namespace func
 
 namespace linalg {
 
@@ -27,11 +29,11 @@ namespace linalg {
 /// results in scf::ForOp yielding the value that originally transited through
 /// memory.
 // TODO: generalize on a per-need basis.
-void hoistRedundantVectorTransfers(FuncOp func);
+void hoistRedundantVectorTransfers(func::FuncOp func);
 
 /// Same behavior as `hoistRedundantVectorTransfers` but works on tensors
 /// instead of buffers.
-void hoistRedundantVectorTransfersOnTensor(FuncOp func);
+void hoistRedundantVectorTransfersOnTensor(func::FuncOp func);
 
 } // namespace linalg
 } // namespace mlir

diff  --git a/mlir/include/mlir/Dialect/Quant/Passes.h b/mlir/include/mlir/Dialect/Quant/Passes.h
index 090653eabe3ff..ada9c8cee8b4e 100644
--- a/mlir/include/mlir/Dialect/Quant/Passes.h
+++ b/mlir/include/mlir/Dialect/Quant/Passes.h
@@ -19,18 +19,22 @@
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
+namespace func {
+class FuncOp;
+} // namespace func
+
 namespace quant {
 
 /// Creates a pass that converts quantization simulation operations (i.e.
 /// FakeQuant and those like it) to casts into/out of supported QuantizedTypes.
-std::unique_ptr<OperationPass<FuncOp>> createConvertSimulatedQuantPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createConvertSimulatedQuantPass();
 
 /// Creates a pass that converts constants followed by a qbarrier to a
 /// constant whose value is quantized. This is typically one of the last
 /// passes done when lowering to express actual quantized arithmetic in a
 /// low level representation. Because it modifies the constant, it is
 /// destructive and cannot be undone.
-std::unique_ptr<OperationPass<FuncOp>> createConvertConstPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createConvertConstPass();
 
 //===----------------------------------------------------------------------===//
 // Registration

diff  --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
index 04ccd1a05b4ac..bb5a4848dad1e 100644
--- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
@@ -19,7 +19,6 @@
 #include "llvm/ADT/STLExtras.h"
 
 namespace mlir {
-class FuncOp;
 class Location;
 class Operation;
 class OpBuilder;
@@ -28,6 +27,10 @@ class RewriterBase;
 class ValueRange;
 class Value;
 
+namespace func {
+class FuncOp;
+} // namespace func
+
 namespace scf {
 class IfOp;
 class ForOp;
@@ -68,8 +71,9 @@ scf::ForOp cloneWithNewYields(OpBuilder &b, scf::ForOp loop,
 /// collide with another FuncOp name.
 // TODO: support more than single-block regions.
 // TODO: more flexible constant handling.
-FailureOr<FuncOp> outlineSingleBlockRegion(RewriterBase &rewriter, Location loc,
-                                           Region &region, StringRef funcName);
+FailureOr<func::FuncOp> outlineSingleBlockRegion(RewriterBase &rewriter,
+                                                 Location loc, Region &region,
+                                                 StringRef funcName);
 
 /// Outline the then and/or else regions of `ifOp` as follows:
 ///  - if `thenFn` is not null, `thenFnName` must be specified and the `then`
@@ -79,8 +83,8 @@ FailureOr<FuncOp> outlineSingleBlockRegion(RewriterBase &rewriter, Location loc,
 /// Creates new FuncOps and thus cannot be used in a FuncOp pass.
 /// The client is responsible for providing a unique `thenFnName`/`elseFnName`
 /// that will not collide with another FuncOp name.
-LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, FuncOp *thenFn,
-                          StringRef thenFnName, FuncOp *elseFn,
+LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn,
+                          StringRef thenFnName, func::FuncOp *elseFn,
                           StringRef elseFnName);
 
 /// Get a list of innermost parallel loops contained in `rootOp`. Innermost

diff  --git a/mlir/include/mlir/Dialect/Shape/IR/Shape.h b/mlir/include/mlir/Dialect/Shape/IR/Shape.h
index bebd1f834b448..6e92a4b17380c 100644
--- a/mlir/include/mlir/Dialect/Shape/IR/Shape.h
+++ b/mlir/include/mlir/Dialect/Shape/IR/Shape.h
@@ -15,6 +15,7 @@
 #define MLIR_DIALECT_SHAPE_IR_SHAPE_H
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dialect.h"

diff  --git a/mlir/include/mlir/IR/BuiltinOps.h b/mlir/include/mlir/IR/BuiltinOps.h
index 99bc09166e83f..f01237dde3fd0 100644
--- a/mlir/include/mlir/IR/BuiltinOps.h
+++ b/mlir/include/mlir/IR/BuiltinOps.h
@@ -13,12 +13,10 @@
 #ifndef MLIR_IR_BUILTINOPS_H_
 #define MLIR_IR_BUILTINOPS_H_
 
-#include "mlir/IR/FunctionInterfaces.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/OwningOpRef.h"
 #include "mlir/IR/RegionKindInterface.h"
 #include "mlir/IR/SymbolTable.h"
-#include "mlir/Interfaces/CallInterfaces.h"
 #include "mlir/Interfaces/CastInterfaces.h"
 #include "mlir/Interfaces/DataLayoutInterfaces.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -32,18 +30,6 @@
 #include "mlir/IR/BuiltinOps.h.inc"
 
 namespace llvm {
-/// Allow stealing the low bits of FuncOp.
-template <>
-struct PointerLikeTypeTraits<mlir::FuncOp> {
-  static inline void *getAsVoidPointer(mlir::FuncOp val) {
-    return const_cast<void *>(val.getAsOpaquePointer());
-  }
-  static inline mlir::FuncOp getFromVoidPointer(void *p) {
-    return mlir::FuncOp::getFromOpaquePointer(p);
-  }
-  static constexpr int numLowBitsAvailable = 3;
-};
-
 /// Allow stealing the low bits of ModuleOp.
 template <>
 struct PointerLikeTypeTraits<mlir::ModuleOp> {

diff  --git a/mlir/include/mlir/IR/BuiltinOps.td b/mlir/include/mlir/IR/BuiltinOps.td
index 7f5d7144163a9..6ba2611c501eb 100644
--- a/mlir/include/mlir/IR/BuiltinOps.td
+++ b/mlir/include/mlir/IR/BuiltinOps.td
@@ -15,11 +15,9 @@
 #define BUILTIN_OPS
 
 include "mlir/IR/BuiltinDialect.td"
-include "mlir/IR/FunctionInterfaces.td"
 include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/RegionKindInterface.td"
 include "mlir/IR/SymbolInterfaces.td"
-include "mlir/Interfaces/CallInterfaces.td"
 include "mlir/Interfaces/CastInterfaces.td"
 include "mlir/Interfaces/DataLayoutInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
@@ -28,120 +26,6 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 class Builtin_Op<string mnemonic, list<Trait> traits = []> :
     Op<Builtin_Dialect, mnemonic, traits>;
 
-//===----------------------------------------------------------------------===//
-// FuncOp
-//===----------------------------------------------------------------------===//
-
-def FuncOp : Builtin_Op<"func", [
-  AffineScope, AutomaticAllocationScope, CallableOpInterface,
-  FunctionOpInterface, IsolatedFromAbove, Symbol
-]> {
-  let summary = "An operation with a name containing a single `SSACFG` region";
-  let description = [{
-    Operations within the function cannot implicitly capture values defined
-    outside of the function, i.e. Functions are `IsolatedFromAbove`. All
-    external references must use function arguments or attributes that establish
-    a symbolic connection (e.g. symbols referenced by name via a string
-    attribute like SymbolRefAttr). An external function declaration (used when
-    referring to a function declared in some other module) has no body. While
-    the MLIR textual form provides a nice inline syntax for function arguments,
-    they are internally represented as “block arguments” to the first block in
-    the region.
-
-    Only dialect attribute names may be specified in the attribute dictionaries
-    for function arguments, results, or the function itself.
-
-    Example:
-
-    ```mlir
-    // External function definitions.
-    func @abort()
-    func @scribble(i32, i64, memref<? x 128 x f32, #layout_map0>) -> f64
-
-    // A function that returns its argument twice:
-    func @count(%x: i64) -> (i64, i64)
-      attributes {fruit: "banana"} {
-      return %x, %x: i64, i64
-    }
-
-    // A function with an argument attribute
-    func @example_fn_arg(%x: i32 {swift.self = unit})
-
-    // A function with a result attribute
-    func @example_fn_result() -> (f64 {dialectName.attrName = 0 : i64})
-
-    // A function with an attribute
-    func @example_fn_attr() attributes {dialectName.attrName = false}
-    ```
-  }];
-
-  let arguments = (ins SymbolNameAttr:$sym_name,
-                       TypeAttrOf<FunctionType>:$type,
-                       OptionalAttr<StrAttr>:$sym_visibility);
-  let regions = (region AnyRegion:$body);
-
-  let builders = [OpBuilder<(ins
-    "StringRef":$name, "FunctionType":$type,
-    CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs,
-    CArg<"ArrayRef<DictionaryAttr>", "{}">:$argAttrs)
-  >];
-  let extraClassDeclaration = [{
-    static FuncOp create(Location location, StringRef name, FunctionType type,
-                         ArrayRef<NamedAttribute> attrs = {});
-    static FuncOp create(Location location, StringRef name, FunctionType type,
-                         Operation::dialect_attr_range attrs);
-    static FuncOp create(Location location, StringRef name, FunctionType type,
-                         ArrayRef<NamedAttribute> attrs,
-                         ArrayRef<DictionaryAttr> argAttrs);
-
-    /// Create a deep copy of this function and all of its blocks, remapping any
-    /// operands that use values outside of the function using the map that is
-    /// provided (leaving them alone if no entry is present). If the mapper
-    /// contains entries for function arguments, these arguments are not
-    /// included in the new function. Replaces references to cloned sub-values
-    /// with the corresponding value that is copied, and adds those mappings to
-    /// the mapper.
-    FuncOp clone(BlockAndValueMapping &mapper);
-    FuncOp clone();
-
-    /// Clone the internal blocks and attributes from this function into dest.
-    /// Any cloned blocks are appended to the back of dest. This function
-    /// asserts that the attributes of the current function and dest are
-    /// compatible.
-    void cloneInto(FuncOp dest, BlockAndValueMapping &mapper);
-
-    //===------------------------------------------------------------------===//
-    // CallableOpInterface
-    //===------------------------------------------------------------------===//
-
-    /// Returns the region on the current operation that is callable. This may
-    /// return null in the case of an external callable object, e.g. an external
-    /// function.
-    ::mlir::Region *getCallableRegion() { return isExternal() ? nullptr : &getBody(); }
-
-    /// Returns the results types that the callable region produces when
-    /// executed.
-    ArrayRef<Type> getCallableResults() { return getType().getResults(); }
-
-    //===------------------------------------------------------------------===//
-    // FunctionOpInterface Methods
-    //===------------------------------------------------------------------===//
-
-    /// Returns the argument types of this function.
-    ArrayRef<Type> getArgumentTypes() { return getType().getInputs(); }
-
-    /// Returns the result types of this function.
-    ArrayRef<Type> getResultTypes() { return getType().getResults(); }
-
-    //===------------------------------------------------------------------===//
-    // SymbolOpInterface Methods
-    //===------------------------------------------------------------------===//
-
-    bool isDeclaration() { return isExternal(); }
-  }];
-  let hasCustomAssemblyFormat = 1;
-}
-
 //===----------------------------------------------------------------------===//
 // ModuleOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/include/mlir/IR/FunctionInterfaces.td b/mlir/include/mlir/IR/FunctionInterfaces.td
index 5e5f163e87fd8..73251cbf27e06 100644
--- a/mlir/include/mlir/IR/FunctionInterfaces.td
+++ b/mlir/include/mlir/IR/FunctionInterfaces.td
@@ -158,7 +158,7 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
     /// Block argument iterator types.
     using BlockArgListType = Region::BlockArgListType;
     using args_iterator = BlockArgListType::iterator;
-
+
     //===------------------------------------------------------------------===//
     // Body Handling
     //===------------------------------------------------------------------===//

diff  --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 37d6541cd02be..9efdc8a832a35 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -22,6 +22,7 @@
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"

diff  --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 043b66e85f405..6a50f0c78165e 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -22,6 +22,7 @@
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"

diff  --git a/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp
index 815e3d16e54c5..dd2369fb006ec 100644
--- a/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
 #include "../PassDetail.h"
 #include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"
 

diff  --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index c0d9a2953291f..01cbd93e47557 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"

diff  --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
index 2f57423d2d6de..dfe95dc1b83ef 100644
--- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
+++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
@@ -10,6 +10,7 @@
 
 #include "../PassDetail.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Shape/IR/Shape.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"

diff  --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
index 648a18d4eafad..09fdc6ff04873 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/IntegerSet.h"

diff  --git a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
index a7262d76c88d3..37dccb132f447 100644
--- a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
@@ -10,6 +10,7 @@
 #define DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
 
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
index dcaa88a6dca19..bfc4f63fbbcda 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"

diff  --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index f0e784aeb81dc..7500a507967f8 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
 #include "mlir/Dialect/Affine/Utils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -2611,7 +2612,7 @@ gatherLoopsInBlock(Block *block, unsigned currLoopDepth,
   }
 }
 
-/// Gathers all AffineForOps in 'builtin.func' grouped by loop depth.
+/// Gathers all AffineForOps in 'func.func' grouped by loop depth.
 void mlir::gatherLoops(FuncOp func,
                        std::vector<SmallVector<AffineForOp, 2>> &depthToLoops) {
   for (auto &block : func)

diff  --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 4bf97af2b8475..6e794fda135ab 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/BlockAndValueMapping.h"

diff  --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 11a3f068d6b4d..ee3ea01083986 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -8,6 +8,7 @@
 
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BlockAndValueMapping.h"

diff  --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
index 8ec23af66eac3..302f6a409a48a 100644
--- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
@@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRBufferization
 
   LINK_LIBS PUBLIC
   MLIRDialect
+  MLIRFunc
   MLIRIR
   MLIRTensor
   MLIRMemRef

diff  --git a/mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h b/mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h
index d695f2a409ccb..c1c8a63e071da 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_BUFFERIZATION_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_BUFFERIZATION_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/lib/Dialect/Func/IR/FuncOps.cpp b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
index b1a557b3c6c27..b4a1f89051cff 100644
--- a/mlir/lib/Dialect/Func/IR/FuncOps.cpp
+++ b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
@@ -13,6 +13,7 @@
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/FunctionImplementation.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/PatternMatch.h"
@@ -21,6 +22,7 @@
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -56,6 +58,12 @@ struct FuncInlinerInterface : public DialectInlinerInterface {
     return true;
   }
 
+  /// All functions can be inlined.
+  bool isLegalToInline(Region *, Region *, bool,
+                       BlockAndValueMapping &) const final {
+    return true;
+  }
+
   //===--------------------------------------------------------------------===//
   // Transformation Hooks
   //===--------------------------------------------------------------------===//
@@ -208,6 +216,129 @@ bool ConstantOp::isBuildableWith(Attribute value, Type type) {
   return value.isa<FlatSymbolRefAttr>() && type.isa<FunctionType>();
 }
 
+//===----------------------------------------------------------------------===//
+// FuncOp
+//===----------------------------------------------------------------------===//
+
+FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
+                      ArrayRef<NamedAttribute> attrs) {
+  OpBuilder builder(location->getContext());
+  OperationState state(location, getOperationName());
+  FuncOp::build(builder, state, name, type, attrs);
+  return cast<FuncOp>(Operation::create(state));
+}
+FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
+                      Operation::dialect_attr_range attrs) {
+  SmallVector<NamedAttribute, 8> attrRef(attrs);
+  return create(location, name, type, llvm::makeArrayRef(attrRef));
+}
+FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
+                      ArrayRef<NamedAttribute> attrs,
+                      ArrayRef<DictionaryAttr> argAttrs) {
+  FuncOp func = create(location, name, type, attrs);
+  func.setAllArgAttrs(argAttrs);
+  return func;
+}
+
+void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name,
+                   FunctionType type, ArrayRef<NamedAttribute> attrs,
+                   ArrayRef<DictionaryAttr> argAttrs) {
+  state.addAttribute(SymbolTable::getSymbolAttrName(),
+                     builder.getStringAttr(name));
+  state.addAttribute(FunctionOpInterface::getTypeAttrName(),
+                     TypeAttr::get(type));
+  state.attributes.append(attrs.begin(), attrs.end());
+  state.addRegion();
+
+  if (argAttrs.empty())
+    return;
+  assert(type.getNumInputs() == argAttrs.size());
+  function_interface_impl::addArgAndResultAttrs(builder, state, argAttrs,
+                                                /*resultAttrs=*/llvm::None);
+}
+
+ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
+  auto buildFuncType =
+      [](Builder &builder, ArrayRef<Type> argTypes, ArrayRef<Type> results,
+         function_interface_impl::VariadicFlag,
+         std::string &) { return builder.getFunctionType(argTypes, results); };
+
+  return function_interface_impl::parseFunctionOp(
+      parser, result, /*allowVariadic=*/false, buildFuncType);
+}
+
+void FuncOp::print(OpAsmPrinter &p) {
+  function_interface_impl::printFunctionOp(p, *this, /*isVariadic=*/false);
+}
+
+/// Clone the internal blocks from this function into dest and all attributes
+/// from this function to dest.
+void FuncOp::cloneInto(FuncOp dest, BlockAndValueMapping &mapper) {
+  // Add the attributes of this function to dest.
+  llvm::MapVector<StringAttr, Attribute> newAttrMap;
+  for (const auto &attr : dest->getAttrs())
+    newAttrMap.insert({attr.getName(), attr.getValue()});
+  for (const auto &attr : (*this)->getAttrs())
+    newAttrMap.insert({attr.getName(), attr.getValue()});
+
+  auto newAttrs = llvm::to_vector(llvm::map_range(
+      newAttrMap, [](std::pair<StringAttr, Attribute> attrPair) {
+        return NamedAttribute(attrPair.first, attrPair.second);
+      }));
+  dest->setAttrs(DictionaryAttr::get(getContext(), newAttrs));
+
+  // Clone the body.
+  getBody().cloneInto(&dest.getBody(), mapper);
+}
+
+/// Create a deep copy of this function and all of its blocks, remapping
+/// any operands that use values outside of the function using the map that is
+/// provided (leaving them alone if no entry is present). Replaces references
+/// to cloned sub-values with the corresponding value that is copied, and adds
+/// those mappings to the mapper.
+FuncOp FuncOp::clone(BlockAndValueMapping &mapper) {
+  // Create the new function.
+  FuncOp newFunc = cast<FuncOp>(getOperation()->cloneWithoutRegions());
+
+  // If the function has a body, then the user might be deleting arguments to
+  // the function by specifying them in the mapper. If so, we don't add the
+  // argument to the input type vector.
+  if (!isExternal()) {
+    FunctionType oldType = getType();
+
+    unsigned oldNumArgs = oldType.getNumInputs();
+    SmallVector<Type, 4> newInputs;
+    newInputs.reserve(oldNumArgs);
+    for (unsigned i = 0; i != oldNumArgs; ++i)
+      if (!mapper.contains(getArgument(i)))
+        newInputs.push_back(oldType.getInput(i));
+
+    /// If any of the arguments were dropped, update the type and drop any
+    /// necessary argument attributes.
+    if (newInputs.size() != oldNumArgs) {
+      newFunc.setType(FunctionType::get(oldType.getContext(), newInputs,
+                                        oldType.getResults()));
+
+      if (ArrayAttr argAttrs = getAllArgAttrs()) {
+        SmallVector<Attribute> newArgAttrs;
+        newArgAttrs.reserve(newInputs.size());
+        for (unsigned i = 0; i != oldNumArgs; ++i)
+          if (!mapper.contains(getArgument(i)))
+            newArgAttrs.push_back(argAttrs[i]);
+        newFunc.setAllArgAttrs(newArgAttrs);
+      }
+    }
+  }
+
+  /// Clone the current function into the new one and return it.
+  cloneInto(newFunc, mapper);
+  return newFunc;
+}
+FuncOp FuncOp::clone() {
+  BlockAndValueMapping mapper;
+  return clone(mapper);
+}
+
 //===----------------------------------------------------------------------===//
 // ReturnOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp b/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp
index 15208b57ce7fa..573348cf7b156 100644
--- a/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp
+++ b/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements bufferization of builtin.func's and func.call's.
+// This file implements bufferization of func.func's and func.call's.
 //
 //===----------------------------------------------------------------------===//
 

diff  --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index f0f3a659974e2..3dba4164c42ea 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/GPU/Utils.h"

diff  --git a/mlir/lib/Dialect/GPU/Transforms/PassDetail.h b/mlir/lib/Dialect/GPU/Transforms/PassDetail.h
index faa9d3cf7231a..99faa71eb1a93 100644
--- a/mlir/lib/Dialect/GPU/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/GPU/Transforms/PassDetail.h
@@ -11,6 +11,7 @@
 
 #include "mlir/Dialect/Async/IR/Async.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Pass/Pass.h"
 

diff  --git a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp
index 675c7b3a82bb2..46d4f67c57b21 100644
--- a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp
+++ b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/IR/BuiltinOps.h"
 

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index 907ffa3be4b95..e4f46a4415e49 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
 #include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/SCF.h"

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 90aa03b18e33e..4e3c7c8497d5a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/SCF.h"

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
index faf992e237ec2..17e0cd79ff8e5 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -362,7 +362,7 @@ struct LinalgStrategyEnablePass
       hoistRedundantVectorTransfersOnTensor(funcOp);
 
     // Run CSE to cleanup after canonicalization.
-    OpPassManager dynamicPM("builtin.func");
+    OpPassManager dynamicPM("func.func");
     dynamicPM.addPass(createCSEPass());
     if (failed(runPipeline(dynamicPM, funcOp)))
       return signalPassFailure();

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h b/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h
index cd594b69dc29e..e2f8d10b80b04 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h
@@ -10,6 +10,7 @@
 #define DIALECT_LINALG_TRANSFORMS_PASSDETAIL_H_
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/Pass/Pass.h"
 

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index ffdd142716fa9..f24561ea15b97 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -13,6 +13,7 @@
 
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"

diff  --git a/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt
index 0515e52fa0bdd..2633fbef9eed9 100644
--- a/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt
@@ -10,6 +10,7 @@ add_mlir_dialect_library(MLIRQuantTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
+  MLIRFunc
   MLIRIR
   MLIRQuant
   MLIRQuantUtils

diff  --git a/mlir/lib/Dialect/Quant/Transforms/PassDetail.h b/mlir/lib/Dialect/Quant/Transforms/PassDetail.h
index 4d6fb3e1f3710..358b6e078d587 100644
--- a/mlir/lib/Dialect/Quant/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Quant/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_QUANT_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_QUANT_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/lib/Dialect/SCF/Transforms/PassDetail.h b/mlir/lib/Dialect/SCF/Transforms/PassDetail.h
index 49f67c27873df..c921f2fc40349 100644
--- a/mlir/lib/Dialect/SCF/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/SCF/Transforms/PassDetail.h
@@ -6,9 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef DIALECT_LOOPOPS_TRANSFORMS_PASSDETAIL_H_
-#define DIALECT_LOOPOPS_TRANSFORMS_PASSDETAIL_H_
+#ifndef DIALECT_SCF_TRANSFORMS_PASSDETAIL_H_
+#define DIALECT_SCF_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -39,4 +40,4 @@ class TensorDialect;
 
 } // namespace mlir
 
-#endif // DIALECT_LOOPOPS_TRANSFORMS_PASSDETAIL_H_
+#endif // DIALECT_SCF_TRANSFORMS_PASSDETAIL_H_

diff  --git a/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt
index c5aaf65551da5..685cbe416d0aa 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt
@@ -15,6 +15,7 @@ add_mlir_dialect_library(MLIRSPIRVConversion
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SPIRV
 
   LINK_LIBS PUBLIC
+  MLIRFunc
   MLIRSPIRV
   MLIRTransformUtils
 )

diff  --git a/mlir/lib/Dialect/SPIRV/Transforms/DecorateCompositeTypeLayoutPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/DecorateCompositeTypeLayoutPass.cpp
index b1772c2645c7e..29ec0b3c07e0e 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/DecorateCompositeTypeLayoutPass.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/DecorateCompositeTypeLayoutPass.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "mlir/Dialect/SPIRV/Transforms/Passes.h"

diff  --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
index f73d626364290..000e58d4b5c37 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "mlir/Transforms/DialectConversion.h"

diff  --git a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
index e0cc5abe33438..c860834c3c0fa 100644
--- a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_dialect_library(MLIRShape
   MLIRCastInterfaces
   MLIRControlFlowInterfaces
   MLIRDialect
+  MLIRFunc
   MLIRInferTypeOpInterface
   MLIRIR
   MLIRSideEffectInterfaces

diff  --git a/mlir/lib/Dialect/Shape/Transforms/PassDetail.h b/mlir/lib/Dialect/Shape/Transforms/PassDetail.h
index d1d54b929d81e..2856871f8b5e3 100644
--- a/mlir/lib/Dialect/Shape/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Shape/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_SHAPE_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_SHAPE_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 29231774fc86d..1f5e26689e056 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -11,6 +11,7 @@
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Func/Transforms/Passes.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"

diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index da9a2f53934d7..d1fb285c17149 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"

diff  --git a/mlir/lib/Dialect/Tensor/Transforms/PassDetail.h b/mlir/lib/Dialect/Tensor/Transforms/PassDetail.h
index 858a0ebf0b8f8..033372b0533bc 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Tensor/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_TENSOR_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_TENSOR_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/lib/Dialect/Vector/Transforms/PassDetail.h b/mlir/lib/Dialect/Vector/Transforms/PassDetail.h
index 2ef3176bf67a3..305aae47e5bb4 100644
--- a/mlir/lib/Dialect/Vector/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Vector/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_VECTOR_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_VECTOR_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/lib/IR/BuiltinDialect.cpp b/mlir/lib/IR/BuiltinDialect.cpp
index cbe5ad24f2c8e..679591efa5c67 100644
--- a/mlir/lib/IR/BuiltinDialect.cpp
+++ b/mlir/lib/IR/BuiltinDialect.cpp
@@ -16,10 +16,8 @@
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/FunctionImplementation.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/PatternMatch.h"
-#include "llvm/ADT/MapVector.h"
 
 using namespace mlir;
 
@@ -72,129 +70,6 @@ void BuiltinDialect::initialize() {
   addInterfaces<BuiltinOpAsmDialectInterface>();
 }
 
-//===----------------------------------------------------------------------===//
-// FuncOp
-//===----------------------------------------------------------------------===//
-
-FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
-                      ArrayRef<NamedAttribute> attrs) {
-  OpBuilder builder(location->getContext());
-  OperationState state(location, getOperationName());
-  FuncOp::build(builder, state, name, type, attrs);
-  return cast<FuncOp>(Operation::create(state));
-}
-FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
-                      Operation::dialect_attr_range attrs) {
-  SmallVector<NamedAttribute, 8> attrRef(attrs);
-  return create(location, name, type, llvm::makeArrayRef(attrRef));
-}
-FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
-                      ArrayRef<NamedAttribute> attrs,
-                      ArrayRef<DictionaryAttr> argAttrs) {
-  FuncOp func = create(location, name, type, attrs);
-  func.setAllArgAttrs(argAttrs);
-  return func;
-}
-
-void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name,
-                   FunctionType type, ArrayRef<NamedAttribute> attrs,
-                   ArrayRef<DictionaryAttr> argAttrs) {
-  state.addAttribute(SymbolTable::getSymbolAttrName(),
-                     builder.getStringAttr(name));
-  state.addAttribute(function_interface_impl::getTypeAttrName(),
-                     TypeAttr::get(type));
-  state.attributes.append(attrs.begin(), attrs.end());
-  state.addRegion();
-
-  if (argAttrs.empty())
-    return;
-  assert(type.getNumInputs() == argAttrs.size());
-  function_interface_impl::addArgAndResultAttrs(builder, state, argAttrs,
-                                                /*resultAttrs=*/llvm::None);
-}
-
-ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
-  auto buildFuncType =
-      [](Builder &builder, ArrayRef<Type> argTypes, ArrayRef<Type> results,
-         function_interface_impl::VariadicFlag,
-         std::string &) { return builder.getFunctionType(argTypes, results); };
-
-  return function_interface_impl::parseFunctionOp(
-      parser, result, /*allowVariadic=*/false, buildFuncType);
-}
-
-void FuncOp::print(OpAsmPrinter &p) {
-  function_interface_impl::printFunctionOp(p, *this, /*isVariadic=*/false);
-}
-
-/// Clone the internal blocks from this function into dest and all attributes
-/// from this function to dest.
-void FuncOp::cloneInto(FuncOp dest, BlockAndValueMapping &mapper) {
-  // Add the attributes of this function to dest.
-  llvm::MapVector<StringAttr, Attribute> newAttrMap;
-  for (const auto &attr : dest->getAttrs())
-    newAttrMap.insert({attr.getName(), attr.getValue()});
-  for (const auto &attr : (*this)->getAttrs())
-    newAttrMap.insert({attr.getName(), attr.getValue()});
-
-  auto newAttrs = llvm::to_vector(llvm::map_range(
-      newAttrMap, [](std::pair<StringAttr, Attribute> attrPair) {
-        return NamedAttribute(attrPair.first, attrPair.second);
-      }));
-  dest->setAttrs(DictionaryAttr::get(getContext(), newAttrs));
-
-  // Clone the body.
-  getBody().cloneInto(&dest.getBody(), mapper);
-}
-
-/// Create a deep copy of this function and all of its blocks, remapping
-/// any operands that use values outside of the function using the map that is
-/// provided (leaving them alone if no entry is present). Replaces references
-/// to cloned sub-values with the corresponding value that is copied, and adds
-/// those mappings to the mapper.
-FuncOp FuncOp::clone(BlockAndValueMapping &mapper) {
-  // Create the new function.
-  FuncOp newFunc = cast<FuncOp>(getOperation()->cloneWithoutRegions());
-
-  // If the function has a body, then the user might be deleting arguments to
-  // the function by specifying them in the mapper. If so, we don't add the
-  // argument to the input type vector.
-  if (!isExternal()) {
-    FunctionType oldType = getType();
-
-    unsigned oldNumArgs = oldType.getNumInputs();
-    SmallVector<Type, 4> newInputs;
-    newInputs.reserve(oldNumArgs);
-    for (unsigned i = 0; i != oldNumArgs; ++i)
-      if (!mapper.contains(getArgument(i)))
-        newInputs.push_back(oldType.getInput(i));
-
-    /// If any of the arguments were dropped, update the type and drop any
-    /// necessary argument attributes.
-    if (newInputs.size() != oldNumArgs) {
-      newFunc.setType(FunctionType::get(oldType.getContext(), newInputs,
-                                        oldType.getResults()));
-
-      if (ArrayAttr argAttrs = getAllArgAttrs()) {
-        SmallVector<Attribute> newArgAttrs;
-        newArgAttrs.reserve(newInputs.size());
-        for (unsigned i = 0; i != oldNumArgs; ++i)
-          if (!mapper.contains(getArgument(i)))
-            newArgAttrs.push_back(argAttrs[i]);
-        newFunc.setAllArgAttrs(newArgAttrs);
-      }
-    }
-  }
-
-  /// Clone the current function into the new one and return it.
-  cloneInto(newFunc, mapper);
-  return newFunc;
-}
-FuncOp FuncOp::clone() {
-  BlockAndValueMapping mapper;
-  return clone(mapper);
-}
-
 //===----------------------------------------------------------------------===//
 // ModuleOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp
index b47f538bbeb01..6f3c861aa59ab 100644
--- a/mlir/lib/Pass/PassRegistry.cpp
+++ b/mlir/lib/Pass/PassRegistry.cpp
@@ -322,7 +322,7 @@ class TextualPipeline {
   ///
   /// A pipeline is defined as a series of names, each of which may in itself
   /// recursively contain a nested pipeline. A name is either the name of a pass
-  /// (e.g. "cse") or the name of an operation type (e.g. "builtin.func"). If
+  /// (e.g. "cse") or the name of an operation type (e.g. "builtin.module"). If
   /// the name is the name of a pass, the InnerPipeline is empty, since passes
   /// cannot contain inner pipelines.
   struct PipelineElement {

diff  --git a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp
index d64cfd0db9ef5..70f86d2728f98 100644
--- a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Target/LLVMIR/Dialect/All.h"
 #include "mlir/Target/LLVMIR/Export.h"
@@ -34,7 +35,7 @@ void registerToLLVMIRTranslation() {
         return success();
       },
       [](DialectRegistry &registry) {
-        registry.insert<DLTIDialect>();
+        registry.insert<DLTIDialect, func::FuncDialect>();
         registerAllToLLVMIRTranslations(registry);
       });
 }

diff  --git a/mlir/lib/Transforms/Utils/InliningUtils.cpp b/mlir/lib/Transforms/Utils/InliningUtils.cpp
index 2a4c5dab95341..ae3e066cd1f28 100644
--- a/mlir/lib/Transforms/Utils/InliningUtils.cpp
+++ b/mlir/lib/Transforms/Utils/InliningUtils.cpp
@@ -14,8 +14,8 @@
 
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/CallInterfaces.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -67,10 +67,6 @@ bool InlinerInterface::isLegalToInline(Operation *call, Operation *callable,
 bool InlinerInterface::isLegalToInline(
     Region *dest, Region *src, bool wouldBeCloned,
     BlockAndValueMapping &valueMapping) const {
-  // Regions can always be inlined into functions.
-  if (isa<FuncOp>(dest->getParentOp()))
-    return true;
-
   if (auto *handler = getInterfaceFor(dest->getParentOp()))
     return handler->isLegalToInline(dest, src, wouldBeCloned, valueMapping);
   return false;

diff  --git a/mlir/python/mlir/dialects/_builtin_ops_ext.py b/mlir/python/mlir/dialects/_builtin_ops_ext.py
index a3a1474692661..b69163fa41519 100644
--- a/mlir/python/mlir/dialects/_builtin_ops_ext.py
+++ b/mlir/python/mlir/dialects/_builtin_ops_ext.py
@@ -3,17 +3,10 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 try:
-  from typing import Optional, Sequence, Union
-
-  import inspect
-
   from ..ir import *
 except ImportError as e:
   raise RuntimeError("Error loading imports from extension module") from e
 
-ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
-RESULT_ATTRIBUTE_NAME = "res_attrs"
-
 class ModuleOp:
   """Specialization for the module op class."""
 
@@ -25,208 +18,3 @@ def __init__(self, *, loc=None, ip=None):
   @property
   def body(self):
     return self.regions[0].blocks[0]
-
-
-class FuncOp:
-  """Specialization for the func op class."""
-
-  def __init__(self,
-               name,
-               type,
-               *,
-               visibility=None,
-               body_builder=None,
-               loc=None,
-               ip=None):
-    """
-    Create a FuncOp with the provided `name`, `type`, and `visibility`.
-    - `name` is a string representing the function name.
-    - `type` is either a FunctionType or a pair of list describing inputs and
-      results.
-    - `visibility` is a string matching `public`, `private`, or `nested`. None
-      implies private visibility.
-    - `body_builder` is an optional callback, when provided a new entry block
-      is created and the callback is invoked with the new op as argument within
-      an InsertionPoint context already set for the block. The callback is
-      expected to insert a terminator in the block.
-    """
-    sym_name = StringAttr.get(str(name))
-
-    # If the type is passed as a tuple, build a FunctionType on the fly.
-    if isinstance(type, tuple):
-      type = FunctionType.get(inputs=type[0], results=type[1])
-
-    type = TypeAttr.get(type)
-    sym_visibility = StringAttr.get(
-        str(visibility)) if visibility is not None else None
-    super().__init__(sym_name, type, sym_visibility, loc=loc, ip=ip)
-    if body_builder:
-      entry_block = self.add_entry_block()
-      with InsertionPoint(entry_block):
-        body_builder(self)
-
-  @property
-  def is_external(self):
-    return len(self.regions[0].blocks) == 0
-
-  @property
-  def body(self):
-    return self.regions[0]
-
-  @property
-  def type(self):
-    return FunctionType(TypeAttr(self.attributes["type"]).value)
-
-  @property
-  def visibility(self):
-    return self.attributes["sym_visibility"]
-
-  @property
-  def name(self) -> StringAttr:
-    return StringAttr(self.attributes["sym_name"])
-
-  @property
-  def entry_block(self):
-    if self.is_external:
-      raise IndexError('External function does not have a body')
-    return self.regions[0].blocks[0]
-
-  def add_entry_block(self):
-    """
-    Add an entry block to the function body using the function signature to
-    infer block arguments.
-    Returns the newly created block
-    """
-    if not self.is_external:
-      raise IndexError('The function already has an entry block!')
-    self.body.blocks.append(*self.type.inputs)
-    return self.body.blocks[0]
-
-  @property
-  def arg_attrs(self):
-    return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])
-
-  @arg_attrs.setter
-  def arg_attrs(self, attribute: Union[ArrayAttr, list]):
-    if isinstance(attribute, ArrayAttr):
-      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
-    else:
-      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
-          attribute, context=self.context)
-
-  @property
-  def arguments(self):
-    return self.entry_block.arguments
-
-  @property
-  def result_attrs(self):
-    return self.attributes[RESULT_ATTRIBUTE_NAME]
-
-  @result_attrs.setter
-  def result_attrs(self, attribute: ArrayAttr):
-    self.attributes[RESULT_ATTRIBUTE_NAME] = attribute
-
-  @classmethod
-  def from_py_func(FuncOp,
-                   *inputs: Type,
-                   results: Optional[Sequence[Type]] = None,
-                   name: Optional[str] = None):
-    """Decorator to define an MLIR FuncOp specified as a python function.
-
-    Requires that an `mlir.ir.InsertionPoint` and `mlir.ir.Location` are
-    active for the current thread (i.e. established in a `with` block).
-
-    When applied as a decorator to a Python function, an entry block will
-    be constructed for the FuncOp with types as specified in `*inputs`. The
-    block arguments will be passed positionally to the Python function. In
-    addition, if the Python function accepts keyword arguments generally or
-    has a corresponding keyword argument, the following will be passed:
-      * `func_op`: The `func` op being defined.
-
-    By default, the function name will be the Python function `__name__`. This
-    can be overriden by passing the `name` argument to the decorator.
-
-    If `results` is not specified, then the decorator will implicitly
-    insert a `ReturnOp` with the `Value`'s returned from the decorated
-    function. It will also set the `FuncOp` type with the actual return
-    value types. If `results` is specified, then the decorated function
-    must return `None` and no implicit `ReturnOp` is added (nor are the result
-    types updated). The implicit behavior is intended for simple, single-block
-    cases, and users should specify result types explicitly for any complicated
-    cases.
-
-    The decorated function can further be called from Python and will insert
-    a `CallOp` at the then-current insertion point, returning either None (
-    if no return values), a unary Value (for one result), or a list of Values).
-    This mechanism cannot be used to emit recursive calls (by construction).
-    """
-
-    def decorator(f):
-      from . import func
-      # Introspect the callable for optional features.
-      sig = inspect.signature(f)
-      has_arg_func_op = False
-      for param in sig.parameters.values():
-        if param.kind == param.VAR_KEYWORD:
-          has_arg_func_op = True
-        if param.name == "func_op" and (param.kind
-                                        == param.POSITIONAL_OR_KEYWORD or
-                                        param.kind == param.KEYWORD_ONLY):
-          has_arg_func_op = True
-
-      # Emit the FuncOp.
-      implicit_return = results is None
-      symbol_name = name or f.__name__
-      function_type = FunctionType.get(
-          inputs=inputs, results=[] if implicit_return else results)
-      func_op = FuncOp(name=symbol_name, type=function_type)
-      with InsertionPoint(func_op.add_entry_block()):
-        func_args = func_op.entry_block.arguments
-        func_kwargs = {}
-        if has_arg_func_op:
-          func_kwargs["func_op"] = func_op
-        return_values = f(*func_args, **func_kwargs)
-        if not implicit_return:
-          return_types = list(results)
-          assert return_values is None, (
-              "Capturing a python function with explicit `results=` "
-              "requires that the wrapped function returns None.")
-        else:
-          # Coerce return values, add ReturnOp and rewrite func type.
-          if return_values is None:
-            return_values = []
-          elif isinstance(return_values, tuple):
-            return_values = list(return_values)
-          elif isinstance(return_values, Value):
-            # Returning a single value is fine, coerce it into a list.
-            return_values = [return_values]
-          elif isinstance(return_values, OpView):
-            # Returning a single operation is fine, coerce its results a list.
-            return_values = return_values.operation.results
-          elif isinstance(return_values, Operation):
-            # Returning a single operation is fine, coerce its results a list.
-            return_values = return_values.results
-          else:
-            return_values = list(return_values)
-          func.ReturnOp(return_values)
-          # Recompute the function type.
-          return_types = [v.type for v in return_values]
-          function_type = FunctionType.get(inputs=inputs, results=return_types)
-          func_op.attributes["type"] = TypeAttr.get(function_type)
-
-      def emit_call_op(*call_args):
-        call_op = func.CallOp(return_types, FlatSymbolRefAttr.get(symbol_name),
-                              call_args)
-        if return_types is None:
-          return None
-        elif len(return_types) == 1:
-          return call_op.result
-        else:
-          return call_op.results
-
-      wrapped = emit_call_op
-      wrapped.__name__ = f.__name__
-      wrapped.func_op = func_op
-      return wrapped
-
-    return decorator

diff  --git a/mlir/python/mlir/dialects/_func_ops_ext.py b/mlir/python/mlir/dialects/_func_ops_ext.py
index 850562673b7ee..6932efd791943 100644
--- a/mlir/python/mlir/dialects/_func_ops_ext.py
+++ b/mlir/python/mlir/dialects/_func_ops_ext.py
@@ -4,13 +4,16 @@
 
 try:
   from ..ir import *
-  from .builtin import FuncOp
   from ._ods_common import get_default_loc_context as _get_default_loc_context
 
-  from typing import Any, List, Optional, Union
+  import inspect
+
+  from typing import Any, List, Optional, Sequence, Union
 except ImportError as e:
   raise RuntimeError("Error loading imports from extension module") from e
 
+ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
+RESULT_ATTRIBUTE_NAME = "res_attrs"
 
 class ConstantOp:
   """Specialization for the constant op class."""
@@ -23,6 +26,210 @@ def type(self):
     return self.results[0].type
 
 
+class FuncOp:
+  """Specialization for the func op class."""
+
+  def __init__(self,
+               name,
+               type,
+               *,
+               visibility=None,
+               body_builder=None,
+               loc=None,
+               ip=None):
+    """
+    Create a FuncOp with the provided `name`, `type`, and `visibility`.
+    - `name` is a string representing the function name.
+    - `type` is either a FunctionType or a pair of list describing inputs and
+      results.
+    - `visibility` is a string matching `public`, `private`, or `nested`. None
+      implies private visibility.
+    - `body_builder` is an optional callback, when provided a new entry block
+      is created and the callback is invoked with the new op as argument within
+      an InsertionPoint context already set for the block. The callback is
+      expected to insert a terminator in the block.
+    """
+    sym_name = StringAttr.get(str(name))
+
+    # If the type is passed as a tuple, build a FunctionType on the fly.
+    if isinstance(type, tuple):
+      type = FunctionType.get(inputs=type[0], results=type[1])
+
+    type = TypeAttr.get(type)
+    sym_visibility = StringAttr.get(
+        str(visibility)) if visibility is not None else None
+    super().__init__(sym_name, type, sym_visibility, loc=loc, ip=ip)
+    if body_builder:
+      entry_block = self.add_entry_block()
+      with InsertionPoint(entry_block):
+        body_builder(self)
+
+  @property
+  def is_external(self):
+    return len(self.regions[0].blocks) == 0
+
+  @property
+  def body(self):
+    return self.regions[0]
+
+  @property
+  def type(self):
+    return FunctionType(TypeAttr(self.attributes["type"]).value)
+
+  @property
+  def visibility(self):
+    return self.attributes["sym_visibility"]
+
+  @property
+  def name(self) -> StringAttr:
+    return StringAttr(self.attributes["sym_name"])
+
+  @property
+  def entry_block(self):
+    if self.is_external:
+      raise IndexError('External function does not have a body')
+    return self.regions[0].blocks[0]
+
+  def add_entry_block(self):
+    """
+    Add an entry block to the function body using the function signature to
+    infer block arguments.
+    Returns the newly created block
+    """
+    if not self.is_external:
+      raise IndexError('The function already has an entry block!')
+    self.body.blocks.append(*self.type.inputs)
+    return self.body.blocks[0]
+
+  @property
+  def arg_attrs(self):
+    return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])
+
+  @arg_attrs.setter
+  def arg_attrs(self, attribute: Union[ArrayAttr, list]):
+    if isinstance(attribute, ArrayAttr):
+      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
+    else:
+      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
+          attribute, context=self.context)
+
+  @property
+  def arguments(self):
+    return self.entry_block.arguments
+
+  @property
+  def result_attrs(self):
+    return self.attributes[RESULT_ATTRIBUTE_NAME]
+
+  @result_attrs.setter
+  def result_attrs(self, attribute: ArrayAttr):
+    self.attributes[RESULT_ATTRIBUTE_NAME] = attribute
+
+  @classmethod
+  def from_py_func(FuncOp,
+                   *inputs: Type,
+                   results: Optional[Sequence[Type]] = None,
+                   name: Optional[str] = None):
+    """Decorator to define an MLIR FuncOp specified as a python function.
+
+    Requires that an `mlir.ir.InsertionPoint` and `mlir.ir.Location` are
+    active for the current thread (i.e. established in a `with` block).
+
+    When applied as a decorator to a Python function, an entry block will
+    be constructed for the FuncOp with types as specified in `*inputs`. The
+    block arguments will be passed positionally to the Python function. In
+    addition, if the Python function accepts keyword arguments generally or
+    has a corresponding keyword argument, the following will be passed:
+      * `func_op`: The `func` op being defined.
+
+    By default, the function name will be the Python function `__name__`. This
+    can be overridden by passing the `name` argument to the decorator.
+
+    If `results` is not specified, then the decorator will implicitly
+    insert a `ReturnOp` with the `Value`'s returned from the decorated
+    function. It will also set the `FuncOp` type with the actual return
+    value types. If `results` is specified, then the decorated function
+    must return `None` and no implicit `ReturnOp` is added (nor are the result
+    types updated). The implicit behavior is intended for simple, single-block
+    cases, and users should specify result types explicitly for any complicated
+    cases.
+
+    The decorated function can further be called from Python and will insert
+    a `CallOp` at the then-current insertion point, returning either None (
+    if no return values), a unary Value (for one result), or a list of Values).
+    This mechanism cannot be used to emit recursive calls (by construction).
+    """
+
+    def decorator(f):
+      from . import func
+      # Introspect the callable for optional features.
+      sig = inspect.signature(f)
+      has_arg_func_op = False
+      for param in sig.parameters.values():
+        if param.kind == param.VAR_KEYWORD:
+          has_arg_func_op = True
+        if param.name == "func_op" and (param.kind
+                                        == param.POSITIONAL_OR_KEYWORD or
+                                        param.kind == param.KEYWORD_ONLY):
+          has_arg_func_op = True
+
+      # Emit the FuncOp.
+      implicit_return = results is None
+      symbol_name = name or f.__name__
+      function_type = FunctionType.get(
+          inputs=inputs, results=[] if implicit_return else results)
+      func_op = FuncOp(name=symbol_name, type=function_type)
+      with InsertionPoint(func_op.add_entry_block()):
+        func_args = func_op.entry_block.arguments
+        func_kwargs = {}
+        if has_arg_func_op:
+          func_kwargs["func_op"] = func_op
+        return_values = f(*func_args, **func_kwargs)
+        if not implicit_return:
+          return_types = list(results)
+          assert return_values is None, (
+              "Capturing a python function with explicit `results=` "
+              "requires that the wrapped function returns None.")
+        else:
+          # Coerce return values, add ReturnOp and rewrite func type.
+          if return_values is None:
+            return_values = []
+          elif isinstance(return_values, tuple):
+            return_values = list(return_values)
+          elif isinstance(return_values, Value):
+            # Returning a single value is fine, coerce it into a list.
+            return_values = [return_values]
+          elif isinstance(return_values, OpView):
+            # Returning a single operation is fine, coerce its results into a list.
+            return_values = return_values.operation.results
+          elif isinstance(return_values, Operation):
+            # Returning a single operation is fine, coerce its results into a list.
+            return_values = return_values.results
+          else:
+            return_values = list(return_values)
+          func.ReturnOp(return_values)
+          # Recompute the function type.
+          return_types = [v.type for v in return_values]
+          function_type = FunctionType.get(inputs=inputs, results=return_types)
+          func_op.attributes["type"] = TypeAttr.get(function_type)
+
+      def emit_call_op(*call_args):
+        call_op = func.CallOp(return_types, FlatSymbolRefAttr.get(symbol_name),
+                              call_args)
+        if return_types is None:
+          return None
+        elif len(return_types) == 1:
+          return call_op.result
+        else:
+          return call_op.results
+
+      wrapped = emit_call_op
+      wrapped.__name__ = f.__name__
+      wrapped.func_op = func_op
+      return wrapped
+
+    return decorator
+
 class CallOp:
   """Specialization for the call op class."""
 
@@ -45,7 +252,7 @@ def __init__(self,
 
     For example
 
-        f = builtin.FuncOp("foo", ...)
+        f = func.FuncOp("foo", ...)
         func.CallOp(f, [args])
         func.CallOp([result_types], "foo", [args])
 

diff  --git a/mlir/test/Analysis/test-alias-analysis-modref.mlir b/mlir/test/Analysis/test-alias-analysis-modref.mlir
index 91341a5e9ffb2..14502dda3533f 100644
--- a/mlir/test/Analysis/test-alias-analysis-modref.mlir
+++ b/mlir/test/Analysis/test-alias-analysis-modref.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(test-alias-analysis-modref)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis-modref)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
 
 // CHECK-LABEL: Testing : "no_side_effects"
 // CHECK: alloc -> func.region0#0: NoModRef

diff  --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir
index 2f6db065b596e..d8fd43bfc4bdd 100644
--- a/mlir/test/Analysis/test-alias-analysis.mlir
+++ b/mlir/test/Analysis/test-alias-analysis.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(test-alias-analysis)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
 
 // CHECK-LABEL: Testing : "simple"
 // CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias

diff  --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir
index 89d248a8a1494..120b49b5e3f54 100644
--- a/mlir/test/Analysis/test-dominance.mlir
+++ b/mlir/test/Analysis/test-dominance.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-print-dominance)" -split-input-file 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-dominance)" -split-input-file 2>&1 | FileCheck %s
 
 // CHECK-LABEL: Testing : func_condBranch
 func @func_condBranch(%cond : i1) {

diff  --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir
index 6cd990bb29c29..184c65fbcbccb 100644
--- a/mlir/test/Analysis/test-liveness.mlir
+++ b/mlir/test/Analysis/test-liveness.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-print-liveness)" -split-input-file 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-liveness)" -split-input-file 2>&1 | FileCheck %s
 
 // CHECK-LABEL: Testing : func_empty
 func @func_empty() {

diff  --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir
index b5ef4110d9a7d..fd5ecc607d1ff 100644
--- a/mlir/test/Analysis/test-match-reduction.mlir
+++ b/mlir/test/Analysis/test-match-reduction.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-match-reduction)" -verify-diagnostics -split-input-file
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-match-reduction)" -verify-diagnostics -split-input-file
 
 // Verify that the generic reduction detection utility works on 
diff erent
 // dialects.

diff  --git a/mlir/test/Analysis/test-shape-fn-report.mlir b/mlir/test/Analysis/test-shape-fn-report.mlir
index 7587e5d723bf0..19c1b0e5a5bfd 100644
--- a/mlir/test/Analysis/test-shape-fn-report.mlir
+++ b/mlir/test/Analysis/test-shape-fn-report.mlir
@@ -15,7 +15,7 @@ func @tanh(%arg: tensor<10x20xf32>) -> tensor<10x20xf32>
 // The shape function library with some local functions.
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  func.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }

diff  --git a/mlir/test/Analysis/test-topoligical-sort.mlir b/mlir/test/Analysis/test-topoligical-sort.mlir
index 5e49b79986dd3..d3bed56668fc8 100644
--- a/mlir/test/Analysis/test-topoligical-sort.mlir
+++ b/mlir/test/Analysis/test-topoligical-sort.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-print-topological-sort)" 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-topological-sort)" 2>&1 | FileCheck %s
 
 // CHECK-LABEL: Testing : region
 //       CHECK: arith.addi {{.*}} : index

diff  --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 73750c83fdcad..6d1e39f4a95d8 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -26,7 +26,7 @@
 void lowerModuleToLLVM(MlirContext ctx, MlirModule module) {
   MlirPassManager pm = mlirPassManagerCreate(ctx);
   MlirOpPassManager opm = mlirPassManagerGetNestedUnder(
-      pm, mlirStringRefCreateFromCString("builtin.func"));
+      pm, mlirStringRefCreateFromCString("func.func"));
   mlirPassManagerAddOwnedPass(pm, mlirCreateConversionConvertFuncToLLVM());
   mlirOpPassManagerAddOwnedPass(opm,
                                 mlirCreateConversionConvertArithmeticToLLVM());

diff  --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c
index f2769d34cdd2b..5c4e36b9881a0 100644
--- a/mlir/test/CAPI/ir.c
+++ b/mlir/test/CAPI/ir.c
@@ -101,7 +101,7 @@ MlirModule makeAndDumpAdd(MlirContext ctx, MlirLocation location) {
           mlirIdentifierGet(ctx, mlirStringRefCreateFromCString("sym_name")),
           funcNameAttr)};
   MlirOperationState funcState = mlirOperationStateGet(
-      mlirStringRefCreateFromCString("builtin.func"), location);
+      mlirStringRefCreateFromCString("func.func"), location);
   mlirOperationStateAddAttributes(&funcState, 2, funcAttrs);
   mlirOperationStateAddOwnedRegions(&funcState, 1, &funcBodyRegion);
   MlirOperation func = mlirOperationCreate(&funcState);

diff  --git a/mlir/test/CAPI/pass.c b/mlir/test/CAPI/pass.c
index d83d44d929312..69d052c3131a3 100644
--- a/mlir/test/CAPI/pass.c
+++ b/mlir/test/CAPI/pass.c
@@ -42,7 +42,7 @@ void testRunPassOnModule() {
   // Run the print-op-stats pass on the top-level module:
   // CHECK-LABEL: Operations encountered:
   // CHECK: arith.addi        , 1
-  // CHECK: builtin.func      , 1
+  // CHECK: func.func      , 1
   // CHECK: func.return        , 1
   {
     MlirPassManager pm = mlirPassManagerCreate(ctx);
@@ -84,12 +84,12 @@ void testRunPassOnNestedModule() {
   // Run the print-op-stats pass on functions under the top-level module:
   // CHECK-LABEL: Operations encountered:
   // CHECK: arith.addi        , 1
-  // CHECK: builtin.func      , 1
+  // CHECK: func.func      , 1
   // CHECK: func.return        , 1
   {
     MlirPassManager pm = mlirPassManagerCreate(ctx);
     MlirOpPassManager nestedFuncPm = mlirPassManagerGetNestedUnder(
-        pm, mlirStringRefCreateFromCString("builtin.func"));
+        pm, mlirStringRefCreateFromCString("func.func"));
     MlirPass printOpStatPass = mlirCreateTransformsPrintOpStats();
     mlirOpPassManagerAddOwnedPass(nestedFuncPm, printOpStatPass);
     MlirLogicalResult success = mlirPassManagerRun(pm, module);
@@ -100,14 +100,14 @@ void testRunPassOnNestedModule() {
   // Run the print-op-stats pass on functions under the nested module:
   // CHECK-LABEL: Operations encountered:
   // CHECK: arith.addf        , 1
-  // CHECK: builtin.func      , 1
+  // CHECK: func.func      , 1
   // CHECK: func.return        , 1
   {
     MlirPassManager pm = mlirPassManagerCreate(ctx);
     MlirOpPassManager nestedModulePm = mlirPassManagerGetNestedUnder(
         pm, mlirStringRefCreateFromCString("builtin.module"));
     MlirOpPassManager nestedFuncPm = mlirOpPassManagerGetNestedUnder(
-        nestedModulePm, mlirStringRefCreateFromCString("builtin.func"));
+        nestedModulePm, mlirStringRefCreateFromCString("func.func"));
     MlirPass printOpStatPass = mlirCreateTransformsPrintOpStats();
     mlirOpPassManagerAddOwnedPass(nestedFuncPm, printOpStatPass);
     MlirLogicalResult success = mlirPassManagerRun(pm, module);
@@ -132,19 +132,19 @@ void testPrintPassPipeline() {
   MlirOpPassManager nestedModulePm = mlirPassManagerGetNestedUnder(
       pm, mlirStringRefCreateFromCString("builtin.module"));
   MlirOpPassManager nestedFuncPm = mlirOpPassManagerGetNestedUnder(
-      nestedModulePm, mlirStringRefCreateFromCString("builtin.func"));
+      nestedModulePm, mlirStringRefCreateFromCString("func.func"));
   MlirPass printOpStatPass = mlirCreateTransformsPrintOpStats();
   mlirOpPassManagerAddOwnedPass(nestedFuncPm, printOpStatPass);
 
   // Print the top level pass manager
-  // CHECK: Top-level: builtin.module(builtin.func(print-op-stats))
+  // CHECK: Top-level: builtin.module(func.func(print-op-stats))
   fprintf(stderr, "Top-level: ");
   mlirPrintPassPipeline(mlirPassManagerGetAsOpPassManager(pm), printToStderr,
                         NULL);
   fprintf(stderr, "\n");
 
   // Print the pipeline nested one level down
-  // CHECK: Nested Module: builtin.func(print-op-stats)
+  // CHECK: Nested Module: func.func(print-op-stats)
   fprintf(stderr, "Nested Module: ");
   mlirPrintPassPipeline(nestedModulePm, printToStderr, NULL);
   fprintf(stderr, "\n");
@@ -165,8 +165,8 @@ void testParsePassPipeline() {
   // Try parse a pipeline.
   MlirLogicalResult status = mlirParsePassPipeline(
       mlirPassManagerGetAsOpPassManager(pm),
-      mlirStringRefCreateFromCString(
-          "builtin.module(builtin.func(print-op-stats), builtin.func(print-op-stats))"));
+      mlirStringRefCreateFromCString("builtin.module(func.func(print-op-stats),"
+                                     " func.func(print-op-stats))"));
   // Expect a failure, we haven't registered the print-op-stats pass yet.
   if (mlirLogicalResultIsSuccess(status)) {
     fprintf(stderr, "Unexpected success parsing pipeline without registering the pass\n");
@@ -176,15 +176,16 @@ void testParsePassPipeline() {
   mlirRegisterTransformsPrintOpStats();
   status = mlirParsePassPipeline(
       mlirPassManagerGetAsOpPassManager(pm),
-      mlirStringRefCreateFromCString(
-          "builtin.module(builtin.func(print-op-stats), builtin.func(print-op-stats))"));
+      mlirStringRefCreateFromCString("builtin.module(func.func(print-op-stats),"
+                                     " func.func(print-op-stats))"));
   // Expect a failure, we haven't registered the print-op-stats pass yet.
   if (mlirLogicalResultIsFailure(status)) {
     fprintf(stderr, "Unexpected failure parsing pipeline after registering the pass\n");
     exit(EXIT_FAILURE);
   }
 
-  // CHECK: Round-trip: builtin.module(builtin.func(print-op-stats), builtin.func(print-op-stats))
+  // CHECK: Round-trip: builtin.module(func.func(print-op-stats),
+  // func.func(print-op-stats))
   fprintf(stderr, "Round-trip: ");
   mlirPrintPassPipeline(mlirPassManagerGetAsOpPassManager(pm), printToStderr,
                         NULL);

diff  --git a/mlir/test/Conversion/ArithmeticToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithmeticToLLVM/arith-to-llvm.mlir
index 0f4b02173c067..fb25534e62b52 100644
--- a/mlir/test/Conversion/ArithmeticToLLVM/arith-to-llvm.mlir
+++ b/mlir/test/Conversion/ArithmeticToLLVM/arith-to-llvm.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
 
 // CHECK-LABEL: @vector_ops
 func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> {

diff  --git a/mlir/test/Conversion/ArithmeticToLLVM/convert-nd-vector-to-llvmir.mlir b/mlir/test/Conversion/ArithmeticToLLVM/convert-nd-vector-to-llvmir.mlir
index 328b3eddb066f..5d324bafed249 100644
--- a/mlir/test/Conversion/ArithmeticToLLVM/convert-nd-vector-to-llvmir.mlir
+++ b/mlir/test/Conversion/ArithmeticToLLVM/convert-nd-vector-to-llvmir.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
 
 // CHECK-LABEL: @vec_bin
 func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> {

diff  --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index 2efd19439b5da..4cb67167adf55 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-complex-to-standard)" | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard)" | FileCheck %s
 
 // CHECK-LABEL: func @complex_abs
 // CHECK-SAME: %[[ARG:.*]]: complex<f32>

diff  --git a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir
index 135af60297b0f..b155ccbfea8db 100644
--- a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-complex-to-standard),convert-complex-to-llvm,builtin.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard),convert-complex-to-llvm,func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | FileCheck %s
 
 // CHECK-LABEL: llvm.func @complex_abs
 // CHECK-SAME: %[[ARG:.*]]: ![[C_TY:.*]])

diff  --git a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir
index 70af9af588421..d4da48d04c849 100644
--- a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" -split-input-file %s | FileCheck %s
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR
+// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR
 
 // BAREPTR-LABEL: func @check_noalias
 // BAREPTR-SAME: %{{.*}}: !llvm.ptr<f32> {llvm.noalias}, %{{.*}}: !llvm.ptr<f32> {llvm.noalias}

diff  --git a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir
index 2d328579d3015..4a5089efe9403 100644
--- a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" %s -split-input-file | FileCheck %s
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" %s -split-input-file | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
 
 // CHECK-LABEL: func @empty() {
 // CHECK-NEXT:  llvm.return

diff  --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 6e6f0a967ab44..973166b2b5b4d 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -4,7 +4,7 @@
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_ops()
   // CHECK32-LABEL: func @gpu_index_ops()
-  builtin.func @gpu_index_ops()
+  func.func @gpu_index_ops()
       -> (index, index, index, index, index, index,
           index, index, index, index, index, index) {
     // CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@@ -61,7 +61,7 @@ gpu.module @test_module {
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_comp
   // CHECK32-LABEL: func @gpu_index_comp
-  builtin.func @gpu_index_comp(%idx : index) -> index {
+  func.func @gpu_index_comp(%idx : index) -> index {
     // CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
     // CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
     %0 = arith.addi %idx, %idx : index
@@ -109,7 +109,7 @@ gpu.module @test_module {
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_shuffle()
-  builtin.func @gpu_shuffle() -> (f32, f32, f32, f32) {
+  func.func @gpu_shuffle() -> (f32, f32, f32, f32) {
     // CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
     %arg0 = arith.constant 1.0 : f32
     // CHECK: %[[#OFFSET:]] = llvm.mlir.constant(4 : i32) : i32
@@ -148,7 +148,7 @@ gpu.module @test_module {
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_sync()
-  builtin.func @gpu_sync() {
+  func.func @gpu_sync() {
     // CHECK: nvvm.barrier0
     gpu.barrier
     func.return
@@ -161,7 +161,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_fabsf(f32) -> f32
   // CHECK: llvm.func @__nv_fabs(f64) -> f64
   // CHECK-LABEL: func @gpu_fabs
-  builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.abs %arg_f32 : f32
     // CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (f32) -> f32
     %result64 = math.abs %arg_f64 : f64
@@ -176,7 +176,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_ceilf(f32) -> f32
   // CHECK: llvm.func @__nv_ceil(f64) -> f64
   // CHECK-LABEL: func @gpu_ceil
-  builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.ceil %arg_f32 : f32
     // CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (f32) -> f32
     %result64 = math.ceil %arg_f64 : f64
@@ -191,7 +191,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_floorf(f32) -> f32
   // CHECK: llvm.func @__nv_floor(f64) -> f64
   // CHECK-LABEL: func @gpu_floor
-  builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.floor %arg_f32 : f32
     // CHECK: llvm.call @__nv_floorf(%{{.*}}) : (f32) -> f32
     %result64 = math.floor %arg_f64 : f64
@@ -206,7 +206,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_cosf(f32) -> f32
   // CHECK: llvm.func @__nv_cos(f64) -> f64
   // CHECK-LABEL: func @gpu_cos
-  builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.cos %arg_f32 : f32
     // CHECK: llvm.call @__nv_cosf(%{{.*}}) : (f32) -> f32
     %result64 = math.cos %arg_f64 : f64
@@ -220,7 +220,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_expf(f32) -> f32
   // CHECK: llvm.func @__nv_exp(f64) -> f64
   // CHECK-LABEL: func @gpu_exp
-  builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.exp %arg_f32 : f32
     // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
     %result64 = math.exp %arg_f64 : f64
@@ -234,7 +234,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_exp2f(f32) -> f32
   // CHECK: llvm.func @__nv_exp2(f64) -> f64
   // CHECK-LABEL: func @gpu_exp2
-  builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.exp2 %arg_f32 : f32
     // CHECK: llvm.call @__nv_exp2f(%{{.*}}) : (f32) -> f32
     %result64 = math.exp2 %arg_f64 : f64
@@ -249,7 +249,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_logf(f32) -> f32
   // CHECK: llvm.func @__nv_log(f64) -> f64
   // CHECK-LABEL: func @gpu_log
-  builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log %arg_f32 : f32
     // CHECK: llvm.call @__nv_logf(%{{.*}}) : (f32) -> f32
     %result64 = math.log %arg_f64 : f64
@@ -264,7 +264,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_log10f(f32) -> f32
   // CHECK: llvm.func @__nv_log10(f64) -> f64
   // CHECK-LABEL: func @gpu_log10
-  builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log10 %arg_f32 : f32
     // CHECK: llvm.call @__nv_log10f(%{{.*}}) : (f32) -> f32
     %result64 = math.log10 %arg_f64 : f64
@@ -279,7 +279,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_log1pf(f32) -> f32
   // CHECK: llvm.func @__nv_log1p(f64) -> f64
   // CHECK-LABEL: func @gpu_log1p
-  builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log1p %arg_f32 : f32
     // CHECK: llvm.call @__nv_log1pf(%{{.*}}) : (f32) -> f32
     %result64 = math.log1p %arg_f64 : f64
@@ -294,7 +294,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_log2f(f32) -> f32
   // CHECK: llvm.func @__nv_log2(f64) -> f64
   // CHECK-LABEL: func @gpu_log2
-  builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log2 %arg_f32 : f32
     // CHECK: llvm.call @__nv_log2f(%{{.*}}) : (f32) -> f32
     %result64 = math.log2 %arg_f64 : f64
@@ -309,7 +309,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_sinf(f32) -> f32
   // CHECK: llvm.func @__nv_sin(f64) -> f64
   // CHECK-LABEL: func @gpu_sin
-  builtin.func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.sin %arg_f32 : f32
     // CHECK: llvm.call @__nv_sinf(%{{.*}}) : (f32) -> f32
     %result64 = math.sin %arg_f64 : f64
@@ -324,7 +324,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_tanhf(f32) -> f32
   // CHECK: llvm.func @__nv_tanh(f64) -> f64
   // CHECK-LABEL: func @gpu_tanh
-  builtin.func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+  func.func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
     %result16 = math.tanh %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
     // CHECK-NEXT: llvm.call @__nv_tanhf(%{{.*}}) : (f32) -> f32
@@ -343,7 +343,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_rsqrtf(f32) -> f32
   // CHECK: llvm.func @__nv_rsqrt(f64) -> f64
   // CHECK-LABEL: func @gpu_rsqrt
-  builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  func.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.rsqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -363,7 +363,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_sqrtf(f32) -> f32
   // CHECK: llvm.func @__nv_sqrt(f64) -> f64
   // CHECK-LABEL: func @gpu_sqrt
-  builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.sqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -383,7 +383,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_atanf(f32) -> f32
   // CHECK: llvm.func @__nv_atan(f64) -> f64
   // CHECK-LABEL: func @gpu_atan
-  builtin.func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  func.func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.atan %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -403,7 +403,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_atan2f(f32, f32) -> f32
   // CHECK: llvm.func @__nv_atan2(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_atan2
-  builtin.func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  func.func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.atan2 %arg_f16, %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -427,7 +427,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_expf(f32) -> f32
   // CHECK: llvm.func @__nv_exp(f64) -> f64
   // CHECK-LABEL: func @gpu_exp
-    builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+    func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
       %result32 = math.exp %arg_f32 : f32
       // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
       %result64 = math.exp %arg_f64 : f64
@@ -444,7 +444,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_expm1f(f32) -> f32
   // CHECK: llvm.func @__nv_expm1(f64) -> f64
   // CHECK-LABEL: func @gpu_expm1
-  builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.expm1 %arg_f32 : f32
     // CHECK: llvm.call @__nv_expm1f(%{{.*}}) : (f32) -> f32
     %result64 = math.expm1 %arg_f64 : f64
@@ -459,7 +459,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_powf(f32, f32) -> f32
   // CHECK: llvm.func @__nv_pow(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_pow
-  builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.powf %arg_f32, %arg_f32 : f32
     // CHECK: llvm.call @__nv_powf(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
     %result64 = math.powf %arg_f64, %arg_f64 : f64

diff  --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
index 258d5cc41e7cb..ef8b8168b6c9d 100644
--- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
@@ -6,7 +6,7 @@ gpu.module @test_module {
   // CHECK-LABEL: func @gpu_wmma_load_op() ->
   // CHECK-SAME: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> {
   // CHECK32-LABEL: func @gpu_wmma_load_op() ->
-  builtin.func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
+  func.func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
     %i = arith.constant 16 : index
     %j = arith.constant 16 : index
@@ -46,7 +46,7 @@ gpu.module @test_module {
   // CHECK-SAME: (%[[D:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) {
   // CHECK32-LABEL: func @gpu_wmma_store_op
   // CHECK32-SAME: (%[[D:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) {
-  builtin.func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
+  func.func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
     %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
     %i = arith.constant 16 : index
     %j = arith.constant 16 : index
@@ -92,7 +92,7 @@ gpu.module @test_module {
 
   // CHECK-LABEL: func @gpu_wmma_mma_op
   // CHECK-SAME: (%[[A:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[B:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[C:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>)
-  builtin.func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
+  func.func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
     %D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
     // CHECK:  %[[A1:.*]] = llvm.extractvalue %[[A]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
     // CHECK:  %[[A2:.*]] = llvm.extractvalue %[[A]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
@@ -163,7 +163,7 @@ gpu.module @test_module {
 //       CHECK:   %[[E3:.+]] = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK:   nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] {eltype = #nvvm.mma_type<f16>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32} : !llvm.ptr<f16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>
 
-  builtin.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
+  func.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
       %c0 = arith.constant 0 : index
       %c128 = arith.constant 128 : index
       %c32 = arith.constant 32 : index
@@ -202,7 +202,7 @@ gpu.module @test_module {
 //       CHECK: %[[M3:.+]] = llvm.insertvalue %[[V2]], %[[M2]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK: %[[M4:.+]] = llvm.insertvalue %[[V2]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
-  builtin.func @gpu_wmma_constant_op()  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
+  func.func @gpu_wmma_constant_op()  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
     %cst = arith.constant 1.0 : f16
     %C = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf16, "COp">
     return %C : !gpu.mma_matrix<16x16xf16, "COp">
@@ -232,7 +232,7 @@ gpu.module @test_module {
 //       CHECK: %[[C3:.*]] = llvm.fadd %[[A3]], %[[B3]]  : vector<2xf16>
 //       CHECK: %[[M4:.*]] = llvm.insertvalue %[[C3]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
-  builtin.func @gpu_wmma_elementwise(%A : !gpu.mma_matrix<16x16xf16, "COp">, %B : !gpu.mma_matrix<16x16xf16, "COp">)  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
+  func.func @gpu_wmma_elementwise(%A : !gpu.mma_matrix<16x16xf16, "COp">, %B : !gpu.mma_matrix<16x16xf16, "COp">)  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
     %C = gpu.subgroup_mma_elementwise addf %A, %B : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp">
     return %C : !gpu.mma_matrix<16x16xf16, "COp">
   }

diff  --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index 89c1ee0212b59..a50f5dd0314e6 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -4,7 +4,7 @@
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_ops()
   // CHECK32-LABEL: func @gpu_index_ops()
-  builtin.func @gpu_index_ops()
+  func.func @gpu_index_ops()
       -> (index, index, index, index, index, index,
           index, index, index, index, index, index) {
     // CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@@ -61,7 +61,7 @@ gpu.module @test_module {
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_comp
   // CHECK32-LABEL: func @gpu_index_comp
-  builtin.func @gpu_index_comp(%idx : index) -> index {
+  func.func @gpu_index_comp(%idx : index) -> index {
     // CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
     // CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
     %0 = arith.addi %idx, %idx : index
@@ -75,7 +75,7 @@ gpu.module @test_module {
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_sync()
-  builtin.func @gpu_sync() {
+  func.func @gpu_sync() {
     // CHECK: rocdl.barrier
     gpu.barrier
     func.return
@@ -88,7 +88,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_fabs_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_fabs_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_fabs
-  builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.abs %arg_f32 : f32
     // CHECK: llvm.call @__ocml_fabs_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.abs %arg_f64 : f64
@@ -103,7 +103,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_ceil_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_ceil_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_ceil
-  builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.ceil %arg_f32 : f32
     // CHECK: llvm.call @__ocml_ceil_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.ceil %arg_f64 : f64
@@ -118,7 +118,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_floor_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_floor_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_floor
-  builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.floor %arg_f32 : f32
     // CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.floor %arg_f64 : f64
@@ -133,7 +133,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_cos_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_cos_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_cos
-  builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.cos %arg_f32 : f32
     // CHECK: llvm.call @__ocml_cos_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.cos %arg_f64 : f64
@@ -148,7 +148,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_exp
-  builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %exp_f32 = math.exp %arg_f32 : f32
     // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
     %result32 = math.exp %exp_f32 : f32
@@ -165,7 +165,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_exp2_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_exp2_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_exp2
-  builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %exp2_f32 = math.exp2 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_exp2_f32(%{{.*}}) : (f32) -> f32
     %result32 = math.exp2 %exp2_f32 : f32
@@ -185,7 +185,7 @@ gpu.module @test_module {
     // CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
     // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
     // CHECK-LABEL: func @gpu_exp
-    builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+    func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
       %exp_f32 = math.exp %arg_f32 : f32
       // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
       %result32 = math.exp %exp_f32 : f32
@@ -204,7 +204,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_expm1_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_expm1_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_expm1
-  builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %expm1_f32 = math.expm1 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_expm1_f32(%{{.*}}) : (f32) -> f32
     %result32 = math.expm1 %expm1_f32 : f32
@@ -221,7 +221,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log
-  builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log %arg_f64 : f64
@@ -236,7 +236,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log1p_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log1p_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log1p
-  builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log1p %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log1p_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log1p %arg_f64 : f64
@@ -251,7 +251,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log10_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log10_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log10
-  builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log10 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log10_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log10 %arg_f64 : f64
@@ -266,7 +266,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log2_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log2_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log2
-  builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log2 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log2_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log2 %arg_f64 : f64
@@ -281,7 +281,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_rsqrt_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_rsqrt_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_rsqrt
-  builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  func.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.rsqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -301,7 +301,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_sqrt_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_sqrt_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_sqrt
-  builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.sqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -321,7 +321,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_tanh_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_tanh_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_tanh
-  builtin.func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.tanh %arg_f32 : f32
     // CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.tanh %arg_f64 : f64
@@ -336,7 +336,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_atan_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_atan_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_atan
-  builtin.func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.atan %arg_f32 : f32
     // CHECK: llvm.call @__ocml_atan_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.atan %arg_f64 : f64
@@ -351,7 +351,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_atan2_f32(f32, f32) -> f32
   // CHECK: llvm.func @__ocml_atan2_f64(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_atan2
-  builtin.func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.atan2 %arg_f32, %arg_f32 : f32
     // CHECK: llvm.call @__ocml_atan2_f32(%{{.*}}) : (f32, f32) -> f32
     %result64 = math.atan2 %arg_f64, %arg_f64 : f64
@@ -366,7 +366,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_pow_f32(f32, f32) -> f32
   // CHECK: llvm.func @__ocml_pow_f64(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_pow
-  builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  func.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.powf %arg_f32, %arg_f32 : f32
     // CHECK: llvm.call @__ocml_pow_f32(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
     %result64 = math.powf %arg_f64, %arg_f64 : f64

diff  --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
index ab687fca536fa..78e31c32bb434 100644
--- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
+++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.func(convert-math-to-llvm)" | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-math-to-llvm)" | FileCheck %s
 
 // CHECK-LABEL: @ops
 func @ops(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64) {

diff  --git a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir
index 93beb56a0ce8d..1d6de6fe56acd 100644
--- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir
+++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-THREADS %s
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0})" %s | FileCheck --check-prefix=CHECK-BLOCKS %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-THREADS %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0})" %s | FileCheck --check-prefix=CHECK-BLOCKS %s
 
 // CHECK-THREADS-LABEL: @one_d_loop
 // CHECK-BLOCKS-LABEL: @one_d_loop

diff  --git a/mlir/test/Conversion/SCFToGPU/step_one.mlir b/mlir/test/Conversion/SCFToGPU/step_one.mlir
index 4acd669160ab0..5a571401cf07c 100644
--- a/mlir/test/Conversion/SCFToGPU/step_one.mlir
+++ b/mlir/test/Conversion/SCFToGPU/step_one.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-11 %s
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2})" %s | FileCheck --check-prefix=CHECK-22 %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-11 %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2})" %s | FileCheck --check-prefix=CHECK-22 %s
 
 // CHECK-11-LABEL: @step_1
 // CHECK-22-LABEL: @step_1

diff  --git a/mlir/test/Conversion/SCFToGPU/step_positive.mlir b/mlir/test/Conversion/SCFToGPU/step_positive.mlir
index 29987eb29a7ba..87e59c6264dc0 100644
--- a/mlir/test/Conversion/SCFToGPU/step_positive.mlir
+++ b/mlir/test/Conversion/SCFToGPU/step_positive.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck %s
 
 // CHECK-LABEL: @step_var
 func @step_var(%A : memref<?x?xf32>, %B : memref<?x?xf32>) {

diff  --git a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
index eda81bdaa2d2c..7624ab8119b9f 100644
--- a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
+++ b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-shape-constraints)" <%s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-shape-constraints)" <%s | FileCheck %s
 
 // There's not very much useful to check here other than pasting the output.
 // CHECK-LABEL:   func @cstr_broadcastable(

diff  --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index 6d568183494b6..d769c5e96250e 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.func(tosa-to-linalg-named)" %s -verify-diagnostics -o -| FileCheck %s
+// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg-named)" %s -verify-diagnostics -o -| FileCheck %s
 
 // CHECK-LABEL: @matmul
 func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {

diff  --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index fa2f7b82af243..9ce5377f1eb19 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.func(tosa-to-linalg)" %s -verify-diagnostics -o -| FileCheck %s
+// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg)" %s -verify-diagnostics -o -| FileCheck %s
 
 // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
 

diff  --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
index 3c83716812e4f..bb309c5363421 100644
--- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-gpu)" -canonicalize | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu)" -canonicalize | FileCheck %s
 
 #map0 = affine_map<(d0, d1) -> (d1, d0)>
 #map1 = affine_map<(d0, d1, d2) -> (d0, d2)>

diff  --git a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
index b98239bfcda66..c10ef3867da3e 100644
--- a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
+++ b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -convert-vector-to-rocdl | FileCheck %s
 
 gpu.module @test_read{
-builtin.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
+func.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
   %f0 = arith.constant 0.0: f32
   %f = vector.transfer_read %A[%base], %f0
       {permutation_map = affine_map<(d0) -> (d0)>} :
@@ -11,7 +11,7 @@ builtin.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32>
 // CHECK-LABEL: @transfer_readx2
 // CHECK: rocdl.buffer.load {{.*}} vector<2xf32>
 
-builtin.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
+func.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
   %f0 = arith.constant 0.0: f32
   %f = vector.transfer_read %A[%base], %f0
       {permutation_map = affine_map<(d0) -> (d0)>} :
@@ -21,7 +21,7 @@ builtin.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32>
 // CHECK-LABEL: @transfer_readx4
 // CHECK: rocdl.buffer.load {{.*}} vector<4xf32>
 
-builtin.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
+func.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
   %f0 = arith.constant 0.0: f32
   %f = vector.transfer_read %A[%base], %f0
       {permutation_map = affine_map<(d0) -> (d0)>} :
@@ -36,7 +36,7 @@ builtin.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vec
 }
 
 gpu.module @test_write{
-builtin.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
+func.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
   vector.transfer_write %B, %A[%base]
       {permutation_map = affine_map<(d0) -> (d0)>} :
     vector<2xf32>, memref<?xf32>
@@ -45,7 +45,7 @@ builtin.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: in
 // CHECK-LABEL: @transfer_writex2
 // CHECK: rocdl.buffer.store {{.*}} vector<2xf32>
 
-builtin.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
+func.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
   vector.transfer_write %B, %A[%base]
       {permutation_map = affine_map<(d0) -> (d0)>} :
     vector<4xf32>, memref<?xf32>
@@ -54,7 +54,7 @@ builtin.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: in
 // CHECK-LABEL: @transfer_writex4
 // CHECK: rocdl.buffer.store {{.*}} vector<4xf32>
 
-builtin.func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
+func.func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
   vector.transfer_write %B, %A[%base]
       {permutation_map = affine_map<(d0) -> (d0)>} :
     vector<2xf32>, memref<?xf32>

diff  --git a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir
index ee373b2597200..539828d1bbcb1 100644
--- a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s
 
 // CHECK-LABEL: func @transfer_read_2d(
 //       CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<vector<4x9xf32>>

diff  --git a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir
index 7a3ad16b7a815..c03253d4cba6a 100644
--- a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s
 
 // CHECK-LABEL: func @transfer_read_2d(
 //       CHECK: %[[V_INIT:.*]] = arith.constant dense<-4.200000e+01> : vector<4x9xf32>

diff  --git a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
index 0b9db046207ba..e17588be0592b 100644
--- a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s
 
 // CHECK-LABEL: func @transfer_read_inbounds
 func @transfer_read_inbounds(%A : memref<?x?x?xf32>) -> (vector<2x3x4xf32>) {

diff  --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir
index 781f0bd470364..035814b11c6cb 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{lower-permutation-maps=true})" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true})" -split-input-file | FileCheck %s
 
 // Ensure that the permutation map is lowered (by inserting a transpose op)
 // before lowering the vector.transfer_read.

diff  --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index 8cbac26486e2f..471d3992cf5ae 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf)" -split-input-file -allow-unregistered-dialect | FileCheck %s
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf)" -split-input-file -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL
 
 // CHECK-LABEL: func @vector_transfer_ops_0d(
 func @vector_transfer_ops_0d(%M: memref<f32>) {

diff  --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index cd6b08a7bab11..d0e9d1dbf95d5 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s
 
 // -----
 

diff  --git a/mlir/test/Dialect/Affine/loop-unswitch.mlir b/mlir/test/Dialect/Affine/loop-unswitch.mlir
index e7c3a7d585090..1ba3d30bdc6ba 100644
--- a/mlir/test/Dialect/Affine/loop-unswitch.mlir
+++ b/mlir/test/Dialect/Affine/loop-unswitch.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.func(test-affine-loop-unswitch)" | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(test-affine-loop-unswitch)" | FileCheck %s
 
 // CHECK-DAG: #[[$SET:.*]] = affine_set<(d0) : (d0 - 2 >= 0)>
 

diff  --git a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir
index 06efd13f86ead..1b39756746bc8 100644
--- a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir
+++ b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-memref-stride-calculation)" -o /dev/null | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-memref-stride-calculation)" -o /dev/null | FileCheck %s
 
 func @f(%0: index) {
 // CHECK-LABEL: Testing: f

diff  --git a/mlir/test/Dialect/ControlFlow/canonicalize.mlir b/mlir/test/Dialect/ControlFlow/canonicalize.mlir
index 528fc2c16bc90..2e6a6b8e4a9eb 100644
--- a/mlir/test/Dialect/ControlFlow/canonicalize.mlir
+++ b/mlir/test/Dialect/ControlFlow/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline='builtin.func(canonicalize)' -split-input-file | FileCheck --dump-input-context 20 %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck --dump-input-context 20 %s
 
 /// Test the folding of BranchOp.
 

diff  --git a/mlir/test/Dialect/LLVMIR/terminator.mlir b/mlir/test/Dialect/LLVMIR/terminator.mlir
index 04c9eb0f567d2..4902e89ff0beb 100644
--- a/mlir/test/Dialect/LLVMIR/terminator.mlir
+++ b/mlir/test/Dialect/LLVMIR/terminator.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline='builtin.func(canonicalize)' %s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline='func.func(canonicalize)' %s | FileCheck %s
 // verify that terminators survive the canonicalizer
 
 // CHECK-LABEL: @return

diff  --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
index 2324afb67ad63..b12eee16cb26a 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
@@ -784,7 +784,7 @@ func @write_into_constant_via_alias(%v : vector<5xi32>,
 
 // -----
 
-builtin.func @matmul_on_tensors(
+func.func @matmul_on_tensors(
     %arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
     %arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
     %arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
@@ -822,7 +822,7 @@ builtin.func @matmul_on_tensors(
 
 // -----
 
-builtin.func @matmul_on_tensors(
+func.func @matmul_on_tensors(
     %arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
     %arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
     %arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})

diff  --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
index cab6f5d5983bc..3fafc6e86d570 100644
--- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
+++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-elementwise-to-linalg)" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-elementwise-to-linalg)" -split-input-file %s | FileCheck %s
 
 // In-depth checking of the linalg.generic op for a very trivial case.
 // CHECK: #[[$MAP:.*]] = affine_map<() -> ()>

diff  --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
index ccab913606773..33ab4126b5894 100644
--- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="builtin.func(linalg-detensorize{aggressive-mode})" | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s
 
 #map = affine_map<() -> ()>
 

diff  --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
index b68c05b16c2f3..0c7e3e9322561 100644
--- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="builtin.func(linalg-detensorize)" | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s
 
 // TODO: Detensoring breaks if %arg0 or %arg1 are passed directly as tensors. Fix that.
 func @if_true_test(%arg0: i1, %arg1: i32) -> tensor<i32> attributes {} {

diff  --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir
index d53d1797ca67f..1fcd7f6c7a8a6 100644
--- a/mlir/test/Dialect/Linalg/detensorize_if.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="builtin.func(linalg-detensorize)" | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s
 
 #map0 = affine_map<() -> ()>
 

diff  --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
index 76b99d916acb1..ad851a736d248 100644
--- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF
+// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL
+// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF
 
 
 #map0 = affine_map<() -> ()>

diff  --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir
index b90e6926d68af..44c38120f6e53 100644
--- a/mlir/test/Dialect/Linalg/detensorize_while.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF
+// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL
+// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF
 
 #map0 = affine_map<() -> ()>
 

diff  --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
index 6200a13e3c7ce..0acb82cefd8ab 100644
--- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF
+// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL
+// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF
 
 #map0 = affine_map<() -> ()>
 #map1 = affine_map<(i) -> ()>

diff  --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
index d0c22f396275c..993d52225f37e 100644
--- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="builtin.func(linalg-detensorize)" | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s
 
 #map0 = affine_map<() -> ()>
 

diff  --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
index ef1ed2618012e..ca3bf0aa61880 100644
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.func(linalg-fold-unit-extent-dims)" | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(linalg-fold-unit-extent-dims)" | FileCheck %s
 
 #accesses = [
   affine_map<(i, j, k, l, m) -> (i, k, m)>,

diff  --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir
index 123625c96a49d..4576cf0d8f5b8 100644
--- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir
+++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.func(linalg-fold-unit-extent-dims{fold-one-trip-loops-only})" | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(linalg-fold-unit-extent-dims{fold-one-trip-loops-only})" | FileCheck %s
 
 #accesses = [
   affine_map<(i, j, k, l, m) -> (i, k, m)>,

diff  --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
index 10dc8d3fcb7d2..37605503af4d2 100644
--- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(test-linalg-tile-and-fuse{tile-sizes=16,32,64}),resolve-shaped-type-result-dims,canonicalize,cse" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(test-linalg-tile-and-fuse{tile-sizes=16,32,64}),resolve-shaped-type-result-dims,canonicalize,cse" -split-input-file %s | FileCheck %s
 
 module {
   func @three_op_fusion(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>,

diff  --git a/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir
index 7ad921b582c96..85c6cca7e366b 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul fuse tile-sizes=0,0,0 run-enable-pass=false" -split-input-file | FileCheck %s
 
-builtin.func @no_fuse_gemm(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+func.func @no_fuse_gemm(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %cst = arith.constant 0.0 : f32

diff  --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
index d3f670d2b67b9..0126ad1a026e2 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
@@ -8,7 +8,7 @@
 
 //      MATMUL:  fuse_input
 // MATMUL-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
-builtin.func @fuse_input(%arg0: tensor<24x12xf32>,
+func.func @fuse_input(%arg0: tensor<24x12xf32>,
                          %arg1: tensor<12x25xf32>,
                          %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   %c0 = arith.constant 0 : index
@@ -44,7 +44,7 @@ builtin.func @fuse_input(%arg0: tensor<24x12xf32>,
 
 //      MATMUL:  fuse_output
 // MATMUL-SAME:    %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
-builtin.func @fuse_output(%arg0: tensor<24x12xf32>,
+func.func @fuse_output(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   //  MATMUL-DAG:  %[[C0:.*]] = arith.constant 0 : index
@@ -96,7 +96,7 @@ builtin.func @fuse_output(%arg0: tensor<24x12xf32>,
 //      MATMUL:  fuse_reduction
 // MATMUL-SAME:    %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>
 // MATMUL-SAME:    %[[ARG3:[0-9a-zA-Z]*]]: tensor<12x7x25xf32>
-builtin.func @fuse_reduction(%arg0: tensor<24x12xf32>,
+func.func @fuse_reduction(%arg0: tensor<24x12xf32>,
                              %arg1: tensor<12x25xf32>,
                              %arg2: tensor<24x25xf32>,
                              %arg3: tensor<12x7x25xf32>) -> tensor<24x25xf32> {
@@ -140,7 +140,7 @@ builtin.func @fuse_reduction(%arg0: tensor<24x12xf32>,
 //      MATMUL:  fuse_transposed
 // MATMUL-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
 // MATMUL-SAME:    %[[ARG3:[0-9a-zA-Z]*]]: tensor<12x24xf32>
-builtin.func @fuse_transposed(%arg0: tensor<24x12xf32>,
+func.func @fuse_transposed(%arg0: tensor<24x12xf32>,
                               %arg1: tensor<12x25xf32>,
                               %arg2: tensor<24x25xf32>,
                               %arg3: tensor<12x24xf32>) -> tensor<24x25xf32> {
@@ -175,7 +175,7 @@ builtin.func @fuse_transposed(%arg0: tensor<24x12xf32>,
 //      MATMUL:  fuse_input_and_output
 // MATMUL-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
 // MATMUL-SAME:    %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
-builtin.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
+func.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
                                     %arg1: tensor<12x25xf32>,
                                     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   %c0 = arith.constant 0 : index
@@ -210,7 +210,7 @@ builtin.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
 
 //      MATMUL:  fuse_indexed
 // MATMUL-SAME:    %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xi32>
-builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
+func.func @fuse_indexed(%arg0: tensor<24x12xi32>,
                            %arg1: tensor<12x25xi32>,
                            %arg2: tensor<24x25xi32>) -> tensor<24x25xi32> {
   %c0 = arith.constant 0 : index

diff  --git a/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir
index 5de6c1ad84e1a..67b2c606f3648 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir
@@ -7,7 +7,7 @@
 // CONV-SAME:    %[[ARG2:[0-9a-zA-Z]*]]: tensor<10x10xf32>
 // CONV-SAME:    %[[ARG3:[0-9a-zA-Z]*]]: tensor<9x9xf32>
 // CONV-SAME:    %[[ARG4:[0-9a-zA-Z]*]]: tensor<8x8xf32>
-builtin.func @fuse_conv_chain(%arg0: tensor<2x2xf32>,
+func.func @fuse_conv_chain(%arg0: tensor<2x2xf32>,
                               %arg1: tensor<11x11xf32>,
                               %arg2: tensor<10x10xf32>,
                               %arg3: tensor<9x9xf32>,
@@ -52,7 +52,7 @@ builtin.func @fuse_conv_chain(%arg0: tensor<2x2xf32>,
 
 //      MATMUL:  fuse_matmul_chain
 // MATMUL-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<8x8xf32>
-builtin.func @fuse_matmul_chain(%arg0: tensor<8x8xf32>) -> tensor<8x8xf32> {
+func.func @fuse_matmul_chain(%arg0: tensor<8x8xf32>) -> tensor<8x8xf32> {
   %c0 = arith.constant 0 : index
   %c12 = arith.constant 12 : index
   %c25 = arith.constant 25 : index

diff  --git a/mlir/test/Dialect/Quant/canonicalize.mlir b/mlir/test/Dialect/Quant/canonicalize.mlir
index 5102d0399e804..8bca8abe92017 100644
--- a/mlir/test/Dialect/Quant/canonicalize.mlir
+++ b/mlir/test/Dialect/Quant/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s
 
 // -----
 // CHECK-LABEL: redundant_scast

diff  --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index 86c478ec4eb68..955f6bbac7b61 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(canonicalize)' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s
 
 
 // -----

diff  --git a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
index be823defd6789..4187ae591a5f4 100644
--- a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(scf-for-to-while)' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(scf-for-to-while)' -split-input-file | FileCheck %s
 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
 
 // CHECK-LABEL:   func @single_loop(

diff  --git a/mlir/test/Dialect/SCF/loop-range.mlir b/mlir/test/Dialect/SCF/loop-range.mlir
index 9fe80079e2450..ab344d04ffae3 100644
--- a/mlir/test/Dialect/SCF/loop-range.mlir
+++ b/mlir/test/Dialect/SCF/loop-range.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(scf-for-loop-range-folding)' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(scf-for-loop-range-folding)' -split-input-file | FileCheck %s
 
 func @fold_one_loop(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
   %c0 = arith.constant 0 : index

diff  --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
index 2dc78aa36f61a..e082b85750056 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(scf-parallel-loop-fusion)' -split-input-file | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-fusion)' -split-input-file | FileCheck %s
 
 func @fuse_empty_loops() {
   %c2 = arith.constant 2 : index

diff  --git a/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir
index 53a51de2d576a..6c4cef3d4fbde 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4 no-min-max-bounds=true})' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4 no-min-max-bounds=true})' -split-input-file | FileCheck %s
 
 func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                     %arg3 : index, %arg4 : index, %arg5 : index,

diff  --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
index 7939dce71dc39..e3e21cf68e355 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s
 
 func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                     %arg3 : index, %arg4 : index, %arg5 : index,

diff  --git a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
index 37553fd4f23a0..7b0fa444817a1 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s
 
 //===----------------------------------------------------------------------===//
 // spv.AccessChain

diff  --git a/mlir/test/Dialect/Shape/invalid.mlir b/mlir/test/Dialect/Shape/invalid.mlir
index 648786e88c6eb..37a6e87d67bde 100644
--- a/mlir/test/Dialect/Shape/invalid.mlir
+++ b/mlir/test/Dialect/Shape/invalid.mlir
@@ -172,7 +172,7 @@ module attributes {shape.lib = [@shape_lib, "shape_lib"]} {
 
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  func.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }
@@ -192,7 +192,7 @@ module attributes {shape.lib = [@shape_lib, @shape_lib]} {
 
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  func.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }
@@ -212,7 +212,7 @@ module attributes {shape.lib = [@shape_lib]} {
 
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  func.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }

diff  --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index a614d2228e11f..2a60919378770 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -91,7 +91,7 @@ func @tensor.generate(%m : index, %n : index)
 
 func @tensor.generate(%m : index, %n : index)
     -> tensor<?x3x?xf32> {
-  // expected-error @+4 {{'func.return' op expects parent op 'builtin.func'}}
+  // expected-error @+4 {{'func.return' op expects parent op 'func.func'}}
   %tnsr = tensor.generate %m, %n {
     ^bb0(%i : index, %j : index, %k : index):
       %elem = arith.constant 8.0 : f32

diff  --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 522f8dea8b470..9647fb018bcaa 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(canonicalize)' -split-input-file -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' -split-input-file -allow-unregistered-dialect | FileCheck %s
 
 // -----
 

diff  --git a/mlir/test/IR/diagnostic-handler-filter.mlir b/mlir/test/IR/diagnostic-handler-filter.mlir
index 8630dbe492888..940155e8bdfdf 100644
--- a/mlir/test/IR/diagnostic-handler-filter.mlir
+++ b/mlir/test/IR/diagnostic-handler-filter.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-diagnostic-filter{filters=mysource1})" -split-input-file -o - 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-diagnostic-filter{filters=mysource1})" -split-input-file -o - 2>&1 | FileCheck %s
 // This test verifies that diagnostic handler can emit the call stack successfully.
 
 // CHECK-LABEL: Test 'test1'

diff  --git a/mlir/test/IR/generic-visitors-interrupt.mlir b/mlir/test/IR/generic-visitors-interrupt.mlir
index 35099f0e7a328..b07d72c052b09 100644
--- a/mlir/test/IR/generic-visitors-interrupt.mlir
+++ b/mlir/test/IR/generic-visitors-interrupt.mlir
@@ -8,7 +8,7 @@ func @main(%arg0: f32) -> f32 {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 walk was interrupted
 
 // -----
@@ -21,7 +21,7 @@ func @main(%arg0: f32) -> f32 {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'foo' before all regions
 // CHECK: step 3 op 'bar' before all regions
 // CHECK: step 4 walk was interrupted
@@ -40,7 +40,7 @@ func @main(%arg0: f32) -> f32 {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'foo' before all regions
 // CHECK: step 3 op 'bar0' before all regions
 // CHECK: step 4 walk was interrupted
@@ -59,7 +59,7 @@ func @main() {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'foo' before all regions
 // CHECK: step 3 op 'test.two_region_op' before all regions
 // CHECK: step 4 op 'work' before all regions
@@ -83,7 +83,7 @@ func @main() {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'foo' before all regions
 // CHECK: step 3 op 'test.two_region_op' before all regions
 // CHECK: step 4 op 'work' before all regions
@@ -106,10 +106,10 @@ func @main(%arg0: f32) -> f32 {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'arith.addf' before all regions
 // CHECK: step 3 op 'func.return' before all regions
-// CHECK: step 4 op 'builtin.func' after all regions
+// CHECK: step 4 op 'func.func' after all regions
 // CHECK: step 5 op 'builtin.module' after all regions
 
 // -----
@@ -125,14 +125,14 @@ func @main(%arg0: f32) -> f32 {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'foo' before all regions
 // CHECK: step 3 op 'bar0' before all regions
 // CHECK: step 4 op 'foo' before region #1
 // CHECK: step 5 op 'bar1' before all regions
 // CHECK: step 6 op 'arith.addf' before all regions
 // CHECK: step 7 op 'func.return' before all regions
-// CHECK: step 8 op 'builtin.func' after all regions
+// CHECK: step 8 op 'func.func' after all regions
 // CHECK: step 9 op 'builtin.module' after all regions
 
 // -----
@@ -148,10 +148,10 @@ func @main(%arg0: f32) -> f32 {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'foo' before all regions
 // CHECK: step 3 op 'bar0' before all regions
 // CHECK: step 4 op 'arith.addf' before all regions
 // CHECK: step 5 op 'func.return' before all regions
-// CHECK: step 6 op 'builtin.func' after all regions
+// CHECK: step 6 op 'func.func' after all regions
 // CHECK: step 7 op 'builtin.module' after all regions

diff  --git a/mlir/test/IR/generic-visitors.mlir b/mlir/test/IR/generic-visitors.mlir
index b14cf93bc34e5..71f6d911a670b 100644
--- a/mlir/test/IR/generic-visitors.mlir
+++ b/mlir/test/IR/generic-visitors.mlir
@@ -20,7 +20,7 @@ func @structured_cfg() {
 }
 
 // CHECK: step 0 op 'builtin.module' before all regions
-// CHECK: step 1 op 'builtin.func' before all regions
+// CHECK: step 1 op 'func.func' before all regions
 // CHECK: step 2 op 'arith.constant' before all regions
 // CHECK: step 3 op 'arith.constant' before all regions
 // CHECK: step 4 op 'arith.constant' before all regions
@@ -37,7 +37,7 @@ func @structured_cfg() {
 // CHECK: step 15 op 'scf.yield' before all regions
 // CHECK: step 16 op 'scf.for' after all regions
 // CHECK: step 17 op 'func.return' before all regions
-// CHECK: step 18 op 'builtin.func' after all regions
+// CHECK: step 18 op 'func.func' after all regions
 // CHECK: step 19 op 'builtin.module' after all regions
 
 // -----

diff  --git a/mlir/test/IR/invalid-func-op.mlir b/mlir/test/IR/invalid-func-op.mlir
index c64992dfd84e6..903c65a4c4857 100644
--- a/mlir/test/IR/invalid-func-op.mlir
+++ b/mlir/test/IR/invalid-func-op.mlir
@@ -4,7 +4,7 @@
 
 func @func_op() {
  // expected-error@+1 {{expected valid '@'-identifier for symbol name}}
-  builtin.func missingsigil() -> (i1, index, f32)
+  func.func missingsigil() -> (i1, index, f32)
   return
 }
 
@@ -12,7 +12,7 @@ func @func_op() {
 
 func @func_op() {
  // expected-error@+1 {{expected type instead of SSA identifier}}
-  builtin.func @mixed_named_arguments(f32, %a : i32) {
+  func.func @mixed_named_arguments(f32, %a : i32) {
     return
   }
   return
@@ -22,7 +22,7 @@ func @func_op() {
 
 func @func_op() {
  // expected-error@+1 {{expected SSA identifier}}
-  builtin.func @mixed_named_arguments(%a : i32, f32) -> () {
+  func.func @mixed_named_arguments(%a : i32, f32) -> () {
     return
   }
   return
@@ -32,7 +32,7 @@ func @func_op() {
 
 func @func_op() {
  // expected-error@+1 {{entry block must have 1 arguments to match function signature}}
-  builtin.func @mixed_named_arguments(f32) {
+  func.func @mixed_named_arguments(f32) {
   ^entry:
     return
   }
@@ -43,7 +43,7 @@ func @func_op() {
 
 func @func_op() {
  // expected-error@+1 {{type of entry block argument #0('i32') must match the type of the corresponding argument in function signature('f32')}}
-  builtin.func @mixed_named_arguments(f32) {
+  func.func @mixed_named_arguments(f32) {
   ^entry(%arg : i32):
     return
   }

diff  --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
index b5a0f1056c7d5..d3f9672dd4f71 100644
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -98,7 +98,7 @@ func @func_with_ops(tensor<12xi1>, tensor<42xi32>, tensor<42xi32>) {
 
 func @return_not_in_function() {
   "foo.region"() ({
-    // expected-error at +1 {{'func.return' op expects parent op 'builtin.func'}}
+    // expected-error at +1 {{'func.return' op expects parent op 'func.func'}}
     return
   }): () -> ()
   return

diff  --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir
index 26d67c1a002cc..83d3a5d39a99f 100644
--- a/mlir/test/IR/invalid.mlir
+++ b/mlir/test/IR/invalid.mlir
@@ -542,7 +542,7 @@ func @return_type_mismatch() -> i32 {
 
 func @return_inside_loop() {
   affine.for %i = 1 to 100 {
-    // expected-error at +1 {{'func.return' op expects parent op 'builtin.func'}}
+    // expected-error at +1 {{'func.return' op expects parent op 'func.func'}}
     return
   }
   return

diff  --git a/mlir/test/IR/print-ir-invalid.mlir b/mlir/test/IR/print-ir-invalid.mlir
index e83353433c7e4..70a0b90d53683 100644
--- a/mlir/test/IR/print-ir-invalid.mlir
+++ b/mlir/test/IR/print-ir-invalid.mlir
@@ -8,7 +8,7 @@ module {}
 // The operation is invalid because the body does not have a terminator, print
 // the generic form.
 // CHECK:      Invalid operation:
-// CHECK-NEXT: "builtin.func"() ({
+// CHECK-NEXT: "func.func"() ({
 // CHECK-NEXT: ^bb0:
 // CHECK-NEXT: })
 // CHECK-SAME: sym_name = "test"

diff  --git a/mlir/test/IR/test-matchers.mlir b/mlir/test/IR/test-matchers.mlir
index 948352494ee56..5f53cd2dc8eb5 100644
--- a/mlir/test/IR/test-matchers.mlir
+++ b/mlir/test/IR/test-matchers.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline="builtin.func(test-matchers)" -o /dev/null 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline="func.func(test-matchers)" -o /dev/null 2>&1 | FileCheck %s
 
 func @test1(%a: f32, %b: f32, %c: f32) {
   %0 = arith.addf %a, %b: f32

diff  --git a/mlir/test/IR/traits.mlir b/mlir/test/IR/traits.mlir
index 7432069ee13dc..2bd0ca6f3f020 100644
--- a/mlir/test/IR/traits.mlir
+++ b/mlir/test/IR/traits.mlir
@@ -575,7 +575,7 @@ func @failedHasDominanceScopeOutsideDominanceFreeScope() -> () {
 // checked for dominance
 func @illegalInsideDominanceFreeScope() -> () {
   test.graph_region {
-    builtin.func @test() -> i1 {
+    func.func @test() -> i1 {
     ^bb1:
       // expected-error @+1 {{operand #0 does not dominate this use}}
       %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
@@ -594,7 +594,7 @@ func @illegalInsideDominanceFreeScope() -> () {
 // checked for dominance
 func @illegalCDFGInsideDominanceFreeScope() -> () {
   test.graph_region {
-    builtin.func @test() -> i1 {
+    func.func @test() -> i1 {
     ^bb1:
       // expected-error @+1 {{operand #0 does not dominate this use}}
       %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)

diff  --git a/mlir/test/IR/visitors.mlir b/mlir/test/IR/visitors.mlir
index 189b305bbe58d..29284edab3698 100644
--- a/mlir/test/IR/visitors.mlir
+++ b/mlir/test/IR/visitors.mlir
@@ -23,7 +23,7 @@ func @structured_cfg() {
 
 // CHECK-LABEL: Op pre-order visit
 // CHECK:       Visiting op 'builtin.module'
-// CHECK:       Visiting op 'builtin.func'
+// CHECK:       Visiting op 'func.func'
 // CHECK:       Visiting op 'scf.for'
 // CHECK:       Visiting op 'use0'
 // CHECK:       Visiting op 'scf.if'
@@ -34,14 +34,14 @@ func @structured_cfg() {
 
 // CHECK-LABEL: Block pre-order visits
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.module'
-// CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.func'
+// CHECK:       Visiting block ^bb0 from region 0 from operation 'func.func'
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'scf.for'
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'scf.if'
 // CHECK:       Visiting block ^bb0 from region 1 from operation 'scf.if'
 
 // CHECK-LABEL: Region pre-order visits
 // CHECK:       Visiting region 0 from operation 'builtin.module'
-// CHECK:       Visiting region 0 from operation 'builtin.func'
+// CHECK:       Visiting region 0 from operation 'func.func'
 // CHECK:       Visiting region 0 from operation 'scf.for'
 // CHECK:       Visiting region 0 from operation 'scf.if'
 // CHECK:       Visiting region 1 from operation 'scf.if'
@@ -54,21 +54,21 @@ func @structured_cfg() {
 // CHECK:       Visiting op 'use3'
 // CHECK:       Visiting op 'scf.for'
 // CHECK:       Visiting op 'func.return'
-// CHECK:       Visiting op 'builtin.func'
+// CHECK:       Visiting op 'func.func'
 // CHECK:       Visiting op 'builtin.module'
 
 // CHECK-LABEL: Block post-order visits
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'scf.if'
 // CHECK:       Visiting block ^bb0 from region 1 from operation 'scf.if'
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'scf.for'
-// CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.func'
+// CHECK:       Visiting block ^bb0 from region 0 from operation 'func.func'
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.module'
 
 // CHECK-LABEL: Region post-order visits
 // CHECK:       Visiting region 0 from operation 'scf.if'
 // CHECK:       Visiting region 1 from operation 'scf.if'
 // CHECK:       Visiting region 0 from operation 'scf.for'
-// CHECK:       Visiting region 0 from operation 'builtin.func'
+// CHECK:       Visiting region 0 from operation 'func.func'
 // CHECK:       Visiting region 0 from operation 'builtin.module'
 
 // CHECK-LABEL: Op pre-order erasures
@@ -100,14 +100,14 @@ func @structured_cfg() {
 // CHECK:       Erasing op 'use3'
 // CHECK:       Erasing op 'scf.for'
 // CHECK:       Erasing op 'func.return'
-// CHECK:       Erasing op 'builtin.func'
+// CHECK:       Erasing op 'func.func'
 // CHECK:       Erasing op 'builtin.module'
 
 // CHECK-LABEL: Block post-order erasures (no skip)
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'scf.if'
 // CHECK:       Erasing block ^bb0 from region 1 from operation 'scf.if'
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'scf.for'
-// CHECK:       Erasing block ^bb0 from region 0 from operation 'builtin.func'
+// CHECK:       Erasing block ^bb0 from region 0 from operation 'func.func'
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'builtin.module'
 
 // -----
@@ -128,7 +128,7 @@ func @unstructured_cfg() {
 
 // CHECK-LABEL: Op pre-order visits
 // CHECK:       Visiting op 'builtin.module'
-// CHECK:       Visiting op 'builtin.func'
+// CHECK:       Visiting op 'func.func'
 // CHECK:       Visiting op 'regionOp0'
 // CHECK:       Visiting op 'op0'
 // CHECK:       Visiting op 'cf.br'
@@ -139,14 +139,14 @@ func @unstructured_cfg() {
 
 // CHECK-LABEL: Block pre-order visits
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.module'
-// CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.func'
+// CHECK:       Visiting block ^bb0 from region 0 from operation 'func.func'
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'regionOp0'
 // CHECK:       Visiting block ^bb1 from region 0 from operation 'regionOp0'
 // CHECK:       Visiting block ^bb2 from region 0 from operation 'regionOp0'
 
 // CHECK-LABEL: Region pre-order visits
 // CHECK:       Visiting region 0 from operation 'builtin.module'
-// CHECK:       Visiting region 0 from operation 'builtin.func'
+// CHECK:       Visiting region 0 from operation 'func.func'
 // CHECK:       Visiting region 0 from operation 'regionOp0'
 
 // CHECK-LABEL: Op post-order visits
@@ -157,19 +157,19 @@ func @unstructured_cfg() {
 // CHECK:       Visiting op 'op2'
 // CHECK:       Visiting op 'regionOp0'
 // CHECK:       Visiting op 'func.return'
-// CHECK:       Visiting op 'builtin.func'
+// CHECK:       Visiting op 'func.func'
 // CHECK:       Visiting op 'builtin.module'
 
 // CHECK-LABEL: Block post-order visits
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'regionOp0'
 // CHECK:       Visiting block ^bb1 from region 0 from operation 'regionOp0'
 // CHECK:       Visiting block ^bb2 from region 0 from operation 'regionOp0'
-// CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.func'
+// CHECK:       Visiting block ^bb0 from region 0 from operation 'func.func'
 // CHECK:       Visiting block ^bb0 from region 0 from operation 'builtin.module'
 
 // CHECK-LABEL: Region post-order visits
 // CHECK:       Visiting region 0 from operation 'regionOp0'
-// CHECK:       Visiting region 0 from operation 'builtin.func'
+// CHECK:       Visiting region 0 from operation 'func.func'
 // CHECK:       Visiting region 0 from operation 'builtin.module'
 
 // CHECK-LABEL: Op pre-order erasures (skip)
@@ -208,5 +208,5 @@ func @unstructured_cfg() {
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'regionOp0'
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'regionOp0'
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'regionOp0'
-// CHECK:       Erasing block ^bb0 from region 0 from operation 'builtin.func'
+// CHECK:       Erasing block ^bb0 from region 0 from operation 'func.func'
 // CHECK:       Erasing block ^bb0 from region 0 from operation 'builtin.module'

diff  --git a/mlir/test/IR/wrapping_op.mlir b/mlir/test/IR/wrapping_op.mlir
index 982faee182eb3..b76d25d8233b8 100644
--- a/mlir/test/IR/wrapping_op.mlir
+++ b/mlir/test/IR/wrapping_op.mlir
@@ -2,7 +2,7 @@
 // RUN: mlir-opt -allow-unregistered-dialect -mlir-print-op-generic -mlir-print-debuginfo -mlir-print-local-scope %s | FileCheck %s --check-prefix=CHECK-GENERIC
 
 // CHECK-LABEL: func @wrapping_op
-// CHECK-GENERIC: "builtin.func"
+// CHECK-GENERIC: "func.func"
 func @wrapping_op(%arg0 : i32, %arg1 : f32) -> (i3, i2, i1) {
 // CHECK: %0:3 = test.wrapping_region wraps "some.op"(%arg1, %arg0) {test.attr = "attr"} : (f32, i32) -> (i1, i2, i3)
 // CHECK-GENERIC: "test.wrapping_region"() ({

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
index 8ee06c8a0ac7e..e1287386dabfe 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
@@ -4,8 +4,8 @@
 // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.fill register-tile-sizes=4,32 vectorize" | \
 // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=memref.copy register-tile-sizes=4,32 vectorize" | \
 
-// RUN: mlir-opt -pass-pipeline="builtin.func(canonicalize,convert-vector-to-scf,lower-affine,convert-linalg-to-loops)" | \
-// RUN: mlir-opt -pass-pipeline="builtin.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt -pass-pipeline="func.func(canonicalize,convert-vector-to-scf,lower-affine,convert-linalg-to-loops)" | \
+// RUN: mlir-opt -pass-pipeline="func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // Activate to dump assembly
 // R_UN:   -dump-object-file -object-filename=/tmp/a.o \

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
index 4effdbcc8b063..0bae86650a4ce 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(canonicalize,cse),linalg-comprehensive-module-bufferize" |\
-// RUN: mlir-opt -pass-pipeline="builtin.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops)" |\
-// RUN: mlir-opt -pass-pipeline="builtin.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(canonicalize,cse),linalg-comprehensive-module-bufferize" |\
+// RUN: mlir-opt -pass-pipeline="func.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops)" |\
+// RUN: mlir-opt -pass-pipeline="func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext |\

diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py
index 538d5c853901a..353cd58d23e83 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py
@@ -12,6 +12,7 @@
 
 from mlir.dialects import sparse_tensor as st
 from mlir.dialects import builtin
+from mlir.dialects import func
 from mlir.dialects.linalg.opdsl import lang as dsl
 
 _SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -44,7 +45,7 @@ def build_SDDMM(attr: st.EncodingAttr):
   arguments = [a, b, s, c]
   with ir.InsertionPoint(module.body):
 
-    @builtin.FuncOp.from_py_func(*arguments)
+    @func.FuncOp.from_py_func(*arguments)
     def sddmm(*args):
       return sddmm_dsl(args[0], args[1], args[2], outs=[args[3]])
 

diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
index 77b94ea887767..4c51e19c31aed 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py
@@ -12,6 +12,7 @@
 
 from mlir.dialects import sparse_tensor as st
 from mlir.dialects import builtin
+from mlir.dialects import func
 from mlir.dialects.linalg.opdsl import lang as dsl
 
 _SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -41,7 +42,7 @@ def build_SpMM(attr: st.EncodingAttr):
   arguments = [a, b, c]
   with ir.InsertionPoint(module.body):
 
-    @builtin.FuncOp.from_py_func(*arguments)
+    @func.FuncOp.from_py_func(*arguments)
     def spMxM(*args):
       return matmul_dsl(args[0], args[1], outs=[args[2]])
 

diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
index 2afcf5506ce42..632619581f218 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
@@ -110,7 +110,7 @@ def build(self, types: List[ir.Type]):
       # TODO: assert dense? assert element type is recognised by the TypeConverter?
       types.append(tp0)
       funcTp = ir.FunctionType.get(inputs=[tp0], results=[tp0])
-      funcOp = builtin.FuncOp(name='main', type=funcTp)
+      funcOp = func.FuncOp(name='main', type=funcTp)
       funcOp.attributes['llvm.emit_c_interface'] = ir.UnitAttr.get()
       with ir.InsertionPoint(funcOp.add_entry_block()):
         arg0 = funcOp.entry_block.arguments[0]

diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
index 28b4ccc809f5b..8c77c8d55fc71 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -670,7 +670,7 @@ def _emit_assignment(
     # Build the kernel for the operations.
     with ir.InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(*input_types, name=_ENTRY_NAME)
+      @func.FuncOp.from_py_func(*input_types, name=_ENTRY_NAME)
       def linalg_funcop(*args):
         # Set up the mapping from the Access nodes to their MLIR values.
         for e, mlir in zip(input_accesses, args):

diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
index 88b963c132df5..57ed5174e0cb1 100644
--- a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
+++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf,memref-expand,arith-expand),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf,memref-expand,arith-expand),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
index e485f4b751e53..f19cf55aa4805 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
index f4469ea17ad01..36c969d798002 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
index dac05da4d3e36..dfcfd528f2f8b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
index 87473807585cb..c5abed4fdb7e3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
index ea7a6144f8f3c..b3744d000c59a 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
index 6001e58a42594..cce27bad977fc 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
@@ -1,14 +1,14 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-vector-to-forloop,convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop,convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main \
 // RUN: -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-vector-to-forloop)" | FileCheck %s -check-prefix=TRANSFORM
+// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop)" | FileCheck %s -check-prefix=TRANSFORM
 
 
 func private @print_memref_f32(memref<*xf32>)

diff --git a/mlir/test/Pass/dynamic-pipeline.mlir b/mlir/test/Pass/dynamic-pipeline.mlir
index a820253929fd6..e72ce401f1c20 100644
--- a/mlir/test/Pass/dynamic-pipeline.mlir
+++ b/mlir/test/Pass/dynamic-pipeline.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1, dynamic-pipeline=builtin.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD1-ONLY --check-prefix=CHECK
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod2, dynamic-pipeline=builtin.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD2 --check-prefix=MOD2-ONLY --check-prefix=CHECK
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1,inner_mod2, dynamic-pipeline=builtin.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{dynamic-pipeline=builtin.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1, dynamic-pipeline=func.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD1-ONLY --check-prefix=CHECK
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD2 --check-prefix=MOD2-ONLY --check-prefix=CHECK
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1,inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{dynamic-pipeline=func.func(cse,canonicalize)})'  --mlir-disable-threading  -print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK
 
 
 func @f() {

diff --git a/mlir/test/Pass/interface-pass.mlir b/mlir/test/Pass/interface-pass.mlir
index 4506dde3d747b..1c2ce5b1f8c7b 100644
--- a/mlir/test/Pass/interface-pass.mlir
+++ b/mlir/test/Pass/interface-pass.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='builtin.func(test-interface-pass)' -o /dev/null
+// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='func.func(test-interface-pass)' -o /dev/null
 
 // Test that we run the interface pass on the function.
 

diff --git a/mlir/test/Pass/invalid-parent.mlir b/mlir/test/Pass/invalid-parent.mlir
index 2979ba9e89a4e..d4bc32eb35e48 100644
--- a/mlir/test/Pass/invalid-parent.mlir
+++ b/mlir/test/Pass/invalid-parent.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(test-pass-invalid-parent)' -verify-diagnostics
+// RUN: mlir-opt %s -pass-pipeline='func.func(test-pass-invalid-parent)' -verify-diagnostics
 
 // Test that we properly report errors when the parent becomes invalid after running a pass
 // on a child operation.

diff --git a/mlir/test/Pass/ir-printing.mlir b/mlir/test/Pass/ir-printing.mlir
index 9c45776291ea4..e3de6bb02784b 100644
--- a/mlir/test/Pass/ir-printing.mlir
+++ b/mlir/test/Pass/ir-printing.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,canonicalize)' -print-ir-before=cse  -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,canonicalize)' -print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,canonicalize)' -print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,canonicalize)' -print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,canonicalize)' -print-ir-before=cse -print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s
-// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,cse)' -print-ir-after-all -print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s
-// RUN: not mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,test-pass-failure)' -print-ir-after-failure -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_FAILURE %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -print-ir-before=cse  -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -print-ir-before=cse -print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,cse)' -print-ir-after-all -print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s
+// RUN: not mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,test-pass-failure)' -print-ir-after-failure -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_FAILURE %s
 
 func @foo() {
   %0 = arith.constant 0 : i32
@@ -49,10 +49,10 @@ func @bar() {
 // AFTER_ALL: // -----// IR Dump After{{.*}}Canonicalizer //----- //
 // AFTER_ALL-NEXT: func @bar()
 
-// BEFORE_MODULE: // -----// IR Dump Before{{.*}}CSE ('builtin.func' operation: @foo) //----- //
+// BEFORE_MODULE: // -----// IR Dump Before{{.*}}CSE ('func.func' operation: @foo) //----- //
 // BEFORE_MODULE: func @foo()
 // BEFORE_MODULE: func @bar()
-// BEFORE_MODULE: // -----// IR Dump Before{{.*}}CSE ('builtin.func' operation: @bar) //----- //
+// BEFORE_MODULE: // -----// IR Dump Before{{.*}}CSE ('func.func' operation: @bar) //----- //
 // BEFORE_MODULE: func @foo()
 // BEFORE_MODULE: func @bar()
 

diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir
index 82ff328ce894b..db18d9a0f6082 100644
--- a/mlir/test/Pass/pass-timing.mlir
+++ b/mlir/test/Pass/pass-timing.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s
-// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s
-// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s
-// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s
+// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s
+// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s
 // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_PIPELINE %s
 
 // LIST: Execution time report
@@ -16,7 +16,7 @@
 // PIPELINE: Total Execution Time:
 // PIPELINE: Name
 // PIPELINE-NEXT: Parser
-// PIPELINE-NEXT: 'builtin.func' Pipeline
+// PIPELINE-NEXT: 'func.func' Pipeline
 // PIPELINE-NEXT:   CSE
 // PIPELINE-NEXT:     (A) DominanceInfo
 // PIPELINE-NEXT:   Canonicalizer
@@ -38,7 +38,7 @@
 // MT_PIPELINE: Total Execution Time:
 // MT_PIPELINE: Name
 // MT_PIPELINE-NEXT: Parser
-// MT_PIPELINE-NEXT: 'builtin.func' Pipeline
+// MT_PIPELINE-NEXT: 'func.func' Pipeline
 // MT_PIPELINE-NEXT:   CSE
 // MT_PIPELINE-NEXT:     (A) DominanceInfo
 // MT_PIPELINE-NEXT:   Canonicalizer
@@ -52,12 +52,12 @@
 // NESTED_PIPELINE: Total Execution Time:
 // NESTED_PIPELINE: Name
 // NESTED_PIPELINE-NEXT: Parser
-// NESTED_PIPELINE-NEXT: Pipeline Collection : ['builtin.func', 'builtin.module']
-// NESTED_PIPELINE-NEXT:   'builtin.func' Pipeline
+// NESTED_PIPELINE-NEXT: Pipeline Collection : ['builtin.module', 'func.func']
+// NESTED_PIPELINE-NEXT:   'func.func' Pipeline
 // NESTED_PIPELINE-NEXT:     TestFunctionPass
 // NESTED_PIPELINE-NEXT:   'builtin.module' Pipeline
 // NESTED_PIPELINE-NEXT:     TestModulePass
-// NESTED_PIPELINE-NEXT:     'builtin.func' Pipeline
+// NESTED_PIPELINE-NEXT:     'func.func' Pipeline
 // NESTED_PIPELINE-NEXT:       TestFunctionPass
 // NESTED_PIPELINE-NEXT: Output
 // NESTED_PIPELINE-NEXT: Rest

diff --git a/mlir/test/Pass/pipeline-options-parsing.mlir b/mlir/test/Pass/pipeline-options-parsing.mlir
index 53a5712d79f41..987ab7ad29025 100644
--- a/mlir/test/Pass/pipeline-options-parsing.mlir
+++ b/mlir/test/Pass/pipeline-options-parsing.mlir
@@ -1,11 +1,11 @@
 // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s
 // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{test-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s
-// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.func(test-options-pass{list=3}), test-module-pass{invalid-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s
+// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-options-pass{list=3}), test-module-pass{invalid-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s
 // RUN: not mlir-opt %s -pass-pipeline='test-options-pass{list=3 list=notaninteger}' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2})'
-// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}})' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2})'
+// RUN: mlir-opt %s -verify-each=false -pass-pipeline='func.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}})' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s
 // RUN: mlir-opt %s -verify-each=false -test-options-pass-pipeline='list=1 string-list=a,b' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_2 %s
-// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(builtin.func(test-options-pass{list=3}), builtin.func(test-options-pass{list=1,2,3,4}))' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s
+// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(func.func(test-options-pass{list=3}), func.func(test-options-pass{list=1,2,3,4}))' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s
 
 // CHECK_ERROR_1: missing closing '}' while processing pass options
 // CHECK_ERROR_2: no such option test-option
@@ -14,4 +14,4 @@
 
 // CHECK_1: test-options-pass{list=1,2,3,4,5 string=nested_pipeline{arg1=10 arg2=" {} " arg3=true} string-list=a,b,c,d}
 // CHECK_2: test-options-pass{list=1 string= string-list=a,b}
-// CHECK_3: builtin.module(builtin.func(test-options-pass{list=3 string= }), builtin.func(test-options-pass{list=1,2,3,4 string= }))
+// CHECK_3: builtin.module(func.func(test-options-pass{list=3 string= }), func.func(test-options-pass{list=1,2,3,4 string= }))

diff  --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir
index 9dc4c309f6253..a8abfbf786645 100644
--- a/mlir/test/Pass/pipeline-parsing.mlir
+++ b/mlir/test/Pass/pipeline-parsing.mlir
@@ -1,16 +1,16 @@
-// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,builtin.func(test-function-pass)),builtin.func(test-function-pass)' -pass-pipeline="builtin.func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass)' -pass-pipeline="func.func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s
 // RUN: mlir-opt %s -mlir-disable-threading -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK
 // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s
 // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s
 // RUN: not mlir-opt %s -pass-pipeline='builtin.module()(' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s
 // RUN: not mlir-opt %s -pass-pipeline=',' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s
-// RUN: not mlir-opt %s -pass-pipeline='builtin.func(test-module-pass)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_5 %s
+// RUN: not mlir-opt %s -pass-pipeline='func.func(test-module-pass)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_5 %s
 
 // CHECK_ERROR_1: encountered unbalanced parentheses while parsing pipeline
 // CHECK_ERROR_2: encountered extra closing ')' creating unbalanced parentheses while parsing pipeline
 // CHECK_ERROR_3: expected ',' after parsing pipeline
 // CHECK_ERROR_4: does not refer to a registered pass or pass pipeline
-// CHECK_ERROR_5:  Can't add pass '{{.*}}TestModulePass' restricted to 'builtin.module' on a PassManager intended to run on 'builtin.func', did you intend to nest?
+// CHECK_ERROR_5:  Can't add pass '{{.*}}TestModulePass' restricted to 'builtin.module' on a PassManager intended to run on 'func.func', did you intend to nest?
 func @foo() {
   return
 }
@@ -21,21 +21,21 @@ module {
   }
 }
 
-// CHECK: Pipeline Collection : ['builtin.func', 'builtin.module']
-// CHECK-NEXT:   'builtin.func' Pipeline
+// CHECK: Pipeline Collection : ['builtin.module', 'func.func']
+// CHECK-NEXT:   'func.func' Pipeline
 // CHECK-NEXT:     TestFunctionPass
 // CHECK-NEXT:     CSE
 // CHECK-NEXT:       DominanceInfo
 // CHECK-NEXT:     Canonicalizer
 // CHECK-NEXT:   'builtin.module' Pipeline
 // CHECK-NEXT:     TestModulePass
-// CHECK-NEXT:     'builtin.func' Pipeline
+// CHECK-NEXT:     'func.func' Pipeline
 // CHECK-NEXT:       TestFunctionPass
 
-// TEXTUAL_CHECK: Pipeline Collection : ['builtin.func', 'builtin.module']
-// TEXTUAL_CHECK-NEXT:   'builtin.func' Pipeline
+// TEXTUAL_CHECK: Pipeline Collection : ['builtin.module', 'func.func']
+// TEXTUAL_CHECK-NEXT:   'func.func' Pipeline
 // TEXTUAL_CHECK-NEXT:     TestFunctionPass
 // TEXTUAL_CHECK-NEXT:   'builtin.module' Pipeline
 // TEXTUAL_CHECK-NEXT:     TestModulePass
-// TEXTUAL_CHECK-NEXT:     'builtin.func' Pipeline
+// TEXTUAL_CHECK-NEXT:     'func.func' Pipeline
 // TEXTUAL_CHECK-NEXT:       TestFunctionPass

diff  --git a/mlir/test/Pass/pipeline-stats.mlir b/mlir/test/Pass/pipeline-stats.mlir
index 1594e1dad9acf..1c0b403cc33e4 100644
--- a/mlir/test/Pass/pipeline-stats.mlir
+++ b/mlir/test/Pass/pipeline-stats.mlir
@@ -1,6 +1,6 @@
 // REQUIRES: asserts
-// RUN: mlir-opt %s -verify-each=true -pass-pipeline='builtin.func(test-stats-pass,test-stats-pass)' -pass-statistics -pass-statistics-display=list 2>&1 | FileCheck -check-prefix=LIST %s
-// RUN: mlir-opt %s -verify-each=true -pass-pipeline='builtin.func(test-stats-pass,test-stats-pass)' -pass-statistics -pass-statistics-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s
+// RUN: mlir-opt %s -verify-each=true -pass-pipeline='func.func(test-stats-pass,test-stats-pass)' -pass-statistics -pass-statistics-display=list 2>&1 | FileCheck -check-prefix=LIST %s
+// RUN: mlir-opt %s -verify-each=true -pass-pipeline='func.func(test-stats-pass,test-stats-pass)' -pass-statistics -pass-statistics-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s
 
 // LIST: Pass statistics report
 // LIST: TestStatisticPass
@@ -8,7 +8,7 @@
 // LIST-NOT: Verifier
 
 // PIPELINE: Pass statistics report
-// PIPELINE: 'builtin.func' Pipeline
+// PIPELINE: 'func.func' Pipeline
 // PIPELINE-NEXT:   TestStatisticPass
 // PIPELINE-NEXT:     (S) {{0|4}} num-ops - Number of operations counted
 // PIPELINE-NEXT:   TestStatisticPass

diff  --git a/mlir/test/Pass/run-reproducer.mlir b/mlir/test/Pass/run-reproducer.mlir
index b9a7129f31a42..af3e8488e882e 100644
--- a/mlir/test/Pass/run-reproducer.mlir
+++ b/mlir/test/Pass/run-reproducer.mlir
@@ -1,4 +1,4 @@
-// configuration: -mlir-disable-threading=true -pass-pipeline='builtin.func(cse,canonicalize)' -print-ir-before=cse
+// configuration: -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -print-ir-before=cse
 
 // Test of the reproducer run option. The first line has to be the
 // configuration (matching what is produced by reproducer).

diff  --git a/mlir/test/Target/Cpp/invalid.mlir b/mlir/test/Target/Cpp/invalid.mlir
index 2ada598969d5d..d81c7ec1dfab8 100644
--- a/mlir/test/Target/Cpp/invalid.mlir
+++ b/mlir/test/Target/Cpp/invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-translate -split-input-file -mlir-to-cpp -verify-diagnostics %s
 
-// expected-error at +1 {{'builtin.func' op with multiple blocks needs variables declared at top}}
+// expected-error at +1 {{'func.func' op with multiple blocks needs variables declared at top}}
 func @multiple_blocks() {
 ^bb1:
     cf.br ^bb2

diff  --git a/mlir/test/Target/LLVMIR/arm-neon-2d.mlir b/mlir/test/Target/LLVMIR/arm-neon-2d.mlir
index e6299f938d315..0fdaa6af68f27 100644
--- a/mlir/test/Target/LLVMIR/arm-neon-2d.mlir
+++ b/mlir/test/Target/LLVMIR/arm-neon-2d.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(arm-neon-2d-to-intr)" %s | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(arm-neon-2d-to-intr)" %s | FileCheck %s
 
 // CHECK-LABEL: arm_neon_sdot2d_4x4_i8i8
 func @arm_neon_sdot2d_4x4_i8i8(%a: vector<4xi32>, %b: vector<4x4xi8>, %c: vector<4x4xi8>) -> vector<4xi32> {

diff  --git a/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir b/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir
index 395e4ca8a1e93..eed974caa1043 100644
--- a/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir
+++ b/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="convert-vector-to-llvm,builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | mlir-translate -mlir-to-llvmir | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | mlir-translate -mlir-to-llvmir | FileCheck %s
 
 func @genbool_1d() -> vector<8xi1> {
   %0 = vector.constant_mask [4] : vector<8xi1>

diff  --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir
index fdf300a0b1389..2a9cf97f44e19 100644
--- a/mlir/test/Transforms/canonicalize-block-merge.mlir
+++ b/mlir/test/Transforms/canonicalize-block-merge.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(canonicalize)' -split-input-file | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s
 
 // Check the simple case of single operation blocks with a return.
 

diff  --git a/mlir/test/Transforms/canonicalize-dce.mlir b/mlir/test/Transforms/canonicalize-dce.mlir
index 9850fdfc781b8..335cf5a569371 100644
--- a/mlir/test/Transforms/canonicalize-dce.mlir
+++ b/mlir/test/Transforms/canonicalize-dce.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s
 
 // Test case: Simple case of deleting a dead pure op.
 
@@ -82,7 +82,7 @@ func @f(%arg0: f32, %pred: i1) {
 // CHECK-NEXT:     return
 
 func @f(%arg0: f32) {
-  builtin.func @g(%arg1: f32) {
+  func.func @g(%arg1: f32) {
     %0 = "arith.addf"(%arg1, %arg1) : (f32, f32) -> f32
     return
   }

diff  --git a/mlir/test/Transforms/canonicalize-td.mlir b/mlir/test/Transforms/canonicalize-td.mlir
index 6e538e57cd820..943802f66ec62 100644
--- a/mlir/test/Transforms/canonicalize-td.mlir
+++ b/mlir/test/Transforms/canonicalize-td.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(canonicalize{top-down=true})' | FileCheck %s --check-prefix=TD
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s --check-prefix=BU
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize{top-down=true})' | FileCheck %s --check-prefix=TD
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s --check-prefix=BU
 
 
 // BU-LABEL: func @default_insertion_position

diff  --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index 37abd219b013d..794e456686e9f 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(canonicalize)' -split-input-file | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @test_subi_zero
 func @test_subi_zero(%arg0: i32) -> i32 {
@@ -424,7 +424,7 @@ func @write_only_alloca_fold(%v: f32) {
 // CHECK-LABEL: func @dead_block_elim
 func @dead_block_elim() {
   // CHECK-NOT: ^bb
-  builtin.func @nested() {
+  func.func @nested() {
     return
 
   ^bb1:

diff  --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir
index 3542f37f61b76..f988b19381456 100644
--- a/mlir/test/Transforms/constant-fold.mlir
+++ b/mlir/test/Transforms/constant-fold.mlir
@@ -758,7 +758,7 @@ func @cmpf_inf() -> (i1, i1, i1, i1, i1, i1, i1, i1, i1, i1, i1, i1, i1, i1, i1,
 func @nested_isolated_region() {
   // CHECK-NEXT: func @isolated_op
   // CHECK-NEXT: arith.constant 2
-  builtin.func @isolated_op() {
+  func.func @isolated_op() {
     %0 = arith.constant 1 : i32
     %2 = arith.addi %0, %0 : i32
     "foo.yield"(%2) : (i32) -> ()

diff  --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir
index eae8595f8af8b..982511fec2b87 100644
--- a/mlir/test/Transforms/cse.mlir
+++ b/mlir/test/Transforms/cse.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(cse)' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(cse)' | FileCheck %s
 
 // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 mod 2)>
 #map0 = affine_map<(d0) -> (d0 mod 2)>
@@ -229,7 +229,7 @@ func @nested_isolated() -> i32 {
   %0 = arith.constant 1 : i32
 
   // CHECK-NEXT: @nested_func
-  builtin.func @nested_func() {
+  func.func @nested_func() {
     // CHECK-NEXT: arith.constant 1
     %foo = arith.constant 1 : i32
     "foo.yield"(%foo) : (i32) -> ()

diff  --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir
index da715b7b12833..9ca1872b3b88e 100644
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize)' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize)' | FileCheck %s
 
 // CHECK-LABEL: func @parallel_many_dims() {
 func @parallel_many_dims() {

diff  --git a/mlir/test/Transforms/parametric-mapping.mlir b/mlir/test/Transforms/parametric-mapping.mlir
index 7d155e82fefa8..ac942c85d6c3d 100644
--- a/mlir/test/Transforms/parametric-mapping.mlir
+++ b/mlir/test/Transforms/parametric-mapping.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline="builtin.func(test-mapping-to-processing-elements)" %s | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline="func.func(test-mapping-to-processing-elements)" %s | FileCheck %s
 
 // CHECK: #[[mul_map:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
 // CHECK: #[[add_map:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>

diff  --git a/mlir/test/Transforms/print-op-graph.mlir b/mlir/test/Transforms/print-op-graph.mlir
index 8695af8df8328..6d9f006fe9d6e 100644
--- a/mlir/test/Transforms/print-op-graph.mlir
+++ b/mlir/test/Transforms/print-op-graph.mlir
@@ -4,7 +4,7 @@
 // DFG-LABEL: digraph G {
 //       DFG:   subgraph {{.*}} {
 //       DFG:     subgraph {{.*}}
-//       DFG:       label = "builtin.func{{.*}}merge_blocks
+//       DFG:       label = "func.func{{.*}}merge_blocks
 //       DFG:       subgraph {{.*}} {
 //       DFG:         v[[ARG0:.*]] [label = "arg0"
 //       DFG:         v[[CONST10:.*]] [label ={{.*}}10 : i32
@@ -26,7 +26,7 @@
 // CFG-LABEL: digraph G {
 //       CFG:   subgraph {{.*}} {
 //       CFG:     subgraph {{.*}}
-//       CFG:       label = "builtin.func{{.*}}merge_blocks
+//       CFG:       label = "func.func{{.*}}merge_blocks
 //       CFG:       subgraph {{.*}} {
 //       CFG:         v[[C1:.*]] [label = "arith.constant
 //       CFG:         v[[C2:.*]] [label = "arith.constant

diff  --git a/mlir/test/Transforms/sccp-structured.mlir b/mlir/test/Transforms/sccp-structured.mlir
index 1087f47707c1f..32184bfdfda11 100644
--- a/mlir/test/Transforms/sccp-structured.mlir
+++ b/mlir/test/Transforms/sccp-structured.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.func(sccp)" -split-input-file | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s
 
 /// Check that a constant is properly propagated when only one edge is taken.
 

diff  --git a/mlir/test/Transforms/sccp.mlir b/mlir/test/Transforms/sccp.mlir
index 73e321fce432f..4879ee8c54c40 100644
--- a/mlir/test/Transforms/sccp.mlir
+++ b/mlir/test/Transforms/sccp.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.func(sccp)" -split-input-file | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s
 
 /// Check simple forward constant propagation without any control flow.
 

diff  --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
index f521341f336e7..2e8e77b76fc49 100644
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize)' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize)' | FileCheck %s
 
 func @collapse_to_single() {
   %c0 = arith.constant 3 : index

diff  --git a/mlir/test/Transforms/test-canonicalize-filter.mlir b/mlir/test/Transforms/test-canonicalize-filter.mlir
index fabc61eeef50b..4b4a85edc5ed8 100644
--- a/mlir/test/Transforms/test-canonicalize-filter.mlir
+++ b/mlir/test/Transforms/test-canonicalize-filter.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s --check-prefix=NO_FILTER
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(canonicalize{enable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_ENABLE
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(canonicalize{disable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_DISABLE
+// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s --check-prefix=NO_FILTER
+// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{enable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_ENABLE
+// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{disable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_DISABLE
 
 // NO_FILTER-LABEL: func @remove_op_with_inner_ops_pattern
 // NO_FILTER-NEXT: return

diff  --git a/mlir/test/Transforms/test-canonicalize.mlir b/mlir/test/Transforms/test-canonicalize.mlir
index c2137a3984dd2..33a1db5d8d17d 100644
--- a/mlir/test/Transforms/test-canonicalize.mlir
+++ b/mlir/test/Transforms/test-canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.func(canonicalize)' | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s
 
 // CHECK-LABEL: func @remove_op_with_inner_ops_pattern
 func @remove_op_with_inner_ops_pattern() {

diff  --git a/mlir/test/Transforms/test-legalizer-analysis.mlir b/mlir/test/Transforms/test-legalizer-analysis.mlir
index 4955ac0c54b84..81536bd0f828a 100644
--- a/mlir/test/Transforms/test-legalizer-analysis.mlir
+++ b/mlir/test/Transforms/test-legalizer-analysis.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt -allow-unregistered-dialect -test-legalize-patterns -verify-diagnostics -test-legalize-mode=analysis %s | FileCheck %s
 // expected-remark at -2 {{op 'builtin.module' is legalizable}}
 
-// expected-remark at +1 {{op 'builtin.func' is legalizable}}
+// expected-remark at +1 {{op 'func.func' is legalizable}}
 func @test(%arg0: f32) {
   // expected-remark at +1 {{op 'test.illegal_op_a' is legalizable}}
   %result = "test.illegal_op_a"() : () -> (i32)

diff  --git a/mlir/test/Transforms/test-legalizer-full.mlir b/mlir/test/Transforms/test-legalizer-full.mlir
index 586635d4eb58a..0dacfa299c056 100644
--- a/mlir/test/Transforms/test-legalizer-full.mlir
+++ b/mlir/test/Transforms/test-legalizer-full.mlir
@@ -37,7 +37,7 @@ func @recursively_legal_invalid_op() {
   }
   /// Operation that is dynamically legal, i.e. the function has a pattern
   /// applied to legalize the argument type before it becomes recursively legal.
-  builtin.func @dynamic_func(%arg: i64) attributes {test.recursively_legal} {
+  func.func @dynamic_func(%arg: i64) attributes {test.recursively_legal} {
     %ignored = "test.illegal_op_f"() : () -> (i32)
     "test.return"() : () -> ()
   }

diff  --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index b7490935b1294..a3831f903951b 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -23,8 +24,6 @@
 
 using namespace mlir;
 
-static llvm::cl::OptionCategory clOptionsCategory(PASS_NAME " options");
-
 namespace {
 
 struct TestAffineDataCopy

diff  --git a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
index 7096e11d1ef65..8281b3e910478 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 
 using namespace mlir;
 

diff  --git a/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
index 608f668291dae..30a2e09cdbc3b 100644
--- a/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/LoopFusionUtils.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
 #define DEBUG_TYPE "test-loop-fusion"

diff  --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
index 3e20dc582be24..4a3f06810a9f6 100644
--- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Utils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
 #include "mlir/IR/Builders.h"

diff  --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
index 4e74ff1ec06b2..a1671ee84d84f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -13,6 +13,7 @@
 #include <utility>
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h"
@@ -217,7 +218,7 @@ void TestLinalgCodegenStrategy::runStrategy(
               .enableTransferToSCFConversion());
   // Created a nested OpPassManager and run.
   FuncOp funcOp = getOperation();
-  OpPassManager dynamicPM("builtin.func");
+  OpPassManager dynamicPM("func.func");
   strategy.configurePassPipeline(dynamicPM, funcOp.getContext(), runEnablePass);
   if (failed(runPipeline(dynamicPM, funcOp)))
     return signalPassFailure();

diff  --git a/mlir/test/lib/Dialect/Linalg/TestLinalgElementwiseFusion.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgElementwiseFusion.cpp
index 16c81b5612d08..9a0604ef4aae9 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgElementwiseFusion.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgElementwiseFusion.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"

diff  --git a/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp
index 91dd9c15ca27a..0618eb2f19d82 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/Transforms.h"

diff  --git a/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp
index 249e3d873da92..a05a1f8ca529f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
 #include "mlir/Pass/Pass.h"

diff  --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index 6e4fa01e174b1..42068ab79ff9c 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"

diff  --git a/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp b/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp
index 3cba5ca710a51..2e4647a2482ad 100644
--- a/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp
+++ b/mlir/test/lib/Dialect/SCF/TestSCFUtils.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/SCF/Transforms.h"
 #include "mlir/Dialect/SCF/Utils/Utils.h"

diff  --git a/mlir/test/lib/Dialect/SPIRV/TestAvailability.cpp b/mlir/test/lib/Dialect/SPIRV/TestAvailability.cpp
index 07092290eb480..8e9a3c9b4c78a 100644
--- a/mlir/test/lib/Dialect/SPIRV/TestAvailability.cpp
+++ b/mlir/test/lib/Dialect/SPIRV/TestAvailability.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVTypes.h"
 #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"

diff  --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h
index 7ce2be79da5f6..0c8fa037b508d 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.h
+++ b/mlir/test/lib/Dialect/Test/TestDialect.h
@@ -18,6 +18,7 @@
 #include "TestInterfaces.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/DLTI/Traits.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Traits.h"
 #include "mlir/IR/BuiltinOps.h"

diff  --git a/mlir/test/lib/Dialect/Tosa/CMakeLists.txt b/mlir/test/lib/Dialect/Tosa/CMakeLists.txt
index c2f90df00d5d9..91437dd5bc74b 100644
--- a/mlir/test/lib/Dialect/Tosa/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Tosa/CMakeLists.txt
@@ -10,6 +10,7 @@ add_mlir_dialect_library(MLIRTosaTestPasses
   MLIRTosaPassIncGen
 
   LINK_LIBS PUBLIC
+  MLIRFunc
   MLIRPass
   MLIRTosa
   MLIRTransformUtils

diff  --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
index f139e3cdcd68e..59b5891263f50 100644
--- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
@@ -239,7 +239,7 @@ struct TestVectorTransposeLowering
                   .lower8x8xf32()));
     }
 
-    OpPassManager dynamicPM("builtin.func");
+    OpPassManager dynamicPM("func.func");
     dynamicPM.addPass(createLinalgStrategyLowerVectorsPass(options));
     if (failed(runPipeline(dynamicPM, getOperation())))
       return signalPassFailure();

diff  --git a/mlir/test/lib/Pass/TestPassManager.cpp b/mlir/test/lib/Pass/TestPassManager.cpp
index 85dc7bf8701ed..260bf0bf0a1b6 100644
--- a/mlir/test/lib/Pass/TestPassManager.cpp
+++ b/mlir/test/lib/Pass/TestPassManager.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "TestDialect.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"

diff  --git a/mlir/test/mlir-cpu-runner/async-error.mlir b/mlir/test/mlir-cpu-runner/async-error.mlir
index dfc76731dc033..096b7ede43363 100644
--- a/mlir/test/mlir-cpu-runner/async-error.mlir
+++ b/mlir/test/mlir-cpu-runner/async-error.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,builtin.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,builtin.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext  \

diff --git a/mlir/test/mlir-cpu-runner/async-group.mlir b/mlir/test/mlir-cpu-runner/async-group.mlir
index aab262d25db64..dc719584a4892 100644
--- a/mlir/test/mlir-cpu-runner/async-group.mlir
+++ b/mlir/test/mlir-cpu-runner/async-group.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,builtin.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext  \

diff --git a/mlir/test/mlir-cpu-runner/async-value.mlir b/mlir/test/mlir-cpu-runner/async-value.mlir
index 5d8074f233827..2ce6999cbe221 100644
--- a/mlir/test/mlir-cpu-runner/async-value.mlir
+++ b/mlir/test/mlir-cpu-runner/async-value.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,builtin.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,builtin.func(convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext  \

diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
index 0e820ed47357f..7b14d2c0848d3 100644
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,builtin.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,builtin.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext  \

diff --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
index 944d04d6fa189..ea5aab0b605c0 100644
--- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
+++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
 
 // Verify bare pointer memref calling convention. `simple_add1_add2_test`
 // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second

diff --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir
index df42845571e4e..87db27cf4bf1a 100644
--- a/mlir/test/mlir-cpu-runner/copy.mlir
+++ b/mlir/test/mlir-cpu-runner/copy.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/global-memref.mlir b/mlir/test/mlir-cpu-runner/global-memref.mlir
index 37c6e6f48f29b..510cbb9371204 100644
--- a/mlir/test/mlir-cpu-runner/global-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/global-memref.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 
 func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
 func private @print_memref_i32(memref<*xi32>) attributes { llvm.emit_c_interface }

diff --git a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
index c4674c1d9cb31..4eab6e4b20ca0 100644
--- a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
+++ b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
@@ -1,4 +1,4 @@
-// RUN:   mlir-opt %s -pass-pipeline="builtin.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,builtin.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN:   mlir-opt %s -pass-pipeline="func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \
 // RUN:     -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext  \

diff --git a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
index e26730ccacaab..019cb8ffc3eb9 100644
--- a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
+++ b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-scf-to-cf),convert-memref-to-llvm,builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf),convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/memref-reshape.mlir b/mlir/test/mlir-cpu-runner/memref-reshape.mlir
index 4fa312b7ed7f5..ea422ce89ffa2 100644
--- a/mlir/test/mlir-cpu-runner/memref-reshape.mlir
+++ b/mlir/test/mlir-cpu-runner/memref-reshape.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-scf-to-cf,memref-expand,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,memref-expand,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
index 0fff711fe97e3..b346223f09f40 100644
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -pass-pipeline="builtin.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 
 func @main() {
   %A = memref.alloc() : memref<16x16xf32>

diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
index 91163511ac9f2..d8b757229ecec 100644
--- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" |        \
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" |        \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 

diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
index a91abf1ddbf11..d8656fe698a13 100644
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
-// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
 
 func @print_0d() {
   %f = arith.constant 2.00000e+00 : f32

diff --git a/mlir/test/mlir-lsp-server/diagnostics.test b/mlir/test/mlir-lsp-server/diagnostics.test
index 6c84e2133a767..fc6f251a535ba 100644
--- a/mlir/test/mlir-lsp-server/diagnostics.test
+++ b/mlir/test/mlir-lsp-server/diagnostics.test
@@ -12,7 +12,7 @@
 // CHECK-NEXT:     "diagnostics": [
 // CHECK-NEXT:       {
 // CHECK-NEXT:         "category": "Parse Error",
-// CHECK-NEXT:         "message": "custom op 'builtin.func' expected valid '@'-identifier for symbol name",
+// CHECK-NEXT:         "message": "custom op 'func.func' expected valid '@'-identifier for symbol name",
 // CHECK-NEXT:         "range": {
 // CHECK-NEXT:           "end": {
 // CHECK-NEXT:             "character": 7,

diff --git a/mlir/test/mlir-lsp-server/hover.test b/mlir/test/mlir-lsp-server/hover.test
index cf981100f3209..4aedf9caeba76 100644
--- a/mlir/test/mlir-lsp-server/hover.test
+++ b/mlir/test/mlir-lsp-server/hover.test
@@ -66,7 +66,7 @@
 // CHECK-NEXT:  "result": {
 // CHECK-NEXT:    "contents": {
 // CHECK-NEXT:      "kind": "markdown",
-// CHECK-NEXT:      "value": "Operation: \"builtin.func\"\n\nBlock #1\n\nPredecessors: <Block #0>\n\n"
+// CHECK-NEXT:      "value": "Operation: \"func.func\"\n\nBlock #1\n\nPredecessors: <Block #0>\n\n"
 // CHECK-NEXT:    },
 // CHECK-NEXT:    "range": {
 // CHECK-NEXT:      "end": {
@@ -90,7 +90,7 @@
 // CHECK-NEXT:  "result": {
 // CHECK-NEXT:    "contents": {
 // CHECK-NEXT:      "kind": "markdown",
-// CHECK-NEXT:      "value": "Operation: \"builtin.func\"\n\nBlock: <Block #0>\n\nArgument #0\n\nType: `i1`\n\n"
+// CHECK-NEXT:      "value": "Operation: \"func.func\"\n\nBlock: <Block #0>\n\nArgument #0\n\nType: `i1`\n\n"
 // CHECK-NEXT:    },
 // CHECK-NEXT:    "range": {
 // CHECK-NEXT:      "end": {
@@ -114,7 +114,7 @@
 // CHECK-NEXT:  "result": {
 // CHECK-NEXT:    "contents": {
 // CHECK-NEXT:      "kind": "markdown",
-// CHECK-NEXT:      "value": "\"builtin.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"builtin.func\"() ({\n}) {sym_name = \"foo\", type = (i1) -> ()} : () -> ()\n```\n"
+// CHECK-NEXT:      "value": "\"func.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"func.func\"() ({\n}) {sym_name = \"foo\", type = (i1) -> ()} : () -> ()\n```\n"
 // CHECK-NEXT:    },
 // CHECK-NEXT:    "range": {
 // CHECK-NEXT:      "end": {
@@ -138,7 +138,7 @@
 // CHECK-NEXT:  "result": {
 // CHECK-NEXT:    "contents": {
 // CHECK-NEXT:      "kind": "markdown",
-// CHECK-NEXT:      "value": "\"builtin.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"builtin.func\"() ({\n}) {sym_name = \"foo\", type = (i1) -> ()} : () -> ()\n```\n"
+// CHECK-NEXT:      "value": "\"func.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"func.func\"() ({\n}) {sym_name = \"foo\", type = (i1) -> ()} : () -> ()\n```\n"
 // CHECK-NEXT:    },
 // CHECK-NEXT:    "range": {
 // CHECK-NEXT:      "end": {

diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir
index af06c397a6381..ba2f46094eab4 100644
--- a/mlir/test/mlir-opt/async.mlir
+++ b/mlir/test/mlir-opt/async.mlir
@@ -1,6 +1,6 @@
 // Check if mlir marks the corresponding function with required coroutine attribute.
 //
-// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,builtin.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,builtin.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
+// RUN:   mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \
 // RUN: | FileCheck %s
 
 // CHECK: llvm.func @async_execute_fn{{.*}}attributes{{.*}}"coroutine.presplit", "0"

diff --git a/mlir/test/mlir-pdll/Parser/expr-failure.pdll b/mlir/test/mlir-pdll/Parser/expr-failure.pdll
index 08174de7cf160..4a766c817d23f 100644
--- a/mlir/test/mlir-pdll/Parser/expr-failure.pdll
+++ b/mlir/test/mlir-pdll/Parser/expr-failure.pdll
@@ -206,7 +206,7 @@ Pattern {
 Pattern {
   // CHECK: expected `)` after operation operand list
   let value: Value;
-  let foo = op<builtin.func>(value<;
+  let foo = op<func.func>(value<;
 }
 
 // -----
@@ -214,7 +214,7 @@ Pattern {
 Pattern {
   // CHECK: unable to convert expression of type `Attr` to the expected type of `ValueRange`
   let attr: Attr;
-  let foo = op<builtin.func>(attr);
+  let foo = op<func.func>(attr);
 }
 
 // -----

diff --git a/mlir/test/mlir-pdll/Parser/stmt-failure.pdll b/mlir/test/mlir-pdll/Parser/stmt-failure.pdll
index 4220259e60ee8..aaf45a9603352 100644
--- a/mlir/test/mlir-pdll/Parser/stmt-failure.pdll
+++ b/mlir/test/mlir-pdll/Parser/stmt-failure.pdll
@@ -92,7 +92,7 @@ Pattern {
 
 Pattern {
   // CHECK: expected `>` after operation name
-  let foo: Op<builtin.func<;
+  let foo: Op<func.func<;
 }
 
 // -----

diff --git a/mlir/test/python/dialects/builtin.py b/mlir/test/python/dialects/builtin.py
index d07c094850a4a..94e29892ba7b1 100644
--- a/mlir/test/python/dialects/builtin.py
+++ b/mlir/test/python/dialects/builtin.py
@@ -22,33 +22,33 @@ def testFromPyFunc():
     with InsertionPoint(m.body):
       # CHECK-LABEL: func @unary_return(%arg0: f64) -> f64
       # CHECK: return %arg0 : f64
-      @builtin.FuncOp.from_py_func(f64)
+      @func.FuncOp.from_py_func(f64)
       def unary_return(a):
         return a
 
       # CHECK-LABEL: func @binary_return(%arg0: f32, %arg1: f64) -> (f32, f64)
       # CHECK: return %arg0, %arg1 : f32, f64
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def binary_return(a, b):
         return a, b
 
       # CHECK-LABEL: func @none_return(%arg0: f32, %arg1: f64)
       # CHECK: return
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def none_return(a, b):
         pass
 
       # CHECK-LABEL: func @call_unary
       # CHECK: %0 = call @unary_return(%arg0) : (f64) -> f64
       # CHECK: return %0 : f64
-      @builtin.FuncOp.from_py_func(f64)
+      @func.FuncOp.from_py_func(f64)
       def call_unary(a):
         return unary_return(a)
 
       # CHECK-LABEL: func @call_binary
       # CHECK: %0:2 = call @binary_return(%arg0, %arg1) : (f32, f64) -> (f32, f64)
       # CHECK: return %0#0, %0#1 : f32, f64
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def call_binary(a, b):
         return binary_return(a, b)
 
@@ -56,41 +56,41 @@ def call_binary(a, b):
       # CHECK-LABEL: func @single_result_op
       # CHECK: %0 = "custom.op1"() : () -> f32
       # CHECK: return %0 : f32
-      @builtin.FuncOp.from_py_func()
+      @func.FuncOp.from_py_func()
       def single_result_op():
         return Operation.create("custom.op1", results=[f32])
 
       # CHECK-LABEL: func @call_none
       # CHECK: call @none_return(%arg0, %arg1) : (f32, f64) -> ()
       # CHECK: return
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def call_none(a, b):
         return none_return(a, b)
 
       ## Variants and optional feature tests.
       # CHECK-LABEL: func @from_name_arg
-      @builtin.FuncOp.from_py_func(f32, f64, name="from_name_arg")
+      @func.FuncOp.from_py_func(f32, f64, name="from_name_arg")
       def explicit_name(a, b):
         return b
 
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def positional_func_op(a, b, func_op):
-        assert isinstance(func_op, builtin.FuncOp)
+        assert isinstance(func_op, func.FuncOp)
         return b
 
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def kw_func_op(a, b=None, func_op=None):
-        assert isinstance(func_op, builtin.FuncOp)
+        assert isinstance(func_op, func.FuncOp)
         return b
 
-      @builtin.FuncOp.from_py_func(f32, f64)
+      @func.FuncOp.from_py_func(f32, f64)
       def kwargs_func_op(a, b=None, **kwargs):
-        assert isinstance(kwargs["func_op"], builtin.FuncOp)
+        assert isinstance(kwargs["func_op"], func.FuncOp)
         return b
 
       # CHECK-LABEL: func @explicit_results(%arg0: f32, %arg1: f64) -> f64
       # CHECK: return %arg1 : f64
-      @builtin.FuncOp.from_py_func(f32, f64, results=[f64])
+      @func.FuncOp.from_py_func(f32, f64, results=[f64])
       def explicit_results(a, b):
         func.ReturnOp([b])
 
@@ -107,7 +107,7 @@ def testFromPyFuncErrors():
     with InsertionPoint(m.body):
       try:
 
-        @builtin.FuncOp.from_py_func(f64, results=[f64])
+        @func.FuncOp.from_py_func(f64, results=[f64])
         def unary_return(a):
           return a
       except AssertionError as e:
@@ -125,7 +125,7 @@ def testBuildFuncOp():
     f32 = F32Type.get()
     tensor_type = RankedTensorType.get((2, 3, 4), f32)
     with InsertionPoint.at_block_begin(m.body):
-      f = builtin.FuncOp(name="some_func",
+      f = func.FuncOp(name="some_func",
                             type=FunctionType.get(
                                 inputs=[tensor_type, tensor_type],
                                 results=[tensor_type]),
@@ -156,7 +156,7 @@ def testBuildFuncOp():
         print(e)
 
       # Try the callback builder and passing type as tuple.
-      f = builtin.FuncOp(name="some_other_func",
+      f = func.FuncOp(name="some_other_func",
                             type=([tensor_type, tensor_type], [tensor_type]),
                             visibility="nested",
                             body_builder=lambda f: func.ReturnOp(
@@ -181,7 +181,7 @@ def testFuncArgumentAccess():
     f32 = F32Type.get()
     f64 = F64Type.get()
     with InsertionPoint(module.body):
-      f = builtin.FuncOp("some_func", ([f32, f32], [f32, f32]))
+      f = func.FuncOp("some_func", ([f32, f32], [f32, f32]))
       with InsertionPoint(f.add_entry_block()):
         func.ReturnOp(f.arguments)
       f.arg_attrs = ArrayAttr.get([
@@ -196,7 +196,7 @@ def testFuncArgumentAccess():
           DictAttr.get({"custom_dialect.res2": FloatAttr.get(f64, 256.0)})
       ])
 
-      other = builtin.FuncOp("other_func", ([f32, f32], []))
+      other = func.FuncOp("other_func", ([f32, f32], []))
       with InsertionPoint(other.add_entry_block()):
         func.ReturnOp([])
       other.arg_attrs = [

diff --git a/mlir/test/python/dialects/func.py b/mlir/test/python/dialects/func.py
index c4d6417f0558c..3be9cac2c1925 100644
--- a/mlir/test/python/dialects/func.py
+++ b/mlir/test/python/dialects/func.py
@@ -77,14 +77,14 @@ def testConstantIndexOp():
 # CHECK-LABEL: TEST: testFunctionCalls
 @constructAndPrintInModule
 def testFunctionCalls():
-  foo = builtin.FuncOp("foo", ([], []))
+  foo = func.FuncOp("foo", ([], []))
   foo.sym_visibility = StringAttr.get("private")
-  bar = builtin.FuncOp("bar", ([], [IndexType.get()]))
+  bar = func.FuncOp("bar", ([], [IndexType.get()]))
   bar.sym_visibility = StringAttr.get("private")
-  qux = builtin.FuncOp("qux", ([], [F32Type.get()]))
+  qux = func.FuncOp("qux", ([], [F32Type.get()]))
   qux.sym_visibility = StringAttr.get("private")
 
-  with InsertionPoint(builtin.FuncOp("caller", ([], [])).add_entry_block()):
+  with InsertionPoint(func.FuncOp("caller", ([], [])).add_entry_block()):
     func.CallOp(foo, [])
     func.CallOp([IndexType.get()], "bar", [])
     func.CallOp([F32Type.get()], FlatSymbolRefAttr.get("qux"), [])

diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py b/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py
index 6e3ba8fd72c9c..ebe2c0f33a286 100644
--- a/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py
@@ -46,7 +46,7 @@ def conv_poly(
     # CHECK-NEXT:   %[[SUM:.+]] = arith.addi %[[OUT]], %[[PROD]] : i32
     # CHECK-NEXT:   linalg.yield %[[SUM]] : i32
     # CHECK-NEXT: -> tensor<1x2x4x1xi32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2, 1), f32),
         RankedTensorType.get((1, 2, 4, 1), i32))

diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_fill.py b/mlir/test/python/dialects/linalg/opdsl/emit_fill.py
index 55ca50be5fad1..1f840b09b0085 100644
--- a/mlir/test/python/dialects/linalg/opdsl/emit_fill.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_fill.py
@@ -35,7 +35,7 @@ def fill_rank_zero_poly(I=TensorDef(T1), O=TensorDef(U, output=True)):
     # CHECK: linalg.generic
     # CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]
     # CHECK-SAME: iterator_types = []
-    @builtin.FuncOp.from_py_func(f32, RankedTensorType.get([], f32))
+    @func.FuncOp.from_py_func(f32, RankedTensorType.get([], f32))
     def test_fill_0d(value, init_result):
       return fill_poly(value, outs=[init_result])
 
@@ -43,7 +43,7 @@ def test_fill_0d(value, init_result):
     # CHECK: linalg.generic
     # CHECK-SAME: indexing_maps = [#[[$MAP1]], #[[$MAP2]]]
     # CHECK-SAME: iterator_types = ["parallel", "parallel"]
-    @builtin.FuncOp.from_py_func(f32, RankedTensorType.get([4, 16], f32))
+    @func.FuncOp.from_py_func(f32, RankedTensorType.get([4, 16], f32))
     def test_fill_2d(value, init_result):
       return fill_poly(value, outs=[init_result])
 
@@ -51,7 +51,7 @@ def test_fill_2d(value, init_result):
     # CHECK: linalg.generic
     # CHECK-SAME: indexing_maps = [#[[$MAP3]], #[[$MAP4]]]
     # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get([], f32), RankedTensorType.get([4, 8, 16], f32))
     def test_fill_rank_zero_3d(input, init_result):
       return fill_rank_zero_poly(input, outs=[init_result])

diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
index b3b4537874282..add7d6abc2d1c 100644
--- a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
@@ -56,7 +56,7 @@ def matmul_poly(
     # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
     # CHECK-SAME: ins(%[[A]], %[[B]]
     # CHECK-SAME: outs(%[[INITC]]
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
     def test_matmul_mono(lhs, rhs):
       init_result = linalg.InitTensorOp([4, 8], f32)
@@ -70,7 +70,7 @@ def test_matmul_mono(lhs, rhs):
     # CHECK-NEXT:   %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i32
     # CHECK-NEXT:   linalg.yield %[[ADD]] : i32
     # CHECK-NEXT: -> tensor<4x8xi32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8),
         RankedTensorType.get((4, 8), i32))
     def test_i8i8i32_matmul(lhs, rhs, init_result):
@@ -79,7 +79,7 @@ def test_i8i8i32_matmul(lhs, rhs, init_result):
     # CHECK-LABEL: @test_i8i8i32_matmul_unsigned
     # CHECK:   = arith.extui
     # CHECK:   = arith.extui
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8),
         RankedTensorType.get((4, 8), i32))
     def test_i8i8i32_matmul_unsigned(lhs, rhs, init_result):
@@ -94,7 +94,7 @@ def test_i8i8i32_matmul_unsigned(lhs, rhs, init_result):
     # CHECK-NEXT:   %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i32
     # CHECK-NEXT:   linalg.yield %[[ADD]] : i32
     # CHECK-NEXT: -> tensor<4x8xi32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i16),
         RankedTensorType.get((4, 8), i32))
     def test_i8i16i32_matmul(lhs, rhs, init_result):
@@ -108,7 +108,7 @@ def test_i8i16i32_matmul(lhs, rhs, init_result):
     # CHECK-NEXT:   %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i16
     # CHECK-NEXT:   linalg.yield %[[ADD]] : i16
     # CHECK-NEXT: -> tensor<4x8xi16>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), i32), RankedTensorType.get((16, 8), i32),
         RankedTensorType.get((4, 8), i16))
     def test_i32i32i16_matmul(lhs, rhs, init_result):
@@ -122,7 +122,7 @@ def test_i32i32i16_matmul(lhs, rhs, init_result):
     # CHECK-NEXT:   %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32
     # CHECK-NEXT:   linalg.yield %[[ADD]] : f32
     # CHECK-NEXT: -> tensor<4x8xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8),
         RankedTensorType.get((4, 8), f32))
     def test_i8i8f32_matmul(lhs, rhs, init_result):
@@ -131,7 +131,7 @@ def test_i8i8f32_matmul(lhs, rhs, init_result):
     # CHECK-LABEL: @test_i8i8f32_matmul_unsigned
     # CHECK:   = arith.uitofp
     # CHECK:   = arith.uitofp
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8),
         RankedTensorType.get((4, 8), f32))
     def test_i8i8f32_matmul_unsigned(lhs, rhs, init_result):
@@ -146,7 +146,7 @@ def test_i8i8f32_matmul_unsigned(lhs, rhs, init_result):
     # CHECK-NEXT:   %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32
     # CHECK-NEXT:   linalg.yield %[[ADD]] : f32
     # CHECK-NEXT: -> tensor<4x8xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f16), RankedTensorType.get((16, 8), f16),
         RankedTensorType.get((4, 8), f32))
     def test_f16f16f32_matmul(lhs, rhs, init_result):
@@ -160,7 +160,7 @@ def test_f16f16f32_matmul(lhs, rhs, init_result):
     # CHECK-NEXT:   %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32
     # CHECK-NEXT:   linalg.yield %[[ADD]] : f32
     # CHECK-NEXT: -> tensor<4x8xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f64), RankedTensorType.get((16, 8), f64),
         RankedTensorType.get((4, 8), f32))
     def test_f64f64f32_matmul(lhs, rhs, init_result):

diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_misc.py b/mlir/test/python/dialects/linalg/opdsl/emit_misc.py
index e57a49bec7b82..2d045125f2858 100644
--- a/mlir/test/python/dialects/linalg/opdsl/emit_misc.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_misc.py
@@ -54,7 +54,7 @@ def non_default_op_name(I=TensorDef(T, S.N), O=TensorDef(T, S.N, output=True)):
     # CHECK-DAG:    %[[CST1_CAST:.+]] = arith.truncf %[[CST1]] : f64 to f32
     # CHECK-DAG:    %[[SUM:.+]] = arith.addf %[[CST0_CAST]], %[[CST1_CAST]] : f32
     # CHECK-NEXT:   linalg.yield %[[SUM]] : f32
-    @builtin.FuncOp.from_py_func(RankedTensorType.get((4, 16), f32))
+    @func.FuncOp.from_py_func(RankedTensorType.get((4, 16), f32))
     def test_f32_const(init_result):
       return test_const(outs=[init_result])
 
@@ -65,7 +65,7 @@ def test_f32_const(init_result):
     # CHECK-DAG:    %[[IDX1_CAST:.+]] = arith.index_cast %[[IDX1]] : index to i32
     # CHECK-DAG:    %[[SUM:.+]] = arith.addi %[[IDX0_CAST]], %[[IDX1_CAST]] : i32
     # CHECK-NEXT:   linalg.yield %[[SUM]] : i32
-    @builtin.FuncOp.from_py_func(RankedTensorType.get((4, 16), i32))
+    @func.FuncOp.from_py_func(RankedTensorType.get((4, 16), i32))
     def test_i32_index(init_result):
       return test_index(outs=[init_result])
 
@@ -74,7 +74,7 @@ def test_i32_index(init_result):
     # CHECK-NEXT:   %[[EXP:.+]] = math.exp %[[IN]] : f32
     # CHECK-NEXT:   linalg.yield %[[EXP]] : f32
     # CHECK-NEXT: -> tensor<4x16xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32))
     def test_f32_elemwise_exp(input, init_result):
       return elemwise_unary_poly(input, outs=[init_result], fun=UnaryFn.exp)
@@ -84,7 +84,7 @@ def test_f32_elemwise_exp(input, init_result):
     # CHECK-NEXT:   %[[LOG:.+]] = math.log %[[IN]] : f32
     # CHECK-NEXT:   linalg.yield %[[LOG]] : f32
     # CHECK-NEXT: -> tensor<4x16xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32))
     def test_f32_elemwise_log(input, init_result):
       return elemwise_unary_poly(input, outs=[init_result], fun=UnaryFn.log)
@@ -94,7 +94,7 @@ def test_f32_elemwise_log(input, init_result):
     # CHECK-NEXT:   %[[EXP:.+]] = math.abs %[[IN]] : f32
     # CHECK-NEXT:   linalg.yield %[[EXP]] : f32
     # CHECK-NEXT: -> tensor<4x16xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32))
     def test_f32_elemwise_abs(input, init_result):
       return elemwise_unary_poly(input, outs=[init_result], fun=UnaryFn.abs)
@@ -104,7 +104,7 @@ def test_f32_elemwise_abs(input, init_result):
     # CHECK-NEXT:   %[[EXP:.+]] = math.ceil %[[IN]] : f32
     # CHECK-NEXT:   linalg.yield %[[EXP]] : f32
     # CHECK-NEXT: -> tensor<4x16xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32))
     def test_f32_elemwise_ceil(input, init_result):
       return elemwise_unary_poly(input, outs=[init_result], fun=UnaryFn.ceil)
@@ -114,7 +114,7 @@ def test_f32_elemwise_ceil(input, init_result):
     # CHECK-NEXT:   %[[EXP:.+]] = math.floor %[[IN]] : f32
     # CHECK-NEXT:   linalg.yield %[[EXP]] : f32
     # CHECK-NEXT: -> tensor<4x16xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32))
     def test_f32_elemwise_floor(input, init_result):
       return elemwise_unary_poly(input, outs=[init_result], fun=UnaryFn.floor)
@@ -124,14 +124,14 @@ def test_f32_elemwise_floor(input, init_result):
     # CHECK-NEXT:   %[[EXP:.+]] = arith.negf %[[IN]] : f32
     # CHECK-NEXT:   linalg.yield %[[EXP]] : f32
     # CHECK-NEXT: -> tensor<4x16xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32))
     def test_f32_elemwise_neg(input, init_result):
       return elemwise_unary_poly(input, outs=[init_result], fun=UnaryFn.negf)
 
     # Just check that we don't assert out on name mismatch.
     # CHECK-LABEL: @test_non_default_op_name
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((42,), f32), RankedTensorType.get((42,), f32))
     def test_non_default_op_name(input, init_result):
       return non_default_op_name(input, outs=[init_result])

diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py b/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py
index b97b0188e0e8d..2fd63382c4ec3 100644
--- a/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py
@@ -46,7 +46,7 @@ def pooling_poly(
     # CHECK-NEXT:   %[[MAX:.+]] = arith.maxsi %[[OUT]], %[[IN_CAST:.+]] : i32
     # CHECK-NEXT:   linalg.yield %[[MAX]] : i32
     # CHECK-NEXT: -> tensor<1x2x4x1xi32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2), f32),
         RankedTensorType.get((1, 2, 4, 1), i32))
@@ -57,7 +57,7 @@ def test_f32i32_max_pooling(input, shape, init_result):
     # CHECK-LABEL: @test_f32i32_max_unsigned_pooling
     # CHECK:   = arith.fptoui
     # CHECK:   = arith.maxui
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2), f32),
         RankedTensorType.get((1, 2, 4, 1), i32))
@@ -79,7 +79,7 @@ def test_f32i32_max_unsigned_pooling(input, shape, init_result):
     # CHECK-NEXT:   %[[MAX:.+]] = arith.maxf %[[OUT]], %[[IN:.+]] : f32
     # CHECK-NEXT:   linalg.yield %[[MAX]] : f32
     # CHECK-NEXT: -> tensor<1x2x4x1xf32>
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2), f32),
         RankedTensorType.get((1, 2, 4, 1), f32))
@@ -90,7 +90,7 @@ def test_f32f32_max_pooling(input, shape, init_result):
     # CHECK-LABEL: @test_f32i32_min_pooling
     # CHECK:   = arith.fptosi
     # CHECK:   = arith.minsi
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2), f32),
         RankedTensorType.get((1, 2, 4, 1), i32))
@@ -106,7 +106,7 @@ def test_f32i32_min_pooling(input, shape, init_result):
     # CHECK-LABEL: @test_f32i32_min_unsigned_pooling
     # CHECK:   = arith.fptoui
     # CHECK:   = arith.minui
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2), f32),
         RankedTensorType.get((1, 2, 4, 1), i32))
@@ -122,7 +122,7 @@ def test_f32i32_min_unsigned_pooling(input, shape, init_result):
 
     # CHECK-LABEL: @test_f32f32_min_pooling
     # CHECK:   = arith.minf
-    @builtin.FuncOp.from_py_func(
+    @func.FuncOp.from_py_func(
         RankedTensorType.get((1, 4, 16, 1), f32),
         RankedTensorType.get((2, 2), f32),
         RankedTensorType.get((1, 2, 4, 1), f32))

diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
index 494213ef2777f..f11c059d94a9d 100644
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -24,19 +24,19 @@ def testInitTensor():
     with InsertionPoint(module.body):
       # CHECK-LABEL: func @static_sizes
       # CHECK: %0 = linalg.init_tensor [3, 4] : tensor<3x4xf32>
-      @builtin.FuncOp.from_py_func()
+      @func.FuncOp.from_py_func()
       def static_sizes():
         return linalg.InitTensorOp([3, 4], f32)
 
       # CHECK-LABEL: func @dynamic_sizes
       # CHECK: %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
-      @builtin.FuncOp.from_py_func(IndexType.get(), IndexType.get())
+      @func.FuncOp.from_py_func(IndexType.get(), IndexType.get())
       def dynamic_sizes(d0, d1):
         return linalg.InitTensorOp([d0, d1], f32)
 
       # CHECK-LABEL: func @zero_d
       # CHECK: %0 = linalg.init_tensor [] : tensor<f32>
-      @builtin.FuncOp.from_py_func()
+      @func.FuncOp.from_py_func()
       def zero_d():
         return linalg.InitTensorOp([], f32)
 
@@ -67,7 +67,7 @@ def testFill():
       #  CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
       #  CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
       #  CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
-      @builtin.FuncOp.from_py_func(RankedTensorType.get((12, -1), f32))
+      @func.FuncOp.from_py_func(RankedTensorType.get((12, -1), f32))
       def fill_tensor(out):
         zero = arith.ConstantOp(value=FloatAttr.get(f32, 0.), result=f32).result
         return linalg.fill(zero, outs=[out])
@@ -77,7 +77,7 @@ def fill_tensor(out):
       #  CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
       #  CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : memref<12x?xf32>)
       #  CHECK-NEXT: return
-      @builtin.FuncOp.from_py_func(MemRefType.get((12, -1), f32))
+      @func.FuncOp.from_py_func(MemRefType.get((12, -1), f32))
       def fill_buffer(out):
         zero = arith.ConstantOp(value=FloatAttr.get(f32, 0.), result=f32).result
         linalg.fill(zero, outs=[out])
@@ -93,7 +93,7 @@ def testNamedStructuredOpCustomForm():
     f32 = F32Type.get()
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           RankedTensorType.get((4, 8), f32), RankedTensorType.get((4, 8), f32))
       def named_form(lhs, rhs):
         init_result = linalg.InitTensorOp([4, 8], f32)
@@ -127,7 +127,7 @@ def testNamedStructuredOpGenericForm():
     f32 = F32Type.get()
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
                                                                    f32))
       def named_form(lhs, rhs):
@@ -153,7 +153,7 @@ def testNamedStructuredAsGenericOp():
     f32 = F32Type.get()
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
                                                                    f32))
       def generic_form(lhs, rhs):
@@ -173,7 +173,7 @@ def testOpResultFromOtherOp():
     f32 = F32Type.get()
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
                                                                    f32))
       def pass_an_op_directly(arg0, arg1):

diff --git a/mlir/test/python/dialects/math_dialect.py b/mlir/test/python/dialects/math_dialect.py
index 08ffe0c460321..04b6d848c7422 100644
--- a/mlir/test/python/dialects/math_dialect.py
+++ b/mlir/test/python/dialects/math_dialect.py
@@ -4,7 +4,7 @@
 # python package's math module (coming in from random.py).
 
 from mlir.ir import *
-import mlir.dialects.builtin as builtin
+import mlir.dialects.func as func
 import mlir.dialects.math as mlir_math
 
 def run(f):
@@ -17,7 +17,7 @@ def testMathOps():
   with Context() as ctx, Location.unknown():
     module = Module.create()
     with InsertionPoint(module.body):
-      @builtin.FuncOp.from_py_func(F32Type.get())
+      @func.FuncOp.from_py_func(F32Type.get())
       def emit_sqrt(arg):
         return mlir_math.SqrtOp(arg)
 

diff --git a/mlir/test/python/dialects/scf.py b/mlir/test/python/dialects/scf.py
index c45931c7e76cd..4a618ff4eecc3 100644
--- a/mlir/test/python/dialects/scf.py
+++ b/mlir/test/python/dialects/scf.py
@@ -22,7 +22,7 @@ def constructAndPrintInModule(f):
 def testSimpleLoop():
   index_type = IndexType.get()
 
-  @builtin.FuncOp.from_py_func(index_type, index_type, index_type)
+  @func.FuncOp.from_py_func(index_type, index_type, index_type)
   def simple_loop(lb, ub, step):
     loop = scf.ForOp(lb, ub, step, [lb, lb])
     with InsertionPoint(loop.body):
@@ -41,7 +41,7 @@ def simple_loop(lb, ub, step):
 def testInductionVar():
   index_type = IndexType.get()
 
-  @builtin.FuncOp.from_py_func(index_type, index_type, index_type)
+  @func.FuncOp.from_py_func(index_type, index_type, index_type)
   def induction_var(lb, ub, step):
     loop = scf.ForOp(lb, ub, step, [lb])
     with InsertionPoint(loop.body):
@@ -57,9 +57,9 @@ def induction_var(lb, ub, step):
 @constructAndPrintInModule
 def testOpsAsArguments():
   index_type = IndexType.get()
-  callee = builtin.FuncOp(
+  callee = func.FuncOp(
       "callee", ([], [index_type, index_type]), visibility="private")
-  f = builtin.FuncOp("ops_as_arguments", ([], []))
+  f = func.FuncOp("ops_as_arguments", ([], []))
   with InsertionPoint(f.add_entry_block()):
     lb = arith.ConstantOp.create_index(0)
     ub = arith.ConstantOp.create_index(42)
@@ -89,7 +89,7 @@ def testIfWithoutElse():
   bool = IntegerType.get_signless(1)
   i32 = IntegerType.get_signless(32)
 
-  @builtin.FuncOp.from_py_func(bool)
+  @func.FuncOp.from_py_func(bool)
   def simple_if(cond):
     if_op = scf.IfOp(cond)
     with InsertionPoint(if_op.then_block):
@@ -111,7 +111,7 @@ def testIfWithElse():
   bool = IntegerType.get_signless(1)
   i32 = IntegerType.get_signless(32)
 
-  @builtin.FuncOp.from_py_func(bool)
+  @func.FuncOp.from_py_func(bool)
   def simple_if_else(cond):
     if_op = scf.IfOp(cond, [i32, i32], hasElse=True)
     with InsertionPoint(if_op.then_block):

diff --git a/mlir/test/python/dialects/shape.py b/mlir/test/python/dialects/shape.py
index a798b85843dad..dcfb6fe66a8d2 100644
--- a/mlir/test/python/dialects/shape.py
+++ b/mlir/test/python/dialects/shape.py
@@ -2,7 +2,7 @@
 
 from mlir.ir import *
 import numpy as np
-import mlir.dialects.builtin as builtin
+import mlir.dialects.func as func
 import mlir.dialects.shape as shape
 
 
@@ -19,7 +19,7 @@ def testConstShape():
     module = Module.create()
     f32 = F32Type.get()
     with InsertionPoint(module.body):
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           RankedTensorType.get((12, -1), f32))
       def const_shape_tensor(arg):
         return shape.ConstShapeOp(

diff --git a/mlir/test/python/dialects/tensor.py b/mlir/test/python/dialects/tensor.py
index 3754097badd73..3a470856abe9c 100644
--- a/mlir/test/python/dialects/tensor.py
+++ b/mlir/test/python/dialects/tensor.py
@@ -2,7 +2,7 @@
 
 from mlir.ir import *
 import mlir.dialects.arith as arith
-import mlir.dialects.builtin as builtin
+import mlir.dialects.func as func
 import mlir.dialects.tensor as tensor
 
 
@@ -21,7 +21,7 @@ def testDimOp():
     indexType = IndexType.get()
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(RankedTensorType.get((-1, -1), f32Type))
+      @func.FuncOp.from_py_func(RankedTensorType.get((-1, -1), f32Type))
       #      CHECK: func @tensor_static_dim
       # CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?xf32>
       #  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index

diff --git a/mlir/test/python/dialects/vector.py b/mlir/test/python/dialects/vector.py
index 5c2043eec84b6..c31579545e6e7 100644
--- a/mlir/test/python/dialects/vector.py
+++ b/mlir/test/python/dialects/vector.py
@@ -17,7 +17,7 @@ def testPrintOp():
   module = Module.create()
   with InsertionPoint(module.body):
 
-    @builtin.FuncOp.from_py_func(VectorType.get((12, 5), F32Type.get()))
+    @func.FuncOp.from_py_func(VectorType.get((12, 5), F32Type.get()))
     def print_vector(arg):
       return vector.PrintOp(arg)
 
@@ -40,7 +40,7 @@ def testTransferReadOp():
     mask_type = VectorType.get(vector_type.shape, IntegerType.get_signless(1))
     identity_map = AffineMap.get_identity(vector_type.rank)
     identity_map_attr = AffineMapAttr.get(identity_map)
-    f = builtin.FuncOp("transfer_read",
+    f = func.FuncOp("transfer_read",
                           ([memref_type, index_type,
                             F32Type.get(), mask_type], []))
     with InsertionPoint(f.add_entry_block()):

diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
index f58292ee7241d..18c8c46617b04 100644
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -196,7 +196,7 @@ def transform(module, boilerplate):
   mod = Module.parse("\n".join([str(op) for op in ops]) + boilerplate)
 
   pm = PassManager.parse(
-      "builtin.func(convert-linalg-to-loops, lower-affine, " +
+      "func.func(convert-linalg-to-loops, lower-affine, " +
       "convert-math-to-llvm, convert-scf-to-cf, arith-expand, memref-expand), "
       + "convert-vector-to-llvm, convert-memref-to-llvm, convert-func-to-llvm," +
       "reconcile-unrealized-casts")
@@ -211,14 +211,14 @@ def test_elemwise_builtin():
     i8 = IntegerType.get_signless(8)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((), f32), MemRefType.get((4, 8), f32),
           MemRefType.get((4, 8), f32))
       def elemwise_exp_add_on_buffers(lhs, rhs, out):
         linalg.elemwise_unary(lhs, outs=[out])
         linalg.elemwise_binary(out, rhs, outs=[out])
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((), f32), MemRefType.get((4, 8), f32),
           MemRefType.get((4, 8), f32))
       def elemwise_log_mul_on_buffers(lhs, rhs, out):
@@ -250,14 +250,14 @@ def test_elemwise_generic():
     i8 = IntegerType.get_signless(8)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((), f32), MemRefType.get((4, 8), f32),
           MemRefType.get((4, 8), f32))
       def elemwise_exp_add_on_buffers(lhs, rhs, out):
         linalg.elemwise_unary(lhs, outs=[out], emit_generic=True)
         linalg.elemwise_binary(out, rhs, outs=[out], emit_generic=True)
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((), f32), MemRefType.get((4, 8), f32),
           MemRefType.get((4, 8), f32))
       def elemwise_log_mul_on_buffers(lhs, rhs, out):
@@ -291,13 +291,13 @@ def test_matmul_builtin():
     i8 = IntegerType.get_signless(8)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((4, 16), i8), MemRefType.get((16, 8), f32),
           MemRefType.get((4, 8), f32))
       def matmul_signed_on_buffers(lhs, rhs, out):
         linalg.matmul(lhs, rhs, outs=[out])
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((4, 16), i8), MemRefType.get((16, 8), f32),
           MemRefType.get((4, 8), f32))
       def matmul_unsigned_on_buffers(lhs, rhs, out):
@@ -328,13 +328,13 @@ def test_matmul_generic():
     i8 = IntegerType.get_signless(8)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((4, 16), i8), MemRefType.get((16, 8), f32),
           MemRefType.get((4, 8), f32))
       def matmul_signed_on_buffers(lhs, rhs, out):
         linalg.matmul(lhs, rhs, outs=[out], emit_generic=True)
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((4, 16), i8), MemRefType.get((16, 8), f32),
           MemRefType.get((4, 8), f32))
       def matmul_unsigned_on_buffers(lhs, rhs, out):
@@ -366,15 +366,15 @@ def test_fill_builtin():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(f32, MemRefType.get([], i32))
+      @func.FuncOp.from_py_func(f32, MemRefType.get([], i32))
       def fill_0d_on_buffers(value, out):
         linalg.fill(value, outs=[out])
 
-      @builtin.FuncOp.from_py_func(f32, MemRefType.get([16], i32))
+      @func.FuncOp.from_py_func(f32, MemRefType.get([16], i32))
       def fill_1d_on_buffers(value, out):
         linalg.fill(value, outs=[out])
 
-      @builtin.FuncOp.from_py_func(f32, MemRefType.get([4, 16], i32))
+      @func.FuncOp.from_py_func(f32, MemRefType.get([4, 16], i32))
       def fill_2d_on_buffers(value, out):
         linalg.fill(value, outs=[out])
 
@@ -401,15 +401,15 @@ def test_fill_generic():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(f32, MemRefType.get([], i32))
+      @func.FuncOp.from_py_func(f32, MemRefType.get([], i32))
       def fill_0d_on_buffers(value, out):
         linalg.fill(value, outs=[out], emit_generic=True)
 
-      @builtin.FuncOp.from_py_func(f32, MemRefType.get([16], i32))
+      @func.FuncOp.from_py_func(f32, MemRefType.get([16], i32))
       def fill_1d_on_buffers(value, out):
         linalg.fill(value, outs=[out], emit_generic=True)
 
-      @builtin.FuncOp.from_py_func(f32, MemRefType.get([4, 16], i32))
+      @func.FuncOp.from_py_func(f32, MemRefType.get([4, 16], i32))
       def fill_2d_on_buffers(value, out):
         linalg.fill(value, outs=[out], emit_generic=True)
 
@@ -436,7 +436,7 @@ def test_fill_rng_builtin():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(f64, f64, i32, MemRefType.get((4, 16), i32))
+      @func.FuncOp.from_py_func(f64, f64, i32, MemRefType.get((4, 16), i32))
       def fill_rng_on_buffers(min, max, seed, out):
         linalg.fill_rng_2d(min, max, seed, outs=[out])
 
@@ -463,7 +463,7 @@ def test_fill_rng_generic():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(f64, f64, i32, MemRefType.get((4, 16), i32))
+      @func.FuncOp.from_py_func(f64, f64, i32, MemRefType.get((4, 16), i32))
       def fill_rng_on_buffers(min, max, seed, out):
         linalg.fill_rng_2d(min, max, seed, outs=[out], emit_generic=True)
 
@@ -490,7 +490,7 @@ def test_max_pooling_builtin():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
           MemRefType.get((1, 2, 4, 1), i32))
       def pooling_on_buffers(input, shape, output):
@@ -521,7 +521,7 @@ def test_max_pooling_generic():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
           MemRefType.get((1, 2, 4, 1), i32))
       def pooling_on_buffers(input, shape, output):
@@ -557,7 +557,7 @@ def test_min_pooling_builtin():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
           MemRefType.get((1, 2, 4, 1), i32))
       # Set the strides and use the default dilations.
@@ -587,7 +587,7 @@ def test_min_pooling_generic():
     i32 = IntegerType.get_signless(32)
     with InsertionPoint(module.body):
 
-      @builtin.FuncOp.from_py_func(
+      @func.FuncOp.from_py_func(
           MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
           MemRefType.get((1, 2, 4, 1), i32))
       # Set the strides and use the default dilations.

diff --git a/mlir/test/python/ir/blocks.py b/mlir/test/python/ir/blocks.py
index 9ccffaeb2ec08..758b2874ebbcc 100644
--- a/mlir/test/python/ir/blocks.py
+++ b/mlir/test/python/ir/blocks.py
@@ -32,7 +32,7 @@ def testBlockCreation():
       f_type = FunctionType.get(
           [IntegerType.get_signless(32),
            IntegerType.get_signless(16)], [])
-      f_op = builtin.FuncOp("test", f_type)
+      f_op = func.FuncOp("test", f_type)
       entry_block = f_op.add_entry_block()
       i32_arg, i16_arg = entry_block.arguments
       successor_block = entry_block.create_after(i32_arg.type)
@@ -62,7 +62,7 @@ def testFirstBlockCreation():
     module = Module.create()
     f32 = F32Type.get()
     with InsertionPoint(module.body):
-      f = builtin.FuncOp("test", ([f32], []))
+      f = func.FuncOp("test", ([f32], []))
       entry_block = Block.create_at_start(f.operation.regions[0], [f32])
       with InsertionPoint(entry_block):
         func.ReturnOp([])

diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py
index 2b63f15422da7..8dca68385947d 100644
--- a/mlir/test/python/ir/operation.py
+++ b/mlir/test/python/ir/operation.py
@@ -113,9 +113,9 @@ def walk_operations(indent, op):
   # CHECK:       REGION 0:
   # CHECK:         BLOCK 0:
   # CHECK:           OP 0: %0 = "custom.addi"
-  # CHECK:           OP 0: parent builtin.func
+  # CHECK:           OP 0: parent func.func
   # CHECK:           OP 1: return
-  # CHECK:           OP 1: parent builtin.func
+  # CHECK:           OP 1: parent func.func
   walk_operations("", module.operation)
 
 
@@ -127,7 +127,7 @@ def testBlockAndRegionOwners():
   module = Module.parse(
       r"""
     builtin.module {
-      builtin.func @f() {
+      func.func @f() {
         func.return
       }
     }

diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 3531b0507a343..48d7e566466af 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -36,19 +36,19 @@ def testParseSuccess():
     # A first import is expected to fail because the pass isn't registered
     # until we import mlir.transforms
     try:
-      pm = PassManager.parse("builtin.module(builtin.func(print-op-stats))")
+      pm = PassManager.parse("builtin.module(func.func(print-op-stats))")
       # TODO: this error should be propagate to Python but the C API does not help right now.
       # CHECK: error: 'print-op-stats' does not refer to a registered pass or pass pipeline
     except ValueError as e:
-      # CHECK: ValueError exception: invalid pass pipeline 'builtin.module(builtin.func(print-op-stats))'.
+      # CHECK: ValueError exception: invalid pass pipeline 'builtin.module(func.func(print-op-stats))'.
       log("ValueError exception:", e)
     else:
       log("Exception not produced")
 
     # This will register the pass and round-trip should be possible now.
     import mlir.transforms
-    pm = PassManager.parse("builtin.module(builtin.func(print-op-stats))")
-    # CHECK: Roundtrip: builtin.module(builtin.func(print-op-stats))
+    pm = PassManager.parse("builtin.module(func.func(print-op-stats))")
+    # CHECK: Roundtrip: builtin.module(func.func(print-op-stats))
     log("Roundtrip: ", pm)
 run(testParseSuccess)
 
@@ -72,10 +72,10 @@ def testInvalidNesting():
   with Context():
     try:
       import mlir.all_passes_registration
-      pm = PassManager.parse("builtin.func(normalize-memrefs)")
+      pm = PassManager.parse("func.func(normalize-memrefs)")
     except ValueError as e:
-      # CHECK: Can't add pass 'NormalizeMemRefs' restricted to 'builtin.module' on a PassManager intended to run on 'builtin.func', did you intend to nest?
-      # CHECK: ValueError exception: invalid pass pipeline 'builtin.func(normalize-memrefs)'.
+      # CHECK: Can't add pass 'NormalizeMemRefs' restricted to 'builtin.module' on a PassManager intended to run on 'func.func', did you intend to nest?
+      # CHECK: ValueError exception: invalid pass pipeline 'func.func(normalize-memrefs)'.
       log("ValueError exception:", e)
     else:
       log("Exception not produced")
@@ -90,7 +90,7 @@ def testRunPipeline():
     module = Module.parse(r"""func @successfulParse() { return }""")
     pm.run(module)
 # CHECK: Operations encountered:
-# CHECK: builtin.func      , 1
 # CHECK: builtin.module    , 1
+# CHECK: func.func      , 1
 # CHECK: func.return        , 1
 run(testRunPipeline)

diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index b38e05c854aae..3b5f28e12cd91 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/ExecutionEngine/CRunnerUtils.h"
 #include "mlir/ExecutionEngine/ExecutionEngine.h"

diff --git a/mlir/unittests/IR/InterfaceAttachmentTest.cpp b/mlir/unittests/IR/InterfaceAttachmentTest.cpp
index a9b5547e4fb8c..132625e0b8b42 100644
--- a/mlir/unittests/IR/InterfaceAttachmentTest.cpp
+++ b/mlir/unittests/IR/InterfaceAttachmentTest.cpp
@@ -278,7 +278,7 @@ struct TestExternalOpOverridingModel
   }
 
   static unsigned getNameLengthPlusArgTwice(unsigned arg) {
-    return FuncOp::getOperationName().size() + 2 * arg;
+    return UnrealizedConversionCastOp::getOperationName().size() + 2 * arg;
   }
 
   unsigned getNameLengthTimesArg(Operation *op, unsigned arg) const {
@@ -290,9 +290,11 @@ struct TestExternalOpOverridingModel
 
 TEST(InterfaceAttachment, Operation) {
   MLIRContext context;
+  OpBuilder builder(&context);
 
   // Initially, the operation doesn't have the interface.
-  OwningOpRef<ModuleOp> moduleOp = ModuleOp::create(UnknownLoc::get(&context));
+  OwningOpRef<ModuleOp> moduleOp =
+      builder.create<ModuleOp>(UnknownLoc::get(&context));
   ASSERT_FALSE(isa<TestExternalOpInterface>(moduleOp->getOperation()));
 
   // We can attach an external interface and now the operaiton has it.
@@ -305,16 +307,17 @@ TEST(InterfaceAttachment, Operation) {
   EXPECT_EQ(iface.getNameLengthMinusArg(5), 9u);
 
   // Default implementation can be overridden.
-  OwningOpRef<FuncOp> funcOp =
-      FuncOp::create(UnknownLoc::get(&context), "function",
-                     FunctionType::get(&context, {}, {}));
-  ASSERT_FALSE(isa<TestExternalOpInterface>(funcOp->getOperation()));
-  FuncOp::attachInterface<TestExternalOpOverridingModel>(context);
-  iface = dyn_cast<TestExternalOpInterface>(funcOp->getOperation());
+  OwningOpRef<UnrealizedConversionCastOp> castOp =
+      builder.create<UnrealizedConversionCastOp>(UnknownLoc::get(&context),
+                                                 TypeRange(), ValueRange());
+  ASSERT_FALSE(isa<TestExternalOpInterface>(castOp->getOperation()));
+  UnrealizedConversionCastOp::attachInterface<TestExternalOpOverridingModel>(
+      context);
+  iface = dyn_cast<TestExternalOpInterface>(castOp->getOperation());
   ASSERT_TRUE(iface != nullptr);
-  EXPECT_EQ(iface.getNameLengthPlusArg(10), 22u);
+  EXPECT_EQ(iface.getNameLengthPlusArg(10), 44u);
   EXPECT_EQ(iface.getNameLengthTimesArg(0), 42u);
-  EXPECT_EQ(iface.getNameLengthPlusArgTwice(8), 28u);
+  EXPECT_EQ(iface.getNameLengthPlusArgTwice(8), 50u);
   EXPECT_EQ(iface.getNameLengthMinusArg(1000), 21u);
 
   // Another context doesn't have the interfaces registered.

diff  --git a/mlir/unittests/Pass/AnalysisManagerTest.cpp b/mlir/unittests/Pass/AnalysisManagerTest.cpp
index d7a9681488d05..036dc7c93f5d4 100644
--- a/mlir/unittests/Pass/AnalysisManagerTest.cpp
+++ b/mlir/unittests/Pass/AnalysisManagerTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Pass/AnalysisManager.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
@@ -51,6 +52,7 @@ TEST(AnalysisManagerTest, FineGrainModuleAnalysisPreservation) {
 
 TEST(AnalysisManagerTest, FineGrainFunctionAnalysisPreservation) {
   MLIRContext context;
+  context.loadDialect<func::FuncDialect>();
   Builder builder(&context);
 
   // Create a function and a module.
@@ -81,6 +83,7 @@ TEST(AnalysisManagerTest, FineGrainFunctionAnalysisPreservation) {
 
 TEST(AnalysisManagerTest, FineGrainChildFunctionAnalysisPreservation) {
   MLIRContext context;
+  context.loadDialect<func::FuncDialect>();
   Builder builder(&context);
 
   // Create a function and a module.

diff  --git a/mlir/unittests/Pass/CMakeLists.txt b/mlir/unittests/Pass/CMakeLists.txt
index f396bd7ed5853..cb10611b2ad63 100644
--- a/mlir/unittests/Pass/CMakeLists.txt
+++ b/mlir/unittests/Pass/CMakeLists.txt
@@ -5,4 +5,5 @@ add_mlir_unittest(MLIRPassTests
 )
 target_link_libraries(MLIRPassTests
   PRIVATE
+  MLIRFunc
   MLIRPass)

diff  --git a/mlir/unittests/Pass/PassManagerTest.cpp b/mlir/unittests/Pass/PassManagerTest.cpp
index fc085ad1ce7e1..5227c91250b5c 100644
--- a/mlir/unittests/Pass/PassManagerTest.cpp
+++ b/mlir/unittests/Pass/PassManagerTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
@@ -47,6 +48,7 @@ struct AnnotateFunctionPass
 
 TEST(PassManagerTest, OpSpecificAnalysis) {
   MLIRContext context;
+  context.loadDialect<func::FuncDialect>();
   Builder builder(&context);
 
   // Create a module with 2 functions.


        


More information about the Mlir-commits mailing list