[Mlir-commits] [mlir] 387f955 - Add a new interface allowing to set a default dialect to be used for printing/parsing regions

Tue Aug 31 10:52:59 PDT 2021

Author: Mehdi Amini
Date: 2021-08-31T17:52:40Z
New Revision: 387f95541bdc575f75bba95d323b51198d3b9e2b

URL: https://github.com/llvm/llvm-project/commit/387f95541bdc575f75bba95d323b51198d3b9e2b
DIFF: https://github.com/llvm/llvm-project/commit/387f95541bdc575f75bba95d323b51198d3b9e2b.diff

LOG: Add a new interface allowing to set a default dialect to be used for printing/parsing regions

Currently the builtin dialect is the default namespace used for parsing
and printing. As such module and func don't need to be prefixed.
In the case of some dialects that defines new regions for their own
purpose (like SpirV modules for example), it can be beneficial to
change the default dialect in order to improve readability.

Differential Revision: https://reviews.llvm.org/D107236

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td
    mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td
    mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
    mlir/include/mlir/IR/BuiltinOps.h
    mlir/include/mlir/IR/BuiltinOps.td
    mlir/include/mlir/IR/OpAsmInterface.td
    mlir/include/mlir/IR/OpDefinition.h
    mlir/include/mlir/IR/OperationSupport.h
    mlir/lib/IR/AsmPrinter.cpp
    mlir/lib/IR/Operation.cpp
    mlir/lib/Parser/Parser.cpp
    mlir/lib/Parser/ParserState.h
    mlir/test/Analysis/test-shape-fn-report.mlir
    mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
    mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
    mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
    mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
    mlir/test/Dialect/Builtin/canonicalize.mlir
    mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
    mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
    mlir/test/Dialect/Shape/invalid.mlir
    mlir/test/Dialect/SparseTensor/sparse_perm.mlir
    mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
    mlir/test/IR/invalid-func-op.mlir
    mlir/test/IR/invalid-module-op.mlir
    mlir/test/IR/invalid-ops.mlir
    mlir/test/IR/invalid.mlir
    mlir/test/IR/parser.mlir
    mlir/test/IR/traits.mlir
    mlir/test/Transforms/canonicalize-dce.mlir
    mlir/test/Transforms/canonicalize.mlir
    mlir/test/Transforms/constant-fold.mlir
    mlir/test/Transforms/cse.mlir
    mlir/test/Transforms/test-legalizer-full.mlir
    mlir/test/lib/Dialect/Test/TestOps.td

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td
index bb4e558cde546..23d981a589010 100644

--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVStructureOps.td
@@ -24,7 +24,8 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 // -----
 
 def SPV_AddressOfOp : SPV_Op<"mlir.addressof",
-    [DeclareOpInterfaceMethods<OpAsmOpInterface>, InFunctionScope, NoSideEffect]> {
+    [DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
+     InFunctionScope, NoSideEffect]> {
   let summary = "Get the address of a global variable.";
 
   let description = [{
@@ -70,7 +71,9 @@ def SPV_AddressOfOp : SPV_Op<"mlir.addressof",
 // -----
 
 def SPV_ConstantOp : SPV_Op<"Constant",
-    [ConstantLike, DeclareOpInterfaceMethods<OpAsmOpInterface>, NoSideEffect]> {
+    [ConstantLike,
+     DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
+     NoSideEffect]> {
   let summary = "The op that declares a SPIR-V normal constant";
 
   let description = [{

diff  --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td
index 6b39fbffe9d43..a9c82bf41849c 100644
--- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td
+++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td
@@ -135,7 +135,7 @@ def Shape_ConstShapeOp : Shape_Op<"const_shape",
 def Shape_ConstSizeOp : Shape_Op<"const_size", [
     ConstantLike,
     NoSideEffect,
-    DeclareOpInterfaceMethods<OpAsmOpInterface>
+    DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>
   ]> {
   let summary = "Creates a constant of type `shape.size`";
   let description = [{

diff  --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
index 1633c0d98b63d..8436ac796a97e 100644
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -1037,7 +1037,8 @@ def CondBranchOp : Std_Op<"cond_br",
 //===----------------------------------------------------------------------===//
 
 def ConstantOp : Std_Op<"constant",
-    [ConstantLike, NoSideEffect, DeclareOpInterfaceMethods<OpAsmOpInterface>]> {
+    [ConstantLike, NoSideEffect,
+     DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
   let summary = "constant";
   let description = [{
     Syntax:

diff  --git a/mlir/include/mlir/IR/BuiltinOps.h b/mlir/include/mlir/IR/BuiltinOps.h
index bc341e624fcd4..b69ff60e4b129 100644
--- a/mlir/include/mlir/IR/BuiltinOps.h
+++ b/mlir/include/mlir/IR/BuiltinOps.h
@@ -14,6 +14,7 @@
 #define MLIR_IR_BUILTINOPS_H_
 
 #include "mlir/IR/FunctionSupport.h"
+#include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/OwningOpRef.h"
 #include "mlir/IR/RegionKindInterface.h"
 #include "mlir/IR/SymbolTable.h"

diff  --git a/mlir/include/mlir/IR/BuiltinOps.td b/mlir/include/mlir/IR/BuiltinOps.td
index 8b6852b693936..28a69dbc119a7 100644
--- a/mlir/include/mlir/IR/BuiltinOps.td
+++ b/mlir/include/mlir/IR/BuiltinOps.td
@@ -15,6 +15,7 @@
 #define BUILTIN_OPS
 
 include "mlir/IR/BuiltinDialect.td"
+include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/RegionKindInterface.td"
 include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/CallInterfaces.td"
@@ -160,8 +161,9 @@ def FuncOp : Builtin_Op<"func", [
 //===----------------------------------------------------------------------===//
 
 def ModuleOp : Builtin_Op<"module", [
-  AffineScope, IsolatedFromAbove, NoRegionArguments, SymbolTable, Symbol] 
-  # GraphRegionNoTerminator.traits> {
+    AffineScope, IsolatedFromAbove, NoRegionArguments, SymbolTable, Symbol,
+    OpAsmOpInterface
+  ] # GraphRegionNoTerminator.traits> {
   let summary = "A top level container operation";
   let description = [{
     A `module` represents a top-level container operation. It contains a single
@@ -206,6 +208,14 @@ def ModuleOp : Builtin_Op<"module", [
     //===------------------------------------------------------------------===//
 
     DataLayoutSpecInterface getDataLayoutSpec();
+
+    //===------------------------------------------------------------------===//
+    // OpAsmOpInterface Methods
+    //===------------------------------------------------------------------===//
+
+    static ::llvm::StringRef getDefaultDialect() {
+      return "builtin";
+    }
   }];
   let verifier = [{ return ::verify(*this); }];
 

diff  --git a/mlir/include/mlir/IR/OpAsmInterface.td b/mlir/include/mlir/IR/OpAsmInterface.td
index f009ed997bf8c..50b98b04dbee5 100644
--- a/mlir/include/mlir/IR/OpAsmInterface.td
+++ b/mlir/include/mlir/IR/OpAsmInterface.td
@@ -47,7 +47,19 @@ def OpAsmOpInterface : OpInterface<"OpAsmOpInterface"> {
           %first_result, %middle_results:2, %0 = "my.op" ...
         ```
       }],
-      "void", "getAsmResultNames", (ins "::mlir::OpAsmSetValueNameFn":$setNameFn)
+      "void", "getAsmResultNames",
+      (ins "::mlir::OpAsmSetValueNameFn":$setNameFn),
+      "", ";"
+    >,
+    StaticInterfaceMethod<[{
+      Return the default dialect used when printing/parsing operations in
+      regions nested under this operation. This allows for eliding the dialect
+      prefix from the operation name, for example it would be possible to omit
+      the `spv.` prefix from all operations within a SpirV module if this method
+      returned `spv`. The default implementation returns an empty string which
+      is ignored.
+      }],
+      "StringRef", "getDefaultDialect", (ins), "", "return \"\";"
     >,
   ];
 }

diff  --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index 2858b7e9cc3ae..e7f644f456b22 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -180,7 +180,8 @@ class OpState {
 
   // The fallback for the printer is to print it the generic assembly form.
   static void print(Operation *op, OpAsmPrinter &p);
-  static void printOpName(Operation *op, OpAsmPrinter &p);
+  static void printOpName(Operation *op, OpAsmPrinter &p,
+                          StringRef defaultDialect);
 
   /// Mutability management is handled by the OpWrapper/OpConstWrapper classes,
   /// so we can cast it away here.
@@ -1777,7 +1778,7 @@ class Op : public OpState, public Traits<ConcreteType>... {
   static std::enable_if_t<!detect_has_print<ConcreteOpT>::value,
                           AbstractOperation::PrintAssemblyFn>
   getPrintAssemblyFnImpl() {
-    return [](Operation *op, OpAsmPrinter &printer) {
+    return [](Operation *op, OpAsmPrinter &printer, StringRef defaultDialect) {
       return OpState::print(op, printer);
     };
   }
@@ -1789,8 +1790,9 @@ class Op : public OpState, public Traits<ConcreteType>... {
   getPrintAssemblyFnImpl() {
     return &printAssembly;
   }
-  static void printAssembly(Operation *op, OpAsmPrinter &p) {
-    OpState::printOpName(op, p);
+  static void printAssembly(Operation *op, OpAsmPrinter &p,
+                            StringRef defaultDialect) {
+    OpState::printOpName(op, p, defaultDialect);
     return cast<ConcreteType>(op).print(p);
   }
   /// Implementation of `VerifyInvariantsFn` AbstractOperation hook.

diff  --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index 05273505946fa..606c4cfbd6065 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -77,7 +77,7 @@ class AbstractOperation {
   using ParseAssemblyFn =
       llvm::unique_function<ParseResult(OpAsmParser &, OperationState &) const>;
   using PrintAssemblyFn =
-      llvm::unique_function<void(Operation *, OpAsmPrinter &) const>;
+      llvm::unique_function<void(Operation *, OpAsmPrinter &, StringRef) const>;
   using VerifyInvariantsFn =
       llvm::unique_function<LogicalResult(Operation *) const>;
 
@@ -97,8 +97,9 @@ class AbstractOperation {
   const ParseAssemblyFn &getParseAssemblyFn() const { return parseAssemblyFn; }
 
   /// This hook implements the AsmPrinter for this operation.
-  void printAssembly(Operation *op, OpAsmPrinter &p) const {
-    return printAssemblyFn(op, p);
+  void printAssembly(Operation *op, OpAsmPrinter &p,
+                     StringRef defaultDialect) const {
+    return printAssemblyFn(op, p, defaultDialect);
   }
 
   /// This hook implements the verifier for this operation.  It should emits an

diff  --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index c27e5c5e1ba37..b6d072f9f60a1 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
@@ -371,7 +372,7 @@ class DummyAliasOperationPrinter : private OpAsmPrinter {
       // Check to see if this is a known operation.  If so, use the registered
       // custom printer hook.
       if (auto *opInfo = op->getAbstractOperation()) {
-        opInfo->printAssembly(op, *this);
+        opInfo->printAssembly(op, *this, /*defaultDialect=*/"");
         return;
       }
     }
@@ -2424,6 +2425,13 @@ class OperationPrinter : public ModulePrinter, private OpAsmPrinter {
   }
 
 private:
+  // Contains the stack of default dialects to use when printing regions.
+  // A new dialect is pushed to the stack before parsing regions nested under an
+  // operation implementing `OpAsmOpInterface`, and popped when done. At the
+  // top-level we start with "builtin" as the default, so that the top-level
+  // `module` operation prints as-is.
+  SmallVector<StringRef> defaultDialectStack{"builtin"};
+
   /// The number of spaces used for indenting nested operations.
   const static unsigned indentWidth = 2;
 
@@ -2503,7 +2511,7 @@ void OperationPrinter::printOperation(Operation *op) {
     // Check to see if this is a known operation.  If so, use the registered
     // custom printer hook.
     if (auto *opInfo = op->getAbstractOperation()) {
-      opInfo->printAssembly(op, *this);
+      opInfo->printAssembly(op, *this, defaultDialectStack.back());
       return;
     }
     // Otherwise try to dispatch to the dialect, if available.
@@ -2511,6 +2519,7 @@ void OperationPrinter::printOperation(Operation *op) {
       if (auto opPrinter = dialect->getOperationPrinter(op)) {
         // Print the op name first.
         StringRef name = op->getName().getStringRef();
+        name.consume_front((defaultDialectStack.back() + ".").str());
         printEscapedString(name, os);
         // Print the rest of the op now.
         opPrinter(op, *this);
@@ -2657,6 +2666,13 @@ void OperationPrinter::printRegion(Region &region, bool printEntryBlockArgs,
                                    bool printEmptyBlock) {
   os << " {" << newLine;
   if (!region.empty()) {
+    auto restoreDefaultDialect =
+        llvm::make_scope_exit([&]() { defaultDialectStack.pop_back(); });
+    if (auto iface = dyn_cast<OpAsmOpInterface>(region.getParentOp()))
+      defaultDialectStack.push_back(iface.getDefaultDialect());
+    else
+      defaultDialectStack.push_back("");
+
     auto *entryBlock = &region.front();
     // Force printing the block header if printEmptyBlock is set and the block
     // is empty or if printEntryBlockArgs is set and there are arguments to

diff  --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index f4c6da2fa6a58..f2b981390f93b 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -643,9 +643,13 @@ ParseResult OpState::parse(OpAsmParser &parser, OperationState &result) {
 // The fallback for the printer is to print in the generic assembly form.
 void OpState::print(Operation *op, OpAsmPrinter &p) { p.printGenericOp(op); }
 // The fallback for the printer is to print in the generic assembly form.
-void OpState::printOpName(Operation *op, OpAsmPrinter &p) {
+void OpState::printOpName(Operation *op, OpAsmPrinter &p,
+                          StringRef defaultDialect) {
   StringRef name = op->getName().getStringRef();
-  if (name.startswith("std."))
+  if (name.startswith((defaultDialect + ".").str()))
+    name = name.drop_front(defaultDialect.size() + 1);
+  // TODO: remove this special case.
+  else if (name.startswith("std."))
     name = name.drop_front(4);
   p.getStream() << name;
 }

diff  --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp
index aed89925ad6e4..38d6c0b444dc8 100644
--- a/mlir/lib/Parser/Parser.cpp
+++ b/mlir/lib/Parser/Parser.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Parser.h"
 #include "mlir/Parser/AsmParserState.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/bit.h"
 #include "llvm/Support/PrettyStackTrace.h"
@@ -1842,31 +1843,36 @@ class CustomOpAsmParser : public OpAsmParser {
 Operation *
 OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
   llvm::SMLoc opLoc = getToken().getLoc();
-  StringRef opName = getTokenSpelling();
+  std::string opName = getTokenSpelling().str();
   auto *opDefinition = AbstractOperation::lookup(opName, getContext());
+  StringRef defaultDialect = getState().defaultDialectStack.back();
   Dialect *dialect = nullptr;
   if (opDefinition) {
     dialect = &opDefinition->dialect;
   } else {
-    if (opName.contains('.')) {
+    if (StringRef(opName).contains('.')) {
       // This op has a dialect, we try to check if we can register it in the
       // context on the fly.
-      StringRef dialectName = opName.split('.').first;
+      StringRef dialectName = StringRef(opName).split('.').first;
       dialect = getContext()->getLoadedDialect(dialectName);
       if (!dialect && (dialect = getContext()->getOrLoadDialect(dialectName)))
         opDefinition = AbstractOperation::lookup(opName, getContext());
     } else {
-      // If the operation name has no namespace prefix we treat it as a builtin
-      // or standard operation and prefix it with "builtin" or "std".
-      // TODO: Remove the special casing here.
-      opDefinition = AbstractOperation::lookup(Twine("builtin." + opName).str(),
-                                               getContext());
+      // If the operation name has no namespace prefix we lookup the current
+      // default dialect (set through OpAsmOpInterface).
+      opDefinition = AbstractOperation::lookup(
+          Twine(defaultDialect + "." + opName).str(), getContext());
       if (!opDefinition && getContext()->getOrLoadDialect("std")) {
         opDefinition = AbstractOperation::lookup(Twine("std." + opName).str(),
                                                  getContext());
       }
-      if (opDefinition)
-        opName = opDefinition->name.strref();
+      if (opDefinition) {
+        dialect = &opDefinition->dialect;
+        opName = opDefinition->name.str();
+      } else if (!defaultDialect.empty()) {
+        dialect = getContext()->getOrLoadDialect(defaultDialect);
+        opName = (defaultDialect + "." + opName).str();
+      }
     }
   }
 
@@ -1876,10 +1882,14 @@ OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
   function_ref<ParseResult(OpAsmParser &, OperationState &)> parseAssemblyFn;
   bool isIsolatedFromAbove = false;
 
+  defaultDialect = "";
   if (opDefinition) {
     parseAssemblyFn = opDefinition->getParseAssemblyFn();
     isIsolatedFromAbove =
         opDefinition->hasTrait<OpTrait::IsIsolatedFromAbove>();
+    auto *iface = opDefinition->getInterface<OpAsmOpInterface>();
+    if (iface && !iface->getDefaultDialect().empty())
+      defaultDialect = iface->getDefaultDialect();
   } else {
     Optional<Dialect::ParseOpHook> dialectHook;
     if (dialect)
@@ -1890,14 +1900,16 @@ OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
     }
     parseAssemblyFn = *dialectHook;
   }
+  getState().defaultDialectStack.push_back(defaultDialect);
+  auto restoreDefaultDialect = llvm::make_scope_exit(
+      [&]() { getState().defaultDialectStack.pop_back(); });
 
   consumeToken();
 
   // If the custom op parser crashes, produce some indication to help
   // debugging.
-  std::string opNameStr = opName.str();
   llvm::PrettyStackTraceFormat fmt("MLIR Parser: custom op parser '%s'",
-                                   opNameStr.c_str());
+                                   opName.c_str());
 
   // Get location information for the operation.
   auto srcLocation = getEncodedSourceLocation(opLoc);

diff  --git a/mlir/lib/Parser/ParserState.h b/mlir/lib/Parser/ParserState.h
index 13d506d27e302..f5faffff31976 100644
--- a/mlir/lib/Parser/ParserState.h
+++ b/mlir/lib/Parser/ParserState.h
@@ -82,6 +82,13 @@ struct ParserState {
   /// An optional pointer to a struct containing high level parser state to be
   /// populated during parsing.
   AsmParserState *asmState;
+
+  // Contains the stack of default dialect to use when parsing regions.
+  // A new dialect get pushed to the stack before parsing regions nested
+  // under an operation implementing `OpAsmOpInterface`, and
+  // popped when done. At the top-level we start with "builtin" as the
+  // default, so that the top-level `module` operation parses as-is.
+  SmallVector<StringRef> defaultDialectStack{"builtin"};
 };
 
 } // end namespace detail

diff  --git a/mlir/test/Analysis/test-shape-fn-report.mlir b/mlir/test/Analysis/test-shape-fn-report.mlir
index c3b0f88160695..7587e5d723bf0 100644
--- a/mlir/test/Analysis/test-shape-fn-report.mlir
+++ b/mlir/test/Analysis/test-shape-fn-report.mlir
@@ -15,7 +15,7 @@ func @tanh(%arg: tensor<10x20xf32>) -> tensor<10x20xf32>
 // The shape function library with some local functions.
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }

diff  --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 415090ee68c24..5fd3769cdd16d 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -4,7 +4,7 @@
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_ops()
   // CHECK32-LABEL: func @gpu_index_ops()
-  func @gpu_index_ops()
+  builtin.func @gpu_index_ops()
       -> (index, index, index, index, index, index,
           index, index, index, index, index, index) {
     // CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@@ -61,7 +61,7 @@ gpu.module @test_module {
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_comp
   // CHECK32-LABEL: func @gpu_index_comp
-  func @gpu_index_comp(%idx : index) -> index {
+  builtin.func @gpu_index_comp(%idx : index) -> index {
     // CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
     // CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
     %0 = addi %idx, %idx : index
@@ -109,7 +109,7 @@ gpu.module @test_module {
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_shuffle()
-  func @gpu_shuffle() -> (f32) {
+  builtin.func @gpu_shuffle() -> (f32) {
     // CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
     %arg0 = constant 1.0 : f32
     // CHECK: %[[#OFFSET:]] = llvm.mlir.constant(4 : i32) : i32
@@ -133,7 +133,7 @@ gpu.module @test_module {
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_sync()
-  func @gpu_sync() {
+  builtin.func @gpu_sync() {
     // CHECK: nvvm.barrier0
     gpu.barrier
     std.return
@@ -146,7 +146,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_fabsf(f32) -> f32
   // CHECK: llvm.func @__nv_fabs(f64) -> f64
   // CHECK-LABEL: func @gpu_fabs
-  func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.absf %arg_f32 : f32
     // CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (f32) -> f32
     %result64 = std.absf %arg_f64 : f64
@@ -161,7 +161,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_ceilf(f32) -> f32
   // CHECK: llvm.func @__nv_ceil(f64) -> f64
   // CHECK-LABEL: func @gpu_ceil
-  func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.ceilf %arg_f32 : f32
     // CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (f32) -> f32
     %result64 = std.ceilf %arg_f64 : f64
@@ -176,7 +176,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_floorf(f32) -> f32
   // CHECK: llvm.func @__nv_floor(f64) -> f64
   // CHECK-LABEL: func @gpu_floor
-  func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.floorf %arg_f32 : f32
     // CHECK: llvm.call @__nv_floorf(%{{.*}}) : (f32) -> f32
     %result64 = std.floorf %arg_f64 : f64
@@ -191,7 +191,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_cosf(f32) -> f32
   // CHECK: llvm.func @__nv_cos(f64) -> f64
   // CHECK-LABEL: func @gpu_cos
-  func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.cos %arg_f32 : f32
     // CHECK: llvm.call @__nv_cosf(%{{.*}}) : (f32) -> f32
     %result64 = math.cos %arg_f64 : f64
@@ -205,7 +205,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_expf(f32) -> f32
   // CHECK: llvm.func @__nv_exp(f64) -> f64
   // CHECK-LABEL: func @gpu_exp
-  func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.exp %arg_f32 : f32
     // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
     %result64 = math.exp %arg_f64 : f64
@@ -219,7 +219,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_exp2f(f32) -> f32
   // CHECK: llvm.func @__nv_exp2(f64) -> f64
   // CHECK-LABEL: func @gpu_exp2
-  func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.exp2 %arg_f32 : f32
     // CHECK: llvm.call @__nv_exp2f(%{{.*}}) : (f32) -> f32
     %result64 = math.exp2 %arg_f64 : f64
@@ -234,7 +234,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_logf(f32) -> f32
   // CHECK: llvm.func @__nv_log(f64) -> f64
   // CHECK-LABEL: func @gpu_log
-  func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log %arg_f32 : f32
     // CHECK: llvm.call @__nv_logf(%{{.*}}) : (f32) -> f32
     %result64 = math.log %arg_f64 : f64
@@ -249,7 +249,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_log10f(f32) -> f32
   // CHECK: llvm.func @__nv_log10(f64) -> f64
   // CHECK-LABEL: func @gpu_log10
-  func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log10 %arg_f32 : f32
     // CHECK: llvm.call @__nv_log10f(%{{.*}}) : (f32) -> f32
     %result64 = math.log10 %arg_f64 : f64
@@ -264,7 +264,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_log1pf(f32) -> f32
   // CHECK: llvm.func @__nv_log1p(f64) -> f64
   // CHECK-LABEL: func @gpu_log1p
-  func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log1p %arg_f32 : f32
     // CHECK: llvm.call @__nv_log1pf(%{{.*}}) : (f32) -> f32
     %result64 = math.log1p %arg_f64 : f64
@@ -279,7 +279,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_log2f(f32) -> f32
   // CHECK: llvm.func @__nv_log2(f64) -> f64
   // CHECK-LABEL: func @gpu_log2
-  func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log2 %arg_f32 : f32
     // CHECK: llvm.call @__nv_log2f(%{{.*}}) : (f32) -> f32
     %result64 = math.log2 %arg_f64 : f64
@@ -294,7 +294,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_sinf(f32) -> f32
   // CHECK: llvm.func @__nv_sin(f64) -> f64
   // CHECK-LABEL: func @gpu_sin
-  func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.sin %arg_f32 : f32
     // CHECK: llvm.call @__nv_sinf(%{{.*}}) : (f32) -> f32
     %result64 = math.sin %arg_f64 : f64
@@ -309,7 +309,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_tanhf(f32) -> f32
   // CHECK: llvm.func @__nv_tanh(f64) -> f64
   // CHECK-LABEL: func @gpu_tanh
-  func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+  builtin.func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
     %result16 = math.tanh %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
     // CHECK-NEXT: llvm.call @__nv_tanhf(%{{.*}}) : (f32) -> f32
@@ -328,7 +328,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_rsqrtf(f32) -> f32
   // CHECK: llvm.func @__nv_rsqrt(f64) -> f64
   // CHECK-LABEL: func @gpu_rsqrt
-  func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.rsqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -348,7 +348,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_sqrtf(f32) -> f32
   // CHECK: llvm.func @__nv_sqrt(f64) -> f64
   // CHECK-LABEL: func @gpu_sqrt
-  func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.sqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -368,7 +368,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_atanf(f32) -> f32
   // CHECK: llvm.func @__nv_atan(f64) -> f64
   // CHECK-LABEL: func @gpu_atan
-  func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  builtin.func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.atan %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -388,7 +388,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_atan2f(f32, f32) -> f32
   // CHECK: llvm.func @__nv_atan2(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_atan2
-  func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  builtin.func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.atan2 %arg_f16, %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -412,7 +412,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_expf(f32) -> f32
   // CHECK: llvm.func @__nv_exp(f64) -> f64
   // CHECK-LABEL: func @gpu_exp
-    func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+    builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
       %result32 = math.exp %arg_f32 : f32
       // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
       %result64 = math.exp %arg_f64 : f64
@@ -429,7 +429,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_expm1f(f32) -> f32
   // CHECK: llvm.func @__nv_expm1(f64) -> f64
   // CHECK-LABEL: func @gpu_expm1
-  func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.expm1 %arg_f32 : f32
     // CHECK: llvm.call @__nv_expm1f(%{{.*}}) : (f32) -> f32
     %result64 = math.expm1 %arg_f64 : f64
@@ -444,7 +444,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__nv_powf(f32, f32) -> f32
   // CHECK: llvm.func @__nv_pow(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_pow
-  func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.powf %arg_f32, %arg_f32 : f32
     // CHECK: llvm.call @__nv_powf(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
     %result64 = math.powf %arg_f64, %arg_f64 : f64

diff  --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
index 6eb641b0e897a..8e3eada71ca0f 100644
--- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
@@ -4,7 +4,7 @@ gpu.module @test_module {
 
   // CHECK-LABEL: func @gpu_wmma_load_op() ->
   // CHECK-SAME: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> {
-  func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
+  builtin.func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
     %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
     %i = constant 16 : index
     %j = constant 16 : index
@@ -31,7 +31,7 @@ gpu.module @test_module {
 
   // CHECK-LABEL: func @gpu_wmma_store_op
   // CHECK-SAME: (%[[D:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) {
-  func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
+  builtin.func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
     %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
     %i = constant 16 : index
     %j = constant 16 : index
@@ -62,7 +62,7 @@ gpu.module @test_module {
 
   // CHECK-LABEL: func @gpu_wmma_mma_op
   // CHECK-SAME: (%[[A:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[B:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[C:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>)
-  func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
+  builtin.func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
     %D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
     // CHECK:  %[[A1:.*]] = llvm.extractvalue %[[A]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
     // CHECK:  %[[A2:.*]] = llvm.extractvalue %[[A]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
@@ -131,7 +131,7 @@ gpu.module @test_module {
 //       CHECK:   %90 = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK:   nvvm.wmma.m16n16k16.store.d.f16.row.stride %86, %87, %88, %89, %90, %79 : !llvm.ptr<i32>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, i32
 
-  func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
+  builtin.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
       %c0 = constant 0 : index
       %c128 = constant 128 : index
       %c32 = constant 32 : index
@@ -170,7 +170,7 @@ gpu.module @test_module {
 //       CHECK: %[[M3:.+]] = llvm.insertvalue %[[V2]], %[[M2]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK: %[[M4:.+]] = llvm.insertvalue %[[V2]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
 //       CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
-  func @gpu_wmma_constant_op()  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
+  builtin.func @gpu_wmma_constant_op()  ->(!gpu.mma_matrix<16x16xf16, "COp">) {
     %cst = constant 1.0 : f16
     %C = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf16, "COp">
     return %C : !gpu.mma_matrix<16x16xf16, "COp">

diff  --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index a7efd9ff202a1..b62249f01768c 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -4,7 +4,7 @@
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_ops()
   // CHECK32-LABEL: func @gpu_index_ops()
-  func @gpu_index_ops()
+  builtin.func @gpu_index_ops()
       -> (index, index, index, index, index, index,
           index, index, index, index, index, index) {
     // CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@@ -61,7 +61,7 @@ gpu.module @test_module {
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_comp
   // CHECK32-LABEL: func @gpu_index_comp
-  func @gpu_index_comp(%idx : index) -> index {
+  builtin.func @gpu_index_comp(%idx : index) -> index {
     // CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
     // CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
     %0 = addi %idx, %idx : index
@@ -75,7 +75,7 @@ gpu.module @test_module {
 
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_sync()
-  func @gpu_sync() {
+  builtin.func @gpu_sync() {
     // CHECK: rocdl.barrier
     gpu.barrier
     std.return
@@ -88,7 +88,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_fabs_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_fabs_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_fabs
-  func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.absf %arg_f32 : f32
     // CHECK: llvm.call @__ocml_fabs_f32(%{{.*}}) : (f32) -> f32
     %result64 = std.absf %arg_f64 : f64
@@ -103,7 +103,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_ceil_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_ceil_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_ceil
-  func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.ceilf %arg_f32 : f32
     // CHECK: llvm.call @__ocml_ceil_f32(%{{.*}}) : (f32) -> f32
     %result64 = std.ceilf %arg_f64 : f64
@@ -118,7 +118,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_floor_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_floor_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_floor
-  func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.floorf %arg_f32 : f32
     // CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (f32) -> f32
     %result64 = std.floorf %arg_f64 : f64
@@ -133,7 +133,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_cos_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_cos_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_cos
-  func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.cos %arg_f32 : f32
     // CHECK: llvm.call @__ocml_cos_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.cos %arg_f64 : f64
@@ -148,7 +148,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_exp
-  func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %exp_f32 = math.exp %arg_f32 : f32
     // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
     %result32 = math.exp %exp_f32 : f32
@@ -165,7 +165,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_exp2_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_exp2_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_exp2
-  func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %exp2_f32 = math.exp2 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_exp2_f32(%{{.*}}) : (f32) -> f32
     %result32 = math.exp2 %exp2_f32 : f32
@@ -185,7 +185,7 @@ gpu.module @test_module {
     // CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
     // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
     // CHECK-LABEL: func @gpu_exp
-    func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+    builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
       %exp_f32 = math.exp %arg_f32 : f32
       // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
       %result32 = math.exp %exp_f32 : f32
@@ -204,7 +204,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_expm1_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_expm1_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_expm1
-  func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %expm1_f32 = math.expm1 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_expm1_f32(%{{.*}}) : (f32) -> f32
     %result32 = math.expm1 %expm1_f32 : f32
@@ -221,7 +221,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log
-  func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log %arg_f64 : f64
@@ -236,7 +236,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log1p_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log1p_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log1p
-  func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log1p %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log1p_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log1p %arg_f64 : f64
@@ -251,7 +251,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log10_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log10_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log10
-  func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log10 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log10_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log10 %arg_f64 : f64
@@ -266,7 +266,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log2_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_log2_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_log2
-  func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.log2 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log2_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.log2 %arg_f64 : f64
@@ -281,7 +281,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_rsqrt_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_rsqrt_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_rsqrt
-  func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.rsqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -301,7 +301,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_sqrt_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_sqrt_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_sqrt
-  func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
+  builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
       -> (f16, f32, f64) {
     %result16 = math.sqrt %arg_f16 : f16
     // CHECK: llvm.fpext %{{.*}} : f16 to f32
@@ -321,7 +321,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_tanh_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_tanh_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_tanh
-  func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.tanh %arg_f32 : f32
     // CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.tanh %arg_f64 : f64
@@ -336,7 +336,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_atan_f32(f32) -> f32
   // CHECK: llvm.func @__ocml_atan_f64(f64) -> f64
   // CHECK-LABEL: func @gpu_atan
-  func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.atan %arg_f32 : f32
     // CHECK: llvm.call @__ocml_atan_f32(%{{.*}}) : (f32) -> f32
     %result64 = math.atan %arg_f64 : f64
@@ -351,7 +351,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_atan2_f32(f32, f32) -> f32
   // CHECK: llvm.func @__ocml_atan2_f64(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_atan2
-  func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.atan2 %arg_f32, %arg_f32 : f32
     // CHECK: llvm.call @__ocml_atan2_f32(%{{.*}}) : (f32, f32) -> f32
     %result64 = math.atan2 %arg_f64, %arg_f64 : f64
@@ -366,7 +366,7 @@ gpu.module @test_module {
   // CHECK: llvm.func @__ocml_pow_f32(f32, f32) -> f32
   // CHECK: llvm.func @__ocml_pow_f64(f64, f64) -> f64
   // CHECK-LABEL: func @gpu_pow
-  func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+  builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = math.powf %arg_f32, %arg_f32 : f32
     // CHECK: llvm.call @__ocml_pow_f32(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
     %result64 = math.powf %arg_f64, %arg_f64 : f64

diff  --git a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
index a6d994ad7db62..72969d6fb0f02 100644
--- a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
+++ b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -convert-vector-to-rocdl | FileCheck %s
 
 gpu.module @test_read{
-func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
+builtin.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
   %f0 = constant 0.0: f32
   %f = vector.transfer_read %A[%base], %f0
       {permutation_map = affine_map<(d0) -> (d0)>} :
@@ -11,7 +11,7 @@ func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
 // CHECK-LABEL: @transfer_readx2
 // CHECK: rocdl.buffer.load {{.*}} vector<2xf32>
 
-func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
+builtin.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
   %f0 = constant 0.0: f32
   %f = vector.transfer_read %A[%base], %f0
       {permutation_map = affine_map<(d0) -> (d0)>} :
@@ -21,7 +21,7 @@ func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
 // CHECK-LABEL: @transfer_readx4
 // CHECK: rocdl.buffer.load {{.*}} vector<4xf32>
 
-func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
+builtin.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
   %f0 = constant 0.0: f32
   %f = vector.transfer_read %A[%base], %f0
       {permutation_map = affine_map<(d0) -> (d0)>} :
@@ -36,7 +36,7 @@ func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf3
 }
 
 gpu.module @test_write{
-func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
+builtin.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
   vector.transfer_write %B, %A[%base]
       {permutation_map = affine_map<(d0) -> (d0)>} :
     vector<2xf32>, memref<?xf32>
@@ -45,7 +45,7 @@ func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
 // CHECK-LABEL: @transfer_writex2
 // CHECK: rocdl.buffer.store {{.*}} vector<2xf32>
 
-func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
+builtin.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
   vector.transfer_write %B, %A[%base]
       {permutation_map = affine_map<(d0) -> (d0)>} :
     vector<4xf32>, memref<?xf32>
@@ -54,7 +54,7 @@ func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
 // CHECK-LABEL: @transfer_writex4
 // CHECK: rocdl.buffer.store {{.*}} vector<4xf32>
 
-func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
+builtin.func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
   vector.transfer_write %B, %A[%base]
       {permutation_map = affine_map<(d0) -> (d0)>} :
     vector<2xf32>, memref<?xf32>

diff  --git a/mlir/test/Dialect/Builtin/canonicalize.mlir b/mlir/test/Dialect/Builtin/canonicalize.mlir
index 316684428f2df..57a79d02138c6 100644
--- a/mlir/test/Dialect/Builtin/canonicalize.mlir
+++ b/mlir/test/Dialect/Builtin/canonicalize.mlir
@@ -10,8 +10,8 @@
 func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
   // CHECK-NOT: unrealized_conversion_cast
   // CHECK: return %[[ARG0]], %[[ARG1]]
-  %inputs:2 = unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
-  %outputs:2 = unrealized_conversion_cast %inputs#0, %inputs#1 : i64, i64 to i32, i32
+  %inputs:2 = builtin.unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
+  %outputs:2 = builtin.unrealized_conversion_cast %inputs#0, %inputs#1 : i64, i64 to i32, i32
   return %outputs#0, %outputs#1 : i32, i32
 }
 
@@ -19,7 +19,7 @@ func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
 func @multiple_conversion_casts_failure(%arg0: i32, %arg1: i32, %arg2: i64) -> (i32, i32) {
   // CHECK: unrealized_conversion_cast
   // CHECK: unrealized_conversion_cast
-  %inputs:2 = unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
-  %outputs:2 = unrealized_conversion_cast %arg2, %inputs#1 : i64, i64 to i32, i32
+  %inputs:2 = builtin.unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
+  %outputs:2 = builtin.unrealized_conversion_cast %arg2, %inputs#1 : i64, i64 to i32, i32
   return %outputs#0, %outputs#1 : i32, i32
 }

diff  --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
index 5385083f470f5..0271638999a7a 100644
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -782,7 +782,7 @@ func @input_stays_same(%arg0 : memref<?x1x?xf32, #map0>, %arg1 : f32, %shape: me
 // CHECK:     #[[MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, 0, d2)>
 // CHECK:     #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
 // CHECK:     #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-// CHECK:     builtin.func @input_stays_same(
+// CHECK:     func @input_stays_same(
 // CHECK-SAME:  %[[ARG0:.*]]: memref<?x1x?xf32, #[[MAP0]]>,
 // CHECK-SAME:  %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<?x1x?x1x?xf32>)
 // CHECK-SAME   -> memref<?x1x?x1x?xf32> {

diff  --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
index f72723f36804c..2ea3548fe45c0 100644
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -20,7 +20,7 @@ func @control_producer_reshape_fusion(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<
 }
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 //  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d1)>
-//      CHECK: builtin.func @control_producer_reshape_fusion
+//      CHECK: func @control_producer_reshape_fusion
 // CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
 //  CHECK-DAG:   %[[C0:.+]] = constant 0 : index
@@ -54,7 +54,7 @@ func @control_consumer_reshape_fusion(%arg0 : tensor<1x?x?xf32>, %arg1 : tensor<
   return %1 : tensor<1x?x?xf32>
 }
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
-//      CHECK: builtin.func @control_consumer_reshape_fusion
+//      CHECK: func @control_consumer_reshape_fusion
 //      CHECK:   %[[FILL:.+]] = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP]]]
 // CHECK-SAME:       outs(%{{.+}} : tensor<1x?x?xf32>)

diff  --git a/mlir/test/Dialect/Shape/invalid.mlir b/mlir/test/Dialect/Shape/invalid.mlir
index 030926a9cce4b..2185333aa7381 100644
--- a/mlir/test/Dialect/Shape/invalid.mlir
+++ b/mlir/test/Dialect/Shape/invalid.mlir
@@ -172,7 +172,7 @@ module attributes {shape.lib = [@shape_lib, "shape_lib"]} {
 
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }
@@ -192,7 +192,7 @@ module attributes {shape.lib = [@shape_lib, @shape_lib]} {
 
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }
@@ -212,7 +212,7 @@ module attributes {shape.lib = [@shape_lib]} {
 
 shape.function_library @shape_lib {
   // Test shape function that returns the shape of input arg as result shape.
-  func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
+  builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
     %0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
     return %0 : !shape.shape
   }

diff  --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir
index 7cfb95b76e688..b3a6f386c4f32 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir
@@ -14,7 +14,7 @@
   iterator_types = ["parallel", "parallel", "parallel"]
 }
 
-// CHECK-LABEL:   builtin.func @sparse_static_dims(
+// CHECK-LABEL:   func @sparse_static_dims(
 // CHECK-SAME:                          %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>,
 // CHECK-SAME:                          %[[VAL_1:.*]]: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> {
 // CHECK:           %[[VAL_2:.*]] = constant 20 : index
@@ -52,7 +52,7 @@ func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
   return %0 : tensor<20x30x10xf32>
 }
 
-// CHECK-LABEL:   builtin.func @sparse_dynamic_dims(
+// CHECK-LABEL:   func @sparse_dynamic_dims(
 // CHECK-SAME:                          %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
 // CHECK-SAME:                          %[[VAL_1:.*]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
 // CHECK:           %[[VAL_2:.*]] = constant 2 : index

diff  --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
index 405b44bcc86e5..51cacb66a6f96 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir
@@ -16,7 +16,7 @@
   iterator_types = ["reduction", "reduction", "reduction"]
 }
 
-// CHECK-HIR-LABEL:   builtin.func @sparse_dynamic_dims(
+// CHECK-HIR-LABEL:   func @sparse_dynamic_dims(
 // CHECK-HIR-SAME:                                      %[[VAL_0:.*]]: tensor<?x?x?xf32,  #sparse_tensor.encoding<{{{.*}}}>>,
 // CHECK-HIR-SAME:                                      %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
 // CHECK-HIR-DAG:       %[[C0:.*]] = constant 0 : index
@@ -48,7 +48,7 @@
 // CHECK-HIR:           return %[[VAL_24]] : tensor<f32>
 // CHECK-HIR:         }
 //
-// CHECK-MIR-LABEL:   builtin.func @sparse_dynamic_dims(
+// CHECK-MIR-LABEL:   func @sparse_dynamic_dims(
 // CHECK-MIR-SAME:                                      %[[VAL_0:.*]]: !llvm.ptr<i8>,
 // CHECK-MIR-SAME:                                      %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
 // CHECK-MIR-DAG:       %[[C0:.*]] = constant 0 : index

diff  --git a/mlir/test/IR/invalid-func-op.mlir b/mlir/test/IR/invalid-func-op.mlir
index 6b15a076f1d36..c64992dfd84e6 100644
--- a/mlir/test/IR/invalid-func-op.mlir
+++ b/mlir/test/IR/invalid-func-op.mlir
@@ -4,7 +4,7 @@
 
 func @func_op() {
   // expected-error at +1 {{expected valid '@'-identifier for symbol name}}
-  func missingsigil() -> (i1, index, f32)
+  builtin.func missingsigil() -> (i1, index, f32)
   return
 }
 
@@ -12,7 +12,7 @@ func @func_op() {
 
 func @func_op() {
   // expected-error at +1 {{expected type instead of SSA identifier}}
-  func @mixed_named_arguments(f32, %a : i32) {
+  builtin.func @mixed_named_arguments(f32, %a : i32) {
     return
   }
   return
@@ -22,7 +22,7 @@ func @func_op() {
 
 func @func_op() {
   // expected-error at +1 {{expected SSA identifier}}
-  func @mixed_named_arguments(%a : i32, f32) -> () {
+  builtin.func @mixed_named_arguments(%a : i32, f32) -> () {
     return
   }
   return
@@ -32,7 +32,7 @@ func @func_op() {
 
 func @func_op() {
   // expected-error at +1 {{entry block must have 1 arguments to match function signature}}
-  func @mixed_named_arguments(f32) {
+  builtin.func @mixed_named_arguments(f32) {
   ^entry:
     return
   }
@@ -43,7 +43,7 @@ func @func_op() {
 
 func @func_op() {
   // expected-error at +1 {{type of entry block argument #0('i32') must match the type of the corresponding argument in function signature('f32')}}
-  func @mixed_named_arguments(f32) {
+  builtin.func @mixed_named_arguments(f32) {
   ^entry(%arg : i32):
     return
   }

diff  --git a/mlir/test/IR/invalid-module-op.mlir b/mlir/test/IR/invalid-module-op.mlir
index d0de3db74d4f5..8caace93dc7b0 100644
--- a/mlir/test/IR/invalid-module-op.mlir
+++ b/mlir/test/IR/invalid-module-op.mlir
@@ -4,7 +4,7 @@
 
 func @module_op() {
   // expected-error at +1 {{Operations with a 'SymbolTable' must have exactly one block}}
-  module {
+  builtin.module {
   ^bb1:
     "test.dummy"() : () -> ()
   ^bb2:
@@ -17,7 +17,7 @@ func @module_op() {
 
 func @module_op() {
   // expected-error at +1 {{region should have no arguments}}
-  module {
+  builtin.module {
   ^bb1(%arg: i32):
   }
   return

diff  --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
index 693fd6d770587..30b1b411d2df8 100644
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -70,7 +70,7 @@ func @affine_apply_wrong_result_count() {
 
 func @unknown_custom_op() {
 ^bb0:
-  %i = crazyThing() {value = 0} : () -> index  // expected-error {{custom op 'crazyThing' is unknown}}
+  %i = test.crazyThing() {value = 0} : () -> index  // expected-error {{custom op 'test.crazyThing' is unknown}}
   return
 }
 

diff  --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir
index 818f4584eb270..fc3eb9c3fbe87 100644
--- a/mlir/test/IR/invalid.mlir
+++ b/mlir/test/IR/invalid.mlir
@@ -263,7 +263,7 @@ func @for_negative_stride() {
 // -----
 
 func @non_operation() {
-  asd   // expected-error {{custom op 'asd' is unknown}}
+  test.asd   // expected-error {{custom op 'test.asd' is unknown}}
 }
 
 // -----

diff  --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir
index eb833ec6d74e8..bcc26ceebffb0 100644
--- a/mlir/test/IR/parser.mlir
+++ b/mlir/test/IR/parser.mlir
@@ -1307,6 +1307,28 @@ func @pretty_names() {
   return
 }
 
+
+// This tests the behavior of "default dialect":
+// operations like `test.default_dialect` can define a default dialect
+// used in nested region.
+// CHECK-LABEL: func @default_dialect
+func @default_dialect() {
+  test.default_dialect {
+    // The test dialect is the default in this region, the following two
+    // operations are parsed identically.
+    // CHECK-NOT: test.parse_integer_literal
+    parse_integer_literal : 5
+    // CHECK: parse_integer_literal : 6
+    test.parse_integer_literal : 6
+    // Verify that only an op prefix is stripped, not an attribute value for
+    // example.
+    // CHECK:  "test.op_with_attr"() {test.attr = "test.value"} : () -> ()
+    "test.op_with_attr"() {test.attr = "test.value"} : () -> ()
+    "test.terminator"() : ()->()
+  }
+  return
+}
+
 // CHECK-LABEL: func @unreachable_dominance_violation_ok
 func @unreachable_dominance_violation_ok() -> i1 {
 // CHECK:   [[VAL:%.*]] = constant false

diff  --git a/mlir/test/IR/traits.mlir b/mlir/test/IR/traits.mlir
index 2b5e314e5555f..fe4dcacc0ff89 100644
--- a/mlir/test/IR/traits.mlir
+++ b/mlir/test/IR/traits.mlir
@@ -506,7 +506,7 @@ func @failedHasDominanceScopeOutsideDominanceFreeScope() -> () {
 // checked for dominance
 func @illegalInsideDominanceFreeScope() -> () {
   test.graph_region {
-    func @test() -> i1 {
+    builtin.func @test() -> i1 {
     ^bb1:
       // expected-error @+1 {{operand #0 does not dominate this use}}
       %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
@@ -525,7 +525,7 @@ func @illegalInsideDominanceFreeScope() -> () {
 // checked for dominance
 func @illegalCDFGInsideDominanceFreeScope() -> () {
   test.graph_region {
-    func @test() -> i1 {
+    builtin.func @test() -> i1 {
     ^bb1:
       // expected-error @+1 {{operand #0 does not dominate this use}}
       %2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)

diff  --git a/mlir/test/Transforms/canonicalize-dce.mlir b/mlir/test/Transforms/canonicalize-dce.mlir
index 1ac510a50f275..0746396ae216d 100644
--- a/mlir/test/Transforms/canonicalize-dce.mlir
+++ b/mlir/test/Transforms/canonicalize-dce.mlir
@@ -82,7 +82,7 @@ func @f(%arg0: f32, %pred: i1) {
 // CHECK-NEXT:     return
 
 func @f(%arg0: f32) {
-  func @g(%arg1: f32) {
+  builtin.func @g(%arg1: f32) {
     %0 = "std.addf"(%arg1, %arg1) : (f32, f32) -> f32
     return
   }

diff  --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index c819ce32c9b4d..a510be43669ae 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -412,7 +412,7 @@ func @write_only_alloca_fold(%v: f32) {
 // CHECK-LABEL: func @dead_block_elim
 func @dead_block_elim() {
   // CHECK-NOT: ^bb
-  func @nested() {
+  builtin.func @nested() {
     return
 
   ^bb1:

diff  --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir
index 1f46253de5f80..e6c7ba20cab6b 100644
--- a/mlir/test/Transforms/constant-fold.mlir
+++ b/mlir/test/Transforms/constant-fold.mlir
@@ -746,7 +746,7 @@ func @fold_rank_memref(%arg0 : memref<?x?xf32>) -> (index) {
 func @nested_isolated_region() {
   // CHECK-NEXT: func @isolated_op
   // CHECK-NEXT: constant 2
-  func @isolated_op() {
+  builtin.func @isolated_op() {
     %0 = constant 1 : i32
     %2 = addi %0, %0 : i32
     "foo.yield"(%2) : (i32) -> ()

diff  --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir
index 4e0c410e34c96..b45bf58473158 100644
--- a/mlir/test/Transforms/cse.mlir
+++ b/mlir/test/Transforms/cse.mlir
@@ -229,7 +229,7 @@ func @nested_isolated() -> i32 {
   %0 = constant 1 : i32
 
   // CHECK-NEXT: @nested_func
-  func @nested_func() {
+  builtin.func @nested_func() {
     // CHECK-NEXT: constant 1
     %foo = constant 1 : i32
     "foo.yield"(%foo) : (i32) -> ()

diff  --git a/mlir/test/Transforms/test-legalizer-full.mlir b/mlir/test/Transforms/test-legalizer-full.mlir
index 2cf23611486c1..3cbc1736b6aa2 100644
--- a/mlir/test/Transforms/test-legalizer-full.mlir
+++ b/mlir/test/Transforms/test-legalizer-full.mlir
@@ -32,12 +32,12 @@ func @replace_non_root_illegal_op() {
 // Test that children of recursively legal operations are ignored.
 func @recursively_legal_invalid_op() {
   /// Operation that is statically legal.
-  module attributes {test.recursively_legal} {
+  builtin.module attributes {test.recursively_legal} {
     %ignored = "test.illegal_op_f"() : () -> (i32)
   }
   /// Operation that is dynamically legal, i.e. the function has a pattern
   /// applied to legalize the argument type before it becomes recursively legal.
-  func @dynamic_func(%arg: i64) attributes {test.recursively_legal} {
+  builtin.func @dynamic_func(%arg: i64) attributes {test.recursively_legal} {
     %ignored = "test.illegal_op_f"() : () -> (i32)
     "test.return"() : () -> ()
   }

diff  --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 4dee0a1b366ca..e21ae0ba0f031 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -626,7 +626,7 @@ def AttrSizedResultOp : TEST_Op<"attr_sized_results",
 // pretty printed value name.
 def StringAttrPrettyNameOp
  : TEST_Op<"string_attr_pretty_name",
-           [DeclareOpInterfaceMethods<OpAsmOpInterface>]> {
+           [DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
   let arguments = (ins StrArrayAttr:$names);
   let results = (outs Variadic<I32>:$r);
 
@@ -634,6 +634,20 @@ def StringAttrPrettyNameOp
   let parser = [{ return ::parse$cppClass(parser, result); }];
 }
 
+// This is used to test the OpAsmOpInterface::getDefaultDialect() feature:
+// operations nested in a region under this op will drop the "test." dialect
+// prefix.
+def DefaultDialectOp : TEST_Op<"default_dialect", [OpAsmOpInterface]> {
+ let regions = (region AnyRegion:$body);
+  let extraClassDeclaration = [{
+    static ::llvm::StringRef getDefaultDialect() {
+      return "test";
+    }
+    void getAsmResultNames(::llvm::function_ref<void(::mlir::Value, ::llvm::StringRef)> setNameFn) {}
+  }];
+  let assemblyFormat = "regions attr-dict-with-keyword";
+}
+
 //===----------------------------------------------------------------------===//
 // Test Locations
 //===----------------------------------------------------------------------===//