[llvm] [TableGen] Split GlobalISelCombinerEmitter into multiple files (PR #73325)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 24 05:19:55 PST 2023


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/73325

>From 4c0153556a8be2f0d4a1686874cdff0fc5acec85 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 24 Nov 2023 14:08:52 +0100
Subject: [PATCH 1/2] [TableGen] Split GlobalISelCombinerEmitter into multiple
 files

Split MatchDataInfo, CXXPredicates and the Pattern hierarchy into their own files.

This should help with maintenance a bit, and make the API easier to navigate.

I also hope this encourages a bit more experimentation with MIR patterns, e.g. I'd like to try getting them in ISel at some point.
---
 llvm/utils/TableGen/GlobalISel/CMakeLists.txt |    3 +
 .../TableGen/GlobalISel/CXXPredicates.cpp     |   51 +
 .../utils/TableGen/GlobalISel/CXXPredicates.h |   83 +
 .../TableGen/GlobalISel/MatchDataInfo.cpp     |   49 +
 .../utils/TableGen/GlobalISel/MatchDataInfo.h |   87 +
 llvm/utils/TableGen/GlobalISel/Patterns.cpp   |  845 ++++++++
 llvm/utils/TableGen/GlobalISel/Patterns.h     |  690 +++++++
 .../TableGen/GlobalISelCombinerEmitter.cpp    | 1705 +----------------
 8 files changed, 1892 insertions(+), 1621 deletions(-)
 create mode 100644 llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp
 create mode 100644 llvm/utils/TableGen/GlobalISel/CXXPredicates.h
 create mode 100644 llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp
 create mode 100644 llvm/utils/TableGen/GlobalISel/MatchDataInfo.h
 create mode 100644 llvm/utils/TableGen/GlobalISel/Patterns.cpp
 create mode 100644 llvm/utils/TableGen/GlobalISel/Patterns.h

diff --git a/llvm/utils/TableGen/GlobalISel/CMakeLists.txt b/llvm/utils/TableGen/GlobalISel/CMakeLists.txt
index a85f1ac6cc9366a..7262c405839934a 100644
--- a/llvm/utils/TableGen/GlobalISel/CMakeLists.txt
+++ b/llvm/utils/TableGen/GlobalISel/CMakeLists.txt
@@ -5,6 +5,9 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_library(LLVMTableGenGlobalISel STATIC DISABLE_LLVM_LINK_LLVM_DYLIB
   CodeExpander.cpp
+  CXXPredicates.cpp
+  MatchDataInfo.cpp
+  Patterns.cpp
 
   DEPENDS
   vt_gen
diff --git a/llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp b/llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp
new file mode 100644
index 000000000000000..e39293ebfe7aee2
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp
@@ -0,0 +1,51 @@
+//===- CXXPredicates.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "CXXPredicates.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+namespace gi {
+
+std::vector<const CXXPredicateCode *>
+CXXPredicateCode::getSorted(const CXXPredicateCodePool &Pool) {
+  std::vector<const CXXPredicateCode *> Out;
+  std::transform(Pool.begin(), Pool.end(), std::back_inserter(Out),
+                 [&](auto &Elt) { return Elt.second.get(); });
+  sort(Out, [](const auto *A, const auto *B) { return A->ID < B->ID; });
+  return Out;
+}
+
+const CXXPredicateCode &CXXPredicateCode::get(CXXPredicateCodePool &Pool,
+                                              std::string Code) {
+  // Check if we already have an identical piece of code, if not, create an
+  // entry in the pool.
+  const auto CodeHash = hash_value(Code);
+  if (auto It = Pool.find(CodeHash); It != Pool.end())
+    return *It->second;
+
+  const auto ID = Pool.size();
+  auto OwnedData = std::unique_ptr<CXXPredicateCode>(
+      new CXXPredicateCode(std::move(Code), ID));
+  const auto &DataRef = *OwnedData;
+  Pool[CodeHash] = std::move(OwnedData);
+  return DataRef;
+}
+
+// TODO: Make BaseEnumName prefix configurable.
+CXXPredicateCode::CXXPredicateCode(std::string Code, unsigned ID)
+    : Code(Code), ID(ID), BaseEnumName("GICombiner" + std::to_string(ID)) {}
+
+CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXMatchCode;
+CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXApplyCode;
+
+} // namespace gi
+} // namespace llvm
diff --git a/llvm/utils/TableGen/GlobalISel/CXXPredicates.h b/llvm/utils/TableGen/GlobalISel/CXXPredicates.h
new file mode 100644
index 000000000000000..c2aa0f0fbbc5974
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/CXXPredicates.h
@@ -0,0 +1,83 @@
+//===- CXXPredicates.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_MIRPATTERNS_CXXPREDICATES_H
+#define LLVM_UTILS_MIRPATTERNS_CXXPREDICATES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace gi {
+
+/// Entry into the static pool of all CXX Predicate code. This contains
+/// fully expanded C++ code.
+///
+/// The static pool is hidden inside the object and can be accessed through
+/// getAllMatchCode/getAllApplyCode
+///
+/// Note that CXXPattern trims C++ code, so the Code is already expected to be
+/// free of leading/trailing whitespace.
+class CXXPredicateCode {
+  using CXXPredicateCodePool =
+      DenseMap<hash_code, std::unique_ptr<CXXPredicateCode>>;
+  static CXXPredicateCodePool AllCXXMatchCode;
+  static CXXPredicateCodePool AllCXXApplyCode;
+
+  /// Sorts a `CXXPredicateCodePool` by their IDs and returns it.
+  static std::vector<const CXXPredicateCode *>
+  getSorted(const CXXPredicateCodePool &Pool);
+
+  /// Gets an instance of `CXXPredicateCode` for \p Code, or returns an already
+  /// existing one.
+  static const CXXPredicateCode &get(CXXPredicateCodePool &Pool,
+                                     std::string Code);
+
+  CXXPredicateCode(std::string Code, unsigned ID);
+
+public:
+  static const CXXPredicateCode &getMatchCode(std::string Code) {
+    return get(AllCXXMatchCode, std::move(Code));
+  }
+
+  static const CXXPredicateCode &getApplyCode(std::string Code) {
+    return get(AllCXXApplyCode, std::move(Code));
+  }
+
+  static std::vector<const CXXPredicateCode *> getAllMatchCode() {
+    return getSorted(AllCXXMatchCode);
+  }
+
+  static std::vector<const CXXPredicateCode *> getAllApplyCode() {
+    return getSorted(AllCXXApplyCode);
+  }
+
+  const std::string Code;
+  const unsigned ID;
+  const std::string BaseEnumName;
+
+  bool needsUnreachable() const {
+    return !StringRef(Code).starts_with("return");
+  }
+
+  std::string getEnumNameWithPrefix(StringRef Prefix) const {
+    return Prefix.str() + BaseEnumName;
+  }
+};
+
+} // namespace gi
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_MIRPATTERNS_CXXPREDICATES_H
diff --git a/llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp
new file mode 100644
index 000000000000000..b5c9e4f8c24852d
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp
@@ -0,0 +1,49 @@
+//===- MatchDataInfo.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "MatchDataInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace gi {
+
+StringMap<std::vector<std::string>> AllMatchDataVars;
+
+StringRef MatchDataInfo::getVariableName() const {
+  assert(hasVariableName());
+  return VarName;
+}
+
+void MatchDataInfo::print(raw_ostream &OS) const {
+  OS << "(MatchDataInfo pattern_symbol:" << PatternSymbol << " type:'" << Type
+     << "' var_name:" << (VarName.empty() ? "<unassigned>" : VarName) << ")";
+}
+
+void MatchDataInfo::dump() const { print(dbgs()); }
+
+void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos) {
+  static unsigned NextVarID = 0;
+
+  StringMap<unsigned> SeenTypes;
+  for (auto &Info : Infos) {
+    unsigned &NumSeen = SeenTypes[Info.getType()];
+    auto &ExistingVars = AllMatchDataVars[Info.getType()];
+
+    if (NumSeen == ExistingVars.size())
+      ExistingVars.push_back("MDInfo" + std::to_string(NextVarID++));
+
+    Info.setVariableName(ExistingVars[NumSeen++]);
+  }
+}
+
+} // namespace gi
+} // namespace llvm
diff --git a/llvm/utils/TableGen/GlobalISel/MatchDataInfo.h b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.h
new file mode 100644
index 000000000000000..1960a3a7c24d9b7
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.h
@@ -0,0 +1,87 @@
+//===- MatchDataInfo.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_MIRPATTERNS_MATCHDATAINFO_H
+#define LLVM_UTILS_MIRPATTERNS_MATCHDATAINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace gi {
+
+/// Represents MatchData defined by the match stage and required by the apply
+/// stage.
+///
+/// This allows the plumbing of arbitrary data from C++ predicates between the
+/// stages.
+///
+/// When this class is initially created, it only has a pattern symbol and a
+/// type. When all of the MatchData declarations of a given pattern have been
+/// parsed, `AssignMatchDataVariables` must be called to assign storage
+/// variable names to each MatchDataInfo.
+class MatchDataInfo {
+  StringRef PatternSymbol;
+  StringRef Type;
+  std::string VarName;
+
+public:
+  static constexpr StringLiteral StructTypeName = "MatchInfosTy";
+  static constexpr StringLiteral StructName = "MatchInfos";
+
+  MatchDataInfo(StringRef PatternSymbol, StringRef Type)
+      : PatternSymbol(PatternSymbol), Type(Type.trim()) {}
+
+  StringRef getPatternSymbol() const { return PatternSymbol; };
+  StringRef getType() const { return Type; };
+
+  bool hasVariableName() const { return !VarName.empty(); }
+  void setVariableName(StringRef Name) { VarName = Name; }
+  StringRef getVariableName() const;
+
+  std::string getQualifiedVariableName() const {
+    return StructName.str() + "." + getVariableName().str();
+  }
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
+
+/// Pool of type -> variables used to emit MatchData variables declarations.
+///
+/// e.g. if the map contains "int64_t" -> ["MD0", "MD1"], then two variable
+/// declarations must be emitted: `int64_t MD0` and `int64_t MD1`.
+///
+/// This has a static lifetime and will outlive all the `MatchDataInfo` objects
+/// by design. It needs a static lifetime so the backends can emit variable
+/// declarations after processing all the inputs.
+extern StringMap<std::vector<std::string>> AllMatchDataVars;
+
+/// Assign variable names to all MatchDatas used by a pattern. This must be
+/// called after all MatchData decls have been parsed for a given processing
+/// unit (e.g. a combine rule)
+///
+/// Requires an array of MatchDataInfo so we can handle cases where a pattern
+/// uses multiple instances of the same MatchData type.
+///
+/// Writes to \ref AllMatchDataVars.
+void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos);
+
+} // namespace gi
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_MIRPATTERNS_MATCHDATAINFO_H
diff --git a/llvm/utils/TableGen/GlobalISel/Patterns.cpp b/llvm/utils/TableGen/GlobalISel/Patterns.cpp
new file mode 100644
index 000000000000000..d4325771c0a8844
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/Patterns.cpp
@@ -0,0 +1,845 @@
+//===- Patterns.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Patterns.h"
+#include "../CodeGenInstruction.h"
+#include "CXXPredicates.h"
+#include "CodeExpander.h"
+#include "CodeExpansions.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+namespace gi {
+
+//===- PatternType --------------------------------------------------------===//
+
+std::optional<PatternType> PatternType::get(ArrayRef<SMLoc> DiagLoc,
+                                            const Record *R, Twine DiagCtx) {
+  assert(R);
+  if (R->isSubClassOf("ValueType")) {
+    PatternType PT(PT_ValueType);
+    PT.Data.Def = R;
+    return PT;
+  }
+
+  if (R->isSubClassOf(TypeOfClassName)) {
+    auto RawOpName = R->getValueAsString("OpName");
+    if (!RawOpName.starts_with("$")) {
+      PrintError(DiagLoc, DiagCtx + ": invalid operand name format '" +
+                              RawOpName + "' in " + TypeOfClassName +
+                              ": expected '$' followed by an operand name");
+      return std::nullopt;
+    }
+
+    PatternType PT(PT_TypeOf);
+    PT.Data.Str = RawOpName.drop_front(1);
+    return PT;
+  }
+
+  PrintError(DiagLoc, DiagCtx + ": unknown type '" + R->getName() + "'");
+  return std::nullopt;
+}
+
+PatternType PatternType::getTypeOf(StringRef OpName) {
+  PatternType PT(PT_TypeOf);
+  PT.Data.Str = OpName;
+  return PT;
+}
+
+StringRef PatternType::getTypeOfOpName() const {
+  assert(isTypeOf());
+  return Data.Str;
+}
+
+const Record *PatternType::getLLTRecord() const {
+  assert(isLLT());
+  return Data.Def;
+}
+
+bool PatternType::operator==(const PatternType &Other) const {
+  if (Kind != Other.Kind)
+    return false;
+
+  switch (Kind) {
+  case PT_None:
+    return true;
+  case PT_ValueType:
+    return Data.Def == Other.Data.Def;
+  case PT_TypeOf:
+    return Data.Str == Other.Data.Str;
+  }
+
+  llvm_unreachable("Unknown Type Kind");
+}
+
+std::string PatternType::str() const {
+  switch (Kind) {
+  case PT_None:
+    return "";
+  case PT_ValueType:
+    return Data.Def->getName().str();
+  case PT_TypeOf:
+    return (TypeOfClassName + "<$" + getTypeOfOpName() + ">").str();
+  }
+
+  llvm_unreachable("Unknown type!");
+}
+
+//===- Pattern ------------------------------------------------------------===//
+
+void Pattern::dump() const { return print(dbgs()); }
+
+const char *Pattern::getKindName() const {
+  switch (Kind) {
+  case K_AnyOpcode:
+    return "AnyOpcodePattern";
+  case K_CXX:
+    return "CXXPattern";
+  case K_CodeGenInstruction:
+    return "CodeGenInstructionPattern";
+  case K_PatFrag:
+    return "PatFragPattern";
+  case K_Builtin:
+    return "BuiltinPattern";
+  }
+
+  llvm_unreachable("unknown pattern kind!");
+}
+
+void Pattern::printImpl(raw_ostream &OS, bool PrintName,
+                        function_ref<void()> ContentPrinter) const {
+  OS << "(" << getKindName() << " ";
+  if (PrintName)
+    OS << "name:" << getName() << " ";
+  ContentPrinter();
+  OS << ")";
+}
+
+//===- AnyOpcodePattern ---------------------------------------------------===//
+
+void AnyOpcodePattern::print(raw_ostream &OS, bool PrintName) const {
+  printImpl(OS, PrintName, [&OS, this]() {
+    OS << "["
+       << join(map_range(Insts,
+                         [](const auto *I) { return I->TheDef->getName(); }),
+               ", ")
+       << "]";
+  });
+}
+
+//===- CXXPattern ---------------------------------------------------------===//
+
+CXXPattern::CXXPattern(const StringInit &Code, StringRef Name)
+    : CXXPattern(Code.getAsUnquotedString(), Name) {}
+
+const CXXPredicateCode &
+CXXPattern::expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
+                       function_ref<void(raw_ostream &)> AddComment) const {
+  std::string Result;
+  raw_string_ostream OS(Result);
+
+  if (AddComment)
+    AddComment(OS);
+
+  CodeExpander Expander(RawCode, CE, Locs, /*ShowExpansions*/ false);
+  Expander.emit(OS);
+  if (IsApply)
+    return CXXPredicateCode::getApplyCode(std::move(Result));
+  return CXXPredicateCode::getMatchCode(std::move(Result));
+}
+
+void CXXPattern::print(raw_ostream &OS, bool PrintName) const {
+  printImpl(OS, PrintName, [&OS, this] {
+    OS << (IsApply ? "apply" : "match") << " code:\"";
+    printEscapedString(getRawCode(), OS);
+    OS << "\"";
+  });
+}
+
+//===- InstructionOperand -------------------------------------------------===//
+
+std::string InstructionOperand::describe() const {
+  if (!hasImmValue())
+    return "MachineOperand $" + getOperandName().str() + "";
+  std::string Str = "imm " + std::to_string(getImmValue());
+  if (isNamedImmediate())
+    Str += ":$" + getOperandName().str() + "";
+  return Str;
+}
+
+void InstructionOperand::print(raw_ostream &OS) const {
+  if (isDef())
+    OS << "<def>";
+
+  bool NeedsColon = true;
+  if (Type) {
+    if (hasImmValue())
+      OS << "(" << Type.str() << " " << getImmValue() << ")";
+    else
+      OS << Type.str();
+  } else if (hasImmValue())
+    OS << getImmValue();
+  else
+    NeedsColon = false;
+
+  if (isNamedOperand())
+    OS << (NeedsColon ? ":" : "") << "$" << getOperandName();
+}
+
+void InstructionOperand::dump() const { return print(dbgs()); }
+
+//===- InstructionPattern -------------------------------------------------===//
+
+bool InstructionPattern::diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc,
+                                                 Twine Msg) const {
+  bool HasDiag = false;
+  for (const auto &[Idx, Op] : enumerate(operands())) {
+    if (Op.getType().isSpecial()) {
+      PrintError(Loc, Msg);
+      PrintNote(Loc, "operand " + Twine(Idx) + " of '" + getName() +
+                         "' has type '" + Op.getType().str() + "'");
+      HasDiag = true;
+    }
+  }
+  return HasDiag;
+}
+
+void InstructionPattern::reportUnreachable(ArrayRef<SMLoc> Locs) const {
+  PrintError(Locs, "pattern '" + getName() + "' ('" + getInstName() +
+                       "') is unreachable from the pattern root!");
+}
+
+bool InstructionPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
+  unsigned NumExpectedOperands = getNumInstOperands();
+  // Variadic instructions only have a lower bound on their operand count.
+  if (isVariadic()) {
+    if (Operands.size() < NumExpectedOperands) {
+      PrintError(Loc, "'" + getInstName() + "' expected at least " +
+                          Twine(NumExpectedOperands) + " operands, got " +
+                          Twine(Operands.size()));
+      return false;
+    }
+  } else if (NumExpectedOperands != Operands.size()) {
+    PrintError(Loc, "'" + getInstName() + "' expected " +
+                        Twine(NumExpectedOperands) + " operands, got " +
+                        Twine(Operands.size()));
+    return false;
+  }
+  // The first getNumInstDefs() operands are defs, all the others are uses.
+  unsigned OpIdx = 0;
+  unsigned NumDefs = getNumInstDefs();
+  for (auto &Op : Operands)
+    Op.setIsDef(OpIdx++ < NumDefs);
+
+  return true;
+}
+
+void InstructionPattern::print(raw_ostream &OS, bool PrintName) const {
+  printImpl(OS, PrintName, [&OS, this] {
+    OS << getInstName() << " operands:[";
+    StringRef Sep = "";
+    for (const auto &Op : Operands) {
+      OS << Sep;
+      Op.print(OS);
+      Sep = ", ";
+    }
+    OS << "]";
+
+    printExtras(OS);
+  });
+}
+
+//===- OperandTable -------------------------------------------------------===//
+
+bool OperandTable::addPattern(InstructionPattern *P,
+                              function_ref<void(StringRef)> DiagnoseRedef) {
+  for (const auto &Op : P->named_operands()) {
+    StringRef OpName = Op.getOperandName();
+
+    // We always create an entry in the OperandTable, even for uses.
+    // Uses of operands that don't have a def (= live-ins) will remain with a
+    // nullptr as the Def.
+    //
+    // This allows us to tell whether an operand exists in a pattern or not. If
+    // there is no entry for it, it doesn't exist, if there is an entry, it's
+    // used/def'd at least once.
+    auto &Def = Table[OpName];
+
+    if (!Op.isDef())
+      continue;
+
+    if (Def) {
+      DiagnoseRedef(OpName);
+      return false;
+    }
+
+    Def = P;
+  }
+
+  return true;
+}
+
+void OperandTable::print(raw_ostream &OS, StringRef Name,
+                         StringRef Indent) const {
+  OS << Indent << "(OperandTable ";
+  if (!Name.empty())
+    OS << Name << " ";
+  if (Table.empty()) {
+    OS << "<empty>)\n";
+    return;
+  }
+
+  SmallVector<StringRef, 0> Keys(Table.keys());
+  sort(Keys);
+
+  OS << '\n';
+  for (const auto &Key : Keys) {
+    const auto *Def = Table.at(Key);
+    OS << Indent << "  " << Key << " -> "
+       << (Def ? Def->getName() : "<live-in>") << '\n';
+  }
+  OS << Indent << ")\n";
+}
+
+void OperandTable::dump() const { print(dbgs()); }
+
+//===- MIFlagsInfo --------------------------------------------------------===//
+
+void MIFlagsInfo::addSetFlag(const Record *R) {
+  SetF.insert(R->getValueAsString("EnumName"));
+}
+
+void MIFlagsInfo::addUnsetFlag(const Record *R) {
+  UnsetF.insert(R->getValueAsString("EnumName"));
+}
+
+void MIFlagsInfo::addCopyFlag(StringRef InstName) { CopyF.insert(InstName); }
+
+//===- CodeGenInstructionPattern ------------------------------------------===//
+
+bool CodeGenInstructionPattern::is(StringRef OpcodeName) const {
+  return I.TheDef->getName() == OpcodeName;
+}
+
+bool CodeGenInstructionPattern::isVariadic() const {
+  return I.Operands.isVariadic;
+}
+
+bool CodeGenInstructionPattern::hasVariadicDefs() const {
+  // Note: we cannot rely on variadicOpsAreDefs alone, it's not set for
+  // GenericInstructions.
+  if (!isVariadic())
+    return false;
+
+  if (I.variadicOpsAreDefs)
+    return true;
+
+  DagInit *OutOps = I.TheDef->getValueAsDag("OutOperandList");
+  if (OutOps->arg_empty())
+    return false;
+
+  auto *LastArgTy = dyn_cast<DefInit>(OutOps->getArg(OutOps->arg_size() - 1));
+  return LastArgTy && LastArgTy->getDef()->getName() == "variable_ops";
+}
+
+unsigned CodeGenInstructionPattern::getNumInstDefs() const {
+  if (!isVariadic() || !hasVariadicDefs())
+    return I.Operands.NumDefs;
+  unsigned NumOuts = I.Operands.size() - I.Operands.NumDefs;
+  assert(Operands.size() > NumOuts);
+  return std::max<unsigned>(I.Operands.NumDefs, Operands.size() - NumOuts);
+}
+
+unsigned CodeGenInstructionPattern::getNumInstOperands() const {
+  unsigned NumCGIOps = I.Operands.size();
+  return isVariadic() ? std::max<unsigned>(NumCGIOps, Operands.size())
+                      : NumCGIOps;
+}
+
+MIFlagsInfo &CodeGenInstructionPattern::getOrCreateMIFlagsInfo() {
+  if (!FI)
+    FI = std::make_unique<MIFlagsInfo>();
+  return *FI;
+}
+
+StringRef CodeGenInstructionPattern::getInstName() const {
+  return I.TheDef->getName();
+}
+
+void CodeGenInstructionPattern::printExtras(raw_ostream &OS) const {
+  if (!FI)
+    return;
+
+  OS << " (MIFlags";
+  if (!FI->set_flags().empty())
+    OS << " (set " << join(FI->set_flags(), ", ") << ")";
+  if (!FI->unset_flags().empty())
+    OS << " (unset " << join(FI->unset_flags(), ", ") << ")";
+  if (!FI->copy_flags().empty())
+    OS << " (copy " << join(FI->copy_flags(), ", ") << ")";
+  OS << ')';
+}
+
+//===- OperandTypeChecker -------------------------------------------------===//
+
+bool OperandTypeChecker::check(
+    InstructionPattern &P,
+    std::function<bool(const PatternType &)> VerifyTypeOfOperand) {
+  Pats.push_back(&P);
+
+  for (auto &Op : P.operands()) {
+    const auto Ty = Op.getType();
+    if (!Ty)
+      continue;
+
+    if (Ty.isTypeOf() && !VerifyTypeOfOperand(Ty))
+      return false;
+
+    if (!Op.isNamedOperand())
+      continue;
+
+    StringRef OpName = Op.getOperandName();
+    auto &Info = Types[OpName];
+    if (!Info.Type) {
+      Info.Type = Ty;
+      Info.PrintTypeSrcNote = [this, OpName, Ty, &P]() {
+        PrintSeenWithTypeIn(P, OpName, Ty);
+      };
+      continue;
+    }
+
+    if (Info.Type != Ty) {
+      PrintError(DiagLoc, "conflicting types for operand '" +
+                              Op.getOperandName() + "': '" + Info.Type.str() +
+                              "' vs '" + Ty.str() + "'");
+      PrintSeenWithTypeIn(P, OpName, Ty);
+      Info.PrintTypeSrcNote();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void OperandTypeChecker::propagateTypes() {
+  for (auto *Pat : Pats) {
+    for (auto &Op : Pat->named_operands()) {
+      if (auto &Info = Types[Op.getOperandName()]; Info.Type)
+        Op.setType(Info.Type);
+    }
+  }
+}
+
+void OperandTypeChecker::PrintSeenWithTypeIn(InstructionPattern &P,
+                                             StringRef OpName,
+                                             PatternType Ty) const {
+  PrintNote(DiagLoc, "'" + OpName + "' seen with type '" + Ty.str() + "' in '" +
+                         P.getName() + "'");
+}
+
+StringRef PatFrag::getParamKindStr(ParamKind OK) {
+  switch (OK) {
+  case PK_Root:
+    return "root";
+  case PK_MachineOperand:
+    return "machine_operand";
+  case PK_Imm:
+    return "imm";
+  }
+
+  llvm_unreachable("Unknown operand kind!");
+}
+
+//===- PatFrag -----------------------------------------------------------===//
+
+PatFrag::PatFrag(const Record &Def) : Def(Def) {
+  assert(Def.isSubClassOf(ClassName));
+}
+
+StringRef PatFrag::getName() const { return Def.getName(); }
+
+ArrayRef<SMLoc> PatFrag::getLoc() const { return Def.getLoc(); }
+
+void PatFrag::addInParam(StringRef Name, ParamKind Kind) {
+  Params.emplace_back(Param{Name, Kind});
+}
+
+iterator_range<PatFrag::ParamIt> PatFrag::in_params() const {
+  return {Params.begin() + NumOutParams, Params.end()};
+}
+
+void PatFrag::addOutParam(StringRef Name, ParamKind Kind) {
+  assert(NumOutParams == Params.size() &&
+         "Adding out-param after an in-param!");
+  Params.emplace_back(Param{Name, Kind});
+  ++NumOutParams;
+}
+
+iterator_range<PatFrag::ParamIt> PatFrag::out_params() const {
+  return {Params.begin(), Params.begin() + NumOutParams};
+}
+
+unsigned PatFrag::num_roots() const {
+  return count_if(out_params(),
+                  [&](const auto &P) { return P.Kind == PK_Root; });
+}
+
+unsigned PatFrag::getParamIdx(StringRef Name) const {
+  for (const auto &[Idx, Op] : enumerate(Params)) {
+    if (Op.Name == Name)
+      return Idx;
+  }
+
+  return -1;
+}
+
+bool PatFrag::checkSemantics() {
+  for (const auto &Alt : Alts) {
+    for (const auto &Pat : Alt.Pats) {
+      switch (Pat->getKind()) {
+      case Pattern::K_AnyOpcode:
+        PrintError("wip_match_opcode cannot be used in " + ClassName);
+        return false;
+      case Pattern::K_Builtin:
+        PrintError("Builtin instructions cannot be used in " + ClassName);
+        return false;
+      case Pattern::K_CXX:
+        continue;
+      case Pattern::K_CodeGenInstruction:
+        if (cast<CodeGenInstructionPattern>(Pat.get())->diagnoseAllSpecialTypes(
+                Def.getLoc(), PatternType::SpecialTyClassName +
+                                  " is not supported in " + ClassName))
+          return false;
+        continue;
+      case Pattern::K_PatFrag:
+        // TODO: It's just that the emitter doesn't handle it but technically
+        // there is no reason why we can't. We just have to be careful with
+        // operand mappings, it could get complex.
+        PrintError("nested " + ClassName + " are not supported");
+        return false;
+      }
+    }
+  }
+
+  StringSet<> SeenOps;
+  for (const auto &Op : in_params()) {
+    if (SeenOps.count(Op.Name)) {
+      PrintError("duplicate parameter '" + Op.Name + "'");
+      return false;
+    }
+
+    // Check this operand is NOT defined in any alternative's patterns.
+    for (const auto &Alt : Alts) {
+      if (Alt.OpTable.lookup(Op.Name).Def) {
+        PrintError("input parameter '" + Op.Name + "' cannot be redefined!");
+        return false;
+      }
+    }
+
+    if (Op.Kind == PK_Root) {
+      PrintError("input parameterr '" + Op.Name + "' cannot be a root!");
+      return false;
+    }
+
+    SeenOps.insert(Op.Name);
+  }
+
+  for (const auto &Op : out_params()) {
+    if (Op.Kind != PK_Root && Op.Kind != PK_MachineOperand) {
+      PrintError("output parameter '" + Op.Name +
+                 "' must be 'root' or 'gi_mo'");
+      return false;
+    }
+
+    if (SeenOps.count(Op.Name)) {
+      PrintError("duplicate parameter '" + Op.Name + "'");
+      return false;
+    }
+
+    // Check this operand is defined in all alternative's patterns.
+    for (const auto &Alt : Alts) {
+      const auto *OpDef = Alt.OpTable.getDef(Op.Name);
+      if (!OpDef) {
+        PrintError("output parameter '" + Op.Name +
+                   "' must be defined by all alternative patterns in '" +
+                   Def.getName() + "'");
+        return false;
+      }
+
+      if (Op.Kind == PK_Root && OpDef->getNumInstDefs() != 1) {
+        // The instruction that defines the root must have a single def.
+        // Otherwise we'd need to support multiple roots and it gets messy.
+        //
+        // e.g. this is not supported:
+        //   (pattern (G_UNMERGE_VALUES $x, $root, $vec))
+        PrintError("all instructions that define root '" + Op.Name + "' in '" +
+                   Def.getName() + "' can only have a single output operand");
+        return false;
+      }
+    }
+
+    SeenOps.insert(Op.Name);
+  }
+
+  if (num_out_params() != 0 && num_roots() == 0) {
+    PrintError(ClassName + " must have one root in its 'out' operands");
+    return false;
+  }
+
+  if (num_roots() > 1) {
+    PrintError(ClassName + " can only have one root");
+    return false;
+  }
+
+  // TODO: find unused params
+
+  const auto CheckTypeOf = [&](const PatternType &) -> bool {
+    llvm_unreachable("GITypeOf should have been rejected earlier!");
+  };
+
+  // Now, typecheck all alternatives.
+  for (auto &Alt : Alts) {
+    OperandTypeChecker OTC(Def.getLoc());
+    for (auto &Pat : Alt.Pats) {
+      if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
+        if (!OTC.check(*IP, CheckTypeOf))
+          return false;
+      }
+    }
+    OTC.propagateTypes();
+  }
+
+  return true;
+}
+
+bool PatFrag::handleUnboundInParam(StringRef ParamName, StringRef ArgName,
+                                   ArrayRef<SMLoc> DiagLoc) const {
+  // An unbound argument is only fine if each alternative has the parameter in
+  // its operand table. Otherwise, an unbound parameter may get used (= crash).
+  //
+  // Examples:
+  //
+  // in (ins $y), (patterns (G_FNEG $dst, $y), "return matchFnegOp(${y})")
+  //    $y may be unbound: it gets bound lazily when the G_FNEG is emitted.
+  //
+  // in (ins $y), (patterns "return matchFnegOp(${y})")
+  //    $y must be bound before this fragment is emitted, or the C++ code
+  //    expansion fails.
+  for (const Alternative &CurAlt : Alts) {
+    if (CurAlt.OpTable.lookup(ParamName).Found)
+      continue;
+
+    llvm::PrintError(DiagLoc, "operand '" + ArgName + "' (for parameter '" +
+                                  ParamName + "' of '" + getName() +
+                                  "') cannot be unbound");
+    PrintNote(
+        DiagLoc,
+        "one or more alternatives of '" + getName() + "' do not bind '" +
+            ParamName +
+            "' to an instruction operand; either use a bound operand or "
+            "ensure '" +
+            Def.getName() + "' binds '" + ParamName +
+            "' in all alternatives");
+    return false;
+  }
+
+  return true;
+}
+
+bool PatFrag::buildOperandsTables() {
+  // Index of the alternative being processed. It is counted by hand so that
+  // the diagnostic callback below can refer to it.
+  unsigned AltIdx = 0;
+
+  const auto ReportRedef = [this, &AltIdx](StringRef OpName) {
+    PrintError("Operand '" + OpName +
+               "' is defined multiple times in patterns of alternative #" +
+               std::to_string(AltIdx));
+  };
+
+  for (auto &Alt : Alts) {
+    for (auto &Pat : Alt.Pats) {
+      // Only InstructionPatterns have operands to register; every other kind
+      // of pattern is skipped.
+      if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
+        if (!Alt.OpTable.addPattern(IP, ReportRedef))
+          return false;
+      }
+    }
+
+    ++AltIdx;
+  }
+
+  return true;
+}
+
+void PatFrag::print(raw_ostream &OS, StringRef Indent) const {
+  OS << Indent << "(PatFrag name:" << getName() << '\n';
+
+  // Prints a parameter list after the given prefix, unless the list is empty.
+  const auto PrintParams = [&](StringRef Prefix, iterator_range<ParamIt> PL) {
+    if (PL.empty())
+      return;
+    OS << Indent << Prefix;
+    printParamsList(OS, PL);
+    OS << ")\n";
+  };
+  PrintParams("  (ins ", in_params());
+  PrintParams("  (outs ", out_params());
+
+  // TODO: Dump OperandTable as well.
+  OS << Indent << "  (alternatives [\n";
+  for (const Alternative &CurAlt : Alts) {
+    OS << Indent << "    [\n";
+    for (const std::unique_ptr<Pattern> &CurPat : CurAlt.Pats) {
+      OS << Indent << "      ";
+      CurPat->print(OS, /*PrintName=*/true);
+      OS << ",\n";
+    }
+    OS << Indent << "    ],\n";
+  }
+  OS << Indent << "  ])\n";
+
+  OS << Indent << ')';
+}
+
+void PatFrag::dump() const { print(dbgs(), /*Indent=*/""); }
+
+void PatFrag::printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params) {
+  OS << '[';
+  StringRef Sep; // Empty before the first element, ", " afterwards.
+  for (const Param &P : Params) {
+    OS << Sep << P.Name << ":" << getParamKindStr(P.Kind);
+    Sep = ", ";
+  }
+  OS << ']';
+}
+
+void PatFrag::PrintError(Twine Msg) const { llvm::PrintError(&Def, Msg); }
+
+ArrayRef<InstructionOperand> PatFragPattern::getApplyDefsNeeded() const {
+  assert(PF.num_roots() == 1);
+  // The root is the only out operand that has to be redefined.
+  for (const auto &En : enumerate(PF.out_params())) {
+    if (En.value().Kind == PatFrag::PK_Root)
+      return getOperand(En.index());
+  }
+  llvm_unreachable("root not found!");
+}
+
+//===- PatFragPattern -----------------------------------------------------===//
+
+bool PatFragPattern::checkSemantics(ArrayRef<SMLoc> DiagLoc) {
+  if (!InstructionPattern::checkSemantics(DiagLoc))
+    return false;
+
+  for (const auto &[Idx, Op] : enumerate(Operands)) {
+    const std::string OpNo = std::to_string(Idx);
+    switch (PF.getParam(Idx).Kind) {
+    case PatFrag::PK_Root:
+    case PatFrag::PK_MachineOperand: // Needs a name, and no immediate value.
+      if (!Op.isNamedOperand() || Op.isNamedImmediate()) {
+        PrintError(DiagLoc, "expected operand " + OpNo + " of '" +
+                                getInstName() +
+                                "' to be a MachineOperand; got " +
+                                Op.describe());
+        return false;
+      }
+      break;
+    case PatFrag::PK_Imm: // Needs an immediate value, and no name.
+      if (!Op.hasImmValue()) {
+        PrintError(DiagLoc, "expected operand " + OpNo + " of '" +
+                                getInstName() + "' to be an immediate; got " +
+                                Op.describe());
+        return false;
+      }
+      if (Op.isNamedImmediate()) {
+        PrintError(DiagLoc, "operand " + OpNo + " of '" + getInstName() +
+                                "' cannot be a named immediate");
+        return false;
+      }
+      break;
+    }
+  }
+
+  return true;
+}
+
+bool PatFragPattern::mapInputCodeExpansions(const CodeExpansions &ParentCEs,
+                                            CodeExpansions &PatFragCEs,
+                                            ArrayRef<SMLoc> DiagLoc) const {
+  for (const auto &[Idx, Op] : enumerate(operands())) {
+    StringRef ParamName = PF.getParam(Idx).Name;
+
+    // A PatFragPattern operand is either a name or an immediate value, but
+    // never both at once.
+    assert(!Op.isNamedImmediate());
+
+    if (!Op.isNamedOperand()) {
+      // Unnamed operands must be immediates; they expand to their value.
+      if (!Op.hasImmValue())
+        llvm_unreachable("Unknown Operand Type!");
+      PatFragCEs.declare(ParamName, std::to_string(Op.getImmValue()));
+      continue;
+    }
+
+    // Forward the parent's expansion for this name if there is one. If not,
+    // the PatFrag decides whether the argument may stay unbound.
+    StringRef ArgName = Op.getOperandName();
+    auto It = ParentCEs.find(ArgName);
+    if (It != ParentCEs.end()) {
+      PatFragCEs.declare(ParamName, It->second);
+      continue;
+    }
+    if (!PF.handleUnboundInParam(ParamName, ArgName, DiagLoc))
+      return false;
+  }
+
+  return true;
+}
+
+//===- BuiltinPattern -----------------------------------------------------===//
+
+BuiltinPattern::BuiltinInfo BuiltinPattern::getBuiltinInfo(const Record &Def) {
+  assert(Def.isSubClassOf(ClassName));
+
+  // Look up the known builtin whose DefName matches the name of the record.
+  StringRef Name = Def.getName();
+  const auto IsDef = [Name](const BuiltinInfo &B) { return B.DefName == Name; };
+  if (auto It = llvm::find_if(KnownBuiltins, IsDef); It != KnownBuiltins.end())
+    return *It;
+
+  PrintFatalError(Def.getLoc(), "Unimplemented " + ClassName +
+                                    " def '" + Name + "'");
+}
+
+bool BuiltinPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
+  if (!InstructionPattern::checkSemantics(Loc))
+    return false;
+
+  // Builtins currently only accept plain names as operands, not immediates.
+  for (const auto &[Idx, Op] : enumerate(operands())) {
+    if (Op.isNamedOperand() && !Op.isNamedImmediate())
+      continue;
+    PrintError(Loc, "expected operand " + std::to_string(Idx) + " of '" +
+                        getInstName() + "' to be a name");
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace gi
+} // namespace llvm
diff --git a/llvm/utils/TableGen/GlobalISel/Patterns.h b/llvm/utils/TableGen/GlobalISel/Patterns.h
new file mode 100644
index 000000000000000..b3160552a21fef3
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/Patterns.h
@@ -0,0 +1,690 @@
+//===- Patterns.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Contains the Pattern hierarchy alongside helper classes such as
+/// PatFrag, MIFlagsInfo, PatternType, etc.
+///
+/// These classes are used by the GlobalISel Combiner backend to help parse,
+/// process and emit MIR patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_GLOBALISEL_PATTERNS_H
+#define LLVM_UTILS_GLOBALISEL_PATTERNS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include <memory>
+#include <optional>
+#include <string>
+
+namespace llvm {
+
+class Record;
+class SMLoc;
+class StringInit;
+class CodeExpansions;
+class CodeGenInstruction;
+
+namespace gi {
+
+class CXXPredicateCode;
+class LLTCodeGen;
+class LLTCodeGenOrTempType;
+class RuleMatcher;
+
+//===- PatternType --------------------------------------------------------===//
+
+/// Represents the type of a Pattern Operand.
+///
+/// Types have two forms:
+///   - LLTs, which are straightforward.
+///   - Special types, e.g. GITypeOf
+class PatternType {
+public:
+  static constexpr StringLiteral SpecialTyClassName = "GISpecialType";
+  static constexpr StringLiteral TypeOfClassName = "GITypeOf";
+
+  enum PTKind : uint8_t {
+    PT_None, // No type; what a default-constructed PatternType holds.
+
+    PT_ValueType, // An LLT; Data.Def is the ValueType def.
+    PT_TypeOf,    // A GITypeOf special type; Data.Str is the operand name.
+  };
+
+  PatternType() : Kind(PT_None), Data() {}
+
+  static std::optional<PatternType> get(ArrayRef<SMLoc> DiagLoc,
+                                        const Record *R, Twine DiagCtx);
+  static PatternType getTypeOf(StringRef OpName);
+
+  bool isNone() const { return Kind == PT_None; }
+  bool isLLT() const { return Kind == PT_ValueType; }
+  bool isSpecial() const { return isTypeOf(); }
+  bool isTypeOf() const { return Kind == PT_TypeOf; }
+
+  StringRef getTypeOfOpName() const;
+  const Record *getLLTRecord() const;
+
+  explicit operator bool() const { return !isNone(); }
+
+  bool operator==(const PatternType &Other) const;
+  bool operator!=(const PatternType &Other) const { return !operator==(Other); }
+
+  std::string str() const;
+
+private:
+  PatternType(PTKind Kind) : Kind(Kind), Data() {}
+
+  PTKind Kind;
+  union DataT {
+    DataT() : Str() {}
+
+    /// PT_ValueType -> ValueType Def.
+    const Record *Def;
+
+    /// PT_TypeOf -> Operand name (without the '$')
+    StringRef Str;
+  } Data;
+};
+
+//===- Pattern Base Class -------------------------------------------------===//
+
+/// Base class for all patterns that can be written in an `apply`, `match` or
+/// `pattern` DAG operator.
+///
+/// For example:
+///
+///     (apply (G_ZEXT $x, $y), (G_ZEXT $y, $z), "return isFoo(${z})")
+///
+/// Creates 3 Pattern objects:
+///   - Two CodeGenInstruction Patterns
+///   - A CXXPattern
+class Pattern {
+public:
+  enum {
+    K_AnyOpcode,
+    K_CXX,
+
+    K_CodeGenInstruction,
+    K_PatFrag,
+    K_Builtin,
+  };
+
+  virtual ~Pattern() = default;
+
+  unsigned getKind() const { return Kind; }
+  const char *getKindName() const;
+
+  bool hasName() const { return !Name.empty(); }
+  StringRef getName() const { return Name; }
+
+  virtual void print(raw_ostream &OS, bool PrintName = true) const = 0;
+  void dump() const;
+
+protected:
+  Pattern(unsigned Kind, StringRef Name) : Kind(Kind), Name(Name) {
+    assert(!Name.empty() && "unnamed pattern!");
+  }
+
+  void printImpl(raw_ostream &OS, bool PrintName,
+                 function_ref<void()> ContentPrinter) const;
+
+private:
+  unsigned Kind;
+  StringRef Name;
+};
+
+//===- AnyOpcodePattern ---------------------------------------------------===//
+
+/// `wip_match_opcode` patterns.
+/// This matches one or more opcodes, and does not check any operands
+/// whatsoever.
+///
+/// TODO: Long-term, this needs to be removed. It's a hack around MIR
+///       pattern matching limitations.
+class AnyOpcodePattern : public Pattern {
+public:
+  AnyOpcodePattern(StringRef Name) : Pattern(K_AnyOpcode, Name) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_AnyOpcode; }
+
+  void addOpcode(const CodeGenInstruction *I) { Insts.push_back(I); }
+  const auto &insts() const { return Insts; }
+
+  void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+  SmallVector<const CodeGenInstruction *, 4> Insts;
+};
+
+//===- CXXPattern ---------------------------------------------------------===//
+
+/// Represents raw C++ code which may need some expansions.
+///
+///   e.g. [{ return isFooBux(${src}.getReg()); }]
+///
+/// For the expanded code, \see CXXPredicateCode. CXXPredicateCode objects are
+/// created through `expandCode`.
+///
+/// \see CodeExpander and \see CodeExpansions for more information on code
+/// expansions.
+///
+/// This object has two purposes:
+///   - Represent C++ code as a pattern entry.
+///   - Be a factory for expanded C++ code.
+///     - It's immutable and only holds the raw code so we can expand the same
+///       CXX pattern multiple times if we need to.
+///
+/// Note that the code is always trimmed in the constructor, so leading and
+/// trailing whitespaces are removed. This removes bloat in the output, avoids
+/// formatting issues, but also allows us to check things like
+/// `.startswith("return")` trivially without worrying about spaces.
+class CXXPattern : public Pattern {
+public:
+  CXXPattern(const StringInit &Code, StringRef Name);
+
+  CXXPattern(StringRef Code, StringRef Name)
+      : Pattern(K_CXX, Name), RawCode(Code.trim().str()) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_CXX; }
+
+  void setIsApply(bool Value = true) { IsApply = Value; }
+  StringRef getRawCode() const { return RawCode; }
+
+  /// Expands raw code, replacing things such as `${foo}` with their
+  /// substitution in \p CE.
+  ///
+  /// \param CE     Map of Code Expansions
+  /// \param Locs   SMLocs for the Code Expander, in case it needs to emit
+  ///               diagnostics.
+  /// \param AddComment Optionally called to emit a comment before the expanded
+  ///                   code.
+  ///
+  /// \return A CXXPredicateCode object that contains the expanded code. Note
+  /// that this may or may not insert a new object. All CXXPredicateCode objects
+  /// are held in a set to avoid emitting duplicate C++ code.
+  const CXXPredicateCode &
+  expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
+             function_ref<void(raw_ostream &)> AddComment = {}) const;
+
+  void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+  bool IsApply = false;
+  std::string RawCode;
+};
+
+//===- InstructionPattern ---------------------------------------------===//
+
+/// An operand for an InstructionPattern.
+///
+/// Operands are composed of three elements:
+///   - (Optional) Value
+///   - (Optional) Name
+///   - (Optional) Type
+///
+/// Some examples:
+///   (i32 0):$x -> V=int(0), Name='x', Type=i32
+///   0:$x -> V=int(0), Name='x'
+///   $x -> Name='x'
+///   i32:$x -> Name='x', Type = i32
+class InstructionOperand {
+public:
+  using IntImmTy = int64_t;
+
+  InstructionOperand(IntImmTy Imm, StringRef Name, PatternType Type)
+      : Value(Imm), Name(Name), Type(Type) {}
+
+  InstructionOperand(StringRef Name, PatternType Type)
+      : Name(Name), Type(Type) {}
+
+  bool isNamedImmediate() const { return hasImmValue() && isNamedOperand(); }
+
+  bool hasImmValue() const { return Value.has_value(); }
+  IntImmTy getImmValue() const { return *Value; }
+
+  bool isNamedOperand() const { return !Name.empty(); }
+  StringRef getOperandName() const {
+    assert(isNamedOperand() && "Operand is unnamed");
+    return Name;
+  }
+
+  InstructionOperand withNewName(StringRef NewName) const {
+    InstructionOperand Result = *this;
+    Result.Name = NewName;
+    return Result;
+  }
+
+  void setIsDef(bool Value = true) { Def = Value; }
+  bool isDef() const { return Def; }
+
+  void setType(PatternType NewType) {
+    assert((!Type || (Type == NewType)) && "Overwriting type!");
+    Type = NewType;
+  }
+  PatternType getType() const { return Type; }
+
+  std::string describe() const;
+  void print(raw_ostream &OS) const;
+
+  void dump() const;
+
+private:
+  std::optional<int64_t> Value;
+  StringRef Name;
+  PatternType Type;
+  bool Def = false;
+};
+
+/// Base class for CodeGenInstructionPattern, PatFragPattern and BuiltinPattern.
+/// It handles all the boilerplate for patterns that have a list of operands
+/// for some (pseudo) instruction.
+class InstructionPattern : public Pattern {
+public:
+  virtual ~InstructionPattern() = default;
+
+  static bool classof(const Pattern *P) {
+    return P->getKind() == K_CodeGenInstruction || P->getKind() == K_PatFrag ||
+           P->getKind() == K_Builtin;
+  }
+
+  template <typename... Ty> void addOperand(Ty &&...Init) {
+    Operands.emplace_back(std::forward<Ty>(Init)...);
+  }
+
+  auto &operands() { return Operands; }
+  const auto &operands() const { return Operands; }
+  unsigned operands_size() const { return Operands.size(); }
+  InstructionOperand &getOperand(unsigned K) { return Operands[K]; }
+  const InstructionOperand &getOperand(unsigned K) const { return Operands[K]; }
+
+  /// When this InstructionPattern is used as the match root, returns the
+  /// operands that must be redefined in the 'apply' pattern for the rule to be
+  /// valid.
+  ///
+  /// For most patterns, this just returns the defs.
+  /// For PatFrag this only returns the root of the PF.
+  ///
+  /// This default implementation assumes that all defs are present.
+  virtual ArrayRef<InstructionOperand> getApplyDefsNeeded() const {
+    return {operands().begin(), getNumInstDefs()};
+  }
+
+  auto named_operands() {
+    return make_filter_range(Operands,
+                             [&](auto &O) { return O.isNamedOperand(); });
+  }
+
+  auto named_operands() const {
+    return make_filter_range(Operands,
+                             [&](auto &O) { return O.isNamedOperand(); });
+  }
+
+  virtual bool isVariadic() const { return false; }
+  virtual unsigned getNumInstOperands() const = 0;
+  virtual unsigned getNumInstDefs() const = 0;
+
+  bool hasAllDefs() const { return operands_size() >= getNumInstDefs(); }
+
+  virtual StringRef getInstName() const = 0;
+
+  /// Diagnoses all uses of special types in this Pattern and returns true if at
+  /// least one diagnostic was emitted.
+  bool diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc, Twine Msg) const;
+
+  void reportUnreachable(ArrayRef<SMLoc> Locs) const;
+  virtual bool checkSemantics(ArrayRef<SMLoc> Loc);
+
+  void print(raw_ostream &OS, bool PrintName = true) const override;
+
+protected:
+  InstructionPattern(unsigned K, StringRef Name) : Pattern(K, Name) {}
+
+  virtual void printExtras(raw_ostream &OS) const {}
+
+  SmallVector<InstructionOperand, 4> Operands;
+};
+
+//===- OperandTable -------------------------------------------------------===//
+
+/// Maps InstructionPattern operands to their definitions. This allows us to tie
+/// different patterns of a (apply), (match) or (patterns) set of patterns
+/// together.
+class OperandTable {
+public:
+  bool addPattern(InstructionPattern *P,
+                  function_ref<void(StringRef)> DiagnoseRedef);
+
+  struct LookupResult {
+    LookupResult() = default;
+    LookupResult(InstructionPattern *Def) : Found(true), Def(Def) {}
+
+    bool Found = false;
+    InstructionPattern *Def = nullptr;
+
+    bool isLiveIn() const { return Found && !Def; }
+  };
+
+  LookupResult lookup(StringRef OpName) const {
+    if (auto It = Table.find(OpName); It != Table.end())
+      return LookupResult(It->second);
+    return LookupResult();
+  }
+
+  InstructionPattern *getDef(StringRef OpName) const {
+    return lookup(OpName).Def;
+  }
+
+  void print(raw_ostream &OS, StringRef Name = "", StringRef Indent = "") const;
+
+  auto begin() const { return Table.begin(); }
+  auto end() const { return Table.end(); }
+
+  void dump() const;
+
+private:
+  StringMap<InstructionPattern *> Table;
+};
+
+//===- CodeGenInstructionPattern ------------------------------------------===//
+
+/// Helper class to contain data associated with a MIFlags operand.
+class MIFlagsInfo {
+public:
+  void addSetFlag(const Record *R);
+  void addUnsetFlag(const Record *R);
+  void addCopyFlag(StringRef InstName);
+
+  const auto &set_flags() const { return SetF; }
+  const auto &unset_flags() const { return UnsetF; }
+  const auto &copy_flags() const { return CopyF; }
+
+private:
+  SetVector<StringRef> SetF, UnsetF, CopyF;
+};
+
+/// Matches an instruction, e.g. `G_ADD $x, $y, $z`.
+class CodeGenInstructionPattern : public InstructionPattern {
+public:
+  CodeGenInstructionPattern(const CodeGenInstruction &I, StringRef Name)
+      : InstructionPattern(K_CodeGenInstruction, Name), I(I) {}
+
+  static bool classof(const Pattern *P) {
+    return P->getKind() == K_CodeGenInstruction;
+  }
+
+  bool is(StringRef OpcodeName) const;
+
+  bool hasVariadicDefs() const;
+  bool isVariadic() const override;
+  unsigned getNumInstDefs() const override;
+  unsigned getNumInstOperands() const override;
+
+  MIFlagsInfo &getOrCreateMIFlagsInfo();
+  const MIFlagsInfo *getMIFlagsInfo() const { return FI.get(); }
+
+  const CodeGenInstruction &getInst() const { return I; }
+  StringRef getInstName() const override;
+
+private:
+  void printExtras(raw_ostream &OS) const override;
+
+  const CodeGenInstruction &I;
+  std::unique_ptr<MIFlagsInfo> FI;
+};
+
+//===- OperandTypeChecker -------------------------------------------------===//
+
+/// This is a trivial type checker for all operands in a set of
+/// InstructionPatterns.
+///
+/// It infers the type of each operand, checks that it's consistent with the
+/// known type of the operand, and then sets the types of all operands in
+/// propagateTypes.
+///
+/// It also handles verifying correctness of special types.
+class OperandTypeChecker {
+public:
+  OperandTypeChecker(ArrayRef<SMLoc> DiagLoc) : DiagLoc(DiagLoc) {}
+
+  /// Step 1: Check each pattern one by one. All patterns that pass through here
+  /// are added to a common worklist so propagateTypes can access them.
+  bool check(InstructionPattern &P,
+             std::function<bool(const PatternType &)> VerifyTypeOfOperand);
+
+  /// Step 2: Propagate all types. e.g. if one use of "$a" has type i32, make
+  /// all uses of "$a" have type i32.
+  void propagateTypes();
+
+protected:
+  ArrayRef<SMLoc> DiagLoc;
+
+private:
+  using InconsistentTypeDiagFn = std::function<void()>;
+
+  void PrintSeenWithTypeIn(InstructionPattern &P, StringRef OpName,
+                           PatternType Ty) const;
+
+  struct OpTypeInfo {
+    PatternType Type;
+    InconsistentTypeDiagFn PrintTypeSrcNote = []() {};
+  };
+
+  StringMap<OpTypeInfo> Types;
+
+  SmallVector<InstructionPattern *, 16> Pats;
+};
+
+//===- PatFrag ------------------------------------------------------------===//
+
+/// Represents a parsed GICombinePatFrag. This can be thought of as the
+/// equivalent of a CodeGenInstruction, but for PatFragPatterns.
+///
+/// PatFrags are made of 3 things:
+///   - Out parameters (defs)
+///   - In parameters
+///   - A set of pattern lists (alternatives).
+///
+/// If the PatFrag uses instruction patterns, the root must be one of the defs.
+///
+/// Note that this DOES NOT represent the use of the PatFrag, only its
+/// definition. The use of the PatFrag in a Pattern is represented by
+/// PatFragPattern.
+///
+/// PatFrags use the term "parameter" instead of operand because they're
+/// essentially macros, and using that name avoids confusion. Other than that,
+/// they're structured similarly to a MachineInstruction - all parameters
+/// (operands) are in the same list, with defs at the start. This helps mapping
+/// parameters to values, because param N of a PatFrag is always operand N of a
+/// PatFragPattern.
+class PatFrag {
+public:
+  static constexpr StringLiteral ClassName = "GICombinePatFrag";
+
+  enum ParamKind {
+    PK_Root,
+    PK_MachineOperand,
+    PK_Imm,
+  };
+
+  struct Param {
+    StringRef Name;
+    ParamKind Kind;
+  };
+
+  using ParamVec = SmallVector<Param, 4>;
+  using ParamIt = ParamVec::const_iterator;
+
+  /// Represents an alternative of the PatFrag. When parsing a GICombinePatFrag,
+  /// this is created from its "Alternatives" list. Each alternative is a list
+  /// of patterns wrapped in a `(pattern ...)` dag init.
+  ///
+  /// Each argument to the `pattern` DAG operator is parsed into a Pattern
+  /// instance.
+  struct Alternative {
+    OperandTable OpTable;
+    SmallVector<std::unique_ptr<Pattern>, 4> Pats;
+  };
+
+  explicit PatFrag(const Record &Def);
+
+  static StringRef getParamKindStr(ParamKind OK);
+
+  StringRef getName() const;
+
+  const Record &getDef() const { return Def; }
+  ArrayRef<SMLoc> getLoc() const;
+
+  Alternative &addAlternative() { return Alts.emplace_back(); }
+  const Alternative &getAlternative(unsigned K) const { return Alts[K]; }
+  unsigned num_alternatives() const { return Alts.size(); }
+
+  void addInParam(StringRef Name, ParamKind Kind);
+  iterator_range<ParamIt> in_params() const;
+  unsigned num_in_params() const { return Params.size() - NumOutParams; }
+
+  void addOutParam(StringRef Name, ParamKind Kind);
+  iterator_range<ParamIt> out_params() const;
+  unsigned num_out_params() const { return NumOutParams; }
+
+  unsigned num_roots() const;
+  unsigned num_params() const { return num_in_params() + num_out_params(); }
+
+  /// Finds the param \p Name and returns its index or -1 (as unsigned) if not
+  /// found. Remember that all params are part of the same list, with out params
+  /// at the start. This means that the index returned can be used to access
+  /// operands of InstructionPatterns.
+  unsigned getParamIdx(StringRef Name) const;
+  const Param &getParam(unsigned K) const { return Params[K]; }
+
+  bool canBeMatchRoot() const { return num_roots() == 1; }
+
+  void print(raw_ostream &OS, StringRef Indent = "") const;
+  void dump() const;
+
+  /// Checks if the in-param \p ParamName can be unbound or not.
+  /// \p ArgName is the name of the argument passed to the PatFrag.
+  ///
+  /// An argument can be unbound only if the OperandTable of every alternative
+  /// contains \p ParamName. Otherwise, an error is emitted at \p DiagLoc and
+  /// false is returned.
+  ///
+  /// e.g. in (MyPatFrag $foo), if $foo has never been seen before (= it's
+  /// unbound), this checks if MyPatFrag supports it or not.
+  bool handleUnboundInParam(StringRef ParamName, StringRef ArgName,
+                            ArrayRef<SMLoc> DiagLoc) const;
+
+  bool checkSemantics();
+  bool buildOperandsTables();
+
+private:
+  static void printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params);
+
+  void PrintError(Twine Msg) const;
+
+  const Record &Def;
+  unsigned NumOutParams = 0;
+  ParamVec Params;
+  SmallVector<Alternative, 2> Alts;
+};
+
+//===- PatFragPattern -----------------------------------------------------===//
+
+/// Represents a use of a GICombinePatFrag.
+class PatFragPattern : public InstructionPattern {
+public:
+  PatFragPattern(const PatFrag &PF, StringRef Name)
+      : InstructionPattern(K_PatFrag, Name), PF(PF) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_PatFrag; }
+
+  const PatFrag &getPatFrag() const { return PF; }
+  StringRef getInstName() const override { return PF.getName(); }
+
+  unsigned getNumInstDefs() const override { return PF.num_out_params(); }
+  unsigned getNumInstOperands() const override { return PF.num_params(); }
+
+  ArrayRef<InstructionOperand> getApplyDefsNeeded() const override;
+
+  bool checkSemantics(ArrayRef<SMLoc> DiagLoc) override;
+
+  /// Before emitting the patterns inside the PatFrag, add all necessary code
+  /// expansions to \p PatFragCEs imported from \p ParentCEs.
+  ///
+  /// For a MachineOperand PatFrag parameter, this will fetch the expansion for
+  /// that operand from \p ParentCEs and add it to \p PatFragCEs. Errors can be
+  /// emitted if the MachineOperand reference is unbound.
+  ///
+  /// For an Immediate PatFrag parameter this simply adds the integer value to
+  /// \p PatFragCEs as an expansion.
+  ///
+  /// \param ParentCEs Contains all of the code expansions declared by the other
+  ///                  patterns emitted so far in the pattern list containing
+  ///                  this PatFragPattern.
+  /// \param PatFragCEs Output Code Expansions (usually empty)
+  /// \param DiagLoc    Diagnostic loc in case an error occurs.
+  /// \return `true` on success, `false` on failure.
+  bool mapInputCodeExpansions(const CodeExpansions &ParentCEs,
+                              CodeExpansions &PatFragCEs,
+                              ArrayRef<SMLoc> DiagLoc) const;
+
+private:
+  const PatFrag &PF;
+};
+
+//===- BuiltinPattern -----------------------------------------------------===//
+
+/// Represents builtin instructions such as "GIReplaceReg" and "GIEraseRoot".
+enum BuiltinKind {
+  BI_ReplaceReg,
+  BI_EraseRoot,
+};
+
+class BuiltinPattern : public InstructionPattern {
+  struct BuiltinInfo {
+    StringLiteral DefName;
+    BuiltinKind Kind;
+    unsigned NumOps;
+    unsigned NumDefs;
+  };
+
+  static constexpr std::array<BuiltinInfo, 2> KnownBuiltins = {{
+      {"GIReplaceReg", BI_ReplaceReg, 2, 1},
+      {"GIEraseRoot", BI_EraseRoot, 0, 0},
+  }};
+
+public:
+  static constexpr StringLiteral ClassName = "GIBuiltinInst";
+
+  BuiltinPattern(const Record &Def, StringRef Name)
+      : InstructionPattern(K_Builtin, Name), I(getBuiltinInfo(Def)) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_Builtin; }
+
+  unsigned getNumInstOperands() const override { return I.NumOps; }
+  unsigned getNumInstDefs() const override { return I.NumDefs; }
+  StringRef getInstName() const override { return I.DefName; }
+  BuiltinKind getBuiltinKind() const { return I.Kind; }
+
+  bool checkSemantics(ArrayRef<SMLoc> Loc) override;
+
+private:
+  static BuiltinInfo getBuiltinInfo(const Record &Def);
+
+  BuiltinInfo I;
+};
+
+} // namespace gi
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_GLOBALISEL_PATTERNS_H
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 071586240e8fb5a..89aca87a28ec0dc 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -28,9 +28,12 @@
 
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
+#include "GlobalISel/CXXPredicates.h"
 #include "GlobalISel/CodeExpander.h"
 #include "GlobalISel/CodeExpansions.h"
 #include "GlobalISel/CombinerUtils.h"
+#include "GlobalISel/MatchDataInfo.h"
+#include "GlobalISel/Patterns.h"
 #include "GlobalISelMatchTable.h"
 #include "GlobalISelMatchTableExecutorEmitter.h"
 #include "SubtargetFeatureInfo.h"
@@ -76,1618 +79,74 @@ cl::opt<bool> DebugTypeInfer("gicombiner-debug-typeinfer",
 
 constexpr StringLiteral CXXApplyPrefix = "GICXXCustomAction_CombineApply";
 constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_";
-constexpr StringLiteral PatFragClassName = "GICombinePatFrag";
-constexpr StringLiteral BuiltinInstClassName = "GIBuiltinInst";
-constexpr StringLiteral SpecialTyClassName = "GISpecialType";
-constexpr StringLiteral TypeOfClassName = "GITypeOf";
 constexpr StringLiteral MIFlagsEnumClassName = "MIFlagEnum";
 
-std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
-  return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
-}
-
-/// Copies a StringRef into a static pool to make sure it has a static lifetime.
-StringRef insertStrRef(StringRef S) {
-  if (S.empty())
-    return {};
-
-  static StringSet<> Pool;
-  auto [It, Inserted] = Pool.insert(S);
-  return It->getKey();
-}
-
-void declareInstExpansion(CodeExpansions &CE, const InstructionMatcher &IM,
-                          StringRef Name) {
-  CE.declare(Name, "State.MIs[" + to_string(IM.getInsnVarID()) + "]");
-}
-
-void declareInstExpansion(CodeExpansions &CE, const BuildMIAction &A,
-                          StringRef Name) {
-  // Note: we use redeclare here because this may overwrite a matcher inst
-  // expansion.
-  CE.redeclare(Name, "OutMIs[" + to_string(A.getInsnID()) + "]");
-}
-
-void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM,
-                             StringRef Name) {
-  CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) +
-                       "]->getOperand(" + to_string(OM.getOpIdx()) + ")");
-}
-
-void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID,
-                             StringRef Name) {
-  CE.declare(Name, "State.TempRegisters[" + to_string(TempRegID) + "]");
-}
-
-std::string makeAnonPatName(StringRef Prefix, unsigned Idx) {
-  return ("__" + Prefix + "_" + Twine(Idx)).str();
-}
-
-template <typename Container> auto keys(Container &&C) {
-  return map_range(C, [](auto &Entry) -> auto & { return Entry.first; });
-}
-
-template <typename Container> auto values(Container &&C) {
-  return map_range(C, [](auto &Entry) -> auto & { return Entry.second; });
-}
-
-//===- MatchData Handling -------------------------------------------------===//
-
-/// Represents MatchData defined by the match stage and required by the apply
-/// stage.
-///
-/// This allows the plumbing of arbitrary data from C++ predicates between the
-/// stages.
-///
-/// When this class is initially created, it only has a pattern symbol and a
-/// type. When all of the MatchDatas declarations of a given pattern have been
-/// parsed, `AssignVariables` must be called to assign storage variable names to
-/// each MatchDataInfo.
-class MatchDataInfo {
-  StringRef PatternSymbol;
-  StringRef Type;
-  std::string VarName;
-
-public:
-  static constexpr StringLiteral StructTypeName = "MatchInfosTy";
-  static constexpr StringLiteral StructName = "MatchInfos";
-
-  MatchDataInfo(StringRef PatternSymbol, StringRef Type)
-      : PatternSymbol(PatternSymbol), Type(Type.trim()) {}
-
-  StringRef getPatternSymbol() const { return PatternSymbol; };
-  StringRef getType() const { return Type; };
-
-  bool hasVariableName() const { return !VarName.empty(); }
-  void setVariableName(StringRef Name) { VarName = Name; }
-  StringRef getVariableName() const;
-
-  std::string getQualifiedVariableName() const {
-    return StructName.str() + "." + getVariableName().str();
-  }
-
-  void print(raw_ostream &OS) const;
-  void dump() const { print(dbgs()); }
-};
-
-StringRef MatchDataInfo::getVariableName() const {
-  assert(hasVariableName());
-  return VarName;
-}
-
-void MatchDataInfo::print(raw_ostream &OS) const {
-  OS << "(MatchDataInfo pattern_symbol:" << PatternSymbol << " type:'" << Type
-     << "' var_name:" << (VarName.empty() ? "<unassigned>" : VarName) << ")";
-}
-
-/// Pool of type -> variables used to emit MatchData variables declarations.
-///
-/// e.g. if the map contains "int64_t" -> ["MD0", "MD1"], then two variable
-/// declarations must be emitted: `int64_t MD0` and `int64_t MD1`.
-///
-/// This has a static lifetime and will outlive all the `MatchDataInfo` objects
-/// by design. It needs to persist after all `CombineRuleBuilder` objects died
-/// so we can emit the variable declarations.
-StringMap<std::vector<std::string>> AllMatchDataVars;
-
-// Assign variable names to all MatchDatas used by a pattern. This must be
-// called after all MatchData decls have been parsed inside a rule.
-//
-// Requires an array of MatchDataInfo so we can handle cases where a pattern
-// uses multiple instances of the same MatchData type.
-void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos) {
-  static unsigned NextVarID = 0;
-
-  StringMap<unsigned> SeenTypes;
-  for (auto &Info : Infos) {
-    unsigned &NumSeen = SeenTypes[Info.getType()];
-    auto &ExistingVars = AllMatchDataVars[Info.getType()];
-
-    if (NumSeen == ExistingVars.size())
-      ExistingVars.push_back("MDInfo" + to_string(NextVarID++));
-
-    Info.setVariableName(ExistingVars[NumSeen++]);
-  }
-}
-
-//===- C++ Predicates Handling --------------------------------------------===//
-
-/// Entry into the static pool of all CXX Predicate code. This contains
-/// fully expanded C++ code.
-///
-/// The static pool is hidden inside the object and can be accessed through
-/// getAllMatchCode/getAllApplyCode
-///
-/// Note that CXXPattern trims C++ code, so the Code is already expected to be
-/// free of leading/trailing whitespace.
-class CXXPredicateCode {
-  using CXXPredicateCodePool =
-      DenseMap<hash_code, std::unique_ptr<CXXPredicateCode>>;
-  static CXXPredicateCodePool AllCXXMatchCode;
-  static CXXPredicateCodePool AllCXXApplyCode;
-
-  /// Sorts a `CXXPredicateCodePool` by their IDs and returns it.
-  static std::vector<const CXXPredicateCode *>
-  getSorted(const CXXPredicateCodePool &Pool) {
-    std::vector<const CXXPredicateCode *> Out;
-    std::transform(Pool.begin(), Pool.end(), std::back_inserter(Out),
-                   [&](auto &Elt) { return Elt.second.get(); });
-    sort(Out, [](const auto *A, const auto *B) { return A->ID < B->ID; });
-    return Out;
-  }
-
-  /// Gets an instance of `CXXPredicateCode` for \p Code, or returns an already
-  /// existing one.
-  static const CXXPredicateCode &get(CXXPredicateCodePool &Pool,
-                                     std::string Code) {
-    // Check if we already have an identical piece of code, if not, create an
-    // entry in the pool.
-    const auto CodeHash = hash_value(Code);
-    if (auto It = Pool.find(CodeHash); It != Pool.end())
-      return *It->second;
-
-    const auto ID = Pool.size();
-    auto OwnedData = std::unique_ptr<CXXPredicateCode>(
-        new CXXPredicateCode(std::move(Code), ID));
-    const auto &DataRef = *OwnedData;
-    Pool[CodeHash] = std::move(OwnedData);
-    return DataRef;
-  }
-
-  CXXPredicateCode(std::string Code, unsigned ID)
-      : Code(Code), ID(ID), BaseEnumName("GICombiner" + to_string(ID)) {
-    // Don't assert if ErrorsPrinted is set. This may mean CodeExpander failed,
-    // and it may add spaces in such cases.
-    assert((ErrorsPrinted || StringRef(Code).trim() == Code) &&
-           "Code was expected to be trimmed!");
-  }
-
-public:
-  static const CXXPredicateCode &getMatchCode(std::string Code) {
-    return get(AllCXXMatchCode, std::move(Code));
-  }
-
-  static const CXXPredicateCode &getApplyCode(std::string Code) {
-    return get(AllCXXApplyCode, std::move(Code));
-  }
-
-  static std::vector<const CXXPredicateCode *> getAllMatchCode() {
-    return getSorted(AllCXXMatchCode);
-  }
-
-  static std::vector<const CXXPredicateCode *> getAllApplyCode() {
-    return getSorted(AllCXXApplyCode);
-  }
-
-  const std::string Code;
-  const unsigned ID;
-  const std::string BaseEnumName;
-
-  bool needsUnreachable() const {
-    return !StringRef(Code).starts_with("return");
-  }
-
-  std::string getEnumNameWithPrefix(StringRef Prefix) const {
-    return Prefix.str() + BaseEnumName;
-  }
-};
-
-CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXMatchCode;
-CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXApplyCode;
-
-//===- PatternType --------------------------------------------------------===//
-
-/// Represent the type of a Pattern Operand.
-///
-/// Types have two form:
-///   - LLTs, which are straightforward.
-///   - Special types, e.g. GITypeOf
-class PatternType {
-public:
-  enum PTKind : uint8_t {
-    PT_None,
-
-    PT_ValueType,
-    PT_TypeOf,
-  };
-
-  PatternType() : Kind(PT_None), Data() {}
-
-  static std::optional<PatternType> get(ArrayRef<SMLoc> DiagLoc,
-                                        const Record *R, Twine DiagCtx);
-  static PatternType getTypeOf(StringRef OpName);
-
-  bool isNone() const { return Kind == PT_None; }
-  bool isLLT() const { return Kind == PT_ValueType; }
-  bool isSpecial() const { return isTypeOf(); }
-  bool isTypeOf() const { return Kind == PT_TypeOf; }
-
-  StringRef getTypeOfOpName() const;
-  LLTCodeGen getLLTCodeGen() const;
-
-  LLTCodeGenOrTempType getLLTCodeGenOrTempType(RuleMatcher &RM) const;
-
-  explicit operator bool() const { return !isNone(); }
-
-  bool operator==(const PatternType &Other) const;
-  bool operator!=(const PatternType &Other) const { return !operator==(Other); }
-
-  std::string str() const;
-
-private:
-  PatternType(PTKind Kind) : Kind(Kind), Data() {}
-
-  PTKind Kind;
-  union DataT {
-    DataT() : Str() {}
-
-    /// PT_ValueType -> ValueType Def.
-    const Record *Def;
-
-    /// PT_TypeOf -> Operand name (without the '$')
-    StringRef Str;
-  } Data;
-};
-
-std::optional<PatternType> PatternType::get(ArrayRef<SMLoc> DiagLoc,
-                                            const Record *R, Twine DiagCtx) {
-  assert(R);
-  if (R->isSubClassOf("ValueType")) {
-    PatternType PT(PT_ValueType);
-    PT.Data.Def = R;
-    return PT;
-  }
-
-  if (R->isSubClassOf(TypeOfClassName)) {
-    auto RawOpName = R->getValueAsString("OpName");
-    if (!RawOpName.starts_with("$")) {
-      PrintError(DiagLoc, DiagCtx + ": invalid operand name format '" +
-                              RawOpName + "' in " + TypeOfClassName +
-                              ": expected '$' followed by an operand name");
-      return std::nullopt;
-    }
-
-    PatternType PT(PT_TypeOf);
-    PT.Data.Str = RawOpName.drop_front(1);
-    return PT;
-  }
-
-  PrintError(DiagLoc, DiagCtx + ": unknown type '" + R->getName() + "'");
-  return std::nullopt;
-}
-
-PatternType PatternType::getTypeOf(StringRef OpName) {
-  PatternType PT(PT_TypeOf);
-  PT.Data.Str = OpName;
-  return PT;
-}
-
-StringRef PatternType::getTypeOfOpName() const {
-  assert(isTypeOf());
-  return Data.Str;
-}
-
-LLTCodeGen PatternType::getLLTCodeGen() const {
-  assert(isLLT());
-  return *MVTToLLT(getValueType(Data.Def));
-}
-
-LLTCodeGenOrTempType
-PatternType::getLLTCodeGenOrTempType(RuleMatcher &RM) const {
-  assert(!isNone());
-
-  if (isLLT())
-    return getLLTCodeGen();
-
-  assert(isTypeOf());
-  auto &OM = RM.getOperandMatcher(getTypeOfOpName());
-  return OM.getTempTypeIdx(RM);
-}
-
-bool PatternType::operator==(const PatternType &Other) const {
-  if (Kind != Other.Kind)
-    return false;
-
-  switch (Kind) {
-  case PT_None:
-    return true;
-  case PT_ValueType:
-    return Data.Def == Other.Data.Def;
-  case PT_TypeOf:
-    return Data.Str == Other.Data.Str;
-  }
-
-  llvm_unreachable("Unknown Type Kind");
-}
-
-std::string PatternType::str() const {
-  switch (Kind) {
-  case PT_None:
-    return "";
-  case PT_ValueType:
-    return Data.Def->getName().str();
-  case PT_TypeOf:
-    return (TypeOfClassName + "<$" + getTypeOfOpName() + ">").str();
-  }
-
-  llvm_unreachable("Unknown type!");
-}
-
-//===- Pattern Base Class -------------------------------------------------===//
-
-/// Base class for all patterns that can be written in an `apply`, `match` or
-/// `pattern` DAG operator.
-///
-/// For example:
-///
-///     (apply (G_ZEXT $x, $y), (G_ZEXT $y, $z), "return isFoo(${z})")
-///
-/// Creates 3 Pattern objects:
-///   - Two CodeGenInstruction Patterns
-///   - A CXXPattern
-class Pattern {
-public:
-  enum {
-    K_AnyOpcode,
-    K_CXX,
-
-    K_CodeGenInstruction,
-    K_PatFrag,
-    K_Builtin,
-  };
-
-  virtual ~Pattern() = default;
-
-  unsigned getKind() const { return Kind; }
-  const char *getKindName() const;
-
-  bool hasName() const { return !Name.empty(); }
-  StringRef getName() const { return Name; }
-
-  virtual void print(raw_ostream &OS, bool PrintName = true) const = 0;
-  void dump() const { return print(dbgs()); }
-
-protected:
-  Pattern(unsigned Kind, StringRef Name)
-      : Kind(Kind), Name(insertStrRef(Name)) {
-    assert(!Name.empty() && "unnamed pattern!");
-  }
-
-  void printImpl(raw_ostream &OS, bool PrintName,
-                 function_ref<void()> ContentPrinter) const;
-
-private:
-  unsigned Kind;
-  StringRef Name;
-};
-
-const char *Pattern::getKindName() const {
-  switch (Kind) {
-  case K_AnyOpcode:
-    return "AnyOpcodePattern";
-  case K_CXX:
-    return "CXXPattern";
-  case K_CodeGenInstruction:
-    return "CodeGenInstructionPattern";
-  case K_PatFrag:
-    return "PatFragPattern";
-  case K_Builtin:
-    return "BuiltinPattern";
-  }
-
-  llvm_unreachable("unknown pattern kind!");
-}
-
-void Pattern::printImpl(raw_ostream &OS, bool PrintName,
-                        function_ref<void()> ContentPrinter) const {
-  OS << "(" << getKindName() << " ";
-  if (PrintName)
-    OS << "name:" << getName() << " ";
-  ContentPrinter();
-  OS << ")";
-}
-
-//===- AnyOpcodePattern ---------------------------------------------------===//
-
-/// `wip_match_opcode` patterns.
-/// This matches one or more opcodes, and does not check any operands
-/// whatsoever.
-///
-/// TODO: Long-term, this needs to be removed. It's a hack around MIR
-///       pattern matching limitations.
-class AnyOpcodePattern : public Pattern {
-public:
-  AnyOpcodePattern(StringRef Name) : Pattern(K_AnyOpcode, Name) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_AnyOpcode; }
-
-  void addOpcode(const CodeGenInstruction *I) { Insts.push_back(I); }
-  const auto &insts() const { return Insts; }
-
-  void print(raw_ostream &OS, bool PrintName = true) const override;
-
-private:
-  SmallVector<const CodeGenInstruction *, 4> Insts;
-};
-
-void AnyOpcodePattern::print(raw_ostream &OS, bool PrintName) const {
-  printImpl(OS, PrintName, [&OS, this]() {
-    OS << "["
-       << join(map_range(Insts,
-                         [](const auto *I) { return I->TheDef->getName(); }),
-               ", ")
-       << "]";
-  });
-}
-
-//===- CXXPattern ---------------------------------------------------------===//
-
-/// Represents raw C++ code which may need some expansions.
-///
-///   e.g. [{ return isFooBux(${src}.getReg()); }]
-///
-/// For the expanded code, \see CXXPredicateCode. CXXPredicateCode objects are
-/// created through `expandCode`.
-///
-/// \see CodeExpander and \see CodeExpansions for more information on code
-/// expansions.
-///
-/// This object has two purposes:
-///   - Represent C++ code as a pattern entry.
-///   - Be a factory for expanded C++ code.
-///     - It's immutable and only holds the raw code so we can expand the same
-///       CXX pattern multiple times if we need to.
-///
-/// Note that the code is always trimmed in the constructor, so leading and
-/// trailing whitespaces are removed. This removes bloat in the output, avoids
-/// formatting issues, but also allows us to check things like
-/// `.startswith("return")` trivially without worrying about spaces.
-class CXXPattern : public Pattern {
-public:
-  CXXPattern(const StringInit &Code, StringRef Name)
-      : CXXPattern(Code.getAsUnquotedString(), Name) {}
-
-  CXXPattern(StringRef Code, StringRef Name)
-      : Pattern(K_CXX, Name), RawCode(Code.trim().str()) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_CXX; }
-
-  void setIsApply(bool Value = true) { IsApply = Value; }
-  StringRef getRawCode() const { return RawCode; }
-
-  /// Expands raw code, replacing things such as `${foo}` with their
-  /// substitution in \p CE.
-  ///
-  /// \param CE     Map of Code Expansions
-  /// \param Locs   SMLocs for the Code Expander, in case it needs to emit
-  ///               diagnostics.
-  /// \param AddComment If DebugCXXPreds is enabled, this is called to emit a
-  ///                   comment before the expanded code.
-  ///
-  /// \return A CXXPredicateCode object that contains the expanded code. Note
-  /// that this may or may not insert a new object. All CXXPredicateCode objects
-  /// are held in a set to avoid emitting duplicate C++ code.
-  const CXXPredicateCode &
-  expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
-             function_ref<void(raw_ostream &)> AddComment = {}) const;
-
-  void print(raw_ostream &OS, bool PrintName = true) const override;
-
-private:
-  bool IsApply = false;
-  std::string RawCode;
-};
-
-const CXXPredicateCode &
-CXXPattern::expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
-                       function_ref<void(raw_ostream &)> AddComment) const {
-  std::string Result;
-  raw_string_ostream OS(Result);
-
-  if (DebugCXXPreds && AddComment)
-    AddComment(OS);
-
-  CodeExpander Expander(RawCode, CE, Locs, /*ShowExpansions*/ false);
-  Expander.emit(OS);
-  if (IsApply)
-    return CXXPredicateCode::getApplyCode(std::move(Result));
-  return CXXPredicateCode::getMatchCode(std::move(Result));
-}
-
-void CXXPattern::print(raw_ostream &OS, bool PrintName) const {
-  printImpl(OS, PrintName, [&OS, this] {
-    OS << (IsApply ? "apply" : "match") << " code:\"";
-    printEscapedString(getRawCode(), OS);
-    OS << "\"";
-  });
-}
-
-//===- InstructionPattern ---------------------------------------------===//
-
-/// An operand for an InstructionPattern.
-///
-/// Operands are composed of three elements:
-///   - (Optional) Value
-///   - (Optional) Name
-///   - (Optional) Type
-///
-/// Some examples:
-///   (i32 0):$x -> V=int(0), Name='x', Type=i32
-///   0:$x -> V=int(0), Name='x'
-///   $x -> Name='x'
-///   i32:$x -> Name='x', Type = i32
-class InstructionOperand {
-public:
-  using IntImmTy = int64_t;
-
-  InstructionOperand(IntImmTy Imm, StringRef Name, PatternType Type)
-      : Value(Imm), Name(insertStrRef(Name)), Type(Type) {}
-
-  InstructionOperand(StringRef Name, PatternType Type)
-      : Name(insertStrRef(Name)), Type(Type) {}
-
-  bool isNamedImmediate() const { return hasImmValue() && isNamedOperand(); }
-
-  bool hasImmValue() const { return Value.has_value(); }
-  IntImmTy getImmValue() const { return *Value; }
-
-  bool isNamedOperand() const { return !Name.empty(); }
-  StringRef getOperandName() const {
-    assert(isNamedOperand() && "Operand is unnamed");
-    return Name;
-  }
-
-  InstructionOperand withNewName(StringRef NewName) const {
-    InstructionOperand Result = *this;
-    Result.Name = insertStrRef(NewName);
-    return Result;
-  }
-
-  void setIsDef(bool Value = true) { Def = Value; }
-  bool isDef() const { return Def; }
-
-  void setType(PatternType NewType) {
-    assert((!Type || (Type == NewType)) && "Overwriting type!");
-    Type = NewType;
-  }
-  PatternType getType() const { return Type; }
-
-  std::string describe() const {
-    if (!hasImmValue())
-      return "MachineOperand $" + getOperandName().str() + "";
-    std::string Str = "imm " + to_string(getImmValue());
-    if (isNamedImmediate())
-      Str += ":$" + getOperandName().str() + "";
-    return Str;
-  }
-
-  void print(raw_ostream &OS) const {
-    if (isDef())
-      OS << "<def>";
-
-    bool NeedsColon = true;
-    if (Type) {
-      if (hasImmValue())
-        OS << "(" << Type.str() << " " << getImmValue() << ")";
-      else
-        OS << Type.str();
-    } else if (hasImmValue())
-      OS << getImmValue();
-    else
-      NeedsColon = false;
-
-    if (isNamedOperand())
-      OS << (NeedsColon ? ":" : "") << "$" << getOperandName();
-  }
-
-  void dump() const { return print(dbgs()); }
-
-private:
-  std::optional<int64_t> Value;
-  StringRef Name;
-  PatternType Type;
-  bool Def = false;
-};
-
-/// Base class for CodeGenInstructionPattern & PatFragPattern, which handles all
-/// the boilerplate for patterns that have a list of operands for some (pseudo)
-/// instruction.
-class InstructionPattern : public Pattern {
-public:
-  virtual ~InstructionPattern() = default;
-
-  static bool classof(const Pattern *P) {
-    return P->getKind() == K_CodeGenInstruction || P->getKind() == K_PatFrag ||
-           P->getKind() == K_Builtin;
-  }
-
-  template <typename... Ty> void addOperand(Ty &&...Init) {
-    Operands.emplace_back(std::forward<Ty>(Init)...);
-  }
-
-  auto &operands() { return Operands; }
-  const auto &operands() const { return Operands; }
-  unsigned operands_size() const { return Operands.size(); }
-  InstructionOperand &getOperand(unsigned K) { return Operands[K]; }
-  const InstructionOperand &getOperand(unsigned K) const { return Operands[K]; }
-
-  /// When this InstructionPattern is used as the match root, returns the
-  /// operands that must be redefined in the 'apply' pattern for the rule to be
-  /// valid.
-  ///
-  /// For most patterns, this just returns the defs.
-  /// For PatFrag this only returns the root of the PF.
-  ///
-  /// Returns an empty array on error.
-  virtual ArrayRef<InstructionOperand> getApplyDefsNeeded() const {
-    return {operands().begin(), getNumInstDefs()};
-  }
-
-  auto named_operands() {
-    return make_filter_range(Operands,
-                             [&](auto &O) { return O.isNamedOperand(); });
-  }
-
-  auto named_operands() const {
-    return make_filter_range(Operands,
-                             [&](auto &O) { return O.isNamedOperand(); });
-  }
-
-  virtual bool isVariadic() const { return false; }
-  virtual unsigned getNumInstOperands() const = 0;
-  virtual unsigned getNumInstDefs() const = 0;
-
-  bool hasAllDefs() const { return operands_size() >= getNumInstDefs(); }
-
-  virtual StringRef getInstName() const = 0;
-
-  /// Diagnoses all uses of special types in this Pattern and returns true if at
-  /// least one diagnostic was emitted.
-  bool diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc, Twine Msg) const;
-
-  void reportUnreachable(ArrayRef<SMLoc> Locs) const;
-  virtual bool checkSemantics(ArrayRef<SMLoc> Loc);
-
-  void print(raw_ostream &OS, bool PrintName = true) const override;
-
-protected:
-  InstructionPattern(unsigned K, StringRef Name) : Pattern(K, Name) {}
-
-  virtual void printExtras(raw_ostream &OS) const {}
-
-  SmallVector<InstructionOperand, 4> Operands;
-};
-
-bool InstructionPattern::diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc,
-                                                 Twine Msg) const {
-  bool HasDiag = false;
-  for (const auto &[Idx, Op] : enumerate(operands())) {
-    if (Op.getType().isSpecial()) {
-      PrintError(Loc, Msg);
-      PrintNote(Loc, "operand " + Twine(Idx) + " of '" + getName() +
-                         "' has type '" + Op.getType().str() + "'");
-      HasDiag = true;
-    }
-  }
-  return HasDiag;
-}
-
-void InstructionPattern::reportUnreachable(ArrayRef<SMLoc> Locs) const {
-  PrintError(Locs, "pattern '" + getName() + "' ('" + getInstName() +
-                       "') is unreachable from the pattern root!");
-}
-
-bool InstructionPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
-  unsigned NumExpectedOperands = getNumInstOperands();
-
-  if (isVariadic()) {
-    if (Operands.size() < NumExpectedOperands) {
-      PrintError(Loc, +"'" + getInstName() + "' expected at least " +
-                          Twine(NumExpectedOperands) + " operands, got " +
-                          Twine(Operands.size()));
-      return false;
-    }
-  } else if (NumExpectedOperands != Operands.size()) {
-    PrintError(Loc, +"'" + getInstName() + "' expected " +
-                        Twine(NumExpectedOperands) + " operands, got " +
-                        Twine(Operands.size()));
-    return false;
-  }
-
-  unsigned OpIdx = 0;
-  unsigned NumDefs = getNumInstDefs();
-  for (auto &Op : Operands)
-    Op.setIsDef(OpIdx++ < NumDefs);
-
-  return true;
-}
-
-void InstructionPattern::print(raw_ostream &OS, bool PrintName) const {
-  printImpl(OS, PrintName, [&OS, this] {
-    OS << getInstName() << " operands:[";
-    StringRef Sep = "";
-    for (const auto &Op : Operands) {
-      OS << Sep;
-      Op.print(OS);
-      Sep = ", ";
-    }
-    OS << "]";
-
-    printExtras(OS);
-  });
-}
-
-//===- OperandTable -------------------------------------------------------===//
-
-/// Maps InstructionPattern operands to their definitions. This allows us to tie
-/// different patterns of a (apply), (match) or (patterns) set of patterns
-/// together.
-class OperandTable {
-public:
-  bool addPattern(InstructionPattern *P,
-                  function_ref<void(StringRef)> DiagnoseRedef) {
-    for (const auto &Op : P->named_operands()) {
-      StringRef OpName = Op.getOperandName();
-
-      // We always create an entry in the OperandTable, even for uses.
-      // Uses of operands that don't have a def (= live-ins) will remain with a
-      // nullptr as the Def.
-      //
-      // This allows us tell whether an operand exists in a pattern or not. If
-      // there is no entry for it, it doesn't exist, if there is an entry, it's
-      // used/def'd at least once.
-      auto &Def = Table[OpName];
-
-      if (!Op.isDef())
-        continue;
-
-      if (Def) {
-        DiagnoseRedef(OpName);
-        return false;
-      }
-
-      Def = P;
-    }
-
-    return true;
-  }
-
-  struct LookupResult {
-    LookupResult() = default;
-    LookupResult(InstructionPattern *Def) : Found(true), Def(Def) {}
-
-    bool Found = false;
-    InstructionPattern *Def = nullptr;
-
-    bool isLiveIn() const { return Found && !Def; }
-  };
-
-  LookupResult lookup(StringRef OpName) const {
-    if (auto It = Table.find(OpName); It != Table.end())
-      return LookupResult(It->second);
-    return LookupResult();
-  }
-
-  InstructionPattern *getDef(StringRef OpName) const {
-    return lookup(OpName).Def;
-  }
-
-  void print(raw_ostream &OS, StringRef Name = "",
-             StringRef Indent = "") const {
-    OS << Indent << "(OperandTable ";
-    if (!Name.empty())
-      OS << Name << " ";
-    if (Table.empty()) {
-      OS << "<empty>)\n";
-      return;
-    }
-
-    SmallVector<StringRef, 0> Keys(Table.keys());
-    sort(Keys);
-
-    OS << '\n';
-    for (const auto &Key : Keys) {
-      const auto *Def = Table.at(Key);
-      OS << Indent << "  " << Key << " -> "
-         << (Def ? Def->getName() : "<live-in>") << '\n';
-    }
-    OS << Indent << ")\n";
-  }
-
-  auto begin() const { return Table.begin(); }
-  auto end() const { return Table.end(); }
-
-  void dump() const { print(dbgs()); }
-
-private:
-  StringMap<InstructionPattern *> Table;
-};
-
-//===- CodeGenInstructionPattern ------------------------------------------===//
-
-/// Helper class to contain data associated with a MIFlags operator.
-class MIFlagsInfo {
-public:
-  void addSetFlag(const Record *R) {
-    SetF.insert(R->getValueAsString("EnumName"));
-  }
-  void addUnsetFlag(const Record *R) {
-    UnsetF.insert(R->getValueAsString("EnumName"));
-  }
-  void addCopyFlag(StringRef InstName) { CopyF.insert(insertStrRef(InstName)); }
-
-  const auto &set_flags() const { return SetF; }
-  const auto &unset_flags() const { return UnsetF; }
-  const auto &copy_flags() const { return CopyF; }
-
-private:
-  SetVector<StringRef> SetF, UnsetF, CopyF;
-};
-
-/// Matches an instruction, e.g. `G_ADD $x, $y, $z`.
-class CodeGenInstructionPattern : public InstructionPattern {
-public:
-  CodeGenInstructionPattern(const CodeGenInstruction &I, StringRef Name)
-      : InstructionPattern(K_CodeGenInstruction, Name), I(I) {}
-
-  static bool classof(const Pattern *P) {
-    return P->getKind() == K_CodeGenInstruction;
-  }
-
-  bool is(StringRef OpcodeName) const {
-    return I.TheDef->getName() == OpcodeName;
-  }
-
-  bool hasVariadicDefs() const;
-  bool isVariadic() const override { return I.Operands.isVariadic; }
-  unsigned getNumInstDefs() const override;
-  unsigned getNumInstOperands() const override;
-
-  MIFlagsInfo &getOrCreateMIFlagsInfo();
-  const MIFlagsInfo *getMIFlagsInfo() const { return FI.get(); }
-
-  const CodeGenInstruction &getInst() const { return I; }
-  StringRef getInstName() const override { return I.TheDef->getName(); }
-
-private:
-  void printExtras(raw_ostream &OS) const override;
-
-  const CodeGenInstruction &I;
-  std::unique_ptr<MIFlagsInfo> FI;
-};
-
-bool CodeGenInstructionPattern::hasVariadicDefs() const {
-  // Note: we cannot use variadicOpsAreDefs, it's not set for
-  // GenericInstructions.
-  if (!isVariadic())
-    return false;
-
-  if (I.variadicOpsAreDefs)
-    return true;
-
-  DagInit *OutOps = I.TheDef->getValueAsDag("OutOperandList");
-  if (OutOps->arg_empty())
-    return false;
-
-  auto *LastArgTy = dyn_cast<DefInit>(OutOps->getArg(OutOps->arg_size() - 1));
-  return LastArgTy && LastArgTy->getDef()->getName() == "variable_ops";
-}
-
-unsigned CodeGenInstructionPattern::getNumInstDefs() const {
-  if (!isVariadic() || !hasVariadicDefs())
-    return I.Operands.NumDefs;
-  unsigned NumOuts = I.Operands.size() - I.Operands.NumDefs;
-  assert(Operands.size() > NumOuts);
-  return std::max<unsigned>(I.Operands.NumDefs, Operands.size() - NumOuts);
-}
-
-unsigned CodeGenInstructionPattern::getNumInstOperands() const {
-  unsigned NumCGIOps = I.Operands.size();
-  return isVariadic() ? std::max<unsigned>(NumCGIOps, Operands.size())
-                      : NumCGIOps;
-}
-
-MIFlagsInfo &CodeGenInstructionPattern::getOrCreateMIFlagsInfo() {
-  if (!FI)
-    FI = std::make_unique<MIFlagsInfo>();
-  return *FI;
-}
-
-void CodeGenInstructionPattern::printExtras(raw_ostream &OS) const {
-  if (!FI)
-    return;
-
-  OS << " (MIFlags";
-  if (!FI->set_flags().empty())
-    OS << " (set " << join(FI->set_flags(), ", ") << ")";
-  if (!FI->unset_flags().empty())
-    OS << " (unset " << join(FI->unset_flags(), ", ") << ")";
-  if (!FI->copy_flags().empty())
-    OS << " (copy " << join(FI->copy_flags(), ", ") << ")";
-  OS << ')';
-}
-
-//===- OperandTypeChecker -------------------------------------------------===//
-
-/// This is a trivial type checker for all operands in a set of
-/// InstructionPatterns.
-///
-/// It infers the type of each operand, check it's consistent with the known
-/// type of the operand, and then sets all of the types in all operands in
-/// propagateTypes.
-///
-/// It also handles verifying correctness of special types.
-class OperandTypeChecker {
-public:
-  OperandTypeChecker(ArrayRef<SMLoc> DiagLoc) : DiagLoc(DiagLoc) {}
-
-  /// Step 1: Check each pattern one by one. All patterns that pass through here
-  /// are added to a common worklist so propagateTypes can access them.
-  bool check(InstructionPattern &P,
-             std::function<bool(const PatternType &)> VerifyTypeOfOperand);
-
-  /// Step 2: Propagate all types. e.g. if one use of "$a" has type i32, make
-  /// all uses of "$a" have type i32.
-  void propagateTypes();
-
-protected:
-  ArrayRef<SMLoc> DiagLoc;
-
-private:
-  using InconsistentTypeDiagFn = std::function<void()>;
-
-  void PrintSeenWithTypeIn(InstructionPattern &P, StringRef OpName,
-                           PatternType Ty) const {
-    PrintNote(DiagLoc, "'" + OpName + "' seen with type '" + Ty.str() +
-                           "' in '" + P.getName() + "'");
-  }
-
-  struct OpTypeInfo {
-    PatternType Type;
-    InconsistentTypeDiagFn PrintTypeSrcNote = []() {};
-  };
-
-  StringMap<OpTypeInfo> Types;
-
-  SmallVector<InstructionPattern *, 16> Pats;
-};
-
-bool OperandTypeChecker::check(
-    InstructionPattern &P,
-    std::function<bool(const PatternType &)> VerifyTypeOfOperand) {
-  Pats.push_back(&P);
+//===- CodeExpansions Helpers  --------------------------------------------===//
 
-  for (auto &Op : P.operands()) {
-    const auto Ty = Op.getType();
-    if (!Ty)
-      continue;
-
-    if (Ty.isTypeOf() && !VerifyTypeOfOperand(Ty))
-      return false;
-
-    if (!Op.isNamedOperand())
-      continue;
-
-    StringRef OpName = Op.getOperandName();
-    auto &Info = Types[OpName];
-    if (!Info.Type) {
-      Info.Type = Ty;
-      Info.PrintTypeSrcNote = [this, OpName, Ty, &P]() {
-        PrintSeenWithTypeIn(P, OpName, Ty);
-      };
-      continue;
-    }
-
-    if (Info.Type != Ty) {
-      PrintError(DiagLoc, "conflicting types for operand '" +
-                              Op.getOperandName() + "': '" + Info.Type.str() +
-                              "' vs '" + Ty.str() + "'");
-      PrintSeenWithTypeIn(P, OpName, Ty);
-      Info.PrintTypeSrcNote();
-      return false;
-    }
-  }
-
-  return true;
-}
-
-void OperandTypeChecker::propagateTypes() {
-  for (auto *Pat : Pats) {
-    for (auto &Op : Pat->named_operands()) {
-      if (auto &Info = Types[Op.getOperandName()]; Info.Type)
-        Op.setType(Info.Type);
-    }
-  }
-}
-
-//===- PatFrag ------------------------------------------------------------===//
-
-/// Represents a parsed GICombinePatFrag. This can be thought of as the
-/// equivalent of a CodeGenInstruction, but for PatFragPatterns.
-///
-/// PatFrags are made of 3 things:
-///   - Out parameters (defs)
-///   - In parameters
-///   - A set of pattern lists (alternatives).
-///
-/// If the PatFrag uses instruction patterns, the root must be one of the defs.
-///
-/// Note that this DOES NOT represent the use of the PatFrag, only its
-/// definition. The use of the PatFrag in a Pattern is represented by
-/// PatFragPattern.
-///
-/// PatFrags use the term "parameter" instead of operand because they're
-/// essentially macros, and using that name avoids confusion. Other than that,
-/// they're structured similarly to a MachineInstruction  - all parameters
-/// (operands) are in the same list, with defs at the start. This helps mapping
-/// parameters to values, because, param N of a PatFrag is always operand N of a
-/// PatFragPattern.
-class PatFrag {
-public:
-  enum ParamKind {
-    PK_Root,
-    PK_MachineOperand,
-    PK_Imm,
-  };
-
-  struct Param {
-    StringRef Name;
-    ParamKind Kind;
-  };
-
-  using ParamVec = SmallVector<Param, 4>;
-  using ParamIt = ParamVec::const_iterator;
-
-  /// Represents an alternative of the PatFrag. When parsing a GICombinePatFrag,
-  /// this is created from its "Alternatives" list. Each alternative is a list
-  /// of patterns written wrapped in a  `(pattern ...)` dag init.
-  ///
-  /// Each argument to the `pattern` DAG operator is parsed into a Pattern
-  /// instance.
-  struct Alternative {
-    OperandTable OpTable;
-    SmallVector<std::unique_ptr<Pattern>, 4> Pats;
-  };
-
-  explicit PatFrag(const Record &Def) : Def(Def) {
-    assert(Def.isSubClassOf(PatFragClassName));
-  }
-
-  static StringRef getParamKindStr(ParamKind OK);
-
-  StringRef getName() const { return Def.getName(); }
-
-  const Record &getDef() const { return Def; }
-  ArrayRef<SMLoc> getLoc() const { return Def.getLoc(); }
-
-  Alternative &addAlternative() { return Alts.emplace_back(); }
-  const Alternative &getAlternative(unsigned K) const { return Alts[K]; }
-  unsigned num_alternatives() const { return Alts.size(); }
-
-  void addInParam(StringRef Name, ParamKind Kind);
-  iterator_range<ParamIt> in_params() const;
-  unsigned num_in_params() const { return Params.size() - NumOutParams; }
-
-  void addOutParam(StringRef Name, ParamKind Kind);
-  iterator_range<ParamIt> out_params() const;
-  unsigned num_out_params() const { return NumOutParams; }
-
-  unsigned num_roots() const;
-  unsigned num_params() const { return num_in_params() + num_out_params(); }
-
-  /// Finds the operand \p Name and returns its index or -1 if not found.
-  /// Remember that all params are part of the same list, with out params at the
-  /// start. This means that the index returned can be used to access operands
-  /// of InstructionPatterns.
-  unsigned getParamIdx(StringRef Name) const;
-  const Param &getParam(unsigned K) const { return Params[K]; }
-
-  bool canBeMatchRoot() const { return num_roots() == 1; }
-
-  void print(raw_ostream &OS, StringRef Indent = "") const;
-  void dump() const { print(dbgs()); }
-
-  /// Checks if the in-param \p ParamName can be unbound or not.
-  /// \p ArgName is the name of the argument passed to the PatFrag.
-  ///
-  /// An argument can be unbound only if, for all alternatives:
-  ///   - There is no CXX pattern, OR:
-  ///   - There is an InstructionPattern that binds the parameter.
-  ///
-  /// e.g. in (MyPatFrag $foo), if $foo has never been seen before (= it's
-  /// unbound), this checks if MyPatFrag supports it or not.
-  bool handleUnboundInParam(StringRef ParamName, StringRef ArgName,
-                            ArrayRef<SMLoc> DiagLoc) const;
-
-  bool checkSemantics();
-  bool buildOperandsTables();
-
-private:
-  static void printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params);
-
-  void PrintError(Twine Msg) const { ::PrintError(&Def, Msg); }
-
-  const Record &Def;
-  unsigned NumOutParams = 0;
-  ParamVec Params;
-  SmallVector<Alternative, 2> Alts;
-};
-
-StringRef PatFrag::getParamKindStr(ParamKind OK) {
-  switch (OK) {
-  case PK_Root:
-    return "root";
-  case PK_MachineOperand:
-    return "machine_operand";
-  case PK_Imm:
-    return "imm";
-  }
-
-  llvm_unreachable("Unknown operand kind!");
-}
-
-void PatFrag::addInParam(StringRef Name, ParamKind Kind) {
-  Params.emplace_back(Param{insertStrRef(Name), Kind});
-}
-
-iterator_range<PatFrag::ParamIt> PatFrag::in_params() const {
-  return {Params.begin() + NumOutParams, Params.end()};
-}
-
-void PatFrag::addOutParam(StringRef Name, ParamKind Kind) {
-  assert(NumOutParams == Params.size() &&
-         "Adding out-param after an in-param!");
-  Params.emplace_back(Param{insertStrRef(Name), Kind});
-  ++NumOutParams;
-}
-
-iterator_range<PatFrag::ParamIt> PatFrag::out_params() const {
-  return {Params.begin(), Params.begin() + NumOutParams};
-}
-
-unsigned PatFrag::num_roots() const {
-  return count_if(out_params(),
-                  [&](const auto &P) { return P.Kind == PK_Root; });
-}
-
-unsigned PatFrag::getParamIdx(StringRef Name) const {
-  for (const auto &[Idx, Op] : enumerate(Params)) {
-    if (Op.Name == Name)
-      return Idx;
-  }
-
-  return -1;
+void declareInstExpansion(CodeExpansions &CE, const InstructionMatcher &IM,
+                          StringRef Name) {
+  CE.declare(Name, "State.MIs[" + to_string(IM.getInsnVarID()) + "]");
 }
 
-bool PatFrag::checkSemantics() {
-  for (const auto &Alt : Alts) {
-    for (const auto &Pat : Alt.Pats) {
-      switch (Pat->getKind()) {
-      case Pattern::K_AnyOpcode:
-        PrintError("wip_match_opcode cannot be used in " + PatFragClassName);
-        return false;
-      case Pattern::K_Builtin:
-        PrintError("Builtin instructions cannot be used in " +
-                   PatFragClassName);
-        return false;
-      case Pattern::K_CXX:
-        continue;
-      case Pattern::K_CodeGenInstruction:
-        if (cast<CodeGenInstructionPattern>(Pat.get())->diagnoseAllSpecialTypes(
-                Def.getLoc(), SpecialTyClassName + " is not supported in " +
-                                  PatFragClassName))
-          return false;
-        continue;
-      case Pattern::K_PatFrag:
-        // TODO: It's just that the emitter doesn't handle it but technically
-        // there is no reason why we can't. We just have to be careful with
-        // operand mappings, it could get complex.
-        PrintError("nested " + PatFragClassName + " are not supported");
-        return false;
-      }
-    }
-  }
-
-  StringSet<> SeenOps;
-  for (const auto &Op : in_params()) {
-    if (SeenOps.count(Op.Name)) {
-      PrintError("duplicate parameter '" + Op.Name + "'");
-      return false;
-    }
-
-    // Check this operand is NOT defined in any alternative's patterns.
-    for (const auto &Alt : Alts) {
-      if (Alt.OpTable.lookup(Op.Name).Def) {
-        PrintError("input parameter '" + Op.Name + "' cannot be redefined!");
-        return false;
-      }
-    }
-
-    if (Op.Kind == PK_Root) {
-      PrintError("input parameterr '" + Op.Name + "' cannot be a root!");
-      return false;
-    }
-
-    SeenOps.insert(Op.Name);
-  }
-
-  for (const auto &Op : out_params()) {
-    if (Op.Kind != PK_Root && Op.Kind != PK_MachineOperand) {
-      PrintError("output parameter '" + Op.Name +
-                 "' must be 'root' or 'gi_mo'");
-      return false;
-    }
-
-    if (SeenOps.count(Op.Name)) {
-      PrintError("duplicate parameter '" + Op.Name + "'");
-      return false;
-    }
-
-    // Check this operand is defined in all alternative's patterns.
-    for (const auto &Alt : Alts) {
-      const auto *OpDef = Alt.OpTable.getDef(Op.Name);
-      if (!OpDef) {
-        PrintError("output parameter '" + Op.Name +
-                   "' must be defined by all alternative patterns in '" +
-                   Def.getName() + "'");
-        return false;
-      }
-
-      if (Op.Kind == PK_Root && OpDef->getNumInstDefs() != 1) {
-        // The instruction that defines the root must have a single def.
-        // Otherwise we'd need to support multiple roots and it gets messy.
-        //
-        // e.g. this is not supported:
-        //   (pattern (G_UNMERGE_VALUES $x, $root, $vec))
-        PrintError("all instructions that define root '" + Op.Name + "' in '" +
-                   Def.getName() + "' can only have a single output operand");
-        return false;
-      }
-    }
-
-    SeenOps.insert(Op.Name);
-  }
-
-  if (num_out_params() != 0 && num_roots() == 0) {
-    PrintError(PatFragClassName + " must have one root in its 'out' operands");
-    return false;
-  }
-
-  if (num_roots() > 1) {
-    PrintError(PatFragClassName + " can only have one root");
-    return false;
-  }
-
-  // TODO: find unused params
-
-  const auto CheckTypeOf = [&](const PatternType &) -> bool {
-    llvm_unreachable("GITypeOf should have been rejected earlier!");
-  };
-
-  // Now, typecheck all alternatives.
-  for (auto &Alt : Alts) {
-    OperandTypeChecker OTC(Def.getLoc());
-    for (auto &Pat : Alt.Pats) {
-      if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
-        if (!OTC.check(*IP, CheckTypeOf))
-          return false;
-      }
-    }
-    OTC.propagateTypes();
-  }
-
-  return true;
+void declareInstExpansion(CodeExpansions &CE, const BuildMIAction &A,
+                          StringRef Name) {
+  // Note: we use redeclare here because this may overwrite a matcher inst
+  // expansion.
+  CE.redeclare(Name, "OutMIs[" + to_string(A.getInsnID()) + "]");
 }
 
-bool PatFrag::handleUnboundInParam(StringRef ParamName, StringRef ArgName,
-                                   ArrayRef<SMLoc> DiagLoc) const {
-  // The parameter must be a live-in of all alternatives for this to work.
-  // Otherwise, we risk having unbound parameters being used (= crashes).
-  //
-  // Examples:
-  //
-  // in (ins $y), (patterns (G_FNEG $dst, $y), "return matchFnegOp(${y})")
-  //    even if $y is unbound, we'll lazily bind it when emitting the G_FNEG.
-  //
-  // in (ins $y), (patterns "return matchFnegOp(${y})")
-  //    if $y is unbound when this fragment is emitted, C++ code expansion will
-  //    fail.
-  for (const auto &Alt : Alts) {
-    auto &OT = Alt.OpTable;
-    if (!OT.lookup(ParamName).Found) {
-      ::PrintError(DiagLoc, "operand '" + ArgName + "' (for parameter '" +
-                                ParamName + "' of '" + getName() +
-                                "') cannot be unbound");
-      PrintNote(
-          DiagLoc,
-          "one or more alternatives of '" + getName() + "' do not bind '" +
-              ParamName +
-              "' to an instruction operand; either use a bound operand or "
-              "ensure '" +
-              Def.getName() + "' binds '" + ParamName +
-              "' in all alternatives");
-      return false;
-    }
-  }
-
-  return true;
+void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM,
+                             StringRef Name) {
+  CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) +
+                       "]->getOperand(" + to_string(OM.getOpIdx()) + ")");
 }
 
-bool PatFrag::buildOperandsTables() {
-  // enumerate(...) doesn't seem to allow lvalues so we need to count the old
-  // way.
-  unsigned Idx = 0;
-
-  const auto DiagnoseRedef = [this, &Idx](StringRef OpName) {
-    PrintError("Operand '" + OpName +
-               "' is defined multiple times in patterns of alternative #" +
-               to_string(Idx));
-  };
-
-  for (auto &Alt : Alts) {
-    for (auto &Pat : Alt.Pats) {
-      auto *IP = dyn_cast<InstructionPattern>(Pat.get());
-      if (!IP)
-        continue;
-
-      if (!Alt.OpTable.addPattern(IP, DiagnoseRedef))
-        return false;
-    }
-
-    ++Idx;
-  }
-
-  return true;
+void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID,
+                             StringRef Name) {
+  CE.declare(Name, "State.TempRegisters[" + to_string(TempRegID) + "]");
 }
 
-void PatFrag::print(raw_ostream &OS, StringRef Indent) const {
-  OS << Indent << "(PatFrag name:" << getName() << '\n';
-  if (!in_params().empty()) {
-    OS << Indent << "  (ins ";
-    printParamsList(OS, in_params());
-    OS << ")\n";
-  }
-
-  if (!out_params().empty()) {
-    OS << Indent << "  (outs ";
-    printParamsList(OS, out_params());
-    OS << ")\n";
-  }
-
-  // TODO: Dump OperandTable as well.
-  OS << Indent << "  (alternatives [\n";
-  for (const auto &Alt : Alts) {
-    OS << Indent << "    [\n";
-    for (const auto &Pat : Alt.Pats) {
-      OS << Indent << "      ";
-      Pat->print(OS, /*PrintName=*/true);
-      OS << ",\n";
-    }
-    OS << Indent << "    ],\n";
-  }
-  OS << Indent << "  ])\n";
+//===- Misc. Helpers  -----------------------------------------------------===//
 
-  OS << Indent << ')';
-}
+/// Copies \p S into a function-local static pool and returns the pooled key,
+/// as most Pattern classes only hold StringRefs. Empty input returns {}.
+StringRef insertStrRef(StringRef S) {
+  if (S.empty())
+    return {};
 
-void PatFrag::printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params) {
-  OS << '['
-     << join(map_range(Params,
-                       [](auto &O) {
-                         return (O.Name + ":" + getParamKindStr(O.Kind)).str();
-                       }),
-             ", ")
-     << ']';
+  static StringSet<> Pool;
+  auto [It, Inserted] = Pool.insert(S);
+  return It->getKey();
 }
 
-//===- PatFragPattern -----------------------------------------------------===//
-
-class PatFragPattern : public InstructionPattern {
-public:
-  PatFragPattern(const PatFrag &PF, StringRef Name)
-      : InstructionPattern(K_PatFrag, Name), PF(PF) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_PatFrag; }
-
-  const PatFrag &getPatFrag() const { return PF; }
-  StringRef getInstName() const override { return PF.getName(); }
-
-  unsigned getNumInstDefs() const override { return PF.num_out_params(); }
-  unsigned getNumInstOperands() const override { return PF.num_params(); }
-
-  ArrayRef<InstructionOperand> getApplyDefsNeeded() const override;
-
-  bool checkSemantics(ArrayRef<SMLoc> DiagLoc) override;
-
-  /// Before emitting the patterns inside the PatFrag, add all necessary code
-  /// expansions to \p PatFragCEs imported from \p ParentCEs.
-  ///
-  /// For a MachineOperand PatFrag parameter, this will fetch the expansion for
-  /// that operand from \p ParentCEs and add it to \p PatFragCEs. Errors can be
-  /// emitted if the MachineOperand reference is unbound.
-  ///
-  /// For an Immediate PatFrag parameter this simply adds the integer value to
-  /// \p PatFragCEs as an expansion.
-  ///
-  /// \param ParentCEs Contains all of the code expansions declared by the other
-  ///                  patterns emitted so far in the pattern list containing
-  ///                  this PatFragPattern.
-  /// \param PatFragCEs Output Code Expansions (usually empty)
-  /// \param DiagLoc    Diagnostic loc in case an error occurs.
-  /// \return `true` on success, `false` on failure.
-  bool mapInputCodeExpansions(const CodeExpansions &ParentCEs,
-                              CodeExpansions &PatFragCEs,
-                              ArrayRef<SMLoc> DiagLoc) const;
-
-private:
-  const PatFrag &PF;
-};
-
-ArrayRef<InstructionOperand> PatFragPattern::getApplyDefsNeeded() const {
-  assert(PF.num_roots() == 1);
-  // Only roots need to be redef.
-  for (auto [Idx, Param] : enumerate(PF.out_params())) {
-    if (Param.Kind == PatFrag::PK_Root)
-      return getOperand(Idx);
-  }
-  llvm_unreachable("root not found!");
+template <typename Container> auto keys(Container &&C) {
+  return map_range(C, [](auto &Entry) -> auto & { return Entry.first; });
 }
 
-bool PatFragPattern::checkSemantics(ArrayRef<SMLoc> DiagLoc) {
-  if (!InstructionPattern::checkSemantics(DiagLoc))
-    return false;
-
-  for (const auto &[Idx, Op] : enumerate(Operands)) {
-    switch (PF.getParam(Idx).Kind) {
-    case PatFrag::PK_Imm:
-      if (!Op.hasImmValue()) {
-        PrintError(DiagLoc, "expected operand " + to_string(Idx) + " of '" +
-                                getInstName() + "' to be an immediate; got " +
-                                Op.describe());
-        return false;
-      }
-      if (Op.isNamedImmediate()) {
-        PrintError(DiagLoc, "operand " + to_string(Idx) + " of '" +
-                                getInstName() +
-                                "' cannot be a named immediate");
-        return false;
-      }
-      break;
-    case PatFrag::PK_Root:
-    case PatFrag::PK_MachineOperand:
-      if (!Op.isNamedOperand() || Op.isNamedImmediate()) {
-        PrintError(DiagLoc, "expected operand " + to_string(Idx) + " of '" +
-                                getInstName() +
-                                "' to be a MachineOperand; got " +
-                                Op.describe());
-        return false;
-      }
-      break;
-    }
-  }
-
-  return true;
+template <typename Container> auto values(Container &&C) {
+  return map_range(C, [](auto &Entry) -> auto & { return Entry.second; });
 }
 
-bool PatFragPattern::mapInputCodeExpansions(const CodeExpansions &ParentCEs,
-                                            CodeExpansions &PatFragCEs,
-                                            ArrayRef<SMLoc> DiagLoc) const {
-  for (const auto &[Idx, Op] : enumerate(operands())) {
-    StringRef ParamName = PF.getParam(Idx).Name;
-
-    // Operands to a PFP can only be named, or be an immediate, but not a named
-    // immediate.
-    assert(!Op.isNamedImmediate());
-
-    if (Op.isNamedOperand()) {
-      StringRef ArgName = Op.getOperandName();
-      // Map it only if it's been defined.
-      auto It = ParentCEs.find(ArgName);
-      if (It == ParentCEs.end()) {
-        if (!PF.handleUnboundInParam(ParamName, ArgName, DiagLoc))
-          return false;
-      } else
-        PatFragCEs.declare(ParamName, It->second);
-      continue;
-    }
-
-    if (Op.hasImmValue()) {
-      PatFragCEs.declare(ParamName, to_string(Op.getImmValue()));
-      continue;
-    }
-
-    llvm_unreachable("Unknown Operand Type!");
-  }
-
-  return true;
+std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
+  return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
 }
 
-//===- BuiltinPattern -----------------------------------------------------===//
-
-enum BuiltinKind {
-  BI_ReplaceReg,
-  BI_EraseRoot,
-};
-
-class BuiltinPattern : public InstructionPattern {
-  struct BuiltinInfo {
-    StringLiteral DefName;
-    BuiltinKind Kind;
-    unsigned NumOps;
-    unsigned NumDefs;
-  };
-
-  static constexpr std::array<BuiltinInfo, 2> KnownBuiltins = {{
-      {"GIReplaceReg", BI_ReplaceReg, 2, 1},
-      {"GIEraseRoot", BI_EraseRoot, 0, 0},
-  }};
-
-public:
-  BuiltinPattern(const Record &Def, StringRef Name)
-      : InstructionPattern(K_Builtin, Name), I(getBuiltinInfo(Def)) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_Builtin; }
-
-  unsigned getNumInstOperands() const override { return I.NumOps; }
-  unsigned getNumInstDefs() const override { return I.NumDefs; }
-  StringRef getInstName() const override { return I.DefName; }
-  BuiltinKind getBuiltinKind() const { return I.Kind; }
+//===- MatchTable Helpers  ------------------------------------------------===//
 
-  bool checkSemantics(ArrayRef<SMLoc> Loc) override;
-
-private:
-  static BuiltinInfo getBuiltinInfo(const Record &Def);
-
-  BuiltinInfo I;
-};
-
-BuiltinPattern::BuiltinInfo BuiltinPattern::getBuiltinInfo(const Record &Def) {
-  assert(Def.isSubClassOf(BuiltinInstClassName));
-
-  StringRef Name = Def.getName();
-  for (const auto &KBI : KnownBuiltins) {
-    if (KBI.DefName == Name)
-      return KBI;
-  }
-
-  PrintFatalError(Def.getLoc(), "Unimplemented " + BuiltinInstClassName +
-                                    " def '" + Name + "'");
+LLTCodeGen getLLTCodeGen(const PatternType &PT) {
+  return *MVTToLLT(getValueType(PT.getLLTRecord()));
 }
 
-bool BuiltinPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
-  if (!InstructionPattern::checkSemantics(Loc))
-    return false;
+LLTCodeGenOrTempType getLLTCodeGenOrTempType(const PatternType &PT,
+                                             RuleMatcher &RM) {
+  assert(!PT.isNone());
 
-  // For now all builtins just take names, no immediates.
-  for (const auto &[Idx, Op] : enumerate(operands())) {
-    if (!Op.isNamedOperand() || Op.isNamedImmediate()) {
-      PrintError(Loc, "expected operand " + to_string(Idx) + " of '" +
-                          getInstName() + "' to be a name");
-      return false;
-    }
-  }
+  if (PT.isLLT())
+    return getLLTCodeGen(PT);
 
-  return true;
+  assert(PT.isTypeOf());
+  auto &OM = RM.getOperandMatcher(PT.getTypeOfOpName());
+  return OM.getTempTypeIdx(RM);
 }
 
 //===- PrettyStackTrace Helpers  ------------------------------------------===//
@@ -1701,8 +160,8 @@ class PrettyStackTraceParse : public PrettyStackTraceEntry {
   void print(raw_ostream &OS) const override {
     if (Def.isSubClassOf("GICombineRule"))
       OS << "Parsing GICombineRule '" << Def.getName() << "'";
-    else if (Def.isSubClassOf(PatFragClassName))
-      OS << "Parsing " << PatFragClassName << " '" << Def.getName() << "'";
+    else if (Def.isSubClassOf(PatFrag::ClassName))
+      OS << "Parsing " << PatFrag::ClassName << " '" << Def.getName() << "'";
     else
       OS << "Parsing '" << Def.getName() << "'";
     OS << '\n';
@@ -1720,8 +179,8 @@ class PrettyStackTraceEmit : public PrettyStackTraceEntry {
   void print(raw_ostream &OS) const override {
     if (Def.isSubClassOf("GICombineRule"))
       OS << "Emitting GICombineRule '" << Def.getName() << "'";
-    else if (Def.isSubClassOf(PatFragClassName))
-      OS << "Emitting " << PatFragClassName << " '" << Def.getName() << "'";
+    else if (Def.isSubClassOf(PatFrag::ClassName))
+      OS << "Emitting " << PatFrag::ClassName << " '" << Def.getName() << "'";
     else
       OS << "Emitting '" << Def.getName() << "'";
 
@@ -2540,7 +999,7 @@ bool CombineRuleBuilder::addApplyPattern(std::unique_ptr<Pattern> Pat) {
   }
 
   if (isa<PatFragPattern>(Pat.get())) {
-    PrintError("'" + Name + "': using " + PatFragClassName +
+    PrintError("'" + Name + "': using " + PatFrag::ClassName +
                " is not supported in apply patterns");
     return false;
   }
@@ -2583,12 +1042,14 @@ void CombineRuleBuilder::addCXXPredicate(RuleMatcher &M,
   // FIXME: Hack so C++ code is executed last. May not work for more complex
   // patterns.
   auto &IM = *std::prev(M.insnmatchers().end());
+  auto Loc = RuleDef.getLoc();
+  const auto AddComment = [&](raw_ostream &OS) {
+    OS << "// Pattern Alternatives: ";
+    print(OS, Alts);
+    OS << '\n';
+  };
   const auto &ExpandedCode =
-      P.expandCode(CE, RuleDef.getLoc(), [&](raw_ostream &OS) {
-        OS << "// Pattern Alternatives: ";
-        print(OS, Alts);
-        OS << '\n';
-      });
+      DebugCXXPreds ? P.expandCode(CE, Loc, AddComment) : P.expandCode(CE, Loc);
   IM->addPredicate<GenericInstructionPredicateMatcher>(
       ExpandedCode.getEnumNameWithPrefix(CXXPredPrefix));
 }
@@ -2638,8 +1099,8 @@ bool CombineRuleBuilder::typecheckPatterns() {
   for (auto &Pat : values(MatchPats)) {
     if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
       if (IP->diagnoseAllSpecialTypes(
-              RuleDef.getLoc(),
-              SpecialTyClassName + " is not supported in 'match' patterns")) {
+              RuleDef.getLoc(), PatternType::SpecialTyClassName +
+                                    " is not supported in 'match' patterns")) {
         return false;
       }
     }
@@ -3053,7 +1514,7 @@ bool CombineRuleBuilder::parsePatternList(
     Init *Arg = List.getArg(I);
     std::string Name = List.getArgName(I)
                            ? List.getArgName(I)->getValue().str()
-                           : makeAnonPatName(AnonPatNamePrefix, I);
+                           : ("__" + AnonPatNamePrefix + "_" + Twine(I)).str();
 
     if (auto Pat = parseInstructionPattern(*Arg, Name)) {
       if (!ParseAction(std::move(Pat)))
@@ -3069,7 +1530,7 @@ bool CombineRuleBuilder::parsePatternList(
 
     // Parse arbitrary C++ code
     if (const auto *StringI = dyn_cast<StringInit>(Arg)) {
-      auto CXXPat = std::make_unique<CXXPattern>(*StringI, Name);
+      auto CXXPat = std::make_unique<CXXPattern>(*StringI, insertStrRef(Name));
       if (!ParseAction(std::move(CXXPat)))
         return false;
       continue;
@@ -3093,18 +1554,19 @@ CombineRuleBuilder::parseInstructionPattern(const Init &Arg,
   std::unique_ptr<InstructionPattern> Pat;
   if (const DagInit *IP = getDagWithOperatorOfSubClass(Arg, "Instruction")) {
     auto &Instr = CGT.getInstruction(IP->getOperatorAsDef(RuleDef.getLoc()));
-    Pat = std::make_unique<CodeGenInstructionPattern>(Instr, Name);
+    Pat =
+        std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name));
   } else if (const DagInit *PFP =
-                 getDagWithOperatorOfSubClass(Arg, PatFragClassName)) {
+                 getDagWithOperatorOfSubClass(Arg, PatFrag::ClassName)) {
     const Record *Def = PFP->getOperatorAsDef(RuleDef.getLoc());
     const PatFrag *PF = parsePatFrag(Def);
     if (!PF)
       return nullptr; // Already diagnosed by parsePatFrag
-    Pat = std::make_unique<PatFragPattern>(*PF, Name);
+    Pat = std::make_unique<PatFragPattern>(*PF, insertStrRef(Name));
   } else if (const DagInit *BP =
-                 getDagWithOperatorOfSubClass(Arg, BuiltinInstClassName)) {
+                 getDagWithOperatorOfSubClass(Arg, BuiltinPattern::ClassName)) {
     Pat = std::make_unique<BuiltinPattern>(
-        *BP->getOperatorAsDef(RuleDef.getLoc()), Name);
+        *BP->getOperatorAsDef(RuleDef.getLoc()), insertStrRef(Name));
   } else {
     return nullptr;
   }
@@ -3140,7 +1602,7 @@ CombineRuleBuilder::parseWipMatchOpcodeMatcher(const Init &Arg,
   }
 
   // Each argument is an opcode that can match.
-  auto Result = std::make_unique<AnyOpcodePattern>(Name);
+  auto Result = std::make_unique<AnyOpcodePattern>(insertStrRef(Name));
   for (const auto &Arg : Matcher->getArgs()) {
     Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction");
     if (OpcodeDef) {
@@ -3168,7 +1630,7 @@ bool CombineRuleBuilder::parseInstructionPatternOperand(
   // untyped immediate, e.g. 0
   if (const auto *IntImm = dyn_cast<IntInit>(OpInit)) {
     std::string Name = OpName ? OpName->getAsUnquotedString() : "";
-    IP.addOperand(IntImm->getValue(), Name, PatternType());
+    IP.addOperand(IntImm->getValue(), insertStrRef(Name), PatternType());
     return true;
   }
 
@@ -3195,7 +1657,7 @@ bool CombineRuleBuilder::parseInstructionPatternOperand(
       return ParseErr();
 
     std::string Name = OpName ? OpName->getAsUnquotedString() : "";
-    IP.addOperand(Val->getValue(), Name, *ImmTy);
+    IP.addOperand(Val->getValue(), insertStrRef(Name), *ImmTy);
     return true;
   }
 
@@ -3211,14 +1673,14 @@ bool CombineRuleBuilder::parseInstructionPatternOperand(
         PatternType::get(RuleDef.getLoc(), Def, "cannot parse operand type");
     if (!Ty)
       return false;
-    IP.addOperand(OpName->getAsUnquotedString(), *Ty);
+    IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), *Ty);
     return true;
   }
 
   // Untyped operand e.g. $x/$z in (G_FNEG $x, $z)
   if (isa<UnsetInit>(OpInit)) {
     assert(OpName && "Unset w/ no OpName?");
-    IP.addOperand(OpName->getAsUnquotedString(), PatternType());
+    IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), PatternType());
     return true;
   }
 
@@ -3286,7 +1748,7 @@ bool CombineRuleBuilder::parseInstructionPatternMIFlags(
 
     // Copy flags from a matched instruction: (MIFlags $mi)
     if (isa<UnsetInit>(Arg)) {
-      FI.addCopyFlag(Op->getArgName(K)->getAsUnquotedString());
+      FI.addCopyFlag(insertStrRef(Op->getArgName(K)->getAsUnquotedString()));
       continue;
     }
   }
@@ -3297,19 +1759,19 @@ bool CombineRuleBuilder::parseInstructionPatternMIFlags(
 std::unique_ptr<PatFrag>
 CombineRuleBuilder::parsePatFragImpl(const Record *Def) const {
   auto StackTrace = PrettyStackTraceParse(*Def);
-  if (!Def->isSubClassOf(PatFragClassName))
+  if (!Def->isSubClassOf(PatFrag::ClassName))
     return nullptr;
 
   const DagInit *Ins = Def->getValueAsDag("InOperands");
   if (Ins->getOperatorAsDef(Def->getLoc())->getName() != "ins") {
-    ::PrintError(Def, "expected 'ins' operator for " + PatFragClassName +
+    ::PrintError(Def, "expected 'ins' operator for " + PatFrag::ClassName +
                           " in operands list");
     return nullptr;
   }
 
   const DagInit *Outs = Def->getValueAsDag("OutOperands");
   if (Outs->getOperatorAsDef(Def->getLoc())->getName() != "outs") {
-    ::PrintError(Def, "expected 'outs' operator for " + PatFragClassName +
+    ::PrintError(Def, "expected 'outs' operator for " + PatFrag::ClassName +
                           " out operands list");
     return nullptr;
   }
@@ -3317,14 +1779,14 @@ CombineRuleBuilder::parsePatFragImpl(const Record *Def) const {
   auto Result = std::make_unique<PatFrag>(*Def);
   if (!parsePatFragParamList(Def->getLoc(), *Outs,
                              [&](StringRef Name, PatFrag::ParamKind Kind) {
-                               Result->addOutParam(Name, Kind);
+                               Result->addOutParam(insertStrRef(Name), Kind);
                                return true;
                              }))
     return nullptr;
 
   if (!parsePatFragParamList(Def->getLoc(), *Ins,
                              [&](StringRef Name, PatFrag::ParamKind Kind) {
-                               Result->addInParam(Name, Kind);
+                               Result->addInParam(insertStrRef(Name), Kind);
                                return true;
                              }))
     return nullptr;
@@ -3405,7 +1867,7 @@ const PatFrag *CombineRuleBuilder::parsePatFrag(const Record *Def) const {
 
   std::unique_ptr<PatFrag> NewPatFrag = parsePatFragImpl(Def);
   if (!NewPatFrag) {
-    ::PrintError(Def, "Could not parse " + PatFragClassName + " '" +
+    ::PrintError(Def, "Could not parse " + PatFrag::ClassName + " '" +
                           Def->getName() + "'");
     // Put a nullptr in the map so we don't attempt parsing this again.
     ParsedPatFrags[Def] = nullptr;
@@ -3593,7 +2055,8 @@ bool CombineRuleBuilder::emitPatFragMatchPattern(
     if (PIdx == (unsigned)-1) {
       // This is a temp of the PatFragPattern, prefix the name to avoid
       // conflicts.
-      return O.withNewName((PFP.getName() + "." + ParamName).str());
+      return O.withNewName(
+          insertStrRef((PFP.getName() + "." + ParamName).str()));
     }
 
     // The operand will be added to PatFragCEs's code expansions using the
@@ -3850,7 +2313,7 @@ bool CombineRuleBuilder::emitInstructionApplyPattern(
       // Always insert the action at the beginning, otherwise we may end up
       // using the temp reg before it's available.
       M.insertAction<MakeTempRegisterAction>(
-          M.actions_begin(), Ty.getLLTCodeGenOrTempType(M), TempRegID);
+          M.actions_begin(), getLLTCodeGenOrTempType(Ty, M), TempRegID);
     }
 
     DstMI.addRenderer<TempRegRenderer>(TempRegID);
@@ -3901,7 +2364,7 @@ bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand(
     return true;
   }
 
-  auto ImmTy = Ty.getLLTCodeGenOrTempType(M);
+  auto ImmTy = getLLTCodeGenOrTempType(Ty, M);
 
   if (isGConstant) {
     DstMI.addRenderer<ImmRenderer>(O.getImmValue(), ImmTy);
@@ -4045,7 +2508,7 @@ bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern(
         // and isn't all that useful so it's just rejected in typecheckPatterns
         // at this time.
         assert(Ty.isLLT() && "Only LLTs are supported in match patterns!");
-        OM.addPredicate<LLTOperandMatcher>(Ty.getLLTCodeGen());
+        OM.addPredicate<LLTOperandMatcher>(getLLTCodeGen(Ty));
       }
     }
 

>From 781b8f0d67767f55bd72639486c6ed2821c39933 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 24 Nov 2023 14:19:30 +0100
Subject: [PATCH 2/2] clang-format

---
 llvm/utils/TableGen/GlobalISel/Patterns.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/utils/TableGen/GlobalISel/Patterns.cpp b/llvm/utils/TableGen/GlobalISel/Patterns.cpp
index d4325771c0a8844..49dc1eca33e4265 100644
--- a/llvm/utils/TableGen/GlobalISel/Patterns.cpp
+++ b/llvm/utils/TableGen/GlobalISel/Patterns.cpp
@@ -641,8 +641,8 @@ bool PatFrag::handleUnboundInParam(StringRef ParamName, StringRef ArgName,
     auto &OT = Alt.OpTable;
     if (!OT.lookup(ParamName).Found) {
       llvm::PrintError(DiagLoc, "operand '" + ArgName + "' (for parameter '" +
-                                ParamName + "' of '" + getName() +
-                                "') cannot be unbound");
+                                    ParamName + "' of '" + getName() +
+                                    "') cannot be unbound");
       PrintNote(
           DiagLoc,
           "one or more alternatives of '" + getName() + "' do not bind '" +
@@ -821,8 +821,8 @@ BuiltinPattern::BuiltinInfo BuiltinPattern::getBuiltinInfo(const Record &Def) {
       return KBI;
   }
 
-  PrintFatalError(Def.getLoc(), "Unimplemented " + ClassName +
-                                    " def '" + Name + "'");
+  PrintFatalError(Def.getLoc(),
+                  "Unimplemented " + ClassName + " def '" + Name + "'");
 }
 
 bool BuiltinPattern::checkSemantics(ArrayRef<SMLoc> Loc) {



More information about the llvm-commits mailing list