[llvm] dd0973b - [TableGen] Split GlobalISelCombinerEmitter into multiple files (#73325)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 28 02:48:29 PST 2023


Author: Pierre van Houtryve
Date: 2023-11-28T11:48:24+01:00
New Revision: dd0973be58b8a8d94d63762941f741f2b93aec28

URL: https://github.com/llvm/llvm-project/commit/dd0973be58b8a8d94d63762941f741f2b93aec28
DIFF: https://github.com/llvm/llvm-project/commit/dd0973be58b8a8d94d63762941f741f2b93aec28.diff

LOG: [TableGen] Split GlobalISelCombinerEmitter into multiple files (#73325)

Split MatchDataInfo, CXXPredicates and the Pattern hierarchy into their
own files.

This should help with maintenance a bit, and make the API easier to
navigate.
I hope this encourages a bit more experimentation with MIR patterns,
e.g. I'd like to try getting them in ISel at some point.

Currently, this is pretty much only moving code around. There is no
significant refactoring in there.
I want to split the Combiner backend even more at some point though,
e.g. by separating the TableGen parsing logic into yet another file so
other backends could very easily parse patterns themselves.

Note: I moved the responsibility of managing string lifetimes into the
backend instead of the Pattern class.
e.g. Before you'd do `P.addOperand(Name)` but now it's
`P.addOperand(insertStrRef(Name))`.
I verified this was done correctly by running the tests with UBSan/ASan.

Added: 
    llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp
    llvm/utils/TableGen/GlobalISel/CXXPredicates.h
    llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp
    llvm/utils/TableGen/GlobalISel/MatchDataInfo.h
    llvm/utils/TableGen/GlobalISel/Patterns.cpp
    llvm/utils/TableGen/GlobalISel/Patterns.h

Modified: 
    llvm/utils/TableGen/GlobalISel/CMakeLists.txt
    llvm/utils/TableGen/GlobalISel/CombinerUtils.h
    llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/utils/TableGen/GlobalISel/CMakeLists.txt b/llvm/utils/TableGen/GlobalISel/CMakeLists.txt
index a85f1ac6cc9366a..7262c405839934a 100644
--- a/llvm/utils/TableGen/GlobalISel/CMakeLists.txt
+++ b/llvm/utils/TableGen/GlobalISel/CMakeLists.txt
@@ -5,6 +5,9 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_library(LLVMTableGenGlobalISel STATIC DISABLE_LLVM_LINK_LLVM_DYLIB
   CodeExpander.cpp
+  CXXPredicates.cpp
+  MatchDataInfo.cpp
+  Patterns.cpp
 
   DEPENDS
   vt_gen

diff  --git a/llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp b/llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp
new file mode 100644
index 000000000000000..e39293ebfe7aee2
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/CXXPredicates.cpp
@@ -0,0 +1,51 @@
+//===- CXXPredicates.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "CXXPredicates.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+namespace gi {
+
+std::vector<const CXXPredicateCode *>
+CXXPredicateCode::getSorted(const CXXPredicateCodePool &Pool) {
+  std::vector<const CXXPredicateCode *> Out;
+  std::transform(Pool.begin(), Pool.end(), std::back_inserter(Out),
+                 [&](auto &Elt) { return Elt.second.get(); });
+  sort(Out, [](const auto *A, const auto *B) { return A->ID < B->ID; });
+  return Out;
+}
+
+const CXXPredicateCode &CXXPredicateCode::get(CXXPredicateCodePool &Pool,
+                                              std::string Code) {
+  // Check if we already have an identical piece of code, if not, create an
+  // entry in the pool.
+  const auto CodeHash = hash_value(Code);
+  if (auto It = Pool.find(CodeHash); It != Pool.end())
+    return *It->second;
+
+  const auto ID = Pool.size();
+  auto OwnedData = std::unique_ptr<CXXPredicateCode>(
+      new CXXPredicateCode(std::move(Code), ID));
+  const auto &DataRef = *OwnedData;
+  Pool[CodeHash] = std::move(OwnedData);
+  return DataRef;
+}
+
+// TODO: Make BaseEnumName prefix configurable.
+CXXPredicateCode::CXXPredicateCode(std::string Code, unsigned ID)
+    : Code(Code), ID(ID), BaseEnumName("GICombiner" + std::to_string(ID)) {}
+
+CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXMatchCode;
+CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXApplyCode;
+
+} // namespace gi
+} // namespace llvm

diff  --git a/llvm/utils/TableGen/GlobalISel/CXXPredicates.h b/llvm/utils/TableGen/GlobalISel/CXXPredicates.h
new file mode 100644
index 000000000000000..01610a13110dd20
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/CXXPredicates.h
@@ -0,0 +1,86 @@
+//===- CXXPredicates.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Contains utilities related to handling C++ code in MIR patterns for
+///   GlobalISel. C++ predicates need to be expanded, and then stored in a
+///   static pool until they can be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_MIRPATTERNS_CXXPREDICATES_H
+#define LLVM_UTILS_MIRPATTERNS_CXXPREDICATES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace gi {
+
+/// Entry into the static pool of all CXX Predicate code. This contains
+/// fully expanded C++ code.
+///
+/// The static pool is hidden inside the object and can be accessed through
+/// getAllMatchCode/getAllApplyCode
+///
+/// Note that CXXPattern trims C++ code, so the Code is already expected to be
+/// free of leading/trailing whitespace.
+class CXXPredicateCode {
+  using CXXPredicateCodePool =
+      DenseMap<hash_code, std::unique_ptr<CXXPredicateCode>>;
+  static CXXPredicateCodePool AllCXXMatchCode;
+  static CXXPredicateCodePool AllCXXApplyCode;
+
+  /// Returns the entries of a `CXXPredicateCodePool`, sorted by their IDs.
+  static std::vector<const CXXPredicateCode *>
+  getSorted(const CXXPredicateCodePool &Pool);
+
+  /// Gets an instance of `CXXPredicateCode` for \p Code, or returns an already
+  /// existing one.
+  static const CXXPredicateCode &get(CXXPredicateCodePool &Pool,
+                                     std::string Code);
+
+  CXXPredicateCode(std::string Code, unsigned ID);
+
+public:
+  static const CXXPredicateCode &getMatchCode(std::string Code) {
+    return get(AllCXXMatchCode, std::move(Code));
+  }
+
+  static const CXXPredicateCode &getApplyCode(std::string Code) {
+    return get(AllCXXApplyCode, std::move(Code));
+  }
+
+  static std::vector<const CXXPredicateCode *> getAllMatchCode() {
+    return getSorted(AllCXXMatchCode);
+  }
+
+  static std::vector<const CXXPredicateCode *> getAllApplyCode() {
+    return getSorted(AllCXXApplyCode);
+  }
+
+  const std::string Code;
+  const unsigned ID;
+  const std::string BaseEnumName;
+
+  bool needsUnreachable() const {
+    return !StringRef(Code).starts_with("return");
+  }
+
+  std::string getEnumNameWithPrefix(StringRef Prefix) const {
+    return Prefix.str() + BaseEnumName;
+  }
+};
+
+} // namespace gi
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_MIRPATTERNS_CXXPREDICATES_H

diff  --git a/llvm/utils/TableGen/GlobalISel/CombinerUtils.h b/llvm/utils/TableGen/GlobalISel/CombinerUtils.h
index 07671efcc57516e..8cb2514a10e8761 100644
--- a/llvm/utils/TableGen/GlobalISel/CombinerUtils.h
+++ b/llvm/utils/TableGen/GlobalISel/CombinerUtils.h
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file Utility functions used by both Combiner backends.
-/// TODO: Can remove when MatchDAG-based backend is removed.
 //
 //===----------------------------------------------------------------------===//
 

diff  --git a/llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp
new file mode 100644
index 000000000000000..b5c9e4f8c24852d
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.cpp
@@ -0,0 +1,49 @@
+//===- MatchDataInfo.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "MatchDataInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace gi {
+
+StringMap<std::vector<std::string>> AllMatchDataVars;
+
+StringRef MatchDataInfo::getVariableName() const {
+  assert(hasVariableName());
+  return VarName;
+}
+
+void MatchDataInfo::print(raw_ostream &OS) const {
+  OS << "(MatchDataInfo pattern_symbol:" << PatternSymbol << " type:'" << Type
+     << "' var_name:" << (VarName.empty() ? "<unassigned>" : VarName) << ")";
+}
+
+void MatchDataInfo::dump() const { print(dbgs()); }
+
+void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos) {
+  static unsigned NextVarID = 0;
+
+  StringMap<unsigned> SeenTypes;
+  for (auto &Info : Infos) {
+    unsigned &NumSeen = SeenTypes[Info.getType()];
+    auto &ExistingVars = AllMatchDataVars[Info.getType()];
+
+    if (NumSeen == ExistingVars.size())
+      ExistingVars.push_back("MDInfo" + std::to_string(NextVarID++));
+
+    Info.setVariableName(ExistingVars[NumSeen++]);
+  }
+}
+
+} // namespace gi
+} // namespace llvm

diff  --git a/llvm/utils/TableGen/GlobalISel/MatchDataInfo.h b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.h
new file mode 100644
index 000000000000000..abe1245bc67d092
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/MatchDataInfo.h
@@ -0,0 +1,90 @@
+//===- MatchDataInfo.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Contains utilities related to handling "match data" for GlobalISel
+///  Combiners. Match data allows for setting some arbitrary data in the "match"
+///  phase and pass it down to the "apply" phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_MIRPATTERNS_MATCHDATAINFO_H
+#define LLVM_UTILS_MIRPATTERNS_MATCHDATAINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace gi {
+
+/// Represents MatchData defined by the match stage and required by the apply
+/// stage.
+///
+/// This allows the plumbing of arbitrary data from C++ predicates between the
+/// stages.
+///
+/// When this class is initially created, it only has a pattern symbol and a
+/// type. When all of the MatchData declarations of a given pattern have been
+/// parsed, `AssignMatchDataVariables` must be called to assign storage variable
+/// names to each MatchDataInfo.
+class MatchDataInfo {
+  StringRef PatternSymbol;
+  StringRef Type;
+  std::string VarName;
+
+public:
+  static constexpr StringLiteral StructTypeName = "MatchInfosTy";
+  static constexpr StringLiteral StructName = "MatchInfos";
+
+  MatchDataInfo(StringRef PatternSymbol, StringRef Type)
+      : PatternSymbol(PatternSymbol), Type(Type.trim()) {}
+
+  StringRef getPatternSymbol() const { return PatternSymbol; };
+  StringRef getType() const { return Type; };
+
+  bool hasVariableName() const { return !VarName.empty(); }
+  void setVariableName(StringRef Name) { VarName = Name; }
+  StringRef getVariableName() const;
+
+  std::string getQualifiedVariableName() const {
+    return StructName.str() + "." + getVariableName().str();
+  }
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
+
+/// Pool of type -> variables used to emit MatchData variables declarations.
+///
+/// e.g. if the map contains "int64_t" -> ["MDInfo0", "MDInfo1"], then two
+/// variables must be declared: `int64_t MDInfo0` and `int64_t MDInfo1`.
+///
+/// This has a static lifetime and will outlive all the `MatchDataInfo` objects
+/// by design. It needs a static lifetime so the backends can emit variable
+/// declarations after processing all the inputs.
+extern StringMap<std::vector<std::string>> AllMatchDataVars;
+
+/// Assign variable names to all MatchDatas used by a pattern. This must be
+/// called after all MatchData decls have been parsed for a given processing
+/// unit (e.g. a combine rule)
+///
+/// Requires an array of MatchDataInfo so we can handle cases where a pattern
+/// uses multiple instances of the same MatchData type.
+///
+/// Writes to \ref AllMatchDataVars.
+void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos);
+
+} // namespace gi
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_MIRPATTERNS_MATCHDATAINFO_H

diff  --git a/llvm/utils/TableGen/GlobalISel/Patterns.cpp b/llvm/utils/TableGen/GlobalISel/Patterns.cpp
new file mode 100644
index 000000000000000..0a6d05e06dca128
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/Patterns.cpp
@@ -0,0 +1,842 @@
+//===- Patterns.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Patterns.h"
+#include "../CodeGenInstruction.h"
+#include "CXXPredicates.h"
+#include "CodeExpander.h"
+#include "CodeExpansions.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+namespace gi {
+
+//===- PatternType --------------------------------------------------------===//
+
+std::optional<PatternType> PatternType::get(ArrayRef<SMLoc> DiagLoc,
+                                            const Record *R, Twine DiagCtx) {
+  assert(R);
+  if (R->isSubClassOf("ValueType")) {
+    PatternType PT(PT_ValueType);
+    PT.Data.Def = R;
+    return PT;
+  }
+
+  if (R->isSubClassOf(TypeOfClassName)) {
+    auto RawOpName = R->getValueAsString("OpName");
+    if (!RawOpName.starts_with("$")) {
+      PrintError(DiagLoc, DiagCtx + ": invalid operand name format '" +
+                              RawOpName + "' in " + TypeOfClassName +
+                              ": expected '$' followed by an operand name");
+      return std::nullopt;
+    }
+
+    PatternType PT(PT_TypeOf);
+    PT.Data.Str = RawOpName.drop_front(1);
+    return PT;
+  }
+
+  PrintError(DiagLoc, DiagCtx + ": unknown type '" + R->getName() + "'");
+  return std::nullopt;
+}
+
+PatternType PatternType::getTypeOf(StringRef OpName) {
+  PatternType PT(PT_TypeOf);
+  PT.Data.Str = OpName;
+  return PT;
+}
+
+StringRef PatternType::getTypeOfOpName() const {
+  assert(isTypeOf());
+  return Data.Str;
+}
+
+const Record *PatternType::getLLTRecord() const {
+  assert(isLLT());
+  return Data.Def;
+}
+
+bool PatternType::operator==(const PatternType &Other) const {
+  if (Kind != Other.Kind)
+    return false;
+
+  switch (Kind) {
+  case PT_None:
+    return true;
+  case PT_ValueType:
+    return Data.Def == Other.Data.Def;
+  case PT_TypeOf:
+    return Data.Str == Other.Data.Str;
+  }
+
+  llvm_unreachable("Unknown Type Kind");
+}
+
+std::string PatternType::str() const {
+  switch (Kind) {
+  case PT_None:
+    return "";
+  case PT_ValueType:
+    return Data.Def->getName().str();
+  case PT_TypeOf:
+    return (TypeOfClassName + "<$" + getTypeOfOpName() + ">").str();
+  }
+
+  llvm_unreachable("Unknown type!");
+}
+
+//===- Pattern ------------------------------------------------------------===//
+
+void Pattern::dump() const { return print(dbgs()); }
+
+const char *Pattern::getKindName() const {
+  switch (Kind) {
+  case K_AnyOpcode:
+    return "AnyOpcodePattern";
+  case K_CXX:
+    return "CXXPattern";
+  case K_CodeGenInstruction:
+    return "CodeGenInstructionPattern";
+  case K_PatFrag:
+    return "PatFragPattern";
+  case K_Builtin:
+    return "BuiltinPattern";
+  }
+
+  llvm_unreachable("unknown pattern kind!");
+}
+
+void Pattern::printImpl(raw_ostream &OS, bool PrintName,
+                        function_ref<void()> ContentPrinter) const {
+  OS << "(" << getKindName() << " ";
+  if (PrintName)
+    OS << "name:" << getName() << " ";
+  ContentPrinter();
+  OS << ")";
+}
+
+//===- AnyOpcodePattern ---------------------------------------------------===//
+
+void AnyOpcodePattern::print(raw_ostream &OS, bool PrintName) const {
+  printImpl(OS, PrintName, [&OS, this]() {
+    OS << "["
+       << join(map_range(Insts,
+                         [](const auto *I) { return I->TheDef->getName(); }),
+               ", ")
+       << "]";
+  });
+}
+
+//===- CXXPattern ---------------------------------------------------------===//
+
+CXXPattern::CXXPattern(const StringInit &Code, StringRef Name)
+    : CXXPattern(Code.getAsUnquotedString(), Name) {}
+
+const CXXPredicateCode &
+CXXPattern::expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
+                       function_ref<void(raw_ostream &)> AddComment) const {
+  std::string Result;
+  raw_string_ostream OS(Result);
+
+  if (AddComment)
+    AddComment(OS);
+
+  CodeExpander Expander(RawCode, CE, Locs, /*ShowExpansions*/ false);
+  Expander.emit(OS);
+  if (IsApply)
+    return CXXPredicateCode::getApplyCode(std::move(Result));
+  return CXXPredicateCode::getMatchCode(std::move(Result));
+}
+
+void CXXPattern::print(raw_ostream &OS, bool PrintName) const {
+  printImpl(OS, PrintName, [&OS, this] {
+    OS << (IsApply ? "apply" : "match") << " code:\"";
+    printEscapedString(getRawCode(), OS);
+    OS << "\"";
+  });
+}
+
+//===- InstructionOperand -------------------------------------------------===//
+
+std::string InstructionOperand::describe() const {
+  if (!hasImmValue())
+    return "MachineOperand $" + getOperandName().str() + "";
+  std::string Str = "imm " + std::to_string(getImmValue());
+  if (isNamedImmediate())
+    Str += ":$" + getOperandName().str() + "";
+  return Str;
+}
+
+void InstructionOperand::print(raw_ostream &OS) const {
+  if (isDef())
+    OS << "<def>";
+
+  bool NeedsColon = true;
+  if (Type) {
+    if (hasImmValue())
+      OS << "(" << Type.str() << " " << getImmValue() << ")";
+    else
+      OS << Type.str();
+  } else if (hasImmValue())
+    OS << getImmValue();
+  else
+    NeedsColon = false;
+
+  if (isNamedOperand())
+    OS << (NeedsColon ? ":" : "") << "$" << getOperandName();
+}
+
+void InstructionOperand::dump() const { return print(dbgs()); }
+
+//===- InstructionPattern -------------------------------------------------===//
+
+bool InstructionPattern::diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc,
+                                                 Twine Msg) const {
+  bool HasDiag = false;
+  for (const auto &[Idx, Op] : enumerate(operands())) {
+    if (Op.getType().isSpecial()) {
+      PrintError(Loc, Msg);
+      PrintNote(Loc, "operand " + Twine(Idx) + " of '" + getName() +
+                         "' has type '" + Op.getType().str() + "'");
+      HasDiag = true;
+    }
+  }
+  return HasDiag;
+}
+
+void InstructionPattern::reportUnreachable(ArrayRef<SMLoc> Locs) const {
+  PrintError(Locs, "pattern '" + getName() + "' ('" + getInstName() +
+                       "') is unreachable from the pattern root!");
+}
+
+bool InstructionPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
+  unsigned NumExpectedOperands = getNumInstOperands();
+
+  if (isVariadic()) {
+    if (Operands.size() < NumExpectedOperands) {
+      PrintError(Loc, +"'" + getInstName() + "' expected at least " +
+                          Twine(NumExpectedOperands) + " operands, got " +
+                          Twine(Operands.size()));
+      return false;
+    }
+  } else if (NumExpectedOperands != Operands.size()) {
+    PrintError(Loc, +"'" + getInstName() + "' expected " +
+                        Twine(NumExpectedOperands) + " operands, got " +
+                        Twine(Operands.size()));
+    return false;
+  }
+
+  unsigned OpIdx = 0;
+  unsigned NumDefs = getNumInstDefs();
+  for (auto &Op : Operands)
+    Op.setIsDef(OpIdx++ < NumDefs);
+
+  return true;
+}
+
+void InstructionPattern::print(raw_ostream &OS, bool PrintName) const {
+  printImpl(OS, PrintName, [&OS, this] {
+    OS << getInstName() << " operands:[";
+    StringRef Sep;
+    for (const auto &Op : Operands) {
+      OS << Sep;
+      Op.print(OS);
+      Sep = ", ";
+    }
+    OS << "]";
+
+    printExtras(OS);
+  });
+}
+
+//===- OperandTable -------------------------------------------------------===//
+
+bool OperandTable::addPattern(InstructionPattern *P,
+                              function_ref<void(StringRef)> DiagnoseRedef) {
+  for (const auto &Op : P->named_operands()) {
+    StringRef OpName = Op.getOperandName();
+
+    // We always create an entry in the OperandTable, even for uses.
+    // Uses of operands that don't have a def (= live-ins) will remain with a
+    // nullptr as the Def.
+    //
+    // This allows us to tell whether an operand exists in a pattern or not.
+    // If there is no entry for it, it doesn't exist; if there is an entry,
+    // it's used/def'd at least once.
+    auto &Def = Table[OpName];
+
+    if (!Op.isDef())
+      continue;
+
+    if (Def) {
+      DiagnoseRedef(OpName);
+      return false;
+    }
+
+    Def = P;
+  }
+
+  return true;
+}
+
+void OperandTable::print(raw_ostream &OS, StringRef Name,
+                         StringRef Indent) const {
+  OS << Indent << "(OperandTable ";
+  if (!Name.empty())
+    OS << Name << " ";
+  if (Table.empty()) {
+    OS << "<empty>)\n";
+    return;
+  }
+
+  SmallVector<StringRef, 0> Keys(Table.keys());
+  sort(Keys);
+
+  OS << '\n';
+  for (const auto &Key : Keys) {
+    const auto *Def = Table.at(Key);
+    OS << Indent << "  " << Key << " -> "
+       << (Def ? Def->getName() : "<live-in>") << '\n';
+  }
+  OS << Indent << ")\n";
+}
+
+void OperandTable::dump() const { print(dbgs()); }
+
+//===- MIFlagsInfo --------------------------------------------------------===//
+
+void MIFlagsInfo::addSetFlag(const Record *R) {
+  SetF.insert(R->getValueAsString("EnumName"));
+}
+
+void MIFlagsInfo::addUnsetFlag(const Record *R) {
+  UnsetF.insert(R->getValueAsString("EnumName"));
+}
+
+void MIFlagsInfo::addCopyFlag(StringRef InstName) { CopyF.insert(InstName); }
+
+//===- CodeGenInstructionPattern ------------------------------------------===//
+
+bool CodeGenInstructionPattern::is(StringRef OpcodeName) const {
+  return I.TheDef->getName() == OpcodeName;
+}
+
+bool CodeGenInstructionPattern::isVariadic() const {
+  return I.Operands.isVariadic;
+}
+
+bool CodeGenInstructionPattern::hasVariadicDefs() const {
+  // Note: we cannot rely on variadicOpsAreDefs alone; it's not set for
+  // GenericInstructions, so we also inspect OutOperandList below.
+  if (!isVariadic())
+    return false;
+
+  if (I.variadicOpsAreDefs)
+    return true;
+
+  DagInit *OutOps = I.TheDef->getValueAsDag("OutOperandList");
+  if (OutOps->arg_empty())
+    return false;
+
+  auto *LastArgTy = dyn_cast<DefInit>(OutOps->getArg(OutOps->arg_size() - 1));
+  return LastArgTy && LastArgTy->getDef()->getName() == "variable_ops";
+}
+
+unsigned CodeGenInstructionPattern::getNumInstDefs() const {
+  if (!isVariadic() || !hasVariadicDefs())
+    return I.Operands.NumDefs;
+  unsigned NumOuts = I.Operands.size() - I.Operands.NumDefs;
+  assert(Operands.size() > NumOuts);
+  return std::max<unsigned>(I.Operands.NumDefs, Operands.size() - NumOuts);
+}
+
+unsigned CodeGenInstructionPattern::getNumInstOperands() const {
+  unsigned NumCGIOps = I.Operands.size();
+  return isVariadic() ? std::max<unsigned>(NumCGIOps, Operands.size())
+                      : NumCGIOps;
+}
+
+MIFlagsInfo &CodeGenInstructionPattern::getOrCreateMIFlagsInfo() {
+  if (!FI)
+    FI = std::make_unique<MIFlagsInfo>();
+  return *FI;
+}
+
+StringRef CodeGenInstructionPattern::getInstName() const {
+  return I.TheDef->getName();
+}
+
+void CodeGenInstructionPattern::printExtras(raw_ostream &OS) const {
+  if (!FI)
+    return;
+
+  OS << " (MIFlags";
+  if (!FI->set_flags().empty())
+    OS << " (set " << join(FI->set_flags(), ", ") << ")";
+  if (!FI->unset_flags().empty())
+    OS << " (unset " << join(FI->unset_flags(), ", ") << ")";
+  if (!FI->copy_flags().empty())
+    OS << " (copy " << join(FI->copy_flags(), ", ") << ")";
+  OS << ')';
+}
+
+//===- OperandTypeChecker -------------------------------------------------===//
+
+bool OperandTypeChecker::check(
+    InstructionPattern &P,
+    std::function<bool(const PatternType &)> VerifyTypeOfOperand) {
+  Pats.push_back(&P);
+
+  for (auto &Op : P.operands()) {
+    const auto Ty = Op.getType();
+    if (!Ty)
+      continue;
+
+    if (Ty.isTypeOf() && !VerifyTypeOfOperand(Ty))
+      return false;
+
+    if (!Op.isNamedOperand())
+      continue;
+
+    StringRef OpName = Op.getOperandName();
+    auto &Info = Types[OpName];
+    if (!Info.Type) {
+      Info.Type = Ty;
+      Info.PrintTypeSrcNote = [this, OpName, Ty, &P]() {
+        PrintSeenWithTypeIn(P, OpName, Ty);
+      };
+      continue;
+    }
+
+    if (Info.Type != Ty) {
+      PrintError(DiagLoc, "conflicting types for operand '" +
+                              Op.getOperandName() + "': '" + Info.Type.str() +
+                              "' vs '" + Ty.str() + "'");
+      PrintSeenWithTypeIn(P, OpName, Ty);
+      Info.PrintTypeSrcNote();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void OperandTypeChecker::propagateTypes() {
+  for (auto *Pat : Pats) {
+    for (auto &Op : Pat->named_operands()) {
+      if (auto &Info = Types[Op.getOperandName()]; Info.Type)
+        Op.setType(Info.Type);
+    }
+  }
+}
+
+void OperandTypeChecker::PrintSeenWithTypeIn(InstructionPattern &P,
+                                             StringRef OpName,
+                                             PatternType Ty) const {
+  PrintNote(DiagLoc, "'" + OpName + "' seen with type '" + Ty.str() + "' in '" +
+                         P.getName() + "'");
+}
+
+StringRef PatFrag::getParamKindStr(ParamKind OK) {
+  switch (OK) {
+  case PK_Root:
+    return "root";
+  case PK_MachineOperand:
+    return "machine_operand";
+  case PK_Imm:
+    return "imm";
+  }
+
+  llvm_unreachable("Unknown operand kind!");
+}
+
+//===- PatFrag -----------------------------------------------------------===//
+
+PatFrag::PatFrag(const Record &Def) : Def(Def) {
+  assert(Def.isSubClassOf(ClassName));
+}
+
+StringRef PatFrag::getName() const { return Def.getName(); }
+
+ArrayRef<SMLoc> PatFrag::getLoc() const { return Def.getLoc(); }
+
+void PatFrag::addInParam(StringRef Name, ParamKind Kind) {
+  Params.emplace_back(Param{Name, Kind});
+}
+
+iterator_range<PatFrag::ParamIt> PatFrag::in_params() const {
+  return {Params.begin() + NumOutParams, Params.end()};
+}
+
+void PatFrag::addOutParam(StringRef Name, ParamKind Kind) {
+  assert(NumOutParams == Params.size() &&
+         "Adding out-param after an in-param!");
+  Params.emplace_back(Param{Name, Kind});
+  ++NumOutParams;
+}
+
+iterator_range<PatFrag::ParamIt> PatFrag::out_params() const {
+  return {Params.begin(), Params.begin() + NumOutParams};
+}
+
+unsigned PatFrag::num_roots() const {
+  return count_if(out_params(),
+                  [&](const auto &P) { return P.Kind == PK_Root; });
+}
+
+unsigned PatFrag::getParamIdx(StringRef Name) const {
+  for (const auto &[Idx, Op] : enumerate(Params)) {
+    if (Op.Name == Name)
+      return Idx;
+  }
+
+  return -1;
+}
+
+bool PatFrag::checkSemantics() {
+  for (const auto &Alt : Alts) {
+    for (const auto &Pat : Alt.Pats) {
+      switch (Pat->getKind()) {
+      case Pattern::K_AnyOpcode:
+        PrintError("wip_match_opcode cannot be used in " + ClassName);
+        return false;
+      case Pattern::K_Builtin:
+        PrintError("Builtin instructions cannot be used in " + ClassName);
+        return false;
+      case Pattern::K_CXX:
+        continue;
+      case Pattern::K_CodeGenInstruction:
+        if (cast<CodeGenInstructionPattern>(Pat.get())->diagnoseAllSpecialTypes(
+                Def.getLoc(), PatternType::SpecialTyClassName +
+                                  " is not supported in " + ClassName))
+          return false;
+        continue;
+      case Pattern::K_PatFrag:
+        // TODO: It's just that the emitter doesn't handle it but technically
+        // there is no reason why we can't. We just have to be careful with
+        // operand mappings, it could get complex.
+        PrintError("nested " + ClassName + " are not supported");
+        return false;
+      }
+    }
+  }
+
+  StringSet<> SeenOps;
+  for (const auto &Op : in_params()) {
+    if (SeenOps.count(Op.Name)) {
+      PrintError("duplicate parameter '" + Op.Name + "'");
+      return false;
+    }
+
+    // Check this operand is NOT defined in any alternative's patterns.
+    for (const auto &Alt : Alts) {
+      if (Alt.OpTable.lookup(Op.Name).Def) {
+        PrintError("input parameter '" + Op.Name + "' cannot be redefined!");
+        return false;
+      }
+    }
+
+    if (Op.Kind == PK_Root) {
+      PrintError("input parameterr '" + Op.Name + "' cannot be a root!");
+      return false;
+    }
+
+    SeenOps.insert(Op.Name);
+  }
+
+  for (const auto &Op : out_params()) {
+    if (Op.Kind != PK_Root && Op.Kind != PK_MachineOperand) {
+      PrintError("output parameter '" + Op.Name +
+                 "' must be 'root' or 'gi_mo'");
+      return false;
+    }
+
+    if (SeenOps.count(Op.Name)) {
+      PrintError("duplicate parameter '" + Op.Name + "'");
+      return false;
+    }
+
+    // Check this operand is defined in every alternative's patterns.
+    for (const auto &Alt : Alts) {
+      const auto *OpDef = Alt.OpTable.getDef(Op.Name);
+      if (!OpDef) {
+        PrintError("output parameter '" + Op.Name +
+                   "' must be defined by all alternative patterns in '" +
+                   Def.getName() + "'");
+        return false;
+      }
+
+      if (Op.Kind == PK_Root && OpDef->getNumInstDefs() != 1) {
+        // The instruction that defines the root must have a single def.
+        // Otherwise we'd need to support multiple roots and it gets messy.
+        //
+        // e.g. this is not supported:
+        //   (pattern (G_UNMERGE_VALUES $x, $root, $vec))
+        PrintError("all instructions that define root '" + Op.Name + "' in '" +
+                   Def.getName() + "' can only have a single output operand");
+        return false;
+      }
+    }
+
+    SeenOps.insert(Op.Name);
+  }
+
+  if (num_out_params() != 0 && num_roots() == 0) {
+    PrintError(ClassName + " must have one root in its 'out' operands");
+    return false;
+  }
+
+  if (num_roots() > 1) {
+    PrintError(ClassName + " can only have one root");
+    return false;
+  }
+
+  // TODO: find unused params
+
+  const auto CheckTypeOf = [&](const PatternType &) -> bool {
+    llvm_unreachable("GITypeOf should have been rejected earlier!");
+  };
+
+  // Now, typecheck all alternatives.
+  for (auto &Alt : Alts) {
+    OperandTypeChecker OTC(Def.getLoc());
+    for (auto &Pat : Alt.Pats) {
+      if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
+        if (!OTC.check(*IP, CheckTypeOf))
+          return false;
+      }
+    }
+    OTC.propagateTypes();
+  }
+
+  return true;
+}
+
+/// Decides whether the in-param \p ParamName may be left unbound by a user of
+/// this PatFrag.
+///
+/// Succeeds only if the operand table of every alternative contains
+/// \p ParamName. Otherwise an error plus an explanatory note are emitted at
+/// \p DiagLoc; \p ArgName is the argument name quoted in the error.
+///
+/// \return true when the parameter is known to all alternatives, false when a
+/// diagnostic was emitted.
+bool PatFrag::handleUnboundInParam(StringRef ParamName, StringRef ArgName,
+                                   ArrayRef<SMLoc> DiagLoc) const {
+  // Every alternative has to see the parameter as a live-in, else an unbound
+  // parameter could end up being used (= crashes).
+  //
+  // Examples:
+  //
+  // in (ins $y), (patterns (G_FNEG $dst, $y), "return matchFnegOp(${y})")
+  //    $y may be unbound: it gets bound lazily when the G_FNEG is emitted.
+  //
+  // in (ins $y), (patterns "return matchFnegOp(${y})")
+  //    $y must already be bound when the fragment is emitted, otherwise the
+  //    C++ code expansion fails.
+  const bool KnownByAllAlts = llvm::all_of(Alts, [&](const Alternative &Alt) {
+    return Alt.OpTable.lookup(ParamName).Found;
+  });
+  if (KnownByAllAlts)
+    return true;
+
+  llvm::PrintError(DiagLoc, "operand '" + ArgName + "' (for parameter '" +
+                                ParamName + "' of '" + getName() +
+                                "') cannot be unbound");
+  PrintNote(DiagLoc,
+            "one or more alternatives of '" + getName() + "' do not bind '" +
+                ParamName +
+                "' to an instruction operand; either use a bound operand or "
+                "ensure '" +
+                Def.getName() + "' binds '" + ParamName +
+                "' in all alternatives");
+  return false;
+}
+
+/// Populates the OperandTable of each alternative from its instruction
+/// patterns. Patterns that are not InstructionPatterns are ignored.
+///
+/// \return false as soon as OperandTable::addPattern rejects a pattern; a
+/// redefinition is reported together with the index of the alternative.
+bool PatFrag::buildOperandsTables() {
+  // The index is tracked by hand and captured by reference so that the
+  // diagnostic always names the alternative currently being processed.
+  unsigned AltIdx = 0;
+  const auto ReportRedef = [this, &AltIdx](StringRef OpName) {
+    PrintError("Operand '" + OpName +
+               "' is defined multiple times in patterns of alternative #" +
+               std::to_string(AltIdx));
+  };
+
+  for (; AltIdx != Alts.size(); ++AltIdx) {
+    Alternative &Alt = Alts[AltIdx];
+    for (auto &Pat : Alt.Pats) {
+      if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
+        if (!Alt.OpTable.addPattern(IP, ReportRedef))
+          return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/// Writes a textual dump of this PatFrag to \p OS: its name, the non-empty
+/// in/out parameter lists, and the patterns of every alternative. Each emitted
+/// line is prefixed with \p Indent; no newline follows the closing ')'.
+void PatFrag::print(raw_ostream &OS, StringRef Indent) const {
+  // Emits one parameter list, or nothing at all when the list is empty.
+  const auto EmitParams = [&](StringRef Opening,
+                              iterator_range<ParamIt> Params) {
+    if (Params.empty())
+      return;
+    OS << Indent << Opening;
+    printParamsList(OS, Params);
+    OS << ")\n";
+  };
+
+  OS << Indent << "(PatFrag name:" << getName() << '\n';
+  EmitParams("  (ins ", in_params());
+  EmitParams("  (outs ", out_params());
+
+  // TODO: Dump OperandTable as well.
+  OS << Indent << "  (alternatives [\n";
+  for (const Alternative &Alt : Alts) {
+    OS << Indent << "    [\n";
+    for (const std::unique_ptr<Pattern> &Pat : Alt.Pats) {
+      OS << Indent << "      ";
+      Pat->print(OS, /*PrintName=*/true);
+      OS << ",\n";
+    }
+    OS << Indent << "    ],\n";
+  }
+  OS << Indent << "  ])\n";
+
+  OS << Indent << ')';
+}
+
+/// Debugging helper: prints this PatFrag to dbgs() without extra indentation.
+void PatFrag::dump() const {
+  print(dbgs());
+}
+
+/// Prints \p Params as a bracketed, comma-separated list of `name:kind`
+/// entries, e.g. `[a:root, b:imm]` (the kind strings come from
+/// getParamKindStr).
+void PatFrag::printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params) {
+  OS << '[';
+  bool NeedsSeparator = false;
+  for (const Param &P : Params) {
+    if (NeedsSeparator)
+      OS << ", ";
+    NeedsSeparator = true;
+    OS << P.Name << ":" << getParamKindStr(P.Kind);
+  }
+  OS << ']';
+}
+
+/// Reports \p Msg as an error attached to the record that defines this
+/// PatFrag.
+void PatFrag::PrintError(Twine Msg) const {
+  llvm::PrintError(&Def, Msg);
+}
+
+/// Returns the single operand that corresponds to the root out-param of the
+/// PatFrag. Parameter N of a PatFrag is operand N of its PatFragPattern, so
+/// the position of the root within the out params is also its operand index.
+///
+/// The PatFrag is expected to have exactly one root (asserted).
+ArrayRef<InstructionOperand> PatFragPattern::getApplyDefsNeeded() const {
+  assert(PF.num_roots() == 1);
+  // Only roots need to be redef.
+  unsigned OpIdx = 0;
+  for (const PatFrag::Param &OutParam : PF.out_params()) {
+    if (OutParam.Kind == PatFrag::PK_Root)
+      return getOperand(OpIdx);
+    ++OpIdx;
+  }
+  llvm_unreachable("root not found!");
+}
+
+//===- PatFragPattern -----------------------------------------------------===//
+
+/// Checks that every operand of this PatFrag use agrees with the kind of the
+/// PatFrag parameter at the same index:
+///   - an immediate parameter needs an unnamed immediate operand;
+///   - a root or machine-operand parameter needs a named, non-immediate
+///     operand.
+///
+/// The generic InstructionPattern checks run first. Errors are reported at
+/// \p DiagLoc.
+///
+/// \return true if all checks passed, false after the first diagnostic.
+bool PatFragPattern::checkSemantics(ArrayRef<SMLoc> DiagLoc) {
+  const bool BaseOK = InstructionPattern::checkSemantics(DiagLoc);
+  if (!BaseOK)
+    return false;
+
+  for (const auto &[Idx, Op] : enumerate(Operands)) {
+    switch (PF.getParam(Idx).Kind) {
+    case PatFrag::PK_Root:
+    case PatFrag::PK_MachineOperand: {
+      const bool IsPlainName = Op.isNamedOperand() && !Op.isNamedImmediate();
+      if (IsPlainName)
+        break;
+      PrintError(DiagLoc, "expected operand " + std::to_string(Idx) +
+                              " of '" + getInstName() +
+                              "' to be a MachineOperand; got " +
+                              Op.describe());
+      return false;
+    }
+    case PatFrag::PK_Imm:
+      if (!Op.hasImmValue()) {
+        PrintError(DiagLoc, "expected operand " + std::to_string(Idx) +
+                                " of '" + getInstName() +
+                                "' to be an immediate; got " + Op.describe());
+        return false;
+      }
+      if (Op.isNamedImmediate()) {
+        PrintError(DiagLoc, "operand " + std::to_string(Idx) + " of '" +
+                                getInstName() +
+                                "' cannot be a named immediate");
+        return false;
+      }
+      break;
+    }
+  }
+
+  return true;
+}
+
+/// Fills \p PatFragCEs with one expansion per PatFrag parameter, derived from
+/// the operand at the same index:
+///   - a named operand that has an expansion in \p ParentCEs reuses it;
+///   - a named operand without one is accepted only if
+///     PatFrag::handleUnboundInParam allows it (nothing is declared then);
+///   - an immediate operand is declared as the decimal text of its value.
+///
+/// \return false if an unbound operand was rejected, true otherwise.
+bool PatFragPattern::mapInputCodeExpansions(const CodeExpansions &ParentCEs,
+                                            CodeExpansions &PatFragCEs,
+                                            ArrayRef<SMLoc> DiagLoc) const {
+  for (const auto &[Idx, Op] : enumerate(operands())) {
+    const StringRef ParamName = PF.getParam(Idx).Name;
+
+    // A PFP operand is either named or an immediate, never a named immediate.
+    assert(!Op.isNamedImmediate());
+
+    if (Op.isNamedOperand()) {
+      const StringRef ArgName = Op.getOperandName();
+      // Only operands that were already defined by the parent get mapped.
+      const auto ParentIt = ParentCEs.find(ArgName);
+      if (ParentIt != ParentCEs.end())
+        PatFragCEs.declare(ParamName, ParentIt->second);
+      else if (!PF.handleUnboundInParam(ParamName, ArgName, DiagLoc))
+        return false;
+    } else if (Op.hasImmValue()) {
+      PatFragCEs.declare(ParamName, std::to_string(Op.getImmValue()));
+    } else {
+      llvm_unreachable("Unknown Operand Type!");
+    }
+  }
+
+  return true;
+}
+
+//===- BuiltinPattern -----------------------------------------------------===//
+
+/// Looks up the KnownBuiltins entry whose DefName equals the name of \p Def.
+///
+/// \p Def must derive from the builtin class (asserted). A def without a
+/// matching entry is reported through PrintFatalError at the def's location.
+BuiltinPattern::BuiltinInfo BuiltinPattern::getBuiltinInfo(const Record &Def) {
+  assert(Def.isSubClassOf(ClassName));
+
+  const StringRef Name = Def.getName();
+  const auto Match = llvm::find_if(KnownBuiltins, [Name](const BuiltinInfo &KBI) {
+    return KBI.DefName == Name;
+  });
+  if (Match != KnownBuiltins.end())
+    return *Match;
+
+  PrintFatalError(Def.getLoc(),
+                  "Unimplemented " + ClassName + " def '" + Name + "'");
+}
+
+/// Runs the generic InstructionPattern checks, then requires every operand to
+/// be a plain name (named, and not carrying an immediate value). The first
+/// offending operand is reported at \p Loc.
+///
+/// \return true if all operands are acceptable, false otherwise.
+bool BuiltinPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
+  const bool BaseOK = InstructionPattern::checkSemantics(Loc);
+  if (!BaseOK)
+    return false;
+
+  // For now all builtins just take names, no immediates.
+  unsigned OpIdx = 0;
+  for (const InstructionOperand &Op : operands()) {
+    const bool IsPlainName = Op.isNamedOperand() && !Op.isNamedImmediate();
+    if (!IsPlainName) {
+      PrintError(Loc, "expected operand " + std::to_string(OpIdx) + " of '" +
+                          getInstName() + "' to be a name");
+      return false;
+    }
+    ++OpIdx;
+  }
+
+  return true;
+}
+
+} // namespace gi
+} // namespace llvm

diff  --git a/llvm/utils/TableGen/GlobalISel/Patterns.h b/llvm/utils/TableGen/GlobalISel/Patterns.h
new file mode 100644
index 000000000000000..b3160552a21fef3
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/Patterns.h
@@ -0,0 +1,690 @@
+//===- Patterns.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Contains the Pattern hierarchy alongside helper classes such as
+/// PatFrag, MIFlagsInfo, PatternType, etc.
+///
+/// These classes are used by the GlobalISel Combiner backend to help parse,
+/// process and emit MIR patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_GLOBALISEL_PATTERNS_H
+#define LLVM_UTILS_GLOBALISEL_PATTERNS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include <memory>
+#include <optional>
+#include <string>
+
+namespace llvm {
+
+class Record;
+class SMLoc;
+class StringInit;
+class CodeExpansions;
+class CodeGenInstruction;
+
+namespace gi {
+
+class CXXPredicateCode;
+class LLTCodeGen;
+class LLTCodeGenOrTempType;
+class RuleMatcher;
+
+//===- PatternType --------------------------------------------------------===//
+
+/// Describes the type attached to a Pattern operand.
+///
+/// A PatternType is in one of three states:
+///   - none (the default-constructed state);
+///   - an LLT, identified by a ValueType record;
+///   - a special type, e.g. GITypeOf, which refers to another operand by name.
+class PatternType {
+public:
+  static constexpr StringLiteral SpecialTyClassName = "GISpecialType";
+  static constexpr StringLiteral TypeOfClassName = "GITypeOf";
+
+  enum PTKind : uint8_t {
+    PT_None,
+
+    PT_ValueType,
+    PT_TypeOf,
+  };
+
+  /// Creates a PatternType in the "none" state.
+  PatternType() : Kind(PT_None), Data() {}
+
+  /// Factory functions; both are defined out of line.
+  static std::optional<PatternType> get(ArrayRef<SMLoc> DiagLoc,
+                                        const Record *R, Twine DiagCtx);
+  static PatternType getTypeOf(StringRef OpName);
+
+  bool isNone() const { return Kind == PT_None; }
+  bool isLLT() const { return Kind == PT_ValueType; }
+  bool isTypeOf() const { return Kind == PT_TypeOf; }
+  /// GITypeOf is the only kind treated as special here.
+  bool isSpecial() const { return isTypeOf(); }
+
+  StringRef getTypeOfOpName() const;
+  const Record *getLLTRecord() const;
+
+  /// True for every state except "none".
+  explicit operator bool() const { return Kind != PT_None; }
+
+  bool operator==(const PatternType &Other) const;
+  bool operator!=(const PatternType &Other) const { return !(*this == Other); }
+
+  std::string str() const;
+
+private:
+  PatternType(PTKind Kind) : Kind(Kind), Data() {}
+
+  PTKind Kind;
+
+  /// Payload; which member is meaningful is described per field below.
+  union DataT {
+    DataT() : Str() {}
+
+    /// PT_ValueType -> ValueType Def.
+    const Record *Def;
+
+    /// PT_TypeOf -> Operand name (without the '$')
+    StringRef Str;
+  } Data;
+};
+
+//===- Pattern Base Class -------------------------------------------------===//
+
+/// Common base of every pattern that may be written inside an `apply`,
+/// `match` or `pattern` DAG operator.
+///
+/// As an example,
+///
+///     (apply (G_ZEXT $x, $y), (G_ZEXT $y, $z), "return isFoo(${z})")
+///
+/// is represented by three Pattern objects:
+///   - Two CodeGenInstruction Patterns
+///   - A CXXPattern
+class Pattern {
+public:
+  /// Kind discriminator; the subclasses test it in their classof().
+  enum {
+    K_AnyOpcode,
+    K_CXX,
+
+    K_CodeGenInstruction,
+    K_PatFrag,
+    K_Builtin,
+  };
+
+  virtual ~Pattern() = default;
+
+  unsigned getKind() const { return Kind; }
+  const char *getKindName() const;
+
+  StringRef getName() const { return Name; }
+  bool hasName() const { return !getName().empty(); }
+
+  virtual void print(raw_ostream &OS, bool PrintName = true) const = 0;
+  void dump() const;
+
+protected:
+  /// \p Name must not be empty (asserted). Only the StringRef is stored, so
+  /// the characters it refers to are not owned by the Pattern.
+  Pattern(unsigned Kind, StringRef Name) : Kind(Kind), Name(Name) {
+    assert(hasName() && "unnamed pattern!");
+  }
+
+  void printImpl(raw_ostream &OS, bool PrintName,
+                 function_ref<void()> ContentPrinter) const;
+
+private:
+  unsigned Kind;
+  StringRef Name;
+};
+
+//===- AnyOpcodePattern ---------------------------------------------------===//
+
+/// Pattern for `wip_match_opcode`.
+///
+/// It holds a list of opcodes and matches any of them; operands are not
+/// checked whatsoever.
+///
+/// TODO: Long-term, this needs to be removed. It's a hack around MIR
+///       pattern matching limitations.
+class AnyOpcodePattern : public Pattern {
+public:
+  AnyOpcodePattern(StringRef Name) : Pattern(K_AnyOpcode, Name) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_AnyOpcode; }
+
+  /// Appends \p I to the list of accepted instructions.
+  void addOpcode(const CodeGenInstruction *I) { Insts.push_back(I); }
+
+  /// The accepted instructions, in insertion order.
+  const SmallVector<const CodeGenInstruction *, 4> &insts() const {
+    return Insts;
+  }
+
+  void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+  SmallVector<const CodeGenInstruction *, 4> Insts;
+};
+
+//===- CXXPattern ---------------------------------------------------------===//
+
+/// A pattern made of raw C++ code that may still contain expansions.
+///
+///   e.g. [{ return isFooBux(${src}.getReg()); }]
+///
+/// The expanded form is a CXXPredicateCode (\see CXXPredicateCode), obtained
+/// through `expandCode`. \see CodeExpander and \see CodeExpansions for details
+/// on code expansions.
+///
+/// The class serves two purposes:
+///   - It is the pattern-list entry for a piece of C++ code.
+///   - It is a factory for expanded C++ code. Only the raw code is stored and
+///     `expandCode` is const, so the same CXX pattern can be expanded several
+///     times.
+///
+/// The code is trimmed on construction, i.e. leading and trailing whitespace
+/// is dropped. This removes bloat in the output, avoids formatting issues, and
+/// makes checks such as `.startswith("return")` trivial.
+class CXXPattern : public Pattern {
+public:
+  CXXPattern(const StringInit &Code, StringRef Name);
+
+  /// Stores a trimmed copy of \p Code.
+  CXXPattern(StringRef Code, StringRef Name)
+      : Pattern(K_CXX, Name), RawCode(Code.trim().str()) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_CXX; }
+
+  void setIsApply(bool Value = true) { IsApply = Value; }
+
+  /// The trimmed, unexpanded code.
+  StringRef getRawCode() const { return RawCode; }
+
+  /// Expands the raw code, replacing things such as `${foo}` with their
+  /// substitution in \p CE.
+  ///
+  /// \param CE     Map of Code Expansions
+  /// \param Locs   SMLocs for the Code Expander, in case it needs to emit
+  ///               diagnostics.
+  /// \param AddComment Optionally called to emit a comment before the expanded
+  ///                   code.
+  ///
+  /// \return A CXXPredicateCode object that contains the expanded code. Note
+  /// that this may or may not insert a new object. All CXXPredicateCode objects
+  /// are held in a set to avoid emitting duplicate C++ code.
+  const CXXPredicateCode &
+  expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
+             function_ref<void(raw_ostream &)> AddComment = {}) const;
+
+  void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+  bool IsApply = false;
+  std::string RawCode;
+};
+
+//===- InstructionPattern ---------------------------------------------===//
+
+/// An operand for an InstructionPattern.
+///
+/// Operands are composed of three elements:
+///   - (Optional) Value
+///   - (Optional) Name
+///   - (Optional) Type
+///
+/// Some examples:
+///   (i32 0):$x -> V=int(0), Name='x', Type=i32
+///   0:$x -> V=int(0), Name='x'
+///   $x -> Name='x'
+///   i32:$x -> Name='x', Type = i32
+///
+/// The name is kept as a StringRef, so the operand does not own the
+/// characters it refers to.
+class InstructionOperand {
+public:
+  using IntImmTy = int64_t;
+
+  /// Creates an immediate operand; \p Name may be empty.
+  InstructionOperand(IntImmTy Imm, StringRef Name, PatternType Type)
+      : Value(Imm), Name(Name), Type(Type) {}
+
+  /// Creates an operand without an immediate value.
+  InstructionOperand(StringRef Name, PatternType Type)
+      : Name(Name), Type(Type) {}
+
+  /// True if the operand has both an immediate value and a name.
+  bool isNamedImmediate() const { return hasImmValue() && isNamedOperand(); }
+
+  bool hasImmValue() const { return Value.has_value(); }
+
+  /// Returns the immediate value. Must only be called when hasImmValue() is
+  /// true: dereferencing an empty optional is undefined behavior, so the
+  /// precondition is asserted (mirroring getOperandName()).
+  IntImmTy getImmValue() const {
+    assert(hasImmValue() && "Operand has no immediate value");
+    return *Value;
+  }
+
+  bool isNamedOperand() const { return !Name.empty(); }
+  StringRef getOperandName() const {
+    assert(isNamedOperand() && "Operand is unnamed");
+    return Name;
+  }
+
+  /// Returns a copy of this operand whose name is \p NewName; everything else
+  /// is carried over unchanged.
+  InstructionOperand withNewName(StringRef NewName) const {
+    InstructionOperand Result = *this;
+    Result.Name = NewName;
+    return Result;
+  }
+
+  // The parameter is deliberately not called 'Value' so that it does not
+  // shadow the immediate-value member of the same name.
+  void setIsDef(bool IsDef = true) { Def = IsDef; }
+  bool isDef() const { return Def; }
+
+  /// Sets the type. An existing type may only be "overwritten" with an equal
+  /// one (asserted).
+  void setType(PatternType NewType) {
+    assert((!Type || (Type == NewType)) && "Overwriting type!");
+    Type = NewType;
+  }
+  PatternType getType() const { return Type; }
+
+  std::string describe() const;
+  void print(raw_ostream &OS) const;
+
+  void dump() const;
+
+private:
+  std::optional<IntImmTy> Value;
+  StringRef Name;
+  PatternType Type;
+  bool Def = false;
+};
+
+/// Shared base of the patterns that consist of a (pseudo) instruction plus a
+/// list of operands: CodeGenInstructionPattern, PatFragPattern and
+/// BuiltinPattern. It takes care of the operand-list boilerplate.
+class InstructionPattern : public Pattern {
+public:
+  virtual ~InstructionPattern() = default;
+
+  static bool classof(const Pattern *P) {
+    const unsigned K = P->getKind();
+    return K == K_CodeGenInstruction || K == K_PatFrag || K == K_Builtin;
+  }
+
+  /// Appends an operand, forwarding \p Init to an InstructionOperand
+  /// constructor.
+  template <typename... Ty> void addOperand(Ty &&...Init) {
+    Operands.emplace_back(std::forward<Ty>(Init)...);
+  }
+
+  auto &operands() { return Operands; }
+  const auto &operands() const { return Operands; }
+  unsigned operands_size() const { return Operands.size(); }
+  InstructionOperand &getOperand(unsigned K) { return Operands[K]; }
+  const InstructionOperand &getOperand(unsigned K) const { return Operands[K]; }
+
+  /// When this InstructionPattern is used as the match root, returns the
+  /// operands that must be redefined in the 'apply' pattern for the rule to be
+  /// valid.
+  ///
+  /// For most patterns, this just returns the defs.
+  /// For PatFrag this only returns the root of the PF.
+  ///
+  /// Returns an empty array on error.
+  virtual ArrayRef<InstructionOperand> getApplyDefsNeeded() const {
+    // The defs are the first getNumInstDefs() entries of the operand list.
+    return ArrayRef<InstructionOperand>(Operands.data(), getNumInstDefs());
+  }
+
+  /// Lazily filtered view of the operands that have a name.
+  auto named_operands() {
+    return make_filter_range(
+        Operands, [](InstructionOperand &O) { return O.isNamedOperand(); });
+  }
+
+  auto named_operands() const {
+    return make_filter_range(Operands, [](const InstructionOperand &O) {
+      return O.isNamedOperand();
+    });
+  }
+
+  virtual bool isVariadic() const { return false; }
+  virtual unsigned getNumInstOperands() const = 0;
+  virtual unsigned getNumInstDefs() const = 0;
+
+  /// True if there are at least as many operands as the instruction has defs.
+  bool hasAllDefs() const { return getNumInstDefs() <= operands_size(); }
+
+  virtual StringRef getInstName() const = 0;
+
+  /// Diagnoses all uses of special types in this Pattern and returns true if at
+  /// least one diagnostic was emitted.
+  bool diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc, Twine Msg) const;
+
+  void reportUnreachable(ArrayRef<SMLoc> Locs) const;
+  virtual bool checkSemantics(ArrayRef<SMLoc> Loc);
+
+  void print(raw_ostream &OS, bool PrintName = true) const override;
+
+protected:
+  InstructionPattern(unsigned K, StringRef Name) : Pattern(K, Name) {}
+
+  /// Hook for subclasses to print additional content; prints nothing by
+  /// default.
+  virtual void printExtras(raw_ostream &OS) const {}
+
+  SmallVector<InstructionOperand, 4> Operands;
+};
+
+//===- OperandTable -------------------------------------------------------===//
+
+/// Maps InstructionPattern operands to their definitions. This allows us to tie
+/// different patterns of a (apply), (match) or (patterns) set of patterns
+/// together.
+class OperandTable {
+public:
+  /// Registers the operands of \p P. \p DiagnoseRedef receives an operand
+  /// name; the callers in this patch use it to report a redefinition.
+  bool addPattern(InstructionPattern *P,
+                  function_ref<void(StringRef)> DiagnoseRedef);
+
+  /// Outcome of a lookup: whether the name is in the table and, if so, the
+  /// pattern recorded for it (which may be null).
+  struct LookupResult {
+    LookupResult() = default;
+    LookupResult(InstructionPattern *Def) : Found(true), Def(Def) {}
+
+    bool Found = false;
+    InstructionPattern *Def = nullptr;
+
+    /// A live-in is an operand that is in the table without a defining
+    /// pattern.
+    bool isLiveIn() const { return Found && !Def; }
+  };
+
+  /// Looks \p OpName up; the result has Found == false for unknown names.
+  LookupResult lookup(StringRef OpName) const {
+    const auto It = Table.find(OpName);
+    if (It == Table.end())
+      return LookupResult();
+    return LookupResult(It->second);
+  }
+
+  /// Shorthand for lookup(OpName).Def; null both for unknown names and for
+  /// names without a defining pattern.
+  InstructionPattern *getDef(StringRef OpName) const {
+    return lookup(OpName).Def;
+  }
+
+  void print(raw_ostream &OS, StringRef Name = "", StringRef Indent = "") const;
+
+  auto begin() const { return Table.begin(); }
+  auto end() const { return Table.end(); }
+
+  void dump() const;
+
+private:
+  StringMap<InstructionPattern *> Table;
+};
+
+//===- CodeGenInstructionPattern ------------------------------------------===//
+
+/// Holds the data associated with a MIFlags operand: three ordered sets of
+/// names, one each for flags to set, flags to unset, and instructions to copy
+/// flags from.
+class MIFlagsInfo {
+public:
+  void addSetFlag(const Record *R);
+  void addUnsetFlag(const Record *R);
+  void addCopyFlag(StringRef InstName);
+
+  const auto &set_flags() const { return SetF; }
+  const auto &unset_flags() const { return UnsetF; }
+  const auto &copy_flags() const { return CopyF; }
+
+private:
+  SetVector<StringRef> SetF;
+  SetVector<StringRef> UnsetF;
+  SetVector<StringRef> CopyF;
+};
+
+/// Pattern for a CodeGenInstruction, e.g. `G_ADD $x, $y, $z`.
+///
+/// The instruction is held by reference, and an MIFlagsInfo is attached only
+/// on demand through getOrCreateMIFlagsInfo().
+class CodeGenInstructionPattern : public InstructionPattern {
+public:
+  CodeGenInstructionPattern(const CodeGenInstruction &I, StringRef Name)
+      : InstructionPattern(K_CodeGenInstruction, Name), I(I) {}
+
+  static bool classof(const Pattern *P) {
+    return P->getKind() == K_CodeGenInstruction;
+  }
+
+  bool is(StringRef OpcodeName) const;
+
+  bool hasVariadicDefs() const;
+  bool isVariadic() const override;
+  unsigned getNumInstDefs() const override;
+  unsigned getNumInstOperands() const override;
+
+  MIFlagsInfo &getOrCreateMIFlagsInfo();
+
+  /// Returns the attached MIFlagsInfo, or null if there is none.
+  const MIFlagsInfo *getMIFlagsInfo() const { return FI.get(); }
+
+  const CodeGenInstruction &getInst() const { return I; }
+  StringRef getInstName() const override;
+
+private:
+  void printExtras(raw_ostream &OS) const override;
+
+  const CodeGenInstruction &I;
+  std::unique_ptr<MIFlagsInfo> FI;
+};
+
+//===- OperandTypeChecker -------------------------------------------------===//
+
+/// A trivial type checker for all operands of a set of InstructionPatterns.
+///
+/// Usage is in two steps: `check` infers the type of each operand and checks
+/// that it is consistent with what is already known about that operand;
+/// `propagateTypes` then sets the types on all operands.
+///
+/// It also handles verifying correctness of special types.
+///
+/// The diagnostic locations are kept as an ArrayRef, i.e. a non-owning view.
+class OperandTypeChecker {
+public:
+  OperandTypeChecker(ArrayRef<SMLoc> DiagLoc) : DiagLoc(DiagLoc) {}
+
+  /// Step 1: Check each pattern one by one. All patterns that pass through here
+  /// are added to a common worklist so propagateTypes can access them.
+  bool check(InstructionPattern &P,
+             std::function<bool(const PatternType &)> VerifyTypeOfOperand);
+
+  /// Step 2: Propagate all types. e.g. if one use of "$a" has type i32, make
+  /// all uses of "$a" have type i32.
+  void propagateTypes();
+
+protected:
+  ArrayRef<SMLoc> DiagLoc;
+
+private:
+  using InconsistentTypeDiagFn = std::function<void()>;
+
+  void PrintSeenWithTypeIn(InstructionPattern &P, StringRef OpName,
+                           PatternType Ty) const;
+
+  /// What is known about one operand name: its type, plus a callback that
+  /// defaults to doing nothing.
+  struct OpTypeInfo {
+    PatternType Type;
+    InconsistentTypeDiagFn PrintTypeSrcNote = [] {};
+  };
+
+  /// Per-operand information, keyed by operand name.
+  StringMap<OpTypeInfo> Types;
+
+  /// Worklist of the patterns handed to check().
+  SmallVector<InstructionPattern *, 16> Pats;
+};
+
+//===- PatFrag ------------------------------------------------------------===//
+
+/// Represents a parsed GICombinePatFrag. This can be thought of as the
+/// equivalent of a CodeGenInstruction, but for PatFragPatterns.
+///
+/// PatFrags are made of 3 things:
+///   - Out parameters (defs)
+///   - In parameters
+///   - A set of pattern lists (alternatives).
+///
+/// If the PatFrag uses instruction patterns, the root must be one of the defs.
+///
+/// Note that this DOES NOT represent the use of the PatFrag, only its
+/// definition. The use of the PatFrag in a Pattern is represented by
+/// PatFragPattern.
+///
+/// PatFrags use the term "parameter" instead of operand because they're
+/// essentially macros, and using that name avoids confusion. Other than that,
+/// they're structured similarly to a MachineInstruction - all parameters
+/// (operands) are in the same list, with defs at the start. This helps mapping
+/// parameters to values, because param N of a PatFrag is always operand N of a
+/// PatFragPattern.
+class PatFrag {
+public:
+  static constexpr StringLiteral ClassName = "GICombinePatFrag";
+
+  enum ParamKind {
+    PK_Root,
+    PK_MachineOperand,
+    PK_Imm,
+  };
+
+  /// A single parameter. The name is a StringRef, so it is not owned here.
+  struct Param {
+    StringRef Name;
+    ParamKind Kind;
+  };
+
+  using ParamVec = SmallVector<Param, 4>;
+  using ParamIt = ParamVec::const_iterator;
+
+  /// Represents an alternative of the PatFrag. When parsing a GICombinePatFrag,
+  /// this is created from its "Alternatives" list. Each alternative is a list
+  /// of patterns wrapped in a `(pattern ...)` dag init.
+  ///
+  /// Each argument to the `pattern` DAG operator is parsed into a Pattern
+  /// instance.
+  struct Alternative {
+    OperandTable OpTable;
+    SmallVector<std::unique_ptr<Pattern>, 4> Pats;
+  };
+
+  explicit PatFrag(const Record &Def);
+
+  static StringRef getParamKindStr(ParamKind OK);
+
+  StringRef getName() const;
+
+  const Record &getDef() const { return Def; }
+  ArrayRef<SMLoc> getLoc() const;
+
+  /// Appends a new, empty alternative and returns a reference to it.
+  Alternative &addAlternative() { return Alts.emplace_back(); }
+  const Alternative &getAlternative(unsigned K) const { return Alts[K]; }
+  unsigned num_alternatives() const { return Alts.size(); }
+
+  void addInParam(StringRef Name, ParamKind Kind);
+  iterator_range<ParamIt> in_params() const;
+  unsigned num_in_params() const { return Params.size() - NumOutParams; }
+
+  void addOutParam(StringRef Name, ParamKind Kind);
+  iterator_range<ParamIt> out_params() const;
+  unsigned num_out_params() const { return NumOutParams; }
+
+  unsigned num_roots() const;
+  unsigned num_params() const { return num_in_params() + num_out_params(); }
+
+  /// Finds the parameter \p Name and returns its index, or -1 if not found.
+  /// NOTE(review): the return type is unsigned, so "-1" presumably means
+  /// `(unsigned)-1` - confirm against the definition before comparing.
+  /// Remember that all params are part of the same list, with out params at the
+  /// start. This means that the index returned can be used to access operands
+  /// of InstructionPatterns.
+  unsigned getParamIdx(StringRef Name) const;
+  const Param &getParam(unsigned K) const { return Params[K]; }
+
+  bool canBeMatchRoot() const { return num_roots() == 1; }
+
+  void print(raw_ostream &OS, StringRef Indent = "") const;
+  void dump() const;
+
+  /// Checks if the in-param \p ParamName can be unbound or not.
+  /// \p ArgName is the name of the argument passed to the PatFrag; it is only
+  /// used in diagnostics.
+  ///
+  /// As implemented, an argument can be unbound only if the operand table of
+  /// every alternative contains \p ParamName, i.e. every alternative has an
+  /// InstructionPattern that uses the parameter. An alternative that never
+  /// mentions it in an instruction pattern (for instance one that only
+  /// references it from C++ code) causes the check to fail.
+  ///
+  /// e.g. in (MyPatFrag $foo), if $foo has never been seen before (= it's
+  /// unbound), this checks if MyPatFrag supports it or not.
+  ///
+  /// \return true if leaving the parameter unbound is fine; false after an
+  /// error and a note were emitted at \p DiagLoc.
+  bool handleUnboundInParam(StringRef ParamName, StringRef ArgName,
+                            ArrayRef<SMLoc> DiagLoc) const;
+
+  bool checkSemantics();
+
+  /// Builds the OperandTable of each alternative from its instruction
+  /// patterns; returns false if a pattern could not be added.
+  bool buildOperandsTables();
+
+private:
+  static void printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params);
+
+  /// Emits \p Msg as an error attached to the defining record.
+  void PrintError(Twine Msg) const;
+
+  const Record &Def;
+  /// Number of leading entries of Params that are out params.
+  unsigned NumOutParams = 0;
+  ParamVec Params;
+  SmallVector<Alternative, 2> Alts;
+};
+
+//===- PatFragPattern -----------------------------------------------------===//
+
+/// A use of a GICombinePatFrag inside a pattern list.
+///
+/// The PatFrag definition is held by reference. Defs and operand counts are
+/// taken from its out params and its total parameter count respectively.
+class PatFragPattern : public InstructionPattern {
+public:
+  PatFragPattern(const PatFrag &PF, StringRef Name)
+      : InstructionPattern(K_PatFrag, Name), PF(PF) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_PatFrag; }
+
+  const PatFrag &getPatFrag() const { return PF; }
+  StringRef getInstName() const override { return PF.getName(); }
+
+  unsigned getNumInstOperands() const override { return PF.num_params(); }
+  unsigned getNumInstDefs() const override { return PF.num_out_params(); }
+
+  ArrayRef<InstructionOperand> getApplyDefsNeeded() const override;
+
+  bool checkSemantics(ArrayRef<SMLoc> DiagLoc) override;
+
+  /// Before emitting the patterns inside the PatFrag, add all necessary code
+  /// expansions to \p PatFragCEs imported from \p ParentCEs.
+  ///
+  /// For a MachineOperand PatFrag parameter, this will fetch the expansion for
+  /// that operand from \p ParentCEs and add it to \p PatFragCEs. Errors can be
+  /// emitted if the MachineOperand reference is unbound.
+  ///
+  /// For an Immediate PatFrag parameter this simply adds the integer value to
+  /// \p PatFragCEs as an expansion.
+  ///
+  /// \param ParentCEs Contains all of the code expansions declared by the other
+  ///                  patterns emitted so far in the pattern list containing
+  ///                  this PatFragPattern.
+  /// \param PatFragCEs Output Code Expansions (usually empty)
+  /// \param DiagLoc    Diagnostic loc in case an error occurs.
+  /// \return `true` on success, `false` on failure.
+  bool mapInputCodeExpansions(const CodeExpansions &ParentCEs,
+                              CodeExpansions &PatFragCEs,
+                              ArrayRef<SMLoc> DiagLoc) const;
+
+private:
+  const PatFrag &PF;
+};
+
+//===- BuiltinPattern -----------------------------------------------------===//
+
+/// Identifies which builtin a BuiltinPattern stands for.
+enum BuiltinKind {
+  BI_ReplaceReg,
+  BI_EraseRoot,
+};
+
+/// Represents builtin instructions such as "GIReplaceReg" and "GIEraseRoot".
+///
+/// The properties of each supported builtin (def name, kind, operand and def
+/// counts) come from the KnownBuiltins table.
+class BuiltinPattern : public InstructionPattern {
+  /// Static description of one builtin.
+  struct BuiltinInfo {
+    StringLiteral DefName;
+    BuiltinKind Kind;
+    unsigned NumOps;
+    unsigned NumDefs;
+  };
+
+  static constexpr std::array<BuiltinInfo, 2> KnownBuiltins = {{
+      {"GIReplaceReg", BI_ReplaceReg, 2, 1},
+      {"GIEraseRoot", BI_EraseRoot, 0, 0},
+  }};
+
+public:
+  static constexpr StringLiteral ClassName = "GIBuiltinInst";
+
+  /// Resolves \p Def to its KnownBuiltins entry via getBuiltinInfo.
+  BuiltinPattern(const Record &Def, StringRef Name)
+      : InstructionPattern(K_Builtin, Name), I(getBuiltinInfo(Def)) {}
+
+  static bool classof(const Pattern *P) { return P->getKind() == K_Builtin; }
+
+  BuiltinKind getBuiltinKind() const { return I.Kind; }
+  StringRef getInstName() const override { return I.DefName; }
+  unsigned getNumInstDefs() const override { return I.NumDefs; }
+  unsigned getNumInstOperands() const override { return I.NumOps; }
+
+  bool checkSemantics(ArrayRef<SMLoc> Loc) override;
+
+private:
+  static BuiltinInfo getBuiltinInfo(const Record &Def);
+
+  /// Copy of the table entry for this builtin.
+  BuiltinInfo I;
+};
+
+} // namespace gi
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_GLOBALISEL_PATTERNS_H

diff  --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 071586240e8fb5a..89aca87a28ec0dc 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -28,9 +28,12 @@
 
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
+#include "GlobalISel/CXXPredicates.h"
 #include "GlobalISel/CodeExpander.h"
 #include "GlobalISel/CodeExpansions.h"
 #include "GlobalISel/CombinerUtils.h"
+#include "GlobalISel/MatchDataInfo.h"
+#include "GlobalISel/Patterns.h"
 #include "GlobalISelMatchTable.h"
 #include "GlobalISelMatchTableExecutorEmitter.h"
 #include "SubtargetFeatureInfo.h"
@@ -76,1618 +79,74 @@ cl::opt<bool> DebugTypeInfer("gicombiner-debug-typeinfer",
 
 constexpr StringLiteral CXXApplyPrefix = "GICXXCustomAction_CombineApply";
 constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_";
-constexpr StringLiteral PatFragClassName = "GICombinePatFrag";
-constexpr StringLiteral BuiltinInstClassName = "GIBuiltinInst";
-constexpr StringLiteral SpecialTyClassName = "GISpecialType";
-constexpr StringLiteral TypeOfClassName = "GITypeOf";
 constexpr StringLiteral MIFlagsEnumClassName = "MIFlagEnum";
 
-std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
-  return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
-}
-
-/// Copies a StringRef into a static pool to make sure it has a static lifetime.
-StringRef insertStrRef(StringRef S) {
-  if (S.empty())
-    return {};
-
-  static StringSet<> Pool;
-  auto [It, Inserted] = Pool.insert(S);
-  return It->getKey();
-}
-
-void declareInstExpansion(CodeExpansions &CE, const InstructionMatcher &IM,
-                          StringRef Name) {
-  CE.declare(Name, "State.MIs[" + to_string(IM.getInsnVarID()) + "]");
-}
-
-void declareInstExpansion(CodeExpansions &CE, const BuildMIAction &A,
-                          StringRef Name) {
-  // Note: we use redeclare here because this may overwrite a matcher inst
-  // expansion.
-  CE.redeclare(Name, "OutMIs[" + to_string(A.getInsnID()) + "]");
-}
-
-void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM,
-                             StringRef Name) {
-  CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) +
-                       "]->getOperand(" + to_string(OM.getOpIdx()) + ")");
-}
-
-void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID,
-                             StringRef Name) {
-  CE.declare(Name, "State.TempRegisters[" + to_string(TempRegID) + "]");
-}
-
-std::string makeAnonPatName(StringRef Prefix, unsigned Idx) {
-  return ("__" + Prefix + "_" + Twine(Idx)).str();
-}
-
-template <typename Container> auto keys(Container &&C) {
-  return map_range(C, [](auto &Entry) -> auto & { return Entry.first; });
-}
-
-template <typename Container> auto values(Container &&C) {
-  return map_range(C, [](auto &Entry) -> auto & { return Entry.second; });
-}
-
-//===- MatchData Handling -------------------------------------------------===//
-
-/// Represents MatchData defined by the match stage and required by the apply
-/// stage.
-///
-/// This allows the plumbing of arbitrary data from C++ predicates between the
-/// stages.
-///
-/// When this class is initially created, it only has a pattern symbol and a
-/// type. When all of the MatchDatas declarations of a given pattern have been
-/// parsed, `AssignVariables` must be called to assign storage variable names to
-/// each MatchDataInfo.
-class MatchDataInfo {
-  StringRef PatternSymbol;
-  StringRef Type;
-  std::string VarName;
-
-public:
-  static constexpr StringLiteral StructTypeName = "MatchInfosTy";
-  static constexpr StringLiteral StructName = "MatchInfos";
-
-  MatchDataInfo(StringRef PatternSymbol, StringRef Type)
-      : PatternSymbol(PatternSymbol), Type(Type.trim()) {}
-
-  StringRef getPatternSymbol() const { return PatternSymbol; };
-  StringRef getType() const { return Type; };
-
-  bool hasVariableName() const { return !VarName.empty(); }
-  void setVariableName(StringRef Name) { VarName = Name; }
-  StringRef getVariableName() const;
-
-  std::string getQualifiedVariableName() const {
-    return StructName.str() + "." + getVariableName().str();
-  }
-
-  void print(raw_ostream &OS) const;
-  void dump() const { print(dbgs()); }
-};
-
-StringRef MatchDataInfo::getVariableName() const {
-  assert(hasVariableName());
-  return VarName;
-}
-
-void MatchDataInfo::print(raw_ostream &OS) const {
-  OS << "(MatchDataInfo pattern_symbol:" << PatternSymbol << " type:'" << Type
-     << "' var_name:" << (VarName.empty() ? "<unassigned>" : VarName) << ")";
-}
-
-/// Pool of type -> variables used to emit MatchData variables declarations.
-///
-/// e.g. if the map contains "int64_t" -> ["MD0", "MD1"], then two variable
-/// declarations must be emitted: `int64_t MD0` and `int64_t MD1`.
-///
-/// This has a static lifetime and will outlive all the `MatchDataInfo` objects
-/// by design. It needs to persist after all `CombineRuleBuilder` objects died
-/// so we can emit the variable declarations.
-StringMap<std::vector<std::string>> AllMatchDataVars;
-
-// Assign variable names to all MatchDatas used by a pattern. This must be
-// called after all MatchData decls have been parsed inside a rule.
-//
-// Requires an array of MatchDataInfo so we can handle cases where a pattern
-// uses multiple instances of the same MatchData type.
-void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos) {
-  static unsigned NextVarID = 0;
-
-  StringMap<unsigned> SeenTypes;
-  for (auto &Info : Infos) {
-    unsigned &NumSeen = SeenTypes[Info.getType()];
-    auto &ExistingVars = AllMatchDataVars[Info.getType()];
-
-    if (NumSeen == ExistingVars.size())
-      ExistingVars.push_back("MDInfo" + to_string(NextVarID++));
-
-    Info.setVariableName(ExistingVars[NumSeen++]);
-  }
-}
-
-//===- C++ Predicates Handling --------------------------------------------===//
-
-/// Entry into the static pool of all CXX Predicate code. This contains
-/// fully expanded C++ code.
-///
-/// The static pool is hidden inside the object and can be accessed through
-/// getAllMatchCode/getAllApplyCode
-///
-/// Note that CXXPattern trims C++ code, so the Code is already expected to be
-/// free of leading/trailing whitespace.
-class CXXPredicateCode {
-  using CXXPredicateCodePool =
-      DenseMap<hash_code, std::unique_ptr<CXXPredicateCode>>;
-  static CXXPredicateCodePool AllCXXMatchCode;
-  static CXXPredicateCodePool AllCXXApplyCode;
-
-  /// Sorts a `CXXPredicateCodePool` by their IDs and returns it.
-  static std::vector<const CXXPredicateCode *>
-  getSorted(const CXXPredicateCodePool &Pool) {
-    std::vector<const CXXPredicateCode *> Out;
-    std::transform(Pool.begin(), Pool.end(), std::back_inserter(Out),
-                   [&](auto &Elt) { return Elt.second.get(); });
-    sort(Out, [](const auto *A, const auto *B) { return A->ID < B->ID; });
-    return Out;
-  }
-
-  /// Gets an instance of `CXXPredicateCode` for \p Code, or returns an already
-  /// existing one.
-  static const CXXPredicateCode &get(CXXPredicateCodePool &Pool,
-                                     std::string Code) {
-    // Check if we already have an identical piece of code, if not, create an
-    // entry in the pool.
-    const auto CodeHash = hash_value(Code);
-    if (auto It = Pool.find(CodeHash); It != Pool.end())
-      return *It->second;
-
-    const auto ID = Pool.size();
-    auto OwnedData = std::unique_ptr<CXXPredicateCode>(
-        new CXXPredicateCode(std::move(Code), ID));
-    const auto &DataRef = *OwnedData;
-    Pool[CodeHash] = std::move(OwnedData);
-    return DataRef;
-  }
-
-  CXXPredicateCode(std::string Code, unsigned ID)
-      : Code(Code), ID(ID), BaseEnumName("GICombiner" + to_string(ID)) {
-    // Don't assert if ErrorsPrinted is set. This may mean CodeExpander failed,
-    // and it may add spaces in such cases.
-    assert((ErrorsPrinted || StringRef(Code).trim() == Code) &&
-           "Code was expected to be trimmed!");
-  }
-
-public:
-  static const CXXPredicateCode &getMatchCode(std::string Code) {
-    return get(AllCXXMatchCode, std::move(Code));
-  }
-
-  static const CXXPredicateCode &getApplyCode(std::string Code) {
-    return get(AllCXXApplyCode, std::move(Code));
-  }
-
-  static std::vector<const CXXPredicateCode *> getAllMatchCode() {
-    return getSorted(AllCXXMatchCode);
-  }
-
-  static std::vector<const CXXPredicateCode *> getAllApplyCode() {
-    return getSorted(AllCXXApplyCode);
-  }
-
-  const std::string Code;
-  const unsigned ID;
-  const std::string BaseEnumName;
-
-  bool needsUnreachable() const {
-    return !StringRef(Code).starts_with("return");
-  }
-
-  std::string getEnumNameWithPrefix(StringRef Prefix) const {
-    return Prefix.str() + BaseEnumName;
-  }
-};
-
-CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXMatchCode;
-CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXApplyCode;
-
-//===- PatternType --------------------------------------------------------===//
-
-/// Represent the type of a Pattern Operand.
-///
-/// Types have two form:
-///   - LLTs, which are straightforward.
-///   - Special types, e.g. GITypeOf
-class PatternType {
-public:
-  enum PTKind : uint8_t {
-    PT_None,
-
-    PT_ValueType,
-    PT_TypeOf,
-  };
-
-  PatternType() : Kind(PT_None), Data() {}
-
-  static std::optional<PatternType> get(ArrayRef<SMLoc> DiagLoc,
-                                        const Record *R, Twine DiagCtx);
-  static PatternType getTypeOf(StringRef OpName);
-
-  bool isNone() const { return Kind == PT_None; }
-  bool isLLT() const { return Kind == PT_ValueType; }
-  bool isSpecial() const { return isTypeOf(); }
-  bool isTypeOf() const { return Kind == PT_TypeOf; }
-
-  StringRef getTypeOfOpName() const;
-  LLTCodeGen getLLTCodeGen() const;
-
-  LLTCodeGenOrTempType getLLTCodeGenOrTempType(RuleMatcher &RM) const;
-
-  explicit operator bool() const { return !isNone(); }
-
-  bool operator==(const PatternType &Other) const;
-  bool operator!=(const PatternType &Other) const { return !operator==(Other); }
-
-  std::string str() const;
-
-private:
-  PatternType(PTKind Kind) : Kind(Kind), Data() {}
-
-  PTKind Kind;
-  union DataT {
-    DataT() : Str() {}
-
-    /// PT_ValueType -> ValueType Def.
-    const Record *Def;
-
-    /// PT_TypeOf -> Operand name (without the '$')
-    StringRef Str;
-  } Data;
-};
-
-std::optional<PatternType> PatternType::get(ArrayRef<SMLoc> DiagLoc,
-                                            const Record *R, Twine DiagCtx) {
-  assert(R);
-  if (R->isSubClassOf("ValueType")) {
-    PatternType PT(PT_ValueType);
-    PT.Data.Def = R;
-    return PT;
-  }
-
-  if (R->isSubClassOf(TypeOfClassName)) {
-    auto RawOpName = R->getValueAsString("OpName");
-    if (!RawOpName.starts_with("$")) {
-      PrintError(DiagLoc, DiagCtx + ": invalid operand name format '" +
-                              RawOpName + "' in " + TypeOfClassName +
-                              ": expected '$' followed by an operand name");
-      return std::nullopt;
-    }
-
-    PatternType PT(PT_TypeOf);
-    PT.Data.Str = RawOpName.drop_front(1);
-    return PT;
-  }
-
-  PrintError(DiagLoc, DiagCtx + ": unknown type '" + R->getName() + "'");
-  return std::nullopt;
-}
-
-PatternType PatternType::getTypeOf(StringRef OpName) {
-  PatternType PT(PT_TypeOf);
-  PT.Data.Str = OpName;
-  return PT;
-}
-
-StringRef PatternType::getTypeOfOpName() const {
-  assert(isTypeOf());
-  return Data.Str;
-}
-
-LLTCodeGen PatternType::getLLTCodeGen() const {
-  assert(isLLT());
-  return *MVTToLLT(getValueType(Data.Def));
-}
-
-LLTCodeGenOrTempType
-PatternType::getLLTCodeGenOrTempType(RuleMatcher &RM) const {
-  assert(!isNone());
-
-  if (isLLT())
-    return getLLTCodeGen();
-
-  assert(isTypeOf());
-  auto &OM = RM.getOperandMatcher(getTypeOfOpName());
-  return OM.getTempTypeIdx(RM);
-}
-
-bool PatternType::operator==(const PatternType &Other) const {
-  if (Kind != Other.Kind)
-    return false;
-
-  switch (Kind) {
-  case PT_None:
-    return true;
-  case PT_ValueType:
-    return Data.Def == Other.Data.Def;
-  case PT_TypeOf:
-    return Data.Str == Other.Data.Str;
-  }
-
-  llvm_unreachable("Unknown Type Kind");
-}
-
-std::string PatternType::str() const {
-  switch (Kind) {
-  case PT_None:
-    return "";
-  case PT_ValueType:
-    return Data.Def->getName().str();
-  case PT_TypeOf:
-    return (TypeOfClassName + "<$" + getTypeOfOpName() + ">").str();
-  }
-
-  llvm_unreachable("Unknown type!");
-}
-
-//===- Pattern Base Class -------------------------------------------------===//
-
-/// Base class for all patterns that can be written in an `apply`, `match` or
-/// `pattern` DAG operator.
-///
-/// For example:
-///
-///     (apply (G_ZEXT $x, $y), (G_ZEXT $y, $z), "return isFoo(${z})")
-///
-/// Creates 3 Pattern objects:
-///   - Two CodeGenInstruction Patterns
-///   - A CXXPattern
-class Pattern {
-public:
-  enum {
-    K_AnyOpcode,
-    K_CXX,
-
-    K_CodeGenInstruction,
-    K_PatFrag,
-    K_Builtin,
-  };
-
-  virtual ~Pattern() = default;
-
-  unsigned getKind() const { return Kind; }
-  const char *getKindName() const;
-
-  bool hasName() const { return !Name.empty(); }
-  StringRef getName() const { return Name; }
-
-  virtual void print(raw_ostream &OS, bool PrintName = true) const = 0;
-  void dump() const { return print(dbgs()); }
-
-protected:
-  Pattern(unsigned Kind, StringRef Name)
-      : Kind(Kind), Name(insertStrRef(Name)) {
-    assert(!Name.empty() && "unnamed pattern!");
-  }
-
-  void printImpl(raw_ostream &OS, bool PrintName,
-                 function_ref<void()> ContentPrinter) const;
-
-private:
-  unsigned Kind;
-  StringRef Name;
-};
-
-const char *Pattern::getKindName() const {
-  switch (Kind) {
-  case K_AnyOpcode:
-    return "AnyOpcodePattern";
-  case K_CXX:
-    return "CXXPattern";
-  case K_CodeGenInstruction:
-    return "CodeGenInstructionPattern";
-  case K_PatFrag:
-    return "PatFragPattern";
-  case K_Builtin:
-    return "BuiltinPattern";
-  }
-
-  llvm_unreachable("unknown pattern kind!");
-}
-
-void Pattern::printImpl(raw_ostream &OS, bool PrintName,
-                        function_ref<void()> ContentPrinter) const {
-  OS << "(" << getKindName() << " ";
-  if (PrintName)
-    OS << "name:" << getName() << " ";
-  ContentPrinter();
-  OS << ")";
-}
-
-//===- AnyOpcodePattern ---------------------------------------------------===//
-
-/// `wip_match_opcode` patterns.
-/// This matches one or more opcodes, and does not check any operands
-/// whatsoever.
-///
-/// TODO: Long-term, this needs to be removed. It's a hack around MIR
-///       pattern matching limitations.
-class AnyOpcodePattern : public Pattern {
-public:
-  AnyOpcodePattern(StringRef Name) : Pattern(K_AnyOpcode, Name) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_AnyOpcode; }
-
-  void addOpcode(const CodeGenInstruction *I) { Insts.push_back(I); }
-  const auto &insts() const { return Insts; }
-
-  void print(raw_ostream &OS, bool PrintName = true) const override;
-
-private:
-  SmallVector<const CodeGenInstruction *, 4> Insts;
-};
-
-void AnyOpcodePattern::print(raw_ostream &OS, bool PrintName) const {
-  printImpl(OS, PrintName, [&OS, this]() {
-    OS << "["
-       << join(map_range(Insts,
-                         [](const auto *I) { return I->TheDef->getName(); }),
-               ", ")
-       << "]";
-  });
-}
-
-//===- CXXPattern ---------------------------------------------------------===//
-
-/// Represents raw C++ code which may need some expansions.
-///
-///   e.g. [{ return isFooBux(${src}.getReg()); }]
-///
-/// For the expanded code, \see CXXPredicateCode. CXXPredicateCode objects are
-/// created through `expandCode`.
-///
-/// \see CodeExpander and \see CodeExpansions for more information on code
-/// expansions.
-///
-/// This object has two purposes:
-///   - Represent C++ code as a pattern entry.
-///   - Be a factory for expanded C++ code.
-///     - It's immutable and only holds the raw code so we can expand the same
-///       CXX pattern multiple times if we need to.
-///
-/// Note that the code is always trimmed in the constructor, so leading and
-/// trailing whitespaces are removed. This removes bloat in the output, avoids
-/// formatting issues, but also allows us to check things like
-/// `.startswith("return")` trivially without worrying about spaces.
-class CXXPattern : public Pattern {
-public:
-  CXXPattern(const StringInit &Code, StringRef Name)
-      : CXXPattern(Code.getAsUnquotedString(), Name) {}
-
-  CXXPattern(StringRef Code, StringRef Name)
-      : Pattern(K_CXX, Name), RawCode(Code.trim().str()) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_CXX; }
-
-  void setIsApply(bool Value = true) { IsApply = Value; }
-  StringRef getRawCode() const { return RawCode; }
-
-  /// Expands raw code, replacing things such as `${foo}` with their
-  /// substitution in \p CE.
-  ///
-  /// \param CE     Map of Code Expansions
-  /// \param Locs   SMLocs for the Code Expander, in case it needs to emit
-  ///               diagnostics.
-  /// \param AddComment If DebugCXXPreds is enabled, this is called to emit a
-  ///                   comment before the expanded code.
-  ///
-  /// \return A CXXPredicateCode object that contains the expanded code. Note
-  /// that this may or may not insert a new object. All CXXPredicateCode objects
-  /// are held in a set to avoid emitting duplicate C++ code.
-  const CXXPredicateCode &
-  expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
-             function_ref<void(raw_ostream &)> AddComment = {}) const;
-
-  void print(raw_ostream &OS, bool PrintName = true) const override;
-
-private:
-  bool IsApply = false;
-  std::string RawCode;
-};
-
-const CXXPredicateCode &
-CXXPattern::expandCode(const CodeExpansions &CE, ArrayRef<SMLoc> Locs,
-                       function_ref<void(raw_ostream &)> AddComment) const {
-  std::string Result;
-  raw_string_ostream OS(Result);
-
-  if (DebugCXXPreds && AddComment)
-    AddComment(OS);
-
-  CodeExpander Expander(RawCode, CE, Locs, /*ShowExpansions*/ false);
-  Expander.emit(OS);
-  if (IsApply)
-    return CXXPredicateCode::getApplyCode(std::move(Result));
-  return CXXPredicateCode::getMatchCode(std::move(Result));
-}
-
-void CXXPattern::print(raw_ostream &OS, bool PrintName) const {
-  printImpl(OS, PrintName, [&OS, this] {
-    OS << (IsApply ? "apply" : "match") << " code:\"";
-    printEscapedString(getRawCode(), OS);
-    OS << "\"";
-  });
-}
-
-//===- InstructionPattern ---------------------------------------------===//
-
-/// An operand for an InstructionPattern.
-///
-/// Operands are composed of three elements:
-///   - (Optional) Value
-///   - (Optional) Name
-///   - (Optional) Type
-///
-/// Some examples:
-///   (i32 0):$x -> V=int(0), Name='x', Type=i32
-///   0:$x -> V=int(0), Name='x'
-///   $x -> Name='x'
-///   i32:$x -> Name='x', Type = i32
-class InstructionOperand {
-public:
-  using IntImmTy = int64_t;
-
-  InstructionOperand(IntImmTy Imm, StringRef Name, PatternType Type)
-      : Value(Imm), Name(insertStrRef(Name)), Type(Type) {}
-
-  InstructionOperand(StringRef Name, PatternType Type)
-      : Name(insertStrRef(Name)), Type(Type) {}
-
-  bool isNamedImmediate() const { return hasImmValue() && isNamedOperand(); }
-
-  bool hasImmValue() const { return Value.has_value(); }
-  IntImmTy getImmValue() const { return *Value; }
-
-  bool isNamedOperand() const { return !Name.empty(); }
-  StringRef getOperandName() const {
-    assert(isNamedOperand() && "Operand is unnamed");
-    return Name;
-  }
-
-  InstructionOperand withNewName(StringRef NewName) const {
-    InstructionOperand Result = *this;
-    Result.Name = insertStrRef(NewName);
-    return Result;
-  }
-
-  void setIsDef(bool Value = true) { Def = Value; }
-  bool isDef() const { return Def; }
-
-  void setType(PatternType NewType) {
-    assert((!Type || (Type == NewType)) && "Overwriting type!");
-    Type = NewType;
-  }
-  PatternType getType() const { return Type; }
-
-  std::string describe() const {
-    if (!hasImmValue())
-      return "MachineOperand $" + getOperandName().str() + "";
-    std::string Str = "imm " + to_string(getImmValue());
-    if (isNamedImmediate())
-      Str += ":$" + getOperandName().str() + "";
-    return Str;
-  }
-
-  void print(raw_ostream &OS) const {
-    if (isDef())
-      OS << "<def>";
-
-    bool NeedsColon = true;
-    if (Type) {
-      if (hasImmValue())
-        OS << "(" << Type.str() << " " << getImmValue() << ")";
-      else
-        OS << Type.str();
-    } else if (hasImmValue())
-      OS << getImmValue();
-    else
-      NeedsColon = false;
-
-    if (isNamedOperand())
-      OS << (NeedsColon ? ":" : "") << "$" << getOperandName();
-  }
-
-  void dump() const { return print(dbgs()); }
-
-private:
-  std::optional<int64_t> Value;
-  StringRef Name;
-  PatternType Type;
-  bool Def = false;
-};
-
-/// Base class for CodeGenInstructionPattern & PatFragPattern, which handles all
-/// the boilerplate for patterns that have a list of operands for some (pseudo)
-/// instruction.
-class InstructionPattern : public Pattern {
-public:
-  virtual ~InstructionPattern() = default;
-
-  static bool classof(const Pattern *P) {
-    return P->getKind() == K_CodeGenInstruction || P->getKind() == K_PatFrag ||
-           P->getKind() == K_Builtin;
-  }
-
-  template <typename... Ty> void addOperand(Ty &&...Init) {
-    Operands.emplace_back(std::forward<Ty>(Init)...);
-  }
-
-  auto &operands() { return Operands; }
-  const auto &operands() const { return Operands; }
-  unsigned operands_size() const { return Operands.size(); }
-  InstructionOperand &getOperand(unsigned K) { return Operands[K]; }
-  const InstructionOperand &getOperand(unsigned K) const { return Operands[K]; }
-
-  /// When this InstructionPattern is used as the match root, returns the
-  /// operands that must be redefined in the 'apply' pattern for the rule to be
-  /// valid.
-  ///
-  /// For most patterns, this just returns the defs.
-  /// For PatFrag this only returns the root of the PF.
-  ///
-  /// Returns an empty array on error.
-  virtual ArrayRef<InstructionOperand> getApplyDefsNeeded() const {
-    return {operands().begin(), getNumInstDefs()};
-  }
-
-  auto named_operands() {
-    return make_filter_range(Operands,
-                             [&](auto &O) { return O.isNamedOperand(); });
-  }
-
-  auto named_operands() const {
-    return make_filter_range(Operands,
-                             [&](auto &O) { return O.isNamedOperand(); });
-  }
-
-  virtual bool isVariadic() const { return false; }
-  virtual unsigned getNumInstOperands() const = 0;
-  virtual unsigned getNumInstDefs() const = 0;
-
-  bool hasAllDefs() const { return operands_size() >= getNumInstDefs(); }
-
-  virtual StringRef getInstName() const = 0;
-
-  /// Diagnoses all uses of special types in this Pattern and returns true if at
-  /// least one diagnostic was emitted.
-  bool diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc, Twine Msg) const;
-
-  void reportUnreachable(ArrayRef<SMLoc> Locs) const;
-  virtual bool checkSemantics(ArrayRef<SMLoc> Loc);
-
-  void print(raw_ostream &OS, bool PrintName = true) const override;
-
-protected:
-  InstructionPattern(unsigned K, StringRef Name) : Pattern(K, Name) {}
-
-  virtual void printExtras(raw_ostream &OS) const {}
-
-  SmallVector<InstructionOperand, 4> Operands;
-};
-
-bool InstructionPattern::diagnoseAllSpecialTypes(ArrayRef<SMLoc> Loc,
-                                                 Twine Msg) const {
-  bool HasDiag = false;
-  for (const auto &[Idx, Op] : enumerate(operands())) {
-    if (Op.getType().isSpecial()) {
-      PrintError(Loc, Msg);
-      PrintNote(Loc, "operand " + Twine(Idx) + " of '" + getName() +
-                         "' has type '" + Op.getType().str() + "'");
-      HasDiag = true;
-    }
-  }
-  return HasDiag;
-}
-
-void InstructionPattern::reportUnreachable(ArrayRef<SMLoc> Locs) const {
-  PrintError(Locs, "pattern '" + getName() + "' ('" + getInstName() +
-                       "') is unreachable from the pattern root!");
-}
-
-bool InstructionPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
-  unsigned NumExpectedOperands = getNumInstOperands();
-
-  if (isVariadic()) {
-    if (Operands.size() < NumExpectedOperands) {
-      PrintError(Loc, +"'" + getInstName() + "' expected at least " +
-                          Twine(NumExpectedOperands) + " operands, got " +
-                          Twine(Operands.size()));
-      return false;
-    }
-  } else if (NumExpectedOperands != Operands.size()) {
-    PrintError(Loc, +"'" + getInstName() + "' expected " +
-                        Twine(NumExpectedOperands) + " operands, got " +
-                        Twine(Operands.size()));
-    return false;
-  }
-
-  unsigned OpIdx = 0;
-  unsigned NumDefs = getNumInstDefs();
-  for (auto &Op : Operands)
-    Op.setIsDef(OpIdx++ < NumDefs);
-
-  return true;
-}
-
-void InstructionPattern::print(raw_ostream &OS, bool PrintName) const {
-  printImpl(OS, PrintName, [&OS, this] {
-    OS << getInstName() << " operands:[";
-    StringRef Sep = "";
-    for (const auto &Op : Operands) {
-      OS << Sep;
-      Op.print(OS);
-      Sep = ", ";
-    }
-    OS << "]";
-
-    printExtras(OS);
-  });
-}
-
-//===- OperandTable -------------------------------------------------------===//
-
-/// Maps InstructionPattern operands to their definitions. This allows us to tie
-/// different patterns of a (apply), (match) or (patterns) set of patterns
-/// together.
-class OperandTable {
-public:
-  bool addPattern(InstructionPattern *P,
-                  function_ref<void(StringRef)> DiagnoseRedef) {
-    for (const auto &Op : P->named_operands()) {
-      StringRef OpName = Op.getOperandName();
-
-      // We always create an entry in the OperandTable, even for uses.
-      // Uses of operands that don't have a def (= live-ins) will remain with a
-      // nullptr as the Def.
-      //
-      // This allows us tell whether an operand exists in a pattern or not. If
-      // there is no entry for it, it doesn't exist, if there is an entry, it's
-      // used/def'd at least once.
-      auto &Def = Table[OpName];
-
-      if (!Op.isDef())
-        continue;
-
-      if (Def) {
-        DiagnoseRedef(OpName);
-        return false;
-      }
-
-      Def = P;
-    }
-
-    return true;
-  }
-
-  struct LookupResult {
-    LookupResult() = default;
-    LookupResult(InstructionPattern *Def) : Found(true), Def(Def) {}
-
-    bool Found = false;
-    InstructionPattern *Def = nullptr;
-
-    bool isLiveIn() const { return Found && !Def; }
-  };
-
-  LookupResult lookup(StringRef OpName) const {
-    if (auto It = Table.find(OpName); It != Table.end())
-      return LookupResult(It->second);
-    return LookupResult();
-  }
-
-  InstructionPattern *getDef(StringRef OpName) const {
-    return lookup(OpName).Def;
-  }
-
-  void print(raw_ostream &OS, StringRef Name = "",
-             StringRef Indent = "") const {
-    OS << Indent << "(OperandTable ";
-    if (!Name.empty())
-      OS << Name << " ";
-    if (Table.empty()) {
-      OS << "<empty>)\n";
-      return;
-    }
-
-    SmallVector<StringRef, 0> Keys(Table.keys());
-    sort(Keys);
-
-    OS << '\n';
-    for (const auto &Key : Keys) {
-      const auto *Def = Table.at(Key);
-      OS << Indent << "  " << Key << " -> "
-         << (Def ? Def->getName() : "<live-in>") << '\n';
-    }
-    OS << Indent << ")\n";
-  }
-
-  auto begin() const { return Table.begin(); }
-  auto end() const { return Table.end(); }
-
-  void dump() const { print(dbgs()); }
-
-private:
-  StringMap<InstructionPattern *> Table;
-};
-
-//===- CodeGenInstructionPattern ------------------------------------------===//
-
-/// Helper class to contain data associated with a MIFlags operator.
-class MIFlagsInfo {
-public:
-  void addSetFlag(const Record *R) {
-    SetF.insert(R->getValueAsString("EnumName"));
-  }
-  void addUnsetFlag(const Record *R) {
-    UnsetF.insert(R->getValueAsString("EnumName"));
-  }
-  void addCopyFlag(StringRef InstName) { CopyF.insert(insertStrRef(InstName)); }
-
-  const auto &set_flags() const { return SetF; }
-  const auto &unset_flags() const { return UnsetF; }
-  const auto &copy_flags() const { return CopyF; }
-
-private:
-  SetVector<StringRef> SetF, UnsetF, CopyF;
-};
-
-/// Matches an instruction, e.g. `G_ADD $x, $y, $z`.
-class CodeGenInstructionPattern : public InstructionPattern {
-public:
-  CodeGenInstructionPattern(const CodeGenInstruction &I, StringRef Name)
-      : InstructionPattern(K_CodeGenInstruction, Name), I(I) {}
-
-  static bool classof(const Pattern *P) {
-    return P->getKind() == K_CodeGenInstruction;
-  }
-
-  bool is(StringRef OpcodeName) const {
-    return I.TheDef->getName() == OpcodeName;
-  }
-
-  bool hasVariadicDefs() const;
-  bool isVariadic() const override { return I.Operands.isVariadic; }
-  unsigned getNumInstDefs() const override;
-  unsigned getNumInstOperands() const override;
-
-  MIFlagsInfo &getOrCreateMIFlagsInfo();
-  const MIFlagsInfo *getMIFlagsInfo() const { return FI.get(); }
-
-  const CodeGenInstruction &getInst() const { return I; }
-  StringRef getInstName() const override { return I.TheDef->getName(); }
-
-private:
-  void printExtras(raw_ostream &OS) const override;
-
-  const CodeGenInstruction &I;
-  std::unique_ptr<MIFlagsInfo> FI;
-};
-
-bool CodeGenInstructionPattern::hasVariadicDefs() const {
-  // Note: we cannot use variadicOpsAreDefs, it's not set for
-  // GenericInstructions.
-  if (!isVariadic())
-    return false;
-
-  if (I.variadicOpsAreDefs)
-    return true;
-
-  DagInit *OutOps = I.TheDef->getValueAsDag("OutOperandList");
-  if (OutOps->arg_empty())
-    return false;
-
-  auto *LastArgTy = dyn_cast<DefInit>(OutOps->getArg(OutOps->arg_size() - 1));
-  return LastArgTy && LastArgTy->getDef()->getName() == "variable_ops";
-}
-
-unsigned CodeGenInstructionPattern::getNumInstDefs() const {
-  if (!isVariadic() || !hasVariadicDefs())
-    return I.Operands.NumDefs;
-  unsigned NumOuts = I.Operands.size() - I.Operands.NumDefs;
-  assert(Operands.size() > NumOuts);
-  return std::max<unsigned>(I.Operands.NumDefs, Operands.size() - NumOuts);
-}
-
-unsigned CodeGenInstructionPattern::getNumInstOperands() const {
-  unsigned NumCGIOps = I.Operands.size();
-  return isVariadic() ? std::max<unsigned>(NumCGIOps, Operands.size())
-                      : NumCGIOps;
-}
-
-MIFlagsInfo &CodeGenInstructionPattern::getOrCreateMIFlagsInfo() {
-  if (!FI)
-    FI = std::make_unique<MIFlagsInfo>();
-  return *FI;
-}
-
-void CodeGenInstructionPattern::printExtras(raw_ostream &OS) const {
-  if (!FI)
-    return;
-
-  OS << " (MIFlags";
-  if (!FI->set_flags().empty())
-    OS << " (set " << join(FI->set_flags(), ", ") << ")";
-  if (!FI->unset_flags().empty())
-    OS << " (unset " << join(FI->unset_flags(), ", ") << ")";
-  if (!FI->copy_flags().empty())
-    OS << " (copy " << join(FI->copy_flags(), ", ") << ")";
-  OS << ')';
-}
-
-//===- OperandTypeChecker -------------------------------------------------===//
-
-/// This is a trivial type checker for all operands in a set of
-/// InstructionPatterns.
-///
-/// It infers the type of each operand, check it's consistent with the known
-/// type of the operand, and then sets all of the types in all operands in
-/// propagateTypes.
-///
-/// It also handles verifying correctness of special types.
-class OperandTypeChecker {
-public:
-  OperandTypeChecker(ArrayRef<SMLoc> DiagLoc) : DiagLoc(DiagLoc) {}
-
-  /// Step 1: Check each pattern one by one. All patterns that pass through here
-  /// are added to a common worklist so propagateTypes can access them.
-  bool check(InstructionPattern &P,
-             std::function<bool(const PatternType &)> VerifyTypeOfOperand);
-
-  /// Step 2: Propagate all types. e.g. if one use of "$a" has type i32, make
-  /// all uses of "$a" have type i32.
-  void propagateTypes();
-
-protected:
-  ArrayRef<SMLoc> DiagLoc;
-
-private:
-  using InconsistentTypeDiagFn = std::function<void()>;
-
-  void PrintSeenWithTypeIn(InstructionPattern &P, StringRef OpName,
-                           PatternType Ty) const {
-    PrintNote(DiagLoc, "'" + OpName + "' seen with type '" + Ty.str() +
-                           "' in '" + P.getName() + "'");
-  }
-
-  struct OpTypeInfo {
-    PatternType Type;
-    InconsistentTypeDiagFn PrintTypeSrcNote = []() {};
-  };
-
-  StringMap<OpTypeInfo> Types;
-
-  SmallVector<InstructionPattern *, 16> Pats;
-};
-
-bool OperandTypeChecker::check(
-    InstructionPattern &P,
-    std::function<bool(const PatternType &)> VerifyTypeOfOperand) {
-  Pats.push_back(&P);
+//===- CodeExpansions Helpers  --------------------------------------------===//
 
-  for (auto &Op : P.operands()) {
-    const auto Ty = Op.getType();
-    if (!Ty)
-      continue;
-
-    if (Ty.isTypeOf() && !VerifyTypeOfOperand(Ty))
-      return false;
-
-    if (!Op.isNamedOperand())
-      continue;
-
-    StringRef OpName = Op.getOperandName();
-    auto &Info = Types[OpName];
-    if (!Info.Type) {
-      Info.Type = Ty;
-      Info.PrintTypeSrcNote = [this, OpName, Ty, &P]() {
-        PrintSeenWithTypeIn(P, OpName, Ty);
-      };
-      continue;
-    }
-
-    if (Info.Type != Ty) {
-      PrintError(DiagLoc, "conflicting types for operand '" +
-                              Op.getOperandName() + "': '" + Info.Type.str() +
-                              "' vs '" + Ty.str() + "'");
-      PrintSeenWithTypeIn(P, OpName, Ty);
-      Info.PrintTypeSrcNote();
-      return false;
-    }
-  }
-
-  return true;
-}
-
-void OperandTypeChecker::propagateTypes() {
-  for (auto *Pat : Pats) {
-    for (auto &Op : Pat->named_operands()) {
-      if (auto &Info = Types[Op.getOperandName()]; Info.Type)
-        Op.setType(Info.Type);
-    }
-  }
-}
-
-//===- PatFrag ------------------------------------------------------------===//
-
-/// Represents a parsed GICombinePatFrag. This can be thought of as the
-/// equivalent of a CodeGenInstruction, but for PatFragPatterns.
-///
-/// PatFrags are made of 3 things:
-///   - Out parameters (defs)
-///   - In parameters
-///   - A set of pattern lists (alternatives).
-///
-/// If the PatFrag uses instruction patterns, the root must be one of the defs.
-///
-/// Note that this DOES NOT represent the use of the PatFrag, only its
-/// definition. The use of the PatFrag in a Pattern is represented by
-/// PatFragPattern.
-///
-/// PatFrags use the term "parameter" instead of operand because they're
-/// essentially macros, and using that name avoids confusion. Other than that,
-/// they're structured similarly to a MachineInstruction  - all parameters
-/// (operands) are in the same list, with defs at the start. This helps mapping
-/// parameters to values, because, param N of a PatFrag is always operand N of a
-/// PatFragPattern.
-class PatFrag {
-public:
-  enum ParamKind {
-    PK_Root,
-    PK_MachineOperand,
-    PK_Imm,
-  };
-
-  struct Param {
-    StringRef Name;
-    ParamKind Kind;
-  };
-
-  using ParamVec = SmallVector<Param, 4>;
-  using ParamIt = ParamVec::const_iterator;
-
-  /// Represents an alternative of the PatFrag. When parsing a GICombinePatFrag,
-  /// this is created from its "Alternatives" list. Each alternative is a list
-  /// of patterns written wrapped in a  `(pattern ...)` dag init.
-  ///
-  /// Each argument to the `pattern` DAG operator is parsed into a Pattern
-  /// instance.
-  struct Alternative {
-    OperandTable OpTable;
-    SmallVector<std::unique_ptr<Pattern>, 4> Pats;
-  };
-
-  explicit PatFrag(const Record &Def) : Def(Def) {
-    assert(Def.isSubClassOf(PatFragClassName));
-  }
-
-  static StringRef getParamKindStr(ParamKind OK);
-
-  StringRef getName() const { return Def.getName(); }
-
-  const Record &getDef() const { return Def; }
-  ArrayRef<SMLoc> getLoc() const { return Def.getLoc(); }
-
-  Alternative &addAlternative() { return Alts.emplace_back(); }
-  const Alternative &getAlternative(unsigned K) const { return Alts[K]; }
-  unsigned num_alternatives() const { return Alts.size(); }
-
-  void addInParam(StringRef Name, ParamKind Kind);
-  iterator_range<ParamIt> in_params() const;
-  unsigned num_in_params() const { return Params.size() - NumOutParams; }
-
-  void addOutParam(StringRef Name, ParamKind Kind);
-  iterator_range<ParamIt> out_params() const;
-  unsigned num_out_params() const { return NumOutParams; }
-
-  unsigned num_roots() const;
-  unsigned num_params() const { return num_in_params() + num_out_params(); }
-
-  /// Finds the operand \p Name and returns its index or -1 if not found.
-  /// Remember that all params are part of the same list, with out params at the
-  /// start. This means that the index returned can be used to access operands
-  /// of InstructionPatterns.
-  unsigned getParamIdx(StringRef Name) const;
-  const Param &getParam(unsigned K) const { return Params[K]; }
-
-  bool canBeMatchRoot() const { return num_roots() == 1; }
-
-  void print(raw_ostream &OS, StringRef Indent = "") const;
-  void dump() const { print(dbgs()); }
-
-  /// Checks if the in-param \p ParamName can be unbound or not.
-  /// \p ArgName is the name of the argument passed to the PatFrag.
-  ///
-  /// An argument can be unbound only if, for all alternatives:
-  ///   - There is no CXX pattern, OR:
-  ///   - There is an InstructionPattern that binds the parameter.
-  ///
-  /// e.g. in (MyPatFrag $foo), if $foo has never been seen before (= it's
-  /// unbound), this checks if MyPatFrag supports it or not.
-  bool handleUnboundInParam(StringRef ParamName, StringRef ArgName,
-                            ArrayRef<SMLoc> DiagLoc) const;
-
-  bool checkSemantics();
-  bool buildOperandsTables();
-
-private:
-  static void printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params);
-
-  void PrintError(Twine Msg) const { ::PrintError(&Def, Msg); }
-
-  const Record &Def;
-  unsigned NumOutParams = 0;
-  ParamVec Params;
-  SmallVector<Alternative, 2> Alts;
-};
-
-StringRef PatFrag::getParamKindStr(ParamKind OK) {
-  switch (OK) {
-  case PK_Root:
-    return "root";
-  case PK_MachineOperand:
-    return "machine_operand";
-  case PK_Imm:
-    return "imm";
-  }
-
-  llvm_unreachable("Unknown operand kind!");
-}
-
-void PatFrag::addInParam(StringRef Name, ParamKind Kind) {
-  Params.emplace_back(Param{insertStrRef(Name), Kind});
-}
-
-iterator_range<PatFrag::ParamIt> PatFrag::in_params() const {
-  return {Params.begin() + NumOutParams, Params.end()};
-}
-
-void PatFrag::addOutParam(StringRef Name, ParamKind Kind) {
-  assert(NumOutParams == Params.size() &&
-         "Adding out-param after an in-param!");
-  Params.emplace_back(Param{insertStrRef(Name), Kind});
-  ++NumOutParams;
-}
-
-iterator_range<PatFrag::ParamIt> PatFrag::out_params() const {
-  return {Params.begin(), Params.begin() + NumOutParams};
-}
-
-unsigned PatFrag::num_roots() const {
-  return count_if(out_params(),
-                  [&](const auto &P) { return P.Kind == PK_Root; });
-}
-
-unsigned PatFrag::getParamIdx(StringRef Name) const {
-  for (const auto &[Idx, Op] : enumerate(Params)) {
-    if (Op.Name == Name)
-      return Idx;
-  }
-
-  return -1;
+void declareInstExpansion(CodeExpansions &CE, const InstructionMatcher &IM,
+                          StringRef Name) {
+  CE.declare(Name, "State.MIs[" + to_string(IM.getInsnVarID()) + "]");
 }
 
-bool PatFrag::checkSemantics() {
-  for (const auto &Alt : Alts) {
-    for (const auto &Pat : Alt.Pats) {
-      switch (Pat->getKind()) {
-      case Pattern::K_AnyOpcode:
-        PrintError("wip_match_opcode cannot be used in " + PatFragClassName);
-        return false;
-      case Pattern::K_Builtin:
-        PrintError("Builtin instructions cannot be used in " +
-                   PatFragClassName);
-        return false;
-      case Pattern::K_CXX:
-        continue;
-      case Pattern::K_CodeGenInstruction:
-        if (cast<CodeGenInstructionPattern>(Pat.get())->diagnoseAllSpecialTypes(
-                Def.getLoc(), SpecialTyClassName + " is not supported in " +
-                                  PatFragClassName))
-          return false;
-        continue;
-      case Pattern::K_PatFrag:
-        // TODO: It's just that the emitter doesn't handle it but technically
-        // there is no reason why we can't. We just have to be careful with
-        // operand mappings, it could get complex.
-        PrintError("nested " + PatFragClassName + " are not supported");
-        return false;
-      }
-    }
-  }
-
-  StringSet<> SeenOps;
-  for (const auto &Op : in_params()) {
-    if (SeenOps.count(Op.Name)) {
-      PrintError("duplicate parameter '" + Op.Name + "'");
-      return false;
-    }
-
-    // Check this operand is NOT defined in any alternative's patterns.
-    for (const auto &Alt : Alts) {
-      if (Alt.OpTable.lookup(Op.Name).Def) {
-        PrintError("input parameter '" + Op.Name + "' cannot be redefined!");
-        return false;
-      }
-    }
-
-    if (Op.Kind == PK_Root) {
-      PrintError("input parameterr '" + Op.Name + "' cannot be a root!");
-      return false;
-    }
-
-    SeenOps.insert(Op.Name);
-  }
-
-  for (const auto &Op : out_params()) {
-    if (Op.Kind != PK_Root && Op.Kind != PK_MachineOperand) {
-      PrintError("output parameter '" + Op.Name +
-                 "' must be 'root' or 'gi_mo'");
-      return false;
-    }
-
-    if (SeenOps.count(Op.Name)) {
-      PrintError("duplicate parameter '" + Op.Name + "'");
-      return false;
-    }
-
-    // Check this operand is defined in all alternative's patterns.
-    for (const auto &Alt : Alts) {
-      const auto *OpDef = Alt.OpTable.getDef(Op.Name);
-      if (!OpDef) {
-        PrintError("output parameter '" + Op.Name +
-                   "' must be defined by all alternative patterns in '" +
-                   Def.getName() + "'");
-        return false;
-      }
-
-      if (Op.Kind == PK_Root && OpDef->getNumInstDefs() != 1) {
-        // The instruction that defines the root must have a single def.
-        // Otherwise we'd need to support multiple roots and it gets messy.
-        //
-        // e.g. this is not supported:
-        //   (pattern (G_UNMERGE_VALUES $x, $root, $vec))
-        PrintError("all instructions that define root '" + Op.Name + "' in '" +
-                   Def.getName() + "' can only have a single output operand");
-        return false;
-      }
-    }
-
-    SeenOps.insert(Op.Name);
-  }
-
-  if (num_out_params() != 0 && num_roots() == 0) {
-    PrintError(PatFragClassName + " must have one root in its 'out' operands");
-    return false;
-  }
-
-  if (num_roots() > 1) {
-    PrintError(PatFragClassName + " can only have one root");
-    return false;
-  }
-
-  // TODO: find unused params
-
-  const auto CheckTypeOf = [&](const PatternType &) -> bool {
-    llvm_unreachable("GITypeOf should have been rejected earlier!");
-  };
-
-  // Now, typecheck all alternatives.
-  for (auto &Alt : Alts) {
-    OperandTypeChecker OTC(Def.getLoc());
-    for (auto &Pat : Alt.Pats) {
-      if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
-        if (!OTC.check(*IP, CheckTypeOf))
-          return false;
-      }
-    }
-    OTC.propagateTypes();
-  }
-
-  return true;
+void declareInstExpansion(CodeExpansions &CE, const BuildMIAction &A,
+                          StringRef Name) {
+  // Note: we use redeclare here because this may overwrite a matcher inst
+  // expansion.
+  CE.redeclare(Name, "OutMIs[" + to_string(A.getInsnID()) + "]");
 }
 
-bool PatFrag::handleUnboundInParam(StringRef ParamName, StringRef ArgName,
-                                   ArrayRef<SMLoc> DiagLoc) const {
-  // The parameter must be a live-in of all alternatives for this to work.
-  // Otherwise, we risk having unbound parameters being used (= crashes).
-  //
-  // Examples:
-  //
-  // in (ins $y), (patterns (G_FNEG $dst, $y), "return matchFnegOp(${y})")
-  //    even if $y is unbound, we'll lazily bind it when emitting the G_FNEG.
-  //
-  // in (ins $y), (patterns "return matchFnegOp(${y})")
-  //    if $y is unbound when this fragment is emitted, C++ code expansion will
-  //    fail.
-  for (const auto &Alt : Alts) {
-    auto &OT = Alt.OpTable;
-    if (!OT.lookup(ParamName).Found) {
-      ::PrintError(DiagLoc, "operand '" + ArgName + "' (for parameter '" +
-                                ParamName + "' of '" + getName() +
-                                "') cannot be unbound");
-      PrintNote(
-          DiagLoc,
-          "one or more alternatives of '" + getName() + "' do not bind '" +
-              ParamName +
-              "' to an instruction operand; either use a bound operand or "
-              "ensure '" +
-              Def.getName() + "' binds '" + ParamName +
-              "' in all alternatives");
-      return false;
-    }
-  }
-
-  return true;
+void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM,
+                             StringRef Name) {
+  CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) +
+                       "]->getOperand(" + to_string(OM.getOpIdx()) + ")");
 }
 
-bool PatFrag::buildOperandsTables() {
-  // enumerate(...) doesn't seem to allow lvalues so we need to count the old
-  // way.
-  unsigned Idx = 0;
-
-  const auto DiagnoseRedef = [this, &Idx](StringRef OpName) {
-    PrintError("Operand '" + OpName +
-               "' is defined multiple times in patterns of alternative #" +
-               to_string(Idx));
-  };
-
-  for (auto &Alt : Alts) {
-    for (auto &Pat : Alt.Pats) {
-      auto *IP = dyn_cast<InstructionPattern>(Pat.get());
-      if (!IP)
-        continue;
-
-      if (!Alt.OpTable.addPattern(IP, DiagnoseRedef))
-        return false;
-    }
-
-    ++Idx;
-  }
-
-  return true;
+void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID,
+                             StringRef Name) {
+  CE.declare(Name, "State.TempRegisters[" + to_string(TempRegID) + "]");
 }
 
-void PatFrag::print(raw_ostream &OS, StringRef Indent) const {
-  OS << Indent << "(PatFrag name:" << getName() << '\n';
-  if (!in_params().empty()) {
-    OS << Indent << "  (ins ";
-    printParamsList(OS, in_params());
-    OS << ")\n";
-  }
-
-  if (!out_params().empty()) {
-    OS << Indent << "  (outs ";
-    printParamsList(OS, out_params());
-    OS << ")\n";
-  }
-
-  // TODO: Dump OperandTable as well.
-  OS << Indent << "  (alternatives [\n";
-  for (const auto &Alt : Alts) {
-    OS << Indent << "    [\n";
-    for (const auto &Pat : Alt.Pats) {
-      OS << Indent << "      ";
-      Pat->print(OS, /*PrintName=*/true);
-      OS << ",\n";
-    }
-    OS << Indent << "    ],\n";
-  }
-  OS << Indent << "  ])\n";
+//===- Misc. Helpers  -----------------------------------------------------===//
 
-  OS << Indent << ')';
-}
+/// Copies a StringRef into a static pool to preserve it.
+/// Most Pattern classes use StringRef so we need this.
+StringRef insertStrRef(StringRef S) {
+  if (S.empty())
+    return {};
 
-void PatFrag::printParamsList(raw_ostream &OS, iterator_range<ParamIt> Params) {
-  OS << '['
-     << join(map_range(Params,
-                       [](auto &O) {
-                         return (O.Name + ":" + getParamKindStr(O.Kind)).str();
-                       }),
-             ", ")
-     << ']';
+  static StringSet<> Pool;
+  auto [It, Inserted] = Pool.insert(S);
+  return It->getKey();
 }
 
-//===- PatFragPattern -----------------------------------------------------===//
-
-class PatFragPattern : public InstructionPattern {
-public:
-  PatFragPattern(const PatFrag &PF, StringRef Name)
-      : InstructionPattern(K_PatFrag, Name), PF(PF) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_PatFrag; }
-
-  const PatFrag &getPatFrag() const { return PF; }
-  StringRef getInstName() const override { return PF.getName(); }
-
-  unsigned getNumInstDefs() const override { return PF.num_out_params(); }
-  unsigned getNumInstOperands() const override { return PF.num_params(); }
-
-  ArrayRef<InstructionOperand> getApplyDefsNeeded() const override;
-
-  bool checkSemantics(ArrayRef<SMLoc> DiagLoc) override;
-
-  /// Before emitting the patterns inside the PatFrag, add all necessary code
-  /// expansions to \p PatFragCEs imported from \p ParentCEs.
-  ///
-  /// For a MachineOperand PatFrag parameter, this will fetch the expansion for
-  /// that operand from \p ParentCEs and add it to \p PatFragCEs. Errors can be
-  /// emitted if the MachineOperand reference is unbound.
-  ///
-  /// For an Immediate PatFrag parameter this simply adds the integer value to
-  /// \p PatFragCEs as an expansion.
-  ///
-  /// \param ParentCEs Contains all of the code expansions declared by the other
-  ///                  patterns emitted so far in the pattern list containing
-  ///                  this PatFragPattern.
-  /// \param PatFragCEs Output Code Expansions (usually empty)
-  /// \param DiagLoc    Diagnostic loc in case an error occurs.
-  /// \return `true` on success, `false` on failure.
-  bool mapInputCodeExpansions(const CodeExpansions &ParentCEs,
-                              CodeExpansions &PatFragCEs,
-                              ArrayRef<SMLoc> DiagLoc) const;
-
-private:
-  const PatFrag &PF;
-};
-
-ArrayRef<InstructionOperand> PatFragPattern::getApplyDefsNeeded() const {
-  assert(PF.num_roots() == 1);
-  // Only roots need to be redef.
-  for (auto [Idx, Param] : enumerate(PF.out_params())) {
-    if (Param.Kind == PatFrag::PK_Root)
-      return getOperand(Idx);
-  }
-  llvm_unreachable("root not found!");
+template <typename Container> auto keys(Container &&C) {
+  return map_range(C, [](auto &Entry) -> auto & { return Entry.first; });
 }
 
-bool PatFragPattern::checkSemantics(ArrayRef<SMLoc> DiagLoc) {
-  if (!InstructionPattern::checkSemantics(DiagLoc))
-    return false;
-
-  for (const auto &[Idx, Op] : enumerate(Operands)) {
-    switch (PF.getParam(Idx).Kind) {
-    case PatFrag::PK_Imm:
-      if (!Op.hasImmValue()) {
-        PrintError(DiagLoc, "expected operand " + to_string(Idx) + " of '" +
-                                getInstName() + "' to be an immediate; got " +
-                                Op.describe());
-        return false;
-      }
-      if (Op.isNamedImmediate()) {
-        PrintError(DiagLoc, "operand " + to_string(Idx) + " of '" +
-                                getInstName() +
-                                "' cannot be a named immediate");
-        return false;
-      }
-      break;
-    case PatFrag::PK_Root:
-    case PatFrag::PK_MachineOperand:
-      if (!Op.isNamedOperand() || Op.isNamedImmediate()) {
-        PrintError(DiagLoc, "expected operand " + to_string(Idx) + " of '" +
-                                getInstName() +
-                                "' to be a MachineOperand; got " +
-                                Op.describe());
-        return false;
-      }
-      break;
-    }
-  }
-
-  return true;
+template <typename Container> auto values(Container &&C) {
+  return map_range(C, [](auto &Entry) -> auto & { return Entry.second; });
 }
 
-bool PatFragPattern::mapInputCodeExpansions(const CodeExpansions &ParentCEs,
-                                            CodeExpansions &PatFragCEs,
-                                            ArrayRef<SMLoc> DiagLoc) const {
-  for (const auto &[Idx, Op] : enumerate(operands())) {
-    StringRef ParamName = PF.getParam(Idx).Name;
-
-    // Operands to a PFP can only be named, or be an immediate, but not a named
-    // immediate.
-    assert(!Op.isNamedImmediate());
-
-    if (Op.isNamedOperand()) {
-      StringRef ArgName = Op.getOperandName();
-      // Map it only if it's been defined.
-      auto It = ParentCEs.find(ArgName);
-      if (It == ParentCEs.end()) {
-        if (!PF.handleUnboundInParam(ParamName, ArgName, DiagLoc))
-          return false;
-      } else
-        PatFragCEs.declare(ParamName, It->second);
-      continue;
-    }
-
-    if (Op.hasImmValue()) {
-      PatFragCEs.declare(ParamName, to_string(Op.getImmValue()));
-      continue;
-    }
-
-    llvm_unreachable("Unknown Operand Type!");
-  }
-
-  return true;
+std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
+  return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
 }
 
-//===- BuiltinPattern -----------------------------------------------------===//
-
-enum BuiltinKind {
-  BI_ReplaceReg,
-  BI_EraseRoot,
-};
-
-class BuiltinPattern : public InstructionPattern {
-  struct BuiltinInfo {
-    StringLiteral DefName;
-    BuiltinKind Kind;
-    unsigned NumOps;
-    unsigned NumDefs;
-  };
-
-  static constexpr std::array<BuiltinInfo, 2> KnownBuiltins = {{
-      {"GIReplaceReg", BI_ReplaceReg, 2, 1},
-      {"GIEraseRoot", BI_EraseRoot, 0, 0},
-  }};
-
-public:
-  BuiltinPattern(const Record &Def, StringRef Name)
-      : InstructionPattern(K_Builtin, Name), I(getBuiltinInfo(Def)) {}
-
-  static bool classof(const Pattern *P) { return P->getKind() == K_Builtin; }
-
-  unsigned getNumInstOperands() const override { return I.NumOps; }
-  unsigned getNumInstDefs() const override { return I.NumDefs; }
-  StringRef getInstName() const override { return I.DefName; }
-  BuiltinKind getBuiltinKind() const { return I.Kind; }
+//===- MatchTable Helpers  ------------------------------------------------===//
 
-  bool checkSemantics(ArrayRef<SMLoc> Loc) override;
-
-private:
-  static BuiltinInfo getBuiltinInfo(const Record &Def);
-
-  BuiltinInfo I;
-};
-
-BuiltinPattern::BuiltinInfo BuiltinPattern::getBuiltinInfo(const Record &Def) {
-  assert(Def.isSubClassOf(BuiltinInstClassName));
-
-  StringRef Name = Def.getName();
-  for (const auto &KBI : KnownBuiltins) {
-    if (KBI.DefName == Name)
-      return KBI;
-  }
-
-  PrintFatalError(Def.getLoc(), "Unimplemented " + BuiltinInstClassName +
-                                    " def '" + Name + "'");
+LLTCodeGen getLLTCodeGen(const PatternType &PT) {
+  return *MVTToLLT(getValueType(PT.getLLTRecord()));
 }
 
-bool BuiltinPattern::checkSemantics(ArrayRef<SMLoc> Loc) {
-  if (!InstructionPattern::checkSemantics(Loc))
-    return false;
+LLTCodeGenOrTempType getLLTCodeGenOrTempType(const PatternType &PT,
+                                             RuleMatcher &RM) {
+  assert(!PT.isNone());
 
-  // For now all builtins just take names, no immediates.
-  for (const auto &[Idx, Op] : enumerate(operands())) {
-    if (!Op.isNamedOperand() || Op.isNamedImmediate()) {
-      PrintError(Loc, "expected operand " + to_string(Idx) + " of '" +
-                          getInstName() + "' to be a name");
-      return false;
-    }
-  }
+  if (PT.isLLT())
+    return getLLTCodeGen(PT);
 
-  return true;
+  assert(PT.isTypeOf());
+  auto &OM = RM.getOperandMatcher(PT.getTypeOfOpName());
+  return OM.getTempTypeIdx(RM);
 }
 
 //===- PrettyStackTrace Helpers  ------------------------------------------===//
@@ -1701,8 +160,8 @@ class PrettyStackTraceParse : public PrettyStackTraceEntry {
   void print(raw_ostream &OS) const override {
     if (Def.isSubClassOf("GICombineRule"))
       OS << "Parsing GICombineRule '" << Def.getName() << "'";
-    else if (Def.isSubClassOf(PatFragClassName))
-      OS << "Parsing " << PatFragClassName << " '" << Def.getName() << "'";
+    else if (Def.isSubClassOf(PatFrag::ClassName))
+      OS << "Parsing " << PatFrag::ClassName << " '" << Def.getName() << "'";
     else
       OS << "Parsing '" << Def.getName() << "'";
     OS << '\n';
@@ -1720,8 +179,8 @@ class PrettyStackTraceEmit : public PrettyStackTraceEntry {
   void print(raw_ostream &OS) const override {
     if (Def.isSubClassOf("GICombineRule"))
       OS << "Emitting GICombineRule '" << Def.getName() << "'";
-    else if (Def.isSubClassOf(PatFragClassName))
-      OS << "Emitting " << PatFragClassName << " '" << Def.getName() << "'";
+    else if (Def.isSubClassOf(PatFrag::ClassName))
+      OS << "Emitting " << PatFrag::ClassName << " '" << Def.getName() << "'";
     else
       OS << "Emitting '" << Def.getName() << "'";
 
@@ -2540,7 +999,7 @@ bool CombineRuleBuilder::addApplyPattern(std::unique_ptr<Pattern> Pat) {
   }
 
   if (isa<PatFragPattern>(Pat.get())) {
-    PrintError("'" + Name + "': using " + PatFragClassName +
+    PrintError("'" + Name + "': using " + PatFrag::ClassName +
                " is not supported in apply patterns");
     return false;
   }
@@ -2583,12 +1042,14 @@ void CombineRuleBuilder::addCXXPredicate(RuleMatcher &M,
   // FIXME: Hack so C++ code is executed last. May not work for more complex
   // patterns.
   auto &IM = *std::prev(M.insnmatchers().end());
+  auto Loc = RuleDef.getLoc();
+  const auto AddComment = [&](raw_ostream &OS) {
+    OS << "// Pattern Alternatives: ";
+    print(OS, Alts);
+    OS << '\n';
+  };
   const auto &ExpandedCode =
-      P.expandCode(CE, RuleDef.getLoc(), [&](raw_ostream &OS) {
-        OS << "// Pattern Alternatives: ";
-        print(OS, Alts);
-        OS << '\n';
-      });
+      DebugCXXPreds ? P.expandCode(CE, Loc, AddComment) : P.expandCode(CE, Loc);
   IM->addPredicate<GenericInstructionPredicateMatcher>(
       ExpandedCode.getEnumNameWithPrefix(CXXPredPrefix));
 }
@@ -2638,8 +1099,8 @@ bool CombineRuleBuilder::typecheckPatterns() {
   for (auto &Pat : values(MatchPats)) {
     if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
       if (IP->diagnoseAllSpecialTypes(
-              RuleDef.getLoc(),
-              SpecialTyClassName + " is not supported in 'match' patterns")) {
+              RuleDef.getLoc(), PatternType::SpecialTyClassName +
+                                    " is not supported in 'match' patterns")) {
         return false;
       }
     }
@@ -3053,7 +1514,7 @@ bool CombineRuleBuilder::parsePatternList(
     Init *Arg = List.getArg(I);
     std::string Name = List.getArgName(I)
                            ? List.getArgName(I)->getValue().str()
-                           : makeAnonPatName(AnonPatNamePrefix, I);
+                           : ("__" + AnonPatNamePrefix + "_" + Twine(I)).str();
 
     if (auto Pat = parseInstructionPattern(*Arg, Name)) {
       if (!ParseAction(std::move(Pat)))
@@ -3069,7 +1530,7 @@ bool CombineRuleBuilder::parsePatternList(
 
     // Parse arbitrary C++ code
     if (const auto *StringI = dyn_cast<StringInit>(Arg)) {
-      auto CXXPat = std::make_unique<CXXPattern>(*StringI, Name);
+      auto CXXPat = std::make_unique<CXXPattern>(*StringI, insertStrRef(Name));
       if (!ParseAction(std::move(CXXPat)))
         return false;
       continue;
@@ -3093,18 +1554,19 @@ CombineRuleBuilder::parseInstructionPattern(const Init &Arg,
   std::unique_ptr<InstructionPattern> Pat;
   if (const DagInit *IP = getDagWithOperatorOfSubClass(Arg, "Instruction")) {
     auto &Instr = CGT.getInstruction(IP->getOperatorAsDef(RuleDef.getLoc()));
-    Pat = std::make_unique<CodeGenInstructionPattern>(Instr, Name);
+    Pat =
+        std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name));
   } else if (const DagInit *PFP =
-                 getDagWithOperatorOfSubClass(Arg, PatFragClassName)) {
+                 getDagWithOperatorOfSubClass(Arg, PatFrag::ClassName)) {
     const Record *Def = PFP->getOperatorAsDef(RuleDef.getLoc());
     const PatFrag *PF = parsePatFrag(Def);
     if (!PF)
       return nullptr; // Already diagnosed by parsePatFrag
-    Pat = std::make_unique<PatFragPattern>(*PF, Name);
+    Pat = std::make_unique<PatFragPattern>(*PF, insertStrRef(Name));
   } else if (const DagInit *BP =
-                 getDagWithOperatorOfSubClass(Arg, BuiltinInstClassName)) {
+                 getDagWithOperatorOfSubClass(Arg, BuiltinPattern::ClassName)) {
     Pat = std::make_unique<BuiltinPattern>(
-        *BP->getOperatorAsDef(RuleDef.getLoc()), Name);
+        *BP->getOperatorAsDef(RuleDef.getLoc()), insertStrRef(Name));
   } else {
     return nullptr;
   }
@@ -3140,7 +1602,7 @@ CombineRuleBuilder::parseWipMatchOpcodeMatcher(const Init &Arg,
   }
 
   // Each argument is an opcode that can match.
-  auto Result = std::make_unique<AnyOpcodePattern>(Name);
+  auto Result = std::make_unique<AnyOpcodePattern>(insertStrRef(Name));
   for (const auto &Arg : Matcher->getArgs()) {
     Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction");
     if (OpcodeDef) {
@@ -3168,7 +1630,7 @@ bool CombineRuleBuilder::parseInstructionPatternOperand(
   // untyped immediate, e.g. 0
   if (const auto *IntImm = dyn_cast<IntInit>(OpInit)) {
     std::string Name = OpName ? OpName->getAsUnquotedString() : "";
-    IP.addOperand(IntImm->getValue(), Name, PatternType());
+    IP.addOperand(IntImm->getValue(), insertStrRef(Name), PatternType());
     return true;
   }
 
@@ -3195,7 +1657,7 @@ bool CombineRuleBuilder::parseInstructionPatternOperand(
       return ParseErr();
 
     std::string Name = OpName ? OpName->getAsUnquotedString() : "";
-    IP.addOperand(Val->getValue(), Name, *ImmTy);
+    IP.addOperand(Val->getValue(), insertStrRef(Name), *ImmTy);
     return true;
   }
 
@@ -3211,14 +1673,14 @@ bool CombineRuleBuilder::parseInstructionPatternOperand(
         PatternType::get(RuleDef.getLoc(), Def, "cannot parse operand type");
     if (!Ty)
       return false;
-    IP.addOperand(OpName->getAsUnquotedString(), *Ty);
+    IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), *Ty);
     return true;
   }
 
   // Untyped operand e.g. $x/$z in (G_FNEG $x, $z)
   if (isa<UnsetInit>(OpInit)) {
     assert(OpName && "Unset w/ no OpName?");
-    IP.addOperand(OpName->getAsUnquotedString(), PatternType());
+    IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), PatternType());
     return true;
   }
 
@@ -3286,7 +1748,7 @@ bool CombineRuleBuilder::parseInstructionPatternMIFlags(
 
     // Copy flags from a matched instruction: (MIFlags $mi)
     if (isa<UnsetInit>(Arg)) {
-      FI.addCopyFlag(Op->getArgName(K)->getAsUnquotedString());
+      FI.addCopyFlag(insertStrRef(Op->getArgName(K)->getAsUnquotedString()));
       continue;
     }
   }
@@ -3297,19 +1759,19 @@ bool CombineRuleBuilder::parseInstructionPatternMIFlags(
 std::unique_ptr<PatFrag>
 CombineRuleBuilder::parsePatFragImpl(const Record *Def) const {
   auto StackTrace = PrettyStackTraceParse(*Def);
-  if (!Def->isSubClassOf(PatFragClassName))
+  if (!Def->isSubClassOf(PatFrag::ClassName))
     return nullptr;
 
   const DagInit *Ins = Def->getValueAsDag("InOperands");
   if (Ins->getOperatorAsDef(Def->getLoc())->getName() != "ins") {
-    ::PrintError(Def, "expected 'ins' operator for " + PatFragClassName +
+    ::PrintError(Def, "expected 'ins' operator for " + PatFrag::ClassName +
                           " in operands list");
     return nullptr;
   }
 
   const DagInit *Outs = Def->getValueAsDag("OutOperands");
   if (Outs->getOperatorAsDef(Def->getLoc())->getName() != "outs") {
-    ::PrintError(Def, "expected 'outs' operator for " + PatFragClassName +
+    ::PrintError(Def, "expected 'outs' operator for " + PatFrag::ClassName +
                           " out operands list");
     return nullptr;
   }
@@ -3317,14 +1779,14 @@ CombineRuleBuilder::parsePatFragImpl(const Record *Def) const {
   auto Result = std::make_unique<PatFrag>(*Def);
   if (!parsePatFragParamList(Def->getLoc(), *Outs,
                              [&](StringRef Name, PatFrag::ParamKind Kind) {
-                               Result->addOutParam(Name, Kind);
+                               Result->addOutParam(insertStrRef(Name), Kind);
                                return true;
                              }))
     return nullptr;
 
   if (!parsePatFragParamList(Def->getLoc(), *Ins,
                              [&](StringRef Name, PatFrag::ParamKind Kind) {
-                               Result->addInParam(Name, Kind);
+                               Result->addInParam(insertStrRef(Name), Kind);
                                return true;
                              }))
     return nullptr;
@@ -3405,7 +1867,7 @@ const PatFrag *CombineRuleBuilder::parsePatFrag(const Record *Def) const {
 
   std::unique_ptr<PatFrag> NewPatFrag = parsePatFragImpl(Def);
   if (!NewPatFrag) {
-    ::PrintError(Def, "Could not parse " + PatFragClassName + " '" +
+    ::PrintError(Def, "Could not parse " + PatFrag::ClassName + " '" +
                           Def->getName() + "'");
     // Put a nullptr in the map so we don't attempt parsing this again.
     ParsedPatFrags[Def] = nullptr;
@@ -3593,7 +2055,8 @@ bool CombineRuleBuilder::emitPatFragMatchPattern(
     if (PIdx == (unsigned)-1) {
       // This is a temp of the PatFragPattern, prefix the name to avoid
       // conflicts.
-      return O.withNewName((PFP.getName() + "." + ParamName).str());
+      return O.withNewName(
+          insertStrRef((PFP.getName() + "." + ParamName).str()));
     }
 
     // The operand will be added to PatFragCEs's code expansions using the
@@ -3850,7 +2313,7 @@ bool CombineRuleBuilder::emitInstructionApplyPattern(
       // Always insert the action at the beginning, otherwise we may end up
       // using the temp reg before it's available.
       M.insertAction<MakeTempRegisterAction>(
-          M.actions_begin(), Ty.getLLTCodeGenOrTempType(M), TempRegID);
+          M.actions_begin(), getLLTCodeGenOrTempType(Ty, M), TempRegID);
     }
 
     DstMI.addRenderer<TempRegRenderer>(TempRegID);
@@ -3901,7 +2364,7 @@ bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand(
     return true;
   }
 
-  auto ImmTy = Ty.getLLTCodeGenOrTempType(M);
+  auto ImmTy = getLLTCodeGenOrTempType(Ty, M);
 
   if (isGConstant) {
     DstMI.addRenderer<ImmRenderer>(O.getImmValue(), ImmTy);
@@ -4045,7 +2508,7 @@ bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern(
         // and isn't all that useful so it's just rejected in typecheckPatterns
         // at this time.
         assert(Ty.isLLT() && "Only LLTs are supported in match patterns!");
-        OM.addPredicate<LLTOperandMatcher>(Ty.getLLTCodeGen());
+        OM.addPredicate<LLTOperandMatcher>(getLLTCodeGen(Ty));
       }
     }
 


        


More information about the llvm-commits mailing list