[llvm] fdcb256 - [TableGen] X86 mnemonic tables backend

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 18 01:44:03 PDT 2022


Author: Amir Ayupov
Date: 2022-03-18T01:43:53-07:00
New Revision: fdcb256f9796de1393763a66c3b38a5859fb6b70

URL: https://github.com/llvm/llvm-project/commit/fdcb256f9796de1393763a66c3b38a5859fb6b70
DIFF: https://github.com/llvm/llvm-project/commit/fdcb256f9796de1393763a66c3b38a5859fb6b70.diff

LOG: [TableGen] X86 mnemonic tables backend

Add a TableGen backend that generates X86 mnemonic-based opcode groupings,
e.g. `isADD` and `isTEST`.

Addresses https://lists.llvm.org/pipermail/llvm-dev/2022-January/154526.html

Reviewed By: skan

Differential Revision: https://reviews.llvm.org/D121571

Added: 
    llvm/utils/TableGen/X86MnemonicTables.cpp

Modified: 
    llvm/utils/TableGen/CMakeLists.txt
    llvm/utils/TableGen/TableGen.cpp
    llvm/utils/TableGen/TableGenBackends.h
    llvm/utils/TableGen/X86RecognizableInstr.h

Removed: 
    


################################################################################
diff  --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 339692bcd6512..87f86dc5b725a 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -53,6 +53,7 @@ add_tablegen(llvm-tblgen LLVM
   X86DisassemblerTables.cpp
   X86EVEX2VEXTablesEmitter.cpp
   X86FoldTablesEmitter.cpp
+  X86MnemonicTables.cpp
   X86ModRMFilters.cpp
   X86RecognizableInstr.cpp
   WebAssemblyDisassemblerEmitter.cpp

diff  --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 2d4a45f889be6..cde49919f54fe 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -52,6 +52,7 @@ enum ActionType {
   GenGICombiner,
   GenX86EVEX2VEXTables,
   GenX86FoldTables,
+  GenX86MnemonicTables,
   GenRegisterBank,
   GenExegesis,
   GenAutomata,
@@ -130,6 +131,8 @@ cl::opt<ActionType> Action(
                    "Generate X86 EVEX to VEX compress tables"),
         clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
                    "Generate X86 fold tables"),
+        clEnumValN(GenX86MnemonicTables, "gen-x86-mnemonic-tables",
+                   "Generate X86 mnemonic tables"),
         clEnumValN(GenRegisterBank, "gen-register-bank",
                    "Generate registers bank descriptions"),
         clEnumValN(GenExegesis, "gen-exegesis",
@@ -257,6 +260,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenX86EVEX2VEXTables:
     EmitX86EVEX2VEXTables(Records, OS);
     break;
+  case GenX86MnemonicTables:
+    EmitX86MnemonicTables(Records, OS);
+    break;
   case GenX86FoldTables:
     EmitX86FoldTables(Records, OS);
     break;

diff  --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h
index 71db8dc77b052..224efa98bae16 100644
--- a/llvm/utils/TableGen/TableGenBackends.h
+++ b/llvm/utils/TableGen/TableGenBackends.h
@@ -88,6 +88,7 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
 void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS);
 void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
+void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
 void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
 void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);

diff  --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
new file mode 100644
index 0000000000000..8269e6b5b85c5
--- /dev/null
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -0,0 +1,114 @@
+//==- X86MnemonicTables.cpp - Generate mnemonic extraction tables. -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits predicates (e.g. isADD) that group instructions
+// by their mnemonic name in the AsmWriter's assembly variant.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "X86DisassemblerTables.h"
+#include "X86RecognizableInstr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/TableGenBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+// Groups X86 instructions by mnemonic and emits is<MNEMONIC>() predicates.
+class X86MnemonicTablesEmitter {
+  CodeGenTarget Target;
+
+public:
+  explicit X86MnemonicTablesEmitter(RecordKeeper &R) : Target(R) {}
+  // Write the mnemonic predicate declarations and definitions to OS.
+  void run(raw_ostream &OS);
+};
+
+// Emit is<MNEMONIC>(unsigned Opcode) declarations and definitions, one per
+// distinct upper-cased mnemonic found in the AsmWriter's asm variant.
+void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
+  emitSourceFileHeader("X86 Mnemonic tables", OS);
+  OS << "namespace llvm {\nnamespace X86 {\n\n";
+  Record *AsmWriter = Target.getAsmWriter();
+  unsigned Variant = AsmWriter->getValueAsInt("Variant");
+
+  // Hold all instructions grouped by mnemonic
+  StringMap<SmallVector<const CodeGenInstruction *, 0>> MnemonicToCGInstrMap;
+
+  // Only used to construct RecognizableInstr below.
+  X86Disassembler::DisassemblerTables Tables;
+  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
+      Target.getInstructionsByEnumValue();
+  for (unsigned II = 0, IE = NumberedInstructions.size(); II != IE; ++II) {
+    const CodeGenInstruction *I = NumberedInstructions[II];
+    X86Disassembler::RecognizableInstr RI(Tables, *I, II);
+    Record *Def = I->TheDef;
+    // NOTE(review): RI fields are read only after the X86Inst check, assuming
+    // RecognizableInstr leaves them unset for other records -- confirm.
+    if ( // Filter non-X86 instructions
+        !Def->isSubClassOf("X86Inst") ||
+        // Skip pseudo instructions as they may contain non-alnum characters in
+        // mnemonic
+        (RI.IsCodeGenOnly && !RI.ForceDisassemble) ||
+        // Non-parsable instruction defs contain prefix as part of AsmString
+        Def->getValueAsString("AsmVariantName") == "NonParsable" ||
+        // Skip CodeGenInstructions that are not real standalone instructions
+        RI.Form == X86Local::PrefixByte || RI.Form == X86Local::Pseudo)
+      continue;
+    // Flatten an instruction assembly string.
+    std::string AsmString = I->FlattenAsmStringVariants(I->AsmString, Variant);
+    StringRef Mnemonic(AsmString);
+    // Extract a mnemonic assuming it's separated by \t
+    Mnemonic = Mnemonic.take_until([](char C) { return C == '\t'; });
+
+    // Special case: CMOVCC, JCC, SETCC have "${cond}" in mnemonic.
+    // Replace it (and everything after it) with "CC" in-place.
+    size_t CondPos = Mnemonic.find("${cond}");
+    if (CondPos != StringRef::npos)
+      Mnemonic = AsmString.replace(CondPos, StringRef::npos, "CC");
+
+    // It's intentional that we put a std::string to the map (StringRef::upper
+    // returns a string) as AsmString is deallocated at the end of the iteration
+    MnemonicToCGInstrMap[Mnemonic.upper()].push_back(I);
+  }
+
+  OS << "#ifdef GET_X86_MNEMONIC_TABLES_H\n";
+  OS << "#undef GET_X86_MNEMONIC_TABLES_H\n\n";
+  for (StringRef Mnemonic : MnemonicToCGInstrMap.keys())
+    OS << "bool is" << Mnemonic << "(unsigned Opcode);\n";
+  OS << "#endif // GET_X86_MNEMONIC_TABLES_H\n\n";
+
+  OS << "#ifdef GET_X86_MNEMONIC_TABLES_CPP\n";
+  OS << "#undef GET_X86_MNEMONIC_TABLES_CPP\n\n";
+  for (const auto &Entry : MnemonicToCGInstrMap) {
+    OS << "bool is" << Entry.getKey() << "(unsigned Opcode) {\n";
+    const auto &Instrs = Entry.getValue();
+    if (Instrs.size() == 1) {
+      OS << "\treturn Opcode == " << Instrs.front()->TheDef->getName()
+         << ";\n}\n\n";
+    } else {
+      OS << "\tswitch (Opcode) {\n";
+      for (const CodeGenInstruction *CGI : Instrs)
+        OS << "\tcase " << CGI->TheDef->getName() << ":\n";
+      OS << "\t\treturn true;\n\t}\n\treturn false;\n}\n\n";
+    }
+  }
+  OS << "#endif // GET_X86_MNEMONIC_TABLES_CPP\n\n";
+  OS << "} // end namespace X86\n} // end namespace llvm\n";
+}
+
+} // namespace
+
+namespace llvm {
+void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS) {
+  X86MnemonicTablesEmitter(RK).run(OS); // One-shot emitter; writes to OS.
+}
+} // namespace llvm

diff  --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index 8f557d9ee5f51..debc6c516ab16 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -163,7 +163,7 @@ class DisassemblerTables;
 ///   to interpret the information available in the LLVM tables, and to emit the
 ///   instruction into DisassemblerTables.
 class RecognizableInstr {
-private:
+public:
   /// The opcode of the instruction, as used in an MCInst
   InstrUID UID;
   /// The record from the .td files corresponding to this instruction
@@ -232,6 +232,7 @@ class RecognizableInstr {
   /// info table
   InstructionSpecifier* Spec;
 
+private:
   /// insnContext - Returns the primary context in which the instruction is
   ///   valid.
   ///
@@ -339,6 +340,7 @@ class RecognizableInstr {
   ///               decode information for the current instruction.
   void emitDecodePath(DisassemblerTables &tables) const;
 
+public:
   /// Constructor - Initializes a RecognizableInstr with the appropriate fields
   ///   from a CodeGenInstruction.
   ///
@@ -348,7 +350,6 @@ class RecognizableInstr {
   RecognizableInstr(DisassemblerTables &tables,
                     const CodeGenInstruction &insn,
                     InstrUID uid);
-public:
   /// processInstr - Accepts a CodeGenInstruction and loads decode information
   ///   for it into a DisassemblerTables if appropriate.
   ///


        


More information about the llvm-commits mailing list