[llvm] r363753 - Re-commit r363744: [tblgen][disasm] Allow multiple encodings to disassemble to the same instruction

Tue Jun 18 16:34:47 PDT 2019

Author: dsanders
Date: Tue Jun 18 16:34:46 2019
New Revision: 363753

URL: http://llvm.org/viewvc/llvm-project?rev=363753&view=rev
Log:
Re-commit r363744: [tblgen][disasm] Allow multiple encodings to disassemble to the same instruction

It seems macOS lets you have ArrayRef<const X> even though this is apparently
forbidden by the language standard (Thanks MSVC++ for the clear error message).
Removed the problematic const's to fix this.

(It also seems I'm not receiving buildbot emails anymore and I'm trying to find
 out why. In the mean time I'll be polling lab.llvm.org to hopefully see if/when
 failures occur)

Modified:
    llvm/trunk/include/llvm/Target/Target.td
    llvm/trunk/utils/TableGen/FixedLenDecoderEmitter.cpp

Modified: llvm/trunk/include/llvm/Target/Target.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/Target.td?rev=363753&r1=363752&r2=363753&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Target/Target.td (original)
+++ llvm/trunk/include/llvm/Target/Target.td Tue Jun 18 16:34:46 2019
@@ -398,11 +398,49 @@ include "llvm/Target/TargetSchedule.td"
 
 class Predicate; // Forward def
 
+class InstructionEncoding {
+  // Size of encoded instruction.
+  int Size;
+
+  // The "namespace" in which this instruction exists, on targets like ARM
+  // which multiple ISA namespaces exist.
+  string DecoderNamespace = "";
+
+  // List of predicates which will be turned into isel matching code.
+  list<Predicate> Predicates = [];
+
+  string DecoderMethod = "";
+
+  // Is the instruction decoder method able to completely determine if the
+  // given instruction is valid or not. If the TableGen definition of the
+  // instruction specifies bitpattern A??B where A and B are static bits, the
+  // hasCompleteDecoder flag says whether the decoder method fully handles the
+  // ?? space, i.e. if it is a final arbiter for the instruction validity.
+  // If not then the decoder attempts to continue decoding when the decoder
+  // method fails.
+  //
+  // This allows to handle situations where the encoding is not fully
+  // orthogonal. Example:
+  // * InstA with bitpattern 0b0000????,
+  // * InstB with bitpattern 0b000000?? but the associated decoder method
+  //   DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
+  //
+  // The decoder tries to decode a bitpattern that matches both InstA and
+  // InstB bitpatterns first as InstB (because it is the most specific
+  // encoding). In the default case (hasCompleteDecoder = 1), when
+  // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
+  // hasCompleteDecoder = 0 in InstB, the decoder is informed that
+  // DecodeInstB() is not able to determine if all possible values of ?? are
+  // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
+  // decode the bitpattern as InstA too.
+  bit hasCompleteDecoder = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction set description - These classes correspond to the C++ classes in
 // the Target/TargetInstrInfo.h file.
 //
-class Instruction {
+class Instruction : InstructionEncoding {
   string Namespace = "";
 
   dag OutOperandList;       // An dag containing the MI def operand list.
@@ -427,10 +465,6 @@ class Instruction {
   // from the opcode.
   int Size = 0;
 
-  // DecoderNamespace - The "namespace" in which this instruction exists, on
-  // targets like ARM which multiple ISA namespaces exist.
-  string DecoderNamespace = "";
-
   // Code size, for instruction selection.
   // FIXME: What does this actually mean?
   int CodeSize = 0;
@@ -532,31 +566,6 @@ class Instruction {
   string DisableEncoding = "";
 
   string PostEncoderMethod = "";
-  string DecoderMethod = "";
-
-  // Is the instruction decoder method able to completely determine if the
-  // given instruction is valid or not. If the TableGen definition of the
-  // instruction specifies bitpattern A??B where A and B are static bits, the
-  // hasCompleteDecoder flag says whether the decoder method fully handles the
-  // ?? space, i.e. if it is a final arbiter for the instruction validity.
-  // If not then the decoder attempts to continue decoding when the decoder
-  // method fails.
-  //
-  // This allows to handle situations where the encoding is not fully
-  // orthogonal. Example:
-  // * InstA with bitpattern 0b0000????,
-  // * InstB with bitpattern 0b000000?? but the associated decoder method
-  //   DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
-  //
-  // The decoder tries to decode a bitpattern that matches both InstA and
-  // InstB bitpatterns first as InstB (because it is the most specific
-  // encoding). In the default case (hasCompleteDecoder = 1), when
-  // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
-  // hasCompleteDecoder = 0 in InstB, the decoder is informed that
-  // DecodeInstB() is not able to determine if all possible values of ?? are
-  // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
-  // decode the bitpattern as InstA too.
-  bit hasCompleteDecoder = 1;
 
   /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
   bits<64> TSFlags = 0;
@@ -593,6 +602,13 @@ class Instruction {
   bit FastISelShouldIgnore = 0;
 }
 
+/// Defines an additional encoding that disassembles to the given instruction
+/// Like Instruction, the Inst and SoftFail fields are omitted to allow targets
+// to specify their size.
+class AdditionalEncoding<Instruction I> : InstructionEncoding {
+  Instruction AliasOf = I;
+}
+
 /// PseudoInstExpansion - Expansion information for a pseudo-instruction.
 /// Which instruction it expands to and how the operands map from the
 /// pseudo.

Modified: llvm/trunk/utils/TableGen/FixedLenDecoderEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/FixedLenDecoderEmitter.cpp?rev=363753&r1=363752&r2=363753&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/FixedLenDecoderEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/FixedLenDecoderEmitter.cpp Tue Jun 18 16:34:46 2019
@@ -16,9 +16,10 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/CachedHashString.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
@@ -47,6 +48,12 @@ using namespace llvm;
 
 namespace {
 
+STATISTIC(NumEncodings, "Number of encodings considered");
+STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info");
+STATISTIC(NumInstructions, "Number of instructions considered");
+STATISTIC(NumEncodingsSupported, "Number of encodings supported");
+STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
+
 struct EncodingField {
   unsigned Base, Width, Offset;
   EncodingField(unsigned B, unsigned W, unsigned O)
@@ -94,6 +101,15 @@ struct EncodingAndInst {
       : EncodingDef(EncodingDef), Inst(Inst) {}
 };
 
+struct EncodingIDAndOpcode {
+  unsigned EncodingID;
+  unsigned Opcode;
+
+  EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
+  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
+      : EncodingID(EncodingID), Opcode(Opcode) {}
+};
+
 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
   if (Value.EncodingDef != Value.Inst->TheDef)
     OS << Value.EncodingDef->getName() << ":";
@@ -102,6 +118,7 @@ raw_ostream &operator<<(raw_ostream &OS,
 }
 
 class FixedLenDecoderEmitter {
+  RecordKeeper &RK;
   std::vector<EncodingAndInst> NumberedEncodings;
 
 public:
@@ -113,7 +130,7 @@ public:
                          std::string ROK = "MCDisassembler::Success",
                          std::string RFail = "MCDisassembler::Fail",
                          std::string L = "")
-      : Target(R), PredicateNamespace(std::move(PredicateNamespace)),
+      : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)),
         GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)),
         ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)),
         Locals(std::move(L)) {}
@@ -251,10 +268,11 @@ protected:
   bool Mixed; // a mixed region contains both set and unset bits
 
   // Map of well-known segment value to the set of uid's with that value.
-  std::map<uint64_t, std::vector<unsigned>> FilteredInstructions;
+  std::map<uint64_t, std::vector<EncodingIDAndOpcode>>
+      FilteredInstructions;
 
   // Set of uid's with non-constant segment values.
-  std::vector<unsigned> VariableInstructions;
+  std::vector<EncodingIDAndOpcode> VariableInstructions;
 
   // Map of well-known segment value to its delegate.
   std::map<unsigned, std::unique_ptr<const FilterChooser>> FilterChooserMap;
@@ -263,7 +281,7 @@ protected:
   unsigned NumFiltered;
 
   // Keeps track of the last opcode in the filtered bucket.
-  unsigned LastOpcFiltered;
+  EncodingIDAndOpcode LastOpcFiltered;
 
 public:
   Filter(Filter &&f);
@@ -273,7 +291,7 @@ public:
 
   unsigned getNumFiltered() const { return NumFiltered; }
 
-  unsigned getSingletonOpc() const {
+  EncodingIDAndOpcode getSingletonOpc() const {
     assert(NumFiltered == 1);
     return LastOpcFiltered;
   }
@@ -340,7 +358,9 @@ protected:
   ArrayRef<EncodingAndInst> AllInstructions;
 
   // Vector of uid's for this filter chooser to work on.
-  const std::vector<unsigned> &Opcodes;
+  // The first member of the pair is the opcode id being decoded, the second is
+  // the opcode id that should be emitted.
+  const std::vector<EncodingIDAndOpcode> &Opcodes;
 
   // Lookup table for the operand decoding of instructions.
   const std::map<unsigned, std::vector<OperandInfo>> &Operands;
@@ -366,7 +386,7 @@ protected:
 
 public:
   FilterChooser(ArrayRef<EncodingAndInst> Insts,
-                const std::vector<unsigned> &IDs,
+                const std::vector<EncodingIDAndOpcode> &IDs,
                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                 unsigned BW, const FixedLenDecoderEmitter *E)
       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
@@ -376,7 +396,7 @@ public:
   }
 
   FilterChooser(ArrayRef<EncodingAndInst> Insts,
-                const std::vector<unsigned> &IDs,
+                const std::vector<EncodingIDAndOpcode> &IDs,
                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                 const std::vector<bit_value_t> &ParentFilterBitValues,
                 const FilterChooser &parent)
@@ -412,6 +432,15 @@ protected:
     }
   }
 
+  // Emit the name of the encoding/instruction pair.
+  void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
+    const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
+    const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
+    if (EncodingDef != InstDef)
+      OS << EncodingDef->getName() << ":";
+    OS << InstDef->getName();
+  }
+
   // Populates the field of the insn given the start position and the number of
   // consecutive bits to scan for.
   //
@@ -462,7 +491,7 @@ protected:
 
   // Emits table entries to decode the singleton.
   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
-                               unsigned Opc) const;
+                               EncodingIDAndOpcode Opc) const;
 
   // Emits code to decode the singleton, and then to decode the rest.
   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
@@ -523,13 +552,13 @@ Filter::Filter(FilterChooser &owner, uns
   assert(StartBit + NumBits - 1 < Owner->BitWidth);
 
   NumFiltered = 0;
-  LastOpcFiltered = 0;
+  LastOpcFiltered = {0, 0};
 
   for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
     insn_t Insn;
 
     // Populates the insn given the uid.
-    Owner->insnWithID(Insn, Owner->Opcodes[i]);
+    Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);
 
     uint64_t Field;
     // Scans the segment for possibly well-specified encoding bits.
@@ -1025,7 +1054,7 @@ unsigned FilterChooser::getIslands(std::
   // 1: Water (the bit value does not affect decoding)
   // 2: Island (well-known bit value needed for decoding)
   int State = 0;
-  int Val = -1;
+  int64_t Val = -1;
 
   for (unsigned i = 0; i < BitWidth; ++i) {
     Val = Value(Insn[i]);
@@ -1313,12 +1342,12 @@ void FilterChooser::emitSoftFailTableEnt
 
 // Emits table entries to decode the singleton.
 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
-                                            unsigned Opc) const {
+                                            EncodingIDAndOpcode Opc) const {
   std::vector<unsigned> StartBits;
   std::vector<unsigned> EndBits;
   std::vector<uint64_t> FieldVals;
   insn_t Insn;
-  insnWithID(Insn, Opc);
+  insnWithID(Insn, Opc.EncodingID);
 
   // Look for islands of undecoded bits of the singleton.
   getIslands(StartBits, EndBits, FieldVals, Insn);
@@ -1326,7 +1355,7 @@ void FilterChooser::emitSingletonTableEn
   unsigned Size = StartBits.size();
 
   // Emit the predicate table entry if one is needed.
-  emitPredicateTableEntry(TableInfo, Opc);
+  emitPredicateTableEntry(TableInfo, Opc.EncodingID);
 
   // Check any additional encoding fields needed.
   for (unsigned I = Size; I != 0; --I) {
@@ -1350,10 +1379,11 @@ void FilterChooser::emitSingletonTableEn
   }
 
   // Check for soft failure of the match.
-  emitSoftFailTableEntry(TableInfo, Opc);
+  emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
 
   bool HasCompleteDecoder;
-  unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc, HasCompleteDecoder);
+  unsigned DIdx =
+      getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
 
   // Produce OPC_Decode or OPC_TryDecode opcode based on the information
   // whether the instruction decoder is complete or not. If it is complete
@@ -1366,8 +1396,9 @@ void FilterChooser::emitSingletonTableEn
   // can decode it.
   TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
       MCD::OPC_TryDecode);
+  NumEncodingsSupported++;
   uint8_t Buffer[16], *p;
-  encodeULEB128(Opc, Buffer);
+  encodeULEB128(Opc.Opcode, Buffer);
   for (p = Buffer; *p >= 128 ; ++p)
     TableInfo.Table.push_back(*p);
   TableInfo.Table.push_back(*p);
@@ -1393,7 +1424,7 @@ void FilterChooser::emitSingletonTableEn
 // Emits table entries to decode the singleton, and then to decode the rest.
 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                             const Filter &Best) const {
-  unsigned Opc = Best.getSingletonOpc();
+  EncodingIDAndOpcode Opc = Best.getSingletonOpc();
 
   // complex singletons need predicate checks from the first singleton
   // to refer forward to the variable filterchooser that follows.
@@ -1453,7 +1484,7 @@ bool FilterChooser::filterProcessor(bool
       std::vector<uint64_t> FieldVals;
       insn_t Insn;
 
-      insnWithID(Insn, Opcodes[i]);
+      insnWithID(Insn, Opcodes[i].EncodingID);
 
       // Look for islands of undecoded bits of any instruction.
       if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
@@ -1497,7 +1528,7 @@ bool FilterChooser::filterProcessor(bool
   for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
     insn_t insn;
 
-    insnWithID(insn, Opcodes[InsnIndex]);
+    insnWithID(insn, Opcodes[InsnIndex].EncodingID);
 
     for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
       switch (bitAttrs[BitIndex]) {
@@ -1716,9 +1747,12 @@ void FilterChooser::emitTableEntries(Dec
   dumpStack(errs(), "\t\t");
 
   for (unsigned i = 0; i < Opcodes.size(); ++i) {
-    errs() << '\t' << AllInstructions[Opcodes[i]] << " ";
-    dumpBits(errs(),
-             getBitsField(*AllInstructions[Opcodes[i]].EncodingDef, "Inst"));
+    errs() << '\t';
+    emitNameWithID(errs(), Opcodes[i].EncodingID);
+    errs() << " ";
+    dumpBits(
+        errs(),
+        getBitsField(*AllInstructions[Opcodes[i].EncodingID].EncodingDef, "Inst"));
     errs() << '\n';
   }
 }
@@ -1750,24 +1784,25 @@ static std::string findOperandDecoderMet
   return Decoder;
 }
 
-static bool populateInstruction(CodeGenTarget &Target,
-                       const CodeGenInstruction &CGI, unsigned Opc,
-                       std::map<unsigned, std::vector<OperandInfo>> &Operands){
+static bool
+populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
+                    const CodeGenInstruction &CGI, unsigned Opc,
+                    std::map<unsigned, std::vector<OperandInfo>> &Operands) {
   const Record &Def = *CGI.TheDef;
   // If all the bit positions are not specified; do not decode this instruction.
   // We are bound to fail!  For proper disassembly, the well-known encoding bits
   // of the instruction must be fully specified.
 
-  BitsInit &Bits = getBitsField(Def, "Inst");
+  BitsInit &Bits = getBitsField(EncodingDef, "Inst");
   if (Bits.allInComplete()) return false;
 
   std::vector<OperandInfo> InsnOperands;
 
   // If the instruction has specified a custom decoding hook, use that instead
   // of trying to auto-generate the decoder.
-  StringRef InstDecoder = Def.getValueAsString("DecoderMethod");
+  StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
   if (InstDecoder != "") {
-    bool HasCompleteInstDecoder = Def.getValueAsBit("hasCompleteDecoder");
+    bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder");
     InsnOperands.push_back(OperandInfo(InstDecoder, HasCompleteInstDecoder));
     Operands[Opc] = InsnOperands;
     return true;
@@ -2143,7 +2178,7 @@ static void emitDecodeInstruction(format
      << "  const FeatureBitset& Bits = STI.getFeatureBits();\n"
      << "\n"
      << "  const uint8_t *Ptr = DecodeTable;\n"
-     << "  uint32_t CurFieldValue = 0;\n"
+     << "  InsnType CurFieldValue = 0;\n"
      << "  DecodeStatus S = MCDisassembler::Success;\n"
      << "  while (true) {\n"
      << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
@@ -2188,7 +2223,7 @@ static void emitDecodeInstruction(format
      << "      unsigned Len = *++Ptr;\n"
      << "      InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
      << "      // Decode the field value.\n"
-     << "      uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
+     << "      InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
      << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
@@ -2335,37 +2370,52 @@ void FixedLenDecoderEmitter::run(raw_ost
   // Parameterize the decoders based on namespace and instruction width.
   const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
   NumberedEncodings.reserve(NumberedInstructions.size());
-  for (const auto &NumberedInstruction : NumberedInstructions)
+  DenseMap<Record *, unsigned> IndexOfInstruction;
+  for (const auto &NumberedInstruction : NumberedInstructions) {
+    IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
     NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
+  }
+  for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
+    NumberedEncodings.emplace_back(
+        NumberedAlias,
+        &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
 
-  std::map<std::pair<std::string, unsigned>,
-           std::vector<unsigned>> OpcMap;
+  std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
+      OpcMap;
   std::map<unsigned, std::vector<OperandInfo>> Operands;
 
   for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
+    const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
     const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
     const Record *Def = Inst->TheDef;
-    unsigned Size = Def->getValueAsInt("Size");
+    unsigned Size = EncodingDef->getValueAsInt("Size");
     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
         Def->getValueAsBit("isPseudo") ||
         Def->getValueAsBit("isAsmParserOnly") ||
-        Def->getValueAsBit("isCodeGenOnly"))
+        Def->getValueAsBit("isCodeGenOnly")) {
+      NumEncodingsLackingDisasm++;
       continue;
+    }
+
+    if (i < NumberedInstructions.size())
+      NumInstructions++;
+    NumEncodings++;
 
-    StringRef DecoderNamespace = Def->getValueAsString("DecoderNamespace");
+    StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
 
     if (Size) {
-      if (populateInstruction(Target, *Inst, i, Operands)) {
-        OpcMap[std::make_pair(DecoderNamespace, Size)].push_back(i);
-      }
+      if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
+        OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second);
+      } else
+        NumEncodingsOmitted++;
     }
   }
 
   DecoderTableInfo TableInfo;
   for (const auto &Opc : OpcMap) {
     // Emit the decoder for this namespace+width combination.
-    ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
-                                                   NumberedEncodings.size());
+    ArrayRef<EncodingAndInst> NumberedEncodingsRef(
+        NumberedEncodings.data(), NumberedEncodings.size());
     FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
                      8 * Opc.first.second, this);