[llvm-commits] [llvm] r78378 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/AsmParser/x86_instructions.s utils/TableGen/AsmMatcherEmitter.cpp

Daniel Dunbar daniel at zuster.org
Fri Aug 7 01:26:06 PDT 2009


Author: ddunbar
Date: Fri Aug  7 03:26:05 2009
New Revision: 78378

URL: http://llvm.org/viewvc/llvm-project?rev=78378&view=rev
Log:
llvm-mc/AsmMatcher: Move to a slightly more sane matching design.
 - Still not very sane, but at least it's not 60k lines on X86. :)

 - In terms of correctness, currently some things are hard wired for X86, and we
   still don't properly resolve ambiguities (this is ignoring the instructions
   we don't even match due to funny .td stuff or other corner cases).

The high level changes:
 1. Represent tokens which are significant for matching explicitly as separate
    operands. This uniformly handles not only the instruction mnemonic, but
    also 'significant' syntax like the '*' in "call * ...".

 2. Separate the matching of operands to an instruction from the construction of
    the MCInst. In theory this can be done during matching, but since the number
    of variations is small I think it makes sense to decompose the problems.

 3. Improved a few of the mechanisms to at least successfully flatten / tokenize
    the assembly strings for PowerPC and ARM.

 4. The comment at the top of AsmMatcherEmitter.cpp explains the approach I'm
    moving towards for handling ambiguous instructions. The high-bit is to infer
    a partial ordering of the operand classes (and force the user to specify one
    if we can't) and use that to resolve ambiguities.

Modified:
    llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
    llvm/trunk/test/MC/AsmParser/x86_instructions.s
    llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp

Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=78378&r1=78377&r2=78378&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Fri Aug  7 03:26:05 2009
@@ -26,10 +26,6 @@
   MCAsmParser &Parser;
 
 private:
-  bool MatchInstruction(const StringRef &Name,
-                        SmallVectorImpl<X86Operand> &Operands,
-                        MCInst &Inst);
-
   MCAsmParser &getParser() const { return Parser; }
 
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -47,6 +43,9 @@
   /// @name Auto-generated Match Functions
   /// {  
 
+  bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands,
+                        MCInst &Inst);
+
   bool MatchRegisterName(const StringRef &Name, unsigned &RegNo);
 
   /// }
@@ -67,6 +66,7 @@
 /// instruction.
 struct X86Operand {
   enum {
+    Token,
     Register,
     Immediate,
     Memory
@@ -74,6 +74,11 @@
 
   union {
     struct {
+      const char *Data;
+      unsigned Length;
+    } Tok;
+
+    struct {
       unsigned RegNo;
     } Reg;
 
@@ -90,6 +95,11 @@
     } Mem;
   };
 
+  StringRef getToken() const {
+    assert(Kind == Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
   unsigned getReg() const {
     assert(Kind == Register && "Invalid access!");
     return Reg.RegNo;
@@ -121,18 +131,61 @@
     return Mem.Scale;
   }
 
+  bool isToken(const StringRef &Str) const { 
+    return Kind == Token && Str == getToken(); 
+  }
+
+  bool isImm() const { return Kind == Immediate; }
+  
+  bool isMem() const { return Kind == Memory; }
+
+  bool isReg() const { return Kind == Register; }
+
+  void addRegOperands(MCInst &Inst, unsigned N) {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getReg()));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateMCValue(getImm()));
+  }
+
+  void addMemOperands(MCInst &Inst, unsigned N) {
+    assert((N == 4 || N == 5) && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+    Inst.addOperand(MCOperand::CreateMCValue(getMemDisp()));
+
+    // FIXME: What a hack.
+    if (N == 5)
+      Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+  }
+
+  static X86Operand CreateToken(StringRef Str) {
+    X86Operand Res;
+    Res.Kind = Token;
+    Res.Tok.Data = Str.data();
+    Res.Tok.Length = Str.size();
+    return Res;
+  }
+
   static X86Operand CreateReg(unsigned RegNo) {
     X86Operand Res;
     Res.Kind = Register;
     Res.Reg.RegNo = RegNo;
     return Res;
   }
+
   static X86Operand CreateImm(MCValue Val) {
     X86Operand Res;
     Res.Kind = Immediate;
     Res.Imm.Val = Val;
     return Res;
   }
+
   static X86Operand CreateMem(unsigned SegReg, MCValue Disp, unsigned BaseReg,
                               unsigned IndexReg, unsigned Scale) {
     // We should never just have a displacement, that would be an immediate.
@@ -326,7 +379,9 @@
 }
 
 bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
-  SmallVector<X86Operand, 3> Operands;
+  SmallVector<X86Operand, 4> Operands;
+
+  Operands.push_back(X86Operand::CreateToken(Name));
 
   SMLoc Loc = getLexer().getTok().getLoc();
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -345,7 +400,7 @@
     }
   }
 
-  if (!MatchInstruction(Name, Operands, Inst))
+  if (!MatchInstruction(Operands, Inst))
     return false;
 
   // FIXME: We should give nicer diagnostics about the exact failure.
@@ -362,137 +417,4 @@
   RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
 }
 
-// FIXME: These should come from tblgen?
-
-static bool 
-Match_X86_Op_REG(const X86Operand &Op, MCOperand *MCOps, unsigned NumOps) {
-  assert(NumOps == 1 && "Invalid number of ops!");
-
-  // FIXME: Match correct registers.
-  if (Op.Kind != X86Operand::Register)
-    return true;
-
-  MCOps[0] = MCOperand::CreateReg(Op.getReg());
-  return false;
-}
-
-static bool 
-Match_X86_Op_IMM(const X86Operand &Op, MCOperand *MCOps, unsigned NumOps) {
-  assert(NumOps == 1 && "Invalid number of ops!");
-
-  // FIXME: We need to check widths.
-  if (Op.Kind != X86Operand::Immediate)
-    return true;
-
-  MCOps[0] = MCOperand::CreateMCValue(Op.getImm());
-  return false;
-}
-
-static bool Match_X86_Op_LMEM(const X86Operand &Op,
-                             MCOperand *MCOps,
-                             unsigned NumMCOps) {
-  assert(NumMCOps == 4 && "Invalid number of ops!");
-
-  if (Op.Kind != X86Operand::Memory)
-    return true;
-
-  MCOps[0] = MCOperand::CreateReg(Op.getMemBaseReg());
-  MCOps[1] = MCOperand::CreateImm(Op.getMemScale());
-  MCOps[2] = MCOperand::CreateReg(Op.getMemIndexReg());
-  MCOps[3] = MCOperand::CreateMCValue(Op.getMemDisp());
-
-  return false;  
-}
-
-static bool Match_X86_Op_MEM(const X86Operand &Op,
-                             MCOperand *MCOps,
-                             unsigned NumMCOps) {
-  assert(NumMCOps == 5 && "Invalid number of ops!");
-
-  if (Match_X86_Op_LMEM(Op, MCOps, 4))
-    return true;
-
-  MCOps[4] = MCOperand::CreateReg(Op.getMemSegReg());
-
-  return false;  
-}
-
-#define REG(name) \
-  static bool Match_X86_Op_##name(const X86Operand &Op, \
-                                  MCOperand *MCOps,     \
-                                  unsigned NumMCOps) {  \
-    return Match_X86_Op_REG(Op, MCOps, NumMCOps);       \
-  }
-
-REG(GR64)
-REG(GR32)
-REG(GR16)
-REG(GR8)
-
-#define IMM(name) \
-  static bool Match_X86_Op_##name(const X86Operand &Op, \
-                                  MCOperand *MCOps,     \
-                                  unsigned NumMCOps) {  \
-    return Match_X86_Op_IMM(Op, MCOps, NumMCOps);       \
-  }
-
-IMM(brtarget)
-IMM(brtarget8)
-IMM(i16i8imm)
-IMM(i16imm)
-IMM(i32i8imm)
-IMM(i32imm)
-IMM(i32imm_pcrel)
-IMM(i64i32imm)
-IMM(i64i32imm_pcrel)
-IMM(i64i8imm)
-IMM(i64imm)
-IMM(i8imm)
-
-#define LMEM(name) \
-  static bool Match_X86_Op_##name(const X86Operand &Op, \
-                                  MCOperand *MCOps,     \
-                                  unsigned NumMCOps) {  \
-    return Match_X86_Op_LMEM(Op, MCOps, NumMCOps);       \
-  }
-
-LMEM(lea32mem)
-LMEM(lea64_32mem)
-LMEM(lea64mem)
-
-#define MEM(name) \
-  static bool Match_X86_Op_##name(const X86Operand &Op, \
-                                  MCOperand *MCOps,     \
-                                  unsigned NumMCOps) {  \
-    return Match_X86_Op_MEM(Op, MCOps, NumMCOps);       \
-  }
-
-MEM(f128mem)
-MEM(f32mem)
-MEM(f64mem)
-MEM(f80mem)
-MEM(i128mem)
-MEM(i16mem)
-MEM(i32mem)
-MEM(i64mem)
-MEM(i8mem)
-MEM(sdmem)
-MEM(ssmem)
-
-#define DUMMY(name) \
-  static bool Match_X86_Op_##name(const X86Operand &Op, \
-                                  MCOperand *MCOps,     \
-                                  unsigned NumMCOps) {  \
-    return true;                                        \
-  }
-
-DUMMY(FR32)
-DUMMY(FR64)
-DUMMY(GR32_NOREX)
-DUMMY(GR8_NOREX)
-DUMMY(RST)
-DUMMY(VR128)
-DUMMY(VR64)
-DUMMY(i8mem_NOREX)
-
 #include "X86GenAsmMatcher.inc"

Modified: llvm/trunk/test/MC/AsmParser/x86_instructions.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/x86_instructions.s?rev=78378&r1=78377&r2=78378&view=diff

==============================================================================
--- llvm/trunk/test/MC/AsmParser/x86_instructions.s (original)
+++ llvm/trunk/test/MC/AsmParser/x86_instructions.s Fri Aug  7 03:26:05 2009
@@ -16,4 +16,3 @@
         movl %eax, 10(%ebp, %ebx, 4)
 // RUN: grep {MCInst(opcode=.*, operands=.reg:0, imm:4, reg:21, val:10, reg:0, reg:19.)} %t
         movl %eax, 10(, %ebx, 4)
-        
\ No newline at end of file

Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=78378&r1=78377&r2=78378&view=diff

==============================================================================
--- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Fri Aug  7 03:26:05 2009
@@ -10,17 +10,87 @@
 // This tablegen backend emits a target specifier matcher for converting parsed
 // assembly operands in the MCInst structures.
 //
+// The input to the target specific matcher is a list of literal tokens and
+// operands. The target specific parser should generally eliminate any syntax
+// which is not relevant for matching; for example, comma tokens should have
+// already been consumed and eliminated by the parser. Most instructions will
+// end up with a single literal token (the instruction name) and some number of
+// operands.
+//
+// Some example inputs, for X86:
+//   'addl' (immediate ...) (register ...)
+//   'add' (immediate ...) (memory ...)
+//   'call' '*' %epc 
+//
+// The assembly matcher is responsible for converting this input into a precise
+// machine instruction (i.e., an instruction with a well defined encoding). This
+// mapping has several properties which complicate matching:
+//
+//  - It may be ambiguous; many architectures can legally encode particular
+//    variants of an instruction in different ways (for example, using a smaller
+//    encoding for small immediates). Such ambiguities should never be
+//    arbitrarily resolved by the assembler, the assembler is always responsible
+//    for choosing the "best" available instruction.
+//
+//  - It may depend on the subtarget or the assembler context. Instructions
+//    which are invalid for the current mode, but otherwise unambiguous (e.g.,
+//    an SSE instruction in a file being assembled for i486) should be accepted
+//    and rejected by the assembler front end. However, if the proper encoding
+//    for an instruction is dependent on the assembler context then the matcher
+//    is responsible for selecting the correct machine instruction for the
+//    current mode.
+//
+// The core matching algorithm attempts to exploit the regularity in most
+// instruction sets to quickly determine the set of possibly matching
+// instructions, and the simplify the generated code. Additionally, this helps
+// to ensure that the ambiguities are intentionally resolved by the user.
+//
+// The matching is divided into two distinct phases:
+//
+//   1. Classification: Each operand is mapped to the unique set which (a)
+//      contains it, and (b) is the largest such subset for which a single
+//      instruction could match all members.
+//
+//      For register classes, we can generate these subgroups automatically. For
+//      arbitrary operands, we expect the user to define the classes and their
+//      relations to one another (for example, 8-bit signed immediates as a
+//      subset of 32-bit immediates).
+//
+//      By partitioning the operands in this way, we guarantee that for any
+//      tuple of classes, any single instruction must match either all or none
+//      of the sets of operands which could classify to that tuple.
+//
+//      In addition, the subset relation amongst classes induces a partial order
+//      on such tuples, which we use to resolve ambiguities.
+//
+//      FIXME: What do we do if a crazy case shows up where this is the wrong
+//      resolution?
+//
+//   2. The input can now be treated as a tuple of classes (static tokens are
+//      simple singleton sets). Each such tuple should generally map to a single
+//      instruction (we currently ignore cases where this isn't true, whee!!!),
+//      which we can emit a simple matcher for.
+//
 //===----------------------------------------------------------------------===//
 
 #include "AsmMatcherEmitter.h"
 #include "CodeGenTarget.h"
 #include "Record.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include <set>
 #include <list>
 using namespace llvm;
 
+namespace {
+  cl::opt<std::string>
+  MatchOneInstr("match-one-instr", cl::desc("Match only the named instruction"),
+              cl::init(""));
+}
+
 /// FlattenVariants - Flatten an .td file assembly string by selecting the
 /// variant at index \arg N.
 static std::string FlattenVariants(const std::string &AsmString,
@@ -33,7 +103,8 @@
     size_t VariantsStart = 0;
     for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart)
       if (Cur[VariantsStart] == '{' && 
-          (VariantsStart == 0 || Cur[VariantsStart-1] != '$'))
+          (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' &&
+                                  Cur[VariantsStart-1] != '\\')))
         break;
 
     // Add the prefix to the result.
@@ -47,7 +118,7 @@
     size_t VariantsEnd = VariantsStart;
     unsigned NestedBraces = 1;
     for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) {
-      if (Cur[VariantsEnd] == '}') {
+      if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') {
         if (--NestedBraces == 0)
           break;
       } else if (Cur[VariantsEnd] == '{')
@@ -69,274 +140,498 @@
 }
 
 /// TokenizeAsmString - Tokenize a simplified assembly string.
-static void TokenizeAsmString(const std::string &AsmString, 
+static void TokenizeAsmString(const StringRef &AsmString, 
                               SmallVectorImpl<StringRef> &Tokens) {
   unsigned Prev = 0;
   bool InTok = true;
   for (unsigned i = 0, e = AsmString.size(); i != e; ++i) {
     switch (AsmString[i]) {
+    case '[':
+    case ']':
     case '*':
     case '!':
     case ' ':
     case '\t':
     case ',':
       if (InTok) {
-        Tokens.push_back(StringRef(&AsmString[Prev], i - Prev));
+        Tokens.push_back(AsmString.slice(Prev, i));
+        InTok = false;
+      }
+      if (!isspace(AsmString[i]) && AsmString[i] != ',')
+        Tokens.push_back(AsmString.substr(i, 1));
+      Prev = i + 1;
+      break;
+      
+    case '\\':
+      if (InTok) {
+        Tokens.push_back(AsmString.slice(Prev, i));
         InTok = false;
       }
-      if (AsmString[i] == '*' || AsmString[i] == '!')
-        Tokens.push_back(StringRef(&AsmString[i], 1));
+      ++i;
+      assert(i != AsmString.size() && "Invalid quoted character");
+      Tokens.push_back(AsmString.substr(i, 1));
       Prev = i + 1;
       break;
 
+    case '$': {
+      // If this isn't "${", treat like a normal token.
+      if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') {
+        if (InTok) {
+          Tokens.push_back(AsmString.slice(Prev, i));
+          InTok = false;
+        }
+        Prev = i;
+        break;
+      }
+
+      if (InTok) {
+        Tokens.push_back(AsmString.slice(Prev, i));
+        InTok = false;
+      }
+
+      StringRef::iterator End =
+        std::find(AsmString.begin() + i, AsmString.end(), '}');
+      assert(End != AsmString.end() && "Missing brace in operand reference!");
+      size_t EndPos = End - AsmString.begin();
+      Tokens.push_back(AsmString.slice(i, EndPos+1));
+      Prev = EndPos + 1;
+      i = EndPos;
+      break;
+    }
+
     default:
       InTok = true;
     }
   }
   if (InTok && Prev != AsmString.size())
-    Tokens.push_back(StringRef(&AsmString[Prev], AsmString.size() - Prev));
+    Tokens.push_back(AsmString.substr(Prev));
 }
 
-void AsmMatcherEmitter::run(raw_ostream &OS) {
-  CodeGenTarget Target;
-  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
-  Record *AsmParser = Target.getAsmParser();
-  std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
+static bool IsAssemblerInstruction(const StringRef &Name,
+                                   const CodeGenInstruction &CGI, 
+                                   const SmallVectorImpl<StringRef> &Tokens) {
+  // Ignore psuedo ops.
+  //
+  // FIXME: This is a hack.
+  if (const RecordVal *Form = CGI.TheDef->getValue("Form"))
+    if (Form->getValue()->getAsString() == "Pseudo")
+      return false;
+  
+  // Ignore "PHI" node.
+  //
+  // FIXME: This is also a hack.
+  if (Name == "PHI")
+    return false;
+
+  // Ignore instructions with no .s string.
+  //
+  // FIXME: What are these?
+  if (CGI.AsmString.empty())
+    return false;
+
+  // FIXME: Hack; ignore any instructions with a newline in them.
+  if (std::find(CGI.AsmString.begin(), 
+                CGI.AsmString.end(), '\n') != CGI.AsmString.end())
+    return false;
+  
+  // Ignore instructions with attributes, these are always fake instructions for
+  // simplifying codegen.
+  //
+  // FIXME: Is this true?
+  //
+  // Also, we ignore instructions which reference the operand multiple times;
+  // this implies a constraint we would not currently honor. These are
+  // currently always fake instructions for simplifying codegen.
+  //
+  // FIXME: Encode this assumption in the .td, so we can error out here.
+  std::set<std::string> OperandNames;
+  for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
+    if (Tokens[i][0] == '$' && 
+        std::find(Tokens[i].begin(), 
+                  Tokens[i].end(), ':') != Tokens[i].end()) {
+      DEBUG({
+          errs() << "warning: '" << Name << "': "
+                 << "ignoring instruction; operand with attribute '" 
+                 << Tokens[i] << "', \n";
+        });
+      return false;
+    }
 
-  std::string Namespace = Registers[0].TheDef->getValueAsString("Namespace");
+    if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) {
+      DEBUG({
+          errs() << "warning: '" << Name << "': "
+                 << "ignoring instruction; tied operand '" 
+                 << Tokens[i] << "', \n";
+        });
+      return false;
+    }
+  }
 
-  EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
+  return true;
+}
 
-  // Emit the function to match a register name to number.
+namespace {
 
-  OS << "bool " << Target.getName() << ClassName
-     << "::MatchRegisterName(const StringRef &Name, unsigned &RegNo) {\n";
+struct OperandListLess {
+  bool operator()(const
+                  std::pair<const CodeGenInstruction::OperandInfo*, unsigned> &
+                  A,
+                  const
+                  std::pair<const CodeGenInstruction::OperandInfo*, unsigned> &
+                  B) {
+    return A.first->MIOperandNo < B.first->MIOperandNo;
+  }
+                  
+};
 
-  // FIXME: TableGen should have a fast string matcher generator.
-  for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Registers[i];
-    if (Reg.TheDef->getValueAsString("AsmName").empty())
-      continue;
+struct InstructionInfo {
+  struct Operand {
+    enum {
+      Token,
+      Class
+    } Kind;
+
+    struct ClassData {
+      /// Operand - The tablegen operand this class corresponds to.
+      const CodeGenInstruction::OperandInfo *Operand;
+
+      /// ClassName - The name of this operand's class.
+      std::string ClassName;
+
+      /// PredicateMethod - The name of the operand method to test whether the
+      /// operand matches this class.
+      std::string PredicateMethod;
+
+      /// RenderMethod - The name of the operand method to add this operand to
+      /// an MCInst.
+      std::string RenderMethod;
+    } AsClass;
+  };
+
+  /// InstrName - The target name for this instruction.
+  std::string InstrName;
+
+  /// Instr - The instruction this matches.
+  const CodeGenInstruction *Instr;
+
+  /// AsmString - The assembly string for this instruction (with variants
+  /// removed).
+  std::string AsmString;
+
+  /// Tokens - The tokenized assembly pattern that this instruction matches.
+  SmallVector<StringRef, 4> Tokens;
+
+  /// Operands - The operands that this instruction matches.
+  SmallVector<Operand, 4> Operands;
+
+  /// ConversionFn - The name of the conversion function to convert parsed
+  /// operands into an MCInst for this function.
+  std::string ConversionFn;
+
+  /// OrderedClassOperands - The indices of the class operands, ordered by their
+  /// MIOperandNo order (which is the order they should be passed to the
+  /// conversion function).
+  SmallVector<unsigned, 4> OrderedClassOperands;
+
+public:
+  void dump();
+};
 
-    OS << "  if (Name == \"" 
-       << Reg.TheDef->getValueAsString("AsmName") << "\")\n"
-       << "    return RegNo=" << i + 1 << ", false;\n";
+}
+
+void InstructionInfo::dump() {
+  errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"'
+         << ", tokens:[";
+  for (unsigned i = 0, e = Tokens.size(); i != e; ++i) {
+    errs() << Tokens[i];
+    if (i + 1 != e)
+      errs() << ", ";
   }
-  OS << "  return true;\n";
-  OS << "}\n";
+  errs() << "]\n";
 
-  // Emit the function to match instructions.  
-  std::vector<const CodeGenInstruction*> NumberedInstructions;
-  Target.getInstructionsByEnumValue(NumberedInstructions);
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+    Operand &Op = Operands[i];
+    errs() << "  op[" << i << "] = ";
+    if (Op.Kind == Operand::Token) {
+      errs() << '\"' << Tokens[i] << "\"\n";
+      continue;
+    }
 
-  std::list<std::string> MatchFns;
+    assert(Op.Kind == Operand::Class && "Invalid kind!");
+    const CodeGenInstruction::OperandInfo &OI = *Op.AsClass.Operand;
+    errs() << OI.Name << " " << OI.Rec->getName()
+           << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n";
+  }
+}
 
-  OS << "\n";
+static void BuildInstructionInfos(CodeGenTarget &Target,
+                                  std::vector<InstructionInfo*> &Infos) {
   const std::map<std::string, CodeGenInstruction> &Instructions =
     Target.getInstructions();
+
   for (std::map<std::string, CodeGenInstruction>::const_iterator 
          it = Instructions.begin(), ie = Instructions.end(); it != ie; ++it) {
     const CodeGenInstruction &CGI = it->second;
 
-    // Ignore psuedo ops.
-    //
-    // FIXME: This is a hack.
-    if (const RecordVal *Form = CGI.TheDef->getValue("Form"))
-      if (Form->getValue()->getAsString() == "Pseudo")
-        continue;
-
-    // Ignore "PHI" node.
-    //
-    // FIXME: This is also a hack.
-    if (it->first == "PHI")
+    if (!MatchOneInstr.empty() && it->first != MatchOneInstr)
       continue;
 
-    // Ignore instructions with no .s string.
-    //
-    // FIXME: What are these?
-    if (CGI.AsmString.empty())
-      continue;
+    OwningPtr<InstructionInfo> II(new InstructionInfo);
+    
+    II->InstrName = it->first;
+    II->Instr = &it->second;
+    II->AsmString = FlattenVariants(CGI.AsmString, 0);
+
+    TokenizeAsmString(II->AsmString, II->Tokens);
 
-    // FIXME: Hack; ignore "lock".
-    if (StringRef(CGI.AsmString).startswith("lock"))
+    // Ignore instructions which shouldn't be matched.
+    if (!IsAssemblerInstruction(it->first, CGI, II->Tokens))
       continue;
 
-    std::string Flattened = FlattenVariants(CGI.AsmString, 0);
-    SmallVector<StringRef, 8> Tokens;
+    for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
+      StringRef Token = II->Tokens[i];
+
+      // Check for simple tokens.
+      if (Token[0] != '$') {
+        InstructionInfo::Operand Op;
+        Op.Kind = InstructionInfo::Operand::Token;
+        II->Operands.push_back(Op);
+        continue;
+      }
+
+      // Otherwise this is an operand reference.
+      InstructionInfo::Operand Op;
+      Op.Kind = InstructionInfo::Operand::Class;
+
+      StringRef OperandName;
+      if (Token[1] == '{')
+        OperandName = Token.substr(2, Token.size() - 3);
+      else
+        OperandName = Token.substr(1);
 
-    TokenizeAsmString(Flattened, Tokens);
+      // Map this token to an operand. FIXME: Move elsewhere.
+      unsigned Idx;
+      try {
+        Idx = CGI.getOperandNamed(OperandName);
+      } catch(...) {
+        errs() << "error: unable to find operand: '" << OperandName << "'!\n";
+        break;
+      }
 
-    DEBUG({
-        outs() << it->first << " -- flattened:\"" 
-               << Flattened << "\", tokens:[";
-        for (unsigned i = 0, e = Tokens.size(); i != e; ++i) {
-          outs() << Tokens[i];
-          if (i + 1 != e)
-            outs() << ", ";
-        }
-        outs() << "]\n";
+      const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx];      
+      Op.AsClass.Operand = &OI;
 
-        for (unsigned i = 0, e = CGI.OperandList.size(); i != e; ++i) {
-          const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[i];
-          outs() << "  op[" << i << "] = " << OI.Name
-                 << " " << OI.Rec->getName()
-                 << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n";
+      if (OI.Rec->isSubClassOf("RegisterClass")) {
+        Op.AsClass.ClassName = "Reg";
+        Op.AsClass.PredicateMethod = "isReg";
+        Op.AsClass.RenderMethod = "addRegOperands";
+      } else if (OI.Rec->isSubClassOf("Operand")) {
+        // FIXME: This should not be hard coded.
+        const RecordVal *RV = OI.Rec->getValue("Type");
+
+        // FIXME: Yet another total hack.
+        if (RV->getValue()->getAsString() == "iPTR" ||
+            OI.Rec->getName() == "lea32mem" ||
+            OI.Rec->getName() == "lea64_32mem") {
+          Op.AsClass.ClassName = "Mem";
+          Op.AsClass.PredicateMethod = "isMem";
+          Op.AsClass.RenderMethod = "addMemOperands";
+        } else {
+          Op.AsClass.ClassName = "Imm";
+          Op.AsClass.PredicateMethod = "isImm";
+          Op.AsClass.RenderMethod = "addImmOperands";
         }
-      });
+      } else {
+        OI.Rec->dump();
+        assert(0 && "Unexpected instruction operand record!");
+      }
 
-    // FIXME: Ignore prefixes with non-literal tokens.
-    if (std::find(Tokens[0].begin(), Tokens[0].end(), '$') != Tokens[0].end()) {
-      DEBUG({
-          errs() << "warning: '" << it->first << "': "
-                 << "ignoring non-literal token '" << Tokens[0] << "', \n";
-        });
-      continue;
+      II->Operands.push_back(Op);
     }
 
-    // Ignore instructions with subreg specifiers, these are always fake
-    // instructions for simplifying codegen.
-    //
-    // FIXME: Is this true?
-    //
-    // Also, we ignore instructions which reference the operand multiple times;
-    // this implies a constraint we would not currently honor. These are
-    // currently always fake instructions for simplifying codegen.
-    //
-    // FIXME: Encode this assumption in the .td, so we can error out here.
-    std::set<std::string> OperandNames;
-    unsigned HasSubreg = 0, HasDuplicate = 0;
-    for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
-      if (Tokens[i][0] == '$' && 
-          std::find(Tokens[i].begin(), 
-                    Tokens[i].end(), ':') != Tokens[i].end())
-        HasSubreg = i;
-      if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second)
-        HasDuplicate = i;
-    }
-    if (HasSubreg) {
-      DEBUG({
-          errs() << "warning: '" << it->first << "': "
-                 << "ignoring instruction; operand with subreg attribute '" 
-                 << Tokens[HasSubreg] << "', \n";
-        });
-      continue;
-    } else if (HasDuplicate) {
-      DEBUG({
-          errs() << "warning: '" << it->first << "': "
-                 << "ignoring instruction; tied operand '" 
-                 << Tokens[HasSubreg] << "', \n";
-        });
+    // If we broke out, ignore the instruction.
+    if (II->Operands.size() != II->Tokens.size())
       continue;
-    }
 
-    std::string FnName = "Match_" + Target.getName() + "_Inst_" + it->first;
-    MatchFns.push_back(FnName);
+    Infos.push_back(II.take());
+  }
+}
 
-    OS << "static bool " << FnName
-       << "(const StringRef &Name,"
-       << " SmallVectorImpl<X86Operand> &Operands,"
-       << " MCInst &Inst) {\n\n";
-
-    OS << "  // Match name.\n";
-    OS << "  if (Name != \"" << Tokens[0] << "\")\n";
-    OS << "    return true;\n\n";
-    
-    OS << "  // Match number of operands.\n";
-    OS << "  if (Operands.size() != " << Tokens.size() - 1 << ")\n";
-    OS << "    return true;\n\n";
+static void ConstructConversionFunctions(CodeGenTarget &Target,
+                                         std::vector<InstructionInfo*> &Infos,
+                                         raw_ostream &OS) {
+  // Function we have already generated.
+  std::set<std::string> GeneratedFns;
+
+  for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
+         ie = Infos.end(); it != ie; ++it) {
+    InstructionInfo &II = **it;
+
+    // Order the (class) operands by the order to convert them into an MCInst.
+    SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList;
+    for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
+      InstructionInfo::Operand &Op = II.Operands[i];
+      if (Op.Kind == InstructionInfo::Operand::Class)
+        MIOperandList.push_back(std::make_pair(Op.AsClass.Operand->MIOperandNo,
+                                               i));
+    }
+    std::sort(MIOperandList.begin(), MIOperandList.end());
 
-    // Compute the total number of MCOperands.
-    //
-    // FIXME: Isn't this somewhere else?
+    // Compute the total number of operands.
     unsigned NumMIOperands = 0;
-    for (unsigned i = 0, e = CGI.OperandList.size(); i != e; ++i) {
-      const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[i];
+    for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) {
+      const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i];
       NumMIOperands = std::max(NumMIOperands, 
                                OI.MIOperandNo + OI.MINumOperands);
     }
 
-    std::set<unsigned> MatchedOperands;
-    // This the list of operands we need to fill in.
-    if (NumMIOperands)
-      OS << "  MCOperand Ops[" << NumMIOperands << "];\n\n";
-
-    unsigned ParsedOpIdx = 0;
-    for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
-      // FIXME: Can only match simple operands.
-      if (Tokens[i][0] != '$') {
-        OS << "  // FIXME: unable to match token: '" << Tokens[i] << "'!\n";
-        OS << "  return true;\n\n";
-        continue;
-      }
-
-      // Map this token to an operand. FIXME: Move elsewhere.
+    // Build the conversion function signature.
+    std::string Signature = "Convert";
+    unsigned CurIndex = 0;
+    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
+      InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
+      assert(CurIndex <= Op.AsClass.Operand->MIOperandNo &&
+             "Duplicate match for instruction operand!");
+
+      // Save the conversion index, for use by the matcher.
+      II.OrderedClassOperands.push_back(MIOperandList[i].second);
+      
+      // Skip operands which weren't matched by anything; this occurs when the
+      // .td file encodes "implicit" operands as explicit ones.
+      //
+      // FIXME: This should be removed from the MCInst structure.
+      for (; CurIndex != Op.AsClass.Operand->MIOperandNo; ++CurIndex)
+        Signature += "Imp";
+
+      Signature += Op.AsClass.ClassName;
+      Signature += utostr(Op.AsClass.Operand->MINumOperands);
+      CurIndex += Op.AsClass.Operand->MINumOperands;
+    }
 
-      unsigned Idx;
-      try {
-        Idx = CGI.getOperandNamed(Tokens[i].substr(1));
-      } catch(...) {
-        OS << "  // FIXME: unable to find operand: '" << Tokens[i] << "'!\n";
-        OS << "  return true;\n\n";
-        continue;
-      }
+    // Add any trailing implicit operands.
+    for (; CurIndex != NumMIOperands; ++CurIndex)
+      Signature += "Imp";
 
-      // FIXME: Each match routine should always end up filling the same number
-      // of operands, we should just check that the number matches what the
-      // match routine expects here instead of passing it. We can do this once
-      // we start generating the class match functions.
-      const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx];
+    // Save the conversion function, for use by the matcher.
+    II.ConversionFn = Signature;
 
-      // Track that we have matched these operands.
-      //
-      // FIXME: Verify that we don't parse something to the same operand twice.
-      for (unsigned j = 0; j != OI.MINumOperands; ++j)
-        MatchedOperands.insert(OI.MIOperandNo + j);
-
-      OS << "  // Match '" << Tokens[i] << "' (parsed operand " << ParsedOpIdx 
-         << ") to machine operands [" << OI.MIOperandNo << ", " 
-         << OI.MIOperandNo + OI.MINumOperands << ").\n";
-      OS << "  if (Match_" << Target.getName() 
-         << "_Op_" << OI.Rec->getName()  << "("
-         << "Operands[" << ParsedOpIdx << "], "
-         << "&Ops[" << OI.MIOperandNo << "], " 
-         << OI.MINumOperands << "))\n";
-      OS << "    return true;\n\n";
-
-      ++ParsedOpIdx;
-    }
-
-    // Generate code to construct the MCInst.
-
-    OS << "  // Construct MCInst.\n";
-    OS << "  Inst.setOpcode(" << Target.getName() << "::" 
-       << it->first << ");\n";
-    for (unsigned i = 0, e = NumMIOperands; i != e; ++i) {
-      // FIXME: Oops! Ignore this for now, the instruction should print ok. If
-      // we need to evaluate the constraints.
-      if (!MatchedOperands.count(i)) {
-        OS << "\n";
-        OS << "  // FIXME: Nothing matched Ops[" << i << "]!\n";
-        OS << "  Ops[" << i << "] = MCOperand::CreateReg(0);\n";
-        OS << "\n";
-      }
+    // Check if we have already generated this function.
+    if (!GeneratedFns.insert(Signature).second)
+      continue;
 
-      OS << "  Inst.addOperand(Ops[" << i << "]);\n";
+    // If not, emit it now.
+    //
+    // FIXME: There should be no need to pass the number of operands to fill;
+    // this should always be implicit in the class.
+    OS << "static bool " << Signature << "(MCInst &Inst, unsigned Opcode";
+    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i)
+      OS << ", " << Target.getName() << "Operand Op" << i;
+    OS << ") {\n";
+    OS << "  Inst.setOpcode(Opcode);\n";
+    CurIndex = 0;
+    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
+      InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
+
+      // Add the implicit operands.
+      for (; CurIndex != Op.AsClass.Operand->MIOperandNo; ++CurIndex)
+        OS << "  Inst.addOperand(MCOperand::CreateReg(0));\n";
+
+      OS << "  Op" << i << "." << Op.AsClass.RenderMethod 
+         << "(Inst, " << Op.AsClass.Operand->MINumOperands << ");\n";
+      CurIndex += Op.AsClass.Operand->MINumOperands;
     }
-    OS << "\n";
+    
+    // And add trailing implicit operands.
+    for (; CurIndex != NumMIOperands; ++CurIndex)
+      OS << "  Inst.addOperand(MCOperand::CreateReg(0));\n";
+
     OS << "  return false;\n";
     OS << "}\n\n";
   }
+}
+
+void AsmMatcherEmitter::run(raw_ostream &OS) {
+  CodeGenTarget Target;
+  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+  Record *AsmParser = Target.getAsmParser();
+  std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
+
+  std::string Namespace = Registers[0].TheDef->getValueAsString("Namespace");
 
-  // Generate the top level match function.
+  EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
+
+  // Emit the function to match a register name to number.
 
   OS << "bool " << Target.getName() << ClassName
-     << "::MatchInstruction(const StringRef &Name, "
+     << "::MatchRegisterName(const StringRef &Name, unsigned &RegNo) {\n";
+
+  // FIXME: TableGen should have a fast string matcher generator.
+  for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+    const CodeGenRegister &Reg = Registers[i];
+    if (Reg.TheDef->getValueAsString("AsmName").empty())
+      continue;
+
+    OS << "  if (Name == \"" 
+       << Reg.TheDef->getValueAsString("AsmName") << "\")\n"
+       << "    return RegNo=" << i + 1 << ", false;\n";
+  }
+  OS << "  return true;\n";
+  OS << "}\n\n";
+
+  std::vector<InstructionInfo*> Infos;
+  BuildInstructionInfos(Target, Infos);
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "instruction_info"
+  DEBUG({
+      for (std::vector<InstructionInfo*>::iterator it = Infos.begin(),
+             ie = Infos.end(); it != ie; ++it)
+        (*it)->dump();
+    });
+#undef DEBUG_TYPE
+#define DEBUG_TYPE ""
+
+  // FIXME: At this point we should be able to totally order Infos; if not,
+  // then we have an ambiguity which the .td file should be forced to resolve.
+
+  // Generate the terminal actions to convert operands into an MCInst. We still
+  // pass the operands in to these functions individually (as opposed to the
+  // array) so that we do not need to worry about the operand order.
+  ConstructConversionFunctions(Target, Infos, OS);
+
+  // Build a very stupid version of the match function which just checks each
+  // instruction in order.
+
+  OS << "bool " << Target.getName() << ClassName
+     << "::MatchInstruction(" 
      << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, "
      << "MCInst &Inst) {\n";
-  for (std::list<std::string>::iterator it = MatchFns.begin(), 
-         ie = MatchFns.end(); it != ie; ++it) {
-    OS << "  if (!" << *it << "(Name, Operands, Inst))\n";
-    OS << "    return false;\n\n";
+
+  for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
+         ie = Infos.end(); it != ie; ++it) {
+    InstructionInfo &II = **it;
+
+    // The parser is expected to arrange things so that each "token" matches
+    // exactly one target specific operand.
+    OS << "  if (Operands.size() == " << II.Operands.size();
+    for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
+      InstructionInfo::Operand &Op = II.Operands[i];
+      
+      OS << " &&\n";
+      OS << "      ";
+
+      if (Op.Kind == InstructionInfo::Operand::Token)
+        OS << "Operands[" << i << "].isToken(\"" << II.Tokens[i] << "\")";
+      else
+        OS << "Operands[" << i << "]." 
+           << Op.AsClass.PredicateMethod << "()";
+    }
+    OS << ")\n";
+    OS << "    return " << II.ConversionFn << "(Inst, " 
+       << Target.getName() << "::" << II.InstrName;
+    for (unsigned i = 0, e = II.OrderedClassOperands.size(); i != e; ++i)
+      OS << ", Operands[" << II.OrderedClassOperands[i] << "]";
+    OS << ");\n\n";
   }
 
   OS << "  return true;\n";





More information about the llvm-commits mailing list