[llvm] r252439 - [AsmParser] Backends can parameterize ASM tokenization.

Colin LeMahieu via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 8 16:31:07 PST 2015


Author: colinl
Date: Sun Nov  8 18:31:07 2015
New Revision: 252439

URL: http://llvm.org/viewvc/llvm-project?rev=252439&view=rev
Log:
[AsmParser] Backends can parameterize ASM tokenization.

Modified:
    llvm/trunk/include/llvm/Target/Target.td
    llvm/trunk/lib/MC/MCParser/AsmParser.cpp
    llvm/trunk/lib/Target/AArch64/AArch64.td
    llvm/trunk/lib/Target/ARM/ARM.td
    llvm/trunk/lib/Target/BPF/BPF.td
    llvm/trunk/lib/Target/PowerPC/PPC.td
    llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp

Modified: llvm/trunk/include/llvm/Target/Target.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/Target.td?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/Target.td (original)
+++ llvm/trunk/include/llvm/Target/Target.td Sun Nov  8 18:31:07 2015
@@ -965,6 +965,15 @@ class AsmParserVariant {
   // register tokens as constrained registers, instead of tokens, for the
   // purposes of matching.
   string RegisterPrefix = "";
+
+  // TokenizingCharacters - Characters that are standalone tokens
+  string TokenizingCharacters = "[]*!";
+
+  // SeparatorCharacters - Characters that are not tokens
+  string SeparatorCharacters = " \t,";
+
+  // BreakCharacters - Characters that start new identifiers
+  string BreakCharacters = "";
 }
 def DefaultAsmParserVariant : AsmParserVariant;
 

Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Sun Nov  8 18:31:07 2015
@@ -1334,6 +1334,15 @@ bool AsmParser::parseStatement(ParseStat
     // Treat '.' as a valid identifier in this context.
     Lex();
     IDVal = ".";
+  } else if (Lexer.is(AsmToken::LCurly)) {
+    // Treat '{' as a valid identifier in this context.
+    Lex();
+    IDVal = "{";
+
+  } else if (Lexer.is(AsmToken::RCurly)) {
+    // Treat '}' as a valid identifier in this context.
+    Lex();
+    IDVal = "}";
   } else if (parseIdentifier(IDVal)) {
     if (!TheCondState.Ignore)
       return TokError("unexpected token at start of statement");

Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Sun Nov  8 18:31:07 2015
@@ -125,11 +125,13 @@ def : ProcessorModel<"cyclone", CycloneM
 def GenericAsmParserVariant : AsmParserVariant {
   int Variant = 0;
   string Name = "generic";
+  string BreakCharacters = ".";
 }
 
 def AppleAsmParserVariant : AsmParserVariant {
   int Variant = 1;
   string Name = "apple-neon";
+  string BreakCharacters = ".";
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.td (original)
+++ llvm/trunk/lib/Target/ARM/ARM.td Sun Nov  8 18:31:07 2015
@@ -511,8 +511,15 @@ def ARMAsmWriter : AsmWriter {
   bit isMCAsmWriter = 1;
 }
 
+def ARMAsmParserVariant : AsmParserVariant {
+  int Variant = 0;
+  string Name = "ARM";
+  string BreakCharacters = ".";
+}
+
 def ARM : Target {
   // Pull in Instruction Info:
   let InstructionSet = ARMInstrInfo;
   let AssemblyWriters = [ARMAsmWriter];
+  let AssemblyParserVariants = [ARMAsmParserVariant];
 }

Modified: llvm/trunk/lib/Target/BPF/BPF.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/BPF/BPF.td?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/BPF/BPF.td (original)
+++ llvm/trunk/lib/Target/BPF/BPF.td Sun Nov  8 18:31:07 2015
@@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter {
   bit isMCAsmWriter = 1;
 }
 
+def BPFAsmParserVariant : AsmParserVariant {
+  int Variant = 0;
+  string Name = "BPF";
+  string BreakCharacters = ".";
+}
+
 def BPF : Target {
   let InstructionSet = BPFInstrInfo;
   let AssemblyWriters = [BPFInstPrinter];
+  let AssemblyParserVariants = [BPFAsmParserVariant];
 }

Modified: llvm/trunk/lib/Target/PowerPC/PPC.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPC.td?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPC.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPC.td Sun Nov  8 18:31:07 2015
@@ -403,6 +403,7 @@ def PPCAsmParserVariant : AsmParserVaria
   // InstAlias definitions use immediate literals.  Set RegisterPrefix
   // so that those are not misinterpreted as registers.
   string RegisterPrefix = "%";
+  string BreakCharacters = ".";
 }
 
 def PPC : Target {

Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=252439&r1=252438&r2=252439&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Sun Nov  8 18:31:07 2015
@@ -294,6 +294,13 @@ public:
   }
 };
 
+class AsmVariantInfo {
+public:
+  std::string TokenizingCharacters;
+  std::string SeparatorCharacters;
+  std::string BreakCharacters;
+};
+
 /// MatchableInfo - Helper class for storing the necessary information for an
 /// instruction or alias which is capable of being matched.
 struct MatchableInfo {
@@ -484,7 +491,8 @@ struct MatchableInfo {
 
   void initialize(const AsmMatcherInfo &Info,
                   SmallPtrSetImpl<Record*> &SingletonRegisters,
-                  int AsmVariantNo, StringRef RegisterPrefix);
+                  int AsmVariantNo, StringRef RegisterPrefix,
+                  AsmVariantInfo const &Variant);
 
   /// validate - Return true if this matchable is a valid thing to match against
   /// and perform a bunch of validity checking.
@@ -584,8 +592,10 @@ struct MatchableInfo {
   void dump() const;
 
 private:
-  void tokenizeAsmString(const AsmMatcherInfo &Info);
-  void addAsmOperand(size_t Start, size_t End);
+  void tokenizeAsmString(AsmMatcherInfo const &Info,
+                         AsmVariantInfo const &Variant);
+  void addAsmOperand(size_t Start, size_t End,
+                     std::string const &SeparatorCharacters);
 };
 
 /// SubtargetFeatureInfo - Helper class for storing information on a subtarget
@@ -828,12 +838,13 @@ extractSingletonRegisterForAsmOperand(Ma
 
 void MatchableInfo::initialize(const AsmMatcherInfo &Info,
                                SmallPtrSetImpl<Record*> &SingletonRegisters,
-                               int AsmVariantNo, StringRef RegisterPrefix) {
+                               int AsmVariantNo, StringRef RegisterPrefix,
+                               AsmVariantInfo const &Variant) {
   AsmVariantID = AsmVariantNo;
   AsmString =
     CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo);
 
-  tokenizeAsmString(Info);
+  tokenizeAsmString(Info, Variant);
 
   // Compute the required features.
   for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates"))
@@ -857,9 +868,9 @@ void MatchableInfo::initialize(const Asm
 }
 
 /// Append an AsmOperand for the given substring of AsmString.
-void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
+void MatchableInfo::addAsmOperand(size_t Start, size_t End,
+                                  std::string const &Separators) {
   StringRef String = AsmString;
-  StringRef Separators = "[]*! \t,";
   // Look for separators before and after to figure out if this token is
   // isolated.  Accept '$$' as that's how we escape '$'.
   bool IsIsolatedToken =
@@ -870,42 +881,54 @@ void MatchableInfo::addAsmOperand(size_t
 }
 
 /// tokenizeAsmString - Tokenize a simplified assembly string.
-void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
+void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
+                                      AsmVariantInfo const &Variant) {
   StringRef String = AsmString;
-  size_t Prev = 0;
-  bool InTok = true;
-  for (size_t i = 0, e = String.size(); i != e; ++i) {
-    switch (String[i]) {
-    case '[':
-    case ']':
-    case '*':
-    case '!':
-    case ' ':
-    case '\t':
-    case ',':
-      if (InTok) {
-        addAsmOperand(Prev, i);
+  unsigned Prev = 0;
+  bool InTok = false;
+  std::string Separators = Variant.TokenizingCharacters +
+                           Variant.SeparatorCharacters;
+  for (unsigned i = 0, e = String.size(); i != e; ++i) {
+    if(Variant.BreakCharacters.find(String[i]) != std::string::npos) {
+      if(InTok) {
+        addAsmOperand(Prev, i, Separators);
+        Prev = i;
+      }
+      InTok = true;
+      continue;
+    }
+    if(Variant.TokenizingCharacters.find(String[i]) != std::string::npos) {
+      if(InTok) {
+        addAsmOperand(Prev, i, Separators);
         InTok = false;
       }
-      if (!isspace(String[i]) && String[i] != ',')
-        addAsmOperand(i, i + 1);
+      addAsmOperand(i, i + 1, Separators);
       Prev = i + 1;
-      break;
-
+      continue;
+    }
+    if(Variant.SeparatorCharacters.find(String[i]) != std::string::npos) {
+      if(InTok) {
+        addAsmOperand(Prev, i, Separators);
+        InTok = false;
+      }
+      Prev = i + 1;
+      continue;
+    }
+    switch (String[i]) {
     case '\\':
       if (InTok) {
-        addAsmOperand(Prev, i);
+        addAsmOperand(Prev, i, Separators);
         InTok = false;
       }
       ++i;
       assert(i != String.size() && "Invalid quoted character");
-      addAsmOperand(i, i + 1);
+      addAsmOperand(i, i + 1, Separators);
       Prev = i + 1;
       break;
 
     case '$': {
-      if (InTok) {
-        addAsmOperand(Prev, i);
+      if (InTok && Prev != i) {
+        addAsmOperand(Prev, i, Separators);
         InTok = false;
       }
 
@@ -915,31 +938,20 @@ void MatchableInfo::tokenizeAsmString(co
         break;
       }
 
-      // If this is "${" find the next "}" and make an identifier like "${xxx}"
-      size_t EndPos = String.find('}', i);
-      assert(EndPos != StringRef::npos &&
-             "Missing brace in operand reference!");
-      addAsmOperand(i, EndPos+1);
+      StringRef::iterator End = std::find(String.begin() + i, String.end(),'}');
+      assert(End != String.end() && "Missing brace in operand reference!");
+      size_t EndPos = End - String.begin();
+      addAsmOperand(i, EndPos+1, Separators);
       Prev = EndPos + 1;
       i = EndPos;
       break;
     }
-
-    case '.':
-      if (!Info.AsmParser->getValueAsBit("MnemonicContainsDot")) {
-        if (InTok)
-          addAsmOperand(Prev, i);
-        Prev = i;
-      }
-      InTok = true;
-      break;
-
     default:
       InTok = true;
     }
   }
   if (InTok && Prev != String.size())
-    addAsmOperand(Prev, StringRef::npos);
+    addAsmOperand(Prev, StringRef::npos, Separators);
 
   // The first token of the instruction is the mnemonic, which must be a
   // simple string, not a $foo variable or a singleton register.
@@ -1373,6 +1385,13 @@ void AsmMatcherInfo::buildInfo() {
     std::string CommentDelimiter =
       AsmVariant->getValueAsString("CommentDelimiter");
     std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix");
+    AsmVariantInfo Variant;
+    Variant.TokenizingCharacters =
+        AsmVariant->getValueAsString("TokenizingCharacters");
+    Variant.SeparatorCharacters =
+        AsmVariant->getValueAsString("SeparatorCharacters");
+    Variant.BreakCharacters =
+        AsmVariant->getValueAsString("BreakCharacters");
     int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
 
     for (const CodeGenInstruction *CGI : Target.instructions()) {
@@ -1388,7 +1407,8 @@ void AsmMatcherInfo::buildInfo() {
 
       auto II = llvm::make_unique<MatchableInfo>(*CGI);
 
-      II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
+      II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix,
+                     Variant);
 
       // Ignore instructions which shouldn't be matched and diagnose invalid
       // instruction definitions with an error.
@@ -1415,7 +1435,8 @@ void AsmMatcherInfo::buildInfo() {
 
       auto II = llvm::make_unique<MatchableInfo>(std::move(Alias));
 
-      II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
+      II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix,
+                     Variant);
 
       // Validate the alias definitions.
       II->validate(CommentDelimiter, false);




More information about the llvm-commits mailing list