[llvm] r242702 - MIR Parser: Add support for quoted named global value operands.

Alex Lorenz arphaman at gmail.com
Mon Jul 20 13:31:01 PDT 2015


Author: arphaman
Date: Mon Jul 20 15:31:01 2015
New Revision: 242702

URL: http://llvm.org/viewvc/llvm-project?rev=242702&view=rev
Log:
MIR Parser: Add support for quoted named global value operands.

This commit extends the machine instruction lexer and implements support for
quoted global value tokens. With this change, the syntax for global value
identifier tokens becomes identical to the syntax for global identifier
tokens in LLVM's assembly language.

Reviewers: Duncan P. N. Exon Smith

Added:
    llvm/trunk/test/CodeGen/MIR/X86/missing-closing-quote.mir
Modified:
    llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
    llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
    llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
    llvm/trunk/test/CodeGen/MIR/X86/global-value-operands.mir

Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=242702&r1=242701&r2=242702&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp (original)
+++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp Mon Jul 20 15:31:01 2015
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MILexer.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include <cctype>
@@ -68,6 +69,51 @@ static bool isIdentifierChar(char C) {
          C == '$';
 }
 
+void MIToken::unescapeQuotedStringValue(std::string &Str) const {
+  assert(isStringValueQuoted() && "String value isn't quoted");
+  StringRef Value = Range.drop_front(StringOffset);
+  assert(Value.front() == '"' && Value.back() == '"');
+  Cursor C = Cursor(Value.substr(1, Value.size() - 2));
+
+  Str.clear();
+  Str.reserve(C.remaining().size());
+  while (!C.isEOF()) {
+    char Char = C.peek();
+    if (Char == '\\') {
+      if (C.peek(1) == '\\') {
+        // Two '\' become one
+        Str += '\\';
+        C.advance(2);
+        continue;
+      }
+      if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
+        Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
+        C.advance(3);
+        continue;
+      }
+    }
+    Str += Char;
+    C.advance();
+  }
+}
+
+/// Lex a string constant using the following regular expression: \"[^\"]*\"
+static Cursor lexStringConstant(
+    Cursor C,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  assert(C.peek() == '"');
+  for (C.advance(); C.peek() != '"'; C.advance()) {
+    if (C.isEOF()) {
+      ErrorCallback(
+          C.location(),
+          "end of machine instruction reached before the closing '\"'");
+      return None;
+    }
+  }
+  C.advance();
+  return C;
+}
+
 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
   return StringSwitch<MIToken::TokenKind>(Identifier)
       .Case("_", MIToken::underscore)
@@ -190,12 +236,22 @@ static Cursor maybeLexRegister(Cursor C,
   return C;
 }
 
-static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) {
+static Cursor maybeLexGlobalValue(
+    Cursor C, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
   if (C.peek() != '@')
     return None;
   auto Range = C;
   C.advance(); // Skip the '@'
-  // TODO: add support for quoted names.
+  if (C.peek() == '"') {
+    if (Cursor R = lexStringConstant(C, ErrorCallback)) {
+      Token = MIToken(MIToken::QuotedNamedGlobalValue, Range.upto(R),
+                      /*StringOffset=*/1); // Drop the '@'
+      return R;
+    }
+    Token = MIToken(MIToken::Error, Range.remaining());
+    return Range;
+  }
   if (!isdigit(C.peek())) {
     while (isIdentifierChar(C.peek()))
       C.advance();
@@ -267,7 +323,7 @@ StringRef llvm::lexMIToken(
     return R.remaining();
   if (Cursor R = maybeLexRegister(C, Token))
     return R.remaining();
-  if (Cursor R = maybeLexGlobalValue(C, Token))
+  if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
     return R.remaining();
   if (Cursor R = maybeLexIntegerLiteral(C, Token))
     return R.remaining();

Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=242702&r1=242701&r2=242702&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MIRParser/MILexer.h (original)
+++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.h Mon Jul 20 15:31:01 2015
@@ -52,6 +52,7 @@ struct MIToken {
     StackObject,
     FixedStackObject,
     NamedGlobalValue,
+    QuotedNamedGlobalValue,
     GlobalValue,
 
     // Other tokens
@@ -94,7 +95,26 @@ public:
 
   StringRef::iterator location() const { return Range.begin(); }
 
-  StringRef stringValue() const { return Range.drop_front(StringOffset); }
+  bool isStringValueQuoted() const { return Kind == QuotedNamedGlobalValue; }
+
+  /// Return the token's raw string value.
+  ///
+  /// If the string value is quoted, this method returns that quoted string as
+  /// it is, without unescaping the string value.
+  StringRef rawStringValue() const { return Range.drop_front(StringOffset); }
+
+  /// Return token's string value.
+  ///
+  /// Expects the string value to be unquoted.
+  StringRef stringValue() const {
+    assert(!isStringValueQuoted() && "String value is quoted");
+    return Range.drop_front(StringOffset);
+  }
+
+  /// Unescapes the token's string value.
+  ///
+  /// Expects the string value to be quoted.
+  void unescapeQuotedStringValue(std::string &Str) const;
 
   const APSInt &integerValue() const { return IntVal; }
 

Modified: llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=242702&r1=242701&r2=242702&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp (original)
+++ llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp Mon Jul 20 15:31:01 2015
@@ -31,6 +31,22 @@ using namespace llvm;
 
 namespace {
 
+struct StringValueUtility {
+  StringRef String;
+  std::string UnescapedString;
+
+  StringValueUtility(const MIToken &Token) {
+    if (Token.isStringValueQuoted()) {
+      Token.unescapeQuotedStringValue(UnescapedString);
+      String = UnescapedString;
+      return;
+    }
+    String = Token.stringValue();
+  }
+
+  operator StringRef() const { return String; }
+};
+
 /// A wrapper struct around the 'MachineOperand' struct that includes a source
 /// range.
 struct MachineOperandWithLocation {
@@ -485,14 +501,16 @@ bool MIParser::parseFixedStackObjectOper
 
 bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
   switch (Token.kind()) {
-  case MIToken::NamedGlobalValue: {
-    auto Name = Token.stringValue();
+  case MIToken::NamedGlobalValue:
+  case MIToken::QuotedNamedGlobalValue: {
+    StringValueUtility Name(Token);
     const Module *M = MF.getFunction()->getParent();
     if (const auto *GV = M->getNamedValue(Name)) {
       Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
       break;
     }
-    return error(Twine("use of undefined global value '@") + Name + "'");
+    return error(Twine("use of undefined global value '@") +
+                 Token.rawStringValue() + "'");
   }
   case MIToken::GlobalValue: {
     unsigned GVIdx;
@@ -548,6 +566,7 @@ bool MIParser::parseMachineOperand(Machi
     return parseFixedStackObjectOperand(Dest);
   case MIToken::GlobalValue:
   case MIToken::NamedGlobalValue:
+  case MIToken::QuotedNamedGlobalValue:
     return parseGlobalAddressOperand(Dest);
   case MIToken::JumpTableIndex:
     return parseJumpTableIndexOperand(Dest);

Modified: llvm/trunk/test/CodeGen/MIR/X86/global-value-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/X86/global-value-operands.mir?rev=242702&r1=242701&r2=242702&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/X86/global-value-operands.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/X86/global-value-operands.mir Mon Jul 20 15:31:01 2015
@@ -34,6 +34,14 @@
     ret i32 %b
   }
 
+  @"\01Hello@$%09 \\ World," = external global i32
+
+  define i32 @test2() {
+  entry:
+    %a = load i32, i32* @"\01Hello@$%09 \\ World,"
+    ret i32 %a
+  }
+
 ...
 ---
 # CHECK: name: inc
@@ -81,3 +89,14 @@ body:
       - 'MOV32mr killed %rcx, 1, _, 0, _, %eax'
       - 'RETQ %eax'
 ...
+---
+name:            test2
+body:
+  - id:              0
+    name:            entry
+    instructions:
+      # CHECK: , @"\01Hello@$%09 \5C World,",
+      - '%rax = MOV64rm %rip, 1, _, @"\01Hello@$%09 \\ World,", _'
+      - '%eax = MOV32rm killed %rax, 1, _, 0, _'
+      - 'RETQ %eax'
+...

Added: llvm/trunk/test/CodeGen/MIR/X86/missing-closing-quote.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/X86/missing-closing-quote.mir?rev=242702&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/X86/missing-closing-quote.mir (added)
+++ llvm/trunk/test/CodeGen/MIR/X86/missing-closing-quote.mir Mon Jul 20 15:31:01 2015
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  @"quoted name" = external global i32
+
+  define i32 @test() {
+  entry:
+    %a = load i32, i32* @"quoted name"
+    ret i32 %a
+  }
+
+...
+---
+name:            test
+body:
+  - id:          0
+    name:        entry
+    instructions:
+      # CHECK: [[@LINE+1]]:53: end of machine instruction reached before the closing '"'
+      - '%rax = MOV64rm %rip, 1, _, @"quoted name, _'
+      - '%eax = MOV32rm killed %rax, 1, _, 0, _'
+      - 'RETQ %eax'
+...





More information about the llvm-commits mailing list