[llvm-commits] [llvm] r79010 - in /llvm/trunk: test/MC/AsmParser/directive_ascii.s tools/llvm-mc/AsmParser.cpp tools/llvm-mc/AsmParser.h

Fri Aug 14 11:19:53 PDT 2009

Author: ddunbar
Date: Fri Aug 14 13:19:52 2009
New Revision: 79010

URL: http://llvm.org/viewvc/llvm-project?rev=79010&view=rev
Log:
llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)

Modified:
    llvm/trunk/test/MC/AsmParser/directive_ascii.s
    llvm/trunk/tools/llvm-mc/AsmParser.cpp
    llvm/trunk/tools/llvm-mc/AsmParser.h

Modified: llvm/trunk/test/MC/AsmParser/directive_ascii.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/directive_ascii.s?rev=79010&r1=79009&r2=79010&view=diff

==============================================================================

--- llvm/trunk/test/MC/AsmParser/directive_ascii.s (original)
+++ llvm/trunk/test/MC/AsmParser/directive_ascii.s Fri Aug 14 13:19:52 2009
@@ -1,5 +1,6 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
 
+        .data
 # CHECK: TEST0:
 TEST0:  
         .ascii
@@ -20,5 +21,28 @@
 # CHECK: .byte 0
 TEST3:  
         .asciz "B", "C"
-
-       
+        
+# CHECK: TEST4:
+# CHECK: .byte 1
+# CHECK: .byte 1
+# CHECK: .byte 7
+# CHECK: .byte 0
+# CHECK: .byte 56
+# CHECK: .byte 1
+# CHECK: .byte 0
+# CHECK: .byte 49
+# CHECK: .byte 0
+TEST4:  
+        .ascii "\1\01\07\08\001\0001\b\0"
+        
+# CHECK: TEST5:
+# CHECK: .byte 8
+# CHECK: .byte 12
+# CHECK: .byte 10
+# CHECK: .byte 13
+# CHECK: .byte 9
+# CHECK: .byte 92
+# CHECK: .byte 34
+TEST5:
+        .ascii "\b\f\n\r\t\\\""
+        

Modified: llvm/trunk/tools/llvm-mc/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/AsmParser.cpp?rev=79010&r1=79009&r2=79010&view=diff

==============================================================================
--- llvm/trunk/tools/llvm-mc/AsmParser.cpp (original)
+++ llvm/trunk/tools/llvm-mc/AsmParser.cpp Fri Aug 14 13:19:52 2009
@@ -765,6 +765,64 @@
   return false;
 }
 
+bool AsmParser::ParseEscapedString(std::string &Data) {
+  assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
+
+  Data = "";
+  StringRef Str = Lexer.getTok().getStringContents();
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    if (Str[i] != '\\') {
+      Data += Str[i];
+      continue;
+    }
+
+    // Recognize escaped characters. Note that these escape semantics currently
+    // loosely follow Darwin 'as'. Notably, hex escapes are not supported.
+    ++i;
+    if (i == e)
+      return TokError("unexpected backslash at end of string");
+
+    // Recognize octal sequences.
+    if ((unsigned) (Str[i] - '0') <= 7) {
+      // Consume up to three octal characters.
+      unsigned Value = Str[i] - '0';
+
+      if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+        ++i;
+        Value = Value * 8 + (Str[i] - '0');
+
+        if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+          ++i;
+          Value = Value * 8 + (Str[i] - '0');
+        }
+      }
+
+      if (Value > 255)
+        return TokError("invalid octal escape sequence (out of range)");
+
+      Data += (unsigned char) Value;
+      continue;
+    }
+
+    // Otherwise recognize individual escapes.
+    switch (Str[i]) {
+    default:
+      // Just reject invalid escape sequences for now.
+      return TokError("invalid escape sequence (unrecognized character)");
+
+    case 'b': Data += '\b'; break;
+    case 'f': Data += '\f'; break;
+    case 'n': Data += '\n'; break;
+    case 'r': Data += '\r'; break;
+    case 't': Data += '\t'; break;
+    case '"': Data += '"'; break;
+    case '\\': Data += '\\'; break;
+    }
+  }
+
+  return false;
+}
+
 /// ParseDirectiveAscii:
 ///   ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
@@ -773,11 +831,11 @@
       if (Lexer.isNot(AsmToken::String))
         return TokError("expected string in '.ascii' or '.asciz' directive");
       
-      // FIXME: This shouldn't use a const char* + strlen, the string could have
-      // embedded nulls.
-      // FIXME: Should have accessor for getting string contents.
-      StringRef Str = Lexer.getTok().getString();
-      Out.EmitBytes(Str.substr(1, Str.size() - 2));
+      std::string Data;
+      if (ParseEscapedString(Data))
+        return true;
+      
+      Out.EmitBytes(Data);
       if (ZeroTerminated)
         Out.EmitBytes(StringRef("\0", 1));
       

Modified: llvm/trunk/tools/llvm-mc/AsmParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/AsmParser.h?rev=79010&r1=79009&r2=79010&view=diff

==============================================================================
--- llvm/trunk/tools/llvm-mc/AsmParser.h (original)
+++ llvm/trunk/tools/llvm-mc/AsmParser.h Fri Aug 14 13:19:52 2009
@@ -135,6 +135,10 @@
   bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
   bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
   bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
+
+  /// ParseEscapedString - Parse the current token as a string which may include
+  /// escaped characters and return the string contents.
+  bool ParseEscapedString(std::string &Data);
 };
 
 } // end namespace llvm