[llvm-commits] [llvm] r79010 - in /llvm/trunk: test/MC/AsmParser/directive_ascii.s tools/llvm-mc/AsmParser.cpp tools/llvm-mc/AsmParser.h
Daniel Dunbar
daniel at zuster.org
Fri Aug 14 11:19:53 PDT 2009
Author: ddunbar
Date: Fri Aug 14 13:19:52 2009
New Revision: 79010
URL: http://llvm.org/viewvc/llvm-project?rev=79010&view=rev
Log:
llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)
Modified:
llvm/trunk/test/MC/AsmParser/directive_ascii.s
llvm/trunk/tools/llvm-mc/AsmParser.cpp
llvm/trunk/tools/llvm-mc/AsmParser.h
Modified: llvm/trunk/test/MC/AsmParser/directive_ascii.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/directive_ascii.s?rev=79010&r1=79009&r2=79010&view=diff
==============================================================================
--- llvm/trunk/test/MC/AsmParser/directive_ascii.s (original)
+++ llvm/trunk/test/MC/AsmParser/directive_ascii.s Fri Aug 14 13:19:52 2009
@@ -1,5 +1,6 @@
# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+ .data
# CHECK: TEST0:
TEST0:
.ascii
@@ -20,5 +21,28 @@
# CHECK: .byte 0
TEST3:
.asciz "B", "C"
-
-
+
+# CHECK: TEST4:
+# CHECK: .byte 1
+# CHECK: .byte 1
+# CHECK: .byte 7
+# CHECK: .byte 0
+# CHECK: .byte 56
+# CHECK: .byte 1
+# CHECK: .byte 0
+# CHECK: .byte 49
+# CHECK: .byte 0
+TEST4:
+ .ascii "\1\01\07\08\001\0001\b\0"
+
+# CHECK: TEST5:
+# CHECK: .byte 8
+# CHECK: .byte 12
+# CHECK: .byte 10
+# CHECK: .byte 13
+# CHECK: .byte 9
+# CHECK: .byte 92
+# CHECK: .byte 34
+TEST5:
+ .ascii "\b\f\n\r\t\\\""
+
Modified: llvm/trunk/tools/llvm-mc/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/AsmParser.cpp?rev=79010&r1=79009&r2=79010&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/AsmParser.cpp (original)
+++ llvm/trunk/tools/llvm-mc/AsmParser.cpp Fri Aug 14 13:19:52 2009
@@ -765,6 +765,64 @@
return false;
}
+bool AsmParser::ParseEscapedString(std::string &Data) { // Decodes the current string token into Data, expanding backslash escapes; returns false on success, otherwise the result of TokError (callers treat a true result as failure).
+ assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
+
+ Data = ""; // Discard whatever the caller's string previously held.
+ StringRef Str = Lexer.getTok().getStringContents();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] != '\\') { // Anything other than a backslash is copied through unchanged.
+ Data += Str[i];
+ continue;
+ }
+
+ // Recognize escaped characters. Note that these escape semantics currently
+ // loosely follow Darwin 'as'. Notably, hex escapes are not supported.
+ ++i; // Step past the backslash to the character being escaped.
+ if (i == e)
+ return TokError("unexpected backslash at end of string");
+
+ // Recognize octal sequences. The unsigned cast makes characters below '0' wrap to a large value, so a single comparison tests for '0'..'7'.
+ if ((unsigned) (Str[i] - '0') <= 7) {
+ // Consume up to three octal characters; a non-octal character or the end of the string ends the sequence early.
+ unsigned Value = Str[i] - '0';
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+ }
+ }
+
+ if (Value > 255) // Three octal digits can reach 0777 (511), which does not fit in a single byte.
+ return TokError("invalid octal escape sequence (out of range)");
+
+ Data += (unsigned char) Value;
+ continue;
+ }
+
+ // Otherwise recognize individual single-character escapes.
+ switch (Str[i]) {
+ default:
+ // Just reject invalid escape sequences for now.
+ return TokError("invalid escape sequence (unrecognized character)");
+
+ case 'b': Data += '\b'; break;
+ case 'f': Data += '\f'; break;
+ case 'n': Data += '\n'; break;
+ case 'r': Data += '\r'; break;
+ case 't': Data += '\t'; break;
+ case '"': Data += '"'; break;
+ case '\\': Data += '\\'; break;
+ }
+ }
+
+ return false;
+}
+
/// ParseDirectiveAscii:
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
@@ -773,11 +831,11 @@
if (Lexer.isNot(AsmToken::String))
return TokError("expected string in '.ascii' or '.asciz' directive");
- // FIXME: This shouldn't use a const char* + strlen, the string could have
- // embedded nulls.
- // FIXME: Should have accessor for getting string contents.
- StringRef Str = Lexer.getTok().getString();
- Out.EmitBytes(Str.substr(1, Str.size() - 2));
+ std::string Data;
+ if (ParseEscapedString(Data))
+ return true;
+
+ Out.EmitBytes(Data);
if (ZeroTerminated)
Out.EmitBytes(StringRef("\0", 1));
Modified: llvm/trunk/tools/llvm-mc/AsmParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/AsmParser.h?rev=79010&r1=79009&r2=79010&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/AsmParser.h (original)
+++ llvm/trunk/tools/llvm-mc/AsmParser.h Fri Aug 14 13:19:52 2009
@@ -135,6 +135,10 @@
bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
};
} // end namespace llvm
More information about the llvm-commits
mailing list