[llvm-commits] [llvm] r73855 - in /llvm/trunk/tools/llvm-mc: AsmLexer.cpp AsmLexer.h llvm-mc.cpp
Chris Lattner
sabre at nondot.org
Sun Jun 21 12:21:25 PDT 2009
Author: lattner
Date: Sun Jun 21 14:21:25 2009
New Revision: 73855
URL: http://llvm.org/viewvc/llvm-project?rev=73855&view=rev
Log:
implement enough of a lexer to get through Olden/health/Output/health.llc.s
without errors.
Modified:
llvm/trunk/tools/llvm-mc/AsmLexer.cpp
llvm/trunk/tools/llvm-mc/AsmLexer.h
llvm/trunk/tools/llvm-mc/llvm-mc.cpp
Modified: llvm/trunk/tools/llvm-mc/AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/AsmLexer.cpp?rev=73855&r1=73854&r2=73855&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/AsmLexer.cpp (original)
+++ llvm/trunk/tools/llvm-mc/AsmLexer.cpp Sun Jun 21 14:21:25 2009
@@ -14,6 +14,7 @@
#include "AsmLexer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <cerrno>
using namespace llvm;
AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
@@ -23,6 +24,10 @@
TokStart = 0;
}
+/// getLoc - Return the source location of the start of the current token,
+/// i.e. the SMLoc wrapping TokStart.
+SMLoc AsmLexer::getLoc() const {
+  const char *TokenBegin = TokStart;
+  return SMLoc::getFromPointer(TokenBegin);
+}
+
void AsmLexer::PrintError(const char *Loc, const std::string &Msg) const {
SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
}
@@ -31,6 +36,13 @@
SrcMgr.PrintError(Loc, Msg);
}
+/// ReturnError - Report \p Msg through the source manager at the buffer
+/// position \p Loc, then hand back asmtok::Error so that callers can write
+/// "return ReturnError(...)" in a single statement.
+asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+  SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
+  return asmtok::Error;
+}
+
int AsmLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
@@ -59,6 +71,129 @@
}
}
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+///
+/// Entered from LexToken after the first identifier character has been
+/// consumed. Extends the token over any following identifier characters and
+/// stores the token text (from TokStart up to CurPtr) in CurStrVal.
+/// Always returns asmtok::Identifier.
+asmtok::TokKind AsmLexer::LexIdentifier() {
+  // Cast to unsigned char: handing a negative char value to isalnum is
+  // undefined behavior, and assembly input may contain high-bit bytes.
+  while (isalnum(static_cast<unsigned char>(*CurPtr)) || *CurPtr == '_' ||
+         *CurPtr == '$' || *CurPtr == '.' || *CurPtr == '@')
+    ++CurPtr;
+  CurStrVal.assign(TokStart, CurPtr);  // Whole token, first character included.
+  return asmtok::Identifier;
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+///
+/// Entered from LexToken after the '%' has been consumed. Requires at least
+/// one alphanumeric character after the '%'; otherwise asmtok::Error is
+/// returned without consuming anything further. On success the token text,
+/// which starts at TokStart and therefore includes the leading '%', is stored
+/// in CurStrVal and asmtok::Register is returned.
+asmtok::TokKind AsmLexer::LexPercent() {
+  // Cast to unsigned char: handing a negative char value to isalnum is
+  // undefined behavior.
+  if (!isalnum(static_cast<unsigned char>(*CurPtr)))
+    return asmtok::Error;  // Must have at least one character.
+  while (isalnum(static_cast<unsigned char>(*CurPtr)))
+    ++CurPtr;
+  CurStrVal.assign(TokStart, CurPtr);  // Include the leading '%'.
+  return asmtok::Register;
+}
+
+/// LexSlash: Slash: /
+///           C-Style Comment: /* ... */
+///
+/// Entered after the '/' has been consumed. A lone '/' yields asmtok::Slash.
+/// A "/*" opens a comment that is skipped up to the matching "*/", after
+/// which lexing resumes with the next token. Reaching end of input inside the
+/// comment reports an error at the start of the token.
+asmtok::TokKind AsmLexer::LexSlash() {
+  const bool StartsComment = (*CurPtr == '*');
+  if (!StartsComment)
+    return asmtok::Slash;
+
+  // Step over the '*' that opens the comment.
+  ++CurPtr;
+
+  for (;;) {
+    const int Ch = getNextChar();
+
+    if (Ch == EOF)
+      return ReturnError(TokStart, "Unterminated comment!");
+
+    // A '*' only closes the comment when it is immediately followed by '/'.
+    if (Ch == '*' && CurPtr[0] == '/') {
+      ++CurPtr;  // Consume the '/' of the closing "*/".
+      return LexToken();
+    }
+  }
+}
+
+/// LexHash: Comment: #[^\n\r]*
+///
+/// Entered after the '#' has been consumed. Discards characters up to and
+/// including the line terminator ('\n' or '\r') or until end of input.
+/// Returns asmtok::Eof if the input ended inside the comment, otherwise
+/// asmtok::EndOfStatement, because the consumed newline ends the statement.
+asmtok::TokKind AsmLexer::LexHash() {
+  int CurChar = getNextChar();
+  // Stop on either newline flavor; LexToken treats both '\n' and '\r' as
+  // statement terminators, so the comment must end at either of them.
+  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+    CurChar = getNextChar();
+
+  if (CurChar == EOF)
+    return asmtok::Eof;
+  return asmtok::EndOfStatement;
+}
+
+
+/// LexDigit: First character is [0-9].
+///   Local Label: [0-9][:]
+///   Forward/Backward Label: [0-9][fb]
+///   Binary integer: 0b[01]+
+///   Octal integer: 0[0-7]+
+///   Hex integer: 0x[0-9a-fA-F]+
+///   Decimal integer: [1-9][0-9]*
+///   TODO: FP literal.
+///
+/// Entered after the first digit has been consumed, so CurPtr[-1] is that
+/// digit and TokStart points at it. On success the value is stored in
+/// CurIntVal and asmtok::IntVal is returned. Local and forward/backward
+/// labels are not implemented yet and yield asmtok::Error. Malformed or
+/// out-of-range binary/hex literals are reported via ReturnError at the start
+/// of the token.
+asmtok::TokKind AsmLexer::LexDigit() {
+  if (*CurPtr == ':')
+    return asmtok::Error;  // FIXME LOCAL LABEL.
+
+  // "0b" immediately followed by a binary digit is a binary literal, not a
+  // backward reference to label 0. Without this distinction the label check
+  // below would swallow every binary literal. Reading CurPtr[1] is safe:
+  // it is only evaluated when CurPtr[0] is 'b', i.e. not the terminator.
+  const bool IsBinaryLiteral =
+      CurPtr[-1] == '0' && CurPtr[0] == 'b' &&
+      (CurPtr[1] == '0' || CurPtr[1] == '1');
+  if (!IsBinaryLiteral && (*CurPtr == 'f' || *CurPtr == 'b'))
+    return asmtok::Error;  // FIXME FORWARD/BACKWARD LABEL.
+
+  // Decimal integer: [1-9][0-9]*
+  if (CurPtr[-1] != '0') {
+    // Cast to unsigned char: a negative char passed to isdigit is undefined.
+    while (isdigit(static_cast<unsigned char>(*CurPtr)))
+      ++CurPtr;
+    CurIntVal = strtoll(TokStart, 0, 10);
+    return asmtok::IntVal;
+  }
+
+  if (*CurPtr == 'b') {
+    ++CurPtr;
+    const char *NumStart = CurPtr;
+    while (CurPtr[0] == '0' || CurPtr[0] == '1')
+      ++CurPtr;
+
+    // Requires at least one binary digit.
+    if (CurPtr == NumStart)
+      return ReturnError(TokStart, "Invalid binary number");
+    CurIntVal = strtoll(NumStart, 0, 2);
+    return asmtok::IntVal;
+  }
+
+  if (*CurPtr == 'x') {
+    ++CurPtr;
+    const char *NumStart = CurPtr;
+    while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
+      ++CurPtr;
+
+    // Requires at least one hex digit.
+    if (CurPtr == NumStart)
+      return ReturnError(TokStart, "Invalid hexadecimal number");
+
+    errno = 0;
+    CurIntVal = strtoll(NumStart, 0, 16);
+    if (errno == EINVAL)
+      return ReturnError(TokStart, "Invalid hexadecimal number");
+    if (errno == ERANGE) {
+      // Too large for a signed 64-bit value: retry as unsigned so that
+      // literals such as 0xFFFFFFFFFFFFFFFF are still accepted.
+      errno = 0;
+      CurIntVal = static_cast<int64_t>(strtoull(NumStart, 0, 16));
+      if (errno == EINVAL)
+        return ReturnError(TokStart, "Invalid hexadecimal number");
+      if (errno == ERANGE)
+        return ReturnError(TokStart, "Hexadecimal number out of range");
+    }
+    return asmtok::IntVal;
+  }
+
+  // Must be an octal number, it starts with 0.
+  while (*CurPtr >= '0' && *CurPtr <= '7')
+    ++CurPtr;
+  CurIntVal = strtoll(TokStart, 0, 8);
+  return asmtok::IntVal;
+}
+
+
asmtok::TokKind AsmLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
@@ -66,9 +201,9 @@
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_]
-// if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
-// return LexIdentifier();
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
// Unknown character, emit an error.
return asmtok::Error;
@@ -76,12 +211,29 @@
case 0:
case ' ':
case '\t':
- case '\n':
- case '\r':
// Ignore whitespace.
return LexToken();
+ case '\n': // FALL THROUGH.
+ case '\r': // FALL THROUGH.
+ case ';': return asmtok::EndOfStatement;
case ':': return asmtok::Colon;
case '+': return asmtok::Plus;
case '-': return asmtok::Minus;
+ case '(': return asmtok::LParen;
+ case ')': return asmtok::RParen;
+ case '*': return asmtok::Star;
+ case ',': return asmtok::Comma;
+ case '$': return asmtok::Dollar;
+ case '%': return LexPercent();
+ case '/': return LexSlash();
+ case '#': return LexHash();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
}
}
\ No newline at end of file
Modified: llvm/trunk/tools/llvm-mc/AsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/AsmLexer.h?rev=73855&r1=73854&r2=73855&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/AsmLexer.h (original)
+++ llvm/trunk/tools/llvm-mc/AsmLexer.h Sun Jun 21 14:21:25 2009
@@ -29,12 +29,16 @@
Eof, Error,
Identifier,
+ Register,
IntVal,
-
+ EndOfStatement,
Colon,
Plus,
- Minus
+ Minus,
+ Slash, // '/'
+ LParen, RParen,
+ Star, Comma, Dollar
};
}
@@ -66,7 +70,7 @@
asmtok::TokKind getKind() const { return CurKind; }
const std::string &getCurStrVal() const {
- assert(CurKind == asmtok::Identifier &&
+ assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register) &&
"This token doesn't have a string value");
return CurStrVal;
}
@@ -82,9 +86,15 @@
private:
int getNextChar();
+ asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg);
/// LexToken - Read the next token and return its code.
asmtok::TokKind LexToken();
+ asmtok::TokKind LexIdentifier();
+ asmtok::TokKind LexPercent();
+ asmtok::TokKind LexSlash();
+ asmtok::TokKind LexHash();
+ asmtok::TokKind LexDigit();
};
} // end namespace llvm
Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=73855&r1=73854&r2=73855&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original)
+++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Sun Jun 21 14:21:25 2009
@@ -72,17 +72,29 @@
asmtok::TokKind Tok = Lexer.Lex();
while (Tok != asmtok::Eof) {
switch (Tok) {
- default: outs() << "<<unknown token>>\n"; break;
- case asmtok::Error: outs() << "<<error>>\n"; break;
+ default: Lexer.PrintError(Lexer.getLoc(), "driver: unknown token"); break;
+ case asmtok::Error:
+ Lexer.PrintError(Lexer.getLoc(), "error, bad token");
+ break;
case asmtok::Identifier:
outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
break;
+ case asmtok::Register:
+ outs() << "register: " << Lexer.getCurStrVal() << '\n';
+ break;
case asmtok::IntVal:
outs() << "int: " << Lexer.getCurIntVal() << '\n';
break;
+ case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break;
case asmtok::Colon: outs() << "Colon\n"; break;
case asmtok::Plus: outs() << "Plus\n"; break;
case asmtok::Minus: outs() << "Minus\n"; break;
+ case asmtok::Slash: outs() << "Slash\n"; break;
+ case asmtok::LParen: outs() << "LParen\n"; break;
+ case asmtok::RParen: outs() << "RParen\n"; break;
+ case asmtok::Star: outs() << "Star\n"; break;
+ case asmtok::Comma: outs() << "Comma\n"; break;
+ case asmtok::Dollar: outs() << "Dollar\n"; break;
}
Tok = Lexer.Lex();
More information about the llvm-commits
mailing list