[llvm] r315207 - [llvm-rc] Have the tokenizer discard single & block comments.
Zachary Turner via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 9 08:46:13 PDT 2017
Author: zturner
Date: Mon Oct 9 08:46:13 2017
New Revision: 315207
URL: http://llvm.org/viewvc/llvm-project?rev=315207&view=rev
Log:
[llvm-rc] Have the tokenizer discard single & block comments.
This allows rc files to have comments. Eventually we should
just use clang's C preprocessor, but that would be a larger
effort for minimal gain, and this approach is straightforward.
Differential Revision: https://reviews.llvm.org/D38651
Modified:
llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc
llvm/trunk/test/tools/llvm-rc/tokenizer.test
llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp
llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h
Modified: llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc?rev=315207&r1=315206&r2=315207&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc (original)
+++ llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc Mon Oct 9 08:46:13 2017
@@ -3,6 +3,14 @@ He11o LLVM
"RC string test.",L"Another RC string test.'&{",42,100
+Block Comment Ident /*block /* // comment */ ifier
+Line Comment // Identifier /*
+
+/* Multi line
+ block
+ comment */
+
+Multiple /* comments */ on /* a */ single // line
":))"
Modified: llvm/trunk/test/tools/llvm-rc/tokenizer.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-rc/tokenizer.test?rev=315207&r1=315206&r2=315207&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-rc/tokenizer.test (original)
+++ llvm/trunk/test/tools/llvm-rc/tokenizer.test Mon Oct 9 08:46:13 2017
@@ -34,4 +34,13 @@
; CHECK-NEXT: Int: 42; int value = 42
; CHECK-NEXT: Comma: ,
; CHECK-NEXT: Int: 100; int value = 100
+; CHECK-NEXT: Identifier: Block
+; CHECK-NEXT: Identifier: Comment
+; CHECK-NEXT: Identifier: Ident
+; CHECK-NEXT: Identifier: ifier
+; CHECK-NEXT: Identifier: Line
+; CHECK-NEXT: Identifier: Comment
+; CHECK-NEXT: Identifier: Multiple
+; CHECK-NEXT: Identifier: on
+; CHECK-NEXT: Identifier: single
; CHECK-NEXT: String: ":))"
Modified: llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp?rev=315207&r1=315206&r2=315207&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp (original)
+++ llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp Mon Oct 9 08:46:13 2017
@@ -121,6 +121,17 @@ private:
bool canStartString() const;
+ // Check if tokenizer can start reading a single-line comment (i.e. a comment
+ // that begins with '//')
+ bool canStartLineComment() const;
+
+ // Check if tokenizer can start reading a block comment (i.e. a
+ // comment that begins with '/*' and ends with '*/')
+ bool canStartBlockComment() const;
+
+ // Throw away all remaining characters on the current line.
+ void skipCurrentLine();
+
bool streamEof() const;
// Classify the token that is about to be read from the current position.
@@ -134,6 +145,14 @@ private:
size_t DataLength, Pos;
};
+void Tokenizer::skipCurrentLine() {
+ Pos = Data.find_first_of("\r\n", Pos);
+ Pos = Data.find_first_not_of("\r\n", Pos);
+
+ if (Pos == StringRef::npos)
+ Pos = DataLength;
+}
+
Expected<std::vector<RCToken>> Tokenizer::run() {
Pos = 0;
std::vector<RCToken> Result;
@@ -154,6 +173,10 @@ Expected<std::vector<RCToken>> Tokenizer
if (Error TokenError = consumeToken(TokenKind))
return std::move(TokenError);
+ // Comments are just deleted, don't bother saving them.
+ if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
+ continue;
+
RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
if (TokenKind == Kind::Identifier) {
processIdentifier(Token);
@@ -195,6 +218,21 @@ Error Tokenizer::consumeToken(const Kind
advance();
return Error::success();
+ case Kind::LineComment:
+ advance(2);
+ skipCurrentLine();
+ return Error::success();
+
+ case Kind::StartComment: {
+ advance(2);
+ auto EndPos = Data.find("*/", Pos);
+ if (EndPos == StringRef::npos)
+ return getStringError(
+ "Unclosed multi-line comment beginning at position " + Twine(Pos));
+ advance(EndPos - Pos);
+ advance(2);
+ return Error::success();
+ }
case Kind::Identifier:
while (!streamEof() && canContinueIdentifier())
advance();
@@ -259,6 +297,16 @@ bool Tokenizer::canStartInt() const {
return std::isdigit(Data[Pos]);
}
+bool Tokenizer::canStartBlockComment() const {
+ assert(!streamEof());
+ return Data.drop_front(Pos).startswith("/*");
+}
+
+bool Tokenizer::canStartLineComment() const {
+ assert(!streamEof());
+ return Data.drop_front(Pos).startswith("//");
+}
+
bool Tokenizer::canContinueInt() const {
assert(!streamEof());
return std::isalnum(Data[Pos]);
@@ -271,6 +319,11 @@ bool Tokenizer::canStartString() const {
bool Tokenizer::streamEof() const { return Pos == DataLength; }
Kind Tokenizer::classifyCurrentToken() const {
+ if (canStartBlockComment())
+ return Kind::StartComment;
+ if (canStartLineComment())
+ return Kind::LineComment;
+
if (canStartInt())
return Kind::Int;
if (canStartString())
Modified: llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h?rev=315207&r1=315206&r2=315207&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h (original)
+++ llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h Mon Oct 9 08:46:13 2017
@@ -18,6 +18,8 @@ TOKEN(Invalid) // Invalid token. Sh
TOKEN(Int) // Integer (decimal, octal or hexadecimal).
TOKEN(String) // String value.
TOKEN(Identifier) // Script identifier (resource name or type).
+TOKEN(LineComment) // Beginning of single-line comment.
+TOKEN(StartComment) // Beginning of multi-line comment.
// Short tokens. They usually consist of exactly one character.
// The definitions are of the form SHORT_TOKEN(TokenName, TokenChar).
More information about the llvm-commits mailing list