[cfe-commits] r171640 - in /cfe/trunk: lib/Format/Format.cpp lib/Format/UnwrappedLineParser.cpp lib/Format/UnwrappedLineParser.h unittests/Format/FormatTest.cpp
Argyrios Kyrtzidis
akyrtzi at gmail.com
Mon Jan 7 11:53:22 PST 2013
On Jan 5, 2013, at 2:56 PM, Manuel Klimek <klimek at google.com> wrote:
> Author: klimek
> Date: Sat Jan 5 16:56:06 2013
> New Revision: 171640
>
> URL: http://llvm.org/viewvc/llvm-project?rev=171640&view=rev
> Log:
> Fixes parsing of hash tokens in the middle of a line.
>
> To parse # correctly, we need to know whether it is the first token in a
> line - we can deduct this either from the whitespace or seeing that the
> token is the first in the file - we already calculate this information.
> This patch moves the identification of the first token into the
> getNextToken method and stores it inside the FormatToken, so the
> UnwrappedLineParser can stay independent of the SourceManager.
This may be silly since I'm not familiar with libFormat, but shouldn't you just check clang::Token::isAtStartOfLine() ?
-Argyrios
>
> Modified:
> cfe/trunk/lib/Format/Format.cpp
> cfe/trunk/lib/Format/UnwrappedLineParser.cpp
> cfe/trunk/lib/Format/UnwrappedLineParser.h
> cfe/trunk/unittests/Format/FormatTest.cpp
>
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Sat Jan 5 16:56:06 2013
> @@ -483,8 +483,7 @@
>
> unsigned Newlines =
> std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
> - unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
> - if (Newlines == 0 && Offset != 0)
> + if (Newlines == 0 && !Token.IsFirst)
> Newlines = 1;
> unsigned Indent = Line.Level * 2;
> if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) ||
> @@ -685,9 +684,10 @@
> next();
> if (Index >= Tokens.size())
> return;
> - // It is the responsibility of the UnwrappedLineParser to make sure
> - // this sequence is not produced inside an unwrapped line.
> - assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
> + // Hashes in the middle of a line can lead to any strange token
> + // sequence.
> + if (Tokens[Index].Tok.getIdentifierInfo() == NULL)
> + return;
> switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
> case tok::pp_include:
> case tok::pp_import:
> @@ -1033,6 +1033,8 @@
> Lex.LexFromRawLexer(FormatTok.Tok);
> StringRef Text = tokenText(FormatTok.Tok);
> FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
> + if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
> + FormatTok.IsFirst = true;
>
> // Consume and record whitespace until we find a significant token.
> while (FormatTok.Tok.is(tok::unknown)) {
>
> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Sat Jan 5 16:56:06 2013
> @@ -470,7 +470,9 @@
>
> void UnwrappedLineParser::readToken() {
> FormatTok = Tokens->getNextToken();
> - while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash)) {
> + while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
> + ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
> + FormatTok.IsFirst)) {
> // FIXME: This is incorrect - the correct way is to create a
> // data structure that will construct the parts around the preprocessor
> // directive as a structured \c UnwrappedLine.
>
> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Sat Jan 5 16:56:06 2013
> @@ -31,7 +31,8 @@
> /// whitespace characters preceeding it.
> struct FormatToken {
> FormatToken()
> - : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0) {
> + : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
> + IsFirst(false) {
> }
>
> /// \brief The \c Token.
> @@ -56,6 +57,9 @@
> /// \brief The length in characters of the whitespace immediately preceeding
> /// the \c Token.
> unsigned WhiteSpaceLength;
> +
> + /// \brief Indicates that this is the first token.
> + bool IsFirst;
> };
>
> /// \brief An unwrapped line is a sequence of \c Token, that we would like to
>
> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
> +++ cfe/trunk/unittests/Format/FormatTest.cpp Sat Jan 5 16:56:06 2013
> @@ -470,6 +470,10 @@
> EXPECT_EQ("{\n {\n#define A\n }\n}", format("{{\n#define A\n}}"));
> }
>
> +TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) {
> + verifyFormat("{\n {\n a #c;\n }\n}");
> +}
> +
> // FIXME: write test for unbalanced braces in macros...
> // FIXME: test # inside a normal statement (like {#define A b})
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list