[cfe-commits] r171640 - in /cfe/trunk: lib/Format/Format.cpp lib/Format/UnwrappedLineParser.cpp lib/Format/UnwrappedLineParser.h unittests/Format/FormatTest.cpp

Mon Jan 7 11:53:22 PST 2013

On Jan 5, 2013, at 2:56 PM, Manuel Klimek <klimek at google.com> wrote:

> Author: klimek
> Date: Sat Jan  5 16:56:06 2013
> New Revision: 171640
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=171640&view=rev
> Log:
> Fixes parsing of hash tokens in the middle of a line.
> 
> To parse # correctly, we need to know whether it is the first token in a
> line - we can deduce this either from the whitespace or from seeing that the
> token is the first in the file - we already calculate this information.
> This patch moves the identification of the first token into the
> getNextToken method and stores it inside the FormatToken, so the
> UnwrappedLineParser can stay independent of the SourceManager.

This may be silly since I'm not familiar with libFormat, but shouldn't you just check clang::Token::isAtStartOfLine()?

-Argyrios

> 
> Modified:
>    cfe/trunk/lib/Format/Format.cpp
>    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>    cfe/trunk/lib/Format/UnwrappedLineParser.h
>    cfe/trunk/unittests/Format/FormatTest.cpp
> 
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Sat Jan  5 16:56:06 2013
> @@ -483,8 +483,7 @@
> 
>     unsigned Newlines =
>         std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
> -    unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
> -    if (Newlines == 0 && Offset != 0)
> +    if (Newlines == 0 && !Token.IsFirst)
>       Newlines = 1;
>     unsigned Indent = Line.Level * 2;
>     if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) ||
> @@ -685,9 +684,10 @@
>       next();
>       if (Index >= Tokens.size())
>         return;
> -      // It is the responsibility of the UnwrappedLineParser to make sure
> -      // this sequence is not produced inside an unwrapped line.
> -      assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
> +      // Hashes in the middle of a line can lead to any strange token
> +      // sequence.
> +      if (Tokens[Index].Tok.getIdentifierInfo() == NULL)
> +        return;
>       switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
>       case tok::pp_include:
>       case tok::pp_import:
> @@ -1033,6 +1033,8 @@
>     Lex.LexFromRawLexer(FormatTok.Tok);
>     StringRef Text = tokenText(FormatTok.Tok);
>     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
> +    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
> +      FormatTok.IsFirst = true;
> 
>     // Consume and record whitespace until we find a significant token.
>     while (FormatTok.Tok.is(tok::unknown)) {
> 
> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Sat Jan  5 16:56:06 2013
> @@ -470,7 +470,9 @@
> 
> void UnwrappedLineParser::readToken() {
>   FormatTok = Tokens->getNextToken();
> -  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash)) {
> +  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
> +         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
> +          FormatTok.IsFirst)) {
>     // FIXME: This is incorrect - the correct way is to create a
>     // data structure that will construct the parts around the preprocessor
>     // directive as a structured \c UnwrappedLine.
> 
> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Sat Jan  5 16:56:06 2013
> @@ -31,7 +31,8 @@
> /// whitespace characters preceeding it.
> struct FormatToken {
>   FormatToken()
> -      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0) {
> +      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
> +        IsFirst(false) {
>   }
> 
>   /// \brief The \c Token.
> @@ -56,6 +57,9 @@
>   /// \brief The length in characters of the whitespace immediately preceeding
>   /// the \c Token.
>   unsigned WhiteSpaceLength;
> +
> +  /// \brief Indicates that this is the first token.
> +  bool IsFirst;
> };
> 
> /// \brief An unwrapped line is a sequence of \c Token, that we would like to
> 
> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=171640&r1=171639&r2=171640&view=diff
> ==============================================================================
> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
> +++ cfe/trunk/unittests/Format/FormatTest.cpp Sat Jan  5 16:56:06 2013
> @@ -470,6 +470,10 @@
>   EXPECT_EQ("{\n  {\n#define A\n  }\n}", format("{{\n#define A\n}}"));
> }
> 
> +TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) {
> +  verifyFormat("{\n  {\n    a #c;\n  }\n}");
> +}
> +
> // FIXME: write test for unbalanced braces in macros...
> // FIXME: test # inside a normal statement (like {#define A b})
> 
> 
> 
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits