[cfe-commits] r171640 - in /cfe/trunk: lib/Format/Format.cpp lib/Format/UnwrappedLineParser.cpp lib/Format/UnwrappedLineParser.h unittests/Format/FormatTest.cpp

Mon Jan 7 12:58:37 PST 2013

On Mon, Jan 7, 2013 at 9:27 PM, Argyrios Kyrtzidis <akyrtzi at gmail.com>wrote:

> On Jan 7, 2013, at 12:10 PM, Manuel Klimek <klimek at google.com> wrote:
>
> On Mon, Jan 7, 2013 at 8:53 PM, Argyrios Kyrtzidis <akyrtzi at gmail.com>wrote:
>
>> On Jan 5, 2013, at 2:56 PM, Manuel Klimek <klimek at google.com> wrote:
>>
>> > Author: klimek
>> > Date: Sat Jan  5 16:56:06 2013
>> > New Revision: 171640
>> >
>> > URL: http://llvm.org/viewvc/llvm-project?rev=171640&view=rev
>> > Log:
>> > Fixes parsing of hash tokens in the middle of a line.
>> >
>> > To parse # correctly, we need to know whether it is the first token in a
>> > line - we can deduct this either from the whitespace or seeing that the
>> > token is the first in the file - we already calculate this information.
>> > This patch moves the identification of the first token into the
>> > getNextToken method and stores it inside the FormatToken, so the
>> > UnwrappedLineParser can stay independent of the SourceManager.
>>
>> This may be silly since I'm not familiar with libFormat, but shouldn't
>> you just check clang::Token::isAtStartOfLine() ?
>>
>
> After trying it out it looks like isAtStartOfLine tells me whether it's
> the first character (whitespace included).
> But we need to know whether it's the first token in the line, disregarding
> possible whitespace...
>
>
> Your unit-test only includes a case where a hash has another token before
> it; it's not clear to me why, if isAtStartOfLine is true, it makes a
> difference if there is whitespace or not, for parsing hash tokens.
>

Note that the added unit test doesn't add the full story - there were other
unit tests breaking before I added the IsFirst check (also, other unit
tests break if I use isAtStartOfLine).

The problem is that isStartOfLine is not true for the hash token in the
case of
   #define ...
We could probably also deduce it from isAtStartOfLine of the first
(possibly whitespace) token we see before the hash... I can play around
with that some more...

In any case, "IsFirst", as you defined it, can be deducted
> from isAtStartOfLine and WhiteSpaceLength, right ?
>

IsFirst actually is true if it's the first token in the *file*.

Cheers,
/Manuel

>
> -Argyrios
>
>
> Or am I missing something?
>
> Thanks!
> /Manuel
>
>
>>
>> -Argyrios
>>
>> >
>> > Modified:
>> >    cfe/trunk/lib/Format/Format.cpp
>> >    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>> >    cfe/trunk/lib/Format/UnwrappedLineParser.h
>> >    cfe/trunk/unittests/Format/FormatTest.cpp
>> >
>> > Modified: cfe/trunk/lib/Format/Format.cpp
>> > URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=171640&r1=171639&r2=171640&view=diff
>> >
>> ==============================================================================
>> > --- cfe/trunk/lib/Format/Format.cpp (original)
>> > +++ cfe/trunk/lib/Format/Format.cpp Sat Jan  5 16:56:06 2013
>> > @@ -483,8 +483,7 @@
>> >
>> >     unsigned Newlines =
>> >         std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
>> > -    unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
>> > -    if (Newlines == 0 && Offset != 0)
>> > +    if (Newlines == 0 && !Token.IsFirst)
>> >       Newlines = 1;
>> >     unsigned Indent = Line.Level * 2;
>> >     if ((Token.Tok.is <http://token.tok.is/>(tok::kw_public) ||
>> Token.Tok.is <http://token.tok.is/>(tok::kw_protected) ||
>> > @@ -685,9 +684,10 @@
>> >       next();
>> >       if (Index >= Tokens.size())
>> >         return;
>> > -      // It is the responsibility of the UnwrappedLineParser to make
>> sure
>> > -      // this sequence is not produced inside an unwrapped line.
>> > -      assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
>> > +      // Hashes in the middle of a line can lead to any strange token
>> > +      // sequence.
>> > +      if (Tokens[Index].Tok.getIdentifierInfo() == NULL)
>> > +        return;
>> >       switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
>> >       case tok::pp_include:
>> >       case tok::pp_import:
>> > @@ -1033,6 +1033,8 @@
>> >     Lex.LexFromRawLexer(FormatTok.Tok);
>> >     StringRef Text = tokenText(FormatTok.Tok);
>> >     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
>> > +    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
>> > +      FormatTok.IsFirst = true;
>> >
>> >     // Consume and record whitespace until we find a significant token.
>> >     while (FormatTok.Tok.is <http://formattok.tok.is/>(tok::unknown)) {
>> >
>> > Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>> > URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=171640&r1=171639&r2=171640&view=diff
>> >
>> ==============================================================================
>> > --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
>> > +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Sat Jan  5 16:56:06
>> 2013
>> > @@ -470,7 +470,9 @@
>> >
>> > void UnwrappedLineParser::readToken() {
>> >   FormatTok = Tokens->getNextToken();
>> > -  while (!Line.InPPDirective && FormatTok.Tok.is<http://formattok.tok.is/>(tok::hash))
>> {
>> > +  while (!Line.InPPDirective && FormatTok.Tok.is<http://formattok.tok.is/>(tok::hash)
>> &&
>> > +         ((FormatTok.NewlinesBefore > 0 &&
>> FormatTok.HasUnescapedNewline) ||
>> > +          FormatTok.IsFirst)) {
>> >     // FIXME: This is incorrect - the correct way is to create a
>> >     // data structure that will construct the parts around the
>> preprocessor
>> >     // directive as a structured \c UnwrappedLine.
>> >
>> > Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
>> > URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=171640&r1=171639&r2=171640&view=diff
>> >
>> ==============================================================================
>> > --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
>> > +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Sat Jan  5 16:56:06 2013
>> > @@ -31,7 +31,8 @@
>> > /// whitespace characters preceeding it.
>> > struct FormatToken {
>> >   FormatToken()
>> > -      : NewlinesBefore(0), HasUnescapedNewline(false),
>> WhiteSpaceLength(0) {
>> > +      : NewlinesBefore(0), HasUnescapedNewline(false),
>> WhiteSpaceLength(0),
>> > +        IsFirst(false) {
>> >   }
>> >
>> >   /// \brief The \c Token.
>> > @@ -56,6 +57,9 @@
>> >   /// \brief The length in characters of the whitespace immediately
>> preceeding
>> >   /// the \c Token.
>> >   unsigned WhiteSpaceLength;
>> > +
>> > +  /// \brief Indicates that this is the first token.
>> > +  bool IsFirst;
>> > };
>> >
>> > /// \brief An unwrapped line is a sequence of \c Token, that we would
>> like to
>> >
>> > Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>> > URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=171640&r1=171639&r2=171640&view=diff
>> >
>> ==============================================================================
>> > --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>> > +++ cfe/trunk/unittests/Format/FormatTest.cpp Sat Jan  5 16:56:06 2013
>> > @@ -470,6 +470,10 @@
>> >   EXPECT_EQ("{\n  {\n#define A\n  }\n}", format("{{\n#define A\n}}"));
>> > }
>> >
>> > +TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) {
>> > +  verifyFormat("{\n  {\n    a #c;\n  }\n}");
>> > +}
>> > +
>> > // FIXME: write test for unbalanced braces in macros...
>> > // FIXME: test # inside a normal statement (like {#define A b})
>> >
>> >
>> >
>> > _______________________________________________
>> > cfe-commits mailing list
>> > cfe-commits at cs.uiuc.edu
>> > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130107/52fb69ce/attachment.html>