[cfe-commits] r171640 - in /cfe/trunk: lib/Format/Format.cpp lib/Format/UnwrappedLineParser.cpp lib/Format/UnwrappedLineParser.h unittests/Format/FormatTest.cpp

Mon Jan 7 12:10:26 PST 2013

On Mon, Jan 7, 2013 at 8:53 PM, Argyrios Kyrtzidis <akyrtzi at gmail.com>wrote:

> On Jan 5, 2013, at 2:56 PM, Manuel Klimek <klimek at google.com> wrote:
>
> > Author: klimek
> > Date: Sat Jan  5 16:56:06 2013
> > New Revision: 171640
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=171640&view=rev
> > Log:
> > Fixes parsing of hash tokens in the middle of a line.
> >
> > To parse # correctly, we need to know whether it is the first token in a
> > line - we can deduct this either from the whitespace or seeing that the
> > token is the first in the file - we already calculate this information.
> > This patch moves the identification of the first token into the
> > getNextToken method and stores it inside the FormatToken, so the
> > UnwrappedLineParser can stay independent of the SourceManager.
>
> This may be silly since I'm not familiar with libFormat, but shouldn't you
> just check clang::Token::isAtStartOfLine() ?
>

After trying it out it looks like isAtStartOfLine tells me whether it's the
first character (whitespace included).
But we need to know whether it's the first token in the line, disregarding
possible whitespace...

Or am I missing something?

Thanks!
/Manuel

>
> -Argyrios
>
> >
> > Modified:
> >    cfe/trunk/lib/Format/Format.cpp
> >    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
> >    cfe/trunk/lib/Format/UnwrappedLineParser.h
> >    cfe/trunk/unittests/Format/FormatTest.cpp
> >
> > Modified: cfe/trunk/lib/Format/Format.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=171640&r1=171639&r2=171640&view=diff
> >
> ==============================================================================
> > --- cfe/trunk/lib/Format/Format.cpp (original)
> > +++ cfe/trunk/lib/Format/Format.cpp Sat Jan  5 16:56:06 2013
> > @@ -483,8 +483,7 @@
> >
> >     unsigned Newlines =
> >         std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
> > -    unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
> > -    if (Newlines == 0 && Offset != 0)
> > +    if (Newlines == 0 && !Token.IsFirst)
> >       Newlines = 1;
> >     unsigned Indent = Line.Level * 2;
> >     if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected)
> ||
> > @@ -685,9 +684,10 @@
> >       next();
> >       if (Index >= Tokens.size())
> >         return;
> > -      // It is the responsibility of the UnwrappedLineParser to make
> sure
> > -      // this sequence is not produced inside an unwrapped line.
> > -      assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
> > +      // Hashes in the middle of a line can lead to any strange token
> > +      // sequence.
> > +      if (Tokens[Index].Tok.getIdentifierInfo() == NULL)
> > +        return;
> >       switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
> >       case tok::pp_include:
> >       case tok::pp_import:
> > @@ -1033,6 +1033,8 @@
> >     Lex.LexFromRawLexer(FormatTok.Tok);
> >     StringRef Text = tokenText(FormatTok.Tok);
> >     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
> > +    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
> > +      FormatTok.IsFirst = true;
> >
> >     // Consume and record whitespace until we find a significant token.
> >     while (FormatTok.Tok.is(tok::unknown)) {
> >
> > Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=171640&r1=171639&r2=171640&view=diff
> >
> ==============================================================================
> > --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
> > +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Sat Jan  5 16:56:06 2013
> > @@ -470,7 +470,9 @@
> >
> > void UnwrappedLineParser::readToken() {
> >   FormatTok = Tokens->getNextToken();
> > -  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash)) {
> > +  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
> > +         ((FormatTok.NewlinesBefore > 0 &&
> FormatTok.HasUnescapedNewline) ||
> > +          FormatTok.IsFirst)) {
> >     // FIXME: This is incorrect - the correct way is to create a
> >     // data structure that will construct the parts around the
> preprocessor
> >     // directive as a structured \c UnwrappedLine.
> >
> > Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
> > URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=171640&r1=171639&r2=171640&view=diff
> >
> ==============================================================================
> > --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
> > +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Sat Jan  5 16:56:06 2013
> > @@ -31,7 +31,8 @@
> > /// whitespace characters preceeding it.
> > struct FormatToken {
> >   FormatToken()
> > -      : NewlinesBefore(0), HasUnescapedNewline(false),
> WhiteSpaceLength(0) {
> > +      : NewlinesBefore(0), HasUnescapedNewline(false),
> WhiteSpaceLength(0),
> > +        IsFirst(false) {
> >   }
> >
> >   /// \brief The \c Token.
> > @@ -56,6 +57,9 @@
> >   /// \brief The length in characters of the whitespace immediately
> preceeding
> >   /// the \c Token.
> >   unsigned WhiteSpaceLength;
> > +
> > +  /// \brief Indicates that this is the first token.
> > +  bool IsFirst;
> > };
> >
> > /// \brief An unwrapped line is a sequence of \c Token, that we would
> like to
> >
> > Modified: cfe/trunk/unittests/Format/FormatTest.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=171640&r1=171639&r2=171640&view=diff
> >
> ==============================================================================
> > --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
> > +++ cfe/trunk/unittests/Format/FormatTest.cpp Sat Jan  5 16:56:06 2013
> > @@ -470,6 +470,10 @@
> >   EXPECT_EQ("{\n  {\n#define A\n  }\n}", format("{{\n#define A\n}}"));
> > }
> >
> > +TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) {
> > +  verifyFormat("{\n  {\n    a #c;\n  }\n}");
> > +}
> > +
> > // FIXME: write test for unbalanced braces in macros...
> > // FIXME: test # inside a normal statement (like {#define A b})
> >
> >
> >
> > _______________________________________________
> > cfe-commits mailing list
> > cfe-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130107/006a27b7/attachment.html>