r191614 - Refactor comment merging.

Benjamin Kramer benny.kra at googlemail.com
Sat Sep 28 08:06:27 PDT 2013


Author: d0k
Date: Sat Sep 28 10:06:27 2013
New Revision: 191614

URL: http://llvm.org/viewvc/llvm-project?rev=191614&view=rev
Log:
Refactor comment merging.

- We scan for whitespace between comments anyways, remember any newlines seen
  along the way.
- Use this newline number to decide whether two comments are adjacent.
- Since the newline check is now free remove the caching and unused code.
- Remove unnecessary boolean state from the comment list.
- No behavioral change.

Modified:
    cfe/trunk/include/clang/AST/RawCommentList.h
    cfe/trunk/lib/AST/RawCommentList.cpp

Modified: cfe/trunk/include/clang/AST/RawCommentList.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/RawCommentList.h?rev=191614&r1=191613&r2=191614&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/RawCommentList.h (original)
+++ cfe/trunk/include/clang/AST/RawCommentList.h Sat Sep 28 10:06:27 2013
@@ -107,12 +107,9 @@ public:
     return RawText;
   }
 
-  SourceRange getSourceRange() const LLVM_READONLY {
-    return Range;
-  }
-
-  unsigned getBeginLine(const SourceManager &SM) const;
-  unsigned getEndLine(const SourceManager &SM) const;
+  SourceRange getSourceRange() const LLVM_READONLY { return Range; }
+  SourceLocation getLocStart() const LLVM_READONLY { return Range.getBegin(); }
+  SourceLocation getLocEnd() const LLVM_READONLY { return Range.getEnd(); }
 
   const char *getBriefText(const ASTContext &Context) const {
     if (BriefTextValid)
@@ -146,11 +143,6 @@ private:
   /// considered as documentation comments.
   bool ParseAllComments : 1;
 
-  mutable bool BeginLineValid : 1; ///< True if BeginLine is valid
-  mutable bool EndLineValid : 1;   ///< True if EndLine is valid
-  mutable unsigned BeginLine;      ///< Cached line number
-  mutable unsigned EndLine;        ///< Cached line number
-
   /// \brief Constructor for AST deserialization.
   RawComment(SourceRange SR, CommentKind K, bool IsTrailingComment,
              bool IsAlmostTrailingComment,
@@ -158,8 +150,7 @@ private:
     Range(SR), RawTextValid(false), BriefTextValid(false), Kind(K),
     IsAttached(false), IsTrailingComment(IsTrailingComment),
     IsAlmostTrailingComment(IsAlmostTrailingComment),
-    ParseAllComments(ParseAllComments),
-    BeginLineValid(false), EndLineValid(false)
+    ParseAllComments(ParseAllComments)
   { }
 
   StringRef getRawTextSlow(const SourceManager &SourceMgr) const;
@@ -178,8 +169,7 @@ public:
   explicit BeforeThanCompare(const SourceManager &SM) : SM(SM) { }
 
   bool operator()(const RawComment &LHS, const RawComment &RHS) {
-    return SM.isBeforeInTranslationUnit(LHS.getSourceRange().getBegin(),
-                                        RHS.getSourceRange().getBegin());
+    return SM.isBeforeInTranslationUnit(LHS.getLocStart(), RHS.getLocStart());
   }
 
   bool operator()(const RawComment *LHS, const RawComment *RHS) {
@@ -191,8 +181,7 @@ public:
 /// sorted in order of appearance in the translation unit.
 class RawCommentList {
 public:
-  RawCommentList(SourceManager &SourceMgr) :
-    SourceMgr(SourceMgr), OnlyWhitespaceSeen(true) { }
+  RawCommentList(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
 
   void addComment(const RawComment &RC, llvm::BumpPtrAllocator &Allocator);
 
@@ -203,15 +192,9 @@ public:
 private:
   SourceManager &SourceMgr;
   std::vector<RawComment *> Comments;
-  SourceLocation PrevCommentEndLoc;
-  bool OnlyWhitespaceSeen;
 
   void addCommentsToFront(const std::vector<RawComment *> &C) {
-    size_t OldSize = Comments.size();
-    Comments.resize(C.size() + OldSize);
-    std::copy_backward(Comments.begin(), Comments.begin() + OldSize,
-                       Comments.end());
-    std::copy(C.begin(), C.end(), Comments.begin());
+    Comments.insert(Comments.begin(), C.begin(), C.end());
   }
 
   friend class ASTReader;

Modified: cfe/trunk/lib/AST/RawCommentList.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/RawCommentList.cpp?rev=191614&r1=191613&r2=191614&view=diff
==============================================================================
--- cfe/trunk/lib/AST/RawCommentList.cpp (original)
+++ cfe/trunk/lib/AST/RawCommentList.cpp Sat Sep 28 10:06:27 2013
@@ -68,8 +68,7 @@ RawComment::RawComment(const SourceManag
                        bool Merged, bool ParseAllComments) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
     IsAttached(false), IsAlmostTrailingComment(false),
-    ParseAllComments(ParseAllComments),
-    BeginLineValid(false), EndLineValid(false) {
+    ParseAllComments(ParseAllComments) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
     Kind = RCK_Invalid;
@@ -90,26 +89,6 @@ RawComment::RawComment(const SourceManag
   }
 }
 
-unsigned RawComment::getBeginLine(const SourceManager &SM) const {
-  if (BeginLineValid)
-    return BeginLine;
-
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
-  BeginLine = SM.getLineNumber(LocInfo.first, LocInfo.second);
-  BeginLineValid = true;
-  return BeginLine;
-}
-
-unsigned RawComment::getEndLine(const SourceManager &SM) const {
-  if (EndLineValid)
-    return EndLine;
-
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getEnd());
-  EndLine = SM.getLineNumber(LocInfo.first, LocInfo.second);
-  EndLineValid = true;
-  return EndLine;
-}
-
 StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
   FileID BeginFileID;
   FileID EndFileID;
@@ -184,13 +163,9 @@ comments::FullComment *RawComment::parse
   return P.parseFullComment();
 }
 
-namespace {
-bool containsOnlyWhitespace(StringRef Str) {
-  return Str.find_first_not_of(" \t\f\v\r\n") == StringRef::npos;
-}
-
-bool onlyWhitespaceBetween(SourceManager &SM,
-                           SourceLocation Loc1, SourceLocation Loc2) {
+static bool onlyWhitespaceBetween(SourceManager &SM,
+                                  SourceLocation Loc1, SourceLocation Loc2,
+                                  unsigned MaxNewlinesAllowed) {
   std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
   std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
 
@@ -203,10 +178,38 @@ bool onlyWhitespaceBetween(SourceManager
   if (Invalid)
     return false;
 
-  StringRef Text(Buffer + Loc1Info.second, Loc2Info.second - Loc1Info.second);
-  return containsOnlyWhitespace(Text);
+  unsigned NumNewlines = 0;
+  assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
+  // Look for non-whitespace characters and remember any newlines seen.
+  for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
+    switch (Buffer[I]) {
+    default:
+      return false;
+    case ' ':
+    case '\t':
+    case '\f':
+    case '\v':
+      break;
+    case '\r':
+    case '\n':
+      ++NumNewlines;
+
+      // Check if we have found more than the maximum allowed number of
+      // newlines.
+      if (NumNewlines > MaxNewlinesAllowed)
+        return false;
+
+      // Collapse \r\n and \n\r into a single newline.
+      if (I + 1 != Loc2Info.second &&
+          (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
+          Buffer[I] != Buffer[I + 1])
+        ++I;
+      break;
+    }
+  }
+
+  return true;
 }
-} // unnamed namespace
 
 void RawCommentList::addComment(const RawComment &RC,
                                 llvm::BumpPtrAllocator &Allocator) {
@@ -215,23 +218,13 @@ void RawCommentList::addComment(const Ra
 
   // Check if the comments are not in source order.
   while (!Comments.empty() &&
-         !SourceMgr.isBeforeInTranslationUnit(
-              Comments.back()->getSourceRange().getBegin(),
-              RC.getSourceRange().getBegin())) {
+         !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(),
+                                              RC.getLocStart())) {
     // If they are, just pop a few last comments that don't fit.
     // This happens if an \#include directive contains comments.
     Comments.pop_back();
   }
 
-  if (OnlyWhitespaceSeen) {
-    if (!onlyWhitespaceBetween(SourceMgr,
-                               PrevCommentEndLoc,
-                               RC.getSourceRange().getBegin()))
-      OnlyWhitespaceSeen = false;
-  }
-
-  PrevCommentEndLoc = RC.getSourceRange().getEnd();
-
   // Ordinary comments are not interesting for us.
   if (RC.isOrdinary())
     return;
@@ -240,7 +233,6 @@ void RawCommentList::addComment(const Ra
   // anything to merge it with).
   if (Comments.empty()) {
     Comments.push_back(new (Allocator) RawComment(RC));
-    OnlyWhitespaceSeen = true;
     return;
   }
 
@@ -250,21 +242,13 @@ void RawCommentList::addComment(const Ra
   // Merge comments only if there is only whitespace between them.
   // Can't merge trailing and non-trailing comments.
   // Merge comments if they are on same or consecutive lines.
-  bool Merged = false;
-  if (OnlyWhitespaceSeen &&
-      (C1.isTrailingComment() == C2.isTrailingComment())) {
-    unsigned C1EndLine = C1.getEndLine(SourceMgr);
-    unsigned C2BeginLine = C2.getBeginLine(SourceMgr);
-    if (C1EndLine + 1 == C2BeginLine || C1EndLine == C2BeginLine) {
-      SourceRange MergedRange(C1.getSourceRange().getBegin(),
-                              C2.getSourceRange().getEnd());
-      *Comments.back() = RawComment(SourceMgr, MergedRange, true,
-                                    RC.isParseAllComments());
-      Merged = true;
-    }
-  }
-  if (!Merged)
+  if (C1.isTrailingComment() == C2.isTrailingComment() &&
+      onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
+                            /*MaxNewlinesAllowed=*/1)) {
+    SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
+    *Comments.back() = RawComment(SourceMgr, MergedRange, true,
+                                  RC.isParseAllComments());
+  } else {
     Comments.push_back(new (Allocator) RawComment(RC));
-
-  OnlyWhitespaceSeen = true;
+  }
 }





More information about the cfe-commits mailing list