r242317 - Allow any comment to be a trailing comment when -fparse-all-comments is on.

James Dennett jdennett at google.com
Wed Jul 15 12:13:39 PDT 2015


Author: jdennett
Date: Wed Jul 15 14:13:39 2015
New Revision: 242317

URL: http://llvm.org/viewvc/llvm-project?rev=242317&view=rev
Log:
Allow any comment to be a trailing comment when -fparse-all-comments is on.

This helps with freeform documentation styles, where otherwise code like
  enum class E {
    E1,  // D1
    E2   // D2
  };
would result in D1 being associated with E2. To properly associate E1
with D1 and E2 with D2, this patch allows all raw comments C such that
C.isParseAllComments() to participate in trailing comment checks inside
getRawCommentForDeclNoCache. This takes care of linking the intended
documentation with the intended decls. There remains an issue with code
like:
  foo();  // DN
  int x;
To prevent DN from being associated with x, this patch adds a new test
on preceding-line comments C (where C.isParseAllComments() and also
C's kind is RCK_OrdinaryBCPL or RCK_OrdinaryC) that checks whether C
is the first non-whitespace thing on C's starting line.

Patch from Luke Zarko <zarko at google.com>, D11069 reviewed by rsmith.

Modified:
    cfe/trunk/lib/AST/RawCommentList.cpp
    cfe/trunk/test/Index/parse-all-comments.c

Modified: cfe/trunk/lib/AST/RawCommentList.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/RawCommentList.cpp?rev=242317&r1=242316&r2=242317&view=diff
==============================================================================
--- cfe/trunk/lib/AST/RawCommentList.cpp (original)
+++ cfe/trunk/lib/AST/RawCommentList.cpp Wed Jul 15 14:13:39 2015
@@ -15,6 +15,7 @@
 #include "clang/AST/CommentLexer.h"
 #include "clang/AST/CommentParser.h"
 #include "clang/AST/CommentSema.h"
+#include "clang/Basic/CharInfo.h"
 #include "llvm/ADT/STLExtras.h"
 
 using namespace clang;
@@ -62,12 +63,53 @@ std::pair<RawComment::CommentKind, bool>
 bool mergedCommentIsTrailingComment(StringRef Comment) {
   return (Comment.size() > 3) && (Comment[3] == '<');
 }
+
+/// Returns true if R1 and R2 both have valid locations that start on the same
+/// column.
+bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
+                               const RawComment &R2) {
+  SourceLocation L1 = R1.getLocStart();
+  SourceLocation L2 = R2.getLocStart();
+  bool Invalid = false;
+  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
+  if (!Invalid) {
+    unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
+    return !Invalid && (C1 == C2);
+  }
+  return false;
+}
 } // unnamed namespace
 
+/// \brief Determines whether there is only whitespace in `Buffer` between `P`
+/// and the previous line.
+/// \param Buffer The buffer to search in.
+/// \param P The offset from the beginning of `Buffer` to start from.
+/// \return true if all of the characters in `Buffer` ranging from the closest
+/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
+/// are whitespace.
+static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
+  // Search backwards until we see linefeed or carriage return.
+  for (unsigned I = P; I != 0; --I) {
+    char C = Buffer[I - 1];
+    if (isVerticalWhitespace(C))
+      return true;
+    if (!isHorizontalWhitespace(C))
+      return false;
+  }
+  // We hit the beginning of the buffer.
+  return true;
+}
+
+/// Returns whether `K` is an ordinary comment kind.
+static bool isOrdinaryKind(RawComment::CommentKind K) {
+  return (K == RawComment::RCK_OrdinaryBCPL) ||
+         (K == RawComment::RCK_OrdinaryC);
+}
+
 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
                        bool Merged, bool ParseAllComments) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
-    IsAttached(false), IsAlmostTrailingComment(false),
+    IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
     ParseAllComments(ParseAllComments) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
@@ -75,17 +117,34 @@ RawComment::RawComment(const SourceManag
     return;
   }
 
+  // Guess comment kind.
+  std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
+
+  // Guess whether an ordinary comment is trailing.
+  if (ParseAllComments && isOrdinaryKind(K.first)) {
+    FileID BeginFileID;
+    unsigned BeginOffset;
+    std::tie(BeginFileID, BeginOffset) =
+        SourceMgr.getDecomposedLoc(Range.getBegin());
+    if (BeginOffset != 0) {
+      bool Invalid = false;
+      const char *Buffer =
+          SourceMgr.getBufferData(BeginFileID, &Invalid).data();
+      IsTrailingComment |=
+          (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
+    }
+  }
+
   if (!Merged) {
-    // Guess comment kind.
-    std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
     Kind = K.first;
-    IsTrailingComment = K.second;
+    IsTrailingComment |= K.second;
 
     IsAlmostTrailingComment = RawText.startswith("//<") ||
                                  RawText.startswith("/*<");
   } else {
     Kind = RCK_Merged;
-    IsTrailingComment = mergedCommentIsTrailingComment(RawText);
+    IsTrailingComment =
+        IsTrailingComment || mergedCommentIsTrailingComment(RawText);
   }
 }
 
@@ -239,9 +298,22 @@ void RawCommentList::addComment(const Ra
   const RawComment &C2 = RC;
 
   // Merge comments only if there is only whitespace between them.
-  // Can't merge trailing and non-trailing comments.
+  // Can't merge trailing and non-trailing comments unless the second is
+  // non-trailing ordinary in the same column, as in the case:
+  //   int x; // documents x
+  //          // more text
+  // versus:
+  //   int x; // documents x
+  //   int y; // documents y
+  // or:
+  //   int x; // documents x
+  //   // documents y
+  //   int y;
   // Merge comments if they are on same or consecutive lines.
-  if (C1.isTrailingComment() == C2.isTrailingComment() &&
+  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
+       (C1.isTrailingComment() && !C2.isTrailingComment() &&
+        isOrdinaryKind(C2.getKind()) &&
+        commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
                             /*MaxNewlinesAllowed=*/1)) {
     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());

Modified: cfe/trunk/test/Index/parse-all-comments.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/parse-all-comments.c?rev=242317&r1=242316&r2=242317&view=diff
==============================================================================
--- cfe/trunk/test/Index/parse-all-comments.c (original)
+++ cfe/trunk/test/Index/parse-all-comments.c Wed Jul 15 14:13:39 2015
@@ -33,6 +33,41 @@ void multi_line_comment_plus_ordinary(in
 // WITH EMPTY LINE
 void multi_line_comment_empty_line(int);
 
+int notdoxy7; // Not a Doxygen juxtaposed comment.  notdoxy7 NOT_DOXYGEN
+int notdoxy8; // Not a Doxygen juxtaposed comment.  notdoxy8 NOT_DOXYGEN
+
+int trdoxy9;  /// A Doxygen non-trailing comment.  trdoxyA IS_DOXYGEN_SINGLE
+int trdoxyA;
+
+int trdoxyB;  // Not a Doxygen trailing comment.  PART_ONE
+              // It's a multiline one too.  trdoxyB NOT_DOXYGEN
+int trdoxyC;
+
+int trdoxyD;  // Not a Doxygen trailing comment.   trdoxyD NOT_DOXYGEN
+              /// This comment doesn't get merged.   trdoxyE IS_DOXYGEN
+int trdoxyE;
+
+int trdoxyF;  /// A Doxygen non-trailing comment that gets dropped on the floor.
+              // This comment will also be dropped.
+int trdoxyG;  // This one won't.  trdoxyG NOT_DOXYGEN
+
+int trdoxyH;  ///< A Doxygen trailing comment.  PART_ONE
+              // This one gets merged with it.  trdoxyH SOME_DOXYGEN
+int trdoxyI;  // This one doesn't.  trdoxyI NOT_DOXYGEN
+
+int trdoxyJ;  // Not a Doxygen trailing comment.  PART_ONE
+              ///< This one gets merged with it.  trdoxyJ SOME_DOXYGEN
+int trdoxyK;  // This one doesn't.  trdoxyK NOT_DOXYGEN
+
+int trdoxyL;  // Not a Doxygen trailing comment.  trdoxyL NOT_DOXYGEN
+// This one shouldn't get merged.  trdoxyM NOT_DOXYGEN
+int trdoxyM;
+
+int trdoxyN;  ///< A Doxygen trailing comment.  trdoxyN IS_DOXYGEN
+  // This one shouldn't get merged.  trdoxyO NOT_DOXYGEN
+int trdoxyO;
+
+
 #endif
 
 // RUN: rm -rf %t
@@ -60,3 +95,21 @@ void multi_line_comment_empty_line(int);
 // CHECK: parse-all-comments.c:22:6: FunctionDecl=isdoxy6:{{.*}} isdoxy6 IS_DOXYGEN_SINGLE
 // CHECK: parse-all-comments.c:29:6: FunctionDecl=multi_line_comment_plus_ordinary:{{.*}} BLOCK_ORDINARY_COMMENT {{.*}} ORDINARY COMMENT {{.*}} IS_DOXYGEN_START {{.*}} IS_DOXYGEN_END
 // CHECK: parse-all-comments.c:34:6: FunctionDecl=multi_line_comment_empty_line:{{.*}} MULTILINE COMMENT{{.*}}\n{{.*}}\n{{.*}} WITH EMPTY LINE
+// CHECK: parse-all-comments.c:36:5: VarDecl=notdoxy7:{{.*}} notdoxy7 NOT_DOXYGEN
+// CHECK: parse-all-comments.c:37:5: VarDecl=notdoxy8:{{.*}} notdoxy8 NOT_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:39:5: VarDecl=trdoxy9:{{.*}} trdoxyA IS_DOXYGEN_SINGLE
+// CHECK: parse-all-comments.c:40:5: VarDecl=trdoxyA:{{.*}} trdoxyA IS_DOXYGEN_SINGLE
+// CHECK: parse-all-comments.c:42:5: VarDecl=trdoxyB:{{.*}} PART_ONE {{.*}} trdoxyB NOT_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:44:5: VarDecl=trdoxyC:{{.*}} trdoxyB NOT_DOXYGEN
+// CHECK: parse-all-comments.c:46:5: VarDecl=trdoxyD:{{.*}} trdoxyD NOT_DOXYGEN
+// CHECK: parse-all-comments.c:48:5: VarDecl=trdoxyE:{{.*}} trdoxyE IS_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:50:5: VarDecl=trdoxyF:{{.*}} RawComment
+// CHECK: parse-all-comments.c:52:5: VarDecl=trdoxyG:{{.*}} trdoxyG NOT_DOXYGEN
+// CHECK: parse-all-comments.c:54:5: VarDecl=trdoxyH:{{.*}} PART_ONE {{.*}} trdoxyH SOME_DOXYGEN
+// CHECK: parse-all-comments.c:56:5: VarDecl=trdoxyI:{{.*}} trdoxyI NOT_DOXYGEN
+// CHECK: parse-all-comments.c:58:5: VarDecl=trdoxyJ:{{.*}} PART_ONE {{.*}} trdoxyJ SOME_DOXYGEN
+// CHECK: parse-all-comments.c:60:5: VarDecl=trdoxyK:{{.*}} trdoxyK NOT_DOXYGEN
+// CHECK: parse-all-comments.c:62:5: VarDecl=trdoxyL:{{.*}} trdoxyL NOT_DOXYGEN
+// CHECK: parse-all-comments.c:64:5: VarDecl=trdoxyM:{{.*}} trdoxyM NOT_DOXYGEN
+// CHECK: parse-all-comments.c:66:5: VarDecl=trdoxyN:{{.*}} trdoxyN IS_DOXYGEN
+// CHECK: parse-all-comments.c:68:5: VarDecl=trdoxyO:{{.*}} trdoxyO NOT_DOXYGEN





More information about the cfe-commits mailing list