[clang] 38b4516 - [libTooling] Add function to determine associated text of a declaration.

Wed Feb 26 06:58:10 PST 2020

Author: Yitzhak Mandelbaum
Date: 2020-02-26T09:56:48-05:00
New Revision: 38b4516de8a4a791d17085d37f95e3cc15c359f9

URL: https://github.com/llvm/llvm-project/commit/38b4516de8a4a791d17085d37f95e3cc15c359f9
DIFF: https://github.com/llvm/llvm-project/commit/38b4516de8a4a791d17085d37f95e3cc15c359f9.diff

LOG: [libTooling] Add function to determine associated text of a declaration.

Summary:
Second attempt -- the first was reverted in commit 0e480b39c66143ad142f9a30d8d40e49d7d7b0ce, because of test breakages. This revision fixes the cause of the test breakages.

Original description follows:
This patch adds `getAssociatedRange` which, for a given decl, computes preceding
and trailing text that would conceptually be associated with the decl by the
reader. This includes comments, whitespace, and separators like ';'.

Reviewers: gribozavr

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D72153

Added: 
    

Modified: 
    clang/include/clang/Tooling/Transformer/SourceCode.h
    clang/lib/Tooling/Transformer/SourceCode.cpp
    clang/unittests/Tooling/SourceCodeTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Transformer/SourceCode.h b/clang/include/clang/Tooling/Transformer/SourceCode.h
index 1b92a117f44c..2c7eb65371cf 100644

--- a/clang/include/clang/Tooling/Transformer/SourceCode.h
+++ b/clang/include/clang/Tooling/Transformer/SourceCode.h
@@ -20,9 +20,10 @@
 namespace clang {
 namespace tooling {
 
-/// Extends \p Range to include the token \p Next, if it immediately follows the
-/// end of the range. Otherwise, returns \p Range unchanged.
-CharSourceRange maybeExtendRange(CharSourceRange Range, tok::TokenKind Next,
+/// Extends \p Range to include the token \p Terminator, if it immediately
+/// follows the end of the range. Otherwise, returns \p Range unchanged.
+CharSourceRange maybeExtendRange(CharSourceRange Range,
+                                 tok::TokenKind Terminator,
                                  ASTContext &Context);
 
 /// Returns the source range spanning the node, extended to include \p Next, if
@@ -35,6 +36,13 @@ CharSourceRange getExtendedRange(const T &Node, tok::TokenKind Next,
                           Next, Context);
 }
 
+/// Returns the logical source range of the node extended to include associated
+/// comments and whitespace before and after the node, and associated
+/// terminators. The returned range consists of file locations, if valid file
+/// locations can be found for the associated content; otherwise, an invalid
+/// range is returned.
+CharSourceRange getAssociatedRange(const Decl &D, ASTContext &Context);
+
 /// Returns the source-code text in the specified range.
 StringRef getText(CharSourceRange Range, const ASTContext &Context);
 

diff  --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp
index 5c1f8b46fe42..8d59487ea7ae 100644
--- a/clang/lib/Tooling/Transformer/SourceCode.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCode.cpp
@@ -10,6 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Tooling/Transformer/SourceCode.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Expr.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/Support/Errc.h"
 
@@ -84,3 +91,302 @@ clang::tooling::getRangeForEdit(const CharSourceRange &EditRange,
   return Range;
 
 }
+
+static bool startsWithNewline(const SourceManager &SM, const Token &Tok) {
+  return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]);
+}
+
+static bool contains(const std::set<tok::TokenKind> &Terminators,
+                     const Token &Tok) {
+  return Terminators.count(Tok.getKind()) > 0;
+}
+
+// Returns the exclusive, *file* end location of the entity whose last token is
+// at location 'EntityLast'. That is, it returns the location one past the last
+// relevant character.
+//
+// Associated tokens include comments, horizontal whitespace and 'Terminators'
+// -- optional tokens, which, if any are found, will be included; if
+// 'Terminators' is empty, we will not include any extra tokens beyond comments
+// and horizontal whitespace.
+static SourceLocation
+getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast,
+                const std::set<tok::TokenKind> &Terminators,
+                const LangOptions &LangOpts) {
+  assert(EntityLast.isValid() && "Invalid end location found.");
+
+  // We remember the last location of a non-horizontal-whitespace token we have
+  // lexed; this is the location up to which we will want to delete.
+  // FIXME: Support using the spelling loc here for cases where we want to
+  // analyze the macro text.
+
+  CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast);
+  // FIXME: Should check isTokenRange(), for the (rare) case that
+  // `ExpansionRange` is a character range.
+  std::unique_ptr<Lexer> Lexer = [&]() {
+    bool Invalid = false;
+    auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd());
+    llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid);
+    assert(!Invalid && "Cannot get file/offset");
+    return std::make_unique<clang::Lexer>(
+        SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(),
+        File.data() + FileOffset.second, File.end());
+  }();
+
+  // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown).
+  Lexer->SetKeepWhitespaceMode(true);
+
+  // Generally, the code we want to include looks like this ([] are optional),
+  // If Terminators is empty:
+  //   [ <comment> ] [ <newline> ]
+  // Otherwise:
+  //   ... <terminator> [ <comment> ] [ <newline> ]
+
+  Token Tok;
+  bool Terminated = false;
+
+  // First, lex to the current token (which is the last token of the range that
+  // is definitely associated with the decl). Then, we process the first token
+  // separately from the rest based on conditions that hold specifically for
+  // that first token.
+  //
+  // We do not search for a terminator if none is required or we've already
+  // encountered it. Otherwise, if the original `EntityLast` location was in a
+  // macro expansion, we don't have visibility into the text, so we assume we've
+  // already terminated. However, we note this assumption with
+  // `TerminatedByMacro`, because we'll want to handle it somewhat 
diff erently
+  // for the terminators semicolon and comma. These terminators can be safely
+  // associated with the entity when they appear after the macro -- extra
+  // semicolons have no effect on the program and a well-formed program won't
+  // have multiple commas in a row, so we're guaranteed that there is only one.
+  //
+  // FIXME: This handling of macros is more conservative than necessary. When
+  // the end of the expansion coincides with the end of the node, we can still
+  // safely analyze the code. But, it is more complicated, because we need to
+  // start by lexing the spelling loc for the first token and then switch to the
+  // expansion loc.
+  bool TerminatedByMacro = false;
+  Lexer->LexFromRawLexer(Tok);
+  if (Terminators.empty() || contains(Terminators, Tok))
+    Terminated = true;
+  else if (EntityLast.isMacroID()) {
+    Terminated = true;
+    TerminatedByMacro = true;
+  }
+
+  // We save the most recent candidate for the exclusive end location.
+  SourceLocation End = Tok.getEndLoc();
+
+  while (!Terminated) {
+    // Lex the next token we want to possibly expand the range with.
+    Lexer->LexFromRawLexer(Tok);
+
+    switch (Tok.getKind()) {
+    case tok::eof:
+    // Unexpected separators.
+    case tok::l_brace:
+    case tok::r_brace:
+    case tok::comma:
+      return End;
+    // Whitespace pseudo-tokens.
+    case tok::unknown:
+      if (startsWithNewline(SM, Tok))
+        // Include at least until the end of the line.
+        End = Tok.getEndLoc();
+      break;
+    default:
+      if (contains(Terminators, Tok))
+        Terminated = true;
+      End = Tok.getEndLoc();
+      break;
+    }
+  }
+
+  do {
+    // Lex the next token we want to possibly expand the range with.
+    Lexer->LexFromRawLexer(Tok);
+
+    switch (Tok.getKind()) {
+    case tok::unknown:
+      if (startsWithNewline(SM, Tok))
+        // We're done, but include this newline.
+        return Tok.getEndLoc();
+      break;
+    case tok::comment:
+      // Include any comments we find on the way.
+      End = Tok.getEndLoc();
+      break;
+    case tok::semi:
+    case tok::comma:
+      if (TerminatedByMacro && contains(Terminators, Tok)) {
+        End = Tok.getEndLoc();
+        // We've found a real terminator.
+        TerminatedByMacro = false;
+        break;
+      }
+      // Found an unrelated token; stop and don't include it.
+      return End;
+    default:
+      // Found an unrelated token; stop and don't include it.
+      return End;
+    }
+  } while (true);
+}
+
+// Returns the expected terminator tokens for the given declaration.
+//
+// If we do not know the correct terminator token, returns an empty set.
+//
+// There are cases where we have more than one possible terminator (for example,
+// we find either a comma or a semicolon after a VarDecl).
+static std::set<tok::TokenKind> getTerminators(const Decl &D) {
+  if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D))
+    return {tok::semi};
+
+  if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D))
+    return {tok::r_brace, tok::semi};
+
+  if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D))
+    return {tok::comma, tok::semi};
+
+  return {};
+}
+
+// Starting from `Loc`, skips whitespace up to, and including, a single
+// newline. Returns the (exclusive) end of any skipped whitespace (that is, the
+// location immediately after the whitespace).
+static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM,
+                                               SourceLocation Loc,
+                                               const LangOptions &LangOpts) {
+  const char *LocChars = SM.getCharacterData(Loc);
+  int i = 0;
+  while (isHorizontalWhitespace(LocChars[i]))
+    ++i;
+  if (isVerticalWhitespace(LocChars[i]))
+    ++i;
+  return Loc.getLocWithOffset(i);
+}
+
+// Is `Loc` separated from any following decl by something meaningful (e.g. an
+// empty line, a comment), ignoring horizontal whitespace?  Since this is a
+// heuristic, we return false when in doubt.  `Loc` cannot be the first location
+// in the file.
+static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc,
+                                 const LangOptions &LangOpts) {
+  // If the preceding character is a newline, we'll check for an empty line as a
+  // separator. However, we can't identify an empty line using tokens, so we
+  // analyse the characters. If we try to use tokens, we'll just end up with a
+  // whitespace token, whose characters we'd have to analyse anyhow.
+  bool Invalid = false;
+  const char *LocChars =
+      SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid);
+  assert(!Invalid &&
+         "Loc must be a valid character and not the first of the source file.");
+  if (isVerticalWhitespace(LocChars[0])) {
+    for (int i = 1; isWhitespace(LocChars[i]); ++i)
+      if (isVerticalWhitespace(LocChars[i]))
+        return true;
+  }
+  // We didn't find an empty line, so lex the next token, skipping past any
+  // whitespace we just scanned.
+  Token Tok;
+  bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts,
+                                   /*IgnoreWhiteSpace=*/true);
+  if (Failed)
+    // Any text that confuses the lexer seems fair to consider a separation.
+    return true;
+
+  switch (Tok.getKind()) {
+  case tok::comment:
+  case tok::l_brace:
+  case tok::r_brace:
+  case tok::eof:
+    return true;
+  default:
+    return false;
+  }
+}
+
+CharSourceRange tooling::getAssociatedRange(const Decl &Decl,
+                                            ASTContext &Context) {
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange());
+
+  // First, expand to the start of the template<> declaration if necessary.
+  if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) {
+    if (const auto *T = Record->getDescribedClassTemplate())
+      if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
+        Range.setBegin(T->getBeginLoc());
+  } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) {
+    if (const auto *T = F->getDescribedFunctionTemplate())
+      if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
+        Range.setBegin(T->getBeginLoc());
+  }
+
+  // Next, expand the end location past trailing comments to include a potential
+  // newline at the end of the decl's line.
+  Range.setEnd(
+      getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts));
+  Range.setTokenRange(false);
+
+  // Expand to include preceeding associated comments. We ignore any comments
+  // that are not preceeding the decl, since we've already skipped trailing
+  // comments with getEntityEndLoc.
+  if (const RawComment *Comment =
+          Decl.getASTContext().getRawCommentForDeclNoCache(&Decl))
+    // Only include a preceding comment if:
+    // * it is *not* separate from the declaration (not including any newline
+    //   that immediately follows the comment),
+    // * the decl *is* separate from any following entity (so, there are no
+    //   other entities the comment could refer to), and
+    // * it is not a IfThisThenThat lint check.
+    if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(),
+                                     Range.getBegin()) &&
+        !atOrBeforeSeparation(
+            SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts),
+            LangOpts) &&
+        atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) {
+      const StringRef CommentText = Comment->getRawText(SM);
+      if (!CommentText.contains("LINT.IfChange") &&
+          !CommentText.contains("LINT.ThenChange"))
+        Range.setBegin(Comment->getBeginLoc());
+    }
+  // Add leading attributes.
+  for (auto *Attr : Decl.attrs()) {
+    if (Attr->getLocation().isInvalid() ||
+        !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin()))
+      continue;
+    Range.setBegin(Attr->getLocation());
+
+    // Extend to the left '[[' or '__attribute((' if we saw the attribute,
+    // unless it is not a valid location.
+    bool Invalid;
+    StringRef Source =
+        SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid);
+    if (Invalid)
+      continue;
+    llvm::StringRef BeforeAttr =
+        Source.substr(0, SM.getFileOffset(Range.getBegin()));
+    llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim();
+
+    for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) {
+      // Handle whitespace between attribute prefix and attribute value.
+      if (BeforeAttrStripped.endswith(Prefix)) {
+        // Move start to start position of prefix, which is
+        // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix)
+        // positions to the left.
+        Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>(
+            -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size())));
+        break;
+        // If we didn't see '[[' or '__attribute' it's probably coming from a
+        // macro expansion which is already handled by makeFileCharRange(),
+        // below.
+      }
+    }
+  }
+
+  // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But,
+  // Range.getBegin() may be inside an expansion.
+  return Lexer::makeFileCharRange(Range, SM, LangOpts);
+}

diff  --git a/clang/unittests/Tooling/SourceCodeTest.cpp b/clang/unittests/Tooling/SourceCodeTest.cpp
index 9e0d2e2c4274..aeb0ca36582b 100644
--- a/clang/unittests/Tooling/SourceCodeTest.cpp
+++ b/clang/unittests/Tooling/SourceCodeTest.cpp
@@ -20,6 +20,7 @@ using namespace clang;
 using llvm::Failed;
 using llvm::Succeeded;
 using llvm::ValueIs;
+using tooling::getAssociatedRange;
 using tooling::getExtendedText;
 using tooling::getRangeForEdit;
 using tooling::getText;
@@ -51,6 +52,28 @@ MATCHER_P(EqualsRange, R, "") {
          arg.getBegin() == R.getBegin() && arg.getEnd() == R.getEnd();
 }
 
+MATCHER_P2(EqualsAnnotatedRange, SM, R, "") {
+  if (arg.getBegin().isMacroID()) {
+    *result_listener << "which starts in a macro";
+    return false;
+  }
+  if (arg.getEnd().isMacroID()) {
+    *result_listener << "which ends in a macro";
+    return false;
+  }
+
+  unsigned Begin = SM->getFileOffset(arg.getBegin());
+  unsigned End = SM->getFileOffset(arg.getEnd());
+
+  *result_listener << "which is [" << Begin << ",";
+  if (arg.isTokenRange()) {
+    *result_listener << End << "]";
+    return Begin == R.Begin && End + 1 == R.End;
+  }
+  *result_listener << End << ")";
+  return Begin == R.Begin && End == R.End;
+}
+
 static ::testing::Matcher<CharSourceRange> AsRange(const SourceManager &SM,
                                                    llvm::Annotations::Range R) {
   return EqualsRange(CharSourceRange::getCharRange(
@@ -58,6 +81,40 @@ static ::testing::Matcher<CharSourceRange> AsRange(const SourceManager &SM,
       SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(R.End)));
 }
 
+// Base class for visitors that expect a single match corresponding to a
+// specific annotated range.
+template <typename T> class AnnotatedCodeVisitor : public TestVisitor<T> {
+  llvm::Annotations Code;
+  int MatchCount = 0;
+
+public:
+  AnnotatedCodeVisitor() : Code("$r[[]]") {}
+  bool VisitDeclHelper(Decl *Decl) {
+    // Only consider explicit declarations.
+    if (Decl->isImplicit())
+      return true;
+
+    ++MatchCount;
+    EXPECT_THAT(getAssociatedRange(*Decl, *this->Context),
+                EqualsAnnotatedRange(&this->Context->getSourceManager(),
+                                     Code.range("r")))
+        << Code.code();
+    return true;
+  }
+
+  bool runOverAnnotated(llvm::StringRef AnnotatedCode,
+                        std::vector<std::string> Args = {}) {
+    Code = llvm::Annotations(AnnotatedCode);
+    MatchCount = 0;
+    Args.push_back("-std=c++11");
+    Args.push_back("-fno-delayed-template-parsing");
+    bool result = tooling::runToolOnCodeWithArgs(this->CreateTestAction(),
+                                                 Code.code(), Args);
+    EXPECT_EQ(MatchCount, 1) << AnnotatedCode;
+    return result;
+  }
+};
+
 TEST(SourceCodeTest, getText) {
   CallsVisitor Visitor;
 
@@ -126,6 +183,212 @@ TEST(SourceCodeTest, getExtendedText) {
   Visitor.runOver("int foo() { return foo() + 3; }");
 }
 
+TEST(SourceCodeTest, getAssociatedRange) {
+  struct VarDeclsVisitor : AnnotatedCodeVisitor<VarDeclsVisitor> {
+    bool VisitVarDecl(VarDecl *Decl) { return VisitDeclHelper(Decl); }
+  };
+  VarDeclsVisitor Visitor;
+
+  // Includes semicolon.
+  Visitor.runOverAnnotated("$r[[int x = 4;]]");
+
+  // Includes newline and semicolon.
+  Visitor.runOverAnnotated("$r[[int x = 4;\n]]");
+
+  // Includes trailing comments.
+  Visitor.runOverAnnotated("$r[[int x = 4; // Comment\n]]");
+  Visitor.runOverAnnotated("$r[[int x = 4; /* Comment */\n]]");
+
+  // Does *not* include trailing comments when another entity appears between
+  // the decl and the comment.
+  Visitor.runOverAnnotated("$r[[int x = 4;]] class C {}; // Comment\n");
+
+  // Includes attributes.
+  Visitor.runOverAnnotated(R"cpp(
+      #define ATTR __attribute__((deprecated("message")))
+      $r[[ATTR
+      int x;]])cpp");
+
+  // Includes attributes and comments together.
+  Visitor.runOverAnnotated(R"cpp(
+      #define ATTR __attribute__((deprecated("message")))
+      $r[[ATTR
+      // Commment.
+      int x;]])cpp");
+}
+
+TEST(SourceCodeTest, getAssociatedRangeClasses) {
+  struct RecordDeclsVisitor : AnnotatedCodeVisitor<RecordDeclsVisitor> {
+    bool VisitRecordDecl(RecordDecl *Decl) { return VisitDeclHelper(Decl); }
+  };
+  RecordDeclsVisitor Visitor;
+
+  Visitor.runOverAnnotated("$r[[class A;]]");
+  Visitor.runOverAnnotated("$r[[class A {};]]");
+
+  // Includes leading template annotation.
+  Visitor.runOverAnnotated("$r[[template <typename T> class A;]]");
+  Visitor.runOverAnnotated("$r[[template <typename T> class A {};]]");
+}
+
+TEST(SourceCodeTest, getAssociatedRangeClassTemplateSpecializations) {
+  struct CXXRecordDeclsVisitor : AnnotatedCodeVisitor<CXXRecordDeclsVisitor> {
+    bool VisitCXXRecordDecl(CXXRecordDecl *Decl) {
+      return Decl->getTemplateSpecializationKind() !=
+                 TSK_ExplicitSpecialization ||
+             VisitDeclHelper(Decl);
+    }
+  };
+  CXXRecordDeclsVisitor Visitor;
+
+  Visitor.runOverAnnotated(R"cpp(
+      template <typename T> class A{};
+      $r[[template <> class A<int>;]])cpp");
+  Visitor.runOverAnnotated(R"cpp(
+      template <typename T> class A{};
+      $r[[template <> class A<int> {};]])cpp");
+}
+
+TEST(SourceCodeTest, getAssociatedRangeFunctions) {
+  struct FunctionDeclsVisitor : AnnotatedCodeVisitor<FunctionDeclsVisitor> {
+    bool VisitFunctionDecl(FunctionDecl *Decl) { return VisitDeclHelper(Decl); }
+  };
+  FunctionDeclsVisitor Visitor;
+
+  Visitor.runOverAnnotated("$r[[int f();]]");
+  Visitor.runOverAnnotated("$r[[int f() { return 0; }]]");
+  // Includes leading template annotation.
+  Visitor.runOverAnnotated("$r[[template <typename T> int f();]]");
+  Visitor.runOverAnnotated("$r[[template <typename T> int f() { return 0; }]]");
+}
+
+TEST(SourceCodeTest, getAssociatedRangeMemberTemplates) {
+  struct CXXMethodDeclsVisitor : AnnotatedCodeVisitor<CXXMethodDeclsVisitor> {
+    bool VisitCXXMethodDecl(CXXMethodDecl *Decl) {
+      // Only consider the definition of the template.
+      return !Decl->doesThisDeclarationHaveABody() || VisitDeclHelper(Decl);
+    }
+  };
+  CXXMethodDeclsVisitor Visitor;
+
+  Visitor.runOverAnnotated(R"cpp(
+      template <typename C>
+      struct A { template <typename T> int member(T v); };
+
+      $r[[template <typename C>
+      template  <typename T>
+      int A<C>::member(T v) { return 0; }]])cpp");
+}
+
+TEST(SourceCodeTest, getAssociatedRangeWithComments) {
+  struct VarDeclsVisitor : AnnotatedCodeVisitor<VarDeclsVisitor> {
+    bool VisitVarDecl(VarDecl *Decl) { return VisitDeclHelper(Decl); }
+  };
+
+  VarDeclsVisitor Visitor;
+  auto Visit = [&](llvm::StringRef AnnotatedCode) {
+    Visitor.runOverAnnotated(AnnotatedCode, {"-fparse-all-comments"});
+  };
+
+  // Includes leading comments.
+  Visit("$r[[// Comment.\nint x = 4;]]");
+  Visit("$r[[// Comment.\nint x = 4;\n]]");
+  Visit("$r[[/* Comment.*/\nint x = 4;\n]]");
+  // ... even if separated by (extra) horizontal whitespace.
+  Visit("$r[[/* Comment.*/  \nint x = 4;\n]]");
+
+  // Includes comments even in the presence of trailing whitespace.
+  Visit("$r[[// Comment.\nint x = 4;]]  ");
+
+  // Includes comments when the declaration is followed by the beginning or end
+  // of a compound statement.
+  Visit(R"cpp(
+  void foo() {
+    $r[[/* C */
+    int x = 4;
+  ]]};)cpp");
+  Visit(R"cpp(
+  void foo() {
+    $r[[/* C */
+    int x = 4;
+   ]]{ class Foo {}; }
+   })cpp");
+
+  // Includes comments inside macros (when decl is in the same macro).
+  Visit(R"cpp(
+      #define DECL /* Comment */ int x
+      $r[[DECL;]])cpp");
+
+  // Does not include comments when only the decl or the comment come from a
+  // macro.
+  // FIXME: Change code to allow this.
+  Visit(R"cpp(
+      #define DECL int x
+      // Comment
+      $r[[DECL;]])cpp");
+  Visit(R"cpp(
+      #define COMMENT /* Comment */
+      COMMENT
+      $r[[int x;]])cpp");
+
+  // Includes multi-line comments.
+  Visit(R"cpp(
+      $r[[/* multi
+       * line
+       * comment
+       */
+      int x;]])cpp");
+  Visit(R"cpp(
+      $r[[// multi
+      // line
+      // comment
+      int x;]])cpp");
+
+  // Does not include comments separated by multiple empty lines.
+  Visit("// Comment.\n\n\n$r[[int x = 4;\n]]");
+  Visit("/* Comment.*/\n\n\n$r[[int x = 4;\n]]");
+
+  // Does not include comments before a *series* of declarations.
+  Visit(R"cpp(
+      // Comment.
+      $r[[int x = 4;
+      ]]class foo {};)cpp");
+
+  // Does not include IfThisThenThat comments
+  Visit("// LINT.IfChange.\n$r[[int x = 4;]]");
+  Visit("// LINT.ThenChange.\n$r[[int x = 4;]]");
+
+  // Includes attributes.
+  Visit(R"cpp(
+      #define ATTR __attribute__((deprecated("message")))
+      $r[[ATTR
+      int x;]])cpp");
+
+  // Includes attributes and comments together.
+  Visit(R"cpp(
+      #define ATTR __attribute__((deprecated("message")))
+      $r[[ATTR
+      // Commment.
+      int x;]])cpp");
+}
+
+TEST(SourceCodeTest, getAssociatedRangeInvalidForPartialExpansions) {
+  struct FailingVarDeclsVisitor : TestVisitor<FailingVarDeclsVisitor> {
+    FailingVarDeclsVisitor() {}
+    bool VisitVarDecl(VarDecl *Decl) {
+      EXPECT_TRUE(getAssociatedRange(*Decl, *Context).isInvalid());
+      return true;
+    }
+  };
+
+  FailingVarDeclsVisitor Visitor;
+  // Should fail because it only includes a part of the expansion.
+  std::string Code = R"cpp(
+      #define DECL class foo { }; int x
+      DECL;)cpp";
+  Visitor.runOver(Code);
+}
+
 TEST(SourceCodeTest, EditRangeWithMacroExpansionsShouldSucceed) {
   // The call expression, whose range we are extracting, includes two macro
   // expansions.