r332458 - [AST] Added a helper to extract a user-friendly text of a comment.

Wed May 16 05:30:09 PDT 2018

Author: ibiryukov
Date: Wed May 16 05:30:09 2018
New Revision: 332458

URL: http://llvm.org/viewvc/llvm-project?rev=332458&view=rev
Log:
[AST] Added a helper to extract a user-friendly text of a comment.

Summary:
The helper is used in clangd for documentation shown in code completion
and storing the docs in the symbols. See D45999.

This patch reuses the code of the Doxygen comment lexer, disabling the
bits that do command and html tag parsing.
The new helper works on all comments, including non-doxygen comments.
However, it does not understand or transform any doxygen directives,
i.e. cannot extract brief text, etc.

Reviewers: sammccall, hokein, ioeric

Reviewed By: ioeric

Subscribers: mgorny, cfe-commits

Differential Revision: https://reviews.llvm.org/D46000

Added:
    cfe/trunk/unittests/AST/CommentTextTest.cpp
Modified:
    cfe/trunk/include/clang/AST/CommentLexer.h
    cfe/trunk/include/clang/AST/RawCommentList.h
    cfe/trunk/lib/AST/CommentLexer.cpp
    cfe/trunk/lib/AST/RawCommentList.cpp
    cfe/trunk/unittests/AST/CMakeLists.txt

Modified: cfe/trunk/include/clang/AST/CommentLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentLexer.h?rev=332458&r1=332457&r2=332458&view=diff
==============================================================================

--- cfe/trunk/include/clang/AST/CommentLexer.h (original)
+++ cfe/trunk/include/clang/AST/CommentLexer.h Wed May 16 05:30:09 2018
@@ -281,6 +281,11 @@ private:
   /// command, including command marker.
   SmallString<16> VerbatimBlockEndCommandName;
 
+  /// If true, the commands, html tags, etc will be parsed and reported as
+  /// separate tokens inside the comment body. If false, the comment text will
+  /// be parsed into text and newline tokens.
+  bool ParseCommands;
+
   /// Given a character reference name (e.g., "lt"), return the character that
   /// it stands for (e.g., "<").
   StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
@@ -315,12 +320,11 @@ private:
   /// Eat string matching regexp \code \s*\* \endcode.
   void skipLineStartingDecorations();
 
-  /// Lex stuff inside comments.  CommentEnd should be set correctly.
+  /// Lex comment text, including commands if ParseCommands is set to true.
   void lexCommentText(Token &T);
 
-  void setupAndLexVerbatimBlock(Token &T,
-                                const char *TextBegin,
-                                char Marker, const CommandInfo *Info);
+  void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
+                                const CommandInfo *Info);
 
   void lexVerbatimBlockFirstLine(Token &T);
 
@@ -343,14 +347,13 @@ private:
 
 public:
   Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
-        const CommandTraits &Traits,
-        SourceLocation FileLoc,
-        const char *BufferStart, const char *BufferEnd);
+        const CommandTraits &Traits, SourceLocation FileLoc,
+        const char *BufferStart, const char *BufferEnd,
+        bool ParseCommands = true);
 
   void lex(Token &T);
 
-  StringRef getSpelling(const Token &Tok,
-                        const SourceManager &SourceMgr,
+  StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr,
                         bool *Invalid = nullptr) const;
 };
 

Modified: cfe/trunk/include/clang/AST/RawCommentList.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/RawCommentList.h?rev=332458&r1=332457&r2=332458&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/RawCommentList.h (original)
+++ cfe/trunk/include/clang/AST/RawCommentList.h Wed May 16 05:30:09 2018
@@ -111,6 +111,30 @@ public:
     return extractBriefText(Context);
   }
 
+  /// Returns sanitized comment text, suitable for presentation in editor UIs.
+  /// E.g. will transform:
+  ///     // This is a long multiline comment.
+  ///     //   Parts of it  might be indented.
+  ///     /* The comments styles might be mixed. */
+  ///  into
+  ///     "This is a long multiline comment.\n"
+  ///     "  Parts of it  might be indented.\n"
+  ///     "The comments styles might be mixed."
+  /// Also removes leading indentation and sanitizes some common cases:
+  ///     /* This is a first line.
+  ///      *   This is a second line. It is indented.
+  ///      * This is a third line. */
+  /// and
+  ///     /* This is a first line.
+  ///          This is a second line. It is indented.
+  ///     This is a third line. */
+  /// will both turn into:
+  ///     "This is a first line.\n"
+  ///     "  This is a second line. It is indented.\n"
+  ///     "This is a third line."
+  std::string getFormattedText(const SourceManager &SourceMgr,
+                               DiagnosticsEngine &Diags) const;
+
   /// Parse the comment, assuming it is attached to decl \c D.
   comments::FullComment *parse(const ASTContext &Context,
                                const Preprocessor *PP, const Decl *D) const;

Modified: cfe/trunk/lib/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentLexer.cpp?rev=332458&r1=332457&r2=332458&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentLexer.cpp (original)
+++ cfe/trunk/lib/AST/CommentLexer.cpp Wed May 16 05:30:09 2018
@@ -294,6 +294,39 @@ void Lexer::lexCommentText(Token &T) {
   assert(CommentState == LCS_InsideBCPLComment ||
          CommentState == LCS_InsideCComment);
 
+  // Handles lexing non-command text, i.e. text and newline.
+  auto HandleNonCommandToken = [&]() -> void {
+    assert(State == LS_Normal);
+
+    const char *TokenPtr = BufferPtr;
+    assert(TokenPtr < CommentEnd);
+    switch (*TokenPtr) {
+      case '\n':
+      case '\r':
+          TokenPtr = skipNewline(TokenPtr, CommentEnd);
+          formTokenWithChars(T, TokenPtr, tok::newline);
+
+          if (CommentState == LCS_InsideCComment)
+            skipLineStartingDecorations();
+          return;
+
+      default: {
+          StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r";
+          size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr)
+                           .find_first_of(TokStartSymbols);
+          if (End != StringRef::npos)
+            TokenPtr += End;
+          else
+            TokenPtr = CommentEnd;
+          formTextToken(T, TokenPtr);
+          return;
+      }
+    }
+  };
+
+  if (!ParseCommands)
+    return HandleNonCommandToken();
+
   switch (State) {
   case LS_Normal:
     break;
@@ -315,136 +348,116 @@ void Lexer::lexCommentText(Token &T) {
   }
 
   assert(State == LS_Normal);
-
   const char *TokenPtr = BufferPtr;
   assert(TokenPtr < CommentEnd);
-  while (TokenPtr != CommentEnd) {
-    switch(*TokenPtr) {
-      case '\\':
-      case '@': {
-        // Commands that start with a backslash and commands that start with
-        // 'at' have equivalent semantics.  But we keep information about the
-        // exact syntax in AST for comments.
-        tok::TokenKind CommandKind =
-            (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
+  switch(*TokenPtr) {
+    case '\\':
+    case '@': {
+      // Commands that start with a backslash and commands that start with
+      // 'at' have equivalent semantics.  But we keep information about the
+      // exact syntax in AST for comments.
+      tok::TokenKind CommandKind =
+          (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
+      TokenPtr++;
+      if (TokenPtr == CommentEnd) {
+        formTextToken(T, TokenPtr);
+        return;
+      }
+      char C = *TokenPtr;
+      switch (C) {
+      default:
+        break;
+
+      case '\\': case '@': case '&': case '$':
+      case '#':  case '<': case '>': case '%':
+      case '\"': case '.': case ':':
+        // This is one of \\ \@ \& \$ etc escape sequences.
         TokenPtr++;
-        if (TokenPtr == CommentEnd) {
-          formTextToken(T, TokenPtr);
-          return;
-        }
-        char C = *TokenPtr;
-        switch (C) {
-        default:
-          break;
-
-        case '\\': case '@': case '&': case '$':
-        case '#':  case '<': case '>': case '%':
-        case '\"': case '.': case ':':
-          // This is one of \\ \@ \& \$ etc escape sequences.
+        if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
+          // This is the \:: escape sequence.
           TokenPtr++;
-          if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
-            // This is the \:: escape sequence.
-            TokenPtr++;
-          }
-          StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
-          formTokenWithChars(T, TokenPtr, tok::text);
-          T.setText(UnescapedText);
-          return;
         }
+        StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
+        formTokenWithChars(T, TokenPtr, tok::text);
+        T.setText(UnescapedText);
+        return;
+      }
 
-        // Don't make zero-length commands.
-        if (!isCommandNameStartCharacter(*TokenPtr)) {
-          formTextToken(T, TokenPtr);
-          return;
-        }
+      // Don't make zero-length commands.
+      if (!isCommandNameStartCharacter(*TokenPtr)) {
+        formTextToken(T, TokenPtr);
+        return;
+      }
 
-        TokenPtr = skipCommandName(TokenPtr, CommentEnd);
-        unsigned Length = TokenPtr - (BufferPtr + 1);
+      TokenPtr = skipCommandName(TokenPtr, CommentEnd);
+      unsigned Length = TokenPtr - (BufferPtr + 1);
 
-        // Hardcoded support for lexing LaTeX formula commands
-        // \f$ \f[ \f] \f{ \f} as a single command.
-        if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
-          C = *TokenPtr;
-          if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
-            TokenPtr++;
-            Length++;
-          }
+      // Hardcoded support for lexing LaTeX formula commands
+      // \f$ \f[ \f] \f{ \f} as a single command.
+      if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
+        C = *TokenPtr;
+        if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
+          TokenPtr++;
+          Length++;
         }
+      }
 
-        StringRef CommandName(BufferPtr + 1, Length);
+      StringRef CommandName(BufferPtr + 1, Length);
 
-        const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
-        if (!Info) {
-          if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
-            StringRef CorrectedName = Info->Name;
-            SourceLocation Loc = getSourceLocation(BufferPtr);
-            SourceLocation EndLoc = getSourceLocation(TokenPtr);
-            SourceRange FullRange = SourceRange(Loc, EndLoc);
-            SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc);
-            Diag(Loc, diag::warn_correct_comment_command_name)
-              << FullRange << CommandName << CorrectedName
-              << FixItHint::CreateReplacement(CommandRange, CorrectedName);
-          } else {
-            formTokenWithChars(T, TokenPtr, tok::unknown_command);
-            T.setUnknownCommandName(CommandName);
-            Diag(T.getLocation(), diag::warn_unknown_comment_command_name)
-                << SourceRange(T.getLocation(), T.getEndLocation());
-            return;
-          }
-        }
-        if (Info->IsVerbatimBlockCommand) {
-          setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
+      const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
+      if (!Info) {
+        if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
+          StringRef CorrectedName = Info->Name;
+          SourceLocation Loc = getSourceLocation(BufferPtr);
+          SourceLocation EndLoc = getSourceLocation(TokenPtr);
+          SourceRange FullRange = SourceRange(Loc, EndLoc);
+          SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc);
+          Diag(Loc, diag::warn_correct_comment_command_name)
+            << FullRange << CommandName << CorrectedName
+            << FixItHint::CreateReplacement(CommandRange, CorrectedName);
+        } else {
+          formTokenWithChars(T, TokenPtr, tok::unknown_command);
+          T.setUnknownCommandName(CommandName);
+          Diag(T.getLocation(), diag::warn_unknown_comment_command_name)
+              << SourceRange(T.getLocation(), T.getEndLocation());
           return;
         }
-        if (Info->IsVerbatimLineCommand) {
-          setupAndLexVerbatimLine(T, TokenPtr, Info);
-          return;
-        }
-        formTokenWithChars(T, TokenPtr, CommandKind);
-        T.setCommandID(Info->getID());
-        return;
       }
-
-      case '&':
-        lexHTMLCharacterReference(T);
+      if (Info->IsVerbatimBlockCommand) {
+        setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
         return;
-
-      case '<': {
-        TokenPtr++;
-        if (TokenPtr == CommentEnd) {
-          formTextToken(T, TokenPtr);
-          return;
-        }
-        const char C = *TokenPtr;
-        if (isHTMLIdentifierStartingCharacter(C))
-          setupAndLexHTMLStartTag(T);
-        else if (C == '/')
-          setupAndLexHTMLEndTag(T);
-        else
-          formTextToken(T, TokenPtr);
+      }
+      if (Info->IsVerbatimLineCommand) {
+        setupAndLexVerbatimLine(T, TokenPtr, Info);
         return;
       }
+      formTokenWithChars(T, TokenPtr, CommandKind);
+      T.setCommandID(Info->getID());
+      return;
+    }
 
-      case '\n':
-      case '\r':
-        TokenPtr = skipNewline(TokenPtr, CommentEnd);
-        formTokenWithChars(T, TokenPtr, tok::newline);
-
-        if (CommentState == LCS_InsideCComment)
-          skipLineStartingDecorations();
-        return;
+    case '&':
+      lexHTMLCharacterReference(T);
+      return;
 
-      default: {
-        size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
-                         find_first_of("\n\r\\@&<");
-        if (End != StringRef::npos)
-          TokenPtr += End;
-        else
-          TokenPtr = CommentEnd;
+    case '<': {
+      TokenPtr++;
+      if (TokenPtr == CommentEnd) {
         formTextToken(T, TokenPtr);
         return;
       }
+      const char C = *TokenPtr;
+      if (isHTMLIdentifierStartingCharacter(C))
+        setupAndLexHTMLStartTag(T);
+      else if (C == '/')
+        setupAndLexHTMLEndTag(T);
+      else
+        formTextToken(T, TokenPtr);
+      return;
     }
+
+    default:
+      return HandleNonCommandToken();
   }
 }
 
@@ -727,14 +740,13 @@ void Lexer::lexHTMLEndTag(Token &T) {
 }
 
 Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
-             const CommandTraits &Traits,
-             SourceLocation FileLoc,
-             const char *BufferStart, const char *BufferEnd):
-    Allocator(Allocator), Diags(Diags), Traits(Traits),
-    BufferStart(BufferStart), BufferEnd(BufferEnd),
-    FileLoc(FileLoc), BufferPtr(BufferStart),
-    CommentState(LCS_BeforeComment), State(LS_Normal) {
-}
+             const CommandTraits &Traits, SourceLocation FileLoc,
+             const char *BufferStart, const char *BufferEnd,
+             bool ParseCommands)
+    : Allocator(Allocator), Diags(Diags), Traits(Traits),
+      BufferStart(BufferStart), BufferEnd(BufferEnd), FileLoc(FileLoc),
+      BufferPtr(BufferStart), CommentState(LCS_BeforeComment), State(LS_Normal),
+      ParseCommands(ParseCommands) {}
 
 void Lexer::lex(Token &T) {
 again:

Modified: cfe/trunk/lib/AST/RawCommentList.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/RawCommentList.cpp?rev=332458&r1=332457&r2=332458&view=diff
==============================================================================
--- cfe/trunk/lib/AST/RawCommentList.cpp (original)
+++ cfe/trunk/lib/AST/RawCommentList.cpp Wed May 16 05:30:09 2018
@@ -335,3 +335,94 @@ void RawCommentList::addDeserializedComm
              BeforeThanCompare<RawComment>(SourceMgr));
   std::swap(Comments, MergedComments);
 }
+
+std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
+                                         DiagnosticsEngine &Diags) const {
+  llvm::StringRef CommentText = getRawText(SourceMgr);
+  if (CommentText.empty())
+    return "";
+
+  llvm::BumpPtrAllocator Allocator;
+  // We do not parse any commands, so CommentOptions are ignored by
+  // comments::Lexer. Therefore, we just use default-constructed options.
+  CommentOptions DefOpts;
+  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
+  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
+                    CommentText.begin(), CommentText.end(),
+                    /*ParseCommands=*/false);
+
+  std::string Result;
+  // A column number of the first non-whitespace token in the comment text.
+  // We skip whitespace up to this column, but keep the whitespace after this
+  // column. IndentColumn is calculated when lexing the first line and reused
+  // for the rest of lines.
+  unsigned IndentColumn = 0;
+
+  // Processes one line of the comment and adds it to the result.
+  // Handles skipping the indent at the start of the line.
+  // Returns false when eof is reached and true otherwise.
+  auto LexLine = [&](bool IsFirstLine) -> bool {
+    comments::Token Tok;
+    // Lex the first token on the line. We handle it separately, because we
+    // need to fix up its indentation.
+    L.lex(Tok);
+    if (Tok.is(comments::tok::eof))
+      return false;
+    if (Tok.is(comments::tok::newline)) {
+      Result += "\n";
+      return true;
+    }
+    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
+    bool LocInvalid = false;
+    unsigned TokColumn =
+        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
+    assert(!LocInvalid && "getFormattedText for invalid location");
+
+    // Amount of leading whitespace in TokText.
+    size_t WhitespaceLen = TokText.find_first_not_of(" \t");
+    if (WhitespaceLen == StringRef::npos)
+      WhitespaceLen = TokText.size();
+    // Remember the amount of whitespace we skipped in the first line to remove
+    // indent up to that column in the following lines.
+    if (IsFirstLine)
+      IndentColumn = TokColumn + WhitespaceLen;
+
+    // Amount of leading whitespace we actually want to skip.
+    // For the first line we skip all the whitespace.
+    // For the rest of the lines, we skip whitespace up to IndentColumn.
+    unsigned SkipLen =
+        IsFirstLine
+            ? WhitespaceLen
+            : std::min<size_t>(
+                  WhitespaceLen,
+                  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
+    llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
+    Result += Trimmed;
+    // Lex all tokens in the rest of the line.
+    for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
+      if (Tok.is(comments::tok::newline)) {
+        Result += "\n";
+        return true;
+      }
+      Result += L.getSpelling(Tok, SourceMgr);
+    }
+    // We've reached the end of file token.
+    return false;
+  };
+
+  auto DropTrailingNewLines = [](std::string &Str) {
+    while (!Str.empty() && Str.back() == '\n') // back() needs non-empty
+      Str.pop_back();
+  };
+
+  // Process first line separately to remember indent for the following lines.
+  if (!LexLine(/*IsFirstLine=*/true)) {
+    DropTrailingNewLines(Result);
+    return Result;
+  }
+  // Process the rest of the lines.
+  while (LexLine(/*IsFirstLine=*/false))
+    ;
+  DropTrailingNewLines(Result);
+  return Result;
+}

Modified: cfe/trunk/unittests/AST/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CMakeLists.txt?rev=332458&r1=332457&r2=332458&view=diff
==============================================================================
--- cfe/trunk/unittests/AST/CMakeLists.txt (original)
+++ cfe/trunk/unittests/AST/CMakeLists.txt Wed May 16 05:30:09 2018
@@ -9,6 +9,7 @@ add_clang_unittest(ASTTests
   ASTVectorTest.cpp
   CommentLexer.cpp
   CommentParser.cpp
+  CommentTextTest.cpp
   DataCollectionTest.cpp
   DeclPrinterTest.cpp
   DeclTest.cpp

Added: cfe/trunk/unittests/AST/CommentTextTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CommentTextTest.cpp?rev=332458&view=auto
==============================================================================
--- cfe/trunk/unittests/AST/CommentTextTest.cpp (added)
+++ cfe/trunk/unittests/AST/CommentTextTest.cpp Wed May 16 05:30:09 2018
@@ -0,0 +1,122 @@
+//===- unittest/AST/CommentTextTest.cpp - Comment text extraction test ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tests for user-friendly output formatting of comments, i.e.
+// RawComment::getFormattedText().
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/RawCommentList.h"
+#include "clang/Basic/CommentOptions.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <gtest/gtest.h>
+
+namespace clang {
+
+class CommentTextTest : public ::testing::Test {
+protected:
+  std::string formatComment(llvm::StringRef CommentText) {
+    SourceManagerForFile FileSourceMgr("comment-test.cpp", CommentText);
+    SourceManager& SourceMgr = FileSourceMgr.get();
+    // Treat everything from the first '/' to end of file as one RawComment.
+    auto CommentStartOffset = CommentText.find("/");
+    assert(CommentStartOffset != llvm::StringRef::npos);
+    FileID File = SourceMgr.getMainFileID();
+
+    SourceRange CommentRange(
+        SourceMgr.getLocForStartOfFile(File).getLocWithOffset(
+            CommentStartOffset),
+        SourceMgr.getLocForEndOfFile(File));
+    CommentOptions EmptyOpts;
+    // FIXME: technically, the Merged flag that we set here is incorrect, but
+    // that shouldn't matter.
+    RawComment Comment(SourceMgr, CommentRange, EmptyOpts, /*Merged=*/true);
+    DiagnosticsEngine Diags(new DiagnosticIDs, new DiagnosticOptions);
+    return Comment.getFormattedText(SourceMgr, Diags);
+  }
+};
+
+TEST_F(CommentTextTest, FormattedText) {
+  // clang-format off
+  auto ExpectedOutput =
+R"(This function does this and that.
+For example,
+   Runnning it in that case will give you
+   this result.
+That's about it.)";
+  // Two-slash comments. Every style below must produce ExpectedOutput.
+  EXPECT_EQ(ExpectedOutput, formatComment(
+R"cpp(
+// This function does this and that.
+// For example,
+//    Runnning it in that case will give you
+//    this result.
+// That's about it.)cpp"));
+
+  // Three-slash (Doxygen-style line) comments.
+  EXPECT_EQ(ExpectedOutput, formatComment(
+R"cpp(
+/// This function does this and that.
+/// For example,
+///    Runnning it in that case will give you
+///    this result.
+/// That's about it.)cpp"));
+
+  // Block comments with a leading '*' decoration on continuation lines.
+  EXPECT_EQ(ExpectedOutput, formatComment(
+R"cpp(
+/* This function does this and that.
+ * For example,
+ *    Runnning it in that case will give you
+ *    this result.
+ * That's about it.*/)cpp"));
+
+  // Doxygen-style block comments ("/**" opener).
+  EXPECT_EQ(ExpectedOutput, formatComment(
+R"cpp(
+/** This function does this and that.
+  * For example,
+  *    Runnning it in that case will give you
+  *    this result.
+  * That's about it.*/)cpp"));
+
+  // Weird indentation: comment markers and text start at varying columns.
+  EXPECT_EQ(ExpectedOutput, formatComment(
+R"cpp(
+       // This function does this and that.
+  //      For example,
+  //         Runnning it in that case will give you
+        //   this result.
+       // That's about it.)cpp"));
+  // clang-format on
+}
+
+TEST_F(CommentTextTest, KeepsDoxygenControlSeqs) {
+  // clang-format off
+  auto ExpectedOutput =
+R"(\brief This is the brief part of the comment.
+\param a something about a.
+@param b something about b.)";
+  // Doxygen commands must come through verbatim: only "/// " is stripped.
+  EXPECT_EQ(ExpectedOutput, formatComment(
+R"cpp(
+/// \brief This is the brief part of the comment.
+/// \param a something about a.
+/// @param b something about b.)cpp"));
+  // clang-format on
+}
+
+} // namespace clang