[clang-tools-extra] ebd522a - [clangd] Tweak parseDocumentation loop to use raw lines. NFC

Fri Apr 3 23:08:00 PDT 2020

Author: Sam McCall
Date: 2020-04-04T08:07:51+02:00
New Revision: ebd522aaa8aad74ea44db5bb4b74f1b784da9f99

URL: https://github.com/llvm/llvm-project/commit/ebd522aaa8aad74ea44db5bb4b74f1b784da9f99
DIFF: https://github.com/llvm/llvm-project/commit/ebd522aaa8aad74ea44db5bb4b74f1b784da9f99.diff

LOG: [clangd] Tweak parseDocumentation loop to use raw lines. NFC

This clears the way for the raw lines themselves to be parsed easily.

(Okay, one functional change: a line that ends in punctuation followed by trailing whitespace is now also treated as a hard line break.)

Added: 
    

Modified: 
    clang-tools-extra/clangd/Hover.cpp
    clang-tools-extra/clangd/unittests/HoverTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index e54ba2e364a0..71ab985affa2 100644

--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -616,46 +616,39 @@ llvm::Optional<HoverInfo> getHoverContents(const Expr *E, ParsedAST &AST) {
   return llvm::None;
 }
 
-bool isParagraphLineBreak(llvm::StringRef Str, size_t LineBreakIndex) {
-  return Str.substr(LineBreakIndex + 1)
-      .drop_while([](auto C) { return C == ' ' || C == '\t'; })
-      .startswith("\n");
+bool isParagraphBreak(llvm::StringRef Rest) {
+  return Rest.ltrim(" \t").startswith("\n");
 }
 
-bool isPunctuationLineBreak(llvm::StringRef Str, size_t LineBreakIndex) {
+bool punctuationIndicatesLineBreak(llvm::StringRef Line) {
   constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
 
-  return LineBreakIndex > 0 && Punctuation.contains(Str[LineBreakIndex - 1]);
+  Line = Line.rtrim();
+  return !Line.empty() && Punctuation.contains(Line.back());
 }
 
-bool isFollowedByHardLineBreakIndicator(llvm::StringRef Str,
-                                        size_t LineBreakIndex) {
+bool isHardLineBreakIndicator(llvm::StringRef Rest) {
   // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
   // '#' headings, '`' code blocks
-  constexpr llvm::StringLiteral LinbreakIdenticators = R"txt(-*@\>#`)txt";
+  constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
 
-  auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1);
-
-  if (NextNonSpaceCharIndex == llvm::StringRef::npos) {
+  Rest = Rest.ltrim(" \t");
+  if (Rest.empty())
     return false;
-  }
-
-  auto FollowedBySingleCharIndicator =
-      LinbreakIdenticators.find(Str[NextNonSpaceCharIndex]) !=
-      llvm::StringRef::npos;
 
-  auto FollowedByNumberedListIndicator =
-      llvm::isDigit(Str[NextNonSpaceCharIndex]) &&
-      NextNonSpaceCharIndex + 1 < Str.size() &&
-      (Str[NextNonSpaceCharIndex + 1] == '.' ||
-       Str[NextNonSpaceCharIndex + 1] == ')');
+  if (LinebreakIndicators.contains(Rest.front()))
+    return true;
 
-  return FollowedBySingleCharIndicator || FollowedByNumberedListIndicator;
+  if (llvm::isDigit(Rest.front())) {
+    llvm::StringRef AfterDigit = Rest.drop_while(llvm::isDigit);
+    if (AfterDigit.startswith(".") || AfterDigit.startswith(")"))
+      return true;
+  }
+  return false;
 }
 
-bool isHardLineBreak(llvm::StringRef Str, size_t LineBreakIndex) {
-  return isPunctuationLineBreak(Str, LineBreakIndex) ||
-         isFollowedByHardLineBreakIndicator(Str, LineBreakIndex);
+bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) {
+  return punctuationIndicatesLineBreak(Line) || isHardLineBreakIndicator(Rest);
 }
 
 } // namespace
@@ -814,42 +807,32 @@ markup::Document HoverInfo::present() const {
 }
 
 void parseDocumentation(llvm::StringRef Input, markup::Document &Output) {
+  std::vector<llvm::StringRef> ParagraphLines;
+  auto FlushParagraph = [&] {
+    if (ParagraphLines.empty())
+      return;
+    auto &P = Output.addParagraph();
+    for (llvm::StringRef Line : ParagraphLines)
+      P.appendText(Line.str());
+    ParagraphLines.clear();
+  };
 
-  constexpr auto WhiteSpaceChars = "\t\n\v\f\r ";
-
-  auto TrimmedInput = Input.trim();
-
-  std::string CurrentLine;
-
-  for (size_t CharIndex = 0; CharIndex < TrimmedInput.size();) {
-    if (TrimmedInput[CharIndex] == '\n') {
-      // Trim whitespace infront of linebreak
-      const auto LastNonSpaceCharIndex =
-          CurrentLine.find_last_not_of(WhiteSpaceChars) + 1;
-      CurrentLine.erase(LastNonSpaceCharIndex);
+  llvm::StringRef Line, Rest;
+  for (std::tie(Line, Rest) = Input.split('\n');
+       !(Line.empty() && Rest.empty());
+       std::tie(Line, Rest) = Rest.split('\n')) {
 
-      if (isParagraphLineBreak(TrimmedInput, CharIndex) ||
-          isHardLineBreak(TrimmedInput, CharIndex)) {
-        // FIXME: maybe distinguish between line breaks and paragraphs
-        Output.addParagraph().appendText(CurrentLine);
-        CurrentLine = "";
-      } else {
-        // Ommit linebreak
-        CurrentLine += ' ';
-      }
+    // After a linebreak remove spaces to avoid 4 space markdown code blocks.
+    // FIXME: make FlushParagraph handle this.
+    Line = Line.ltrim();
+    if (!Line.empty())
+      ParagraphLines.push_back(Line);
 
-      CharIndex++;
-      // After a linebreak always remove spaces to avoid 4 space markdown code
-      // blocks, also skip all additional linebreaks since they have no effect
-      CharIndex = TrimmedInput.find_first_not_of(WhiteSpaceChars, CharIndex);
-    } else {
-      CurrentLine += TrimmedInput[CharIndex];
-      CharIndex++;
+    if (isParagraphBreak(Rest) || isHardLineBreakAfter(Line, Rest)) {
+      FlushParagraph();
     }
   }
-  if (!CurrentLine.empty()) {
-    Output.addParagraph().appendText(CurrentLine);
-  }
+  FlushParagraph();
 }
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,

diff  --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index f9ab78dd753b..b51329e95f2c 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -1980,6 +1980,11 @@ TEST(Hover, DocCommentLineBreakConversion) {
                    "foo.  \nbar",
                    "foo.\nbar",
                },
+               {
+                   "foo. \nbar",
+                   "foo.  \nbar",
+                   "foo.\nbar",
+               },
                {
                    "foo\n*bar",
                    "foo  \n\\*bar",