[clang-tools-extra] r360151 - [clangd] Introduce intermediate representation of formatted text

Ilya Biryukov via cfe-commits cfe-commits at lists.llvm.org
Tue May 7 07:18:18 PDT 2019


Author: ibiryukov
Date: Tue May  7 07:18:18 2019
New Revision: 360151

URL: http://llvm.org/viewvc/llvm-project?rev=360151&view=rev
Log:
[clangd] Introduce intermediate representation of formatted text

Summary: That can render to markdown or plain text. Used for findHover requests.

Reviewers: malaperle, sammccall, kadircet

Reviewed By: sammccall

Subscribers: mgorny, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58547

Added:
    clang-tools-extra/trunk/clangd/FormattedString.cpp
    clang-tools-extra/trunk/clangd/FormattedString.h
    clang-tools-extra/trunk/clangd/unittests/FormattedStringTests.cpp
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/clangd/unittests/CMakeLists.txt

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=360151&r1=360150&r2=360151&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Tue May  7 07:18:18 2019
@@ -50,6 +50,7 @@ add_clang_library(clangDaemon
   FileDistance.cpp
   FS.cpp
   FSProvider.cpp
+  FormattedString.cpp
   FuzzyMatch.cpp
   GlobalCompilationDatabase.cpp
   Headers.cpp

Added: clang-tools-extra/trunk/clangd/FormattedString.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FormattedString.cpp?rev=360151&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/FormattedString.cpp (added)
+++ clang-tools-extra/trunk/clangd/FormattedString.cpp Tue May  7 07:18:18 2019
@@ -0,0 +1,173 @@
+//===--- FormattedString.cpp --------------------------------*- C++-*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "FormattedString.h"
+#include "clang/Basic/CharInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <string>
+
+namespace clang {
+namespace clangd {
+
+namespace {
+/// Escape a markdown text block by prefixing every ASCII punctuation character
+/// with a backslash, so the text cannot introduce any markdown construct.
+static std::string renderText(llvm::StringRef Input) {
+  // Escaping ASCII punctuation ensures we can't start a markdown construct.
+  constexpr llvm::StringLiteral Punctuation =
+      R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
+
+  std::string R;
+  for (size_t From = 0; From < Input.size();) {
+    size_t Next = Input.find_first_of(Punctuation, From);
+    R += Input.substr(From, Next - From);
+    if (Next == llvm::StringRef::npos)
+      break; // No more punctuation; the tail was appended above.
+    R += "\\";
+    R += Input[Next];
+
+    From = Next + 1;
+  }
+  return R;
+}
+
+/// Renders \p Input as an inline block of code in markdown. The returned value
+/// is surrounded by backticks and every backtick in the contents is doubled.
+static std::string renderInlineBlock(llvm::StringRef Input) {
+  std::string R;
+  // Double all backticks to make sure we don't close the inline block early.
+  for (size_t From = 0; From < Input.size();) {
+    size_t Next = Input.find("`", From);
+    R += Input.substr(From, Next - From);
+    if (Next == llvm::StringRef::npos)
+      break;
+    R += "``"; // Double the found backtick.
+
+    From = Next + 1;
+  }
+  // If the result starts or ends with a backtick, add spaces on both sides.
+  // The spaces are ignored by markdown renderers.
+  if (llvm::StringRef(R).startswith("`") || llvm::StringRef(R).endswith("`"))
+    return "` " + std::move(R) + " `";
+  // Markdown renderers should ignore first and last space if both are there. We
+  // add an extra pair of spaces in that case to make sure we render what the
+  // user intended.
+  if (llvm::StringRef(R).startswith(" ") && llvm::StringRef(R).endswith(" "))
+    return "` " + std::move(R) + " `";
+  return "`" + std::move(R) + "`";
+}
+/// Render \p Input as a markdown code block with a specified \p Language. The
+/// result is surrounded by >= 3 backticks. Although markdown also allows using
+/// '~' for code blocks, this function never emits them.
+static std::string renderCodeBlock(llvm::StringRef Input,
+                                   llvm::StringRef Language) {
+  // Count the maximum number of consecutive backticks in \p Input. We need to
+  // start and end the code block with more.
+  unsigned MaxBackticks = 0;
+  unsigned Backticks = 0;
+  for (char C : Input) {
+    if (C == '`') {
+      ++Backticks;
+      continue;
+    }
+    MaxBackticks = std::max(MaxBackticks, Backticks);
+    Backticks = 0;
+  }
+  MaxBackticks = std::max(Backticks, MaxBackticks); // Run ending the input.
+  // Use the corresponding number of backticks to start and end a code block.
+  std::string BlockMarker(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
+  return BlockMarker + Language.str() + "\n" + Input.str() + "\n" + BlockMarker;
+}
+
+} // namespace
+
+void FormattedString::appendText(std::string Text) {
+  // We merge consecutive blocks of text to simplify the overall structure.
+  if (Chunks.empty() || Chunks.back().Kind != ChunkKind::PlainText) {
+    Chunk C;
+    C.Kind = ChunkKind::PlainText;
+    Chunks.push_back(C); // Start a new, still empty, text chunk.
+  }
+  // FIXME: ensure there is whitespace between the chunks.
+  Chunks.back().Contents += Text;
+}
+
+void FormattedString::appendCodeBlock(std::string Code, std::string Language) {
+  Chunk C; // Code blocks are never merged; each call adds its own chunk.
+  C.Kind = ChunkKind::CodeBlock;
+  C.Contents = std::move(Code);
+  C.Language = std::move(Language);
+  Chunks.push_back(std::move(C));
+}
+
+void FormattedString::appendInlineCode(std::string Code) {
+  Chunk C; // Unlike plain text, inline code chunks are never merged.
+  C.Kind = ChunkKind::InlineCodeBlock;
+  C.Contents = std::move(Code);
+  Chunks.push_back(std::move(C));
+}
+
+std::string FormattedString::renderAsMarkdown() const {
+  std::string R;
+  for (const auto &C : Chunks) {
+    switch (C.Kind) {
+    case ChunkKind::PlainText:
+      R += renderText(C.Contents);
+      continue; // Next chunk; falling out of the switch is unreachable.
+    case ChunkKind::InlineCodeBlock:
+      // Make sure we don't glue two backticks together.
+      if (llvm::StringRef(R).endswith("`"))
+        R += " ";
+      R += renderInlineBlock(C.Contents);
+      continue;
+    case ChunkKind::CodeBlock:
+      if (!R.empty() && !llvm::StringRef(R).endswith("\n"))
+        R += "\n"; // The opening fence must start on its own line.
+      R += renderCodeBlock(C.Contents, C.Language);
+      R += "\n"; // Terminate the line holding the closing fence.
+      continue;
+    }
+    llvm_unreachable("unhanlded ChunkKind");
+  }
+  return R;
+}
+
+std::string FormattedString::renderAsPlainText() const {
+  std::string R;
+  auto EnsureWhitespace = [&]() {
+    if (R.empty() || isWhitespace(R.back()))
+      return; // Nothing to separate yet, or a separator is already present.
+    R += " ";
+  };
+  for (const auto &C : Chunks) {
+    switch (C.Kind) {
+    case ChunkKind::PlainText:
+      EnsureWhitespace();
+      R += C.Contents; // Verbatim; plain text needs no escaping.
+      continue;
+    case ChunkKind::InlineCodeBlock:
+      EnsureWhitespace();
+      R += C.Contents;
+      continue;
+    case ChunkKind::CodeBlock:
+      if (!R.empty())
+        R += "\n\n"; // Set the code block apart from what precedes it.
+      R += C.Contents;
+      if (!llvm::StringRef(C.Contents).endswith("\n"))
+        R += "\n"; // Make sure the block ends its line.
+      continue;
+    }
+    llvm_unreachable("unhanlded ChunkKind");
+  }
+  while (!R.empty() && isWhitespace(R.back())) // Drop trailing whitespace.
+    R.pop_back();
+  return R;
+}
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/FormattedString.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FormattedString.h?rev=360151&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/FormattedString.h (added)
+++ clang-tools-extra/trunk/clangd/FormattedString.h Tue May  7 07:18:18 2019
@@ -0,0 +1,57 @@
+//===--- FormattedString.h ----------------------------------*- C++-*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple intermediate representation of formatted text that could be
+// converted to plaintext or markdown.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
+
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+
+/// A structured string representation that could be converted to markdown or
+/// plaintext upon request.
+class FormattedString {
+public:
+  /// Append plain text to the end of the string.
+  void appendText(std::string Text);
+  /// Append a block of code, C++ by default. This translates to a ``` block in
+  /// markdown. In a plain text representation, the code block will be
+  /// surrounded by newlines.
+  void appendCodeBlock(std::string Code, std::string Language = "cpp");
+  /// Append an inline block of C++ code. This translates to the ` block in
+  /// markdown.
+  void appendInlineCode(std::string Code);
+
+  std::string renderAsMarkdown() const; ///< Text escaped, code in backticks.
+  std::string renderAsPlainText() const; ///< Chunk contents without markup.
+
+private:
+  enum class ChunkKind {
+    PlainText,       ///< A plain text paragraph.
+    CodeBlock,       ///< A block of code.
+    InlineCodeBlock, ///< An inline block of code.
+  };
+  struct Chunk {
+    ChunkKind Kind = ChunkKind::PlainText;
+    std::string Contents;
+    /// Language for code block chunks. Ignored for other chunks.
+    std::string Language;
+  };
+  std::vector<Chunk> Chunks; ///< Chunks in the order they were appended.
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif

Modified: clang-tools-extra/trunk/clangd/unittests/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/CMakeLists.txt?rev=360151&r1=360150&r2=360151&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/unittests/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/unittests/CMakeLists.txt Tue May  7 07:18:18 2019
@@ -38,6 +38,7 @@ add_unittest(ClangdUnitTests ClangdTests
   FileDistanceTests.cpp
   FileIndexTests.cpp
   FindSymbolsTests.cpp
+  FormattedStringTests.cpp
   FSTests.cpp
   FunctionTests.cpp
   FuzzyMatchTests.cpp

Added: clang-tools-extra/trunk/clangd/unittests/FormattedStringTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/FormattedStringTests.cpp?rev=360151&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/unittests/FormattedStringTests.cpp (added)
+++ clang-tools-extra/trunk/clangd/unittests/FormattedStringTests.cpp Tue May  7 07:18:18 2019
@@ -0,0 +1,156 @@
+//===-- FormattedStringTests.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "FormattedString.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+TEST(FormattedString, Basic) {
+  FormattedString S; // An empty string renders as empty in both formats.
+  EXPECT_EQ(S.renderAsPlainText(), "");
+  EXPECT_EQ(S.renderAsMarkdown(), "");
+
+  S.appendText("foobar"); // No punctuation, so markdown needs no escapes.
+  EXPECT_EQ(S.renderAsPlainText(), "foobar");
+  EXPECT_EQ(S.renderAsMarkdown(), "foobar");
+
+  S = FormattedString();
+  S.appendInlineCode("foobar");
+  EXPECT_EQ(S.renderAsPlainText(), "foobar");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foobar`");
+
+  S = FormattedString();
+  S.appendCodeBlock("foobar"); // Language defaults to "cpp".
+  EXPECT_EQ(S.renderAsPlainText(), "foobar"); // Trailing newline is stripped.
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foobar\n"
+                                  "```\n");
+}
+
+TEST(FormattedString, CodeBlocks) {
+  FormattedString S;
+  S.appendCodeBlock("foobar");
+  S.appendCodeBlock("bazqux", "javascript"); // Explicit language tag.
+
+  EXPECT_EQ(S.renderAsPlainText(), "foobar\n\n\nbazqux");
+  std::string ExpectedMarkdown = R"md(```cpp
+foobar
+```
+```javascript
+bazqux
+```
+)md";
+  EXPECT_EQ(S.renderAsMarkdown(), ExpectedMarkdown);
+
+  S = FormattedString();
+  S.appendInlineCode("foobar");
+  S.appendInlineCode("bazqux");
+  EXPECT_EQ(S.renderAsPlainText(), "foobar bazqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foobar` `bazqux`"); // Backticks kept apart.
+
+  S = FormattedString();
+  S.appendText("foo");
+  S.appendInlineCode("bar");
+  S.appendText("baz");
+
+  EXPECT_EQ(S.renderAsPlainText(), "foo bar baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "foo`bar`baz"); // No spaces are inserted.
+}
+
+TEST(FormattedString, Escaping) {
+  // Check some ASCII punctuation.
+  FormattedString S;
+  S.appendText("*!`");
+  EXPECT_EQ(S.renderAsMarkdown(), "\\*\\!\\`");
+
+  // Check all ASCII punctuation.
+  S = FormattedString();
+  std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
+  // Same text, with each character escaped.
+  std::string EscapedPunctuation;
+  EscapedPunctuation.reserve(2 * Punctuation.size());
+  for (char C : Punctuation)
+    EscapedPunctuation += std::string("\\") + C;
+  S.appendText(Punctuation);
+  EXPECT_EQ(S.renderAsMarkdown(), EscapedPunctuation);
+
+  // In code blocks we don't need to escape ASCII punctuation.
+  S = FormattedString();
+  S.appendInlineCode("* foo !+ bar * baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "`* foo !+ bar * baz`");
+  S = FormattedString();
+  S.appendCodeBlock("#define FOO\n* foo !+ bar * baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "#define FOO\n* foo !+ bar * baz\n"
+                                  "```\n");
+
+  // But we have to escape the backticks.
+  S = FormattedString();
+  S.appendInlineCode("foo`bar`baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foo``bar``baz`");
+
+  S = FormattedString();
+  S.appendCodeBlock("foo`bar`baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foo`bar`baz\n"
+                                  "```\n");
+
+  // Inline code blocks starting or ending with backticks should add spaces.
+  S = FormattedString();
+  S.appendInlineCode("`foo");
+  EXPECT_EQ(S.renderAsMarkdown(), "` ``foo `");
+  S = FormattedString();
+  S.appendInlineCode("foo`");
+  EXPECT_EQ(S.renderAsMarkdown(), "` foo`` `");
+  S = FormattedString();
+  S.appendInlineCode("`foo`");
+  EXPECT_EQ(S.renderAsMarkdown(), "` ``foo`` `");
+
+  // Should also add extra spaces if the block starts and ends with spaces.
+  S = FormattedString();
+  S.appendInlineCode(" foo ");
+  EXPECT_EQ(S.renderAsMarkdown(), "`  foo  `");
+  S = FormattedString();
+  S.appendInlineCode("foo ");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foo `");
+  S = FormattedString();
+  S.appendInlineCode(" foo");
+  EXPECT_EQ(S.renderAsMarkdown(), "` foo`");
+
+  // Code blocks might need more than 3 backticks.
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz `\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foobarbaz `\nqux\n"
+                                  "```\n");
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz ``\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foobarbaz ``\nqux\n"
+                                  "```\n");
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz ```\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "````cpp\n"
+                                  "foobarbaz ```\nqux\n"
+                                  "````\n");
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz ` `` ``` ```` `\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "`````cpp\n"
+                                  "foobarbaz ` `` ``` ```` `\nqux\n"
+                                  "`````\n");
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang




More information about the cfe-commits mailing list