[clang-tools-extra] r323101 - [clangd] Add support for different file URI schemas.

Eric Liu via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 22 03:48:20 PST 2018


Author: ioeric
Date: Mon Jan 22 03:48:20 2018
New Revision: 323101

URL: http://llvm.org/viewvc/llvm-project?rev=323101&view=rev
Log:
[clangd] Add support for different file URI schemas.

Summary: I will replace the existing URI struct in Protocol.h with the new URI and rename FileURI to URI in a followup patch.

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: jkorous-apple, klimek, mgorny, ilya-biryukov, cfe-commits

Differential Revision: https://reviews.llvm.org/D41946

Added:
    clang-tools-extra/trunk/clangd/URI.cpp
    clang-tools-extra/trunk/clangd/URI.h
    clang-tools-extra/trunk/unittests/clangd/URITests.cpp
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=323101&r1=323100&r2=323101&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Mon Jan 22 03:48:20 2018
@@ -21,6 +21,7 @@ add_clang_library(clangDaemon
   ProtocolHandlers.cpp
   SourceCode.cpp
   Trace.cpp
+  URI.cpp
   XRefs.cpp
   index/FileIndex.cpp
   index/Index.cpp

Added: clang-tools-extra/trunk/clangd/URI.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/URI.cpp?rev=323101&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/URI.cpp (added)
+++ clang-tools-extra/trunk/clangd/URI.cpp Mon Jan 22 03:48:20 2018
@@ -0,0 +1,199 @@
+//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include <iomanip>
+#include <sstream>
+
+LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
+
+namespace clang {
+namespace clangd {
+namespace {
+
+inline llvm::Error make_string_error(const llvm::Twine &Message) {
+  return llvm::make_error<llvm::StringError>(Message, // Text-only error.
+                                             llvm::inconvertibleErrorCode());
+}
+
+/// \brief Handles the "file" scheme: the URI body is an absolute path in the
+/// file system (with leading '/'). Authority and hint path are ignored.
+class FileSystemScheme : public URIScheme {
+public:
+  static const char *Scheme; // Scheme name; defined after the class.
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef /*HintPath*/) const override {
+    if (!Body.startswith("/")) // Relative bodies are rejected with an error.
+      return make_string_error("File scheme: expect body to be an absolute "
+                               "path starting with '/': " +
+                               Body);
+    // For Windows paths e.g. /X: drop the leading '/' so "X:" comes first.
+    if (Body.size() > 2 && Body[0] == '/' && Body[2] == ':')
+      Body.consume_front("/");
+    llvm::SmallVector<char, 16> Path(Body.begin(), Body.end());
+    llvm::sys::path::native(Path); // Rewrites Path in place to native form.
+    return std::string(Path.begin(), Path.end());
+  }
+
+  llvm::Expected<FileURI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    using namespace llvm::sys;
+
+    std::string Body;
+    // For Windows paths e.g. X: prepend '/' so the body starts with '/'.
+    if (AbsolutePath.size() > 1 && AbsolutePath[1] == ':')
+      Body = "/";
+    Body += path::convert_to_slash(AbsolutePath);
+    return FileURI::create(Scheme, /*Authority=*/"", Body);
+  }
+};
+
+const char *FileSystemScheme::Scheme = "file";
+
+static URISchemeRegistry::Add<FileSystemScheme> // Registry entry for "file".
+    X(FileSystemScheme::Scheme,
+      "URI scheme for absolute paths in the file system.");
+
+llvm::Expected<std::unique_ptr<URIScheme>>
+findSchemeByName(llvm::StringRef Scheme) { // Scans every registry entry.
+  for (auto I = URISchemeRegistry::begin(), E = URISchemeRegistry::end();
+       I != E; ++I) {
+    if (I->getName() != Scheme)
+      continue;
+    return I->instantiate(); // First entry whose name matches wins.
+  }
+  return make_string_error("Can't find scheme: " + Scheme); // No match.
+}
+
+bool shouldEscape(unsigned char C) {
+  // Unreserved (RFC 3986 2.3): ALPHA / DIGIT / '-' / '.' / '_' / '~'.
+  const bool IsAlnum = (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
+                       (C >= '0' && C <= '9');
+  switch (C) {
+  case '-':
+  case '_':
+  case '.':
+  case '~':
+  case '/': // '/' is only reserved when parsing.
+    return false;
+  }
+  return !IsAlnum; // Returns true for every other byte: it must be escaped.
+}
+
+/// Encodes a string according to percent-encoding.
+/// - Unreserved characters are not escaped.
+/// - Reserved characters are always escaped, with exceptions like '/'.
+/// - All other characters are escaped.
+std::string percentEncode(llvm::StringRef Content) {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  for (unsigned char C : Content)
+    if (shouldEscape(C))
+      OS << '%' << llvm::format_hex_no_prefix(C, 2); // '%' plus 2 hex digits.
+    else
+      OS << C;
+
+  OS.flush(); // Make sure everything written to OS has reached Result.
+  return Result;
+}
+
+/// Decodes percent-encoding; a '%' without two hex digits after it is kept.
+std::string percentDecode(llvm::StringRef Content) {
+  std::string Result;
+  for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
+    if (*I != '%') { // Ordinary character: copy through.
+      Result += *I;
+      continue;
+    }
+    if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
+        llvm::isHexDigit(*(I + 2))) {
+      Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
+      I += 2; // Skip the two hex digits; the loop's ++I skips the '%'.
+    } else // Malformed escape such as a trailing "%3": emit the '%' itself.
+      Result.push_back(*I);
+  }
+  return Result;
+}
+
+} // namespace
+
+llvm::Expected<FileURI> FileURI::create(llvm::StringRef Scheme,
+                                        llvm::StringRef Authority,
+                                        llvm::StringRef Body) {
+  if (Scheme.empty()) // Fails: a URI without a scheme is rejected.
+    return make_string_error("Scheme must be specified in a URI.");
+  if (!Authority.empty() && !Body.startswith("/")) // e.g. "auth" + "x/y/z".
+    return make_string_error(
+        "URI body must start with '/' when authority is present.");
+  FileURI U; // Components are stored exactly as given, without encoding.
+  U.Scheme = Scheme;
+  U.Authority = Authority;
+  U.Body = Body;
+  return U;
+}
+
+std::string FileURI::toString() const {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  OS << percentEncode(Scheme) << ":";
+  if (Authority.empty() && Body.empty()) // Nothing follows "<scheme>:".
+    return OS.str();
+  // If authority is empty, we only print the "//" prefix if body starts with
+  // "/"; otherwise parse() would read the start of the body as the authority.
+  if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
+    OS << "//" << percentEncode(Authority);
+  OS << percentEncode(Body); // The body itself is always printed.
+  OS.flush();
+  return Result;
+}
+
+llvm::Expected<FileURI> FileURI::parse(llvm::StringRef OrigUri) {
+  FileURI U;
+  llvm::StringRef Uri = OrigUri; // Working copy, consumed left to right.
+
+  auto Pos = Uri.find(':');
+  if (Pos == 0 || Pos == llvm::StringRef::npos) // Empty scheme, or no ':'.
+    return make_string_error("Scheme must be provided in URI: " + OrigUri);
+  U.Scheme = percentDecode(Uri.substr(0, Pos));
+  Uri = Uri.substr(Pos + 1);
+  if (Uri.consume_front("//")) { // An authority is only read after "//".
+    Pos = Uri.find('/'); // Authority runs up to the next '/', if any.
+    U.Authority = percentDecode(Uri.substr(0, Pos));
+    Uri = Uri.substr(Pos); // What remains, from that '/' onwards, is the body.
+  }
+  U.Body = percentDecode(Uri);
+  return U;
+}
+
+llvm::Expected<FileURI> FileURI::create(llvm::StringRef AbsolutePath,
+                                        llvm::StringRef Scheme) {
+  if (!llvm::sys::path::is_absolute(AbsolutePath)) // Checked before lookup.
+    return make_string_error("Not a valid absolute path: " + AbsolutePath);
+  auto S = findSchemeByName(Scheme);
+  if (!S) // Unknown scheme: forward the lookup error to the caller.
+    return S.takeError();
+  return S->get()->uriFromAbsolutePath(AbsolutePath); // Scheme decides body.
+}
+
+llvm::Expected<std::string> FileURI::resolve(const FileURI &Uri,
+                                             llvm::StringRef HintPath) {
+  auto S = findSchemeByName(Uri.Scheme);
+  if (!S) // Unknown scheme: forward the lookup error to the caller.
+    return S.takeError();
+  return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/URI.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/URI.h?rev=323101&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/URI.h (added)
+++ clang-tools-extra/trunk/clangd/URI.h Mon Jan 22 03:48:20 2018
@@ -0,0 +1,101 @@
+//===--- URI.h - File URIs with schemes --------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Registry.h"
+
+namespace clang {
+namespace clangd {
+
+/// A URI describes the location of a source file.
+/// In the simplest case, this is a "file" URI that directly encodes the
+/// absolute path to a file. More abstract cases are possible: a shared index
+/// service might expose repo:// URIs that are relative to the source control
+/// root.
+///
+/// Clangd handles URIs of the form <scheme>:[//<authority>]<body>. It doesn't
+/// further split the authority or body into constituent parts (e.g. query
+/// strings are included in the body).
+class FileURI {
+public:
+  /// Returns decoded scheme e.g. "https"
+  llvm::StringRef scheme() const { return Scheme; }
+  /// Returns decoded authority e.g. "reviews.llvm.org"
+  llvm::StringRef authority() const { return Authority; }
+  /// Returns decoded body e.g. "/D41946"
+  llvm::StringRef body() const { return Body; }
+
+  /// Returns a string URI with all components percent-encoded.
+  std::string toString() const;
+
+  /// Create a FileURI from unescaped scheme+authority+body.
+  static llvm::Expected<FileURI> create(llvm::StringRef Scheme,
+                                        llvm::StringRef Authority,
+                                        llvm::StringRef Body);
+
+  /// Creates a FileURI for a file in the given scheme. \p Scheme must be
+  /// registered and \p AbsolutePath absolute. toString() percent-encodes it.
+  static llvm::Expected<FileURI> create(llvm::StringRef AbsolutePath,
+                                        llvm::StringRef Scheme = "file");
+
+  /// Parse a URI string "<scheme>:[//<authority>]<body>". Percent-encoded
+  /// characters in the URI will be decoded.
+  static llvm::Expected<FileURI> parse(llvm::StringRef Uri);
+
+  /// Resolves the absolute path of \p U. If there is no matching scheme, or the
+  /// URI is invalid in the scheme, this returns an error.
+  ///
+  /// \p HintPath A related path, such as the current file or working directory,
+  /// which can help disambiguate when the same file exists in many workspaces.
+  static llvm::Expected<std::string> resolve(const FileURI &U,
+                                             llvm::StringRef HintPath = "");
+
+  friend bool operator==(const FileURI &LHS, const FileURI &RHS) {
+    return std::tie(LHS.Scheme, LHS.Authority, LHS.Body) ==
+           std::tie(RHS.Scheme, RHS.Authority, RHS.Body);
+  } // NOTE(review): std::tie needs <tuple>; it is not included directly here.
+
+private:
+  FileURI() = default; // Private: instances come from create() or parse().
+
+  std::string Scheme;
+  std::string Authority;
+  std::string Body;
+};
+
+/// URIScheme is an extension point for teaching clangd to recognize a custom
+/// URI scheme. This is expected to be implemented and exposed via the
+/// URISchemeRegistry.
+class URIScheme {
+public:
+  virtual ~URIScheme() = default;
+
+  /// Returns the absolute path of the file corresponding to the URI
+  /// authority+body in the file system. See FileURI::resolve for semantics of
+  /// \p HintPath.
+  virtual llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
+                  llvm::StringRef HintPath) const = 0;
+
+  virtual llvm::Expected<FileURI> // Builds this scheme's URI for a file path.
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const = 0;
+};
+
+/// Registry of URIScheme implementations. By default, a "file" scheme is
+/// supported where URI paths are always absolute in the file system.
+typedef llvm::Registry<URIScheme> URISchemeRegistry;
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H

Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=323101&r1=323100&r2=323101&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Mon Jan 22 03:48:20 2018
@@ -18,6 +18,7 @@ add_extra_unittest(ClangdTests
   FuzzyMatchTests.cpp
   IndexTests.cpp
   JSONExprTests.cpp
+  URITests.cpp
   TestFS.cpp
   TraceTests.cpp
   SourceCodeTests.cpp

Added: clang-tools-extra/trunk/unittests/clangd/URITests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/URITests.cpp?rev=323101&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/URITests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/URITests.cpp Mon Jan 22 03:48:20 2018
@@ -0,0 +1,220 @@
+//===-- URITests.cpp  ---------------------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestFS.h"
+#include "URI.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using ::testing::AllOf;
+
+MATCHER_P(Scheme, S, "") { return arg.scheme() == S; } // Match on scheme().
+MATCHER_P(Authority, A, "") { return arg.authority() == A; } // On authority().
+MATCHER_P(Body, B, "") { return arg.body() == B; } // Match on body().
+
+// Assume all files in the scheme have a "test-root/" root directory, and the
+// URI body is the path relative to that root directory.
+// So the URI of "/some-dir/test-root/x/y/z" is "test:x/y/z".
+class TestScheme : public URIScheme {
+public:
+  static const char *Scheme; // Scheme name; defined after the class.
+
+  static const char *TestRoot; // Root directory marker; defined after the class.
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef HintPath) const override {
+    auto Pos = HintPath.find(TestRoot); // The hint supplies the root's prefix.
+    assert(Pos != llvm::StringRef::npos);
+    return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body)
+        .str();
+  }
+
+  llvm::Expected<FileURI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    auto Pos = AbsolutePath.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    return FileURI::create( // Body is whatever follows TestRoot in the path.
+        Scheme, /*Authority=*/"",
+        AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size()));
+  }
+};
+
+const char *TestScheme::Scheme = "test";
+const char *TestScheme::TestRoot = "/test-root/"; // Searched for in paths.
+
+static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
+
+std::string createOrDie(llvm::StringRef AbsolutePath,
+                        llvm::StringRef Scheme = "file") {
+  auto Uri = FileURI::create(AbsolutePath, Scheme);
+  if (!Uri) // Creation is expected to succeed; report the error text if not.
+    llvm_unreachable(llvm::toString(Uri.takeError()).c_str());
+  return Uri->toString(); // Percent-encoded string form.
+}
+
+std::string createOrDie(llvm::StringRef Scheme, llvm::StringRef Authority,
+                        llvm::StringRef Body) {
+  auto Uri = FileURI::create(Scheme, Authority, Body);
+  if (!Uri) // Creation is expected to succeed; report the error text if not.
+    llvm_unreachable(llvm::toString(Uri.takeError()).c_str());
+  return Uri->toString(); // Percent-encoded string form.
+}
+
+FileURI parseOrDie(llvm::StringRef Uri) {
+  auto U = FileURI::parse(Uri);
+  if (!U) // Parsing is expected to succeed; report the error text if not.
+    llvm_unreachable(llvm::toString(U.takeError()).c_str());
+  return *U;
+}
+
+TEST(PercentEncodingTest, Encode) { // '/' and '~' pass through; "!;" do not.
+  EXPECT_EQ(createOrDie("x", /*Authority=*/"", "a/b/c"), "x:a/b/c");
+  EXPECT_EQ(createOrDie("x", /*Authority=*/"", "a!b;c~"), "x:a%21b%3bc~");
+}
+
+TEST(PercentEncodingTest, Decode) {
+  EXPECT_EQ(parseOrDie("x:a/b/c").body(), "a/b/c");
+
+  EXPECT_EQ(parseOrDie("%3a://%3a/%3").scheme(), ":"); // "%3a" decodes to ':'.
+  EXPECT_EQ(parseOrDie("%3a://%3a/%3").authority(), ":");
+  EXPECT_EQ(parseOrDie("%3a://%3a/%3").body(), "/%3"); // Truncated escape kept.
+
+  EXPECT_EQ(parseOrDie("x:a%21b%3ac~").body(), "a!b:c~");
+}
+
+std::string resolveOrDie(const FileURI &U, llvm::StringRef HintPath = "") {
+  auto Path = FileURI::resolve(U, HintPath);
+  if (!Path) // Resolution is expected to succeed; report the error if not.
+    llvm_unreachable(llvm::toString(Path.takeError()).c_str());
+  return *Path;
+}
+
+TEST(URITest, Create) {
+#ifdef LLVM_ON_WIN32
+  // NOTE(review): shouldEscape() escapes ':'; confirm this unescaped "c:".
+  EXPECT_THAT(createOrDie("c:\\x\\y\\z"), "file:///c:/x/y/z");
+#else
+  EXPECT_THAT(createOrDie("/x/y/z"), "file:///x/y/z");
+  EXPECT_THAT(createOrDie("/(x)/y/\\ z"), "file:///%28x%29/y/%5c%20z");
+#endif
+}
+
+TEST(URITest, FailedCreate) {
+  auto Fail = [](llvm::Expected<FileURI> U) { // True iff U holds an error.
+    if (!U) {
+      llvm::consumeError(U.takeError()); // Errors must be consumed explicitly.
+      return true;
+    }
+    return false;
+  };
+  // Create from scheme+authority+body:
+  //
+  // Scheme must be provided.
+  EXPECT_TRUE(Fail(FileURI::create("", "auth", "/a")));
+  // Body must start with '/' if authority is present.
+  EXPECT_TRUE(Fail(FileURI::create("scheme", "auth", "x/y/z")));
+
+  // Create from scheme registry:
+  //
+  EXPECT_TRUE(Fail(FileURI::create("/x/y/z", "no"))); // "no" isn't registered.
+  // Path has to be absolute.
+  EXPECT_TRUE(Fail(FileURI::create("x/y/z")));
+}
+
+TEST(URITest, Parse) {
+  EXPECT_THAT(parseOrDie("file://auth/x/y/z"),
+              AllOf(Scheme("file"), Authority("auth"), Body("/x/y/z")));
+
+  EXPECT_THAT(parseOrDie("file://au%3dth/%28x%29/y/%5c%20z"),
+              AllOf(Scheme("file"), Authority("au=th"), Body("/(x)/y/\\ z")));
+
+  EXPECT_THAT(parseOrDie("file:///%28x%29/y/%5c%20z"),
+              AllOf(Scheme("file"), Authority(""), Body("/(x)/y/\\ z")));
+  EXPECT_THAT(parseOrDie("file:///x/y/z"),
+              AllOf(Scheme("file"), Authority(""), Body("/x/y/z")));
+  EXPECT_THAT(parseOrDie("file:"), // Scheme only.
+              AllOf(Scheme("file"), Authority(""), Body("")));
+  EXPECT_THAT(parseOrDie("file:///x/y/z%2"), // Truncated escape stays as-is.
+              AllOf(Scheme("file"), Authority(""), Body("/x/y/z%2")));
+  EXPECT_THAT(parseOrDie("http://llvm.org"), // No '/' after the authority.
+              AllOf(Scheme("http"), Authority("llvm.org"), Body("")));
+  EXPECT_THAT(parseOrDie("http://llvm.org/"),
+              AllOf(Scheme("http"), Authority("llvm.org"), Body("/")));
+  EXPECT_THAT(parseOrDie("http://llvm.org/D"),
+              AllOf(Scheme("http"), Authority("llvm.org"), Body("/D")));
+  EXPECT_THAT(parseOrDie("http:/"), // A single '/' starts the body.
+              AllOf(Scheme("http"), Authority(""), Body("/")));
+  EXPECT_THAT(parseOrDie("urn:isbn:0451450523"), // Only the first ':' splits.
+              AllOf(Scheme("urn"), Authority(""), Body("isbn:0451450523")));
+  EXPECT_THAT(
+      parseOrDie("file:///c:/windows/system32/"),
+      AllOf(Scheme("file"), Authority(""), Body("/c:/windows/system32/")));
+}
+
+TEST(URITest, ParseFailed) {
+  auto FailedParse = [](llvm::StringRef U) { // True iff parse(U) fails.
+    auto URI = FileURI::parse(U);
+    if (!URI) {
+      llvm::consumeError(URI.takeError());
+      return true;
+    }
+    return false;
+  };
+
+  // Expect ':' in URI.
+  EXPECT_TRUE(FailedParse("file//x/y/z"));
+  // Empty.
+  EXPECT_TRUE(FailedParse(""));
+  EXPECT_TRUE(FailedParse(":/a/b/c")); // ':' at position 0: empty scheme.
+}
+
+TEST(URITest, Resolve) {
+#ifdef LLVM_ON_WIN32
+  EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:\\x\\y\\z");
+#else
+  EXPECT_EQ(resolveOrDie(parseOrDie("file:/a/b/c")), "/a/b/c");
+  EXPECT_EQ(resolveOrDie(parseOrDie("file://auth/a/b/c")), "/a/b/c"); // No auth.
+  EXPECT_EQ(resolveOrDie(parseOrDie("test:a/b/c"), "/dir/test-root/x/y/z"),
+            "/dir/test-root/a/b/c"); // Root prefix is taken from the hint path.
+  EXPECT_THAT(resolveOrDie(parseOrDie("file://au%3dth/%28x%29/y/%20z")),
+              "/(x)/y/ z");
+  EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:/x/y/z");
+#endif
+}
+
+TEST(URITest, Platform) { // Round trip: create() then resolve() gives Path.
+  auto Path = getVirtualTestFilePath("x");
+  auto U = FileURI::create(Path, "file");
+  EXPECT_TRUE(static_cast<bool>(U));
+  EXPECT_THAT(resolveOrDie(*U), Path.str());
+}
+
+TEST(URITest, ResolveFailed) {
+  auto FailedResolve = [](llvm::StringRef Uri) { // True iff resolve() fails.
+    auto Path = FileURI::resolve(parseOrDie(Uri)); // Parsing must succeed.
+    if (!Path) {
+      llvm::consumeError(Path.takeError());
+      return true;
+    }
+    return false;
+  };
+
+  // Invalid scheme.
+  EXPECT_TRUE(FailedResolve("no:/a/b/c"));
+  // File path needs to be absolute.
+  EXPECT_TRUE(FailedResolve("file:a/b/c"));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang




More information about the cfe-commits mailing list