[clang-tools-extra] r364389 - [clangd] Enable extraction of system includes from custom toolchains

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 26 00:45:28 PDT 2019


Author: kadircet
Date: Wed Jun 26 00:45:27 2019
New Revision: 364389

URL: http://llvm.org/viewvc/llvm-project?rev=364389&view=rev
Log:
[clangd] Enable extraction of system includes from custom toolchains

Summary:
Some custom toolchains come with their own header files and compiler
drivers. Those compiler drivers implicitly know the include search paths for
those headers. This patch extracts that information from the drivers and adds
it to the command line when invoking the clang frontend.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62804

Added:
    clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp
    clang-tools-extra/trunk/clangd/test/system-include-extractor.test
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp
    clang-tools-extra/trunk/clangd/ClangdServer.h
    clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h
    clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=364389&r1=364388&r2=364389&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Wed Jun 26 00:45:27 2019
@@ -64,6 +64,7 @@ add_clang_library(clangDaemon
   RIFF.cpp
   Selection.cpp
   SourceCode.cpp
+  QueryDriverDatabase.cpp
   Threading.cpp
   Trace.cpp
   TUScheduler.cpp

Modified: clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp?rev=364389&r1=364388&r2=364389&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp (original)
+++ clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp Wed Jun 26 00:45:27 2019
@@ -9,12 +9,14 @@
 #include "ClangdLSPServer.h"
 #include "Diagnostics.h"
 #include "FormattedString.h"
+#include "GlobalCompilationDatabase.h"
 #include "Protocol.h"
 #include "SourceCode.h"
 #include "Trace.h"
 #include "URI.h"
 #include "refactor/Tweak.h"
 #include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/Errc.h"
@@ -335,9 +337,13 @@ void ClangdLSPServer::onInitialize(const
                                             ErrorCode::InvalidRequest));
   if (const auto &Dir = Params.initializationOptions.compilationDatabasePath)
     CompileCommandsDir = Dir;
-  if (UseDirBasedCDB)
+  if (UseDirBasedCDB) {
     BaseCDB = llvm::make_unique<DirectoryBasedGlobalCompilationDatabase>(
         CompileCommandsDir);
+    BaseCDB = getQueryDriverDatabase(
+        llvm::makeArrayRef(ClangdServerOpts.QueryDriverGlobs),
+        std::move(BaseCDB));
+  }
   CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags,
               ClangdServerOpts.ResourceDir);
   Server.emplace(*CDB, FSProvider, static_cast<DiagnosticsConsumer &>(*this),

Modified: clang-tools-extra/trunk/clangd/ClangdServer.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/ClangdServer.h?rev=364389&r1=364388&r2=364389&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/ClangdServer.h (original)
+++ clang-tools-extra/trunk/clangd/ClangdServer.h Wed Jun 26 00:45:27 2019
@@ -127,6 +127,10 @@ public:
     /// Enable hidden features mostly useful to clangd developers.
     /// e.g. tweaks to dump the AST.
     bool HiddenFeatures = false;
+
+    /// Clangd will execute compiler drivers matching one of these globs to
+    /// fetch system include path.
+    std::vector<std::string> QueryDriverGlobs;
   };
   // Sensible default options for use in tests.
   // Features like indexing must be enabled if desired.

Modified: clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h?rev=364389&r1=364388&r2=364389&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h (original)
+++ clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h Wed Jun 26 00:45:27 2019
@@ -91,6 +91,13 @@ private:
   llvm::Optional<Path> CompileCommandsDir;
 };
 
+/// Wraps \p Base (taking ownership of it) in a database that extracts the
+/// system include search path from drivers matching \p QueryDriverGlobs and
+/// adds them to the compile flags. Base may not be nullptr.
+/// Returns Base when \p QueryDriverGlobs is empty.
+std::unique_ptr<GlobalCompilationDatabase>
+getQueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs,
+                       std::unique_ptr<GlobalCompilationDatabase> Base);
+
 /// Wraps another compilation database, and supports overriding the commands
 /// using an in-memory mapping.
 class OverlayCDB : public GlobalCompilationDatabase {

Added: clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp?rev=364389&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp (added)
+++ clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp Wed Jun 26 00:45:27 2019
@@ -0,0 +1,265 @@
+//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Some compiler drivers have an implicit search mechanism for system headers.
+// This compilation database implementation tries to extract that information by
+// executing the driver in verbose mode. gcc-compatible drivers print something
+// like:
+// ....
+// ....
+// #include <...> search starts here:
+//  /usr/lib/gcc/x86_64-linux-gnu/7/include
+//  /usr/local/include
+//  /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
+//  /usr/include/x86_64-linux-gnu
+//  /usr/include
+// End of search list.
+// ....
+// ....
+// This component parses that output and adds each path to the command line
+// args provided by Base, prefixing each one with -isystem. Therefore the
+// current implementation does not work with a driver that is not
+// gcc-compatible.
+//
+// The first argument of the command line received from the underlying
+// compilation database is used as the compiler driver path. Because this means
+// executing arbitrary binaries, the mechanism is not used by default and only
+// executes binaries whose paths are explicitly whitelisted by the user.
+
+#include "GlobalCompilationDatabase.h"
+#include "Logger.h"
+#include "Path.h"
+#include "Trace.h"
+#include "clang/Driver/Types.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <algorithm>
+#include <map>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace {
+
+/// Parses the verbose output of a gcc-compatible driver and returns the
+/// directories listed between the "#include <...> search starts here:" and
+/// "End of search list." markers, in the order they were printed.
+/// Logs an error and returns an empty list when either marker is missing.
+std::vector<std::string> parseDriverOutput(llvm::StringRef Output) {
+  std::vector<std::string> SystemIncludes;
+  constexpr char const *SIS = "#include <...> search starts here:";
+  constexpr char const *SIE = "End of search list.";
+  llvm::SmallVector<llvm::StringRef, 8> Lines;
+  Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+  // Drivers indent every search path with a space and may terminate lines
+  // with "\r\n". Strip that whitespace so it neither hides the markers nor
+  // ends up inside the extracted paths.
+  for (llvm::StringRef &Line : Lines)
+    Line = Line.trim();
+
+  auto StartIt = std::find(Lines.begin(), Lines.end(), SIS);
+  if (StartIt == Lines.end()) {
+    elog("System include extraction: start marker not found: {0}", Output);
+    return {};
+  }
+  ++StartIt;
+  const auto EndIt = std::find(StartIt, Lines.end(), SIE);
+  if (EndIt == Lines.end()) {
+    elog("System include extraction: end marker missing: {0}", Output);
+    return {};
+  }
+
+  for (llvm::StringRef Line : llvm::make_range(StartIt, EndIt)) {
+    // A whitespace-only line is empty after trimming and names no directory.
+    if (Line.empty())
+      continue;
+    SystemIncludes.push_back(Line.str());
+    vlog("System include extraction: adding {0}", Line);
+  }
+  return SystemIncludes;
+}
+
+/// Runs \p Driver in verbose, preprocess-only mode on an empty input and
+/// returns the system include directories it reports.
+///
+/// \p Ext is the extension of the file being compiled, without the leading
+/// dot; it selects the language passed to the driver via "-x".
+/// \p QueryDriverRegex is the whitelist: a driver that does not match it is
+/// never executed.
+/// On any failure the reason is logged and an empty list is returned.
+std::vector<std::string> extractSystemIncludes(PathRef Driver,
+                                               llvm::StringRef Ext,
+                                               llvm::Regex &QueryDriverRegex) {
+  trace::Span Tracer("Extract system includes");
+  SPAN_ATTACH(Tracer, "driver", Driver);
+  SPAN_ATTACH(Tracer, "ext", Ext);
+
+  if (!QueryDriverRegex.match(Driver)) {
+    vlog("System include extraction: not whitelisted driver {0}", Driver);
+    return {};
+  }
+
+  if (!llvm::sys::fs::exists(Driver)) {
+    elog("System include extraction: {0} does not exist.", Driver);
+    return {};
+  }
+  if (!llvm::sys::fs::can_execute(Driver)) {
+    elog("System include extraction: {0} is not executable.", Driver);
+    return {};
+  }
+
+  // The driver's output is captured in a temporary file that is removed again
+  // when this function returns.
+  llvm::SmallString<128> OutputPath;
+  auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd",
+                                               OutputPath);
+  if (EC) {
+    elog("System include extraction: failed to create temporary file with "
+         "error {0}",
+         EC.message());
+    return {};
+  }
+  auto CleanUp = llvm::make_scope_exit(
+      [&OutputPath]() { llvm::sys::fs::remove(OutputPath); });
+
+  // Redirects are {stdin, stdout, stderr}; an empty path discards the stream.
+  // gcc-compatible drivers print the include search list on stderr, so stderr
+  // has to be captured. stdout goes to the same file so that drivers reporting
+  // on stdout keep working.
+  const llvm::StringRef OutputFile = OutputPath;
+  llvm::Optional<llvm::StringRef> Redirects[] = {
+      {""}, OutputFile, OutputFile};
+
+  auto Type = driver::types::lookupTypeForExtension(Ext);
+  if (Type == driver::types::TY_INVALID) {
+    elog("System include extraction: invalid file type for {0}", Ext);
+    return {};
+  }
+  // Should we also preserve flags like "-sysroot", "-nostdinc" ?
+  // The first element of the argument list is argv[0], i.e. the program name;
+  // without it "-E" would be consumed as the name and never reach the driver.
+  const llvm::StringRef Args[] = {
+      Driver, "-E", "-x", driver::types::getTypeName(Type), "-", "-v"};
+
+  int RC =
+      llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/llvm::None, Redirects);
+  if (RC) {
+    elog("System include extraction: driver execution failed with return code: "
+         "{0}",
+         llvm::to_string(RC));
+    return {};
+  }
+
+  auto BufOrError = llvm::MemoryBuffer::getFile(OutputPath);
+  if (!BufOrError) {
+    elog("System include extraction: failed to read {0} with error {1}",
+         OutputPath, BufOrError.getError().message());
+    return {};
+  }
+
+  auto Includes = parseDriverOutput(BufOrError->get()->getBuffer());
+  log("System include extractor: successfully executed {0}, got includes: "
+      "\"{1}\"",
+      Driver, llvm::join(Includes, ", "));
+  return Includes;
+}
+
+/// Appends "-isystem <dir>" to the command line of \p Cmd for every entry of
+/// \p SystemIncludes, preserving their order, and returns \p Cmd.
+tooling::CompileCommand &
+addSystemIncludes(tooling::CompileCommand &Cmd,
+                  llvm::ArrayRef<std::string> SystemIncludes) {
+  auto &Args = Cmd.CommandLine;
+  // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl"
+  for (const std::string &Dir : SystemIncludes) {
+    Args.emplace_back("-isystem");
+    Args.push_back(Dir);
+  }
+  return Cmd;
+}
+
+/// Translates a single glob into the text of an anchored regular expression.
+/// "**" matches any sequence of characters, "*" matches any sequence that
+/// contains no slash, and every other character is matched literally.
+std::string convertGlobToRegex(llvm::StringRef Glob) {
+  std::string Pattern = "^";
+  llvm::StringRef Rest = Glob;
+  while (!Rest.empty()) {
+    if (Rest.consume_front("**")) {
+      // Double star, accept any sequence.
+      Pattern += ".*";
+    } else if (Rest.consume_front("*")) {
+      // Single star, accept any sequence without a slash.
+      Pattern += "[^/]*";
+    } else {
+      // Anything else stands for itself; escape it in case it is a regex
+      // metacharacter.
+      Pattern += llvm::Regex::escape(Rest.take_front(1));
+      Rest = Rest.drop_front(1);
+    }
+  }
+  Pattern += '$';
+  return Pattern;
+}
+
+/// Builds a single regex that matches a path when the path matches at least
+/// one of \p Globs. Each glob may only use ** or * as wildcards.
+/// \p Globs must not be empty.
+llvm::Regex convertGlobsToRegex(llvm::ArrayRef<std::string> Globs) {
+  assert(!Globs.empty() && "Globs cannot be empty!");
+  std::vector<std::string> Alternatives;
+  Alternatives.reserve(Globs.size());
+  for (const std::string &Glob : Globs)
+    Alternatives.emplace_back(convertGlobToRegex(Glob));
+
+  // Every alternative carries its own anchors, so a plain alternation of the
+  // per-glob patterns is sufficient.
+  const std::string Pattern = llvm::join(Alternatives, "|");
+  llvm::Regex Reg(Pattern);
+  std::string Error;
+  assert(Reg.isValid(Error) && "Created an invalid regex from globs");
+  return Reg;
+}
+
+/// Extracts system includes from a trusted driver by parsing the output of
+/// include search path and appends them to the commands coming from underlying
+/// compilation database.
+///
+/// Results are cached per (absolute driver path, file extension), so a driver
+/// is executed at most once for each extension. The cache and the regex are
+/// only touched while holding Mu.
+class QueryDriverDatabase : public GlobalCompilationDatabase {
+public:
+  /// \p QueryDriverGlobs must not be empty and \p Base must not be null.
+  QueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs,
+                      std::unique_ptr<GlobalCompilationDatabase> Base)
+      : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)),
+        Base(std::move(Base)) {
+    assert(this->Base);
+    // Forward change notifications of the wrapped database to our own
+    // subscribers.
+    BaseChanged =
+        this->Base->watch([this](const std::vector<std::string> &Changes) {
+          OnCommandChanged.broadcast(Changes);
+        });
+  }
+
+  /// Returns the command Base provides for \p File with "-isystem" flags for
+  /// the system includes of its driver appended. Commands that are missing or
+  /// have an empty command line are returned unchanged.
+  llvm::Optional<tooling::CompileCommand>
+  getCompileCommand(PathRef File, ProjectInfo *PI = nullptr) const override {
+    auto Cmd = Base->getCompileCommand(File, PI);
+    if (!Cmd || Cmd->CommandLine.empty())
+      return Cmd;
+
+    // NOTE(review): a relative driver is resolved against the command's
+    // working directory, so a bare name such as "gcc" is not looked up in PATH.
+    llvm::SmallString<128> Driver(Cmd->CommandLine.front());
+    llvm::sys::fs::make_absolute(Cmd->Directory, Driver);
+    llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.');
+    // The key owns its strings: Driver is a local buffer and Ext points into
+    // File, and neither outlives this call.
+    const auto Key = std::make_pair(Driver.str().str(), Ext.str());
+
+    llvm::ArrayRef<std::string> SystemIncludes;
+    {
+      std::lock_guard<std::mutex> Lock(Mu);
+
+      auto It = DriverToIncludesCache.find(Key);
+      if (It == DriverToIncludesCache.end())
+        It = DriverToIncludesCache
+                 .emplace(Key, extractSystemIncludes(Key.first, Key.second,
+                                                     QueryDriverRegex))
+                 .first;
+      // std::map never relocates its elements, and entries are neither erased
+      // nor modified after insertion, so this view stays valid once the lock
+      // is released.
+      SystemIncludes = It->second;
+    }
+
+    return addSystemIncludes(*Cmd, SystemIncludes);
+  }
+
+private:
+  mutable std::mutex Mu;
+  // Caches includes extracted from a driver, keyed by (absolute driver path,
+  // file extension). Failed extractions are cached as an empty list.
+  mutable std::map<std::pair<std::string, std::string>,
+                   std::vector<std::string>>
+      DriverToIncludesCache;
+  // Mutable because extractSystemIncludes() takes the regex by non-const
+  // reference while getCompileCommand() is const.
+  mutable llvm::Regex QueryDriverRegex;
+
+  std::unique_ptr<GlobalCompilationDatabase> Base;
+  CommandChanged::Subscription BaseChanged;
+};
+} // namespace
+
+/// Wraps \p Base in a QueryDriverDatabase restricted to drivers matching
+/// \p QueryDriverGlobs. \p Base must not be null; it is handed back untouched
+/// when no globs are given.
+std::unique_ptr<GlobalCompilationDatabase>
+getQueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs,
+                       std::unique_ptr<GlobalCompilationDatabase> Base) {
+  assert(Base && "Null base to SystemIncludeExtractor");
+  // Without any whitelisted driver there is nothing to query, so no wrapper is
+  // needed.
+  if (!QueryDriverGlobs.empty())
+    Base = llvm::make_unique<QueryDriverDatabase>(QueryDriverGlobs,
+                                                  std::move(Base));
+  return Base;
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/test/system-include-extractor.test
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/test/system-include-extractor.test?rev=364389&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/test/system-include-extractor.test (added)
+++ clang-tools-extra/trunk/clangd/test/system-include-extractor.test Wed Jun 26 00:45:27 2019
@@ -0,0 +1,50 @@
+# RUN: rm -rf %t.dir && mkdir -p %t.dir
+
+# The mock driver below is a bash script that is made executable with chmod,
+# so this test can only run where a POSIX shell is available.
+# REQUIRES: shell
+
+# Generate a mock-driver that will print %temp_dir%/my/dir and
+# %temp_dir%/my/dir2 as include search paths.
+# RUN: echo '#!/bin/bash' >> %t.dir/my_driver.sh
+# RUN: echo 'echo line to ignore' >> %t.dir/my_driver.sh
+# RUN: echo 'echo \#include \<...\> search starts here:' >> %t.dir/my_driver.sh
+# RUN: echo 'echo %t.dir/my/dir/' >> %t.dir/my_driver.sh
+# RUN: echo 'echo %t.dir/my/dir2/' >> %t.dir/my_driver.sh
+# RUN: echo 'echo End of search list.' >> %t.dir/my_driver.sh
+# RUN: chmod +x %t.dir/my_driver.sh
+
+# Create header files my/dir/a.h and my/dir2/b.h
+# RUN: mkdir -p %t.dir/my/dir
+# RUN: touch %t.dir/my/dir/a.h
+# RUN: mkdir -p %t.dir/my/dir2
+# RUN: touch %t.dir/my/dir2/b.h
+
+# Generate a compile_commands.json that will query the mock driver we've
+# created, which should add my/dir and my/dir2 to the include search path.
+# RUN: echo '[{"directory": "%/t.dir", "command": "%/t.dir/my_driver.sh the-file.cpp", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json
+
+# RUN: sed -e "s|INPUT_DIR|%/t.dir|g" %s > %t.test.1
+# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..."
+# (with the extra slash in the front), so we add it here.
+# RUN: sed -e "s|file://\([A-Z]\):/|file:///\1:/|g" %t.test.1 > %t.test
+
+# Bless the mock driver we've just created so that clangd can execute it.
+# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test | FileCheck -strict-whitespace %t.test
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{}}
+---
+{
+  "jsonrpc":"2.0",
+  "method":"textDocument/didOpen",
+  "params": {
+    "textDocument": {
+      "uri": "file://INPUT_DIR/the-file.cpp",
+      "languageId":"cpp",
+      "version":1,
+      "text":"#include <a.h>\n#include <b.h>"
+    }
+  }
+}
+# CHECK:   "method": "textDocument/publishDiagnostics",
+# CHECK-NEXT:   "params": {
+# CHECK-NEXT:     "diagnostics": [],
+---
+{"jsonrpc":"2.0","id":10000,"method":"shutdown"}
+---
+{"jsonrpc":"2.0","method":"exit"}

Modified: clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp?rev=364389&r1=364388&r2=364389&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp Wed Jun 26 00:45:27 2019
@@ -269,6 +269,15 @@ static llvm::cl::opt<bool> HiddenFeature
     llvm::cl::desc("Enable hidden features mostly useful to clangd developers"),
     llvm::cl::init(false), llvm::cl::Hidden);
 
+// Globs naming the compiler drivers clangd is allowed to execute in order to
+// query their system include paths. The value is moved into
+// Opts.QueryDriverGlobs in main(); when no glob is given, no driver is
+// executed.
+static llvm::cl::list<std::string> QueryDriverGlobs(
+    "query-driver",
+    llvm::cl::desc(
+        "Comma separated list of globs for white-listing gcc-compatible "
+        "drivers that are safe to execute. Drivers matching any of these globs "
+        "will be used to extract system includes. e.g. "
+        "/usr/bin/**/clang-*,/path/to/repo/**/g++-*"),
+    llvm::cl::CommaSeparated);
+
 namespace {
 
 /// \brief Supports a test URI scheme with relaxed constraints for lit tests.
@@ -523,6 +532,7 @@ int main(int argc, char *argv[]) {
     };
   }
   Opts.SuggestMissingIncludes = SuggestMissingIncludes;
+  Opts.QueryDriverGlobs = std::move(QueryDriverGlobs);
   llvm::Optional<OffsetEncoding> OffsetEncodingFromFlag;
   if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding)
     OffsetEncodingFromFlag = ForceOffsetEncoding;




More information about the cfe-commits mailing list