[clang-tools-extra] f099f2f - [clangd] Use all inputs to SystemIncludeExtractor in cache key

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 17 07:57:57 PDT 2023


Author: Kadir Cetinkaya
Date: 2023-04-17T16:57:33+02:00
New Revision: f099f2fefbab0592c828573d816b46a475076f49

URL: https://github.com/llvm/llvm-project/commit/f099f2fefbab0592c828573d816b46a475076f49
DIFF: https://github.com/llvm/llvm-project/commit/f099f2fefbab0592c828573d816b46a475076f49.diff

LOG: [clangd] Use all inputs to SystemIncludeExtractor in cache key

Instead of passing in a tooling::CompileCommand into system include
extraction, pass a limited set, whose elements are used as keys.

Also fix the issue around accepting `-isysroot=/foo` which isn't a valid
argument (or the directory should be `=/foo` not `/foo`).

Fixes https://github.com/clangd/clangd/issues/1404
Fixes https://github.com/clangd/clangd/issues/1403

This should also unblock https://reviews.llvm.org/D138546

Differential Revision: https://reviews.llvm.org/D146941

Added: 
    

Modified: 
    clang-tools-extra/clangd/SystemIncludeExtractor.cpp
    clang-tools-extra/clangd/test/system-include-extractor.test

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp
index c334697e1e238..4d359b0058280 100644
--- a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp
+++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp
@@ -32,32 +32,44 @@
 #include "CompileCommands.h"
 #include "GlobalCompilationDatabase.h"
 #include "support/Logger.h"
-#include "support/Path.h"
+#include "support/Threading.h"
 #include "support/Trace.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/Driver/Types.h"
 #include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/ScopedPrinter.h"
-#include <algorithm>
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
 #include <iterator>
-#include <map>
+#include <memory>
 #include <optional>
 #include <string>
+#include <tuple>
+#include <utility>
 #include <vector>
 
-namespace clang {
-namespace clangd {
+namespace clang::clangd {
 namespace {
 
 struct DriverInfo {
@@ -65,12 +77,138 @@ struct DriverInfo {
   std::string Target;
 };
 
+struct DriverArgs {
+  // Name of the driver program to execute or absolute path to it.
+  std::string Driver;
+  // Whether certain includes should be part of query.
+  bool StandardIncludes = true;
+  bool StandardCXXIncludes = true;
+  bool BuiltinIncludes = true;
+  // Language to use while querying.
+  std::string Lang;
+  std::string Sysroot;
+  std::string ISysroot;
+
+  bool operator==(const DriverArgs &RHS) const {
+    return std::tie(Driver, StandardIncludes, StandardCXXIncludes,
+                    BuiltinIncludes, Lang, Sysroot, ISysroot) ==
+           std::tie(RHS.Driver, RHS.StandardIncludes, RHS.StandardCXXIncludes,
+                    RHS.BuiltinIncludes, RHS.Lang, RHS.Sysroot, ISysroot);
+  }
+
+  DriverArgs(const tooling::CompileCommand &Cmd, llvm::StringRef File) {
+    llvm::SmallString<128> Driver(Cmd.CommandLine.front());
+    // Driver is a not a single executable name but instead a path (either
+    // relative or absolute).
+    if (llvm::any_of(Driver,
+                     [](char C) { return llvm::sys::path::is_separator(C); })) {
+      llvm::sys::fs::make_absolute(Cmd.Directory, Driver);
+    }
+    this->Driver = Driver.str().str();
+    for (size_t I = 0, E = Cmd.CommandLine.size(); I < E; ++I) {
+      llvm::StringRef Arg = Cmd.CommandLine[I];
+
+      // Look for Language related flags.
+      if (Arg.consume_front("-x")) {
+        if (Arg.empty() && I + 1 < E)
+          Lang = Cmd.CommandLine[I + 1];
+        else
+          Lang = Arg.str();
+      }
+      // Look for standard/builtin includes.
+      else if (Arg == "-nostdinc" || Arg == "--no-standard-includes")
+        StandardIncludes = false;
+      else if (Arg == "-nostdinc++")
+        StandardCXXIncludes = false;
+      else if (Arg == "-nobuiltininc")
+        BuiltinIncludes = false;
+      // Figure out sysroot
+      else if (Arg.consume_front("--sysroot")) {
+        if (Arg.consume_front("="))
+          Sysroot = Arg.str();
+        else if (Arg.empty() && I + 1 < E)
+          Sysroot = Cmd.CommandLine[I + 1];
+      } else if (Arg.consume_front("-isysroot")) {
+        if (Arg.empty() && I + 1 < E)
+          ISysroot = Cmd.CommandLine[I + 1];
+        else
+          ISysroot = Arg.str();
+      }
+    }
+
+    // If language is not explicit in the flags, infer from the file.
+    // This is important as we want to cache each language separately.
+    if (Lang.empty()) {
+      llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.');
+      auto Type = driver::types::lookupTypeForExtension(Ext);
+      if (Type == driver::types::TY_INVALID) {
+        elog("System include extraction: invalid file type for {0}", Ext);
+      } else {
+        Lang = driver::types::getTypeName(Type);
+      }
+    }
+  }
+  llvm::SmallVector<llvm::StringRef> render() const {
+    // FIXME: Don't treat lang specially?
+    assert(!Lang.empty());
+    llvm::SmallVector<llvm::StringRef> Args = {"-x", Lang};
+    if (!StandardIncludes)
+      Args.push_back("-nostdinc");
+    if (!StandardCXXIncludes)
+      Args.push_back("-nostdinc++");
+    if (!BuiltinIncludes)
+      Args.push_back("-nobuiltininc++");
+    if (!Sysroot.empty())
+      Args.append({"--sysroot", Sysroot});
+    if (!ISysroot.empty())
+      Args.append({"-isysroot", ISysroot});
+    return Args;
+  }
+
+  static DriverArgs getEmpty() { return {}; }
+
+private:
+  DriverArgs() = default;
+};
+} // namespace
+} // namespace clang::clangd
+namespace llvm {
+using DriverArgs = clang::clangd::DriverArgs;
+template <> struct DenseMapInfo<DriverArgs> {
+  static DriverArgs getEmptyKey() {
+    auto Driver = DriverArgs::getEmpty();
+    Driver.Driver = "EMPTY_KEY";
+    return Driver;
+  }
+  static DriverArgs getTombstoneKey() {
+    auto Driver = DriverArgs::getEmpty();
+    Driver.Driver = "TOMBSTONE_KEY";
+    return Driver;
+  }
+  static unsigned getHashValue(const DriverArgs &Val) {
+    return llvm::hash_value(std::tuple{
+        Val.Driver,
+        Val.StandardIncludes,
+        Val.StandardCXXIncludes,
+        Val.BuiltinIncludes,
+        Val.Lang,
+        Val.Sysroot,
+        Val.ISysroot,
+    });
+  }
+  static bool isEqual(const DriverArgs &LHS, const DriverArgs &RHS) {
+    return LHS == RHS;
+  }
+};
+} // namespace llvm
+namespace clang::clangd {
+namespace {
 bool isValidTarget(llvm::StringRef Triple) {
   std::shared_ptr<TargetOptions> TargetOpts(new TargetOptions);
   TargetOpts->Triple = Triple.str();
   DiagnosticsEngine Diags(new DiagnosticIDs, new DiagnosticOptions,
                           new IgnoringDiagConsumer);
-  IntrusiveRefCntPtr<TargetInfo> Target =
+  llvm::IntrusiveRefCntPtr<TargetInfo> Target =
       TargetInfo::CreateTargetInfo(Diags, TargetOpts);
   return bool(Target);
 }
@@ -137,15 +275,12 @@ std::optional<DriverInfo> parseDriverOutput(llvm::StringRef Output) {
 }
 
 std::optional<DriverInfo>
-extractSystemIncludesAndTarget(llvm::SmallString<128> Driver,
-                               llvm::StringRef Lang,
-                               llvm::ArrayRef<std::string> CommandLine,
+extractSystemIncludesAndTarget(const DriverArgs &InputArgs,
                                const llvm::Regex &QueryDriverRegex) {
   trace::Span Tracer("Extract system includes and target");
 
+  std::string Driver = InputArgs.Driver;
   if (!llvm::sys::path::is_absolute(Driver)) {
-    assert(llvm::none_of(
-        Driver, [](char C) { return llvm::sys::path::is_separator(C); }));
     auto DriverProgram = llvm::sys::findProgramByName(Driver);
     if (DriverProgram) {
       vlog("System include extraction: driver {0} expanded to {1}", Driver,
@@ -158,7 +293,7 @@ extractSystemIncludesAndTarget(llvm::SmallString<128> Driver,
   }
 
   SPAN_ATTACH(Tracer, "driver", Driver);
-  SPAN_ATTACH(Tracer, "lang", Lang);
+  SPAN_ATTACH(Tracer, "lang", InputArgs.Lang);
 
   if (!QueryDriverRegex.match(Driver)) {
     vlog("System include extraction: not allowed driver {0}", Driver);
@@ -178,36 +313,10 @@ extractSystemIncludesAndTarget(llvm::SmallString<128> Driver,
 
   std::optional<llvm::StringRef> Redirects[] = {{""}, {""}, StdErrPath.str()};
 
-  llvm::SmallVector<llvm::StringRef> Args = {Driver, "-E", "-x",
-                                             Lang,   "-",  "-v"};
-
-  // These flags will be preserved
-  const llvm::StringRef FlagsToPreserve[] = {
-      "-nostdinc", "--no-standard-includes", "-nostdinc++", "-nobuiltininc"};
-  // Preserves these flags and their values, either as separate args or with an
-  // equalsbetween them
-  const llvm::StringRef ArgsToPreserve[] = {"--sysroot", "-isysroot"};
-
-  for (size_t I = 0, E = CommandLine.size(); I < E; ++I) {
-    llvm::StringRef Arg = CommandLine[I];
-    if (llvm::is_contained(FlagsToPreserve, Arg)) {
-      Args.push_back(Arg);
-    } else {
-      const auto *Found =
-          llvm::find_if(ArgsToPreserve, [&Arg](llvm::StringRef S) {
-            return Arg.startswith(S);
-          });
-      if (Found == std::end(ArgsToPreserve))
-        continue;
-      Arg = Arg.drop_front(Found->size());
-      if (Arg.empty() && I + 1 < E) {
-        Args.push_back(CommandLine[I]);
-        Args.push_back(CommandLine[++I]);
-      } else if (Arg.startswith("=")) {
-        Args.push_back(CommandLine[I]);
-      }
-    }
-  }
+  llvm::SmallVector<llvm::StringRef> Args = {Driver, "-E", "-v"};
+  Args.append(InputArgs.render());
+  // Input needs to go after Lang flags.
+  Args.push_back("-");
 
   std::string ErrMsg;
   if (int RC = llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/std::nullopt,
@@ -325,43 +434,19 @@ class SystemIncludeExtractor {
     if (Cmd.CommandLine.empty())
       return;
 
-    llvm::StringRef Lang;
-    for (size_t I = 0, E = Cmd.CommandLine.size(); I < E; ++I) {
-      llvm::StringRef Arg = Cmd.CommandLine[I];
-      if (Arg == "-x" && I + 1 < E)
-        Lang = Cmd.CommandLine[I + 1];
-      else if (Arg.startswith("-x"))
-        Lang = Arg.drop_front(2).trim();
-    }
-    if (Lang.empty()) {
-      llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.');
-      auto Type = driver::types::lookupTypeForExtension(Ext);
-      if (Type == driver::types::TY_INVALID) {
-        elog("System include extraction: invalid file type for {0}", Ext);
-        return;
-      }
-      Lang = driver::types::getTypeName(Type);
-    }
-
-    llvm::SmallString<128> Driver(Cmd.CommandLine.front());
-    if (llvm::any_of(Driver,
-                     [](char C) { return llvm::sys::path::is_separator(C); }))
-      // Driver is a not a single executable name but instead a path (either
-      // relative or absolute).
-      llvm::sys::fs::make_absolute(Cmd.Directory, Driver);
-
-    if (auto Info =
-            QueriedDrivers.get(/*Key=*/(Driver + ":" + Lang).str(), [&] {
-              return extractSystemIncludesAndTarget(
-                  Driver, Lang, Cmd.CommandLine, QueryDriverRegex);
-            })) {
+    DriverArgs Args(Cmd, File);
+    if (Args.Lang.empty())
+      return;
+    if (auto Info = QueriedDrivers.get(Args, [&] {
+          return extractSystemIncludesAndTarget(Args, QueryDriverRegex);
+        })) {
       setTarget(addSystemIncludes(Cmd, Info->SystemIncludes), Info->Target);
     }
   }
 
 private:
   // Caches includes extracted from a driver. Key is driver:lang.
-  Memoize<llvm::StringMap<std::optional<DriverInfo>>> QueriedDrivers;
+  Memoize<llvm::DenseMap<DriverArgs, std::optional<DriverInfo>>> QueriedDrivers;
   llvm::Regex QueryDriverRegex;
 };
 } // namespace
@@ -373,5 +458,4 @@ getSystemIncludeExtractor(llvm::ArrayRef<std::string> QueryDriverGlobs) {
   return SystemIncludeExtractor(QueryDriverGlobs);
 }
 
-} // namespace clangd
-} // namespace clang
+} // namespace clang::clangd

diff  --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test
index 49bb8a6101338..3f3a462bf844a 100644
--- a/clang-tools-extra/clangd/test/system-include-extractor.test
+++ b/clang-tools-extra/clangd/test/system-include-extractor.test
@@ -14,8 +14,8 @@
 # Check that clangd preserves certain flags like `-nostdinc` from
 # original invocation in compile_commands.json.
 # RUN: echo '[ -z "${args##*"-nostdinc"*}" ] || exit' >> %t.dir/bin/my_driver.sh
-# RUN: echo '[ -z "${args##*"-isysroot=/isysroot"*}" ] || exit' >> %t.dir/bin/my_driver.sh
 # RUN: echo '[ -z "${args##*"--sysroot /my/sysroot/path"*}" ] || exit' >> %t.dir/bin/my_driver.sh
+# RUN: echo '[ -z "${args##*"-isysroot /isysroot"*}" ] || exit' >> %t.dir/bin/my_driver.sh
 # RUN: echo 'echo line to ignore >&2' >> %t.dir/bin/my_driver.sh
 # RUN: echo 'printf "Target: arm-linux-gnueabihf\r\n" >&2' >> %t.dir/bin/my_driver.sh
 # RUN: echo 'printf "#include <...> search starts here:\r\n" >&2' >> %t.dir/bin/my_driver.sh
@@ -32,7 +32,7 @@
 
 # Generate a compile_commands.json that will query the mock driver we've
 # created. Which should add a.h and b.h into include search path.
-# RUN: echo '[{"directory": "%/t.dir", "command": "my_driver.sh the-file.cpp -nostdinc --sysroot /my/sysroot/path -isysroot=/isysroot", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json
+# RUN: echo '[{"directory": "%/t.dir", "command": "my_driver.sh the-file.cpp -nostdinc --sysroot /my/sysroot/path -isysroot/isysroot", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json
 
 # RUN: sed -e "s|INPUT_DIR|%/t.dir|g" %s > %t.test.1
 # On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..."
@@ -70,7 +70,7 @@
 {"jsonrpc":"2.0","method":"exit"}
 
 # Generate a 
diff erent compile_commands.json which does not point to the mock driver
-# RUN: echo '[{"directory": "%/t.dir", "command": "gcc the-file.cpp -nostdinc --sysroot /my/sysroot/path -isysroot=/isysroot", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json
+# RUN: echo '[{"directory": "%/t.dir", "command": "gcc the-file.cpp -nostdinc --sysroot /my/sysroot/path -isysroot/isysroot", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json
 
 # Generate a clangd config file which points to the mock driver instead
 # RUN: echo 'CompileFlags:' > %t.dir/.clangd


        


More information about the cfe-commits mailing list