[clang-tools-extra] [include-cleaner] Handle symbols from system headers. (PR #66089)

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 12 07:07:36 PDT 2023


https://github.com/hokein created https://github.com/llvm/llvm-project/pull/66089:

This is an initial version -- we add a file-path-based mapping to handle sysmbols from system headers.

The mapping list is not completed (I just added the most frequent problematic system headers).

I'm looking for initial feedback. In the long run, we might replace the clangd's CanonicalInclude with this one.

>From 83c89f01e8ed59dc77827052505af458e3d7719b Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu at gmail.com>
Date: Tue, 12 Sep 2023 15:50:48 +0200
Subject: [PATCH] [include-cleaner] Handle symbols from system headers.

This is the initial version -- we add a file-path-based mapping to handle
sysmbols from system headers.

The mapping list is not completed (I just added the most frequent
problematic system headers).

I'm looking for initial feedback. In the long run, we might replace the
clangd's CanonicalInclude with this one.
---
 .../include-cleaner/lib/FindHeaders.cpp       | 114 ++++++++++++++++++
 .../unittests/FindHeadersTest.cpp             |  32 +++++
 2 files changed, 146 insertions(+)

diff --git a/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp b/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
index 06e5e1812ba7218..fbb0c4009f7e8a1 100644
--- a/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
+++ b/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp
@@ -24,6 +24,10 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Path.h"
+#include <cassert>
+#include <cstddef>
+#include <map>
 #include <optional>
 #include <queue>
 #include <set>
@@ -176,6 +180,104 @@ headersForSpecialSymbol(const Symbol &S, const SourceManager &SM,
   return std::nullopt;
 }
 
+// A hand-mainained list of include mappings for system headers.
+// The first element is the suffix of the physical file path for the system
+// header, the second element is the final verbatim header spelling.
+const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = {
+    {"bits/types/struct_timeval.h", "<sys/time.h>"},
+    {"bits/pthreadtypes.h", "<pthread.h>"},
+    {"bits/types/struct_iovec.h", "<sys/uio.h>"},
+    {"linux/limits.h", "<limits.h>"},
+    {"bits/getopt_core.h", "<getopt.h>"},
+    {"asm-generic/socket.h", "<sys/socket.h>"},
+    {"bits/posix1_lim.h", "<limits.h>"},
+    {"bits/time.h", "time.h"},
+    {"bits/getopt_ext.h", "<getopt.h>"},
+    {"bits/types/sigset_t.h", "<signal.h>"},
+    {"bits/types/siginfo_t.h", "<signal.h>"},
+    {"bits/types/FILE.h", "<stdio.h>"},
+    {"asm/unistd_64.h", "<asm/unistd.h>"},
+    {"bits/stat.h", "<sys/stat.h>"},
+    {"asm-generic/ioctls.h", "<sys/ioctls.h>"},
+    {"asm-generic/errno.h", "<errno.h>"},
+    {"asm-generic/int-ll64.h", "<asm/types.h>"},
+    {"bits/sigaction.h", "<signal.h>"},
+    {"bits/types/struct_rusage.h", "<sys/resource.h>"},
+    {"sys/syscall.h", "<syscall.h>"},
+    {"linux/prctl.h", "<sys/prctl.h>"},
+    {"sys/ucontext.h", "<ucontext.h>"},
+    {"bits/types/clockid_t.h", "<time.h>"},
+    {"bits/types/mbstate_t.h", "<uchar.h>"},
+    {"bits/types/mbstate_t.h", "<wchar.h>"},
+    {"bits/types/struct_itimerspec.h", "<time.h>"},
+    {"bits/types/time_t.h", "<time.h>"},
+    {"bits/sockaddr.h", "<time.h>"},
+};
+// The maximum number of path components in a key from SuffixHeaderMapping.
+// Used to minimize the number of lookups in suffix path mappings.
+constexpr int MaxSuffixComponents = 3;
+
+// A mapping for system headers based on a set of filename rules.
+class SystemIncludeMap {
+public:
+  static const SystemIncludeMap &get() {
+    static SystemIncludeMap Instance;
+    static const auto *SystemHeaderMap = [] {
+      const size_t Size = sizeof(IncludeMappings) / sizeof(IncludeMappings[0]);
+      auto *HeaderMap = new std::multimap<llvm::StringRef, llvm::StringRef>();
+      for (size_t I = 0; I < Size; I++) {
+        HeaderMap->insert(IncludeMappings[I]);
+      }
+      return HeaderMap;
+    }();
+
+    // // Check MaxSuffixComponents constant is correct.
+    assert(llvm::all_of(*SystemHeaderMap, [](const auto &KeyAndValue) {
+      return std::distance(
+                 llvm::sys::path::begin(KeyAndValue.first,
+                                        llvm::sys::path::Style::posix),
+                 llvm::sys::path::end(KeyAndValue.first)) <=
+             MaxSuffixComponents;
+    }));
+    // ... and precise.
+    assert(llvm::any_of(*SystemHeaderMap, [](const auto &KeyAndValue) {
+      return std::distance(
+                 llvm::sys::path::begin(KeyAndValue.first,
+                                        llvm::sys::path::Style::posix),
+                 llvm::sys::path::end(KeyAndValue.first)) ==
+             MaxSuffixComponents;
+    }));
+
+    Instance.SuffixHeaderMapping = SystemHeaderMap;
+    return Instance;
+  }
+
+  // Returns the overridden verbatim spellings for HeaderPath that can be
+  // directly included.
+  llvm::SmallVector<llvm::StringRef>
+  mapHeader(llvm::StringRef HeaderPath) const {
+    if (!SuffixHeaderMapping)
+      return {};
+
+    int Components = 1;
+    llvm::SmallVector<llvm::StringRef> Headers;
+    for (auto It = llvm::sys::path::rbegin(HeaderPath),
+              End = llvm::sys::path::rend(HeaderPath);
+         It != End && Components <= MaxSuffixComponents; ++It, ++Components) {
+      auto SubPath = HeaderPath.substr(It->data() - HeaderPath.begin());
+      auto FoundRange = SuffixHeaderMapping->equal_range(SubPath);
+      for (auto It = FoundRange.first; It != FoundRange.second; ++It)
+        Headers.push_back(It->second);
+    }
+    return Headers;;
+  }
+
+private:
+  // A map from a suffix path to a verbatim header spelling (with <>).
+  const std::multimap<llvm::StringRef, llvm::StringRef>
+      *SuffixHeaderMapping = nullptr;
+};
+
 } // namespace
 
 llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
@@ -188,6 +290,18 @@ llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
     OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID);
     if (!FE)
       return {};
+    
+    if (auto MappingHeader =
+            SystemIncludeMap::get().mapHeader(FE->getName());
+        !MappingHeader.empty()) {
+      for (auto &Header : MappingHeader)
+        Results.emplace_back(Header, Hints::PublicHeader);
+      assert(!Results.empty());
+      Results.front().Hint |= Hints::PreferredHeader;
+      // FIXME: should we include the original header as well?
+      return Results;
+    }
+
     if (!PI)
       return {{*FE, Hints::PublicHeader | Hints::OriginHeader}};
     bool IsOrigin = true;
diff --git a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp
index 4cdcde1184a0a9e..15ff04e8e00bfcd 100644
--- a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp
@@ -597,5 +597,37 @@ TEST_F(HeadersForSymbolTest, StandardHeaders) {
                            tooling::stdlib::Header::named("<assert.h>")));
 }
 
+TEST_F(HeadersForSymbolTest, SystemHeadersMapping) {
+  Inputs.Code = R"cpp(
+    #include "bits/types/struct_timeval.h"
+
+    timeval t;
+  )cpp";
+  Inputs.ExtraFiles["bits/types/struct_timeval.h"] = guard(R"cpp(
+    struct timeval {};
+    )cpp");
+  buildAST();
+  EXPECT_THAT(
+      headersFor("timeval"),
+      // Respect the ordering from the stdlib mapping.
+      UnorderedElementsAre(Header("<sys/time.h>")));
+}
+
+TEST_F(HeadersForSymbolTest, SystemHeadersMappingMultiple) {
+  Inputs.Code = R"cpp(
+    #include "bits/types/mbstate_t.h"
+
+    mbstate_t t;
+  )cpp";
+  Inputs.ExtraFiles["bits/types/mbstate_t.h"] = guard(R"cpp(
+    struct mbstate_t {};
+  )cpp");
+  buildAST();
+  EXPECT_THAT(
+      headersFor("mbstate_t"),
+      // Respect the ordering from the stdlib mapping.
+      UnorderedElementsAre(Header("<uchar.h>"), Header("<wchar.h>")));
+}
+
 } // namespace
 } // namespace clang::include_cleaner



More information about the cfe-commits mailing list