[clang-tools-extra] [include-cleaner] don't consider the associated header unused (PR #67228)

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Sat Sep 23 02:15:54 PDT 2023


https://github.com/sam-mccall created https://github.com/llvm/llvm-project/pull/67228

Loosely, the "associated header" of `foo.cpp` is `foo.h`.
It should be included, many styles include it first.

So far we haven't special cased it in any way, and require this include
to be justified. e.g. if foo.cpp defines a function declared in foo.h,
then the #include is allowed to check these declarations match.

However this doesn't really align with what users want:
- people reasonably want to include the associated header for the
  side-effect of validating that it compiles.
  In the degenerate case, `lib.cpp`is just `#include "lib.h"` (see bug)
- That `void foo(){}` IWYU-uses `void foo();` is a bit artificial, and
  most users won't internalize this. Instead they'll stick with the
  simpler model "include the header that defines your API".
  In the rare cases where these give different answers[1], our current
  behavior is a puzzling special case from the user POV.
  It is more user-friendly to accept both models.
- even where this diagnostic is a true positive (defs don't match header
  decls) the diagnostic does not communicate this usefully.

Fixes https://github.com/llvm/llvm-project/issues/67140

[1] Example of an associated header that's not IWYU-used:
```
// http.h
inline URL buildHttpURL(string host, int port, string path) {
  return "http://" + host + ":" + port + "/" + path;
}
// http.cpp
class HTTPURLHandler : URLHandler { ... };
REGISTER_URL_HANDLER("http", HTTPURLHandler);
```


>From d638a561fcc5af9723079d3672010c00aa196fb5 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Sat, 23 Sep 2023 10:44:30 +0200
Subject: [PATCH] [include-cleaner] don't consider the associated header unused

Loosely, the "associated header" of `foo.cpp` is `foo.h`.
It should be included, many styles include it first.

So far we haven't special cased it in any way, and require this include
to be justified. e.g. if foo.cpp defines a function declared in foo.h,
then the #include is allowed to check these declarations match.

However this doesn't really align with what users want:
- people reasonably want to include the associated header for the
  side-effect of validating that it compiles.
  In the degenerate case, `lib.cpp`is just `#include "lib.h"` (see bug)
- That `void foo(){}` IWYU-uses `void foo();` is a bit artificial, and
  most users won't internalize this. Instead they'll stick with the
  simpler model "include the header that defines your API".
  In the rare cases where these give different answers[1], our current
  behavior is a puzzling special case from the user POV.
  It is more user-friendly to accept both models.
- even where this diagnostic is a true positive (defs don't match header
  decls) the diagnostic does not communicate this usefully.

Fixes https://github.com/llvm/llvm-project/issues/67140

[1] Example of an associated header that's not IWYU-used:
```
// http.h
inline URL buildHttpURL(string host, int port, string path) {
  return "http://" + host + ":" + port + "/" + path;
}
// http.cpp
class HTTPURLHandler : URLHandler { ... };
REGISTER_URL_HANDLER("http", HTTPURLHandler);
```
---
 .../include-cleaner/lib/Record.cpp            | 34 ++++++++++++-
 .../include-cleaner/unittests/RecordTest.cpp  | 48 +++++++++++++++++++
 2 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp
index 4e96e8eb208b7da..3e26978191a67ff 100644
--- a/clang-tools-extra/include-cleaner/lib/Record.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Record.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem/UniqueID.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/StringSaver.h"
 #include <algorithm>
 #include <assert.h>
@@ -178,7 +179,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
       : RecordPragma(CI.getPreprocessor(), Out) {}
   RecordPragma(const Preprocessor &P, PragmaIncludes *Out)
       : SM(P.getSourceManager()), HeaderInfo(P.getHeaderSearchInfo()), Out(Out),
-        UniqueStrings(Arena) {}
+        UniqueStrings(Arena),
+        MainFileStem(llvm::sys::path::stem(
+            SM.getNonBuiltinFilenameForID(SM.getMainFileID()).value_or(""))) {}
 
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
@@ -227,6 +230,7 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
       IncludedHeader = *File;
     checkForExport(HashFID, HashLine, std::move(IncludedHeader), File);
     checkForKeep(HashLine, File);
+    checkForDeducedAssociated(IncludedHeader);
   }
 
   void checkForExport(FileID IncludingFile, int HashLine,
@@ -276,6 +280,27 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
       KeepStack.pop_back(); // Pop immediately for single-line keep pragma.
   }
 
+  // Consider marking H as the "associated header" of the main file.
+  //
+  // Our heuristic:
+  // - it must be the first #include in the main file
+  // - it must have the same name stem as the main file (foo.h and foo.cpp)
+  // (IWYU pragma: associated is also supported, just not by this function).
+  //
+  // We consider the associated header as if it had a keep pragma.
+  // (Unlike IWYU, we don't treat #includes inside the associated header as if
+  // they were written in the main file.)
+  void checkForDeducedAssociated(std::optional<Header> H) {
+    namespace path = llvm::sys::path;
+    if (!InMainFile || SeenAssociatedCandidate)
+      return;
+    SeenAssociatedCandidate = true;
+    if (!H || H->kind() != Header::Physical)
+      return;
+    if (path::stem(H->physical().getName(), path::Style::posix) == MainFileStem)
+      Out->ShouldKeep.insert(H->physical().getUniqueID());
+  }
+
   bool HandleComment(Preprocessor &PP, SourceRange Range) override {
     auto &SM = PP.getSourceManager();
     auto Pragma =
@@ -287,7 +312,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
     int CommentLine = SM.getLineNumber(CommentFID, CommentOffset);
 
     if (InMainFile) {
-      if (Pragma->startswith("keep")) {
+      if (Pragma->startswith("keep") ||
+          // Limited support for associated headers: never consider unused.
+          Pragma->startswith("associated")) {
         KeepStack.push_back({CommentLine, false});
       } else if (Pragma->starts_with("begin_keep")) {
         KeepStack.push_back({CommentLine, true});
@@ -348,6 +375,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
   llvm::BumpPtrAllocator Arena;
   /// Intern table for strings. Contents are on the arena.
   llvm::StringSaver UniqueStrings;
+  // Used when deducing associated header.
+  llvm::StringRef MainFileStem;
+  bool SeenAssociatedCandidate = false;
 
   struct ExportPragma {
     // The line number where we saw the begin_exports or export pragma.
diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
index 36850731d514539..df3105ebd7f34bb 100644
--- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -379,6 +379,54 @@ TEST_F(PragmaIncludeTest, IWYUKeep) {
   EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/set").get()));
 }
 
+TEST_F(PragmaIncludeTest, AssociatedHeader) {
+  createEmptyFiles({"foo/main.h", "bar/main.h", "bar/other.h", "std/vector"});
+  auto IsKeep = [&](llvm::StringRef Name, TestAST &AST) {
+    return PI.shouldKeep(AST.fileManager().getFile(Name).get());
+  };
+
+  Inputs.FileName = "main.cc";
+  Inputs.ExtraArgs.push_back("-isystemstd");
+  {
+    Inputs.Code = R"cpp(
+      #include "foo/main.h"
+      #include "bar/main.h"
+    )cpp";
+    auto AST = build();
+    EXPECT_TRUE(IsKeep("foo/main.h", AST));
+    EXPECT_FALSE(IsKeep("bar/main.h", AST)) << "not first include";
+  }
+
+  {
+    Inputs.Code = R"cpp(
+      #include "bar/other.h"
+      #include "bar/main.h"
+    )cpp";
+    auto AST = build();
+    EXPECT_FALSE(IsKeep("bar/other.h", AST));
+    EXPECT_FALSE(IsKeep("bar/main.h", AST)) << "not first include";
+  }
+
+  {
+    Inputs.Code = R"cpp(
+      #include "foo/main.h"
+      #include "bar/other.h" // IWYU pragma: associated
+    )cpp";
+    auto AST = build();
+    EXPECT_TRUE(IsKeep("foo/main.h", AST));
+    EXPECT_TRUE(IsKeep("bar/other.h", AST));
+  }
+
+  Inputs.FileName = "vector.cc";
+  {
+    Inputs.Code = R"cpp(
+      #include <vector>
+    )cpp";
+    auto AST = build();
+    EXPECT_FALSE(IsKeep("std/vector", AST)) << "stdlib is never associated";
+  }
+}
+
 TEST_F(PragmaIncludeTest, IWYUPrivate) {
   Inputs.Code = R"cpp(
     #include "public.h"



More information about the cfe-commits mailing list