[clang] [clang][modules] Headers meant to be included multiple times can be completely invisible in clang module builds (PR #83660)

Ian Anderson via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 1 22:29:08 PST 2024


https://github.com/ian-twilightcoder created https://github.com/llvm/llvm-project/pull/83660

Once a file has been `#import`'ed, it gets stamped as if it was `#pragma once` and will not be re-entered, even on #include. This means that any errant #import of a file designed to be included multiple times, such as <assert.h>, will incorrectly mark it as include-once and break the multiple include functionality. Normally this isn't a big problem, e.g. <assert.h> can't have its NDEBUG mode changed after the first #import, but it is still mostly functional. However, when clang modules are involved, this can cause the header to be hidden entirely.

Objective-C code most often uses #import for everything, because it's required for most Objective-C headers to prevent double inclusion and redeclaration errors. (It's rare for Objective-C headers to use macro guards or `#pragma once`.) The problem arises when a submodule includes a multiple-include header. The "already included" state is global across all modules (which is necessary so that non-modular headers don't get compiled into multiple translation units and cause redeclaration errors). If another module or the main file #import's the same header, it becomes invisible from then on. If the original submodule is not imported, the include of the header will effectively do nothing and the header will be invisible. The only way to actually get the header's declarations is to somehow figure out which submodule consumed the header, and import that instead. That's basically impossible since it depends on exactly which modules were built in which order.

#import is a poor indicator of whether a header is actually include-once, as the #import is external to the header it applies to, and requires that all inclusions correctly and consistently use #import vs #include. When modules are enabled, consider a header marked `textual` in its module as a stronger indicator of multiple-include than #import's indication of include-once. This will allow headers like <assert.h> to always be included when modules are enabled, even if #import is erroneously used somewhere.

>From 171d0e299dd676ce29583e16fdf8c3e6f3dd7925 Mon Sep 17 00:00:00 2001
From: Ian Anderson <iana at apple.com>
Date: Fri, 1 Mar 2024 22:17:09 -0800
Subject: [PATCH] [clang][modules] Headers meant to be included multiple times
 can be completely invisible in clang module builds

Once a file has been `#import`'ed, it gets stamped as if it was `#pragma once` and will not be re-entered, even on #include. This means that any errant #import of a file designed to be included multiple times, such as <assert.h>, will incorrectly mark it as include-once and break the multiple include functionality. Normally this isn't a big problem, e.g. <assert.h> can't have its NDEBUG mode changed after the first #import, but it is still mostly functional. However, when clang modules are involved, this can cause the header to be hidden entirely.

Objective-C code most often uses #import for everything, because it's required for most Objective-C headers to prevent double inclusion and redeclaration errors. (It's rare for Objective-C headers to use macro guards or `#pragma once`.) The problem arises when a submodule includes a multiple-include header. The "already included" state is global across all modules (which is necessary so that non-modular headers don't get compiled into multiple translation units and cause redeclaration errors). If another module or the main file #import's the same header, it becomes invisible from then on. If the original submodule is not imported, the include of the header will effectively do nothing and the header will be invisible. The only way to actually get the header's declarations is to somehow figure out which submodule consumed the header, and import that instead. That's basically impossible since it depends on exactly which modules were built in which order.

#import is a poor indicator of whether a header is actually include-once, as the #import is external to the header it applies to, and requires that all inclusions correctly and consistently use #import vs #include. When modules are enabled, consider a header marked `textual` in its module as a stronger indicator of multiple-include than #import's indication of include-once. This will allow headers like <assert.h> to always be included when modules are enabled, even if #import is erroneously used somewhere.
---
 clang/include/clang/Lex/HeaderSearch.h       |  24 ++-
 clang/lib/Lex/HeaderSearch.cpp               | 156 ++++++++++++-------
 clang/lib/Serialization/ASTReader.cpp        |   2 +-
 clang/test/Modules/builtin-import.mm         |   2 +
 clang/test/Modules/import-textual-noguard.mm |   6 +-
 clang/test/Modules/import-textual.mm         |   2 +
 clang/test/Modules/multiple-import.m         |  43 +++++
 7 files changed, 172 insertions(+), 63 deletions(-)
 create mode 100644 clang/test/Modules/multiple-import.m

diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 705dcfa8aacc3f..bf8981b94d1b57 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -78,11 +78,17 @@ struct HeaderFileInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned External : 1;
 
-  /// Whether this header is part of a module.
+  /// Whether this header is part of and built with a module.
   LLVM_PREFERRED_TYPE(bool)
   unsigned isModuleHeader : 1;
 
-  /// Whether this header is part of the module that we are building.
+  /// Whether this header is a textual header in a module (isModuleHeader will
+  /// be false)
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned isTextualModuleHeader : 1;
+
+  /// Whether this header is part of and built with the module that we are
+  /// building.
   LLVM_PREFERRED_TYPE(bool)
   unsigned isCompilingModuleHeader : 1;
 
@@ -128,13 +134,20 @@ struct HeaderFileInfo {
 
   HeaderFileInfo()
       : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User),
-        External(false), isModuleHeader(false), isCompilingModuleHeader(false),
-        Resolved(false), IndexHeaderMapHeader(false), IsValid(false)  {}
+        External(false), isModuleHeader(false), isTextualModuleHeader(false),
+        isCompilingModuleHeader(false), Resolved(false),
+        IndexHeaderMapHeader(false), IsValid(false) {}
 
   /// Retrieve the controlling macro for this header file, if
   /// any.
   const IdentifierInfo *
   getControllingMacro(ExternalPreprocessorSource *External);
+
+  /// Update the module membership bits based on the header role.
+  ///
+  /// isModuleHeader will potentially be set, but not cleared.
+  /// isTextualModuleHeader will be set or cleared based on the role update.
+  void mergeModuleMembership(ModuleMap::ModuleHeaderRole Role);
 };
 
 /// An external source of header file information, which may supply
@@ -522,6 +535,9 @@ class HeaderSearch {
   ///
   /// \return false if \#including the file will have no effect or true
   /// if we should include it.
+  ///
+  /// \param M The module to which `File` belongs (this should usually be the
+  /// SuggestedModule returned by LookupFile/LookupSubframeworkHeader)
   bool ShouldEnterIncludeFile(Preprocessor &PP, FileEntryRef File,
                               bool isImport, bool ModulesEnabled, Module *M,
                               bool &IsFirstIncludeOfFile);
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcc2b56df166b8..fb6ec456dd277e 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -1307,6 +1307,23 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(
 // File Info Management.
 //===----------------------------------------------------------------------===//
 
+static void mergeHeaderFileInfoModuleBits(HeaderFileInfo &HFI,
+                                          bool isModuleHeader,
+                                          bool isTextualModuleHeader) {
+  assert((!isModuleHeader || !isTextualModuleHeader) &&
+         "A header can't build with a module and be textual at the same time");
+  HFI.isModuleHeader |= isModuleHeader;
+  if (HFI.isModuleHeader)
+    HFI.isTextualModuleHeader = false;
+  else
+    HFI.isTextualModuleHeader |= isTextualModuleHeader;
+}
+
+void HeaderFileInfo::mergeModuleMembership(ModuleMap::ModuleHeaderRole Role) {
+  mergeHeaderFileInfoModuleBits(*this, ModuleMap::isModular(Role),
+                                (Role & ModuleMap::TextualHeader));
+}
+
 /// Merge the header file info provided by \p OtherHFI into the current
 /// header file info (\p HFI)
 static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
@@ -1315,7 +1332,8 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
 
   HFI.isImport |= OtherHFI.isImport;
   HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
-  HFI.isModuleHeader |= OtherHFI.isModuleHeader;
+  mergeHeaderFileInfoModuleBits(HFI, OtherHFI.isModuleHeader,
+                                OtherHFI.isTextualModuleHeader);
 
   if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
     HFI.ControllingMacro = OtherHFI.ControllingMacro;
@@ -1403,11 +1421,9 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {
 void HeaderSearch::MarkFileModuleHeader(FileEntryRef FE,
                                         ModuleMap::ModuleHeaderRole Role,
                                         bool isCompilingModuleHeader) {
-  bool isModularHeader = ModuleMap::isModular(Role);
-
   // Don't mark the file info as non-external if there's nothing to change.
   if (!isCompilingModuleHeader) {
-    if (!isModularHeader)
+    if ((Role & ModuleMap::ExcludedHeader))
       return;
     auto *HFI = getExistingFileInfo(FE);
     if (HFI && HFI->isModuleHeader)
@@ -1415,82 +1431,109 @@ void HeaderSearch::MarkFileModuleHeader(FileEntryRef FE,
   }
 
   auto &HFI = getFileInfo(FE);
-  HFI.isModuleHeader |= isModularHeader;
-  HFI.isCompilingModuleHeader |= isCompilingModuleHeader;
+  HFI.mergeModuleMembership(Role);
+  HFI.isCompilingModuleHeader |= (HFI.isModuleHeader && isCompilingModuleHeader);
 }
 
 bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
                                           FileEntryRef File, bool isImport,
                                           bool ModulesEnabled, Module *M,
                                           bool &IsFirstIncludeOfFile) {
-  ++NumIncluded; // Count # of attempted #includes.
-
+  // An include file should be entered if either:
+  // 1. This is the first include of the file.
+  // 2. This file can be included multiple times, that is it's not an
+  //    "include-once" file.
+  //
+  // Include-once is controlled by these preprocessor directives.
+  //
+  // #pragma once
+  // This directive is in the include file, and marks it as an include-once
+  // file.
+  //
+  // #import <file>
+  // This directive is in the includer, and indicates that the include file
+  // should only be entered if this is the first include.
+  ++NumIncluded;
   IsFirstIncludeOfFile = false;
-
-  // Get information about this file.
   HeaderFileInfo &FileInfo = getFileInfo(File);
 
-  // FIXME: this is a workaround for the lack of proper modules-aware support
-  // for #import / #pragma once
-  auto TryEnterImported = [&]() -> bool {
+  auto MaybeReenterImportedFile = [&]() -> bool {
     if (!ModulesEnabled)
       return false;
+    // Modules add a wrinkle though: what's included isn't necessarily visible.
+    // Consider this module.
+    // module Example {
+    //   module A { header "a.h" export * }
+    //   module B { header "b.h" export * }
+    // }
+    // b.h includes c.h. The main file includes a.h, which will trigger a module
+    // build of Example, and c.h will be included. However, c.h isn't visible to
+    // the main file. Normally this is fine, the main file can just include c.h
+    // if it needs it. If c.h is in a module, the include will translate into a
+    // module import, this function will be skipped, and everything will work as
+    // expected. However, if c.h is not in a module (or is `textual`), then this
+    // function will run. If c.h is include-once, it will not be entered from
+    // the main file and it will still not be visible.
+
     // Ensure FileInfo bits are up to date.
     ModMap.resolveHeaderDirectives(File);
-    // Modules with builtins are special; multiple modules use builtins as
-    // modular headers, example:
-    //
-    //    module stddef { header "stddef.h" export * }
-    //
-    // After module map parsing, this expands to:
-    //
-    //    module stddef {
-    //      header "/path_to_builtin_dirs/stddef.h"
-    //      textual "stddef.h"
-    //    }
+
+    // This brings up a subtlety of #import - it's not a very good indicator of
+    // include-once. Developers are often unaware of the difference between
+    // #include and #import, and tend to use one or the other indiscrimiately.
+    // In order to support #include on include-once headers that lack macro
+    // guards and `#pragma once` (which is the vast majority of Objective-C
+    // headers), if a file is ever included with #import, it's marked as
+    // isImport in the HeaderFileInfo and treated as include-once. This allows
+    // #include to work in Objective-C.
+    // #include <Foundation/Foundation.h>
+    // #include <Foundation/NSString.h>
+    // Foundation.h has an #import of NSString.h, and so the second #include is
+    // skipped even though NSString.h has no `#pragma once` and no macro guard.
     //
-    // It's common that libc++ and system modules will both define such
-    // submodules. Make sure cached results for a builtin header won't
-    // prevent other builtin modules from potentially entering the builtin
-    // header. Note that builtins are header guarded and the decision to
-    // actually enter them is postponed to the controlling macros logic below.
-    bool TryEnterHdr = false;
-    if (FileInfo.isCompilingModuleHeader && FileInfo.isModuleHeader)
-      TryEnterHdr = ModMap.isBuiltinHeader(File);
-
-    // Textual headers can be #imported from different modules. Since ObjC
-    // headers find in the wild might rely only on #import and do not contain
-    // controlling macros, be conservative and only try to enter textual headers
-    // if such macro is present.
-    if (!FileInfo.isModuleHeader &&
-        FileInfo.getControllingMacro(ExternalLookup))
-      TryEnterHdr = true;
-    return TryEnterHdr;
+    // However, this helpfulness causes problems with modules. If c.h is not an
+    // include-once file, but something included it with #import anyway (as is
+    // typical in Objective-C code), this include will be skipped and c.h will
+    // not be visible. If the file is not `#pragma once`, consider it not
+    // include-once if it is a `textual` header in a module.
+    return !FileInfo.isPragmaOnce && FileInfo.isTextualModuleHeader;
+    // If the include file has a macro guard, then it might still not be
+    // re-entered if the controlling macro is visibly defined. e.g. another
+    // header in the module being built included this file and local submodule
+    // visibility is not enabled.
+
+    // It might be tempting to re-enter the include-once file if it's not
+    // visible in an attempt to make it visible. However this will still cause
+    // redeclaration errors against the known-but-not-visible declarations. The
+    // include file not being visible will most likely cause "undefined x"
+    // errors, but at least there's a slim chance of compilation succeeding.
   };
 
-  // If this is a #import directive, check that we have not already imported
-  // this header.
   if (isImport) {
-    // If this has already been imported, don't import it again.
+    // As discussed above, record that this file was ever `#import`ed, and treat
+    // it as an include-once file from here out.
     FileInfo.isImport = true;
-
-    // Has this already been #import'ed or #include'd?
-    if (PP.alreadyIncluded(File) && !TryEnterImported())
+    if (PP.alreadyIncluded(File) && !MaybeReenterImportedFile())
       return false;
   } else {
-    // Otherwise, if this is a #include of a file that was previously #import'd
-    // or if this is the second #include of a #pragma once file, ignore it.
-    if ((FileInfo.isPragmaOnce || FileInfo.isImport) && !TryEnterImported())
+    // isPragmaOnce and isImport are only set after the file has been included
+    // at least once. If either are set then this is a repeat #include of an
+    // include-once file.
+    if (FileInfo.isPragmaOnce ||
+        (FileInfo.isImport && !MaybeReenterImportedFile()))
       return false;
   }
 
-  // Next, check to see if the file is wrapped with #ifndef guards.  If so, and
-  // if the macro that guards it is defined, we know the #include has no effect.
-  if (const IdentifierInfo *ControllingMacro
-      = FileInfo.getControllingMacro(ExternalLookup)) {
+  // As a final optimization, check for a macro guard and skip entering the file
+  // if the controlling macro is defined. The macro guard will effectively erase
+  // the file's contents, and the include would have no effect other than to
+  // waste time opening and reading a file.
+  if (const IdentifierInfo *ControllingMacro =
+          FileInfo.getControllingMacro(ExternalLookup)) {
     // If the header corresponds to a module, check whether the macro is already
-    // defined in that module rather than checking in the current set of visible
-    // modules.
+    // defined in that module rather than checking all visible modules. This is
+    // mainly to cover corner cases where the same controlling macro is used in
+    // different files in multiple modules.
     if (M ? PP.isMacroDefinedInLocalModule(ControllingMacro, M)
           : PP.isMacroDefined(ControllingMacro)) {
       ++NumMultiIncludeFileOptzn;
@@ -1499,7 +1542,6 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
   }
 
   IsFirstIncludeOfFile = PP.markIncluded(File);
-
   return true;
 }
 
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 683a076e6bc399..862618eb9ed7f3 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2094,7 +2094,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
       Module::Header H = {std::string(key.Filename), "", *FE};
       ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
     }
-    HFI.isModuleHeader |= ModuleMap::isModular(HeaderRole);
+    HFI.mergeModuleMembership(HeaderRole);
   }
 
   // This HeaderFileInfo was externally loaded.
diff --git a/clang/test/Modules/builtin-import.mm b/clang/test/Modules/builtin-import.mm
index 8a27cb358484c9..52db9c15803ce5 100644
--- a/clang/test/Modules/builtin-import.mm
+++ b/clang/test/Modules/builtin-import.mm
@@ -1,4 +1,6 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -nobuiltininc -nostdinc++ -isysroot %S/Inputs/libc-libcxx/sysroot -isystem %S/Inputs/libc-libcxx/sysroot/usr/include/c++/v1 -isystem %S/Inputs/libc-libcxx/sysroot/usr/include -std=c++11 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x objective-c++ %s
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -nobuiltininc -nostdinc++ -isysroot %S/Inputs/libc-libcxx/sysroot -isystem %S/Inputs/libc-libcxx/sysroot/usr/include/c++/v1 -isystem %S/Inputs/libc-libcxx/sysroot/usr/include -std=c++11 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x objective-c++ -fmodules-local-submodule-visibility %s
 
 #include <stdio.h>
diff --git a/clang/test/Modules/import-textual-noguard.mm b/clang/test/Modules/import-textual-noguard.mm
index dd124b6609d000..ffc506117f519d 100644
--- a/clang/test/Modules/import-textual-noguard.mm
+++ b/clang/test/Modules/import-textual-noguard.mm
@@ -1,7 +1,11 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M2 -fmodules-cache-path=%t -x objective-c++ %s -verify
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M2 -fmodules-cache-path=%t -x objective-c++ -fmodules-local-submodule-visibility %s -verify
 
-#include "A/A.h" // expected-error {{could not build module 'M'}}
+// expected-no-diagnostics
+
+#include "A/A.h"
 #include "B/B.h"
 
 typedef aint xxx;
diff --git a/clang/test/Modules/import-textual.mm b/clang/test/Modules/import-textual.mm
index 6593239d7fd709..94a6aa448cdc2a 100644
--- a/clang/test/Modules/import-textual.mm
+++ b/clang/test/Modules/import-textual.mm
@@ -1,4 +1,6 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M -fmodules-cache-path=%t -x objective-c++ %s -verify
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M -fmodules-cache-path=%t -x objective-c++ -fmodules-local-submodule-visibility %s -verify
 
 // expected-no-diagnostics
diff --git a/clang/test/Modules/multiple-import.m b/clang/test/Modules/multiple-import.m
new file mode 100644
index 00000000000000..24b0f50cd3433c
--- /dev/null
+++ b/clang/test/Modules/multiple-import.m
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -fmodules-cache-path=%t/no-lsv -fmodules -fimplicit-module-maps -I%t %t/multiple-imports.m -verify
+// RUN: %clang_cc1 -fmodules-cache-path=%t/lsv -fmodules -fimplicit-module-maps -fmodules-local-submodule-visibility -I%t %t/multiple-imports.m -verify
+
+//--- multiple-imports.m
+// expected-no-diagnostics
+#import <one.h>
+#import <assert.h>
+void test(void) {
+  assert(0);
+}
+
+//--- module.modulemap
+module Submodules [system] {
+  module one {
+    header "one.h"
+    export *
+  }
+  module two {
+    header "two.h"
+    export *
+  }
+}
+
+module libc [system] {
+  textual header "assert.h"
+}
+
+//--- one.h
+#ifndef one_h
+#define one_h
+#endif
+
+//--- two.h
+#ifndef two_h
+#define two_h
+#include <assert.h>
+#endif
+
+//--- assert.h
+#undef assert
+#define assert(expression) ((void)0)



More information about the cfe-commits mailing list