[clang] [Modules] Fix the inconsistency of which `Decl` should be serialized for an identifier. (PR #135887)
Volodymyr Sapsai via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 16 14:34:02 PDT 2025
https://github.com/vsapsai updated https://github.com/llvm/llvm-project/pull/135887
>From d944252ce2f08d1522f7297e4d7f9a7b730db180 Mon Sep 17 00:00:00 2001
From: Volodymyr Sapsai <vsapsai at apple.com>
Date: Tue, 15 Apr 2025 16:34:47 -0700
Subject: [PATCH 1/2] [Modules] Fix the inconsistency of which `Decl` should be
serialized for an identifier.
Fixes the assertion failure
> Assertion failed: (DeclIDs.contains(D) && "Declaration not emitted!"), function getDeclID, file ASTWriter.cpp, line 6873.
We prepare to serialize a `Decl` by adding it to `DeclIDs` in
`ASTWriter::GetDeclRef`. But the checks before this call aren't the same as
when we are actually serializing a `Decl` in `ASTIdentifierTableTrait::EmitData`
and `ASTWriter::WriteIdentifierTable`. That's how we can end up serializing
a `Decl` not present in `DeclIDs` and hitting the assertion. With the assertions
disabled clang crashes when trying to use a deserialized null `Decl`.
Fix by making the code checks before `ASTWriter::GetDeclRef` call similar
to those we have before the serialization.
rdar://139319683
---
clang/include/clang/Serialization/ASTWriter.h | 1 +
clang/lib/Serialization/ASTWriter.cpp | 11 +-
clang/test/Modules/non-modular-decl-use.c | 100 ++++++++++++++++++
3 files changed, 109 insertions(+), 3 deletions(-)
create mode 100644 clang/test/Modules/non-modular-decl-use.c
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index bdf3aca0637c8..5075232596cea 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -224,6 +224,7 @@ class ASTWriter : public ASTDeserializationListener,
/// discovery) and start at 2. 1 is reserved for the translation
/// unit, while 0 is reserved for NULL.
llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;
+ // TMP: ^ DeclIDs type for reference
/// Set of predefined decls. This is a helper data to determine if a decl
/// is predefined. It should be more clear and safer to query the set
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 95b5718f1d140..c9db69b3b784b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3925,6 +3925,7 @@ class ASTIdentifierTableTrait {
// Only emit declarations that aren't from a chained PCH, though.
SmallVector<NamedDecl *, 16> Decls(IdResolver->decls(II));
for (NamedDecl *D : llvm::reverse(Decls))
+ // TMP: corresponding `getDeclForLocalLookup` call
LE.write<DeclID>((DeclID)Writer.getDeclID(
getDeclForLocalLookup(PP.getLangOpts(), D)));
}
@@ -3969,6 +3970,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
if (isLocalIdentifierID(ID) || II->hasChangedSinceDeserialization() ||
(Trait.needDecls() &&
II->hasFETokenInfoChangedSinceDeserialization()))
+ // TMP: ^ corresponding `hasFETokenInfoChangedSinceDeserialization` call
Generator.insert(II, ID, Trait);
}
@@ -5664,14 +5666,16 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) {
llvm::SmallVector<const IdentifierInfo*, 256> IIs;
for (const auto &ID : SemaRef.PP.getIdentifierTable()) {
const IdentifierInfo *II = ID.second;
- if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization())
+ if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization() ||
+ II->hasFETokenInfoChangedSinceDeserialization())
IIs.push_back(II);
}
// Sort the identifiers to visit based on their name.
llvm::sort(IIs, llvm::deref<std::less<>>());
+ const LangOptions &LangOpts = getLangOpts();
for (const IdentifierInfo *II : IIs)
- for (const Decl *D : SemaRef.IdResolver.decls(II))
- GetDeclRef(D);
+ for (NamedDecl *D : SemaRef.IdResolver.decls(II))
+ GetDeclRef(getDeclForLocalLookup(LangOpts, D));
}
// Write all of the DeclsToCheckForDeferredDiags.
@@ -6845,6 +6849,7 @@ LocalDeclID ASTWriter::GetDeclRef(const Decl *D) {
}
assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
+ // TMP: adding D to DeclIDs
LocalDeclID &ID = DeclIDs[D];
if (ID.isInvalid()) {
if (DoneWritingDeclsAndTypes) {
diff --git a/clang/test/Modules/non-modular-decl-use.c b/clang/test/Modules/non-modular-decl-use.c
new file mode 100644
index 0000000000000..1bd87bf284620
--- /dev/null
+++ b/clang/test/Modules/non-modular-decl-use.c
@@ -0,0 +1,100 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -fsyntax-only -I %t/include %t/test.c \
+// RUN: -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache
+
+// Test when a decl is present in multiple modules through an inclusion of
+// a non-modular header. Make sure such decl is serialized correctly and can be
+// used after deserialization.
+
+//--- include/non-modular.h
+#ifndef NON_MODULAR_H
+#define NON_MODULAR_H
+
+union TestUnion {
+ int x;
+ float y;
+};
+
+struct ReferenceUnion1 {
+ union TestUnion name;
+ unsigned versionMajor;
+};
+struct ReferenceUnion2 {
+ union TestUnion name;
+ unsigned versionMinor;
+};
+
+// Test another kind of RecordDecl.
+struct TestStruct {
+ int p;
+ float q;
+};
+
+struct ReferenceStruct1 {
+ unsigned fieldA;
+ struct TestStruct fieldB;
+};
+
+struct ReferenceStruct2 {
+ unsigned field1;
+ struct TestStruct field2;
+};
+
+#endif
+
+//--- include/piecewise1-empty.h
+//--- include/piecewise1-initially-hidden.h
+#include <non-modular.h>
+
+//--- include/piecewise2-empty.h
+//--- include/piecewise2-initially-hidden.h
+#include <non-modular.h>
+
+//--- include/with-multiple-decls.h
+#include <piecewise1-empty.h>
+// Include the non-modular header and resolve a name duplication between decl
+// in non-modular.h and in piecewise1-initially-hidden.h, create a
+// redeclaration chain.
+#include <non-modular.h>
+// Include a decl with a duplicate name again to add more to IdentifierResolver.
+#include <piecewise2-empty.h>
+
+//--- include/module.modulemap
+module Piecewise1 {
+ module Empty {
+ header "piecewise1-empty.h"
+ }
+ module InitiallyHidden {
+ header "piecewise1-initially-hidden.h"
+ export *
+ }
+}
+
+module Piecewise2 {
+ module Empty {
+ header "piecewise2-empty.h"
+ }
+ module InitiallyHidden {
+ header "piecewise2-initially-hidden.h"
+ export *
+ }
+}
+
+module WithMultipleDecls {
+ header "with-multiple-decls.h"
+ export *
+}
+
+//--- test.c
+#include <with-multiple-decls.h>
+
+struct Test {
+ int x;
+ union TestUnion name;
+};
+
+struct Test2 {
+ struct TestStruct name;
+ float y;
+};
>From f3047852c0818c0afa867d5ccd5413dc8ef2d309 Mon Sep 17 00:00:00 2001
From: Volodymyr Sapsai <vsapsai at apple.com>
Date: Wed, 16 Apr 2025 14:33:37 -0700
Subject: [PATCH 2/2] Remove TMP comments.
---
clang/include/clang/Serialization/ASTWriter.h | 1 -
clang/lib/Serialization/ASTWriter.cpp | 3 ---
2 files changed, 4 deletions(-)
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 5075232596cea..bdf3aca0637c8 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -224,7 +224,6 @@ class ASTWriter : public ASTDeserializationListener,
/// discovery) and start at 2. 1 is reserved for the translation
/// unit, while 0 is reserved for NULL.
llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;
- // TMP: ^ DeclIDs type for reference
/// Set of predefined decls. This is a helper data to determine if a decl
/// is predefined. It should be more clear and safer to query the set
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c9db69b3b784b..da9a00f16b624 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3925,7 +3925,6 @@ class ASTIdentifierTableTrait {
// Only emit declarations that aren't from a chained PCH, though.
SmallVector<NamedDecl *, 16> Decls(IdResolver->decls(II));
for (NamedDecl *D : llvm::reverse(Decls))
- // TMP: corresponding `getDeclForLocalLookup` call
LE.write<DeclID>((DeclID)Writer.getDeclID(
getDeclForLocalLookup(PP.getLangOpts(), D)));
}
@@ -3970,7 +3969,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
if (isLocalIdentifierID(ID) || II->hasChangedSinceDeserialization() ||
(Trait.needDecls() &&
II->hasFETokenInfoChangedSinceDeserialization()))
- // TMP: ^ corresponding `hasFETokenInfoChangedSinceDeserialization` call
Generator.insert(II, ID, Trait);
}
@@ -6849,7 +6847,6 @@ LocalDeclID ASTWriter::GetDeclRef(const Decl *D) {
}
assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
- // TMP: adding D to DeclIDs
LocalDeclID &ID = DeclIDs[D];
if (ID.isInvalid()) {
if (DoneWritingDeclsAndTypes) {
More information about the cfe-commits
mailing list