[clang] Clean up enumerators when merging named enums (PR #114240)
Michael Jabbour via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 30 07:36:43 PDT 2024
https://github.com/michael-jabbour-sonarsource created https://github.com/llvm/llvm-project/pull/114240
Clang already has a mechanism to clean up enumerators for typedefs to anonymous enums. So the following example code used to be handled correctly while merging, and ASTWriter behaves as expected:
```c
typedef enum { Val } AnonEnum;
```
However, the mentioned mechanism didn't handle named enums. This led to stale declarations in `IdResolver`, causing an assertion failure in ASTWriter (``Assertion `DeclIDs.contains(D) && "Declaration not emitted!"' failed.``) when a module is being serialized with the following merged enums:
```c
typedef enum Enum1 { Val_A } Enum1;
enum Enum2 { Val_B };
```
The PR applies the same mechanism in the named enums case. Additionally, I dropped the call to `getLexicalDeclContext()->removeDecl` as it was causing a wrong odr-violation diagnostic with anonymous enums.
It might be easier to review commit by commit. Any feedback is appreciated.
### Context
This fixes frontend crashes that were encountered when certain Objective-C modules are included on Xcode 16. For example, by running `CC=/path/to/clang-19 xcodebuild clean build` on a project that contains the following Objective-C file:
```c
#include <os/atomic.h>
int main() {
return memory_order_relaxed;
}
```
This crashes the parser in release builds when ASTReader tries to load the enumerator declaration.
>From cc3cf25da67c9f8b9edabb318c6011cad9bd2f58 Mon Sep 17 00:00:00 2001
From: Michael Jabbour <michael.jabbour at sonarsource.com>
Date: Tue, 29 Oct 2024 11:16:09 +0100
Subject: [PATCH 1/4] [NFC] Factor out RetireNodesFromMergedDecl
---
clang/include/clang/Sema/Sema.h | 6 ++++++
clang/lib/Sema/SemaDecl.cpp | 27 +++++++++++++++------------
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 93d98e1cbb9c81..70bbb882eb2b13 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4034,6 +4034,12 @@ class Sema final : public SemaBase {
void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
LookupResult &OldDecls);
+ /// RetireNodesFromMergedDecl - We have just merged the decl 'New' by making
+ /// another definition visible.
+ /// This method performs any necessary cleanup on the parser state to discard
+ /// child nodes from newly parsed decl we are retiring.
+ void RetireNodesFromMergedDecl(Scope *S, Decl *New);
+
/// MergeFunctionDecl - We just parsed a function 'New' from
/// declarator D which has the same name and scope as a previous
/// declaration 'Old'. Figure out how to resolve this situation,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f8e5f3c6d309d6..54c4c5f4e5395e 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -2551,18 +2551,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
// Make the old tag definition visible.
makeMergedDefinitionVisible(Hidden);
- // If this was an unscoped enumeration, yank all of its enumerators
- // out of the scope.
- if (isa<EnumDecl>(NewTag)) {
- Scope *EnumScope = getNonFieldDeclScope(S);
- for (auto *D : NewTag->decls()) {
- auto *ED = cast<EnumConstantDecl>(D);
- assert(EnumScope->isDeclScope(ED));
- EnumScope->RemoveDecl(ED);
- IdResolver.RemoveDecl(ED);
- ED->getLexicalDeclContext()->removeDecl(ED);
- }
- }
+ RetireNodesFromMergedDecl(S, NewTag);
}
}
@@ -2639,6 +2628,20 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
notePreviousDefinition(Old, New->getLocation());
}
+void Sema::RetireNodesFromMergedDecl(Scope *S, Decl *New) {
+ // If this was an unscoped enumeration, yank all of its enumerators
+ // out of the scope.
+ if (auto *ED = dyn_cast<EnumDecl>(New); ED) {
+ Scope *EnumScope = getNonFieldDeclScope(S);
+ for (auto *ECD : ED->enumerators()) {
+ assert(EnumScope->isDeclScope(ECD));
+ EnumScope->RemoveDecl(ECD);
+ IdResolver.RemoveDecl(ECD);
+ ECD->getLexicalDeclContext()->removeDecl(ECD);
+ }
+ }
+}
+
/// DeclhasAttr - returns true if decl Declaration already has the target
/// attribute.
static bool DeclHasAttr(const Decl *D, const Attr *A) {
>From f07904ae5468658a3e7e4a649cd183734479d6cc Mon Sep 17 00:00:00 2001
From: Michael Jabbour <michael.jabbour at sonarsource.com>
Date: Tue, 29 Oct 2024 12:07:29 +0100
Subject: [PATCH 2/4] Drop enumerators when merging named enums
This fixes a crash in ASTWriter when the stale IdResolver entries are
used when writing the identifier table.
assert(DeclIDs.contains(D) && "Declaration not emitted!");
---
clang/include/clang/Sema/Sema.h | 2 +-
clang/lib/Parse/ParseDecl.cpp | 2 +-
clang/lib/Parse/ParseDeclCXX.cpp | 3 +-
clang/lib/Sema/SemaDecl.cpp | 6 ++-
clang/test/Modules/modules-merge-enum.m | 52 +++++++++++++++++++++++++
5 files changed, 60 insertions(+), 5 deletions(-)
create mode 100644 clang/test/Modules/modules-merge-enum.m
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 70bbb882eb2b13..d53bcf63dd1bd9 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3910,7 +3910,7 @@ class Sema final : public SemaBase {
/// Perform ODR-like check for C/ObjC when merging tag types from modules.
/// Differently from C++, actually parse the body and reject / error out
/// in case of a structural mismatch.
- bool ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody);
+ bool ActOnDuplicateDefinition(Scope *S, Decl *Prev, SkipBodyInfo &SkipBody);
typedef void *SkippedDefinitionContext;
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 31984453487aef..750301a9155d79 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -5645,7 +5645,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagDecl;
ParseEnumBody(StartLoc, D);
if (SkipBody.CheckSameAsPrevious &&
- !Actions.ActOnDuplicateDefinition(TagDecl, SkipBody)) {
+ !Actions.ActOnDuplicateDefinition(getCurScope(), TagDecl, SkipBody)) {
DS.SetTypeSpecError();
return;
}
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index 60aab1411a96c5..505dff17bd80f2 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -2350,7 +2350,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
// Parse the definition body.
ParseStructUnionBody(StartLoc, TagType, cast<RecordDecl>(D));
if (SkipBody.CheckSameAsPrevious &&
- !Actions.ActOnDuplicateDefinition(TagOrTempResult.get(), SkipBody)) {
+ !Actions.ActOnDuplicateDefinition(getCurScope(),
+ TagOrTempResult.get(), SkipBody)) {
DS.SetTypeSpecError();
return;
}
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 54c4c5f4e5395e..efe0c541c38cc3 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -2631,7 +2631,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
void Sema::RetireNodesFromMergedDecl(Scope *S, Decl *New) {
// If this was an unscoped enumeration, yank all of its enumerators
// out of the scope.
- if (auto *ED = dyn_cast<EnumDecl>(New); ED) {
+ if (auto *ED = dyn_cast<EnumDecl>(New); ED && !ED->isScoped()) {
Scope *EnumScope = getNonFieldDeclScope(S);
for (auto *ECD : ED->enumerators()) {
assert(EnumScope->isDeclScope(ECD));
@@ -18062,12 +18062,14 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
AddPushedVisibilityAttribute(Tag);
}
-bool Sema::ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody) {
+bool Sema::ActOnDuplicateDefinition(Scope *S, Decl *Prev,
+ SkipBodyInfo &SkipBody) {
if (!hasStructuralCompatLayout(Prev, SkipBody.New))
return false;
// Make the previous decl visible.
makeMergedDefinitionVisible(SkipBody.Previous);
+ RetireNodesFromMergedDecl(S, SkipBody.New);
return true;
}
diff --git a/clang/test/Modules/modules-merge-enum.m b/clang/test/Modules/modules-merge-enum.m
new file mode 100644
index 00000000000000..240e59144fbca7
--- /dev/null
+++ b/clang/test/Modules/modules-merge-enum.m
@@ -0,0 +1,52 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// RUN: %clang -fmodules -fmodules-cache-path=%t/modcache -fsyntax-only %t/source.m
+// TODO: Expect some AST shape here.
+
+
+//--- shared.h
+// This header is shared between two modules, but it's not a module itself.
+// The enums defined here are parsed in both modules, and merged while building ModB.
+typedef enum MyEnum1 { MyVal_A } MyEnum1;
+enum MyEnum2 { MyVal_B };
+typedef enum { MyVal_C } MyEnum3;
+
+// In this case, no merging happens on the EnumDecl in Objective-C, and ASTWriter writes both EnumConstantDecls when building ModB.
+enum { MyVal_D };
+
+//--- module.modulemap
+module ModA {
+ module ModAFile1 {
+ header "ModAFile1.h"
+ export *
+ }
+ module ModAFile2 {
+ header "ModAFile2.h"
+ export *
+ }
+}
+// The goal of writing ModB is to test that ASTWriter can handle the merged AST nodes correctly.
+// For example, a stale declaration in IdResolver can cause an assertion failure while writing the identifier table.
+module ModB {
+ header "ModBFile.h"
+ export *
+}
+
+//--- ModAFile1.h
+#include "shared.h"
+
+//--- ModAFile2.h
+// Including this file, triggers loading of the module ModA with nodes coming ModAFile1.h being hidden.
+
+//--- ModBFile.h
+// ModBFile depends on ModAFile2.h only.
+#include "ModAFile2.h"
+// Including shared.h here causes Sema to merge the AST nodes from shared.h with the hidden ones from ModA.
+#include "shared.h"
+
+
+//--- source.m
+#include "ModBFile.h"
+
+int main() { return MyVal_A + MyVal_B + MyVal_C + MyVal_D; }
>From c42c48b587e347357a0fe795295b9bd5290ee930 Mon Sep 17 00:00:00 2001
From: Michael Jabbour <michael.jabbour at sonarsource.com>
Date: Tue, 29 Oct 2024 14:19:27 +0100
Subject: [PATCH 3/4] Remove call to getLexicalDeclContext()->removeDecl
The lexical decl context of the enumerator is always the EnumDecl. The
call used to alter the AST of the enum in ModB causing the following
error when -ast-dump-all is provided.
```
error: 'MyEnum3' has different definitions in different modules; definition in module 'ModA.ModAFile1' first difference is enum with 1 element
20 | typedef enum { MyVal_C } MyEnum3;
| ^~~~~~~~~~~~~~~~
note: but in 'ModB' found enum with 0 elements
20 | typedef enum { MyVal_C } MyEnum3;
```
---
clang/lib/Sema/SemaDecl.cpp | 1 -
clang/test/Modules/modules-merge-enum.m | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index efe0c541c38cc3..a38d2d00a5410d 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -2637,7 +2637,6 @@ void Sema::RetireNodesFromMergedDecl(Scope *S, Decl *New) {
assert(EnumScope->isDeclScope(ECD));
EnumScope->RemoveDecl(ECD);
IdResolver.RemoveDecl(ECD);
- ECD->getLexicalDeclContext()->removeDecl(ECD);
}
}
}
diff --git a/clang/test/Modules/modules-merge-enum.m b/clang/test/Modules/modules-merge-enum.m
index 240e59144fbca7..30dd17e40c54a8 100644
--- a/clang/test/Modules/modules-merge-enum.m
+++ b/clang/test/Modules/modules-merge-enum.m
@@ -1,7 +1,7 @@
// RUN: rm -rf %t
// RUN: split-file %s %t
-// RUN: %clang -fmodules -fmodules-cache-path=%t/modcache -fsyntax-only %t/source.m
+// RUN: %clang -fmodules -fmodules-cache-path=%t/modcache -fsyntax-only %t/source.m -Xclang -ast-dump-all
// TODO: Expect some AST shape here.
>From b29f25ff5a7aaf7e28d03d585df714cf40b767aa Mon Sep 17 00:00:00 2001
From: Michael Jabbour <michael.jabbour at sonarsource.com>
Date: Tue, 29 Oct 2024 16:44:38 +0100
Subject: [PATCH 4/4] Improve test by adding expected AST nodes
---
clang/test/Modules/modules-merge-enum.m | 46 +++++++++++++++++++++++--
1 file changed, 44 insertions(+), 2 deletions(-)
diff --git a/clang/test/Modules/modules-merge-enum.m b/clang/test/Modules/modules-merge-enum.m
index 30dd17e40c54a8..4873c7e6b4708e 100644
--- a/clang/test/Modules/modules-merge-enum.m
+++ b/clang/test/Modules/modules-merge-enum.m
@@ -1,19 +1,61 @@
// RUN: rm -rf %t
// RUN: split-file %s %t
-// RUN: %clang -fmodules -fmodules-cache-path=%t/modcache -fsyntax-only %t/source.m -Xclang -ast-dump-all
-// TODO: Expect some AST shape here.
+// RUN: %clang -fmodules -fmodules-cache-path=%t/modcache -fsyntax-only %t/source.m -Xclang -ast-dump-all 2>&1 | FileCheck %s
//--- shared.h
// This header is shared between two modules, but it's not a module itself.
// The enums defined here are parsed in both modules, and merged while building ModB.
+
typedef enum MyEnum1 { MyVal_A } MyEnum1;
+// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum1
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_A 'int'
+// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum1 'enum MyEnum1'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported
+// CHECK-NEXT: | `-EnumType 0x{{.*}} 'enum MyEnum1' imported
+// CHECK-NEXT: | `-Enum 0x{{.*}} 'MyEnum1'
+
+
enum MyEnum2 { MyVal_B };
+// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum2
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_B 'int'
+
+
typedef enum { MyVal_C } MyEnum3;
+// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations>
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_C 'int'
+// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum3 'enum MyEnum3':'MyEnum3'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported
+// CHECK-NEXT: | `-EnumType 0x{{.*}} 'MyEnum3' imported
+// CHECK-NEXT: | `-Enum 0x{{.*}} ''
+
// In this case, no merging happens on the EnumDecl in Objective-C, and ASTWriter writes both EnumConstantDecls when building ModB.
enum { MyVal_D };
+// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 hidden <undeserialized declarations>
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyVal_D 'int'
+
+
+// Redeclarations coming from ModB.
+// CHECK: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum1 'enum MyEnum1'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported
+// CHECK-NEXT: | `-EnumType 0x{{.*}} 'enum MyEnum1' imported
+// CHECK-NEXT: | `-Enum 0x{{.*}} 'MyEnum1'
+
+// CHECK: |-EnumDecl 0x{{.*}} prev 0x{{.*}} imported in ModB <undeserialized declarations>
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModB MyVal_C 'int'
+// CHECK-NEXT: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum3 'enum MyEnum3':'MyEnum3'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported
+// CHECK-NEXT: | `-EnumType 0x{{.*}} 'MyEnum3' imported
+// CHECK-NEXT: | `-Enum 0x{{.*}} ''
+
+// CHECK: |-EnumDecl 0x{{.*}} imported in ModB <undeserialized declarations>
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} first 0x{{.*}} imported in ModB referenced MyVal_D 'int'
//--- module.modulemap
module ModA {
More information about the cfe-commits
mailing list