[clang] [clang] Fix ASTWriter crash after merging named enums (PR #114240)

Michael Jabbour via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 7 00:36:07 PST 2025


https://github.com/michael-jabbour-sonarsource updated https://github.com/llvm/llvm-project/pull/114240

>From 9eede4531ad27ce1d77df492076d5fbcd237fd3f Mon Sep 17 00:00:00 2001
From: Michael Jabbour <michael.jabbour at sonarsource.com>
Date: Fri, 7 Mar 2025 09:32:54 +0100
Subject: [PATCH] [clang] Fix ASTWriter crash after merging named enums

---
 clang/docs/ReleaseNotes.rst             |   1 +
 clang/include/clang/Sema/Sema.h         |   8 +-
 clang/lib/Parse/ParseDecl.cpp           |   2 +-
 clang/lib/Parse/ParseDeclCXX.cpp        |   3 +-
 clang/lib/Sema/SemaDecl.cpp             |  30 ++++---
 clang/test/Modules/modules-merge-enum.m | 111 ++++++++++++++++++++++++
 6 files changed, 139 insertions(+), 16 deletions(-)
 create mode 100644 clang/test/Modules/modules-merge-enum.m

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 28856c27317f3..a25808e36bd51 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -248,6 +248,7 @@ Bug Fixes in This Version
 
 - Clang now outputs correct values when #embed data contains bytes with negative
   signed char values (#GH102798).
+- Fixed a crash when merging named enumerations in modules (#GH114240).
 - Fixed rejects-valid problem when #embed appears in std::initializer_list or
   when it can affect template argument deduction (#GH122306).
 - Fix crash on code completion of function calls involving partial order of function templates
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 08621e633b55c..fdef57e84ee3d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4008,7 +4008,7 @@ class Sema final : public SemaBase {
   /// Perform ODR-like check for C/ObjC when merging tag types from modules.
   /// Differently from C++, actually parse the body and reject / error out
   /// in case of a structural mismatch.
-  bool ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody);
+  bool ActOnDuplicateDefinition(Scope *S, Decl *Prev, SkipBodyInfo &SkipBody);
 
   typedef void *SkippedDefinitionContext;
 
@@ -4132,6 +4132,12 @@ class Sema final : public SemaBase {
   void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
                             LookupResult &OldDecls);
 
+  /// CleanupMergedEnum - We have just merged the decl 'New' by making another
+  /// definition visible.
+  /// This method performs any necessary cleanup on the parser state to discard
+  /// child nodes from newly parsed decl we are retiring.
+  void CleanupMergedEnum(Scope *S, Decl *New);
+
   /// MergeFunctionDecl - We just parsed a function 'New' from
   /// declarator D which has the same name and scope as a previous
   /// declaration 'Old'.  Figure out how to resolve this situation,
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 7ae136af47391..31cb4a24ad97e 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -5652,7 +5652,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
     Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagDecl;
     ParseEnumBody(StartLoc, D);
     if (SkipBody.CheckSameAsPrevious &&
-        !Actions.ActOnDuplicateDefinition(TagDecl, SkipBody)) {
+        !Actions.ActOnDuplicateDefinition(getCurScope(), TagDecl, SkipBody)) {
       DS.SetTypeSpecError();
       return;
     }
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index 43db715ac6d70..e48a35078892d 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -2350,7 +2350,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
       // Parse the definition body.
       ParseStructUnionBody(StartLoc, TagType, cast<RecordDecl>(D));
       if (SkipBody.CheckSameAsPrevious &&
-          !Actions.ActOnDuplicateDefinition(TagOrTempResult.get(), SkipBody)) {
+          !Actions.ActOnDuplicateDefinition(getCurScope(),
+                                            TagOrTempResult.get(), SkipBody)) {
         DS.SetTypeSpecError();
         return;
       }
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 714210c3856d7..5716eb61d4ae8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -2564,18 +2564,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
       // Make the old tag definition visible.
       makeMergedDefinitionVisible(Hidden);
 
-      // If this was an unscoped enumeration, yank all of its enumerators
-      // out of the scope.
-      if (isa<EnumDecl>(NewTag)) {
-        Scope *EnumScope = getNonFieldDeclScope(S);
-        for (auto *D : NewTag->decls()) {
-          auto *ED = cast<EnumConstantDecl>(D);
-          assert(EnumScope->isDeclScope(ED));
-          EnumScope->RemoveDecl(ED);
-          IdResolver.RemoveDecl(ED);
-          ED->getLexicalDeclContext()->removeDecl(ED);
-        }
-      }
+      CleanupMergedEnum(S, NewTag);
     }
   }
 
@@ -2652,6 +2641,19 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
   notePreviousDefinition(Old, New->getLocation());
 }
 
+void Sema::CleanupMergedEnum(Scope *S, Decl *New) {
+  // If this was an unscoped enumeration, yank all of its enumerators
+  // out of the scope.
+  if (auto *ED = dyn_cast<EnumDecl>(New); ED && !ED->isScoped()) {
+    Scope *EnumScope = getNonFieldDeclScope(S);
+    for (auto *ECD : ED->enumerators()) {
+      assert(EnumScope->isDeclScope(ECD));
+      EnumScope->RemoveDecl(ECD);
+      IdResolver.RemoveDecl(ECD);
+    }
+  }
+}
+
 /// DeclhasAttr - returns true if decl Declaration already has the target
 /// attribute.
 static bool DeclHasAttr(const Decl *D, const Attr *A) {
@@ -18347,12 +18349,14 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
   AddPushedVisibilityAttribute(Tag);
 }
 
-bool Sema::ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody) {
+bool Sema::ActOnDuplicateDefinition(Scope *S, Decl *Prev,
+                                    SkipBodyInfo &SkipBody) {
   if (!hasStructuralCompatLayout(Prev, SkipBody.New))
     return false;
 
   // Make the previous decl visible.
   makeMergedDefinitionVisible(SkipBody.Previous);
+  CleanupMergedEnum(S, SkipBody.New);
   return true;
 }
 
diff --git a/clang/test/Modules/modules-merge-enum.m b/clang/test/Modules/modules-merge-enum.m
new file mode 100644
index 0000000000000..f1010c1decc24
--- /dev/null
+++ b/clang/test/Modules/modules-merge-enum.m
@@ -0,0 +1,111 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+
+// Expect no crash
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modcache -fmodule-map-file=%t/module.modulemap %t/source.m
+
+// Add -ast-dump-all to check that the AST nodes are merged correctly.
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modcache -fmodule-map-file=%t/module.modulemap %t/source.m -ast-dump-all 2>&1 | FileCheck %s
+
+
+//--- shared.h
+// This header is shared between two modules, but it's not a module itself.
+// The enums defined here are parsed in both modules, and merged while building ModB.
+
+typedef enum MyEnum1 { MyVal_A } MyEnum1;
+// CHECK:      |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum1
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_A 'int'
+// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum1 'enum MyEnum1'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported
+// CHECK-NEXT: |   `-EnumType 0x{{.*}} 'enum MyEnum1' imported
+// CHECK-NEXT: |     `-Enum 0x{{.*}} 'MyEnum1'
+
+
+enum MyEnum2 { MyVal_B };
+// CHECK:      |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum2
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_B 'int'
+
+
+typedef enum { MyVal_C } MyEnum3;
+// CHECK:      |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations>
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_C 'int'
+// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum3 'enum MyEnum3':'MyEnum3'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported
+// CHECK-NEXT: |   `-EnumType 0x{{.*}} 'MyEnum3' imported
+// CHECK-NEXT: |     `-Enum 0x{{.*}}
+
+struct MyStruct {
+  enum MyEnum5 { MyVal_D } Field;
+};
+
+// CHECK:      |-RecordDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> struct MyStruct definition
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum5
+// CHECK-NEXT: | | |-also in ModB
+// CHECK-NEXT: | | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_D 'int'
+// CHECK-NEXT: | `-FieldDecl 0x{{.*}} imported in ModA.ModAFile1 hidden Field 'enum MyEnum5'
+
+// In this case, no merging happens on the EnumDecl in Objective-C, and ASTWriter writes both EnumConstantDecls when building ModB.
+enum { MyVal_E };
+// CHECK:      |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 hidden <undeserialized declarations>
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyVal_E 'int'
+
+
+// Redeclarations coming from ModB.
+// CHECK:      |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum1 'enum MyEnum1'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported
+// CHECK-NEXT: |   `-EnumType 0x{{.*}} 'enum MyEnum1' imported
+// CHECK-NEXT: |     `-Enum 0x{{.*}} 'MyEnum1'
+
+// CHECK:      |-EnumDecl 0x{{.*}} prev 0x{{.*}} imported in ModB <undeserialized declarations>
+// CHECK-NEXT: | |-also in ModB
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModB MyVal_C 'int'
+// CHECK-NEXT: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum3 'enum MyEnum3':'MyEnum3'
+// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported
+// CHECK-NEXT: |   `-EnumType 0x{{.*}} 'MyEnum3' imported
+// CHECK-NEXT: |     `-Enum 0x{{.*}}
+
+// CHECK:      |-EnumDecl 0x{{.*}} imported in ModB <undeserialized declarations>
+// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} first 0x{{.*}} imported in ModB referenced MyVal_E 'int'
+
+
+
+//--- module.modulemap
+module ModA {
+  module ModAFile1 {
+    header "ModAFile1.h"
+    export *
+  }
+  module ModAFile2 {
+    header "ModAFile2.h"
+    export *
+  }
+}
+// The goal of writing ModB is to test that ASTWriter can handle the merged AST nodes correctly.
+// For example, a stale declaration in IdResolver can cause an assertion failure while writing the identifier table.
+module ModB {
+  header "ModBFile.h"
+  export *
+}
+
+//--- ModAFile1.h
+#include "shared.h"
+
+//--- ModAFile2.h
+// Including this file, triggers loading of the module ModA with nodes coming ModAFile1.h being hidden.
+
+//--- ModBFile.h
+// ModBFile depends on ModAFile2.h only.
+#include "ModAFile2.h"
+// Including shared.h here causes Sema to merge the AST nodes from shared.h with the hidden ones from ModA.
+#include "shared.h"
+
+
+//--- source.m
+#include "ModBFile.h"
+
+int main() { return MyVal_A + MyVal_B + MyVal_C + MyVal_D + MyVal_E; }



More information about the cfe-commits mailing list