[clang] [C++20][Modules] Load function body from the module that gives canonical decl (PR #111992)

Dmitry Polukhin via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 5 07:43:00 PST 2024


https://github.com/dmpolukhin updated https://github.com/llvm/llvm-project/pull/111992

>From 3eaaa7d70f4b57cc13bd00bd3a3a921f7914d599 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Fri, 11 Oct 2024 05:35:18 -0700
Subject: [PATCH 1/4] [C++20][Modules] Load function body from the module that
 gives cannonical decl

Summary:
Fix crash from reproducer provided in https://github.com/llvm/llvm-project/pull/109167#issuecomment-2405289565

Test Plan: TBD
---
 clang/lib/Serialization/ASTReader.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index ec85fad3389a1c..4184c32ea9eab5 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10062,15 +10062,18 @@ void ASTReader::finishPendingActions() {
       // For a function defined inline within a class template, force the
       // canonical definition to be the one inside the canonical definition of
       // the template. This ensures that we instantiate from a correct view
-      // of the template.
+      // of the template. This behaviour seems to be important only for inline
+      // friend functions. For normal member functions, it might results in
+      // selecting canonical decl from module A but body from module B.
       //
       // Sadly we can't do this more generally: we can't be sure that all
       // copies of an arbitrary class definition will have the same members
       // defined (eg, some member functions may not be instantiated, and some
       // special members may or may not have been implicitly defined).
-      if (auto *RD = dyn_cast<CXXRecordDecl>(FD->getLexicalParent()))
-        if (RD->isDependentContext() && !RD->isThisDeclarationADefinition())
-          continue;
+      if (FD->getFriendObjectKind())
+        if (auto *RD = dyn_cast<CXXRecordDecl>(FD->getLexicalParent()))
+          if (RD->isDependentContext() && !RD->isThisDeclarationADefinition())
+            continue;
 
       // FIXME: Check for =delete/=default?
       const FunctionDecl *Defn = nullptr;

>From d4e6cd0e074a5eaaaf09a3caafba0986ebb49949 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Fri, 11 Oct 2024 09:55:35 -0700
Subject: [PATCH 2/4] Add test case

---
 ...ash-instantiated-in-scope-cxx-modules4.cpp | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules4.cpp

diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules4.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules4.cpp
new file mode 100644
index 00000000000000..087eb135dc5f53
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules4.cpp
@@ -0,0 +1,110 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -verify -std=c++20 -x c++ -fmodule-map-file=modules.map -fmodule-name=foo1 -emit-module modules.map -o foo1.pcm
+// RUN: %clang_cc1 -verify -std=c++20 -x c++ -fmodule-map-file=modules.map -fmodule-name=foo2 -emit-module modules.map -o foo2.pcm
+// RUN: %clang_cc1 -verify -std=c++20 -x c++ -fmodule-map-file=modules.map -fmodule-file=foo1.pcm -fmodule-file=foo2.pcm server.cc
+
+//--- functional
+#pragma once
+
+namespace std {
+template <class> class function {};
+} // namespace std
+
+//--- foo.h
+#pragma once
+
+class MethodHandler {
+ public:
+  virtual ~MethodHandler() {}
+  struct HandlerParameter {
+    HandlerParameter();
+  };
+  virtual void RunHandler(const HandlerParameter &param);
+};
+
+template <class RequestType, class ResponseType>
+class CallbackUnaryHandler : public MethodHandler {
+ public:
+  explicit CallbackUnaryHandler();
+
+  void RunHandler(const HandlerParameter &param) final {
+    void *call = nullptr;
+    (void)[call](bool){};
+  }
+};
+
+//--- foo1.h
+// expected-no-diagnostics
+#pragma once
+
+#include "functional"
+
+#include "foo.h"
+
+class A;
+
+class ClientAsyncResponseReaderHelper {
+   public:
+      using t = std::function<void(A)>;
+        static void SetupRequest(t finish);
+};
+
+//--- foo2.h
+// expected-no-diagnostics
+#pragma once
+
+#include "foo.h"
+
+template <class BaseClass>
+class a : public BaseClass {
+ public:
+  a() { [[maybe_unused]] CallbackUnaryHandler<int, int> a; }
+};
+
+//--- modules.map
+module "foo" {
+  export *
+  module "foo.h" {
+    export *
+    textual header "foo.h"
+  }
+}
+
+module "foo1" {
+  export *
+  module "foo1.h" {
+    export *
+    header "foo1.h"
+  }
+
+  use "foo"
+}
+
+module "foo2" {
+  export *
+  module "foo2.h" {
+    export *
+    header "foo2.h"
+  }
+
+  use "foo"
+}
+
+//--- server.cc
+// expected-no-diagnostics
+#include "functional"
+
+#include "foo1.h"
+#include "foo2.h"
+
+std::function<void()> on_emit;
+
+template <class RequestType, class ResponseType>
+class CallbackUnaryHandler;
+
+class s {};
+class hs final : public a<s> {
+  explicit hs() {}
+};

>From 51a60128fa9ea445228c12468caabc317e6adeb9 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 17 Oct 2024 06:44:23 -0700
Subject: [PATCH 3/4] Use mechanism for deserializing related decl

---
 .../include/clang/Serialization/ASTBitCodes.h |  8 +++----
 clang/include/clang/Serialization/ASTReader.h | 19 +++++++--------
 clang/include/clang/Serialization/ASTWriter.h | 11 ++++-----
 clang/lib/Serialization/ASTReader.cpp         | 24 ++++---------------
 clang/lib/Serialization/ASTReaderDecl.cpp     |  9 ++++---
 clang/lib/Serialization/ASTWriter.cpp         | 20 ++++++++--------
 clang/lib/Serialization/ASTWriterDecl.cpp     | 13 +++++++++-
 7 files changed, 46 insertions(+), 58 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index fd834c14ce790f..7a981b2079d683 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -724,11 +724,9 @@ enum ASTRecordTypes {
   /// Record code for vtables to emit.
   VTABLES_TO_EMIT = 70,
 
-  /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to
-  /// be loaded right after the function they belong to. It is required to have
-  /// canonical declaration for the lambda class from the same module as
-  /// enclosing function.
-  FUNCTION_DECL_TO_LAMBDAS_MAP = 71,
+  /// Record code for related declarations that have to be deserialized together
+  /// from the same module.
+  RELATED_DECLS_MAP = 71,
 
   /// Record code for Sema's vector of functions/blocks with effects to
   /// be verified.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index f739fe688c110d..4c4ea03482f06a 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -534,17 +534,14 @@ class ASTReader
   /// namespace as if it is not delayed.
   DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap;
 
-  /// Mapping from FunctionDecl IDs to the corresponding lambda IDs.
-  ///
-  /// These lambdas have to be loaded right after the function they belong to.
-  /// It is required to have canonical declaration for lambda class from the
-  /// same module as enclosing function. This is required to correctly resolve
-  /// captured variables in the lambda. Without this, due to lazy
-  /// deserialization, canonical declarations for the function and lambdas can
-  /// be selected from different modules and DeclRefExprs may refer to the AST
-  /// nodes that don't exist in the function.
-  llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>>
-      FunctionToLambdasMap;
+  /// Mapping from main decl ID to the related decls IDs.
+  ///
+  /// These related decls have to be loaded right after the main decl.
+  /// It is required to have canonical declaration for related decls from the
+  /// same module as the enclosing main decl. Without this, due to lazy
+  /// deserialization, canonical declarations for the main decl and related can
+  /// be selected from different modules.
+  llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>> RelatedDeclsMap;
 
   struct PendingUpdateRecord {
     Decl *D;
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index e418fdea44a0a9..a6d2a84ad91aed 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -230,13 +230,12 @@ class ASTWriter : public ASTDeserializationListener,
   /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
   llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
 
-  /// Mapping from FunctionDecl ID to the list of lambda IDs inside the
-  /// function.
+  /// Mapping from the main decl to related decls inside the main decls.
   ///
-  /// These lambdas have to be loaded right after the function they belong to.
-  /// In order to have canonical declaration for lambda class from the same
-  /// module as enclosing function during deserialization.
-  llvm::DenseMap<LocalDeclID, SmallVector<LocalDeclID, 4>> FunctionToLambdasMap;
+  /// These related decls have to be loaded right after the main decl they
+  /// belong to. In order to have canonical declaration for related decls from
+  /// the same module as the main decl during deserialization.
+  llvm::DenseMap<LocalDeclID, SmallVector<LocalDeclID, 4>> RelatedDeclsMap;
 
   /// Offset of each declaration in the bitstream, indexed by
   /// the declaration's ID.
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 4184c32ea9eab5..ce7bf0342011de 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3855,14 +3855,14 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
-    case FUNCTION_DECL_TO_LAMBDAS_MAP:
+    case RELATED_DECLS_MAP:
       for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) {
         GlobalDeclID ID = ReadDeclID(F, Record, I);
-        auto &Lambdas = FunctionToLambdasMap[ID];
+        auto &RelatedDecls = RelatedDeclsMap[ID];
         unsigned NN = Record[I++];
-        Lambdas.reserve(NN);
+        RelatedDecls.reserve(NN);
         for (unsigned II = 0; II < NN; II++)
-          Lambdas.push_back(ReadDeclID(F, Record, I));
+          RelatedDecls.push_back(ReadDeclID(F, Record, I));
       }
       break;
 
@@ -10059,22 +10059,6 @@ void ASTReader::finishPendingActions() {
                                PBEnd = PendingBodies.end();
        PB != PBEnd; ++PB) {
     if (FunctionDecl *FD = dyn_cast<FunctionDecl>(PB->first)) {
-      // For a function defined inline within a class template, force the
-      // canonical definition to be the one inside the canonical definition of
-      // the template. This ensures that we instantiate from a correct view
-      // of the template. This behaviour seems to be important only for inline
-      // friend functions. For normal member functions, it might results in
-      // selecting canonical decl from module A but body from module B.
-      //
-      // Sadly we can't do this more generally: we can't be sure that all
-      // copies of an arbitrary class definition will have the same members
-      // defined (eg, some member functions may not be instantiated, and some
-      // special members may or may not have been implicitly defined).
-      if (FD->getFriendObjectKind())
-        if (auto *RD = dyn_cast<CXXRecordDecl>(FD->getLexicalParent()))
-          if (RD->isDependentContext() && !RD->isThisDeclarationADefinition())
-            continue;
-
       // FIXME: Check for =delete/=default?
       const FunctionDecl *Defn = nullptr;
       if (!getContext().getLangOpts().Modules || !FD->hasBody(Defn)) {
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 6ece3ba7af9f4b..01301f2573d2bb 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4360,13 +4360,12 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
     DC->setHasExternalVisibleStorage(true);
   }
 
-  // Load any pending lambdas for the function.
-  if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isCanonicalDecl()) {
-    if (auto IT = FunctionToLambdasMap.find(ID);
-        IT != FunctionToLambdasMap.end()) {
+  // Load any pending related decls.
+  if (D->isCanonicalDecl()) {
+    if (auto IT = RelatedDeclsMap.find(ID); IT != RelatedDeclsMap.end()) {
       for (auto LID : IT->second)
         GetDecl(LID);
-      FunctionToLambdasMap.erase(IT);
+      RelatedDeclsMap.erase(IT);
     }
   }
 }
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a52d59c61c4ce6..c33d74d43709de 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -921,7 +921,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
   RECORD(UPDATE_VISIBLE);
   RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD);
-  RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP);
+  RECORD(RELATED_DECLS_MAP);
   RECORD(DECL_UPDATE_OFFSETS);
   RECORD(DECL_UPDATES);
   RECORD(CUDA_SPECIAL_DECL_REFS);
@@ -5783,23 +5783,23 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
     Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD,
                       DelayedNamespaceRecord);
 
-  if (!FunctionToLambdasMap.empty()) {
-    // TODO: on disk hash table for function to lambda mapping might be more
+  if (!RelatedDeclsMap.empty()) {
+    // TODO: on disk hash table for related decls mapping might be more
     // efficent becuase it allows lazy deserialization.
-    RecordData FunctionToLambdasMapRecord;
-    for (const auto &Pair : FunctionToLambdasMap) {
-      FunctionToLambdasMapRecord.push_back(Pair.first.getRawValue());
-      FunctionToLambdasMapRecord.push_back(Pair.second.size());
+    RecordData RelatedDeclsMapRecord;
+    for (const auto &Pair : RelatedDeclsMap) {
+      RelatedDeclsMapRecord.push_back(Pair.first.getRawValue());
+      RelatedDeclsMapRecord.push_back(Pair.second.size());
       for (const auto &Lambda : Pair.second)
-        FunctionToLambdasMapRecord.push_back(Lambda.getRawValue());
+        RelatedDeclsMapRecord.push_back(Lambda.getRawValue());
     }
 
     auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
-    Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP));
+    Abv->Add(llvm::BitCodeAbbrevOp(RELATED_DECLS_MAP));
     Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
     Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
     unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv));
-    Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord,
+    Stream.EmitRecord(RELATED_DECLS_MAP, RelatedDeclsMapRecord,
                       FunctionToLambdaMapAbbrev);
   }
 
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index ad357e30d57529..be8e074dac04bf 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -760,6 +760,17 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
     }
   }
 
+  if (D->getFriendObjectKind()) {
+    // For a function defined inline within a class template, we have to force
+    // the canonical definition to be the one inside the canonical definition of
+    // the template. Remember this relation to deserialize them together.
+    if (auto *RD = dyn_cast<CXXRecordDecl>(D->getLexicalParent()))
+      if (RD->isDependentContext() && RD->isThisDeclarationADefinition()) {
+        Writer.RelatedDeclsMap[Writer.GetDeclRef(RD)].push_back(
+            Writer.GetDeclRef(D));
+      }
+  }
+
   Record.push_back(D->param_size());
   for (auto *P : D->parameters())
     Record.AddDeclRef(P);
@@ -1525,7 +1536,7 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
     // For lambdas inside canonical FunctionDecl remember the mapping.
     if (auto FD = llvm::dyn_cast_or_null<FunctionDecl>(D->getDeclContext());
         FD && FD->isCanonicalDecl()) {
-      Writer.FunctionToLambdasMap[Writer.GetDeclRef(FD)].push_back(
+      Writer.RelatedDeclsMap[Writer.GetDeclRef(FD)].push_back(
           Writer.GetDeclRef(D));
     }
   } else {

>From d920dd36f2d55239d509ddbf78df22e0de6bb571 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 5 Dec 2024 07:38:18 -0800
Subject: [PATCH 4/4] Use canonical decl as a template for instantiation

---
 clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 39f8ece62ed5c2..299905f3a4da9a 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1976,14 +1976,16 @@ TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
   if (!InstParams)
     return nullptr;
 
+  // Use canonical templated decl because only canonical decl has body
+  // if declarations were merged during loading from modules.
+  FunctionDecl *TemplatedDecl = D->getTemplatedDecl()->getCanonicalDecl();
   FunctionDecl *Instantiated = nullptr;
-  if (CXXMethodDecl *DMethod = dyn_cast<CXXMethodDecl>(D->getTemplatedDecl()))
-    Instantiated = cast_or_null<FunctionDecl>(VisitCXXMethodDecl(DMethod,
-                                                                 InstParams));
+  if (CXXMethodDecl *DMethod = dyn_cast<CXXMethodDecl>(TemplatedDecl))
+    Instantiated =
+        cast_or_null<FunctionDecl>(VisitCXXMethodDecl(DMethod, InstParams));
   else
-    Instantiated = cast_or_null<FunctionDecl>(VisitFunctionDecl(
-                                                          D->getTemplatedDecl(),
-                                                                InstParams));
+    Instantiated = cast_or_null<FunctionDecl>(
+        VisitFunctionDecl(TemplatedDecl, InstParams));
 
   if (!Instantiated)
     return nullptr;



More information about the cfe-commits mailing list