[clang] [C++20][Modules] Fix crash when function and lambda inside loaded from different modules (PR #109167)

Dmitry Polukhin via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 19 08:48:44 PDT 2024


https://github.com/dmpolukhin updated https://github.com/llvm/llvm-project/pull/109167

>From 72b43bd2f392a009187e1cdd90627691a4017707 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Wed, 18 Sep 2024 09:02:23 -0700
Subject: [PATCH 1/3] [C++20][Modules] Fix crash when function and lambda
 inside loaded from different modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Because AST loading code is lazy and happens in unpredictable order, it is
possible that a function and lambda inside the function can be loaded from
different modules. As a result, the captured DeclRefExpr won’t match the
corresponding VarDecl inside the function. This situation is reflected in the
AST as follows:

```
FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline
|-also in ./folly-conv.h
`-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1>
  |-DeclStmt 0x555564f7ced8 <line:34:3, col:17>
  | `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit
  |   `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0
  |-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>'
  | |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0
  | |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing
  | `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)'
  |   |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition
  |   | |-also in ./thrift_cpp2_base.h
  |   | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init
  |   |   |-DefaultConstructor defaulted_is_constexpr
  |   |   |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param
  |   |   |-MoveConstructor exists simple trivial needs_implicit
  |   |   |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param
  |   |   |-MoveAssignment
  |   |   `-Destructor simple irrelevant trivial constexpr needs_implicit
  |   `-CompoundStmt 0x555564f7d1a8 <col:58, col:75>
  |     `-ReturnStmt 0x555564f7d198 <col:60, col:67>
  |       `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture
  `-ReturnStmt 0x555564f7bc78 <line:40:3, col:11>
    `-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void'
```

This diff modifies the AST deserialization process to load lambdas within the
canonical function declaration sooner, immediately following the function,
ensuring that they are loaded from the same module.

Re-land https://github.com/llvm/llvm-project/pull/104512
Added test case that caused crash due to multiple enclosed lambdas
deserialization.

Test Plan: check-clang
---
 clang/include/clang/Serialization/ASTReader.h |  9 +++
 clang/lib/Serialization/ASTReader.cpp         | 14 +++-
 clang/lib/Serialization/ASTReaderDecl.cpp     | 10 +++
 clang/lib/Serialization/ASTWriterDecl.cpp     | 42 ++++++++++
 ...rash-instantiated-in-scope-cxx-modules.cpp | 76 +++++++++++++++++++
 ...ash-instantiated-in-scope-cxx-modules2.cpp | 30 ++++++++
 ...ash-instantiated-in-scope-cxx-modules3.cpp | 26 +++++++
 7 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
 create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
 create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp

diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 898f4392465fdf..1b3812e7fd4523 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1188,6 +1188,15 @@ class ASTReader
   /// once recursing loading has been completed.
   llvm::SmallVector<NamedDecl *, 16> PendingOdrMergeChecks;
 
+  /// Lambdas that need to be loaded immediately after the function they belong
+  /// to. It is necessary to have a canonical declaration for the lambda class
+  /// from the same module as the enclosing function. This requirement ensures
+  /// the correct resolution of captured variables in the lambda. Without this,
+  /// due to lazy deserialization, canonical declarations for the function and
+  /// lambdas can be from different modules, and DeclRefExprs may refer to AST
+  /// nodes that do not exist in the function.
+  SmallVector<GlobalDeclID, 4> PendingLambdas;
+
   using DataPointers =
       std::pair<CXXRecordDecl *, struct CXXRecordDecl::DefinitionData *>;
   using ObjCInterfaceDataPointers =
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7efcc81e194d95..d7bad6bbad0c90 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -9777,7 +9777,8 @@ void ASTReader::finishPendingActions() {
       !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() ||
       !PendingDeclChains.empty() || !PendingMacroIDs.empty() ||
       !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() ||
-      !PendingObjCExtensionIvarRedeclarations.empty()) {
+      !PendingObjCExtensionIvarRedeclarations.empty() ||
+      !PendingLambdas.empty()) {
     // If any identifiers with corresponding top-level declarations have
     // been loaded, load those declarations now.
     using TopLevelDeclsMap =
@@ -9922,6 +9923,17 @@ void ASTReader::finishPendingActions() {
       }
       PendingObjCExtensionIvarRedeclarations.pop_back();
     }
+
+    // Load any pending lambdas. During the deserialization of pending lambdas,
+    // more lambdas can be discovered, so swap the current PendingLambdas with a
+    // local empty vector. Newly discovered lambdas will be deserialized in the
+    // next iteration.
+    if (!PendingLambdas.empty()) {
+      SmallVector<GlobalDeclID, 4> DeclIDs;
+      DeclIDs.swap(PendingLambdas);
+      for (auto ID : DeclIDs)
+        GetDecl(ID);
+    }
   }
 
   // At this point, all update records for loaded decls are in place, so any
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 9272e23c7da3fc..20e577404d997d 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1155,6 +1155,16 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
   for (unsigned I = 0; I != NumParams; ++I)
     Params.push_back(readDeclAs<ParmVarDecl>());
   FD->setParams(Reader.getContext(), Params);
+
+  // For the first decl add all lambdas inside for loading them later,
+  // otherwise skip them.
+  unsigned NumLambdas = Record.readInt();
+  if (FD->isFirstDecl()) {
+    for (unsigned I = 0; I != NumLambdas; ++I)
+      Reader.PendingLambdas.push_back(Record.readDeclID());
+  } else {
+    Record.skipInts(NumLambdas);
+  }
 }
 
 void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 555f6325da646b..732a6e21f340d6 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -18,6 +18,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/OpenMPClause.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTRecordWriter.h"
@@ -625,6 +626,33 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) {
                                            : QualType());
 }
 
+static llvm::SmallVector<const Decl *, 2> collectLambdas(FunctionDecl *D) {
+  struct LambdaCollector : public ConstStmtVisitor<LambdaCollector> {
+    llvm::SmallVectorImpl<const Decl *> &Lambdas;
+
+    LambdaCollector(llvm::SmallVectorImpl<const Decl *> &Lambdas)
+        : Lambdas(Lambdas) {}
+
+    void VisitLambdaExpr(const LambdaExpr *E) {
+      VisitStmt(E);
+      Lambdas.push_back(E->getLambdaClass());
+    }
+
+    void VisitStmt(const Stmt *S) {
+      if (!S)
+        return;
+      for (const Stmt *Child : S->children())
+        if (Child)
+          Visit(Child);
+    }
+  };
+
+  llvm::SmallVector<const Decl *, 2> Lambdas;
+  if (D->hasBody())
+    LambdaCollector(Lambdas).VisitStmt(D->getBody());
+  return Lambdas;
+}
+
 void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
   static_assert(DeclContext::NumFunctionDeclBits == 44,
                 "You need to update the serializer after you change the "
@@ -764,6 +792,19 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
   Record.push_back(D->param_size());
   for (auto *P : D->parameters())
     Record.AddDeclRef(P);
+
+  // Store references to all lambda decls inside function to load them
+  // immediately after loading the function to make sure that canonical
+  // decls for lambdas will be from the same module.
+  if (D->isCanonicalDecl()) {
+    llvm::SmallVector<const Decl *, 2> Lambdas = collectLambdas(D);
+    Record.push_back(Lambdas.size());
+    for (const auto *L : Lambdas)
+      Record.AddDeclRef(L);
+  } else {
+    Record.push_back(0);
+  }
+
   Code = serialization::DECL_FUNCTION;
 }
 
@@ -2239,6 +2280,7 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) {
   //
   // This is:
   //         NumParams and Params[] from FunctionDecl, and
+  //         NumLambdas, Lambdas[] from FunctionDecl, and
   //         NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl.
   //
   //  Add an AbbrevOp for 'size then elements' and use it here.
diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
new file mode 100644
index 00000000000000..80844a58ad825a
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
@@ -0,0 +1,76 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h
+
+//--- Conv.h
+#pragma once
+
+template <typename _Tp, typename _Up = _Tp&&>
+_Up __declval(int);
+
+template <typename _Tp>
+auto declval() noexcept -> decltype(__declval<_Tp>(0));
+
+namespace folly {
+
+template <class Value, class Error>
+struct Expected {
+  template <class Yes>
+  auto thenOrThrow() -> decltype(declval<Value&>()) {
+    return 1;
+  }
+};
+
+struct ExpectedHelper {
+  template <class Error, class T>
+  static constexpr Expected<T, Error> return_(T) {
+    return Expected<T, Error>();
+  }
+
+  template <class This, class Fn, class E = int, class T = ExpectedHelper>
+  static auto then_(This&&, Fn&&)
+      -> decltype(T::template return_<E>((declval<Fn>()(true), 0))) {
+    return Expected<int, int>();
+  }
+};
+
+template <class Tgt>
+inline Expected<Tgt, const char*> tryTo() {
+  Tgt result = 0;
+  // In build with asserts:
+  // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion<Decl *, LocalInstantiationScope::DeclArgumentPack *> *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa<LabelDecl>(D) && "declaration not instantiated in this scope"' failed.
+  // In release build compilation error on the line below inside lambda:
+  // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized]
+  ExpectedHelper::then_(Expected<bool, int>(), [&](bool) { return result; });
+  return {};
+}
+
+} // namespace folly
+
+inline void bar() {
+  folly::tryTo<int>();
+}
+// expected-no-diagnostics
+
+//--- folly-conv.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- thrift_cpp2_base.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- logger_base.h
+#pragma once
+import "folly-conv.h";
+import "thrift_cpp2_base.h";
+
+inline void foo() {
+  folly::tryTo<unsigned>();
+}
+// expected-no-diagnostics
diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
new file mode 100644
index 00000000000000..5b1a904e928a68
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
@@ -0,0 +1,30 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp
+
+//--- header.h
+template <typename T>
+void f(T) {}
+
+class A {
+  virtual ~A();
+};
+
+inline A::~A() {
+  f([](){});
+}
+
+struct B {
+  void g() {
+    f([](){
+      [](){};
+    });
+  }
+};
+// expected-no-diagnostics
+
+//--- main.cpp
+import "header.h";
+// expected-no-diagnostics
diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp
new file mode 100644
index 00000000000000..646ff9f745710b
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t
+// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// No crash or assertion failure on multiple nested lambdas deserialization.
+template <typename T>
+void b() {
+  [] {
+    []{
+      []{
+        []{
+          []{
+          }();
+        }();
+      }();
+    }();
+  }();
+}
+
+void foo() {
+  b<int>();
+}
+#endif

>From 4d95a90a956dba62e04ed168d7f3dc9ed73959ef Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 19 Sep 2024 08:11:37 -0700
Subject: [PATCH 2/3] Instead of StmtVisitor use DeclContext visitor for lambda
 collection

---
 clang/lib/Serialization/ASTWriterDecl.cpp | 33 ++++++++---------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 732a6e21f340d6..8f62b1a907ca83 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -626,30 +626,19 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) {
                                            : QualType());
 }
 
-static llvm::SmallVector<const Decl *, 2> collectLambdas(FunctionDecl *D) {
-  struct LambdaCollector : public ConstStmtVisitor<LambdaCollector> {
-    llvm::SmallVectorImpl<const Decl *> &Lambdas;
-
-    LambdaCollector(llvm::SmallVectorImpl<const Decl *> &Lambdas)
-        : Lambdas(Lambdas) {}
-
-    void VisitLambdaExpr(const LambdaExpr *E) {
-      VisitStmt(E);
-      Lambdas.push_back(E->getLambdaClass());
-    }
-
-    void VisitStmt(const Stmt *S) {
-      if (!S)
-        return;
-      for (const Stmt *Child : S->children())
-        if (Child)
-          Visit(Child);
+// Recursively collects all lambda declarations within the function declaration.
+static llvm::SmallVector<const Decl *, 2> collectLambdas(FunctionDecl *FD) {
+  llvm::SmallVector<const Decl *, 2> Lambdas;
+  std::function<void(DeclContext *)> visitor = [&visitor,
+                                                &Lambdas](DeclContext *DC) {
+    for (Decl *D : DC->decls()) {
+      if (isa<DeclContext>(D))
+        visitor(cast<DeclContext>(D));
+      if (auto *RD = dyn_cast<CXXRecordDecl>(D); RD && RD->isLambda())
+        Lambdas.push_back(RD);
     }
   };
-
-  llvm::SmallVector<const Decl *, 2> Lambdas;
-  if (D->hasBody())
-    LambdaCollector(Lambdas).VisitStmt(D->getBody());
+  visitor(FD);
   return Lambdas;
 }
 

>From 60fd20129304e184be69599c3e4f85ddc779ae29 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 19 Sep 2024 08:46:15 -0700
Subject: [PATCH 3/3] To be on the safe side, assume that decls may be null

---
 clang/lib/Serialization/ASTWriterDecl.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 8f62b1a907ca83..36dad89edcb1c0 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -632,6 +632,8 @@ static llvm::SmallVector<const Decl *, 2> collectLambdas(FunctionDecl *FD) {
   std::function<void(DeclContext *)> visitor = [&visitor,
                                                 &Lambdas](DeclContext *DC) {
     for (Decl *D : DC->decls()) {
+      if (!D)
+        continue;
       if (isa<DeclContext>(D))
         visitor(cast<DeclContext>(D));
       if (auto *RD = dyn_cast<CXXRecordDecl>(D); RD && RD->isLambda())



More information about the cfe-commits mailing list