[clang] d778689 - [RFC][C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#104512)

via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 10 08:15:56 PDT 2024


Author: Dmitry Polukhin
Date: 2024-09-10T16:15:50+01:00
New Revision: d778689fdc812033e7142ed87e4ee13c4997b3f9

URL: https://github.com/llvm/llvm-project/commit/d778689fdc812033e7142ed87e4ee13c4997b3f9
DIFF: https://github.com/llvm/llvm-project/commit/d778689fdc812033e7142ed87e4ee13c4997b3f9.diff

LOG: [RFC][C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#104512)

Summary:
Because AST loading code is lazy and happens in unpredictable order it
could happen that function and lambda inside function can be loaded from
different modules. In this case, captured DeclRefExpr won’t match the
corresponding VarDecl inside function. In AST it looks like this:
```
FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline
|-also in ./folly-conv.h
`-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1>
  |-DeclStmt 0x555564f7ced8 <line:34:3, col:17>
  | `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit
  |   `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0
  |-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>'
  | |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0
  | |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing
  | `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)'
  |   |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition
  |   | |-also in ./thrift_cpp2_base.h
  |   | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init
  |   |   |-DefaultConstructor defaulted_is_constexpr
  |   |   |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param
  |   |   |-MoveConstructor exists simple trivial needs_implicit
  |   |   |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param
  |   |   |-MoveAssignment
  |   |   `-Destructor simple irrelevant trivial constexpr needs_implicit
  |   `-CompoundStmt 0x555564f7d1a8 <col:58, col:75>
  |     `-ReturnStmt 0x555564f7d198 <col:60, col:67>
  |       `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture
  `-ReturnStmt 0x555564f7bc78 <line:40:3, col:11>
    `-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void'
```
This diff changes AST deserialization to load lambdas inside canonical
function declaration earlier right after the function to make sure that
their canonical decl is loaded from the same module.

Test Plan: check-clang

Added: 
    clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
    clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp

Modified: 
    clang/include/clang/Serialization/ASTReader.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTReaderDecl.cpp
    clang/lib/Serialization/ASTWriterDecl.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 898f4392465fdf..7331bcf249266d 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1188,6 +1188,15 @@ class ASTReader
   /// once recursing loading has been completed.
   llvm::SmallVector<NamedDecl *, 16> PendingOdrMergeChecks;
 
+  /// Lambdas that need to be loaded right after the function they belong to.
+  /// It is required to have canonical declaration for lambda class from the
+  /// same module as enclosing function. This is required to correctly resolve
+  /// captured variables in the lambda. Without this, due to lazy
+  /// deserialization canonical declarations for the function and lambdas can
+  /// be from 
diff erent modules and DeclRefExprs may refer to the AST nodes
+  /// that don't exist in the function.
+  SmallVector<GlobalDeclID, 4> PendingLambdas;
+
   using DataPointers =
       std::pair<CXXRecordDecl *, struct CXXRecordDecl::DefinitionData *>;
   using ObjCInterfaceDataPointers =

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index e5a1e20a265616..0ee53e43dff39c 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -9782,7 +9782,8 @@ void ASTReader::finishPendingActions() {
       !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() ||
       !PendingDeclChains.empty() || !PendingMacroIDs.empty() ||
       !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() ||
-      !PendingObjCExtensionIvarRedeclarations.empty()) {
+      !PendingObjCExtensionIvarRedeclarations.empty() ||
+      !PendingLambdas.empty()) {
     // If any identifiers with corresponding top-level declarations have
     // been loaded, load those declarations now.
     using TopLevelDeclsMap =
@@ -9927,6 +9928,11 @@ void ASTReader::finishPendingActions() {
       }
       PendingObjCExtensionIvarRedeclarations.pop_back();
     }
+
+    // Load any pendiong lambdas.
+    for (auto ID : PendingLambdas)
+      GetDecl(ID);
+    PendingLambdas.clear();
   }
 
   // At this point, all update records for loaded decls are in place, so any

diff  --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 9272e23c7da3fc..20e577404d997d 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1155,6 +1155,16 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
   for (unsigned I = 0; I != NumParams; ++I)
     Params.push_back(readDeclAs<ParmVarDecl>());
   FD->setParams(Reader.getContext(), Params);
+
+  // For the first decl add all lambdas inside for loading them later,
+  // otherwise skip them.
+  unsigned NumLambdas = Record.readInt();
+  if (FD->isFirstDecl()) {
+    for (unsigned I = 0; I != NumLambdas; ++I)
+      Reader.PendingLambdas.push_back(Record.readDeclID());
+  } else {
+    Record.skipInts(NumLambdas);
+  }
 }
 
 void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {

diff  --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 555f6325da646b..732a6e21f340d6 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -18,6 +18,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/OpenMPClause.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTRecordWriter.h"
@@ -625,6 +626,33 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) {
                                            : QualType());
 }
 
+static llvm::SmallVector<const Decl *, 2> collectLambdas(FunctionDecl *D) {
+  struct LambdaCollector : public ConstStmtVisitor<LambdaCollector> {
+    llvm::SmallVectorImpl<const Decl *> &Lambdas;
+
+    LambdaCollector(llvm::SmallVectorImpl<const Decl *> &Lambdas)
+        : Lambdas(Lambdas) {}
+
+    void VisitLambdaExpr(const LambdaExpr *E) {
+      VisitStmt(E);
+      Lambdas.push_back(E->getLambdaClass());
+    }
+
+    void VisitStmt(const Stmt *S) {
+      if (!S)
+        return;
+      for (const Stmt *Child : S->children())
+        if (Child)
+          Visit(Child);
+    }
+  };
+
+  llvm::SmallVector<const Decl *, 2> Lambdas;
+  if (D->hasBody())
+    LambdaCollector(Lambdas).VisitStmt(D->getBody());
+  return Lambdas;
+}
+
 void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
   static_assert(DeclContext::NumFunctionDeclBits == 44,
                 "You need to update the serializer after you change the "
@@ -764,6 +792,19 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
   Record.push_back(D->param_size());
   for (auto *P : D->parameters())
     Record.AddDeclRef(P);
+
+  // Store references to all lambda decls inside function to load them
+  // immediately after loading the function to make sure that canonical
+  // decls for lambdas will be from the same module.
+  if (D->isCanonicalDecl()) {
+    llvm::SmallVector<const Decl *, 2> Lambdas = collectLambdas(D);
+    Record.push_back(Lambdas.size());
+    for (const auto *L : Lambdas)
+      Record.AddDeclRef(L);
+  } else {
+    Record.push_back(0);
+  }
+
   Code = serialization::DECL_FUNCTION;
 }
 
@@ -2239,6 +2280,7 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) {
   //
   // This is:
   //         NumParams and Params[] from FunctionDecl, and
+  //         NumLambdas, Lambdas[] from FunctionDecl, and
   //         NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl.
   //
   //  Add an AbbrevOp for 'size then elements' and use it here.

diff  --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
new file mode 100644
index 00000000000000..80844a58ad825a
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
@@ -0,0 +1,76 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h
+
+//--- Conv.h
+#pragma once
+
+template <typename _Tp, typename _Up = _Tp&&>
+_Up __declval(int);
+
+template <typename _Tp>
+auto declval() noexcept -> decltype(__declval<_Tp>(0));
+
+namespace folly {
+
+template <class Value, class Error>
+struct Expected {
+  template <class Yes>
+  auto thenOrThrow() -> decltype(declval<Value&>()) {
+    return 1;
+  }
+};
+
+struct ExpectedHelper {
+  template <class Error, class T>
+  static constexpr Expected<T, Error> return_(T) {
+    return Expected<T, Error>();
+  }
+
+  template <class This, class Fn, class E = int, class T = ExpectedHelper>
+  static auto then_(This&&, Fn&&)
+      -> decltype(T::template return_<E>((declval<Fn>()(true), 0))) {
+    return Expected<int, int>();
+  }
+};
+
+template <class Tgt>
+inline Expected<Tgt, const char*> tryTo() {
+  Tgt result = 0;
+  // In build with asserts:
+  // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion<Decl *, LocalInstantiationScope::DeclArgumentPack *> *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa<LabelDecl>(D) && "declaration not instantiated in this scope"' failed.
+  // In release build compilation error on the line below inside lambda:
+  // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized]
+  ExpectedHelper::then_(Expected<bool, int>(), [&](bool) { return result; });
+  return {};
+}
+
+} // namespace folly
+
+inline void bar() {
+  folly::tryTo<int>();
+}
+// expected-no-diagnostics
+
+//--- folly-conv.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- thrift_cpp2_base.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- logger_base.h
+#pragma once
+import "folly-conv.h";
+import "thrift_cpp2_base.h";
+
+inline void foo() {
+  folly::tryTo<unsigned>();
+}
+// expected-no-diagnostics

diff  --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
new file mode 100644
index 00000000000000..5b1a904e928a68
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
@@ -0,0 +1,30 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp
+
+//--- header.h
+template <typename T>
+void f(T) {}
+
+class A {
+  virtual ~A();
+};
+
+inline A::~A() {
+  f([](){});
+}
+
+struct B {
+  void g() {
+    f([](){
+      [](){};
+    });
+  }
+};
+// expected-no-diagnostics
+
+//--- main.cpp
+import "header.h";
+// expected-no-diagnostics


        


More information about the cfe-commits mailing list