[clang] 2ccac07 - [C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#109167)

via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 25 00:31:54 PDT 2024


Author: Dmitry Polukhin
Date: 2024-09-25T08:31:49+01:00
New Revision: 2ccac07bf22d17029d4437b0a727dd55c8c86d56

URL: https://github.com/llvm/llvm-project/commit/2ccac07bf22d17029d4437b0a727dd55c8c86d56
DIFF: https://github.com/llvm/llvm-project/commit/2ccac07bf22d17029d4437b0a727dd55c8c86d56.diff

LOG: [C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#109167)

Summary:
Because AST loading code is lazy and happens in unpredictable order, it
is possible that a function and lambda inside the function can be loaded
from different modules. As a result, the captured DeclRefExpr won’t
match the corresponding VarDecl inside the function. This situation is
reflected in the AST as follows:

```
FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline
|-also in ./folly-conv.h
`-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1>
  |-DeclStmt 0x555564f7ced8 <line:34:3, col:17>
  | `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit
  |   `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0
  |-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>'
  | |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0
  | |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing
  | `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)'
  |   |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition
  |   | |-also in ./thrift_cpp2_base.h
  |   | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init
  |   |   |-DefaultConstructor defaulted_is_constexpr
  |   |   |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param
  |   |   |-MoveConstructor exists simple trivial needs_implicit
  |   |   |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param
  |   |   |-MoveAssignment
  |   |   `-Destructor simple irrelevant trivial constexpr needs_implicit
  |   `-CompoundStmt 0x555564f7d1a8 <col:58, col:75>
  |     `-ReturnStmt 0x555564f7d198 <col:60, col:67>
  |       `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture
  `-ReturnStmt 0x555564f7bc78 <line:40:3, col:11>
    `-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void'
```

This diff modifies the AST deserialization process to load lambdas
within the canonical function declaration sooner, immediately following
the function, ensuring that they are loaded from the same module.

Re-land https://github.com/llvm/llvm-project/pull/104512 Added test case
that caused crash due to multiple enclosed lambdas deserialization.

Test Plan: check-clang

Added: 
    clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
    clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
    clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp

Modified: 
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/include/clang/Serialization/ASTReader.h
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTReaderDecl.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/lib/Serialization/ASTWriterDecl.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 4410df296d8efc..5be33ae0ed1b98 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -724,6 +724,12 @@ enum ASTRecordTypes {
 
   /// Record code for vtables to emit.
   VTABLES_TO_EMIT = 70,
+
+  /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to
+  /// be loaded right after the function they belong to. It is required to have
+  /// canonical declaration for the lambda class from the same module as
+  /// enclosing function.
+  FUNCTION_DECL_TO_LAMBDAS_MAP = 71,
 };
 
 /// Record types used within a source manager block.

diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 898f4392465fdf..c1843218a4b8b1 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -532,6 +532,18 @@ class ASTReader
   /// namespace as if it is not delayed.
   DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap;
 
+  /// Mapping from FunctionDecl IDs to the corresponding lambda IDs.
+  ///
+  /// These lambdas have to be loaded right after the function they belong to.
+  /// It is required to have canonical declaration for lambda class from the
+  /// same module as enclosing function. This is required to correctly resolve
+  /// captured variables in the lambda. Without this, due to lazy
+  /// deserialization, canonical declarations for the function and lambdas can
+  /// be selected from 
diff erent modules and DeclRefExprs may refer to the AST
+  /// nodes that don't exist in the function.
+  llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>>
+      FunctionToLambdasMap;
+
   struct PendingUpdateRecord {
     Decl *D;
     GlobalDeclID ID;

diff  --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 10a50b711043a8..760866fd9de938 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -233,6 +233,14 @@ class ASTWriter : public ASTDeserializationListener,
   /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
   llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
 
+  /// Mapping from FunctionDecl to the list of lambda IDs inside the function.
+  ///
+  /// These lambdas have to be loaded right after the function they belong to.
+  /// In order to have canonical declaration for lambda class from the same
+  /// module as enclosing function during deserialization.
+  llvm::DenseMap<const Decl *, SmallVector<LocalDeclID, 4>>
+      FunctionToLambdasMap;
+
   /// Offset of each declaration in the bitstream, indexed by
   /// the declaration's ID.
   std::vector<serialization::DeclOffset> DeclOffsets;

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index ede3070787722d..a369ad0be47954 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3856,6 +3856,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
+    case FUNCTION_DECL_TO_LAMBDAS_MAP:
+      for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) {
+        GlobalDeclID ID = ReadDeclID(F, Record, I);
+        auto &Lambdas = FunctionToLambdasMap[ID];
+        unsigned NN = Record[I++];
+        Lambdas.reserve(NN);
+        for (unsigned II = 0; II < NN; II++)
+          Lambdas.push_back(ReadDeclID(F, Record, I));
+      }
+      break;
+
     case OBJC_CATEGORIES_MAP:
       if (F.LocalNumObjCCategoriesInMap != 0)
         return llvm::createStringError(

diff  --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 9272e23c7da3fc..7cead2728ca938 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4351,6 +4351,16 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
           reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
     DC->setHasExternalVisibleStorage(true);
   }
+
+  // Load any pending lambdas for the function.
+  if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isCanonicalDecl()) {
+    if (auto IT = FunctionToLambdasMap.find(ID);
+        IT != FunctionToLambdasMap.end()) {
+      for (auto LID : IT->second)
+        GetDecl(LID);
+      FunctionToLambdasMap.erase(IT);
+    }
+  }
 }
 
 void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 4ee14b1e260159..f326e3c2e2ff7a 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
   RECORD(UPDATE_VISIBLE);
   RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD);
+  RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP);
   RECORD(DECL_UPDATE_OFFSETS);
   RECORD(DECL_UPDATES);
   RECORD(CUDA_SPECIAL_DECL_REFS);
@@ -5707,6 +5708,27 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
     Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD,
                       DelayedNamespaceRecord);
 
+  if (!FunctionToLambdasMap.empty()) {
+    // TODO: on disk hash table for function to lambda mapping might be more
+    // efficent becuase it allows lazy deserialization.
+    RecordData FunctionToLambdasMapRecord;
+    for (const auto &Pair : FunctionToLambdasMap) {
+      FunctionToLambdasMapRecord.push_back(
+          GetDeclRef(Pair.first).getRawValue());
+      FunctionToLambdasMapRecord.push_back(Pair.second.size());
+      for (const auto &Lambda : Pair.second)
+        FunctionToLambdasMapRecord.push_back(Lambda.getRawValue());
+    }
+
+    auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
+    Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP));
+    Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
+    Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
+    unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv));
+    Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord,
+                      FunctionToLambdaMapAbbrev);
+  }
+
   const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
   // Create a lexical update block containing all of the declarations in the
   // translation unit that do not come from other AST files.

diff  --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 555f6325da646b..50c090b195d619 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1521,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
     } else {
       Record.push_back(0);
     }
+    // For lambdas inside canonical FunctionDecl remember the mapping.
+    if (auto FD = llvm::dyn_cast_or_null<FunctionDecl>(D->getDeclContext());
+        FD && FD->isCanonicalDecl()) {
+      Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D));
+    }
   } else {
     Record.push_back(CXXRecNotTemplate);
   }

diff  --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
new file mode 100644
index 00000000000000..80844a58ad825a
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
@@ -0,0 +1,76 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h
+
+//--- Conv.h
+#pragma once
+
+template <typename _Tp, typename _Up = _Tp&&>
+_Up __declval(int);
+
+template <typename _Tp>
+auto declval() noexcept -> decltype(__declval<_Tp>(0));
+
+namespace folly {
+
+template <class Value, class Error>
+struct Expected {
+  template <class Yes>
+  auto thenOrThrow() -> decltype(declval<Value&>()) {
+    return 1;
+  }
+};
+
+struct ExpectedHelper {
+  template <class Error, class T>
+  static constexpr Expected<T, Error> return_(T) {
+    return Expected<T, Error>();
+  }
+
+  template <class This, class Fn, class E = int, class T = ExpectedHelper>
+  static auto then_(This&&, Fn&&)
+      -> decltype(T::template return_<E>((declval<Fn>()(true), 0))) {
+    return Expected<int, int>();
+  }
+};
+
+template <class Tgt>
+inline Expected<Tgt, const char*> tryTo() {
+  Tgt result = 0;
+  // In build with asserts:
+  // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion<Decl *, LocalInstantiationScope::DeclArgumentPack *> *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa<LabelDecl>(D) && "declaration not instantiated in this scope"' failed.
+  // In release build compilation error on the line below inside lambda:
+  // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized]
+  ExpectedHelper::then_(Expected<bool, int>(), [&](bool) { return result; });
+  return {};
+}
+
+} // namespace folly
+
+inline void bar() {
+  folly::tryTo<int>();
+}
+// expected-no-diagnostics
+
+//--- folly-conv.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- thrift_cpp2_base.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- logger_base.h
+#pragma once
+import "folly-conv.h";
+import "thrift_cpp2_base.h";
+
+inline void foo() {
+  folly::tryTo<unsigned>();
+}
+// expected-no-diagnostics

diff  --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
new file mode 100644
index 00000000000000..5b1a904e928a68
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp
@@ -0,0 +1,30 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp
+
+//--- header.h
+template <typename T>
+void f(T) {}
+
+class A {
+  virtual ~A();
+};
+
+inline A::~A() {
+  f([](){});
+}
+
+struct B {
+  void g() {
+    f([](){
+      [](){};
+    });
+  }
+};
+// expected-no-diagnostics
+
+//--- main.cpp
+import "header.h";
+// expected-no-diagnostics

diff  --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp
new file mode 100644
index 00000000000000..646ff9f745710b
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t
+// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// No crash or assertion failure on multiple nested lambdas deserialization.
+template <typename T>
+void b() {
+  [] {
+    []{
+      []{
+        []{
+          []{
+          }();
+        }();
+      }();
+    }();
+  }();
+}
+
+void foo() {
+  b<int>();
+}
+#endif


        


More information about the cfe-commits mailing list