[clang] [RFC][C++20][Modules] Fix crash when function and lambda inside loaded from different modules (PR #104512)
Dmitry Polukhin via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 22 10:33:39 PDT 2024
https://github.com/dmpolukhin updated https://github.com/llvm/llvm-project/pull/104512
>From 8e11c66e4515bb35671853b89152b43d1ff60ffa Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 15 Aug 2024 14:03:57 -0700
Subject: [PATCH 1/5] [RFC][C++20][Modules] Fix crash when function and lambda
inside loaded from different modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Summary:
Because AST loading code is lazy and happens in unpredictable order it could happen that function and lambda inside function can be loaded from different modules. In this case, captured DeclRefExpr won’t match the corresponding VarDecl inside function. In AST it looks like this:
```
FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline
|-also in ./folly-conv.h
`-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1>
|-DeclStmt 0x555564f7ced8 <line:34:3, col:17>
| `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit
| `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0
|-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>'
| |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0
| |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing
| `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)'
| |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition
| | |-also in ./thrift_cpp2_base.h
| | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init
| | |-DefaultConstructor defaulted_is_constexpr
| | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param
| | |-MoveConstructor exists simple trivial needs_implicit
| | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param
| | |-MoveAssignment
| | `-Destructor simple irrelevant trivial constexpr needs_implicit
| `-CompoundStmt 0x555564f7d1a8 <col:58, col:75>
| `-ReturnStmt 0x555564f7d198 <col:60, col:67>
| `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture
`-ReturnStmt 0x555564f7bc78 <line:40:3, col:11>
`-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void'
```
I’m not sure that it is the right fix for the problem so any ideas how to fix it better are very appreciated.
Test Plan: check-clang
---
...rash-instantiated-in-scope-cxx-modules.cpp | 76 +++++++++++++++++++
1 file changed, 76 insertions(+)
create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
new file mode 100644
index 00000000000000..80844a58ad825a
--- /dev/null
+++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp
@@ -0,0 +1,76 @@
+// RUN: rm -fR %t
+// RUN: split-file %s %t
+// RUN: cd %t
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h
+
+//--- Conv.h
+#pragma once
+
+template <typename _Tp, typename _Up = _Tp&&>
+_Up __declval(int);
+
+template <typename _Tp>
+auto declval() noexcept -> decltype(__declval<_Tp>(0));
+
+namespace folly {
+
+template <class Value, class Error>
+struct Expected {
+ template <class Yes>
+ auto thenOrThrow() -> decltype(declval<Value&>()) {
+ return 1;
+ }
+};
+
+struct ExpectedHelper {
+ template <class Error, class T>
+ static constexpr Expected<T, Error> return_(T) {
+ return Expected<T, Error>();
+ }
+
+ template <class This, class Fn, class E = int, class T = ExpectedHelper>
+ static auto then_(This&&, Fn&&)
+ -> decltype(T::template return_<E>((declval<Fn>()(true), 0))) {
+ return Expected<int, int>();
+ }
+};
+
+template <class Tgt>
+inline Expected<Tgt, const char*> tryTo() {
+ Tgt result = 0;
+ // In build with asserts:
+ // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion<Decl *, LocalInstantiationScope::DeclArgumentPack *> *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa<LabelDecl>(D) && "declaration not instantiated in this scope"' failed.
+ // In release build compilation error on the line below inside lambda:
+ // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized]
+ ExpectedHelper::then_(Expected<bool, int>(), [&](bool) { return result; });
+ return {};
+}
+
+} // namespace folly
+
+inline void bar() {
+ folly::tryTo<int>();
+}
+// expected-no-diagnostics
+
+//--- folly-conv.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- thrift_cpp2_base.h
+#pragma once
+#include "Conv.h"
+// expected-no-diagnostics
+
+//--- logger_base.h
+#pragma once
+import "folly-conv.h";
+import "thrift_cpp2_base.h";
+
+inline void foo() {
+ folly::tryTo<unsigned>();
+}
+// expected-no-diagnostics
>From 02e09990e08de701c4efe952e4ff3e2070055cd2 Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 15 Aug 2024 14:03:57 -0700
Subject: [PATCH 2/5] [RFC][C++20][Modules] Fix crash when function and lambda
inside loaded from different modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Summary:
Because AST loading code is lazy and happens in unpredictable order it could happen that function and lambda inside function can be loaded from different modules. In this case, captured DeclRefExpr won’t match the corresponding VarDecl inside function. In AST it looks like this:
```
FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline
|-also in ./folly-conv.h
`-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1>
|-DeclStmt 0x555564f7ced8 <line:34:3, col:17>
| `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit
| `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0
|-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>'
| |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0
| |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing
| `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)'
| |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition
| | |-also in ./thrift_cpp2_base.h
| | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init
| | |-DefaultConstructor defaulted_is_constexpr
| | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param
| | |-MoveConstructor exists simple trivial needs_implicit
| | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param
| | |-MoveAssignment
| | `-Destructor simple irrelevant trivial constexpr needs_implicit
| `-CompoundStmt 0x555564f7d1a8 <col:58, col:75>
| `-ReturnStmt 0x555564f7d198 <col:60, col:67>
| `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture
`-ReturnStmt 0x555564f7bc78 <line:40:3, col:11>
`-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void'
```
I’m not sure that it is the right fix for the problem so any ideas how to fix it better are very appreciated.
Test Plan: check-clang
---
clang/lib/Sema/SemaTemplateInstantiate.cpp | 5 +++++
clang/lib/Serialization/ASTReaderDecl.cpp | 7 +++++++
2 files changed, 12 insertions(+)
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 9a6cd2cd0ab751..f784de3a20acf2 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -4363,6 +4363,11 @@ LocalInstantiationScope::findInstantiationOf(const Decl *D) {
// a previous declaration.
if (const TagDecl *Tag = dyn_cast<TagDecl>(CheckD))
CheckD = Tag->getPreviousDecl();
+ else if (const VarDecl *VD = dyn_cast<VarDecl>(CheckD))
+ // Check re-declaration chain for variable to deduplicate variables
+ // that might be captured inside lambdas. Function and lambda class
+ // inside can be loaded from different modules.
+ CheckD = VD->getPreviousDecl();
else
CheckD = nullptr;
} while (CheckD);
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index c118f3818467d9..bd46ac3f29af56 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -3286,6 +3286,13 @@ DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
if (auto *TU = dyn_cast<TranslationUnitDecl>(DC))
return TU->getPrimaryContext();
+ // Merge VarDecls inside functions to deduplicate variables that might be
+ // captured inside lambdas. Function and lambda class inside can be loaded
+ // from different modules.
+ if (auto *FD = dyn_cast<FunctionDecl>(DC))
+ if (FD->getOwningModule())
+ return FD->getCanonicalDecl();
+
return nullptr;
}
>From 70a504833bfb540b54bbbe86e8065cf22bdf286b Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Tue, 20 Aug 2024 02:45:54 -0700
Subject: [PATCH 3/5] Try different approach withreverse order of loading
specializations
---
clang/lib/AST/DeclTemplate.cpp | 8 ++++++--
clang/lib/Sema/SemaTemplateInstantiate.cpp | 5 -----
clang/lib/Serialization/ASTReaderDecl.cpp | 7 -------
clang/test/Modules/odr_hash.cpp | 4 ++--
4 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 976b3a3e1ecedb..7dc9afd6159465 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -349,8 +349,12 @@ void RedeclarableTemplateDecl::loadLazySpecializationsImpl() const {
GlobalDeclID *Specs = CommonBasePtr->LazySpecializations;
CommonBasePtr->LazySpecializations = nullptr;
unsigned SpecSize = (*Specs++).getRawValue();
- for (unsigned I = 0; I != SpecSize; ++I)
- (void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
+ // Load the specializations in reverse order so that the most recent
+ // specialization are visited first so they become canonical declarations.
+ // This order matches the order in which namelookup discovers declarations
+ // coming from modules.
+ for (unsigned I = SpecSize; I != 0; --I)
+ (void)Context.getExternalSource()->GetExternalDecl(Specs[I-1]);
}
}
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index f784de3a20acf2..9a6cd2cd0ab751 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -4363,11 +4363,6 @@ LocalInstantiationScope::findInstantiationOf(const Decl *D) {
// a previous declaration.
if (const TagDecl *Tag = dyn_cast<TagDecl>(CheckD))
CheckD = Tag->getPreviousDecl();
- else if (const VarDecl *VD = dyn_cast<VarDecl>(CheckD))
- // Check re-declaration chain for variable to deduplicate variables
- // that might be captured inside lambdas. Function and lambda class
- // inside can be loaded from different modules.
- CheckD = VD->getPreviousDecl();
else
CheckD = nullptr;
} while (CheckD);
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index bd46ac3f29af56..c118f3818467d9 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -3286,13 +3286,6 @@ DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
if (auto *TU = dyn_cast<TranslationUnitDecl>(DC))
return TU->getPrimaryContext();
- // Merge VarDecls inside functions to deduplicate variables that might be
- // captured inside lambdas. Function and lambda class inside can be loaded
- // from different modules.
- if (auto *FD = dyn_cast<FunctionDecl>(DC))
- if (FD->getOwningModule())
- return FD->getCanonicalDecl();
-
return nullptr;
}
diff --git a/clang/test/Modules/odr_hash.cpp b/clang/test/Modules/odr_hash.cpp
index fa8b2c81ab46e1..7cea3af3f41bdd 100644
--- a/clang/test/Modules/odr_hash.cpp
+++ b/clang/test/Modules/odr_hash.cpp
@@ -3084,8 +3084,8 @@ struct S5 {
};
#else
S5 s5;
-// expected-error at second.h:* {{'PointersAndReferences::S5::x' from module 'SecondModule' is not present in definition of 'PointersAndReferences::S5' in module 'FirstModule'}}
-// expected-note at first.h:* {{declaration of 'x' does not match}}
+// expected-error at first.h:* {{'PointersAndReferences::S5::x' from module 'FirstModule' is not present in definition of 'PointersAndReferences::S5' in module 'SecondModule'}}
+// expected-note at second.h:* {{declaration of 'x' does not match}}
#endif
#if defined(FIRST)
>From 0e143f67ac60b1636d451e3aa4e6bf326b3b0f8e Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Tue, 20 Aug 2024 02:54:48 -0700
Subject: [PATCH 4/5] Fix clang-format issue
---
clang/lib/AST/DeclTemplate.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 7dc9afd6159465..bdd40b01c0c6f4 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -354,7 +354,7 @@ void RedeclarableTemplateDecl::loadLazySpecializationsImpl() const {
// This order matches the order in which namelookup discovers declarations
// coming from modules.
for (unsigned I = SpecSize; I != 0; --I)
- (void)Context.getExternalSource()->GetExternalDecl(Specs[I-1]);
+ (void)Context.getExternalSource()->GetExternalDecl(Specs[I - 1]);
}
}
>From eb90486184835b664b36e9362b0edbbe611ef5dc Mon Sep 17 00:00:00 2001
From: Dmitry Polukhin <dmitry.polukhin at gmail.com>
Date: Thu, 22 Aug 2024 10:31:01 -0700
Subject: [PATCH 5/5] Visit lambdas right after FunctionDecl
---
clang/lib/AST/DeclTemplate.cpp | 8 ++---
clang/lib/Serialization/ASTReaderDecl.cpp | 9 ++++++
clang/lib/Serialization/ASTWriterDecl.cpp | 37 +++++++++++++++++++++++
clang/test/Modules/odr_hash.cpp | 4 +--
4 files changed, 50 insertions(+), 8 deletions(-)
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index bdd40b01c0c6f4..976b3a3e1ecedb 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -349,12 +349,8 @@ void RedeclarableTemplateDecl::loadLazySpecializationsImpl() const {
GlobalDeclID *Specs = CommonBasePtr->LazySpecializations;
CommonBasePtr->LazySpecializations = nullptr;
unsigned SpecSize = (*Specs++).getRawValue();
- // Load the specializations in reverse order so that the most recent
- // specialization are visited first so they become canonical declarations.
- // This order matches the order in which namelookup discovers declarations
- // coming from modules.
- for (unsigned I = SpecSize; I != 0; --I)
- (void)Context.getExternalSource()->GetExternalDecl(Specs[I - 1]);
+ for (unsigned I = 0; I != SpecSize; ++I)
+ (void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
}
}
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index ef160228933c59..e7e921087a076a 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1152,6 +1152,15 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(readDeclAs<ParmVarDecl>());
FD->setParams(Reader.getContext(), Params);
+
+ // For the first decl read all lambdas inside, otherwise skip them.
+ unsigned NumLambdas = Record.readInt();
+ if (FD->isFirstDecl()) {
+ for (unsigned I = 0; I != NumLambdas; ++I)
+ readDecl();
+ } else {
+ (void)Record.readIntArray(NumLambdas);
+ }
}
void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 555f6325da646b..1c8dc0024a353d 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/PrettyDeclStackTrace.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/ASTRecordWriter.h"
@@ -625,6 +626,33 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) {
: QualType());
}
+static llvm::SmallVector<const Decl *, 2> collectLambdas(FunctionDecl *D) {
+ struct LambdaCollector : public ConstStmtVisitor<LambdaCollector> {
+ llvm::SmallVectorImpl<const Decl *> &Lambdas;
+
+ LambdaCollector(llvm::SmallVectorImpl<const Decl *> &Lambdas)
+ : Lambdas(Lambdas) {}
+
+ void VisitLambdaExpr(const LambdaExpr *E) {
+ VisitStmt(E);
+ Lambdas.push_back(E->getLambdaClass());
+ }
+
+ void VisitStmt(const Stmt *S) {
+ if (!S)
+ return;
+ for (const Stmt *Child : S->children())
+ if (Child)
+ Visit(Child);
+ }
+ };
+
+ llvm::SmallVector<const Decl *, 2> Lambdas;
+ if (D->hasBody())
+ LambdaCollector(Lambdas).VisitStmt(D->getBody());
+ return Lambdas;
+}
+
void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
static_assert(DeclContext::NumFunctionDeclBits == 44,
"You need to update the serializer after you change the "
@@ -764,6 +792,15 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
Record.push_back(D->param_size());
for (auto *P : D->parameters())
Record.AddDeclRef(P);
+
+ // Store references to all lambda decls inside function to load them
+ // immediately after loading the function to make sure that canonical
+ // decls for lambdas will be from the same module.
+ llvm::SmallVector<const Decl *, 2> Lambdas = collectLambdas(D);
+ Record.push_back(Lambdas.size());
+ for (const auto *L : Lambdas)
+ Record.AddDeclRef(L);
+
Code = serialization::DECL_FUNCTION;
}
diff --git a/clang/test/Modules/odr_hash.cpp b/clang/test/Modules/odr_hash.cpp
index 7cea3af3f41bdd..fa8b2c81ab46e1 100644
--- a/clang/test/Modules/odr_hash.cpp
+++ b/clang/test/Modules/odr_hash.cpp
@@ -3084,8 +3084,8 @@ struct S5 {
};
#else
S5 s5;
-// expected-error at first.h:* {{'PointersAndReferences::S5::x' from module 'FirstModule' is not present in definition of 'PointersAndReferences::S5' in module 'SecondModule'}}
-// expected-note at second.h:* {{declaration of 'x' does not match}}
+// expected-error at second.h:* {{'PointersAndReferences::S5::x' from module 'SecondModule' is not present in definition of 'PointersAndReferences::S5' in module 'FirstModule'}}
+// expected-note at first.h:* {{declaration of 'x' does not match}}
#endif
#if defined(FIRST)
More information about the cfe-commits
mailing list