[clang] 4082a75 - Improve stack usage to increase recursive initialization depth (#88546)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 16 10:48:17 PDT 2024
Author: Aaron Ballman
Date: 2024-04-16T13:48:13-04:00
New Revision: 4082a7554521572a65a5a0008c4661a534df659d
URL: https://github.com/llvm/llvm-project/commit/4082a7554521572a65a5a0008c4661a534df659d
DIFF: https://github.com/llvm/llvm-project/commit/4082a7554521572a65a5a0008c4661a534df659d.diff
LOG: Improve stack usage to increase recursive initialization depth (#88546)
We were crashing due to stack exhaustion on rather reasonable C++
template code. After some investigation, I found that we have a
stack-allocated object that was huge: `InitializationSequence` was 7016
bytes. This caused an overflow with deep call stacks in initialization
code.
With these changes, `InitializationSequence` is now 248 bytes.
With the original code, testing RelWithDebInfo on Windows 10, all the
tests in SemaCXX took about 6s 800ms. The max template depth I could
reach on my machine using the code in the issue was 708. After that, I
would get `-Wstack-exhausted` warnings until crashing at 976
instantiations.
With these changes on the same machine, all the tests in SemaCXX took
about 6s 500ms. The max template depth I could reach was 1492. After
that, I would get `-Wstack-exhausted` warnings until crashing at 2898
instantiations.
This improves the behavior of #88330 but there's still an outstanding
question of why we run out of stack space and crash in some
circumstances before we're able to issue a diagnostic about stack space
exhaustion.
Added:
Modified:
clang/docs/ReleaseNotes.rst
clang/include/clang/Sema/Initialization.h
clang/include/clang/Sema/Overload.h
clang/lib/Sema/SemaInit.cpp
clang/lib/Sema/SemaOverload.cpp
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d8ec8bcb8df532..e6c345a2f5c0f5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -203,6 +203,12 @@ Non-comprehensive list of changes in this release
- ``__typeof_unqual__`` is available in all C modes as an extension, which behaves
like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``.
+- Improved stack usage with C++ initialization code. This allows significantly
+ more levels of recursive initialization before reaching stack exhaustion
+ limits. This will positively impact recursive template instantiation code,
+ but should also reduce memory overhead for initializations in general.
+ Fixes #GH88330
+
New Compiler Flags
------------------
- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and
diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index 2072cd8d1c3ef8..1ceacf0f49f568 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -1134,7 +1134,7 @@ class InitializationSequence {
OverloadingResult FailedOverloadResult;
/// The candidate set created when initialization failed.
- OverloadCandidateSet FailedCandidateSet;
+ std::unique_ptr<OverloadCandidateSet> FailedCandidateSet;
/// The incomplete type that caused a failure.
QualType FailedIncompleteType;
@@ -1403,7 +1403,9 @@ class InitializationSequence {
/// Retrieve a reference to the candidate set when overload
/// resolution fails.
OverloadCandidateSet &getFailedCandidateSet() {
- return FailedCandidateSet;
+ assert(FailedCandidateSet &&
+ "this should have been allocated in the constructor!");
+ return *FailedCandidateSet;
}
/// Get the overloading result, for when the initialization
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 76311b00d2fc58..e6f88bbf7c4f47 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -37,6 +37,7 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <memory>
#include <utility>
namespace clang {
@@ -874,7 +875,8 @@ class Sema;
ConversionFixItGenerator Fix;
/// Viable - True to indicate that this overload candidate is viable.
- bool Viable : 1;
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned Viable : 1;
/// Whether this candidate is the best viable function, or tied for being
/// the best viable function.
@@ -883,12 +885,14 @@ class Sema;
/// was part of the ambiguity kernel: the minimal non-empty set of viable
/// candidates such that all elements of the ambiguity kernel are better
/// than all viable candidates not in the ambiguity kernel.
- bool Best : 1;
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned Best : 1;
/// IsSurrogate - True to indicate that this candidate is a
/// surrogate for a conversion to a function pointer or reference
/// (C++ [over.call.object]).
- bool IsSurrogate : 1;
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned IsSurrogate : 1;
/// IgnoreObjectArgument - True to indicate that the first
/// argument's conversion, which for this function represents the
@@ -897,18 +901,20 @@ class Sema;
/// implicit object argument is just a placeholder) or a
/// non-static member function when the call doesn't have an
/// object argument.
- bool IgnoreObjectArgument : 1;
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned IgnoreObjectArgument : 1;
/// True if the candidate was found using ADL.
- CallExpr::ADLCallKind IsADLCandidate : 1;
+ LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind)
+ unsigned IsADLCandidate : 1;
/// Whether this is a rewritten candidate, and if so, of what kind?
LLVM_PREFERRED_TYPE(OverloadCandidateRewriteKind)
unsigned RewriteKind : 2;
/// FailureKind - The reason why this candidate is not viable.
- /// Actually an OverloadFailureKind.
- unsigned char FailureKind;
+ LLVM_PREFERRED_TYPE(OverloadFailureKind)
+ unsigned FailureKind : 5;
/// The number of call arguments that were explicitly provided,
/// to be used while performing partial ordering of function templates.
@@ -972,7 +978,9 @@ class Sema;
private:
friend class OverloadCandidateSet;
OverloadCandidate()
- : IsSurrogate(false), IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
+ : IsSurrogate(false),
+ IsADLCandidate(static_cast<unsigned>(CallExpr::NotADL)),
+ RewriteKind(CRK_None) {}
};
/// OverloadCandidateSet - A set of overload candidates, used in C++
@@ -1070,51 +1078,16 @@ class Sema;
};
private:
- SmallVector<OverloadCandidate, 16> Candidates;
- llvm::SmallPtrSet<uintptr_t, 16> Functions;
-
- // Allocator for ConversionSequenceLists. We store the first few of these
- // inline to avoid allocation for small sets.
- llvm::BumpPtrAllocator SlabAllocator;
+ SmallVector<OverloadCandidate, 4> Candidates;
+ llvm::SmallPtrSet<uintptr_t, 4> Functions;
SourceLocation Loc;
CandidateSetKind Kind;
OperatorRewriteInfo RewriteInfo;
- constexpr static unsigned NumInlineBytes =
- 24 * sizeof(ImplicitConversionSequence);
- unsigned NumInlineBytesUsed = 0;
- alignas(void *) char InlineSpace[NumInlineBytes];
-
// Address space of the object being constructed.
LangAS DestAS = LangAS::Default;
- /// If we have space, allocates from inline storage. Otherwise, allocates
- /// from the slab allocator.
- /// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
- /// instead.
- /// FIXME: Now that this only allocates ImplicitConversionSequences, do we
- /// want to un-generalize this?
- template <typename T>
- T *slabAllocate(unsigned N) {
- // It's simpler if this doesn't need to consider alignment.
- static_assert(alignof(T) == alignof(void *),
- "Only works for pointer-aligned types.");
- static_assert(std::is_trivial<T>::value ||
- std::is_same<ImplicitConversionSequence, T>::value,
- "Add destruction logic to OverloadCandidateSet::clear().");
-
- unsigned NBytes = sizeof(T) * N;
- if (NBytes > NumInlineBytes - NumInlineBytesUsed)
- return SlabAllocator.Allocate<T>(N);
- char *FreeSpaceStart = InlineSpace + NumInlineBytesUsed;
- assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
- "Misaligned storage!");
-
- NumInlineBytesUsed += NBytes;
- return reinterpret_cast<T *>(FreeSpaceStart);
- }
-
void destroyCandidates();
public:
@@ -1163,12 +1136,7 @@ class Sema;
ConversionSequenceList
allocateConversionSequences(unsigned NumConversions) {
ImplicitConversionSequence *Conversions =
- slabAllocate<ImplicitConversionSequence>(NumConversions);
-
- // Construct the new objects.
- for (unsigned I = 0; I != NumConversions; ++I)
- new (&Conversions[I]) ImplicitConversionSequence();
-
+ new ImplicitConversionSequence[NumConversions];
return ConversionSequenceList(Conversions, NumConversions);
}
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index fb7a80ab02846c..791c0b6e6df23e 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -6114,7 +6114,8 @@ InitializationSequence::InitializationSequence(
Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
MultiExprArg Args, bool TopLevelOfInitList, bool TreatUnavailableAsInvalid)
: FailedOverloadResult(OR_Success),
- FailedCandidateSet(Kind.getLocation(), OverloadCandidateSet::CSK_Normal) {
+ FailedCandidateSet(new OverloadCandidateSet(
+ Kind.getLocation(), OverloadCandidateSet::CSK_Normal)) {
InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList,
TreatUnavailableAsInvalid);
}
@@ -9735,7 +9736,7 @@ bool InitializationSequence::Diagnose(Sema &S,
switch (FailedOverloadResult) {
case OR_Ambiguous:
- FailedCandidateSet.NoteCandidates(
+ FailedCandidateSet->NoteCandidates(
PartialDiagnosticAt(
Kind.getLocation(),
Failure == FK_UserConversionOverloadFailed
@@ -9749,7 +9750,8 @@ bool InitializationSequence::Diagnose(Sema &S,
break;
case OR_No_Viable_Function: {
- auto Cands = FailedCandidateSet.CompleteCandidates(S, OCD_AllCandidates, Args);
+ auto Cands =
+ FailedCandidateSet->CompleteCandidates(S, OCD_AllCandidates, Args);
if (!S.RequireCompleteType(Kind.getLocation(),
DestType.getNonReferenceType(),
diag::err_typecheck_nonviable_condition_incomplete,
@@ -9759,13 +9761,13 @@ bool InitializationSequence::Diagnose(Sema &S,
<< OnlyArg->getType() << Args[0]->getSourceRange()
<< DestType.getNonReferenceType();
- FailedCandidateSet.NoteCandidates(S, Args, Cands);
+ FailedCandidateSet->NoteCandidates(S, Args, Cands);
break;
}
case OR_Deleted: {
OverloadCandidateSet::iterator Best;
- OverloadingResult Ovl
- = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+ OverloadingResult Ovl =
+ FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
StringLiteral *Msg = Best->Function->getDeletedMessage();
S.Diag(Kind.getLocation(), diag::err_typecheck_deleted_function)
@@ -9949,7 +9951,7 @@ bool InitializationSequence::Diagnose(Sema &S,
// bad.
switch (FailedOverloadResult) {
case OR_Ambiguous:
- FailedCandidateSet.NoteCandidates(
+ FailedCandidateSet->NoteCandidates(
PartialDiagnosticAt(Kind.getLocation(),
S.PDiag(diag::err_ovl_ambiguous_init)
<< DestType << ArgsRange),
@@ -10003,7 +10005,7 @@ bool InitializationSequence::Diagnose(Sema &S,
break;
}
- FailedCandidateSet.NoteCandidates(
+ FailedCandidateSet->NoteCandidates(
PartialDiagnosticAt(
Kind.getLocation(),
S.PDiag(diag::err_ovl_no_viable_function_in_init)
@@ -10013,8 +10015,8 @@ bool InitializationSequence::Diagnose(Sema &S,
case OR_Deleted: {
OverloadCandidateSet::iterator Best;
- OverloadingResult Ovl
- = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+ OverloadingResult Ovl =
+ FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
if (Ovl != OR_Deleted) {
S.Diag(Kind.getLocation(), diag::err_ovl_deleted_init)
<< DestType << ArgsRange;
@@ -10093,8 +10095,8 @@ bool InitializationSequence::Diagnose(Sema &S,
S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
<< Args[0]->getSourceRange();
OverloadCandidateSet::iterator Best;
- OverloadingResult Ovl
- = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+ OverloadingResult Ovl =
+ FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
(void)Ovl;
assert(Ovl == OR_Success && "Inconsistent overload resolution");
CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 227ef564ba3e08..bcde0d86cf10fd 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1057,8 +1057,7 @@ bool OverloadCandidateSet::OperatorRewriteInfo::shouldAddReversed(
void OverloadCandidateSet::destroyCandidates() {
for (iterator i = begin(), e = end(); i != e; ++i) {
- for (auto &C : i->Conversions)
- C.~ImplicitConversionSequence();
+ delete[] i->Conversions.data();
if (!i->Viable && i->FailureKind == ovl_fail_bad_deduction)
i->DeductionFailure.Destroy();
}
@@ -1066,8 +1065,6 @@ void OverloadCandidateSet::destroyCandidates() {
void OverloadCandidateSet::clear(CandidateSetKind CSK) {
destroyCandidates();
- SlabAllocator.Reset();
- NumInlineBytesUsed = 0;
Candidates.clear();
Functions.clear();
Kind = CSK;
@@ -6983,7 +6980,7 @@ void Sema::AddOverloadCandidate(
Candidate.RewriteKind =
CandidateSet.getRewriteInfo().getRewriteKind(Function, PO);
Candidate.IsSurrogate = false;
- Candidate.IsADLCandidate = IsADLCandidate;
+ Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
Candidate.IgnoreObjectArgument = false;
Candidate.ExplicitCallArguments = Args.size();
@@ -7815,7 +7812,7 @@ void Sema::AddTemplateOverloadCandidate(
Candidate.RewriteKind =
CandidateSet.getRewriteInfo().getRewriteKind(Candidate.Function, PO);
Candidate.IsSurrogate = false;
- Candidate.IsADLCandidate = IsADLCandidate;
+ Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
// Ignore the object argument if there is one, since we don't have an object
// type.
Candidate.IgnoreObjectArgument =
@@ -14125,7 +14122,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
return ExprError();
return SemaRef.BuildResolvedCallExpr(
Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
- /*IsExecConfig=*/false, (*Best)->IsADLCandidate);
+ /*IsExecConfig=*/false,
+ static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
}
case OR_No_Viable_Function: {
@@ -14184,7 +14182,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
return ExprError();
return SemaRef.BuildResolvedCallExpr(
Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
- /*IsExecConfig=*/false, (*Best)->IsADLCandidate);
+ /*IsExecConfig=*/false,
+ static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
}
}
@@ -14491,7 +14490,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
Args[0] = Input;
CallExpr *TheCall = CXXOperatorCallExpr::Create(
Context, Op, FnExpr.get(), ArgsArray, ResultTy, VK, OpLoc,
- CurFPFeatureOverrides(), Best->IsADLCandidate);
+ CurFPFeatureOverrides(),
+ static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl))
return ExprError();
@@ -14909,7 +14909,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
// members; CodeGen should take care not to emit the this pointer.
TheCall = CXXOperatorCallExpr::Create(
Context, ChosenOp, FnExpr.get(), Args, ResultTy, VK, OpLoc,
- CurFPFeatureOverrides(), Best->IsADLCandidate);
+ CurFPFeatureOverrides(),
+ static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
if (const auto *Method = dyn_cast<CXXMethodDecl>(FnDecl);
Method && Method->isImplicitObjectMemberFunction()) {
More information about the cfe-commits
mailing list