[clang] 4082a75 - Improve stack usage to increase recursive initialization depth (#88546)

via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 16 10:48:17 PDT 2024


Author: Aaron Ballman
Date: 2024-04-16T13:48:13-04:00
New Revision: 4082a7554521572a65a5a0008c4661a534df659d

URL: https://github.com/llvm/llvm-project/commit/4082a7554521572a65a5a0008c4661a534df659d
DIFF: https://github.com/llvm/llvm-project/commit/4082a7554521572a65a5a0008c4661a534df659d.diff

LOG: Improve stack usage to increase recursive initialization depth (#88546)

We were crashing due to stack exhaustion on rather reasonable C++
template code. After some investigation, I found that we have a
stack-allocated object that was huge: `InitializationSequence` was 7016
bytes. This caused an overflow with deep call stacks in initialization
code.

With these change, `InitializationSequence` is now 248 bytes.

With the original code, testing RelWithDebInfo on Windows 10, all the
tests in SemaCXX took about 6s 800ms. The max template depth I could
reach on my machine using the code in the issue was 708. After that, I
would get `-Wstack-exhausted` warnings until crashing at 976
instantiations.

With these changes on the same machine, all the tests in SemaCXX took
about 6s 500ms. The max template depth I could reach was 1492. After
that, I would get `-Wstack-exhausted` warnings until crashing at 2898
instantiations.

This improves the behavior of #88330 but there's still an outstanding
question of why we run out of stack space and crash in some
circumstances before we're able to issue a diagnostic about stack space
exhaustion.

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Sema/Initialization.h
    clang/include/clang/Sema/Overload.h
    clang/lib/Sema/SemaInit.cpp
    clang/lib/Sema/SemaOverload.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d8ec8bcb8df532..e6c345a2f5c0f5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -203,6 +203,12 @@ Non-comprehensive list of changes in this release
 - ``__typeof_unqual__`` is available in all C modes as an extension, which behaves
   like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``.
 
+- Improved stack usage with C++ initialization code. This allows significantly
+  more levels of recursive initialization before reaching stack exhaustion
+  limits. This will positively impact recursive template instantiation code,
+  but should also reduce memory overhead for initializations in general.
+  Fixes #GH88330
+
 New Compiler Flags
 ------------------
 - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and

diff  --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index 2072cd8d1c3ef8..1ceacf0f49f568 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -1134,7 +1134,7 @@ class InitializationSequence {
   OverloadingResult FailedOverloadResult;
 
   /// The candidate set created when initialization failed.
-  OverloadCandidateSet FailedCandidateSet;
+  std::unique_ptr<OverloadCandidateSet> FailedCandidateSet;
 
   /// The incomplete type that caused a failure.
   QualType FailedIncompleteType;
@@ -1403,7 +1403,9 @@ class InitializationSequence {
   /// Retrieve a reference to the candidate set when overload
   /// resolution fails.
   OverloadCandidateSet &getFailedCandidateSet() {
-    return FailedCandidateSet;
+    assert(FailedCandidateSet &&
+           "this should have been allocated in the constructor!");
+    return *FailedCandidateSet;
   }
 
   /// Get the overloading result, for when the initialization

diff  --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 76311b00d2fc58..e6f88bbf7c4f47 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -37,6 +37,7 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
+#include <memory>
 #include <utility>
 
 namespace clang {
@@ -874,7 +875,8 @@ class Sema;
     ConversionFixItGenerator Fix;
 
     /// Viable - True to indicate that this overload candidate is viable.
-    bool Viable : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned Viable : 1;
 
     /// Whether this candidate is the best viable function, or tied for being
     /// the best viable function.
@@ -883,12 +885,14 @@ class Sema;
     /// was part of the ambiguity kernel: the minimal non-empty set of viable
     /// candidates such that all elements of the ambiguity kernel are better
     /// than all viable candidates not in the ambiguity kernel.
-    bool Best : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned Best : 1;
 
     /// IsSurrogate - True to indicate that this candidate is a
     /// surrogate for a conversion to a function pointer or reference
     /// (C++ [over.call.object]).
-    bool IsSurrogate : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned IsSurrogate : 1;
 
     /// IgnoreObjectArgument - True to indicate that the first
     /// argument's conversion, which for this function represents the
@@ -897,18 +901,20 @@ class Sema;
     /// implicit object argument is just a placeholder) or a
     /// non-static member function when the call doesn't have an
     /// object argument.
-    bool IgnoreObjectArgument : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned IgnoreObjectArgument : 1;
 
     /// True if the candidate was found using ADL.
-    CallExpr::ADLCallKind IsADLCandidate : 1;
+    LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind)
+    unsigned IsADLCandidate : 1;
 
     /// Whether this is a rewritten candidate, and if so, of what kind?
     LLVM_PREFERRED_TYPE(OverloadCandidateRewriteKind)
     unsigned RewriteKind : 2;
 
     /// FailureKind - The reason why this candidate is not viable.
-    /// Actually an OverloadFailureKind.
-    unsigned char FailureKind;
+    LLVM_PREFERRED_TYPE(OverloadFailureKind)
+    unsigned FailureKind : 5;
 
     /// The number of call arguments that were explicitly provided,
     /// to be used while performing partial ordering of function templates.
@@ -972,7 +978,9 @@ class Sema;
   private:
     friend class OverloadCandidateSet;
     OverloadCandidate()
-        : IsSurrogate(false), IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
+        : IsSurrogate(false),
+          IsADLCandidate(static_cast<unsigned>(CallExpr::NotADL)),
+          RewriteKind(CRK_None) {}
   };
 
   /// OverloadCandidateSet - A set of overload candidates, used in C++
@@ -1070,51 +1078,16 @@ class Sema;
     };
 
   private:
-    SmallVector<OverloadCandidate, 16> Candidates;
-    llvm::SmallPtrSet<uintptr_t, 16> Functions;
-
-    // Allocator for ConversionSequenceLists. We store the first few of these
-    // inline to avoid allocation for small sets.
-    llvm::BumpPtrAllocator SlabAllocator;
+    SmallVector<OverloadCandidate, 4> Candidates;
+    llvm::SmallPtrSet<uintptr_t, 4> Functions;
 
     SourceLocation Loc;
     CandidateSetKind Kind;
     OperatorRewriteInfo RewriteInfo;
 
-    constexpr static unsigned NumInlineBytes =
-        24 * sizeof(ImplicitConversionSequence);
-    unsigned NumInlineBytesUsed = 0;
-    alignas(void *) char InlineSpace[NumInlineBytes];
-
     // Address space of the object being constructed.
     LangAS DestAS = LangAS::Default;
 
-    /// If we have space, allocates from inline storage. Otherwise, allocates
-    /// from the slab allocator.
-    /// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
-    /// instead.
-    /// FIXME: Now that this only allocates ImplicitConversionSequences, do we
-    /// want to un-generalize this?
-    template <typename T>
-    T *slabAllocate(unsigned N) {
-      // It's simpler if this doesn't need to consider alignment.
-      static_assert(alignof(T) == alignof(void *),
-                    "Only works for pointer-aligned types.");
-      static_assert(std::is_trivial<T>::value ||
-                        std::is_same<ImplicitConversionSequence, T>::value,
-                    "Add destruction logic to OverloadCandidateSet::clear().");
-
-      unsigned NBytes = sizeof(T) * N;
-      if (NBytes > NumInlineBytes - NumInlineBytesUsed)
-        return SlabAllocator.Allocate<T>(N);
-      char *FreeSpaceStart = InlineSpace + NumInlineBytesUsed;
-      assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
-             "Misaligned storage!");
-
-      NumInlineBytesUsed += NBytes;
-      return reinterpret_cast<T *>(FreeSpaceStart);
-    }
-
     void destroyCandidates();
 
   public:
@@ -1163,12 +1136,7 @@ class Sema;
     ConversionSequenceList
     allocateConversionSequences(unsigned NumConversions) {
       ImplicitConversionSequence *Conversions =
-          slabAllocate<ImplicitConversionSequence>(NumConversions);
-
-      // Construct the new objects.
-      for (unsigned I = 0; I != NumConversions; ++I)
-        new (&Conversions[I]) ImplicitConversionSequence();
-
+          new ImplicitConversionSequence[NumConversions];
       return ConversionSequenceList(Conversions, NumConversions);
     }
 

diff  --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index fb7a80ab02846c..791c0b6e6df23e 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -6114,7 +6114,8 @@ InitializationSequence::InitializationSequence(
     Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
     MultiExprArg Args, bool TopLevelOfInitList, bool TreatUnavailableAsInvalid)
     : FailedOverloadResult(OR_Success),
-      FailedCandidateSet(Kind.getLocation(), OverloadCandidateSet::CSK_Normal) {
+      FailedCandidateSet(new OverloadCandidateSet(
+          Kind.getLocation(), OverloadCandidateSet::CSK_Normal)) {
   InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList,
                  TreatUnavailableAsInvalid);
 }
@@ -9735,7 +9736,7 @@ bool InitializationSequence::Diagnose(Sema &S,
     switch (FailedOverloadResult) {
     case OR_Ambiguous:
 
-      FailedCandidateSet.NoteCandidates(
+      FailedCandidateSet->NoteCandidates(
           PartialDiagnosticAt(
               Kind.getLocation(),
               Failure == FK_UserConversionOverloadFailed
@@ -9749,7 +9750,8 @@ bool InitializationSequence::Diagnose(Sema &S,
       break;
 
     case OR_No_Viable_Function: {
-      auto Cands = FailedCandidateSet.CompleteCandidates(S, OCD_AllCandidates, Args);
+      auto Cands =
+          FailedCandidateSet->CompleteCandidates(S, OCD_AllCandidates, Args);
       if (!S.RequireCompleteType(Kind.getLocation(),
                                  DestType.getNonReferenceType(),
                           diag::err_typecheck_nonviable_condition_incomplete,
@@ -9759,13 +9761,13 @@ bool InitializationSequence::Diagnose(Sema &S,
           << OnlyArg->getType() << Args[0]->getSourceRange()
           << DestType.getNonReferenceType();
 
-      FailedCandidateSet.NoteCandidates(S, Args, Cands);
+      FailedCandidateSet->NoteCandidates(S, Args, Cands);
       break;
     }
     case OR_Deleted: {
       OverloadCandidateSet::iterator Best;
-      OverloadingResult Ovl
-        = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+      OverloadingResult Ovl =
+          FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
 
       StringLiteral *Msg = Best->Function->getDeletedMessage();
       S.Diag(Kind.getLocation(), diag::err_typecheck_deleted_function)
@@ -9949,7 +9951,7 @@ bool InitializationSequence::Diagnose(Sema &S,
     // bad.
     switch (FailedOverloadResult) {
       case OR_Ambiguous:
-        FailedCandidateSet.NoteCandidates(
+        FailedCandidateSet->NoteCandidates(
             PartialDiagnosticAt(Kind.getLocation(),
                                 S.PDiag(diag::err_ovl_ambiguous_init)
                                     << DestType << ArgsRange),
@@ -10003,7 +10005,7 @@ bool InitializationSequence::Diagnose(Sema &S,
           break;
         }
 
-        FailedCandidateSet.NoteCandidates(
+        FailedCandidateSet->NoteCandidates(
             PartialDiagnosticAt(
                 Kind.getLocation(),
                 S.PDiag(diag::err_ovl_no_viable_function_in_init)
@@ -10013,8 +10015,8 @@ bool InitializationSequence::Diagnose(Sema &S,
 
       case OR_Deleted: {
         OverloadCandidateSet::iterator Best;
-        OverloadingResult Ovl
-          = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+        OverloadingResult Ovl =
+            FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
         if (Ovl != OR_Deleted) {
           S.Diag(Kind.getLocation(), diag::err_ovl_deleted_init)
               << DestType << ArgsRange;
@@ -10093,8 +10095,8 @@ bool InitializationSequence::Diagnose(Sema &S,
     S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
       << Args[0]->getSourceRange();
     OverloadCandidateSet::iterator Best;
-    OverloadingResult Ovl
-      = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+    OverloadingResult Ovl =
+        FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
     (void)Ovl;
     assert(Ovl == OR_Success && "Inconsistent overload resolution");
     CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);

diff  --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 227ef564ba3e08..bcde0d86cf10fd 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1057,8 +1057,7 @@ bool OverloadCandidateSet::OperatorRewriteInfo::shouldAddReversed(
 
 void OverloadCandidateSet::destroyCandidates() {
   for (iterator i = begin(), e = end(); i != e; ++i) {
-    for (auto &C : i->Conversions)
-      C.~ImplicitConversionSequence();
+    delete[] i->Conversions.data();
     if (!i->Viable && i->FailureKind == ovl_fail_bad_deduction)
       i->DeductionFailure.Destroy();
   }
@@ -1066,8 +1065,6 @@ void OverloadCandidateSet::destroyCandidates() {
 
 void OverloadCandidateSet::clear(CandidateSetKind CSK) {
   destroyCandidates();
-  SlabAllocator.Reset();
-  NumInlineBytesUsed = 0;
   Candidates.clear();
   Functions.clear();
   Kind = CSK;
@@ -6983,7 +6980,7 @@ void Sema::AddOverloadCandidate(
   Candidate.RewriteKind =
       CandidateSet.getRewriteInfo().getRewriteKind(Function, PO);
   Candidate.IsSurrogate = false;
-  Candidate.IsADLCandidate = IsADLCandidate;
+  Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
   Candidate.IgnoreObjectArgument = false;
   Candidate.ExplicitCallArguments = Args.size();
 
@@ -7815,7 +7812,7 @@ void Sema::AddTemplateOverloadCandidate(
     Candidate.RewriteKind =
       CandidateSet.getRewriteInfo().getRewriteKind(Candidate.Function, PO);
     Candidate.IsSurrogate = false;
-    Candidate.IsADLCandidate = IsADLCandidate;
+    Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
     // Ignore the object argument if there is one, since we don't have an object
     // type.
     Candidate.IgnoreObjectArgument =
@@ -14125,7 +14122,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
       return ExprError();
     return SemaRef.BuildResolvedCallExpr(
         Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
-        /*IsExecConfig=*/false, (*Best)->IsADLCandidate);
+        /*IsExecConfig=*/false,
+        static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
   }
 
   case OR_No_Viable_Function: {
@@ -14184,7 +14182,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
       return ExprError();
     return SemaRef.BuildResolvedCallExpr(
         Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
-        /*IsExecConfig=*/false, (*Best)->IsADLCandidate);
+        /*IsExecConfig=*/false,
+        static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
   }
   }
 
@@ -14491,7 +14490,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
       Args[0] = Input;
       CallExpr *TheCall = CXXOperatorCallExpr::Create(
           Context, Op, FnExpr.get(), ArgsArray, ResultTy, VK, OpLoc,
-          CurFPFeatureOverrides(), Best->IsADLCandidate);
+          CurFPFeatureOverrides(),
+          static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
 
       if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl))
         return ExprError();
@@ -14909,7 +14909,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
         // members; CodeGen should take care not to emit the this pointer.
         TheCall = CXXOperatorCallExpr::Create(
             Context, ChosenOp, FnExpr.get(), Args, ResultTy, VK, OpLoc,
-            CurFPFeatureOverrides(), Best->IsADLCandidate);
+            CurFPFeatureOverrides(),
+            static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
 
         if (const auto *Method = dyn_cast<CXXMethodDecl>(FnDecl);
             Method && Method->isImplicitObjectMemberFunction()) {


        


More information about the cfe-commits mailing list