[clang] [Draft] Summary Based Analysis Prototype (PR #144224)

via cfe-commits cfe-commits at lists.llvm.org
Sat Jul 12 08:23:55 PDT 2025


https://github.com/isuckatcs updated https://github.com/llvm/llvm-project/pull/144224

>From 7fba0addb97f9195eb307882254207f9b58072f2 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 22 May 2025 22:43:35 +0200
Subject: [PATCH 01/48] [clang][Driver] Add option to emit summaries

---
 clang/include/clang/Driver/Options.td         | 13 ++++++++++
 .../include/clang/Frontend/FrontendOptions.h  |  3 +++
 clang/lib/Driver/ToolChains/Clang.cpp         | 24 +++++++++++++++++++
 clang/lib/Frontend/FrontendAction.cpp         |  6 +++++
 4 files changed, 46 insertions(+)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 22261621df092..f0c7b277e68e2 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5941,6 +5941,15 @@ def save_temps : Flag<["-", "--"], "save-temps">, Flags<[NoXarchOption]>,
   Visibility<[ClangOption, FlangOption, FC1Option]>,
   Alias<save_temps_EQ>, AliasArgs<["cwd"]>,
   HelpText<"Alias for --save-temps=cwd">;
+def emit_summaries_EQ : Joined<["-", "--"], "emit-summaries=">, Flags<[NoXarchOption]>,
+  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Save summaries about the different functions. <arg> can be set to 'cwd' for "
+  "current working directory, or 'obj' which will save temporary files in the "
+  "same directory as the final output file">;
+def emit_summaries : Flag<["-", "--"], "emit-summaries">, Flags<[NoXarchOption]>,
+  Visibility<[ClangOption]>,
+  Alias<emit_summaries_EQ>, AliasArgs<["cwd"]>,
+  HelpText<"Alias for --emit-summaries=cwd">;
 def save_stats_EQ : Joined<["-", "--"], "save-stats=">, Flags<[NoXarchOption]>,
   HelpText<"Save llvm statistics.">;
 def save_stats : Flag<["-", "--"], "save-stats">, Flags<[NoXarchOption]>,
@@ -8148,6 +8157,10 @@ defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
   NegFlag<SetFalse, [], [ClangOption], "Don't preserve">,
   BothFlags<[], [ClangOption], " order of LLVM use-lists when serializing">>;
 
+def summary_file : Joined<["-"], "summary-file=">,
+  HelpText<"Filename to write summaries about function definitions to">,
+  MarshallingInfoString<FrontendOpts<"SummaryFile">>;
+
 def print_stats : Flag<["-"], "print-stats">,
   HelpText<"Print performance metrics and statistics">,
   MarshallingInfoFlag<FrontendOpts<"ShowStats">>;
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index c919a53ae089e..af2451c1bde8e 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -534,6 +534,9 @@ class FrontendOptions {
   /// minimization hints.
   std::string DumpMinimizationHintsPath;
 
+  /// Filename to write summaries about function definitions to.
+  std::string SummaryFile;
+
 public:
   FrontendOptions()
       : DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 2fb6cf8ea2bdc..676a4e34ddb27 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5470,6 +5470,30 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_save_temps_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
 
+  // FIXME: This needs to be cleaned up and needs proper error handling as well.
+  if (const Arg *A = Args.getLastArg(options::OPT_emit_summaries_EQ)) {
+    llvm::SmallString<10> input;
+    for (const auto &II : Inputs) {
+      if (!II.isFilename())
+        continue;
+
+      input = II.getFilename();
+      break;
+    }
+
+    if (!input.empty()) {
+      if (A->containsValue("cwd")) {
+        llvm::SmallString<10> filename = llvm::sys::path::filename(input);
+        llvm::sys::path::replace_extension(filename, "json");
+
+        CmdArgs.push_back(
+            Args.MakeArgString(Twine("-summary-file=") + filename));
+      } else if (A->containsValue("obj")) {
+        // FIXME: implement
+      }
+    }
+  }
+
   auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
                                      options::OPT_fmemory_profile_EQ,
                                      options::OPT_fno_memory_profile);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 54a2e3eb297f5..868a1f70abd43 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -1253,6 +1253,12 @@ void FrontendAction::EndSourceFile() {
   // Finalize the action.
   EndSourceFileAction();
 
+  if (CI.hasSema() && !CI.getFrontendOpts().SummaryFile.empty()) {
+    std::error_code EC;
+    llvm::raw_fd_ostream(CI.getFrontendOpts().SummaryFile, EC,
+                         llvm::sys::fs::CD_CreateAlways);
+  }
+
   // Sema references the ast consumer, so reset sema first.
   //
   // FIXME: There is more per-file stuff we could just drop here?

>From 3d7d23ad7815ef4527f000651f4321348b200873 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 7 Jun 2025 21:38:36 +0200
Subject: [PATCH 02/48] [clang][Summary] add the summarizer skeleton

---
 .../include/clang/Frontend/CompilerInstance.h | 21 ++++++++++++++++++-
 clang/include/clang/Sema/Sema.h               |  6 +++++-
 clang/include/clang/Sema/SemaSummarizer.h     | 19 +++++++++++++++++
 clang/include/clang/Sema/SummaryConsumer.h    |  8 +++++++
 clang/lib/Frontend/ChainedIncludesSource.cpp  |  2 +-
 clang/lib/Frontend/CompilerInstance.cpp       | 11 ++++++++--
 clang/lib/Frontend/FrontendAction.cpp         | 10 ++++++++-
 clang/lib/Frontend/FrontendActions.cpp        |  2 +-
 clang/lib/Sema/CMakeLists.txt                 |  1 +
 clang/lib/Sema/Sema.cpp                       |  7 ++++++-
 clang/lib/Sema/SemaDecl.cpp                   |  4 ++++
 clang/lib/Sema/SemaSummarizer.cpp             |  8 +++++++
 clang/lib/Testing/TestAST.cpp                 |  2 +-
 clang/unittests/CodeGen/TestCompiler.h        |  2 +-
 .../unittests/Frontend/CodeGenActionTest.cpp  |  2 +-
 .../unittests/Sema/ExternalSemaSourceTest.cpp |  2 +-
 clang/unittests/Sema/SemaLookupTest.cpp       |  2 +-
 17 files changed, 96 insertions(+), 13 deletions(-)
 create mode 100644 clang/include/clang/Sema/SemaSummarizer.h
 create mode 100644 clang/include/clang/Sema/SummaryConsumer.h
 create mode 100644 clang/lib/Sema/SemaSummarizer.cpp

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 5f25a932c5052..f296e6d042a54 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -48,6 +48,7 @@ class ModuleFile;
 }
 
 class CodeCompleteConsumer;
+class SummaryConsumer;
 class DiagnosticsEngine;
 class DiagnosticConsumer;
 class FileManager;
@@ -121,6 +122,9 @@ class CompilerInstance : public ModuleLoader {
   /// The code completion consumer.
   std::unique_ptr<CodeCompleteConsumer> CompletionConsumer;
 
+  /// The summary consumer.
+  std::unique_ptr<SummaryConsumer> TheSummaryConsumer;
+
   /// The semantic analysis object.
   std::unique_ptr<Sema> TheSema;
 
@@ -607,6 +611,18 @@ class CompilerInstance : public ModuleLoader {
     return *CompletionConsumer;
   }
 
+  /// @}
+  /// @name Summary
+  /// @{
+
+  bool hasSummaryConsumer() const { return (bool)TheSummaryConsumer; }
+
+  SummaryConsumer &getSummaryConsumer() const {
+    assert(TheSummaryConsumer &&
+           "Compiler instance has no code summary consumer!");
+    return *TheSummaryConsumer;
+  }
+
   /// setCodeCompletionConsumer - Replace the current code completion consumer;
   /// the compiler instance takes ownership of \p Value.
   void setCodeCompletionConsumer(CodeCompleteConsumer *Value);
@@ -736,9 +752,12 @@ class CompilerInstance : public ModuleLoader {
       Preprocessor &PP, StringRef Filename, unsigned Line, unsigned Column,
       const CodeCompleteOptions &Opts, raw_ostream &OS);
 
+  void createSummaryConsumer();
+
   /// Create the Sema object to be used for parsing.
   void createSema(TranslationUnitKind TUKind,
-                  CodeCompleteConsumer *CompletionConsumer);
+                  CodeCompleteConsumer *CompletionConsumer,
+                  SummaryConsumer *SummaryConsumer);
 
   /// Create the frontend timer and replace any existing one with it.
   void createFrontendTimer();
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index fe93df94438cb..9cb2158ab4ff1 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -125,6 +125,7 @@ class CXXBasePath;
 class CXXBasePaths;
 class CXXFieldCollector;
 class CodeCompleteConsumer;
+class SummaryConsumer;
 enum class ComparisonCategoryType : unsigned char;
 class ConstraintSatisfaction;
 class DarwinSDKInfo;
@@ -159,6 +160,7 @@ class SemaARM;
 class SemaAVR;
 class SemaBPF;
 class SemaCodeCompletion;
+class SemaSummarizer;
 class SemaCUDA;
 class SemaDirectX;
 class SemaHLSL;
@@ -883,7 +885,8 @@ class Sema final : public SemaBase {
 public:
   Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
        TranslationUnitKind TUKind = TU_Complete,
-       CodeCompleteConsumer *CompletionConsumer = nullptr);
+       CodeCompleteConsumer *CompletionConsumer = nullptr,
+       SummaryConsumer *SummaryConsumer = nullptr);
   ~Sema();
 
   /// Perform initialization that occurs after the parser has been
@@ -1564,6 +1567,7 @@ class Sema final : public SemaBase {
   std::unique_ptr<SemaAVR> AVRPtr;
   std::unique_ptr<SemaBPF> BPFPtr;
   std::unique_ptr<SemaCodeCompletion> CodeCompletionPtr;
+  std::unique_ptr<SemaSummarizer> SummarizerPtr;
   std::unique_ptr<SemaCUDA> CUDAPtr;
   std::unique_ptr<SemaDirectX> DirectXPtr;
   std::unique_ptr<SemaHLSL> HLSLPtr;
diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
new file mode 100644
index 0000000000000..3661e443bb2de
--- /dev/null
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -0,0 +1,19 @@
+#ifndef LLVM_CLANG_SEMA_SEMASUMMARIZER_H
+#define LLVM_CLANG_SEMA_SEMASUMMARIZER_H
+
+#include "clang/Sema/SemaBase.h"
+#include "clang/Sema/SummaryConsumer.h"
+
+namespace clang {
+class SemaSummarizer : public SemaBase {
+public:
+  SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer)
+      : SemaBase(S), SummaryConsumer(SummaryConsumer) {}
+
+  SummaryConsumer *SummaryConsumer;
+
+  void SummarizeFunctionBody(FunctionDecl *FD) const;
+};
+} // namespace clang
+
+#endif // LLVM_CLANG_SEMA_SEMASUMMARIZER_H
diff --git a/clang/include/clang/Sema/SummaryConsumer.h b/clang/include/clang/Sema/SummaryConsumer.h
new file mode 100644
index 0000000000000..8aa2713b46c65
--- /dev/null
+++ b/clang/include/clang/Sema/SummaryConsumer.h
@@ -0,0 +1,8 @@
+#ifndef LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
+#define LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
+
+namespace clang {
+class SummaryConsumer {};
+} // namespace clang
+
+#endif // LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
diff --git a/clang/lib/Frontend/ChainedIncludesSource.cpp b/clang/lib/Frontend/ChainedIncludesSource.cpp
index 95b0ed248d545..437f5387375f7 100644
--- a/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -142,7 +142,7 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
     Clang->getASTContext().setASTMutationListener(
                                             consumer->GetASTMutationListener());
     Clang->setASTConsumer(std::move(consumer));
-    Clang->createSema(TU_Prefix, nullptr);
+    Clang->createSema(TU_Prefix, nullptr, nullptr);
 
     if (firstInclude) {
       Preprocessor &PP = Clang->getPreprocessor();
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 503d36467653e..917a187f49fb5 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -37,6 +37,7 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SummaryConsumer.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
@@ -741,10 +742,16 @@ CompilerInstance::createCodeCompletionConsumer(Preprocessor &PP,
   return new PrintingCodeCompleteConsumer(Opts, OS);
 }
 
+void CompilerInstance::createSummaryConsumer() {
+  TheSummaryConsumer.reset(
+      getFrontendOpts().SummaryFile.empty() ? nullptr : new SummaryConsumer());
+}
+
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
-                                  CodeCompleteConsumer *CompletionConsumer) {
+                                  CodeCompleteConsumer *CompletionConsumer,
+                                  SummaryConsumer *SummaryConsumer) {
   TheSema.reset(new Sema(getPreprocessor(), getASTContext(), getASTConsumer(),
-                         TUKind, CompletionConsumer));
+                         TUKind, CompletionConsumer, SummaryConsumer));
 
   // Set up API notes.
   TheSema->APINotes.setSwiftVersion(getAPINotesOpts().SwiftVersion);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 868a1f70abd43..aeeab901b9ed7 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -1339,8 +1339,16 @@ void ASTFrontendAction::ExecuteAction() {
   if (CI.hasCodeCompletionConsumer())
     CompletionConsumer = &CI.getCodeCompletionConsumer();
 
+  CI.createSummaryConsumer();
+
+  // Use a summary consumer?
+  SummaryConsumer *SummaryConsumer = nullptr;
+  if (CI.hasSummaryConsumer())
+    SummaryConsumer = &CI.getSummaryConsumer();
+
   if (!CI.hasSema())
-    CI.createSema(getTranslationUnitKind(), CompletionConsumer);
+    CI.createSema(getTranslationUnitKind(), CompletionConsumer,
+                  SummaryConsumer);
 
   ParseAST(CI.getSema(), CI.getFrontendOpts().ShowStats,
            CI.getFrontendOpts().SkipFunctionBodies);
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 8c75e1a46da54..49f1420c75047 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -52,7 +52,7 @@ void EnsureSemaIsCreated(CompilerInstance &CI, FrontendAction &Action) {
 
   if (!CI.hasSema())
     CI.createSema(Action.getTranslationUnitKind(),
-                  GetCodeCompletionConsumer(CI));
+                  GetCodeCompletionConsumer(CI), nullptr);
 }
 } // namespace
 
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 4b87004e4b8ea..5237f20201fde 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -85,6 +85,7 @@ add_clang_library(clangSema
   SemaStmt.cpp
   SemaStmtAsm.cpp
   SemaStmtAttr.cpp
+  SemaSummarizer.cpp
   SemaSPIRV.cpp
   SemaSYCL.cpp
   SemaSwift.cpp
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 1901d19b14dfc..2410d513c299e 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -64,6 +64,7 @@
 #include "clang/Sema/SemaRISCV.h"
 #include "clang/Sema/SemaSPIRV.h"
 #include "clang/Sema/SemaSYCL.h"
+#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Sema/SemaSwift.h"
 #include "clang/Sema/SemaSystemZ.h"
 #include "clang/Sema/SemaWasm.h"
@@ -248,7 +249,8 @@ const unsigned Sema::MaxAlignmentExponent;
 const uint64_t Sema::MaximumAlignment;
 
 Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
-           TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter)
+           TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter,
+           SummaryConsumer *SummaryConsumer)
     : SemaBase(*this), CollectStats(false), TUKind(TUKind),
       CurFPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp),
       Context(ctxt), Consumer(consumer), Diags(PP.getDiagnostics()),
@@ -263,6 +265,9 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       BPFPtr(std::make_unique<SemaBPF>(*this)),
       CodeCompletionPtr(
           std::make_unique<SemaCodeCompletion>(*this, CodeCompleter)),
+      SummarizerPtr(SummaryConsumer ? std::make_unique<SemaSummarizer>(
+                                          *this, SummaryConsumer)
+                                    : nullptr),
       CUDAPtr(std::make_unique<SemaCUDA>(*this)),
       DirectXPtr(std::make_unique<SemaDirectX>(*this)),
       HLSLPtr(std::make_unique<SemaHLSL>(*this)),
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 814f81cb64cae..bb80fd2a46f58 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -55,6 +55,7 @@
 #include "clang/Sema/SemaPPC.h"
 #include "clang/Sema/SemaRISCV.h"
 #include "clang/Sema/SemaSYCL.h"
+#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Sema/SemaSwift.h"
 #include "clang/Sema/SemaWasm.h"
 #include "clang/Sema/Template.h"
@@ -16694,6 +16695,9 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   if (FD && !FD->isDeleted())
     checkTypeSupport(FD->getType(), FD->getLocation(), FD);
 
+  if (FD && SummarizerPtr)
+    SummarizerPtr->SummarizeFunctionBody(FD);
+
   return dcl;
 }
 
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
new file mode 100644
index 0000000000000..ce1d514d3df9c
--- /dev/null
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -0,0 +1,8 @@
+#include "clang/Sema/SemaSummarizer.h"
+
+namespace clang {
+void SemaSummarizer::SummarizeFunctionBody(FunctionDecl *FD) const {
+  FD->dump();
+}
+
+} // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Testing/TestAST.cpp b/clang/lib/Testing/TestAST.cpp
index 748f59b856e83..db0490689da53 100644
--- a/clang/lib/Testing/TestAST.cpp
+++ b/clang/lib/Testing/TestAST.cpp
@@ -69,7 +69,7 @@ void createMissingComponents(CompilerInstance &Clang) {
   if (!Clang.hasASTContext())
     Clang.createASTContext();
   if (!Clang.hasSema())
-    Clang.createSema(TU_Complete, /*CodeCompleteConsumer=*/nullptr);
+    Clang.createSema(TU_Complete, /*CodeCompleteConsumer=*/nullptr, nullptr);
 }
 
 } // namespace
diff --git a/clang/unittests/CodeGen/TestCompiler.h b/clang/unittests/CodeGen/TestCompiler.h
index a6fec7fb0945d..760fa340c3d74 100644
--- a/clang/unittests/CodeGen/TestCompiler.h
+++ b/clang/unittests/CodeGen/TestCompiler.h
@@ -69,7 +69,7 @@ struct TestCompiler {
 
     compiler.setASTConsumer(std::move(Consumer));
 
-    compiler.createSema(clang::TU_Prefix, nullptr);
+    compiler.createSema(clang::TU_Prefix, nullptr, nullptr);
 
     clang::SourceManager &sm = compiler.getSourceManager();
     sm.setMainFileID(sm.createFileID(
diff --git a/clang/unittests/Frontend/CodeGenActionTest.cpp b/clang/unittests/Frontend/CodeGenActionTest.cpp
index 90818b72cd6e6..e958ea1993a4a 100644
--- a/clang/unittests/Frontend/CodeGenActionTest.cpp
+++ b/clang/unittests/Frontend/CodeGenActionTest.cpp
@@ -37,7 +37,7 @@ class NullCodeGenAction : public CodeGenAction {
     if (!CI.hasPreprocessor())
       return;
     if (!CI.hasSema())
-      CI.createSema(getTranslationUnitKind(), nullptr);
+      CI.createSema(getTranslationUnitKind(), nullptr, nullptr);
   }
 };
 
diff --git a/clang/unittests/Sema/ExternalSemaSourceTest.cpp b/clang/unittests/Sema/ExternalSemaSourceTest.cpp
index 2b271d4bf7825..d223a7135ee84 100644
--- a/clang/unittests/Sema/ExternalSemaSourceTest.cpp
+++ b/clang/unittests/Sema/ExternalSemaSourceTest.cpp
@@ -194,7 +194,7 @@ class ExternalSemaSourceInstaller : public clang::ASTFrontendAction {
   void ExecuteAction() override {
     CompilerInstance &CI = getCompilerInstance();
     ASSERT_FALSE(CI.hasSema());
-    CI.createSema(getTranslationUnitKind(), nullptr);
+    CI.createSema(getTranslationUnitKind(), nullptr, nullptr);
     ASSERT_TRUE(CI.hasDiagnostics());
     DiagnosticsEngine &Diagnostics = CI.getDiagnostics();
     DiagnosticConsumer *Client = Diagnostics.getClient();
diff --git a/clang/unittests/Sema/SemaLookupTest.cpp b/clang/unittests/Sema/SemaLookupTest.cpp
index d97b571f6a37c..96c27945421f9 100644
--- a/clang/unittests/Sema/SemaLookupTest.cpp
+++ b/clang/unittests/Sema/SemaLookupTest.cpp
@@ -22,7 +22,7 @@ class LookupAction : public ASTFrontendAction {
   void ExecuteAction() override {
     CompilerInstance &CI = getCompilerInstance();
     ASSERT_FALSE(CI.hasSema());
-    CI.createSema(getTranslationUnitKind(), nullptr);
+    CI.createSema(getTranslationUnitKind(), nullptr, nullptr);
     ASSERT_TRUE(CI.hasSema());
     Sema &S = CI.getSema();
     ParseAST(S);

>From 2592f08b25c7394f7a765a4ca1ecb9ce69657560 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sun, 8 Jun 2025 00:21:20 +0200
Subject: [PATCH 03/48] [clang][Summary] implement summary base prototype

---
 clang/include/clang/Sema/SemaSummarizer.h |  2 +-
 clang/lib/Sema/SemaSummarizer.cpp         | 57 +++++++++++++++++++++--
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 3661e443bb2de..45e97529a2f1f 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -12,7 +12,7 @@ class SemaSummarizer : public SemaBase {
 
   SummaryConsumer *SummaryConsumer;
 
-  void SummarizeFunctionBody(FunctionDecl *FD) const;
+  void SummarizeFunctionBody(const FunctionDecl *FD);
 };
 } // namespace clang
 
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index ce1d514d3df9c..65ac0a7e4ec36 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -1,8 +1,59 @@
 #include "clang/Sema/SemaSummarizer.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Index/USRGeneration.h"
+#include <set>
 
 namespace clang {
-void SemaSummarizer::SummarizeFunctionBody(FunctionDecl *FD) const {
-  FD->dump();
+namespace {
+class FunctionSummary {
+  SmallVector<char> ID;
+  std::vector<std::string> FunctionAttrs;
+  std::set<SmallVector<char>> Calls;
+
+public:
+  void addCall(const clang::FunctionDecl *FD) {
+    SmallVector<char> Call;
+    index::generateUSRForDecl(FD, Call);
+    Calls.emplace(Call);
+  }
+
+  FunctionSummary(const clang::FunctionDecl *FD) {
+    index::generateUSRForDecl(FD, ID);
+  }
+};
+
+class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
+  FunctionSummary *Summary;
+
+public:
+  CallCollector(FunctionSummary &Summary) : Summary(&Summary) {}
+
+  virtual void
+  run(const ast_matchers::MatchFinder::MatchResult &Result) override {
+    const auto *Call = Result.Nodes.getNodeAs<CallExpr>("call");
+    if (!Call)
+      return;
+
+    const auto *Callee = llvm::dyn_cast<FunctionDecl>(Call->getCalleeDecl());
+    Summary->addCall(Callee);
+  }
+};
+
+void CollectCalledFunctions(const FunctionDecl *FD, FunctionSummary &Summary) {
+  using namespace ast_matchers;
+  MatchFinder Finder;
+  CallCollector CC(Summary);
+
+  Finder.addMatcher(functionDecl(forEachDescendant(callExpr().bind("call"))),
+                    &CC);
+  Finder.match(*FD, FD->getASTContext());
+}
+
+} // namespace
+
+void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
+  FunctionSummary Summary(FD);
+  CollectCalledFunctions(FD, Summary);
 }
 
-} // namespace clang
\ No newline at end of file
+} // namespace clang

>From 43c1b90334a7abc7caf636f6bc15ff4a3194be13 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sun, 8 Jun 2025 14:53:27 +0200
Subject: [PATCH 04/48] [clang][Summary] implement summary inference prototype

---
 clang/include/clang/Sema/SemaSummarizer.h   | 22 ++++-
 clang/include/clang/Sema/SummaryAttribute.h | 54 ++++++++++++
 clang/lib/Sema/SemaSummarizer.cpp           | 95 +++++++++++++++------
 3 files changed, 141 insertions(+), 30 deletions(-)
 create mode 100644 clang/include/clang/Sema/SummaryAttribute.h

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 45e97529a2f1f..71ec68cf947ba 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -2,15 +2,31 @@
 #define LLVM_CLANG_SEMA_SEMASUMMARIZER_H
 
 #include "clang/Sema/SemaBase.h"
+#include "clang/Sema/SummaryAttribute.h"
 #include "clang/Sema/SummaryConsumer.h"
+#include <set>
 
 namespace clang {
+class FunctionSummary {
+  SmallVector<char> ID;
+  std::set<SummaryAttribute> FunctionAttrs;
+  std::set<SmallVector<char>> Calls;
+
+public:
+  FunctionSummary(const clang::FunctionDecl *FD);
+
+  void addAttribute(SummaryAttribute Attr) { FunctionAttrs.emplace(Attr); }
+  bool hasAttribute(SummaryAttribute Attr) { return FunctionAttrs.count(Attr); }
+
+  void addCall(const clang::FunctionDecl *FD);
+};
+
 class SemaSummarizer : public SemaBase {
 public:
-  SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer)
-      : SemaBase(S), SummaryConsumer(SummaryConsumer) {}
+  SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer);
 
-  SummaryConsumer *SummaryConsumer;
+  std::vector<std::unique_ptr<SummaryAttributeManager>> Attributes;
+  SummaryConsumer *TheSummaryConsumer;
 
   void SummarizeFunctionBody(const FunctionDecl *FD);
 };
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
new file mode 100644
index 0000000000000..ed8a8125d22f1
--- /dev/null
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -0,0 +1,54 @@
+#ifndef LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTE_H
+#define LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTE_H
+
+#include "clang/AST/Decl.h"
+#include <string>
+
+namespace clang {
+enum SummaryAttribute {
+  NO_WRITE_GLOBAL,
+};
+
+class FunctionSummary;
+
+class SummaryAttributeManager {
+  inline static std::unordered_map<SummaryAttribute, std::string> AttrToStr;
+
+protected:
+  const SummaryAttribute Attr;
+  const char *Str;
+
+public:
+  SummaryAttributeManager(SummaryAttribute Attr, const char *Str)
+      : Attr(Attr), Str(Str) {
+    assert(AttrToStr.count(Attr) == 0 && "attribute already registered");
+    for (auto &&[attr, str] : AttrToStr)
+      assert(str != Str && "attribute representation is already used");
+
+    AttrToStr[Attr] = Str;
+  }
+  virtual ~SummaryAttributeManager() = default;
+
+  virtual bool predicate(const FunctionDecl *FD) = 0;
+
+  // FIXME: This should receive all the parsed summaries as well.
+  virtual bool merge(FunctionSummary &Summary) = 0;
+
+  virtual std::string serialize() const { return Str; };
+  virtual std::optional<SummaryAttribute> parse(std::string_view Input) const {
+    if (Str == Input)
+      return Attr;
+
+    return std::nullopt;
+  };
+
+  std::optional<SummaryAttribute> infer(const FunctionDecl *FD) {
+    if (predicate(FD))
+      return Attr;
+
+    return std::nullopt;
+  };
+};
+} // namespace clang
+
+#endif // LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTE_H
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 65ac0a7e4ec36..1a7dd7ba26f99 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -1,31 +1,14 @@
 #include "clang/Sema/SemaSummarizer.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Index/USRGeneration.h"
+#include "clang/Sema/SummaryConsumer.h"
 #include <set>
 
 namespace clang {
 namespace {
-class FunctionSummary {
-  SmallVector<char> ID;
-  std::vector<std::string> FunctionAttrs;
-  std::set<SmallVector<char>> Calls;
-
-public:
-  void addCall(const clang::FunctionDecl *FD) {
-    SmallVector<char> Call;
-    index::generateUSRForDecl(FD, Call);
-    Calls.emplace(Call);
-  }
-
-  FunctionSummary(const clang::FunctionDecl *FD) {
-    index::generateUSRForDecl(FD, ID);
-  }
-};
-
 class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
   FunctionSummary *Summary;
 
-public:
   CallCollector(FunctionSummary &Summary) : Summary(&Summary) {}
 
   virtual void
@@ -37,23 +20,81 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
     const auto *Callee = llvm::dyn_cast<FunctionDecl>(Call->getCalleeDecl());
     Summary->addCall(Callee);
   }
+
+public:
+  static void CollectCalledFunctions(const FunctionDecl *FD,
+                                     FunctionSummary &Summary) {
+    using namespace ast_matchers;
+    MatchFinder Finder;
+    CallCollector CC(Summary);
+
+    Finder.addMatcher(functionDecl(forEachDescendant(callExpr().bind("call"))),
+                      &CC);
+    Finder.match(*FD, FD->getASTContext());
+  }
 };
 
-void CollectCalledFunctions(const FunctionDecl *FD, FunctionSummary &Summary) {
-  using namespace ast_matchers;
-  MatchFinder Finder;
-  CallCollector CC(Summary);
+class NoWriteGlobalAttrManager : public SummaryAttributeManager {
+  class Callback : public ast_matchers::MatchFinder::MatchCallback {
+  public:
+    bool WriteGlobal = false;
 
-  Finder.addMatcher(functionDecl(forEachDescendant(callExpr().bind("call"))),
-                    &CC);
-  Finder.match(*FD, FD->getASTContext());
-}
+    void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
+      const auto *Assignment =
+          Result.Nodes.getNodeAs<BinaryOperator>("assignment");
+      if (!Assignment)
+        return;
+
+      WriteGlobal = true;
+    };
+  };
 
+public:
+  NoWriteGlobalAttrManager()
+      : SummaryAttributeManager(NO_WRITE_GLOBAL, "no_write_global") {}
+
+  bool predicate(const FunctionDecl *FD) override {
+    using namespace ast_matchers;
+    MatchFinder Finder;
+    Callback CB;
+
+    Finder.addMatcher(
+        functionDecl(forEachDescendant(
+            binaryOperator(isAssignmentOperator(),
+                           hasLHS(declRefExpr(to(varDecl(hasGlobalStorage())))))
+                .bind("assignment"))),
+        &CB);
+    Finder.match(*FD, FD->getASTContext());
+    return !CB.WriteGlobal;
+  };
+
+  bool merge(FunctionSummary &Summary) override { return true; };
+};
 } // namespace
 
+void FunctionSummary::addCall(const clang::FunctionDecl *FD) {
+  SmallVector<char> Call;
+  index::generateUSRForDecl(FD, Call);
+  Calls.emplace(Call);
+}
+
+FunctionSummary::FunctionSummary(const clang::FunctionDecl *FD) {
+  index::generateUSRForDecl(FD, ID);
+}
+
+SemaSummarizer::SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer)
+    : SemaBase(S), TheSummaryConsumer(SummaryConsumer) {
+  Attributes.emplace_back(std::make_unique<NoWriteGlobalAttrManager>());
+}
+
 void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
   FunctionSummary Summary(FD);
-  CollectCalledFunctions(FD, Summary);
+  CallCollector::CollectCalledFunctions(FD, Summary);
+
+  for (auto &&Attr : Attributes) {
+    if (const auto &InferredAttr = Attr->infer(FD))
+      Summary.addAttribute(*InferredAttr);
+  }
 }
 
 } // namespace clang

>From f1ea491e6f2cf7c6d21f25b611cd33cd6607567b Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sun, 8 Jun 2025 17:08:55 +0200
Subject: [PATCH 05/48] [clang][Summary] summary printing prototype

---
 clang/include/clang/Sema/SemaSummarizer.h   | 10 ++++++-
 clang/include/clang/Sema/SummaryAttribute.h |  3 ++-
 clang/include/clang/Sema/SummaryConsumer.h  | 30 ++++++++++++++++++++-
 clang/lib/Frontend/CompilerInstance.cpp     | 12 +++++++--
 clang/lib/Frontend/FrontendAction.cpp       |  6 -----
 clang/lib/Sema/CMakeLists.txt               |  1 +
 clang/lib/Sema/Sema.cpp                     |  5 ++++
 clang/lib/Sema/SemaSummarizer.cpp           | 13 +++++++++
 clang/lib/Sema/SummaryConsumer.cpp          | 24 +++++++++++++++++
 9 files changed, 93 insertions(+), 11 deletions(-)
 create mode 100644 clang/lib/Sema/SummaryConsumer.cpp

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 71ec68cf947ba..ef40599d982a1 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -15,8 +15,12 @@ class FunctionSummary {
 public:
   FunctionSummary(const clang::FunctionDecl *FD);
 
+  SmallVector<char> getID() const { return ID; }
+  const std::set<SummaryAttribute> &getFunctionAttrs() const { return FunctionAttrs; }
+  const std::set<SmallVector<char>> &getCalls() const { return Calls; }
+
   void addAttribute(SummaryAttribute Attr) { FunctionAttrs.emplace(Attr); }
-  bool hasAttribute(SummaryAttribute Attr) { return FunctionAttrs.count(Attr); }
+  bool hasAttribute(SummaryAttribute Attr) const { return FunctionAttrs.count(Attr); }
 
   void addCall(const clang::FunctionDecl *FD);
 };
@@ -28,7 +32,11 @@ class SemaSummarizer : public SemaBase {
   std::vector<std::unique_ptr<SummaryAttributeManager>> Attributes;
   SummaryConsumer *TheSummaryConsumer;
 
+  void ActOnStartOfSourceFile();
+  void ActOnEndOfSourceFile();
+  
   void SummarizeFunctionBody(const FunctionDecl *FD);
+
 };
 } // namespace clang
 
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index ed8a8125d22f1..2d2fd1e4ff623 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -34,7 +34,8 @@ class SummaryAttributeManager {
   // FIXME: This should receive all the parsed summaries as well.
   virtual bool merge(FunctionSummary &Summary) = 0;
 
-  virtual std::string serialize() const { return Str; };
+  // FIXME: bad design
+  static std::string serialize(SummaryAttribute Attr) { return AttrToStr[Attr]; };
   virtual std::optional<SummaryAttribute> parse(std::string_view Input) const {
     if (Str == Input)
       return Attr;
diff --git a/clang/include/clang/Sema/SummaryConsumer.h b/clang/include/clang/Sema/SummaryConsumer.h
index 8aa2713b46c65..c2698288ce721 100644
--- a/clang/include/clang/Sema/SummaryConsumer.h
+++ b/clang/include/clang/Sema/SummaryConsumer.h
@@ -1,8 +1,36 @@
 #ifndef LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
 #define LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
 
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/JSON.h"
 namespace clang {
-class SummaryConsumer {};
+class FunctionSummary;
+
+class SummaryConsumer {
+public:
+    virtual ~SummaryConsumer() = default;
+
+    virtual void ProcessStartOfSourceFile() {};
+    virtual void ProcessFunctionSummary(const FunctionSummary&) {};
+    virtual void ProcessEndOfSourceFile() {};
+};
+
+class PrintingSummaryConsumer : public SummaryConsumer {
+public:
+    PrintingSummaryConsumer(raw_ostream &OS)
+      : SummaryConsumer() {}
+};
+
+class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
+    llvm::json::OStream JOS;
+
+public:
+    JSONPrintingSummaryConsumer(raw_ostream &OS) : PrintingSummaryConsumer(OS), JOS(OS, 2) {}
+
+    void ProcessStartOfSourceFile() override { JOS.arrayBegin(); };
+    void ProcessFunctionSummary(const FunctionSummary&) override;
+    void ProcessEndOfSourceFile() override { JOS.arrayEnd(); };
+};
 } // namespace clang
 
 #endif // LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 917a187f49fb5..c90b8884f5731 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -743,8 +743,16 @@ CompilerInstance::createCodeCompletionConsumer(Preprocessor &PP,
 }
 
 void CompilerInstance::createSummaryConsumer() {
-  TheSummaryConsumer.reset(
-      getFrontendOpts().SummaryFile.empty() ? nullptr : new SummaryConsumer());
+  const std::string& SummaryFile = getFrontendOpts().SummaryFile;
+  if(SummaryFile.empty())
+    return;
+
+  std::error_code EC;
+  // FIXME: this being static is a design error
+  static llvm::raw_fd_ostream SummaryOS(SummaryFile, EC, llvm::sys::fs::CD_CreateAlways);
+
+  if(!EC)
+    TheSummaryConsumer.reset(new JSONPrintingSummaryConsumer(SummaryOS));
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index aeeab901b9ed7..35b0c0373533a 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -1253,12 +1253,6 @@ void FrontendAction::EndSourceFile() {
   // Finalize the action.
   EndSourceFileAction();
 
-  if (CI.hasSema() && !CI.getFrontendOpts().SummaryFile.empty()) {
-    std::error_code EC;
-    llvm::raw_fd_ostream(CI.getFrontendOpts().SummaryFile, EC,
-                         llvm::sys::fs::CD_CreateAlways);
-  }
-
   // Sema references the ast consumer, so reset sema first.
   //
   // FIXME: There is more per-file stuff we could just drop here?
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 5237f20201fde..d6297016c015f 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -99,6 +99,7 @@ add_clang_library(clangSema
   SemaType.cpp
   SemaWasm.cpp
   SemaX86.cpp
+  SummaryConsumer.cpp
   TypeLocBuilder.cpp
 
   DEPENDS
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 2410d513c299e..ae9f8c00081b3 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1147,6 +1147,9 @@ void Sema::ActOnStartOfTranslationUnit() {
   if (getLangOpts().CPlusPlusModules &&
       getLangOpts().getCompilingModule() == LangOptions::CMK_HeaderUnit)
     HandleStartOfHeaderUnit();
+  
+  if(SummarizerPtr)
+    SummarizerPtr->ActOnStartOfSourceFile();
 }
 
 void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) {
@@ -1222,6 +1225,8 @@ void Sema::ActOnEndOfTranslationUnit() {
   assert(DelayedDiagnostics.getCurrentPool() == nullptr
          && "reached end of translation unit with a pool attached?");
 
+  if(SummarizerPtr)
+    SummarizerPtr->ActOnEndOfSourceFile();
   // If code completion is enabled, don't perform any end-of-translation-unit
   // work.
   if (PP.isCodeCompletionEnabled())
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 1a7dd7ba26f99..9fbb940ec3fbb 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -87,6 +87,16 @@ SemaSummarizer::SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer)
   Attributes.emplace_back(std::make_unique<NoWriteGlobalAttrManager>());
 }
 
+void SemaSummarizer::ActOnStartOfSourceFile() {
+  if(TheSummaryConsumer)
+    TheSummaryConsumer->ProcessStartOfSourceFile();
+}
+
+void SemaSummarizer::ActOnEndOfSourceFile() {
+  if(TheSummaryConsumer)
+    TheSummaryConsumer->ProcessEndOfSourceFile();
+}
+
 void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
   FunctionSummary Summary(FD);
   CallCollector::CollectCalledFunctions(FD, Summary);
@@ -95,6 +105,9 @@ void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
     if (const auto &InferredAttr = Attr->infer(FD))
       Summary.addAttribute(*InferredAttr);
   }
+
+  if(TheSummaryConsumer)
+    TheSummaryConsumer->ProcessFunctionSummary(Summary);
 }
 
 } // namespace clang
diff --git a/clang/lib/Sema/SummaryConsumer.cpp b/clang/lib/Sema/SummaryConsumer.cpp
new file mode 100644
index 0000000000000..45aeaee502ed9
--- /dev/null
+++ b/clang/lib/Sema/SummaryConsumer.cpp
@@ -0,0 +1,24 @@
+#include "clang/Sema/SummaryConsumer.h"
+#include "clang/Sema/SemaSummarizer.h"
+
+namespace clang {
+void JSONPrintingSummaryConsumer::ProcessFunctionSummary(const FunctionSummary &Summary) {
+  JOS.object([&]{
+    JOS.attribute("id", llvm::json::Value(Summary.getID()));
+    JOS.attributeObject("attrs", [&]{
+      JOS.attributeArray("function", [&]{
+        for(auto &&Attr : Summary.getFunctionAttrs()) {
+          JOS.value(llvm::json::Value(SummaryAttributeManager::serialize(Attr)));
+        }
+      });
+    });
+    JOS.attributeArray("calls", [&]{
+      for(auto &&Call : Summary.getCalls()) {
+        JOS.object([&]{
+          JOS.attribute("id", llvm::json::Value(Call));
+        });
+      }
+    });
+  });
+}
+} // namespace clang
\ No newline at end of file

>From 6fb640433209596d293316ed07dab58e014acf2a Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sun, 8 Jun 2025 22:26:29 +0200
Subject: [PATCH 06/48] [clang][Driver][Summary] add a flag to specify the
 directory to parse the summaries from

---
 clang/include/clang/Driver/Options.td         |  4 +++
 .../include/clang/Frontend/FrontendOptions.h  |  3 ++
 clang/lib/Driver/ToolChains/Clang.cpp         |  3 ++
 clang/lib/Frontend/FrontendAction.cpp         | 29 +++++++++++++++++++
 4 files changed, 39 insertions(+)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f0c7b277e68e2..c66f098c459c2 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5941,6 +5941,10 @@ def save_temps : Flag<["-", "--"], "save-temps">, Flags<[NoXarchOption]>,
   Visibility<[ClangOption, FlangOption, FC1Option]>,
   Alias<save_temps_EQ>, AliasArgs<["cwd"]>,
   HelpText<"Alias for --save-temps=cwd">;
+def summaries_dir_EQ : Joined<["-", "--"], "summaries-dir=">, Flags<[NoXarchOption]>,
+  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Read summaries about different functions from this directory">,
+  MarshallingInfoString<FrontendOpts<"SummaryDirPath">>;
 def emit_summaries_EQ : Joined<["-", "--"], "emit-summaries=">, Flags<[NoXarchOption]>,
   Visibility<[ClangOption, CC1Option]>,
   HelpText<"Save summaries about the different functions. <arg> can be set to 'cwd' for "
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index af2451c1bde8e..20c60f823d5f3 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -537,6 +537,9 @@ class FrontendOptions {
   /// Filename to write summaries about function definitions to.
   std::string SummaryFile;
 
+  /// The directory used to load summary files.
+  std::string SummaryDirPath;
+
 public:
   FrontendOptions()
       : DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 676a4e34ddb27..48b19615ab08f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5470,6 +5470,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_save_temps_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
 
+  if (Args.getLastArg(options::OPT_summaries_dir_EQ))
+    Args.AddLastArg(CmdArgs, options::OPT_summaries_dir_EQ);
+
   // FIXME: This needs to be cleaned up and needs proper error handling as well.
   if (const Arg *A = Args.getLastArg(options::OPT_emit_summaries_EQ)) {
     llvm::SmallString<10> input;
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 35b0c0373533a..9cbebbabd8529 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -46,7 +46,9 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include <fstream>
 #include <memory>
+#include <sstream>
 #include <system_error>
 using namespace clang;
 
@@ -962,6 +964,33 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     }
   }
 
+  // FIXME: lookup dirs recursively
+  if (!CI.getFrontendOpts().SummaryDirPath.empty()) {
+    FileManager &FileMgr = CI.getFileManager();
+
+    StringRef SummaryDirPath = CI.getFrontendOpts().SummaryDirPath;
+    if (auto SummaryDir = FileMgr.getOptionalDirectoryRef(SummaryDirPath)) {
+      std::error_code EC;
+      SmallString<128> DirNative;
+      llvm::sys::path::native(SummaryDir->getName(), DirNative);
+
+      llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
+      for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
+                                         DirEnd;
+           Dir != DirEnd && !EC; Dir.increment(EC)) {
+        if (llvm::sys::path::extension(Dir->path()) == ".json") {
+          std::ifstream t(Dir->path().str());
+          std::stringstream buffer;
+          buffer << t.rdbuf();
+
+          auto JSON = llvm::json::parse(buffer.str());
+          if (JSON)
+            JSON->dump();
+        }
+      }
+    }
+  }
+
   // Set up the preprocessor if needed. When parsing model files the
   // preprocessor of the original source is reused.
   if (!isModelParsingAction())

>From a45d2c0cedab95d5b0db1dba11218f97e86f64b9 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Mon, 9 Jun 2025 23:45:47 +0200
Subject: [PATCH 07/48] [clang][Summary] implement parsing the summaries

---
 .../include/clang/Frontend/CompilerInstance.h |  14 +++
 clang/include/clang/Sema/Sema.h               |   2 +
 clang/include/clang/Sema/SemaSummarizer.h     |  29 ++++-
 clang/include/clang/Sema/SummaryAttribute.h   |  35 ++----
 clang/include/clang/Sema/SummaryConsumer.h    |  11 +-
 clang/lib/Frontend/CompilerInstance.cpp       |   9 +-
 clang/lib/Frontend/FrontendAction.cpp         |  23 ++--
 clang/lib/Sema/CMakeLists.txt                 |   1 +
 clang/lib/Sema/Sema.cpp                       |   4 +-
 clang/lib/Sema/SemaDecl.cpp                   |   2 +-
 clang/lib/Sema/SemaSummarizer.cpp             | 105 +++++++++++++++---
 clang/lib/Sema/SummaryAttribute.cpp           |  19 ++++
 clang/lib/Sema/SummaryConsumer.cpp            |  18 +--
 13 files changed, 191 insertions(+), 81 deletions(-)
 create mode 100644 clang/lib/Sema/SummaryAttribute.cpp

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index f296e6d042a54..f8f13bbc998d9 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -57,6 +57,7 @@ class Module;
 class ModuleCache;
 class Preprocessor;
 class Sema;
+class SummaryManager;
 class SourceManager;
 class TargetInfo;
 enum class DisableValidationForModuleKind;
@@ -124,6 +125,9 @@ class CompilerInstance : public ModuleLoader {
 
   /// The summary consumer.
   std::unique_ptr<SummaryConsumer> TheSummaryConsumer;
+  
+  /// The summary manager object.
+  std::unique_ptr<SummaryManager> TheSummaryManager;
 
   /// The semantic analysis object.
   std::unique_ptr<Sema> TheSema;
@@ -520,6 +524,15 @@ class CompilerInstance : public ModuleLoader {
   /// setASTContext - Replace the current AST context.
   void setASTContext(ASTContext *Value);
 
+  bool hasSummaryManager() {
+    return TheSummaryManager != nullptr;
+  }
+
+  SummaryManager &getSummaryManager() {
+    assert(TheSummaryManager && "Compiler instance has no summary manager!");
+    return *TheSummaryManager;
+  }
+
   /// Replace the current Sema; the compiler instance takes ownership
   /// of S.
   void setSema(Sema *S);
@@ -753,6 +766,7 @@ class CompilerInstance : public ModuleLoader {
       const CodeCompleteOptions &Opts, raw_ostream &OS);
 
   void createSummaryConsumer();
+  void createSummaryManager();
 
   /// Create the Sema object to be used for parsing.
   void createSema(TranslationUnitKind TUKind,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 9cb2158ab4ff1..b36fce47fb792 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -125,6 +125,7 @@ class CXXBasePath;
 class CXXBasePaths;
 class CXXFieldCollector;
 class CodeCompleteConsumer;
+class SummaryManager;
 class SummaryConsumer;
 enum class ComparisonCategoryType : unsigned char;
 class ConstraintSatisfaction;
@@ -886,6 +887,7 @@ class Sema final : public SemaBase {
   Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
        TranslationUnitKind TUKind = TU_Complete,
        CodeCompleteConsumer *CompletionConsumer = nullptr,
+       SummaryManager *SummaryManager = nullptr,
        SummaryConsumer *SummaryConsumer = nullptr);
   ~Sema();
 
diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index ef40599d982a1..38e3565af0a95 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -13,6 +13,7 @@ class FunctionSummary {
   std::set<SmallVector<char>> Calls;
 
 public:
+  FunctionSummary(SmallVector<char> ID, std::set<SummaryAttribute> FunctionAttrs, std::set<SmallVector<char>> Calls);
   FunctionSummary(const clang::FunctionDecl *FD);
 
   SmallVector<char> getID() const { return ID; }
@@ -25,18 +26,36 @@ class FunctionSummary {
   void addCall(const clang::FunctionDecl *FD);
 };
 
-class SemaSummarizer : public SemaBase {
+class SummaryManager {
+  std::map<std::string, const FunctionSummary *> IDToSummary;
+  std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
+  
+  std::map<SummaryAttribute, const SummaryAttributeDescription *> AttrToDescription;
+  std::vector<std::unique_ptr<SummaryAttributeDescription>> AttributeDescriptions;
+
 public:
-  SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer);
+  SummaryManager();
+
+  FunctionSummary SummarizeFunctionBody(const FunctionDecl *FD);
+  
+  void SerializeSummary(llvm::json::OStream &, const FunctionSummary &) const;
+  void ParseSummaryFromJSON(StringRef path);
+
+  void ReduceSummaries();
+};
 
-  std::vector<std::unique_ptr<SummaryAttributeManager>> Attributes;
+// FIXME: Is this class needed?
+class SemaSummarizer : public SemaBase {
+public:
+  SummaryManager *TheSummaryManager;
   SummaryConsumer *TheSummaryConsumer;
 
+  SemaSummarizer(Sema &S, SummaryManager &SummaryManager, SummaryConsumer *SummaryConsumer) 
+    : SemaBase(S), TheSummaryManager(&SummaryManager), TheSummaryConsumer(SummaryConsumer) {};
+
   void ActOnStartOfSourceFile();
   void ActOnEndOfSourceFile();
-  
   void SummarizeFunctionBody(const FunctionDecl *FD);
-
 };
 } // namespace clang
 
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index 2d2fd1e4ff623..66d1d11334ac5 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -11,44 +11,25 @@ enum SummaryAttribute {
 
 class FunctionSummary;
 
-class SummaryAttributeManager {
-  inline static std::unordered_map<SummaryAttribute, std::string> AttrToStr;
-
+class SummaryAttributeDescription {
 protected:
   const SummaryAttribute Attr;
-  const char *Str;
+  std::string_view Serialzed;
 
 public:
-  SummaryAttributeManager(SummaryAttribute Attr, const char *Str)
-      : Attr(Attr), Str(Str) {
-    assert(AttrToStr.count(Attr) == 0 && "attribute already registered");
-    for (auto &&[attr, str] : AttrToStr)
-      assert(str != Str && "attribute representation is already used");
+  SummaryAttributeDescription(SummaryAttribute Attr, const char *Str) : Attr(Attr), Serialzed(Str) {}
+  virtual ~SummaryAttributeDescription() = default;
 
-    AttrToStr[Attr] = Str;
-  }
-  virtual ~SummaryAttributeManager() = default;
+  SummaryAttribute getAttribute() { return Attr; };
 
   virtual bool predicate(const FunctionDecl *FD) = 0;
+  std::optional<SummaryAttribute> infer(const FunctionDecl *FD);
 
   // FIXME: This should receive all the parsed summaries as well.
   virtual bool merge(FunctionSummary &Summary) = 0;
 
-  // FIXME: bad design
-  static std::string serialize(SummaryAttribute Attr) { return AttrToStr[Attr]; };
-  virtual std::optional<SummaryAttribute> parse(std::string_view Input) const {
-    if (Str == Input)
-      return Attr;
-
-    return std::nullopt;
-  };
-
-  std::optional<SummaryAttribute> infer(const FunctionDecl *FD) {
-    if (predicate(FD))
-      return Attr;
-
-    return std::nullopt;
-  };
+  virtual std::string serialize();
+  virtual std::optional<SummaryAttribute> parse(std::string_view input);
 };
 } // namespace clang
 
diff --git a/clang/include/clang/Sema/SummaryConsumer.h b/clang/include/clang/Sema/SummaryConsumer.h
index c2698288ce721..85c48b3c1b939 100644
--- a/clang/include/clang/Sema/SummaryConsumer.h
+++ b/clang/include/clang/Sema/SummaryConsumer.h
@@ -5,9 +5,14 @@
 #include "llvm/Support/JSON.h"
 namespace clang {
 class FunctionSummary;
+class SummaryManager;
 
 class SummaryConsumer {
+protected:
+    const SummaryManager *TheSummaryManager;
+
 public:
+    SummaryConsumer(const SummaryManager &SummaryManager) : TheSummaryManager(&SummaryManager) {}
     virtual ~SummaryConsumer() = default;
 
     virtual void ProcessStartOfSourceFile() {};
@@ -17,15 +22,15 @@ class SummaryConsumer {
 
 class PrintingSummaryConsumer : public SummaryConsumer {
 public:
-    PrintingSummaryConsumer(raw_ostream &OS)
-      : SummaryConsumer() {}
+    PrintingSummaryConsumer(const SummaryManager &SummaryManager, raw_ostream &OS)
+      : SummaryConsumer(SummaryManager) {}
 };
 
 class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
     llvm::json::OStream JOS;
 
 public:
-    JSONPrintingSummaryConsumer(raw_ostream &OS) : PrintingSummaryConsumer(OS), JOS(OS, 2) {}
+    JSONPrintingSummaryConsumer(const SummaryManager &SummaryManager, raw_ostream &OS) : PrintingSummaryConsumer(SummaryManager, OS), JOS(OS, 2) {}
 
     void ProcessStartOfSourceFile() override { JOS.arrayBegin(); };
     void ProcessFunctionSummary(const FunctionSummary&) override;
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index c90b8884f5731..deed94e1f0b42 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -37,6 +37,7 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Sema/SummaryConsumer.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
@@ -752,14 +753,18 @@ void CompilerInstance::createSummaryConsumer() {
   static llvm::raw_fd_ostream SummaryOS(SummaryFile, EC, llvm::sys::fs::CD_CreateAlways);
 
   if(!EC)
-    TheSummaryConsumer.reset(new JSONPrintingSummaryConsumer(SummaryOS));
+    TheSummaryConsumer.reset(new JSONPrintingSummaryConsumer(getSummaryManager(), SummaryOS));
+}
+
+void CompilerInstance::createSummaryManager() {
+  TheSummaryManager.reset(new SummaryManager());
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
                                   CodeCompleteConsumer *CompletionConsumer,
                                   SummaryConsumer *SummaryConsumer) {
   TheSema.reset(new Sema(getPreprocessor(), getASTContext(), getASTConsumer(),
-                         TUKind, CompletionConsumer, SummaryConsumer));
+                         TUKind, CompletionConsumer, hasSummaryManager() ? &getSummaryManager() : nullptr, SummaryConsumer));
 
   // Set up API notes.
   TheSema->APINotes.setSwiftVersion(getAPINotesOpts().SwiftVersion);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 9cbebbabd8529..58aac1b8cd1b1 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -35,6 +35,7 @@
 #include "clang/Parse/ParseAST.h"
 #include "clang/Sema/HLSLExternalSemaSource.h"
 #include "clang/Sema/MultiplexExternalSemaSource.h"
+#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
@@ -46,9 +47,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include <fstream>
 #include <memory>
-#include <sstream>
 #include <system_error>
 using namespace clang;
 
@@ -894,6 +893,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     }
   }
 
+  if(!CI.hasSummaryManager()) {
+    CI.createSummaryManager();
+  }
+
   // Set up embedding for any specified files. Do this before we load any
   // source files, including the primary module map for the compilation.
   for (const auto &F : CI.getFrontendOpts().ModulesEmbedFiles) {
@@ -978,16 +981,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
       for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
                                          DirEnd;
            Dir != DirEnd && !EC; Dir.increment(EC)) {
-        if (llvm::sys::path::extension(Dir->path()) == ".json") {
-          std::ifstream t(Dir->path().str());
-          std::stringstream buffer;
-          buffer << t.rdbuf();
-
-          auto JSON = llvm::json::parse(buffer.str());
-          if (JSON)
-            JSON->dump();
-        }
+        if (llvm::sys::path::extension(Dir->path()) == ".json")
+          CI.getSummaryManager().ParseSummaryFromJSON(Dir->path());
       }
+
+      CI.getSummaryManager().ReduceSummaries();
     }
   }
 
@@ -1362,6 +1360,9 @@ void ASTFrontendAction::ExecuteAction() {
   if (CI.hasCodeCompletionConsumer())
     CompletionConsumer = &CI.getCodeCompletionConsumer();
 
+  if(!CI.hasSummaryManager()) {
+    CI.createSummaryManager();
+  }
   CI.createSummaryConsumer();
 
   // Use a code completion consumer?
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index d6297016c015f..bf8f73dc985db 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -99,6 +99,7 @@ add_clang_library(clangSema
   SemaType.cpp
   SemaWasm.cpp
   SemaX86.cpp
+  SummaryAttribute.cpp
   SummaryConsumer.cpp
   TypeLocBuilder.cpp
 
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index ae9f8c00081b3..5b3f1a4dca36e 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -250,6 +250,7 @@ const uint64_t Sema::MaximumAlignment;
 
 Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
            TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter,
+           SummaryManager *SummaryManager,
            SummaryConsumer *SummaryConsumer)
     : SemaBase(*this), CollectStats(false), TUKind(TUKind),
       CurFPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp),
@@ -265,8 +266,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       BPFPtr(std::make_unique<SemaBPF>(*this)),
       CodeCompletionPtr(
           std::make_unique<SemaCodeCompletion>(*this, CodeCompleter)),
-      SummarizerPtr(SummaryConsumer ? std::make_unique<SemaSummarizer>(
-                                          *this, SummaryConsumer)
+      SummarizerPtr(SummaryManager ? std::make_unique<SemaSummarizer>(*this, *SummaryManager, SummaryConsumer)
                                     : nullptr),
       CUDAPtr(std::make_unique<SemaCUDA>(*this)),
       DirectXPtr(std::make_unique<SemaDirectX>(*this)),
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index bb80fd2a46f58..45f1523868f75 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -16695,7 +16695,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   if (FD && !FD->isDeleted())
     checkTypeSupport(FD->getType(), FD->getLocation(), FD);
 
-  if (FD && SummarizerPtr)
+  if (FD && SummarizerPtr && SummarizerPtr->TheSummaryConsumer)
     SummarizerPtr->SummarizeFunctionBody(FD);
 
   return dcl;
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 9fbb940ec3fbb..b4ad0d4a6db2f 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -2,7 +2,10 @@
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Index/USRGeneration.h"
 #include "clang/Sema/SummaryConsumer.h"
+#include "clang/Sema/SummaryAttribute.h"
 #include <set>
+#include <fstream>
+#include <sstream>
 
 namespace clang {
 namespace {
@@ -34,7 +37,7 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
   }
 };
 
-class NoWriteGlobalAttrManager : public SummaryAttributeManager {
+class NoWriteGlobalDescription : public SummaryAttributeDescription {
   class Callback : public ast_matchers::MatchFinder::MatchCallback {
   public:
     bool WriteGlobal = false;
@@ -50,8 +53,8 @@ class NoWriteGlobalAttrManager : public SummaryAttributeManager {
   };
 
 public:
-  NoWriteGlobalAttrManager()
-      : SummaryAttributeManager(NO_WRITE_GLOBAL, "no_write_global") {}
+  NoWriteGlobalDescription()
+      : SummaryAttributeDescription(NO_WRITE_GLOBAL, "no_write_global") {}
 
   bool predicate(const FunctionDecl *FD) override {
     using namespace ast_matchers;
@@ -78,13 +81,95 @@ void FunctionSummary::addCall(const clang::FunctionDecl *FD) {
   Calls.emplace(Call);
 }
 
+FunctionSummary::FunctionSummary(SmallVector<char> ID, std::set<SummaryAttribute> FunctionAttrs, std::set<SmallVector<char>> Calls) :
+  ID(std::move(ID)), FunctionAttrs(std::move(FunctionAttrs)), Calls(std::move(Calls)) {}
+
 FunctionSummary::FunctionSummary(const clang::FunctionDecl *FD) {
   index::generateUSRForDecl(FD, ID);
 }
 
-SemaSummarizer::SemaSummarizer(Sema &S, SummaryConsumer *SummaryConsumer)
-    : SemaBase(S), TheSummaryConsumer(SummaryConsumer) {
-  Attributes.emplace_back(std::make_unique<NoWriteGlobalAttrManager>());
+SummaryManager::SummaryManager() {
+  AttributeDescriptions.emplace_back(std::make_unique<NoWriteGlobalDescription>());
+
+  for(auto &&AttrDescr : AttributeDescriptions)
+    AttrToDescription[AttrDescr->getAttribute()] = AttrDescr.get();
+}
+
+FunctionSummary SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
+  auto Summary = std::make_unique<FunctionSummary>(FD);
+  CallCollector::CollectCalledFunctions(FD, *Summary);
+
+  for (auto &&AttrDesc : AttributeDescriptions) {
+    if (const auto &Attr = AttrDesc->infer(FD))
+      Summary->addAttribute(*Attr);
+  }
+
+  // FIXME: This is duplicated and hurts my eyes regardless
+  std::string key(Summary->getID().begin(), Summary->getID().size());
+  auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
+  IDToSummary[key] = SummaryPtr;
+  return *SummaryPtr;
+}
+
+void SummaryManager::SerializeSummary(llvm::json::OStream &JOS, const FunctionSummary &Summary) const {
+  // Emits one summary as {"id", "attrs": {"function": [...]}, "calls": [{"id"}]}.
+  JOS.object([&]{
+    JOS.attribute("id", llvm::json::Value(Summary.getID()));
+    JOS.attributeObject("attrs", [&]{
+      JOS.attributeArray("function", [&]{
+        // Look descriptions up by attribute, not by position in the vector.
+        for (auto &&Attr : Summary.getFunctionAttrs())
+          for (auto &&Desc : AttributeDescriptions)
+            if (Desc->getAttribute() == Attr)
+              JOS.value(llvm::json::Value(Desc->serialize()));
+      });
+    });
+    JOS.attributeArray("calls", [&]{
+      for (auto &&Call : Summary.getCalls())
+        JOS.object([&]{ JOS.attribute("id", llvm::json::Value(Call)); });
+    });
+  });
+}
+
+void SummaryManager::ParseSummaryFromJSON(StringRef path) {
+  // Best-effort load: unreadable/malformed files and entries are skipped.
+  std::stringstream buffer;
+  buffer << std::ifstream(path.str()).rdbuf();
+  auto JSON = llvm::json::parse(buffer.str());
+  if (!JSON) {
+    // A failed llvm::Expected must be consumed or its destructor aborts.
+    llvm::consumeError(JSON.takeError());
+    return;
+  }
+  const llvm::json::Array *Summaries = JSON->getAsArray();
+  if (!Summaries)
+    return;
+  for (const llvm::json::Value &Entry : *Summaries) {
+    const llvm::json::Object *Summary = Entry.getAsObject();
+    const auto IDStr = Summary ? Summary->getString("id") : std::nullopt;
+    const auto *Attrs = Summary ? Summary->getObject("attrs") : nullptr;
+    const auto *FnAttrs = Attrs ? Attrs->getArray("function") : nullptr;
+    const auto *CallEntries = Summary ? Summary->getArray("calls") : nullptr;
+    if (!IDStr || !FnAttrs || !CallEntries)
+      continue;
+    std::set<SummaryAttribute> FunctionAttrs;
+    for (const llvm::json::Value &AttrVal : *FnAttrs)
+      if (auto Str = AttrVal.getAsString())
+        for (auto &&AttrDesc : AttributeDescriptions)
+          if (auto Attr = AttrDesc->parse(*Str))
+            FunctionAttrs.emplace(*Attr);
+    std::set<SmallVector<char>> Calls;
+    for (const llvm::json::Value &CallVal : *CallEntries)
+      if (const llvm::json::Object *Obj = CallVal.getAsObject())
+        if (auto CallID = Obj->getString("id"))
+          Calls.emplace(SmallString<128>(*CallID));
+    auto ParsedSummary = std::make_unique<FunctionSummary>(SmallString<128>(*IDStr), std::move(FunctionAttrs), std::move(Calls));
+    IDToSummary[IDStr->str()] = FunctionSummaries.emplace_back(std::move(ParsedSummary)).get();
+  }
+}
+
+void SummaryManager::ReduceSummaries() {
+  // FIXME: implement
 }
 
 void SemaSummarizer::ActOnStartOfSourceFile() {
@@ -98,13 +183,7 @@ void SemaSummarizer::ActOnEndOfSourceFile() {
 }
 
 void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
-  FunctionSummary Summary(FD);
-  CallCollector::CollectCalledFunctions(FD, Summary);
-
-  for (auto &&Attr : Attributes) {
-    if (const auto &InferredAttr = Attr->infer(FD))
-      Summary.addAttribute(*InferredAttr);
-  }
+  FunctionSummary Summary = TheSummaryManager->SummarizeFunctionBody(FD);
 
   if(TheSummaryConsumer)
     TheSummaryConsumer->ProcessFunctionSummary(Summary);
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
new file mode 100644
index 0000000000000..4affec7b4ffb7
--- /dev/null
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -0,0 +1,19 @@
+#include "clang/Sema/SummaryAttribute.h"
+
+namespace clang {
+std::string SummaryAttributeDescription::serialize() { return std::string(Serialzed); }
+
+std::optional<SummaryAttribute> SummaryAttributeDescription::parse(std::string_view input) {
+  if(input == Serialzed)
+    return Attr;
+
+  return std::nullopt;
+}
+
+std::optional<SummaryAttribute> SummaryAttributeDescription::infer(const FunctionDecl *FD) {
+  if (predicate(FD))
+    return Attr;
+
+  return std::nullopt;
+}
+} // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Sema/SummaryConsumer.cpp b/clang/lib/Sema/SummaryConsumer.cpp
index 45aeaee502ed9..37646c1e07a7f 100644
--- a/clang/lib/Sema/SummaryConsumer.cpp
+++ b/clang/lib/Sema/SummaryConsumer.cpp
@@ -3,22 +3,6 @@
 
 namespace clang {
 void JSONPrintingSummaryConsumer::ProcessFunctionSummary(const FunctionSummary &Summary) {
-  JOS.object([&]{
-    JOS.attribute("id", llvm::json::Value(Summary.getID()));
-    JOS.attributeObject("attrs", [&]{
-      JOS.attributeArray("function", [&]{
-        for(auto &&Attr : Summary.getFunctionAttrs()) {
-          JOS.value(llvm::json::Value(SummaryAttributeManager::serialize(Attr)));
-        }
-      });
-    });
-    JOS.attributeArray("calls", [&]{
-      for(auto &&Call : Summary.getCalls()) {
-        JOS.object([&]{
-          JOS.attribute("id", llvm::json::Value(Call));
-        });
-      }
-    });
-  });
+  TheSummaryManager->SerializeSummary(JOS, Summary);
 }
 } // namespace clang
\ No newline at end of file

>From e6b21071a455f1aeae544dec93cba08ccf985008 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Tue, 10 Jun 2025 23:30:34 +0200
Subject: [PATCH 08/48] [clang][Summary] implement reduction prototype

---
 clang/include/clang/Sema/SemaSummarizer.h   |  6 +++
 clang/include/clang/Sema/SummaryAttribute.h |  3 +-
 clang/lib/Frontend/FrontendAction.cpp       | 14 +++++++
 clang/lib/Sema/SemaSummarizer.cpp           | 42 ++++++++++++++++++++-
 4 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 38e3565af0a95..7d2f8252dc957 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -20,6 +20,7 @@ class FunctionSummary {
   const std::set<SummaryAttribute> &getFunctionAttrs() const { return FunctionAttrs; }
   const std::set<SmallVector<char>> &getCalls() const { return Calls; }
 
+  void clearAttributes() { FunctionAttrs.clear(); }
   void addAttribute(SummaryAttribute Attr) { FunctionAttrs.emplace(Attr); }
   bool hasAttribute(SummaryAttribute Attr) const { return FunctionAttrs.count(Attr); }
 
@@ -42,6 +43,11 @@ class SummaryManager {
   void ParseSummaryFromJSON(StringRef path);
 
   void ReduceSummaries();
+
+  // FIXME: debug only, remove later
+  const std::vector<std::unique_ptr<FunctionSummary>> &getSummaries() {
+    return FunctionSummaries;
+  }
 };
 
 // FIXME: Is this class needed?
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index 66d1d11334ac5..beb735c00933c 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -25,8 +25,7 @@ class SummaryAttributeDescription {
   virtual bool predicate(const FunctionDecl *FD) = 0;
   std::optional<SummaryAttribute> infer(const FunctionDecl *FD);
 
-  // FIXME: This should receive all the parsed summaries as well.
-  virtual bool merge(FunctionSummary &Summary) = 0;
+  virtual bool merge(const FunctionSummary &Summary) const = 0;
 
   virtual std::string serialize();
   virtual std::optional<SummaryAttribute> parse(std::string_view input);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 58aac1b8cd1b1..7af4a79f83eec 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -985,7 +985,21 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
           CI.getSummaryManager().ParseSummaryFromJSON(Dir->path());
       }
 
+      // FIXME: debug only, remove later
+      for (auto &&S : CI.getSummaryManager().getSummaries()) {
+        llvm::json::OStream Out(llvm::errs());
+        CI.getSummaryManager().SerializeSummary(Out, *S);
+      }
+      llvm::errs() << '\n';
+
       CI.getSummaryManager().ReduceSummaries();
+
+      // FIXME: debug only, remove later
+      for (auto &&S : CI.getSummaryManager().getSummaries()) {
+        llvm::json::OStream Out(llvm::errs());
+        CI.getSummaryManager().SerializeSummary(Out, *S);
+      }
+      llvm::errs() << '\n';
     }
   }
 
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index b4ad0d4a6db2f..569d272b36bb3 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -71,7 +71,9 @@ class NoWriteGlobalDescription : public SummaryAttributeDescription {
     return !CB.WriteGlobal;
   };
 
-  bool merge(FunctionSummary &Summary) override { return true; };
+  bool merge(const FunctionSummary &Summary) const override {
+    return Summary.getFunctionAttrs().count(Attr);
+  };
 };
 } // namespace
 
@@ -169,7 +171,43 @@ void SummaryManager::ParseSummaryFromJSON(StringRef path) {
 }
 
 void SummaryManager::ReduceSummaries() {
-  // FIXME: implement
+  bool changed = true;
+  while (changed) {
+    changed = false;
+
+    for (auto &&Function : FunctionSummaries) {
+      for (auto &&call : Function->getCalls()) {
+        // FIXME: This is duplicated and hurts my eyes regardless
+        std::string key(call.begin(), call.size());
+
+        // If we don't have a summary about a called function, we forget
+        // everything about the current one as well.
+        if (!IDToSummary.count(key)) {
+          changed = true;
+          Function->clearAttributes();
+          break;
+        }
+
+        const FunctionSummary *callSummary = IDToSummary.at(key);
+
+        std::set<SummaryAttribute> reducedAttrs;
+        for (auto &&attr : Function->getFunctionAttrs()) {
+          // FIXME: handle union style attributes...
+          if (AttrToDescription[attr]->merge(*callSummary))
+            reducedAttrs.emplace(attr);
+        }
+
+        if (reducedAttrs != Function->getFunctionAttrs()) {
+          Function->clearAttributes();
+
+          for (auto &&attr : reducedAttrs)
+            Function->addAttribute(attr);
+
+          changed = true;
+        }
+      }
+    }
+  }
 }
 
 void SemaSummarizer::ActOnStartOfSourceFile() {

>From da80bdbf6e7749fc9e81ec6bfdeb211b999d637d Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 12 Jun 2025 00:31:33 +0200
Subject: [PATCH 09/48] [clang][Summary] make `SummaryManager` own the
 attributes

---
 clang/include/clang/Sema/SemaSummarizer.h   |  22 +--
 clang/include/clang/Sema/SummaryAttribute.h |  38 +++--
 clang/lib/Sema/SemaSummarizer.cpp           | 168 ++++++++------------
 clang/lib/Sema/SummaryAttribute.cpp         |  33 ++--
 4 files changed, 130 insertions(+), 131 deletions(-)

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 7d2f8252dc957..42f668009d2f2 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -9,35 +9,37 @@
 namespace clang {
 class FunctionSummary {
   SmallVector<char> ID;
-  std::set<SummaryAttribute> FunctionAttrs;
+  std::set<const SummaryAttribute *> FunctionAttrs;
   std::set<SmallVector<char>> Calls;
 
 public:
-  FunctionSummary(SmallVector<char> ID, std::set<SummaryAttribute> FunctionAttrs, std::set<SmallVector<char>> Calls);
+  FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttribute *> FunctionAttrs, std::set<SmallVector<char>> Calls);
   FunctionSummary(const clang::FunctionDecl *FD);
 
   SmallVector<char> getID() const { return ID; }
-  const std::set<SummaryAttribute> &getFunctionAttrs() const { return FunctionAttrs; }
+  const std::set<const SummaryAttribute *> &getFunctionAttrs() const { return FunctionAttrs; }
   const std::set<SmallVector<char>> &getCalls() const { return Calls; }
 
-  void clearAttributes() { FunctionAttrs.clear(); }
-  void addAttribute(SummaryAttribute Attr) { FunctionAttrs.emplace(Attr); }
-  bool hasAttribute(SummaryAttribute Attr) const { return FunctionAttrs.count(Attr); }
+  void replaceAttributes(std::set<const SummaryAttribute *> Attrs) { FunctionAttrs = std::move(Attrs); }
+  void addAttribute(const SummaryAttribute * Attr) { FunctionAttrs.emplace(Attr); }
 
   void addCall(const clang::FunctionDecl *FD);
 };
 
 class SummaryManager {
-  std::map<std::string, const FunctionSummary *> IDToSummary;
+  std::map<SmallVector<char>, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
   
-  std::map<SummaryAttribute, const SummaryAttributeDescription *> AttrToDescription;
-  std::vector<std::unique_ptr<SummaryAttributeDescription>> AttributeDescriptions;
+  std::map<SummaryAttributeKind, const SummaryAttribute *> KindToAttribute;
+  std::vector<std::unique_ptr<SummaryAttribute>> Attributes;
 
+  void SaveSummary(std::unique_ptr<FunctionSummary> Summary);
+  bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
 public:
   SummaryManager();
 
-  FunctionSummary SummarizeFunctionBody(const FunctionDecl *FD);
+  const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
+  void SummarizeFunctionBody(const FunctionDecl *FD);
   
   void SerializeSummary(llvm::json::OStream &, const FunctionSummary &) const;
   void ParseSummaryFromJSON(StringRef path);
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index beb735c00933c..bfe7ecf84e795 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -2,33 +2,47 @@
 #define LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
 #include <string>
 
 namespace clang {
-enum SummaryAttribute {
+enum SummaryAttributeKind {
   NO_WRITE_GLOBAL,
 };
 
 class FunctionSummary;
 
-class SummaryAttributeDescription {
-protected:
-  const SummaryAttribute Attr;
+class SummaryAttribute {
+  const SummaryAttributeKind Kind;
   std::string_view Serialzed;
 
 public:
-  SummaryAttributeDescription(SummaryAttribute Attr, const char *Str) : Attr(Attr), Serialzed(Str) {}
-  virtual ~SummaryAttributeDescription() = default;
+  SummaryAttribute(SummaryAttributeKind Attr, const char *Str) : Kind(Attr), Serialzed(Str) {}
+  virtual ~SummaryAttribute() = default;
+  
+  SummaryAttributeKind getKind() { return Kind; }
 
-  SummaryAttribute getAttribute() { return Attr; };
+  virtual bool infer(const FunctionDecl *FD) const = 0;
+  virtual bool merge(const FunctionSummary &Summary) const = 0;
 
-  virtual bool predicate(const FunctionDecl *FD) = 0;
-  std::optional<SummaryAttribute> infer(const FunctionDecl *FD);
+  virtual std::string serialize() const { return std::string(Serialzed); };
+  virtual bool parse(std::string_view input) const { return input == Serialzed; };
+};
 
-  virtual bool merge(const FunctionSummary &Summary) const = 0;
+class NoWriteGlobalDescription : public SummaryAttribute {
+  class Callback : public ast_matchers::MatchFinder::MatchCallback {
+  public:
+    bool WriteGlobal = false;
 
-  virtual std::string serialize();
-  virtual std::optional<SummaryAttribute> parse(std::string_view input);
+    void run(const ast_matchers::MatchFinder::MatchResult &Result) override final;
+  };
+
+public:
+  NoWriteGlobalDescription()
+  : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
+  
+  bool infer(const FunctionDecl *FD) const override final;
+  bool merge(const FunctionSummary &Summary) const override final;
 };
 } // namespace clang
 
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 569d272b36bb3..97b2d50bd7602 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -37,80 +37,54 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
   }
 };
 
-class NoWriteGlobalDescription : public SummaryAttributeDescription {
-  class Callback : public ast_matchers::MatchFinder::MatchCallback {
-  public:
-    bool WriteGlobal = false;
-
-    void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
-      const auto *Assignment =
-          Result.Nodes.getNodeAs<BinaryOperator>("assignment");
-      if (!Assignment)
-        return;
-
-      WriteGlobal = true;
-    };
-  };
-
-public:
-  NoWriteGlobalDescription()
-      : SummaryAttributeDescription(NO_WRITE_GLOBAL, "no_write_global") {}
-
-  bool predicate(const FunctionDecl *FD) override {
-    using namespace ast_matchers;
-    MatchFinder Finder;
-    Callback CB;
-
-    Finder.addMatcher(
-        functionDecl(forEachDescendant(
-            binaryOperator(isAssignmentOperator(),
-                           hasLHS(declRefExpr(to(varDecl(hasGlobalStorage())))))
-                .bind("assignment"))),
-        &CB);
-    Finder.match(*FD, FD->getASTContext());
-    return !CB.WriteGlobal;
-  };
-
-  bool merge(const FunctionSummary &Summary) const override {
-    return Summary.getFunctionAttrs().count(Attr);
-  };
-};
+SmallVector<char> GetUSR(const FunctionDecl *FD) {
+  SmallVector<char> USR;
+  index::generateUSRForDecl(FD, USR);
+  return USR;
+}
 } // namespace
 
 void FunctionSummary::addCall(const clang::FunctionDecl *FD) {
-  SmallVector<char> Call;
-  index::generateUSRForDecl(FD, Call);
-  Calls.emplace(Call);
+  Calls.emplace(GetUSR(FD));
 }
 
-FunctionSummary::FunctionSummary(SmallVector<char> ID, std::set<SummaryAttribute> FunctionAttrs, std::set<SmallVector<char>> Calls) :
+FunctionSummary::FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttribute *> FunctionAttrs, std::set<SmallVector<char>> Calls) :
   ID(std::move(ID)), FunctionAttrs(std::move(FunctionAttrs)), Calls(std::move(Calls)) {}
 
-FunctionSummary::FunctionSummary(const clang::FunctionDecl *FD) {
-  index::generateUSRForDecl(FD, ID);
-}
+FunctionSummary::FunctionSummary(const clang::FunctionDecl *FD) : ID(GetUSR(FD)) {}
 
 SummaryManager::SummaryManager() {
-  AttributeDescriptions.emplace_back(std::make_unique<NoWriteGlobalDescription>());
+  Attributes.emplace_back(std::make_unique<NoWriteGlobalDescription>());
+
+  for(auto &&Attr : Attributes) {
+    assert(KindToAttribute.count(Attr->getKind()) == 0 && "Attr already registered");
+    KindToAttribute[Attr->getKind()] = Attr.get();
+  }
+}
 
-  for(auto &&AttrDescr : AttributeDescriptions)
-    AttrToDescription[AttrDescr->getAttribute()] = AttrDescr.get();
+void SummaryManager::SaveSummary(std::unique_ptr<FunctionSummary> Summary) {
+  auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
+  IDToSummary[SummaryPtr->getID()] = SummaryPtr;
 }
 
-FunctionSummary SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
+const FunctionSummary *SummaryManager::GetSummary(const FunctionDecl *FD) const { 
+  auto USR = GetUSR(FD);
+  if(!IDToSummary.count(USR))
+    return nullptr;
+
+  return IDToSummary.at(USR);
+}
+
+void SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
   auto Summary = std::make_unique<FunctionSummary>(FD);
   CallCollector::CollectCalledFunctions(FD, *Summary);
 
-  for (auto &&AttrDesc : AttributeDescriptions) {
-    if (const auto &Attr = AttrDesc->infer(FD))
-      Summary->addAttribute(*Attr);
+  for (auto &&Attr : Attributes) {
+    if (Attr->infer(FD))
+      Summary->addAttribute(Attr.get());
   }
 
-  // FIXME: This is duplicated and hurts my eyes regardless
-  std::string key(Summary->getID().begin(), Summary->getID().size());
-  auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
-  IDToSummary[key] = SummaryPtr;
-  return *SummaryPtr;
+  SaveSummary(std::move(Summary));
 }
 
 void SummaryManager::SerializeSummary(llvm::json::OStream &JOS, const FunctionSummary &Summary) const {
@@ -119,7 +93,7 @@ void SummaryManager::SerializeSummary(llvm::json::OStream &JOS, const FunctionSu
     JOS.attributeObject("attrs", [&]{
       JOS.attributeArray("function", [&]{
         for(auto &&Attr : Summary.getFunctionAttrs()) {
-          JOS.value(llvm::json::Value(AttributeDescriptions[Attr]->serialize()));
+          JOS.value(llvm::json::Value(Attr->serialize()));
         }
       });
     });
@@ -147,12 +121,12 @@ void SummaryManager::ParseSummaryFromJSON(StringRef path) {
     llvm::json::Object *Summary = it->getAsObject();
 
     SmallString<128> ID(*Summary->getString("id"));
-    std::set<SummaryAttribute> FunctionAttrs;
+    std::set<const SummaryAttribute *> FunctionAttrs;
     llvm::json::Array *FunctionAttributes = Summary->getObject("attrs")->getArray("function");
     for(auto attrIt = FunctionAttributes->begin(); attrIt != FunctionAttributes->end(); ++attrIt) {
-      for(auto &&AttrDesc : AttributeDescriptions) {
-        if(auto Attr = AttrDesc->parse(*attrIt->getAsString()))
-          FunctionAttrs.emplace(*Attr);
+      for(auto &&Attr : Attributes) {
+        if(Attr->parse(*attrIt->getAsString()))
+          FunctionAttrs.emplace(Attr.get());
       }
     }
 
@@ -163,51 +137,47 @@ void SummaryManager::ParseSummaryFromJSON(StringRef path) {
       Calls.emplace(SmallString<128>(*Obj->getString("id")));
     }
     
-    std::string key = ID.str().str();
-    auto ParsedSummary = std::make_unique<FunctionSummary>(std::move(ID), std::move(FunctionAttrs), std::move(Calls));
-    auto *ParsedSummaryPtr = FunctionSummaries.emplace_back(std::move(ParsedSummary)).get();
-    IDToSummary[key] = ParsedSummaryPtr;
+    SaveSummary(std::make_unique<FunctionSummary>(std::move(ID), std::move(FunctionAttrs), std::move(Calls)));
   }
 }
 
-void SummaryManager::ReduceSummaries() {
-  bool changed = true;
-  while (changed) {
-    changed = false;
-
-    for (auto &&Function : FunctionSummaries) {
-      for (auto &&call : Function->getCalls()) {
-        // FIXME: This is duplicated and hurts my eyes regardless
-        std::string key(call.begin(), call.size());
-
-        // If we don't have a summary about a called function, we forget
-        // everything about the current one as well.
-        if (!IDToSummary.count(key)) {
-          changed = true;
-          Function->clearAttributes();
-          break;
-        }
+bool SummaryManager::ReduceFunctionSummary(FunctionSummary &Function) {
+  bool changed = false;
 
-        const FunctionSummary *callSummary = IDToSummary.at(key);
+  for (auto &&call : Function.getCalls()) {
+    std::set<const SummaryAttribute *> reducedAttrs;
 
-        std::set<SummaryAttribute> reducedAttrs;
-        for (auto &&attr : Function->getFunctionAttrs()) {
-          // FIXME: handle union style attributes...
-          if (AttrToDescription[attr]->merge(*callSummary))
-            reducedAttrs.emplace(attr);
-        }
+    // If we don't have a summary about a called function, we forget
+    // everything about the current one as well.
+    if (!IDToSummary.count(call)) {
+      Function.replaceAttributes(std::move(reducedAttrs));
+      return true;
+    }
 
-        if (reducedAttrs != Function->getFunctionAttrs()) {
-          Function->clearAttributes();
+    const FunctionSummary *callSummary = IDToSummary[call];
 
-          for (auto &&attr : reducedAttrs)
-            Function->addAttribute(attr);
+    for (auto &&Attr : Function.getFunctionAttrs()) {
+      if (Attr->merge(*callSummary))
+        reducedAttrs.emplace(Attr);
+    }
 
-          changed = true;
-        }
-      }
+    if (reducedAttrs != Function.getFunctionAttrs()) {
+      Function.replaceAttributes(std::move(reducedAttrs));
+      changed = true;
     }
   }
+
+  return changed;
+}
+
+void SummaryManager::ReduceSummaries() {
+  bool changed = true;
+  while (changed) {
+    changed = false;
+
+    for (auto &&Function : FunctionSummaries)
+      changed |= ReduceFunctionSummary(*Function);
+  }
 }
 
 void SemaSummarizer::ActOnStartOfSourceFile() {
@@ -221,10 +191,10 @@ void SemaSummarizer::ActOnEndOfSourceFile() {
 }
 
 void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
-  FunctionSummary Summary = TheSummaryManager->SummarizeFunctionBody(FD);
+  TheSummaryManager->SummarizeFunctionBody(FD);
 
   if(TheSummaryConsumer)
-    TheSummaryConsumer->ProcessFunctionSummary(Summary);
+    TheSummaryConsumer->ProcessFunctionSummary(*TheSummaryManager->GetSummary(FD));
 }
 
 } // namespace clang
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
index 4affec7b4ffb7..953cc38b2e7a1 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -1,19 +1,32 @@
 #include "clang/Sema/SummaryAttribute.h"
+#include "clang/Sema/SemaSummarizer.h"
 
 namespace clang {
-std::string SummaryAttributeDescription::serialize() { return std::string(Serialzed); }
+void NoWriteGlobalDescription::Callback::run(const ast_matchers::MatchFinder::MatchResult &Result) {
+  const auto *Assignment =
+      Result.Nodes.getNodeAs<BinaryOperator>("assignment");
+  if (!Assignment)
+    return;
 
-std::optional<SummaryAttribute> SummaryAttributeDescription::parse(std::string_view input) {
-  if(input == Serialzed)
-    return Attr;
-
-  return std::nullopt;
+  WriteGlobal = true;
 }
 
-std::optional<SummaryAttribute> SummaryAttributeDescription::infer(const FunctionDecl *FD) {
-  if (predicate(FD))
-    return Attr;
+bool NoWriteGlobalDescription::infer(const FunctionDecl *FD) const {
+  using namespace ast_matchers;
+  MatchFinder Finder;
+  Callback CB;
+
+  Finder.addMatcher(
+      functionDecl(forEachDescendant(
+          binaryOperator(isAssignmentOperator(),
+                         hasLHS(declRefExpr(to(varDecl(hasGlobalStorage())))))
+              .bind("assignment"))),
+      &CB);
+  Finder.match(*FD, FD->getASTContext());
+  return !CB.WriteGlobal;
+}
 
-  return std::nullopt;
+bool NoWriteGlobalDescription::merge(const FunctionSummary &Summary) const {
+  return Summary.getFunctionAttrs().count(this);
 }
 } // namespace clang
\ No newline at end of file

>From 4964c96b88d0a1a1b6b251a0489b2e82ceb60ebe Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 12 Jun 2025 01:04:13 +0200
Subject: [PATCH 10/48] [clang][Summary] only keep one constructor for
 `FunctionSummary`

---
 clang/include/clang/Sema/SemaSummarizer.h   | 27 ++++---
 clang/include/clang/Sema/SummaryAttribute.h | 16 ++--
 clang/lib/Sema/SemaSummarizer.cpp           | 87 ++++++++++-----------
 clang/lib/Sema/SummaryAttribute.cpp         |  8 +-
 4 files changed, 73 insertions(+), 65 deletions(-)

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 42f668009d2f2..7fea668bae694 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -9,38 +9,45 @@
 namespace clang {
 class FunctionSummary {
   SmallVector<char> ID;
-  std::set<const SummaryAttribute *> FunctionAttrs;
+  std::set<const SummaryAttribute *> Attrs;
   std::set<SmallVector<char>> Calls;
 
 public:
-  FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttribute *> FunctionAttrs, std::set<SmallVector<char>> Calls);
-  FunctionSummary(const clang::FunctionDecl *FD);
+  FunctionSummary(SmallVector<char> ID,
+                  std::set<const SummaryAttribute *> Attrs,
+                  std::set<SmallVector<char>> Calls);
 
   SmallVector<char> getID() const { return ID; }
-  const std::set<const SummaryAttribute *> &getFunctionAttrs() const { return FunctionAttrs; }
+  const std::set<const SummaryAttribute *> &getAttributes() const {
+    return Attrs;
+  }
   const std::set<SmallVector<char>> &getCalls() const { return Calls; }
 
-  void replaceAttributes(std::set<const SummaryAttribute *> Attrs) { FunctionAttrs = std::move(Attrs); }
-  void addAttribute(const SummaryAttribute * Attr) { FunctionAttrs.emplace(Attr); }
+  void replaceAttributes(std::set<const SummaryAttribute *> Attrs) {
+    this->Attrs = std::move(Attrs);
+  }
 
-  void addCall(const clang::FunctionDecl *FD);
+  friend class SummaryManager;
 };
 
 class SummaryManager {
   std::map<SmallVector<char>, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
-  
+
   std::map<SummaryAttributeKind, const SummaryAttribute *> KindToAttribute;
   std::vector<std::unique_ptr<SummaryAttribute>> Attributes;
 
-  void SaveSummary(std::unique_ptr<FunctionSummary> Summary);
+  void CreateSummary(SmallVector<char> ID,
+                     std::set<const SummaryAttribute *> Attrs,
+                     std::set<SmallVector<char>> Calls);
   bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
+
 public:
   SummaryManager();
 
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
-  
+
   void SerializeSummary(llvm::json::OStream &, const FunctionSummary &) const;
   void ParseSummaryFromJSON(StringRef path);
 
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index bfe7ecf84e795..22067da3a4b81 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -17,16 +17,19 @@ class SummaryAttribute {
   std::string_view Serialzed;
 
 public:
-  SummaryAttribute(SummaryAttributeKind Attr, const char *Str) : Kind(Attr), Serialzed(Str) {}
+  SummaryAttribute(SummaryAttributeKind Attr, const char *Str)
+      : Kind(Attr), Serialzed(Str) {}
   virtual ~SummaryAttribute() = default;
-  
+
   SummaryAttributeKind getKind() { return Kind; }
 
   virtual bool infer(const FunctionDecl *FD) const = 0;
   virtual bool merge(const FunctionSummary &Summary) const = 0;
 
   virtual std::string serialize() const { return std::string(Serialzed); };
-  virtual bool parse(std::string_view input) const { return input == Serialzed; };
+  virtual bool parse(std::string_view input) const {
+    return input == Serialzed;
+  };
 };
 
 class NoWriteGlobalDescription : public SummaryAttribute {
@@ -34,13 +37,14 @@ class NoWriteGlobalDescription : public SummaryAttribute {
   public:
     bool WriteGlobal = false;
 
-    void run(const ast_matchers::MatchFinder::MatchResult &Result) override final;
+    void
+    run(const ast_matchers::MatchFinder::MatchResult &Result) override final;
   };
 
 public:
   NoWriteGlobalDescription()
-  : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
-  
+      : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
+
   bool infer(const FunctionDecl *FD) const override final;
   bool merge(const FunctionSummary &Summary) const override final;
 };
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 97b2d50bd7602..811c95028abb0 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -9,10 +9,14 @@
 
 namespace clang {
 namespace {
-class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
-  FunctionSummary *Summary;
+SmallVector<char> GetUSR(const FunctionDecl *FD) {
+  SmallVector<char> USR;
+  index::generateUSRForDecl(FD, USR);
+  return USR;
+}
 
-  CallCollector(FunctionSummary &Summary) : Summary(&Summary) {}
+class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
+  std::set<SmallVector<char>> Calls;
 
   virtual void
   run(const ast_matchers::MatchFinder::MatchResult &Result) override {
@@ -21,78 +25,70 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
       return;
 
     const auto *Callee = llvm::dyn_cast<FunctionDecl>(Call->getCalleeDecl());
-    Summary->addCall(Callee);
+    Calls.emplace(GetUSR(Callee));
   }
 
 public:
-  static void CollectCalledFunctions(const FunctionDecl *FD,
-                                     FunctionSummary &Summary) {
+  std::set<SmallVector<char>> collect(const FunctionDecl *FD) {
     using namespace ast_matchers;
     MatchFinder Finder;
-    CallCollector CC(Summary);
 
     Finder.addMatcher(functionDecl(forEachDescendant(callExpr().bind("call"))),
-                      &CC);
+                      this);
     Finder.match(*FD, FD->getASTContext());
+
+    return Calls;
   }
 };
-
-SmallVector<char> GetUSR(const FunctionDecl *FD) {
-  SmallVector<char> USR;
-  index::generateUSRForDecl(FD, USR);
-  return USR;
-}
 } // namespace
 
-void FunctionSummary::addCall(const clang::FunctionDecl *FD) {
-  Calls.emplace(GetUSR(FD));
-}
-
-FunctionSummary::FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttribute *> FunctionAttrs, std::set<SmallVector<char>> Calls) :
-  ID(std::move(ID)), FunctionAttrs(std::move(FunctionAttrs)), Calls(std::move(Calls)) {}
-
-FunctionSummary::FunctionSummary(const clang::FunctionDecl *FD) : ID(GetUSR(FD)) {}
+FunctionSummary::FunctionSummary(
+    SmallVector<char> ID, std::set<const SummaryAttribute *> FunctionAttrs,
+    std::set<SmallVector<char>> Calls)
+    : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
+      Calls(std::move(Calls)) {}
 
 SummaryManager::SummaryManager() {
   Attributes.emplace_back(std::make_unique<NoWriteGlobalDescription>());
 
-  for(auto &&Attr : Attributes) {
-    assert(KindToAttribute.count(Attr->getKind()) == 0 && "Attr already registered");
+  for (auto &&Attr : Attributes) {
+    assert(KindToAttribute.count(Attr->getKind()) == 0 &&
+           "Attr already registered");
     KindToAttribute[Attr->getKind()] = Attr.get();
   }
 }
 
-void SummaryManager::SaveSummary(std::unique_ptr<FunctionSummary> Summary) {
+void SummaryManager::CreateSummary(SmallVector<char> ID,
+                                   std::set<const SummaryAttribute *> Attrs,
+                                   std::set<SmallVector<char>> Calls) {
+  auto Summary = std::make_unique<FunctionSummary>(
+      std::move(ID), std::move(Attrs), std::move(Calls));
   auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
   IDToSummary[SummaryPtr->getID()] = SummaryPtr;
 }
 
-const FunctionSummary *SummaryManager::GetSummary(const FunctionDecl *FD) const { 
+const FunctionSummary *
+SummaryManager::GetSummary(const FunctionDecl *FD) const {
   auto USR = GetUSR(FD);
-  if(!IDToSummary.count(USR))
-    return nullptr;
-
-  return IDToSummary.at(USR);
+  return IDToSummary.count(USR) ? IDToSummary.at(USR) : nullptr;
 }
 
 void SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
-  auto Summary = std::make_unique<FunctionSummary>(FD);
-  CallCollector::CollectCalledFunctions(FD, *Summary);
-
+  std::set<const SummaryAttribute *> Attrs;
   for (auto &&Attr : Attributes) {
     if (Attr->infer(FD))
-      Summary->addAttribute(Attr.get());
+      Attrs.emplace(Attr.get());
   }
 
-  SaveSummary(std::move(Summary));
+  CreateSummary(GetUSR(FD), std::move(Attrs), CallCollector().collect(FD));
 }
 
 void SummaryManager::SerializeSummary(llvm::json::OStream &JOS, const FunctionSummary &Summary) const {
   JOS.object([&]{
     JOS.attribute("id", llvm::json::Value(Summary.getID()));
-    JOS.attributeObject("attrs", [&]{
-      JOS.attributeArray("function", [&]{
-        for(auto &&Attr : Summary.getFunctionAttrs()) {
+    JOS.attributeObject("attrs", [&] {
+      JOS.attributeArray("function", [&] {
+        for (auto &&Attr : Summary.getAttributes()) {
           JOS.value(llvm::json::Value(Attr->serialize()));
         }
       });
@@ -124,8 +120,8 @@ void SummaryManager::ParseSummaryFromJSON(StringRef path) {
     std::set<const SummaryAttribute *> FunctionAttrs;
     llvm::json::Array *FunctionAttributes = Summary->getObject("attrs")->getArray("function");
     for(auto attrIt = FunctionAttributes->begin(); attrIt != FunctionAttributes->end(); ++attrIt) {
-      for(auto &&Attr : Attributes) {
-        if(Attr->parse(*attrIt->getAsString()))
+      for (auto &&Attr : Attributes) {
+        if (Attr->parse(*attrIt->getAsString()))
           FunctionAttrs.emplace(Attr.get());
       }
     }
@@ -136,8 +132,8 @@ void SummaryManager::ParseSummaryFromJSON(StringRef path) {
       auto *Obj = callIt->getAsObject();
       Calls.emplace(SmallString<128>(*Obj->getString("id")));
     }
-    
-    SaveSummary(std::make_unique<FunctionSummary>(std::move(ID), std::move(FunctionAttrs), std::move(Calls)));
+
+    CreateSummary(std::move(ID), std::move(FunctionAttrs), std::move(Calls));
   }
 }
 
@@ -156,12 +152,12 @@ bool SummaryManager::ReduceFunctionSummary(FunctionSummary &Function) {
 
     const FunctionSummary *callSummary = IDToSummary[call];
 
-    for (auto &&Attr : Function.getFunctionAttrs()) {
+    for (auto &&Attr : Function.getAttributes()) {
       if (Attr->merge(*callSummary))
         reducedAttrs.emplace(Attr);
     }
 
-    if (reducedAttrs != Function.getFunctionAttrs()) {
+    if (reducedAttrs != Function.getAttributes()) {
       Function.replaceAttributes(std::move(reducedAttrs));
       changed = true;
     }
@@ -194,7 +190,8 @@ void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
   TheSummaryManager->SummarizeFunctionBody(FD);
 
   if(TheSummaryConsumer)
-    TheSummaryConsumer->ProcessFunctionSummary(*TheSummaryManager->GetSummary(FD));
+    TheSummaryConsumer->ProcessFunctionSummary(
+        *TheSummaryManager->GetSummary(FD));
 }
 
 } // namespace clang
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
index 953cc38b2e7a1..c1611ef90fae5 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -2,9 +2,9 @@
 #include "clang/Sema/SemaSummarizer.h"
 
 namespace clang {
-void NoWriteGlobalDescription::Callback::run(const ast_matchers::MatchFinder::MatchResult &Result) {
-  const auto *Assignment =
-      Result.Nodes.getNodeAs<BinaryOperator>("assignment");
+void NoWriteGlobalDescription::Callback::run(
+    const ast_matchers::MatchFinder::MatchResult &Result) {
+  const auto *Assignment = Result.Nodes.getNodeAs<BinaryOperator>("assignment");
   if (!Assignment)
     return;
 
@@ -27,6 +27,6 @@ bool NoWriteGlobalDescription::infer(const FunctionDecl *FD) const {
 }
 
 bool NoWriteGlobalDescription::merge(const FunctionSummary &Summary) const {
-  return Summary.getFunctionAttrs().count(this);
+  return Summary.getAttributes().count(this);
 }
 } // namespace clang
\ No newline at end of file

>From d8c26d3d7a6bcf989748bcde25f6ef46c93f43eb Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 12 Jun 2025 01:13:23 +0200
Subject: [PATCH 11/48] [clang][Summary] the summary manager shouldn't be
 reading the JSON file

---
 clang/include/clang/Sema/SemaSummarizer.h |  2 +-
 clang/lib/Frontend/FrontendAction.cpp     | 15 +++++++++++--
 clang/lib/Sema/SemaSummarizer.cpp         | 26 +++++++----------------
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 7fea668bae694..2c58591350b8e 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -49,7 +49,7 @@ class SummaryManager {
   void SummarizeFunctionBody(const FunctionDecl *FD);
 
   void SerializeSummary(llvm::json::OStream &, const FunctionSummary &) const;
-  void ParseSummaryFromJSON(StringRef path);
+  void ParseSummaryFromJSON(const llvm::json::Array &Summary);
 
   void ReduceSummaries();
 
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 7af4a79f83eec..b8732e58e7ffd 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -47,7 +47,9 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include <fstream>
 #include <memory>
+#include <sstream>
 #include <system_error>
 using namespace clang;
 
@@ -981,8 +983,17 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
       for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
                                          DirEnd;
            Dir != DirEnd && !EC; Dir.increment(EC)) {
-        if (llvm::sys::path::extension(Dir->path()) == ".json")
-          CI.getSummaryManager().ParseSummaryFromJSON(Dir->path());
+        if (llvm::sys::path::extension(Dir->path()) == ".json") {
+          std::ifstream t(Dir->path().str());
+          std::stringstream buffer;
+          buffer << t.rdbuf();
+
+          auto JSON = llvm::json::parse(buffer.str());
+          if (!JSON)
+            continue;
+
+          CI.getSummaryManager().ParseSummaryFromJSON(*JSON->getAsArray());
+        }
       }
 
       // FIXME: debug only, remove later
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 811c95028abb0..5cd25fd563b90 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -1,11 +1,9 @@
 #include "clang/Sema/SemaSummarizer.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Index/USRGeneration.h"
-#include "clang/Sema/SummaryConsumer.h"
 #include "clang/Sema/SummaryAttribute.h"
+#include "clang/Sema/SummaryConsumer.h"
 #include <set>
-#include <fstream>
-#include <sstream>
 
 namespace clang {
 namespace {
@@ -103,22 +101,14 @@ void SummaryManager::SerializeSummary(llvm::json::OStream &JOS, const FunctionSu
   });
 }
 
-void SummaryManager::ParseSummaryFromJSON(StringRef path) {
-  std::ifstream t(path.str());
-  std::stringstream buffer;
-  buffer << t.rdbuf();
-
-  auto JSON = llvm::json::parse(buffer.str());
-  if (!JSON)
-    return;
-
-  llvm::json::Array *Summaries = JSON->getAsArray();
-  for(auto it = Summaries->begin(); it != Summaries->end(); ++it) {
-    llvm::json::Object *Summary = it->getAsObject();
+void SummaryManager::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
+  for (auto it = Summary.begin(); it != Summary.end(); ++it) {
+    const llvm::json::Object *FunctionSummary = it->getAsObject();
 
-    SmallString<128> ID(*Summary->getString("id"));
+    SmallString<128> ID(*FunctionSummary->getString("id"));
     std::set<const SummaryAttribute *> FunctionAttrs;
-    llvm::json::Array *FunctionAttributes = Summary->getObject("attrs")->getArray("function");
+    const llvm::json::Array *FunctionAttributes =
+        FunctionSummary->getObject("attrs")->getArray("function");
     for(auto attrIt = FunctionAttributes->begin(); attrIt != FunctionAttributes->end(); ++attrIt) {
       for (auto &&Attr : Attributes) {
         if (Attr->parse(*attrIt->getAsString()))
@@ -127,7 +117,7 @@ void SummaryManager::ParseSummaryFromJSON(StringRef path) {
     }
 
     std::set<SmallVector<char>> Calls;
-    llvm::json::Array *CallEntries = Summary->getArray("calls");
+    const llvm::json::Array *CallEntries = FunctionSummary->getArray("calls");
     for(auto callIt = CallEntries->begin(); callIt != CallEntries->end(); ++callIt) {
       auto *Obj = callIt->getAsObject();
       Calls.emplace(SmallString<128>(*Obj->getString("id")));

>From b5465c15d2e8bf8d9667894a775b96cdaa99c768 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 12 Jun 2025 01:17:07 +0200
Subject: [PATCH 12/48] [clang][Summary] the summary manager shouldn't contain
 the JSON summary consumer logic

---
 clang/include/clang/Sema/SemaSummarizer.h |  7 -------
 clang/lib/Frontend/FrontendAction.cpp     | 14 --------------
 clang/lib/Sema/SemaSummarizer.cpp         | 20 --------------------
 clang/lib/Sema/SummaryConsumer.cpp        | 16 +++++++++++++++-
 4 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SemaSummarizer.h
index 2c58591350b8e..7e0c5990e5de1 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SemaSummarizer.h
@@ -48,15 +48,8 @@ class SummaryManager {
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
 
-  void SerializeSummary(llvm::json::OStream &, const FunctionSummary &) const;
   void ParseSummaryFromJSON(const llvm::json::Array &Summary);
-
   void ReduceSummaries();
-
-  // FIXME: debug only, remove later
-  const std::vector<std::unique_ptr<FunctionSummary>> &getSummaries() {
-    return FunctionSummaries;
-  }
 };
 
 // FIXME: Is this class needed?
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index b8732e58e7ffd..dfbc3e0d14e91 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -996,21 +996,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         }
       }
 
-      // FIXME: debug only, remove later
-      for (auto &&S : CI.getSummaryManager().getSummaries()) {
-        llvm::json::OStream Out(llvm::errs());
-        CI.getSummaryManager().SerializeSummary(Out, *S);
-      }
-      llvm::errs() << '\n';
-
       CI.getSummaryManager().ReduceSummaries();
-
-      // FIXME: debug only, remove later
-      for (auto &&S : CI.getSummaryManager().getSummaries()) {
-        llvm::json::OStream Out(llvm::errs());
-        CI.getSummaryManager().SerializeSummary(Out, *S);
-      }
-      llvm::errs() << '\n';
     }
   }
 
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 5cd25fd563b90..615df8e78a90b 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -81,26 +81,6 @@ void SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
   CreateSummary(GetUSR(FD), std::move(Attrs), CallCollector().collect(FD));
 }
 
-void SummaryManager::SerializeSummary(llvm::json::OStream &JOS, const FunctionSummary &Summary) const {
-  JOS.object([&]{
-    JOS.attribute("id", llvm::json::Value(Summary.getID()));
-    JOS.attributeObject("attrs", [&] {
-      JOS.attributeArray("function", [&] {
-        for (auto &&Attr : Summary.getAttributes()) {
-          JOS.value(llvm::json::Value(Attr->serialize()));
-        }
-      });
-    });
-    JOS.attributeArray("calls", [&]{
-      for(auto &&Call : Summary.getCalls()) {
-        JOS.object([&]{
-          JOS.attribute("id", llvm::json::Value(Call));
-        });
-      }
-    });
-  });
-}
-
 void SummaryManager::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
   for (auto it = Summary.begin(); it != Summary.end(); ++it) {
     const llvm::json::Object *FunctionSummary = it->getAsObject();
diff --git a/clang/lib/Sema/SummaryConsumer.cpp b/clang/lib/Sema/SummaryConsumer.cpp
index 37646c1e07a7f..b10bc827f94ab 100644
--- a/clang/lib/Sema/SummaryConsumer.cpp
+++ b/clang/lib/Sema/SummaryConsumer.cpp
@@ -3,6 +3,20 @@
 
 namespace clang {
 void JSONPrintingSummaryConsumer::ProcessFunctionSummary(const FunctionSummary &Summary) {
-  TheSummaryManager->SerializeSummary(JOS, Summary);
+  JOS.object([&] {
+    JOS.attribute("id", llvm::json::Value(Summary.getID()));
+    JOS.attributeObject("attrs", [&] {
+      JOS.attributeArray("function", [&] {
+        for (auto &&Attr : Summary.getAttributes()) {
+          JOS.value(llvm::json::Value(Attr->serialize()));
+        }
+      });
+    });
+    JOS.attributeArray("calls", [&] {
+      for (auto &&Call : Summary.getCalls()) {
+        JOS.object([&] { JOS.attribute("id", llvm::json::Value(Call)); });
+      }
+    });
+  });
 }
 } // namespace clang
\ No newline at end of file

>From 1754b30c1a54261223b827da001ba078815b79ea Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 12 Jun 2025 01:25:59 +0200
Subject: [PATCH 13/48] [clang][Summary] rename description to attr

---
 clang/include/clang/Sema/SummaryAttribute.h | 5 ++---
 clang/lib/Sema/SemaSummarizer.cpp           | 2 +-
 clang/lib/Sema/SummaryAttribute.cpp         | 6 +++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index 22067da3a4b81..759710fb052e6 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -32,7 +32,7 @@ class SummaryAttribute {
   };
 };
 
-class NoWriteGlobalDescription : public SummaryAttribute {
+class NoWriteGlobalAttr : public SummaryAttribute {
   class Callback : public ast_matchers::MatchFinder::MatchCallback {
   public:
     bool WriteGlobal = false;
@@ -42,8 +42,7 @@ class NoWriteGlobalDescription : public SummaryAttribute {
   };
 
 public:
-  NoWriteGlobalDescription()
-      : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
+  NoWriteGlobalAttr() : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
 
   bool infer(const FunctionDecl *FD) const override final;
   bool merge(const FunctionSummary &Summary) const override final;
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 615df8e78a90b..63091b6f7fefc 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -47,7 +47,7 @@ FunctionSummary::FunctionSummary(
       Calls(std::move(Calls)) {}
 
 SummaryManager::SummaryManager() {
-  Attributes.emplace_back(std::make_unique<NoWriteGlobalDescription>());
+  Attributes.emplace_back(std::make_unique<NoWriteGlobalAttr>());
 
   for (auto &&Attr : Attributes) {
     assert(KindToAttribute.count(Attr->getKind()) == 0 &&
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
index c1611ef90fae5..48fda7077f4a3 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -2,7 +2,7 @@
 #include "clang/Sema/SemaSummarizer.h"
 
 namespace clang {
-void NoWriteGlobalDescription::Callback::run(
+void NoWriteGlobalAttr::Callback::run(
     const ast_matchers::MatchFinder::MatchResult &Result) {
   const auto *Assignment = Result.Nodes.getNodeAs<BinaryOperator>("assignment");
   if (!Assignment)
@@ -11,7 +11,7 @@ void NoWriteGlobalDescription::Callback::run(
   WriteGlobal = true;
 }
 
-bool NoWriteGlobalDescription::infer(const FunctionDecl *FD) const {
+bool NoWriteGlobalAttr::infer(const FunctionDecl *FD) const {
   using namespace ast_matchers;
   MatchFinder Finder;
   Callback CB;
@@ -26,7 +26,7 @@ bool NoWriteGlobalDescription::infer(const FunctionDecl *FD) const {
   return !CB.WriteGlobal;
 }
 
-bool NoWriteGlobalDescription::merge(const FunctionSummary &Summary) const {
+bool NoWriteGlobalAttr::merge(const FunctionSummary &Summary) const {
   return Summary.getAttributes().count(this);
 }
 } // namespace clang
\ No newline at end of file

>From 7e27c32c9a0c03a0fc995bb3710aac61494219ad Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Fri, 13 Jun 2025 00:21:50 +0200
Subject: [PATCH 14/48] [clang][Summary] more flexible merge logic

---
 clang/include/clang/Sema/SummaryAttribute.h | 6 ++++--
 clang/lib/Sema/SemaSummarizer.cpp           | 7 ++++---
 clang/lib/Sema/SummaryAttribute.cpp         | 6 ++++--
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index 759710fb052e6..0014a9878bacd 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -24,7 +24,8 @@ class SummaryAttribute {
   SummaryAttributeKind getKind() { return Kind; }
 
   virtual bool infer(const FunctionDecl *FD) const = 0;
-  virtual bool merge(const FunctionSummary &Summary) const = 0;
+  virtual bool merge(const FunctionSummary &Caller,
+                     const FunctionSummary &Callee) const = 0;
 
   virtual std::string serialize() const { return std::string(Serialzed); };
   virtual bool parse(std::string_view input) const {
@@ -45,7 +46,8 @@ class NoWriteGlobalAttr : public SummaryAttribute {
   NoWriteGlobalAttr() : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
 
   bool infer(const FunctionDecl *FD) const override final;
-  bool merge(const FunctionSummary &Summary) const override final;
+  bool merge(const FunctionSummary &Caller,
+             const FunctionSummary &Callee) const override final;
 };
 } // namespace clang
 
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SemaSummarizer.cpp
index 63091b6f7fefc..3b056fc53f8c9 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SemaSummarizer.cpp
@@ -73,6 +73,7 @@ SummaryManager::GetSummary(const FunctionDecl *FD) const {
 
 void SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
   std::set<const SummaryAttribute *> Attrs;
+
   for (auto &&Attr : Attributes) {
     if (Attr->infer(FD))
       Attrs.emplace(Attr.get());
@@ -122,9 +123,9 @@ bool SummaryManager::ReduceFunctionSummary(FunctionSummary &Function) {
 
     const FunctionSummary *callSummary = IDToSummary[call];
 
-    for (auto &&Attr : Function.getAttributes()) {
-      if (Attr->merge(*callSummary))
-        reducedAttrs.emplace(Attr);
+    for (auto &&Attr : Attributes) {
+      if (Attr->merge(Function, *callSummary))
+        reducedAttrs.emplace(Attr.get());
     }
 
     if (reducedAttrs != Function.getAttributes()) {
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
index 48fda7077f4a3..6f141dde68e52 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -26,7 +26,9 @@ bool NoWriteGlobalAttr::infer(const FunctionDecl *FD) const {
   return !CB.WriteGlobal;
 }
 
-bool NoWriteGlobalAttr::merge(const FunctionSummary &Summary) const {
-  return Summary.getAttributes().count(this);
+bool NoWriteGlobalAttr::merge(const FunctionSummary &Caller,
+                              const FunctionSummary &Callee) const {
+  return Caller.getAttributes().count(this) &&
+         Callee.getAttributes().count(this);
 }
 } // namespace clang
\ No newline at end of file

>From 4f8acb909effe739523a7342e5bf6dbb0ac75353 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 00:02:10 +0200
Subject: [PATCH 15/48] [clang][Summary] refactor summaries in the compiler
 instance and sema

---
 .../include/clang/Frontend/CompilerInstance.h | 41 ++++++++++---------
 clang/include/clang/Sema/Sema.h               |  8 ++--
 clang/include/clang/Sema/SummaryConsumer.h    | 27 ++++++------
 .../{SemaSummarizer.h => SummaryContext.h}    | 27 +++---------
 clang/lib/Frontend/CompilerInstance.cpp       | 19 +++++----
 clang/lib/Frontend/FrontendAction.cpp         | 14 +++----
 clang/lib/Sema/CMakeLists.txt                 |  2 +-
 clang/lib/Sema/Sema.cpp                       | 21 +++++-----
 clang/lib/Sema/SemaDecl.cpp                   |  8 ++--
 clang/lib/Sema/SummaryAttribute.cpp           |  2 +-
 clang/lib/Sema/SummaryConsumer.cpp            |  2 +-
 ...{SemaSummarizer.cpp => SummaryContext.cpp} | 35 ++++------------
 12 files changed, 89 insertions(+), 117 deletions(-)
 rename clang/include/clang/Sema/{SemaSummarizer.h => SummaryContext.h} (67%)
 rename clang/lib/Sema/{SemaSummarizer.cpp => SummaryContext.cpp} (82%)

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index f8f13bbc998d9..b107f15af9563 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -19,6 +19,7 @@
 #include "clang/Lex/DependencyDirectivesScanner.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/ModuleLoader.h"
+#include "clang/Sema/SummaryContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -48,6 +49,7 @@ class ModuleFile;
 }
 
 class CodeCompleteConsumer;
+class SummaryContext;
 class SummaryConsumer;
 class DiagnosticsEngine;
 class DiagnosticConsumer;
@@ -57,7 +59,6 @@ class Module;
 class ModuleCache;
 class Preprocessor;
 class Sema;
-class SummaryManager;
 class SourceManager;
 class TargetInfo;
 enum class DisableValidationForModuleKind;
@@ -125,9 +126,12 @@ class CompilerInstance : public ModuleLoader {
 
   /// The summary consumer.
   std::unique_ptr<SummaryConsumer> TheSummaryConsumer;
-  
-  /// The summary manager object.
-  std::unique_ptr<SummaryManager> TheSummaryManager;
+
+  /// The summary context.
+  std::unique_ptr<SummaryContext> SummaryCtx;
+
+  /// The summary output file.
+  std::unique_ptr<llvm::raw_fd_ostream> SummaryOS;
 
   /// The semantic analysis object.
   std::unique_ptr<Sema> TheSema;
@@ -524,15 +528,6 @@ class CompilerInstance : public ModuleLoader {
   /// setASTContext - Replace the current AST context.
   void setASTContext(ASTContext *Value);
 
-  bool hasSummaryManager() {
-    return TheSummaryManager != nullptr;
-  }
-
-  SummaryManager &getSummaryManager() {
-    assert(TheSummaryManager && "Compiler instance has no summary manager!");
-    return *TheSummaryManager;
-  }
-
   /// Replace the current Sema; the compiler instance takes ownership
   /// of S.
   void setSema(Sema *S);
@@ -624,10 +619,23 @@ class CompilerInstance : public ModuleLoader {
     return *CompletionConsumer;
   }
 
+  /// setCodeCompletionConsumer - Replace the current code completion consumer;
+  /// the compiler instance takes ownership of \p Value.
+  void setCodeCompletionConsumer(CodeCompleteConsumer *Value);
+
   /// @}
   /// @name Summary
   /// @{
 
+  bool hasSummaryContext() { return (bool)SummaryCtx; }
+
+  SummaryContext &getSummaryContext() {
+    assert(SummaryCtx && "Compiler instance has no summary context!");
+    return *SummaryCtx;
+  }
+
+  void createSummaryContext() { SummaryCtx.reset(new SummaryContext()); }
+
   bool hasSummaryConsumer() const { return (bool)TheSummaryConsumer; }
 
   SummaryConsumer &getSummaryConsumer() const {
@@ -636,9 +644,7 @@ class CompilerInstance : public ModuleLoader {
     return *TheSummaryConsumer;
   }
 
-  /// setCodeCompletionConsumer - Replace the current code completion consumer;
-  /// the compiler instance takes ownership of \p Value.
-  void setCodeCompletionConsumer(CodeCompleteConsumer *Value);
+  void createSummaryConsumer();
 
   /// @}
   /// @name Frontend timer
@@ -765,9 +771,6 @@ class CompilerInstance : public ModuleLoader {
       Preprocessor &PP, StringRef Filename, unsigned Line, unsigned Column,
       const CodeCompleteOptions &Opts, raw_ostream &OS);
 
-  void createSummaryConsumer();
-  void createSummaryManager();
-
   /// Create the Sema object to be used for parsing.
   void createSema(TranslationUnitKind TUKind,
                   CodeCompleteConsumer *CompletionConsumer,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b36fce47fb792..1a4d2e99685e0 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -125,7 +125,7 @@ class CXXBasePath;
 class CXXBasePaths;
 class CXXFieldCollector;
 class CodeCompleteConsumer;
-class SummaryManager;
+class SummaryContext;
 class SummaryConsumer;
 enum class ComparisonCategoryType : unsigned char;
 class ConstraintSatisfaction;
@@ -161,7 +161,6 @@ class SemaARM;
 class SemaAVR;
 class SemaBPF;
 class SemaCodeCompletion;
-class SemaSummarizer;
 class SemaCUDA;
 class SemaDirectX;
 class SemaHLSL;
@@ -887,7 +886,7 @@ class Sema final : public SemaBase {
   Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
        TranslationUnitKind TUKind = TU_Complete,
        CodeCompleteConsumer *CompletionConsumer = nullptr,
-       SummaryManager *SummaryManager = nullptr,
+       SummaryContext *SummaryCtx = nullptr,
        SummaryConsumer *SummaryConsumer = nullptr);
   ~Sema();
 
@@ -1265,6 +1264,8 @@ class Sema final : public SemaBase {
   DiagnosticsEngine &Diags;
   SourceManager &SourceMgr;
   api_notes::APINotesManager APINotes;
+  SummaryContext *SummaryCtx;
+  SummaryConsumer *SummaryCnsmr;
 
   /// A RAII object to enter scope of a compound statement.
   class CompoundScopeRAII {
@@ -1569,7 +1570,6 @@ class Sema final : public SemaBase {
   std::unique_ptr<SemaAVR> AVRPtr;
   std::unique_ptr<SemaBPF> BPFPtr;
   std::unique_ptr<SemaCodeCompletion> CodeCompletionPtr;
-  std::unique_ptr<SemaSummarizer> SummarizerPtr;
   std::unique_ptr<SemaCUDA> CUDAPtr;
   std::unique_ptr<SemaDirectX> DirectXPtr;
   std::unique_ptr<SemaHLSL> HLSLPtr;
diff --git a/clang/include/clang/Sema/SummaryConsumer.h b/clang/include/clang/Sema/SummaryConsumer.h
index 85c48b3c1b939..f8844ffb64e21 100644
--- a/clang/include/clang/Sema/SummaryConsumer.h
+++ b/clang/include/clang/Sema/SummaryConsumer.h
@@ -5,36 +5,37 @@
 #include "llvm/Support/JSON.h"
 namespace clang {
 class FunctionSummary;
-class SummaryManager;
+class SummaryContext;
 
 class SummaryConsumer {
 protected:
-    const SummaryManager *TheSummaryManager;
+  const SummaryContext *SummaryCtx;
 
 public:
-    SummaryConsumer(const SummaryManager &SummaryManager) : TheSummaryManager(&SummaryManager) {}
-    virtual ~SummaryConsumer() = default;
+  SummaryConsumer(const SummaryContext &SummaryCtx) : SummaryCtx(&SummaryCtx) {}
+  virtual ~SummaryConsumer() = default;
 
-    virtual void ProcessStartOfSourceFile() {};
-    virtual void ProcessFunctionSummary(const FunctionSummary&) {};
-    virtual void ProcessEndOfSourceFile() {};
+  virtual void ProcessStartOfSourceFile(){};
+  virtual void ProcessFunctionSummary(const FunctionSummary &){};
+  virtual void ProcessEndOfSourceFile(){};
 };
 
 class PrintingSummaryConsumer : public SummaryConsumer {
 public:
-    PrintingSummaryConsumer(const SummaryManager &SummaryManager, raw_ostream &OS)
-      : SummaryConsumer(SummaryManager) {}
+  PrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
+      : SummaryConsumer(SummaryCtx) {}
 };
 
 class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
     llvm::json::OStream JOS;
 
 public:
-    JSONPrintingSummaryConsumer(const SummaryManager &SummaryManager, raw_ostream &OS) : PrintingSummaryConsumer(SummaryManager, OS), JOS(OS, 2) {}
+  JSONPrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
+      : PrintingSummaryConsumer(SummaryCtx, OS), JOS(OS, 2) {}
 
-    void ProcessStartOfSourceFile() override { JOS.arrayBegin(); };
-    void ProcessFunctionSummary(const FunctionSummary&) override;
-    void ProcessEndOfSourceFile() override { JOS.arrayEnd(); };
+  void ProcessStartOfSourceFile() override { JOS.arrayBegin(); };
+  void ProcessFunctionSummary(const FunctionSummary &) override;
+  void ProcessEndOfSourceFile() override { JOS.arrayEnd(); };
 };
 } // namespace clang
 
diff --git a/clang/include/clang/Sema/SemaSummarizer.h b/clang/include/clang/Sema/SummaryContext.h
similarity index 67%
rename from clang/include/clang/Sema/SemaSummarizer.h
rename to clang/include/clang/Sema/SummaryContext.h
index 7e0c5990e5de1..67b1162d61763 100644
--- a/clang/include/clang/Sema/SemaSummarizer.h
+++ b/clang/include/clang/Sema/SummaryContext.h
@@ -1,7 +1,6 @@
-#ifndef LLVM_CLANG_SEMA_SEMASUMMARIZER_H
-#define LLVM_CLANG_SEMA_SEMASUMMARIZER_H
+#ifndef LLVM_CLANG_SEMA_SEMASUMMARYCONTEXT_H
+#define LLVM_CLANG_SEMA_SEMASUMMARYCONTEXT_H
 
-#include "clang/Sema/SemaBase.h"
 #include "clang/Sema/SummaryAttribute.h"
 #include "clang/Sema/SummaryConsumer.h"
 #include <set>
@@ -26,11 +25,9 @@ class FunctionSummary {
   void replaceAttributes(std::set<const SummaryAttribute *> Attrs) {
     this->Attrs = std::move(Attrs);
   }
-
-  friend class SummaryManager;
 };
 
-class SummaryManager {
+class SummaryContext {
   std::map<SmallVector<char>, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
 
@@ -43,7 +40,7 @@ class SummaryManager {
   bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
 
 public:
-  SummaryManager();
+  SummaryContext();
 
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
@@ -51,20 +48,6 @@ class SummaryManager {
   void ParseSummaryFromJSON(const llvm::json::Array &Summary);
   void ReduceSummaries();
 };
-
-// FIXME: Is this class needed?
-class SemaSummarizer : public SemaBase {
-public:
-  SummaryManager *TheSummaryManager;
-  SummaryConsumer *TheSummaryConsumer;
-
-  SemaSummarizer(Sema &S, SummaryManager &SummaryManager, SummaryConsumer *SummaryConsumer) 
-    : SemaBase(S), TheSummaryManager(&SummaryManager), TheSummaryConsumer(SummaryConsumer) {};
-
-  void ActOnStartOfSourceFile();
-  void ActOnEndOfSourceFile();
-  void SummarizeFunctionBody(const FunctionDecl *FD);
-};
 } // namespace clang
 
-#endif // LLVM_CLANG_SEMA_SEMASUMMARIZE_H
+#endif // LLVM_CLANG_SEMA_SEMASUMMARYCONTEXT_H
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index deed94e1f0b42..448d45b0f49db 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -37,7 +37,6 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
-#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Sema/SummaryConsumer.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
@@ -749,22 +748,24 @@ void CompilerInstance::createSummaryConsumer() {
     return;
 
   std::error_code EC;
-  // FIXME: this being static is a design error
-  static llvm::raw_fd_ostream SummaryOS(SummaryFile, EC, llvm::sys::fs::CD_CreateAlways);
+  SummaryOS.reset(new llvm::raw_fd_ostream(SummaryFile, EC,
+                                           llvm::sys::fs::CD_CreateAlways));
 
-  if(!EC)
-    TheSummaryConsumer.reset(new JSONPrintingSummaryConsumer(getSummaryManager(), SummaryOS));
-}
+  if (EC) {
+    SummaryOS = nullptr;
+    return;
+  }
 
-void CompilerInstance::createSummaryManager() {
-  TheSummaryManager.reset(new SummaryManager());
+  TheSummaryConsumer.reset(
+      new JSONPrintingSummaryConsumer(getSummaryContext(), *SummaryOS));
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
                                   CodeCompleteConsumer *CompletionConsumer,
                                   SummaryConsumer *SummaryConsumer) {
   TheSema.reset(new Sema(getPreprocessor(), getASTContext(), getASTConsumer(),
-                         TUKind, CompletionConsumer, hasSummaryManager() ? &getSummaryManager() : nullptr, SummaryConsumer));
+                         TUKind, CompletionConsumer, &getSummaryContext(),
+                         SummaryConsumer));
 
   // Set up API notes.
   TheSema->APINotes.setSwiftVersion(getAPINotesOpts().SwiftVersion);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index dfbc3e0d14e91..2821327c6a824 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -35,7 +35,7 @@
 #include "clang/Parse/ParseAST.h"
 #include "clang/Sema/HLSLExternalSemaSource.h"
 #include "clang/Sema/MultiplexExternalSemaSource.h"
-#include "clang/Sema/SemaSummarizer.h"
+#include "clang/Sema/SummaryContext.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
@@ -895,8 +895,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     }
   }
 
-  if(!CI.hasSummaryManager()) {
-    CI.createSummaryManager();
+  if (!CI.hasSummaryContext()) {
+    CI.createSummaryContext();
   }
 
   // Set up embedding for any specified files. Do this before we load any
@@ -992,11 +992,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
           if (!JSON)
             continue;
 
-          CI.getSummaryManager().ParseSummaryFromJSON(*JSON->getAsArray());
+          CI.getSummaryContext().ParseSummaryFromJSON(*JSON->getAsArray());
         }
       }
 
-      CI.getSummaryManager().ReduceSummaries();
+      CI.getSummaryContext().ReduceSummaries();
     }
   }
 
@@ -1371,8 +1371,8 @@ void ASTFrontendAction::ExecuteAction() {
   if (CI.hasCodeCompletionConsumer())
     CompletionConsumer = &CI.getCodeCompletionConsumer();
 
-  if(!CI.hasSummaryManager()) {
-    CI.createSummaryManager();
+  if (!CI.hasSummaryContext()) {
+    CI.createSummaryContext();
   }
   CI.createSummaryConsumer();
 
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index bf8f73dc985db..9d5a593813bd3 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -85,7 +85,6 @@ add_clang_library(clangSema
   SemaStmt.cpp
   SemaStmtAsm.cpp
   SemaStmtAttr.cpp
-  SemaSummarizer.cpp
   SemaSPIRV.cpp
   SemaSYCL.cpp
   SemaSwift.cpp
@@ -101,6 +100,7 @@ add_clang_library(clangSema
   SemaX86.cpp
   SummaryAttribute.cpp
   SummaryConsumer.cpp
+  SummaryContext.cpp
   TypeLocBuilder.cpp
 
   DEPENDS
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 5b3f1a4dca36e..2870871876701 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -64,11 +64,11 @@
 #include "clang/Sema/SemaRISCV.h"
 #include "clang/Sema/SemaSPIRV.h"
 #include "clang/Sema/SemaSYCL.h"
-#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Sema/SemaSwift.h"
 #include "clang/Sema/SemaSystemZ.h"
 #include "clang/Sema/SemaWasm.h"
 #include "clang/Sema/SemaX86.h"
+#include "clang/Sema/SummaryContext.h"
 #include "clang/Sema/TemplateDeduction.h"
 #include "clang/Sema/TemplateInstCallback.h"
 #include "clang/Sema/TypoCorrection.h"
@@ -250,12 +250,12 @@ const uint64_t Sema::MaximumAlignment;
 
 Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
            TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter,
-           SummaryManager *SummaryManager,
-           SummaryConsumer *SummaryConsumer)
+           SummaryContext *SummaryCtx, SummaryConsumer *SummaryConsumer)
     : SemaBase(*this), CollectStats(false), TUKind(TUKind),
       CurFPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp),
       Context(ctxt), Consumer(consumer), Diags(PP.getDiagnostics()),
       SourceMgr(PP.getSourceManager()), APINotes(SourceMgr, LangOpts),
+      SummaryCtx(SummaryCtx), SummaryCnsmr(SummaryConsumer),
       AnalysisWarnings(*this), ThreadSafetyDeclCache(nullptr),
       LateTemplateParser(nullptr), LateTemplateParserCleanup(nullptr),
       OpaqueParser(nullptr), CurContext(nullptr), ExternalSource(nullptr),
@@ -266,8 +266,6 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       BPFPtr(std::make_unique<SemaBPF>(*this)),
       CodeCompletionPtr(
           std::make_unique<SemaCodeCompletion>(*this, CodeCompleter)),
-      SummarizerPtr(SummaryManager ? std::make_unique<SemaSummarizer>(*this, *SummaryManager, SummaryConsumer)
-                                    : nullptr),
       CUDAPtr(std::make_unique<SemaCUDA>(*this)),
       DirectXPtr(std::make_unique<SemaDirectX>(*this)),
       HLSLPtr(std::make_unique<SemaHLSL>(*this)),
@@ -309,6 +307,8 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       AccessCheckingSFINAE(false), CurrentInstantiationScope(nullptr),
       InNonInstantiationSFINAEContext(false), NonInstantiationEntries(0),
       ArgPackSubstIndex(std::nullopt), SatisfactionCache(Context) {
+  assert((!SummaryConsumer || SummaryCtx) &&
+         "summary consumer without a summary context");
   assert(pp.TUKind == TUKind);
   TUScope = nullptr;
 
@@ -1147,9 +1147,9 @@ void Sema::ActOnStartOfTranslationUnit() {
   if (getLangOpts().CPlusPlusModules &&
       getLangOpts().getCompilingModule() == LangOptions::CMK_HeaderUnit)
     HandleStartOfHeaderUnit();
-  
-  if(SummarizerPtr)
-    SummarizerPtr->ActOnStartOfSourceFile();
+
+  if (SummaryCnsmr)
+    SummaryCnsmr->ProcessStartOfSourceFile();
 }
 
 void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) {
@@ -1225,8 +1225,9 @@ void Sema::ActOnEndOfTranslationUnit() {
   assert(DelayedDiagnostics.getCurrentPool() == nullptr
          && "reached end of translation unit with a pool attached?");
 
-  if(SummarizerPtr)
-    SummarizerPtr->ActOnEndOfSourceFile();
+  if (SummaryCnsmr)
+    SummaryCnsmr->ProcessEndOfSourceFile();
+
   // If code completion is enabled, don't perform any end-of-translation-unit
   // work.
   if (PP.isCodeCompletionEnabled())
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 45f1523868f75..1aa7a1cf178bd 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -55,9 +55,9 @@
 #include "clang/Sema/SemaPPC.h"
 #include "clang/Sema/SemaRISCV.h"
 #include "clang/Sema/SemaSYCL.h"
-#include "clang/Sema/SemaSummarizer.h"
 #include "clang/Sema/SemaSwift.h"
 #include "clang/Sema/SemaWasm.h"
+#include "clang/Sema/SummaryContext.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -16695,8 +16695,10 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   if (FD && !FD->isDeleted())
     checkTypeSupport(FD->getType(), FD->getLocation(), FD);
 
-  if (FD && SummarizerPtr && SummarizerPtr->TheSummaryConsumer)
-    SummarizerPtr->SummarizeFunctionBody(FD);
+  if (FD && SummaryCnsmr) {
+    SummaryCtx->SummarizeFunctionBody(FD);
+    SummaryCnsmr->ProcessFunctionSummary(*SummaryCtx->GetSummary(FD));
+  }
 
   return dcl;
 }
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
index 6f141dde68e52..86d422c16fa30 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -1,5 +1,5 @@
 #include "clang/Sema/SummaryAttribute.h"
-#include "clang/Sema/SemaSummarizer.h"
+#include "clang/Sema/SummaryContext.h"
 
 namespace clang {
 void NoWriteGlobalAttr::Callback::run(
diff --git a/clang/lib/Sema/SummaryConsumer.cpp b/clang/lib/Sema/SummaryConsumer.cpp
index b10bc827f94ab..043873f236b93 100644
--- a/clang/lib/Sema/SummaryConsumer.cpp
+++ b/clang/lib/Sema/SummaryConsumer.cpp
@@ -1,5 +1,5 @@
 #include "clang/Sema/SummaryConsumer.h"
-#include "clang/Sema/SemaSummarizer.h"
+#include "clang/Sema/SummaryContext.h"
 
 namespace clang {
 void JSONPrintingSummaryConsumer::ProcessFunctionSummary(const FunctionSummary &Summary) {
diff --git a/clang/lib/Sema/SemaSummarizer.cpp b/clang/lib/Sema/SummaryContext.cpp
similarity index 82%
rename from clang/lib/Sema/SemaSummarizer.cpp
rename to clang/lib/Sema/SummaryContext.cpp
index 3b056fc53f8c9..6b7207eedb5e5 100644
--- a/clang/lib/Sema/SemaSummarizer.cpp
+++ b/clang/lib/Sema/SummaryContext.cpp
@@ -1,4 +1,4 @@
-#include "clang/Sema/SemaSummarizer.h"
+#include "clang/Sema/SummaryContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Index/USRGeneration.h"
 #include "clang/Sema/SummaryAttribute.h"
@@ -46,7 +46,7 @@ FunctionSummary::FunctionSummary(
     : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
       Calls(std::move(Calls)) {}
 
-SummaryManager::SummaryManager() {
+SummaryContext::SummaryContext() {
   Attributes.emplace_back(std::make_unique<NoWriteGlobalAttr>());
 
   for (auto &&Attr : Attributes) {
@@ -56,7 +56,7 @@ SummaryManager::SummaryManager() {
   }
 }
 
-void SummaryManager::CreateSummary(SmallVector<char> ID,
+void SummaryContext::CreateSummary(SmallVector<char> ID,
                                    std::set<const SummaryAttribute *> Attrs,
                                    std::set<SmallVector<char>> Calls) {
   auto Summary = std::make_unique<FunctionSummary>(
@@ -66,12 +66,12 @@ void SummaryManager::CreateSummary(SmallVector<char> ID,
 }
 
 const FunctionSummary *
-SummaryManager::GetSummary(const FunctionDecl *FD) const {
+SummaryContext::GetSummary(const FunctionDecl *FD) const {
   auto USR = GetUSR(FD);
   return IDToSummary.count(USR) ? IDToSummary.at(USR) : nullptr;
 }
 
-void SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
+void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
   std::set<const SummaryAttribute *> Attrs;
 
   for (auto &&Attr : Attributes) {
@@ -82,7 +82,7 @@ void SummaryManager::SummarizeFunctionBody(const FunctionDecl *FD) {
   CreateSummary(GetUSR(FD), std::move(Attrs), CallCollector().collect(FD));
 }
 
-void SummaryManager::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
+void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
   for (auto it = Summary.begin(); it != Summary.end(); ++it) {
     const llvm::json::Object *FunctionSummary = it->getAsObject();
 
@@ -108,7 +108,7 @@ void SummaryManager::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
   }
 }
 
-bool SummaryManager::ReduceFunctionSummary(FunctionSummary &Function) {
+bool SummaryContext::ReduceFunctionSummary(FunctionSummary &Function) {
   bool changed = false;
 
   for (auto &&call : Function.getCalls()) {
@@ -137,7 +137,7 @@ bool SummaryManager::ReduceFunctionSummary(FunctionSummary &Function) {
   return changed;
 }
 
-void SummaryManager::ReduceSummaries() {
+void SummaryContext::ReduceSummaries() {
   bool changed = true;
   while (changed) {
     changed = false;
@@ -146,23 +146,4 @@ void SummaryManager::ReduceSummaries() {
       changed |= ReduceFunctionSummary(*Function);
   }
 }
-
-void SemaSummarizer::ActOnStartOfSourceFile() {
-  if(TheSummaryConsumer)
-    TheSummaryConsumer->ProcessStartOfSourceFile();
-}
-
-void SemaSummarizer::ActOnEndOfSourceFile() {
-  if(TheSummaryConsumer)
-    TheSummaryConsumer->ProcessEndOfSourceFile();
-}
-
-void SemaSummarizer::SummarizeFunctionBody(const FunctionDecl *FD) {
-  TheSummaryManager->SummarizeFunctionBody(FD);
-
-  if(TheSummaryConsumer)
-    TheSummaryConsumer->ProcessFunctionSummary(
-        *TheSummaryManager->GetSummary(FD));
-}
-
 } // namespace clang

>From d635e3221b8218337b0f80d54eae390d417bba5d Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 00:17:47 +0200
Subject: [PATCH 16/48] [clang][Summary] give the summary consumer a default
 value to keep the diff smaller

---
 clang/include/clang/Frontend/CompilerInstance.h | 2 +-
 clang/lib/Frontend/ChainedIncludesSource.cpp    | 2 +-
 clang/lib/Frontend/FrontendActions.cpp          | 2 +-
 clang/lib/Testing/TestAST.cpp                   | 2 +-
 clang/unittests/CodeGen/TestCompiler.h          | 2 +-
 clang/unittests/Frontend/CodeGenActionTest.cpp  | 2 +-
 clang/unittests/Sema/ExternalSemaSourceTest.cpp | 2 +-
 clang/unittests/Sema/SemaLookupTest.cpp         | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index b107f15af9563..7cb82e587cdfe 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -774,7 +774,7 @@ class CompilerInstance : public ModuleLoader {
   /// Create the Sema object to be used for parsing.
   void createSema(TranslationUnitKind TUKind,
                   CodeCompleteConsumer *CompletionConsumer,
-                  SummaryConsumer *SummaryConsumer);
+                  SummaryConsumer *SummaryConsumer = nullptr);
 
   /// Create the frontend timer and replace any existing one with it.
   void createFrontendTimer();
diff --git a/clang/lib/Frontend/ChainedIncludesSource.cpp b/clang/lib/Frontend/ChainedIncludesSource.cpp
index 437f5387375f7..95b0ed248d545 100644
--- a/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -142,7 +142,7 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
     Clang->getASTContext().setASTMutationListener(
                                             consumer->GetASTMutationListener());
     Clang->setASTConsumer(std::move(consumer));
-    Clang->createSema(TU_Prefix, nullptr, nullptr);
+    Clang->createSema(TU_Prefix, nullptr);
 
     if (firstInclude) {
       Preprocessor &PP = Clang->getPreprocessor();
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 49f1420c75047..8c75e1a46da54 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -52,7 +52,7 @@ void EnsureSemaIsCreated(CompilerInstance &CI, FrontendAction &Action) {
 
   if (!CI.hasSema())
     CI.createSema(Action.getTranslationUnitKind(),
-                  GetCodeCompletionConsumer(CI), nullptr);
+                  GetCodeCompletionConsumer(CI));
 }
 } // namespace
 
diff --git a/clang/lib/Testing/TestAST.cpp b/clang/lib/Testing/TestAST.cpp
index db0490689da53..748f59b856e83 100644
--- a/clang/lib/Testing/TestAST.cpp
+++ b/clang/lib/Testing/TestAST.cpp
@@ -69,7 +69,7 @@ void createMissingComponents(CompilerInstance &Clang) {
   if (!Clang.hasASTContext())
     Clang.createASTContext();
   if (!Clang.hasSema())
-    Clang.createSema(TU_Complete, /*CodeCompleteConsumer=*/nullptr, nullptr);
+    Clang.createSema(TU_Complete, /*CodeCompleteConsumer=*/nullptr);
 }
 
 } // namespace
diff --git a/clang/unittests/CodeGen/TestCompiler.h b/clang/unittests/CodeGen/TestCompiler.h
index 760fa340c3d74..a6fec7fb0945d 100644
--- a/clang/unittests/CodeGen/TestCompiler.h
+++ b/clang/unittests/CodeGen/TestCompiler.h
@@ -69,7 +69,7 @@ struct TestCompiler {
 
     compiler.setASTConsumer(std::move(Consumer));
 
-    compiler.createSema(clang::TU_Prefix, nullptr, nullptr);
+    compiler.createSema(clang::TU_Prefix, nullptr);
 
     clang::SourceManager &sm = compiler.getSourceManager();
     sm.setMainFileID(sm.createFileID(
diff --git a/clang/unittests/Frontend/CodeGenActionTest.cpp b/clang/unittests/Frontend/CodeGenActionTest.cpp
index e958ea1993a4a..90818b72cd6e6 100644
--- a/clang/unittests/Frontend/CodeGenActionTest.cpp
+++ b/clang/unittests/Frontend/CodeGenActionTest.cpp
@@ -37,7 +37,7 @@ class NullCodeGenAction : public CodeGenAction {
     if (!CI.hasPreprocessor())
       return;
     if (!CI.hasSema())
-      CI.createSema(getTranslationUnitKind(), nullptr, nullptr);
+      CI.createSema(getTranslationUnitKind(), nullptr);
   }
 };
 
diff --git a/clang/unittests/Sema/ExternalSemaSourceTest.cpp b/clang/unittests/Sema/ExternalSemaSourceTest.cpp
index d223a7135ee84..2b271d4bf7825 100644
--- a/clang/unittests/Sema/ExternalSemaSourceTest.cpp
+++ b/clang/unittests/Sema/ExternalSemaSourceTest.cpp
@@ -194,7 +194,7 @@ class ExternalSemaSourceInstaller : public clang::ASTFrontendAction {
   void ExecuteAction() override {
     CompilerInstance &CI = getCompilerInstance();
     ASSERT_FALSE(CI.hasSema());
-    CI.createSema(getTranslationUnitKind(), nullptr, nullptr);
+    CI.createSema(getTranslationUnitKind(), nullptr);
     ASSERT_TRUE(CI.hasDiagnostics());
     DiagnosticsEngine &Diagnostics = CI.getDiagnostics();
     DiagnosticConsumer *Client = Diagnostics.getClient();
diff --git a/clang/unittests/Sema/SemaLookupTest.cpp b/clang/unittests/Sema/SemaLookupTest.cpp
index 96c27945421f9..d97b571f6a37c 100644
--- a/clang/unittests/Sema/SemaLookupTest.cpp
+++ b/clang/unittests/Sema/SemaLookupTest.cpp
@@ -22,7 +22,7 @@ class LookupAction : public ASTFrontendAction {
   void ExecuteAction() override {
     CompilerInstance &CI = getCompilerInstance();
     ASSERT_FALSE(CI.hasSema());
-    CI.createSema(getTranslationUnitKind(), nullptr, nullptr);
+    CI.createSema(getTranslationUnitKind(), nullptr);
     ASSERT_TRUE(CI.hasSema());
     Sema &S = CI.getSema();
     ParseAST(S);

>From 757c0d6d3250203cdb70dc559303f4d8007c4ed5 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 02:01:47 +0200
Subject: [PATCH 17/48] [clang][Summary] change frontend action and summary
 interaction

---
 clang/lib/Frontend/FrontendAction.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 2821327c6a824..8e66d6b8a6c27 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -895,10 +895,6 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     }
   }
 
-  if (!CI.hasSummaryContext()) {
-    CI.createSummaryContext();
-  }
-
   // Set up embedding for any specified files. Do this before we load any
   // source files, including the primary module map for the compilation.
   for (const auto &F : CI.getFrontendOpts().ModulesEmbedFiles) {
@@ -969,7 +965,12 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     }
   }
 
-  // FIXME: lookup dirs recursively
+  bool ProcessesSummaries = !CI.getFrontendOpts().SummaryDirPath.empty() ||
+                            !CI.getFrontendOpts().SummaryFile.empty();
+  if (ProcessesSummaries && !CI.hasSummaryContext())
+    CI.createSummaryContext();
+
+  // FIXME: cleanup and lookup dirs recursively
   if (!CI.getFrontendOpts().SummaryDirPath.empty()) {
     FileManager &FileMgr = CI.getFileManager();
 
@@ -1371,12 +1372,10 @@ void ASTFrontendAction::ExecuteAction() {
   if (CI.hasCodeCompletionConsumer())
     CompletionConsumer = &CI.getCodeCompletionConsumer();
 
-  if (!CI.hasSummaryContext()) {
-    CI.createSummaryContext();
-  }
-  CI.createSummaryConsumer();
+  if (!CI.getFrontendOpts().SummaryFile.empty())
+    CI.createSummaryConsumer();
 
-  // Use a code completion consumer?
+  // Use a summary consumer?
   SummaryConsumer *SummaryConsumer = nullptr;
   if (CI.hasSummaryConsumer())
     SummaryConsumer = &CI.getSummaryConsumer();

>From f511c22187178d637b184d15fb2effaf1c08dd19 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 02:21:29 +0200
Subject: [PATCH 18/48] [clang][Summary] explicitly flush summary

---
 clang/include/clang/Sema/SummaryConsumer.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Sema/SummaryConsumer.h b/clang/include/clang/Sema/SummaryConsumer.h
index f8844ffb64e21..3d308b8464ef8 100644
--- a/clang/include/clang/Sema/SummaryConsumer.h
+++ b/clang/include/clang/Sema/SummaryConsumer.h
@@ -35,7 +35,10 @@ class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
 
   void ProcessStartOfSourceFile() override { JOS.arrayBegin(); };
   void ProcessFunctionSummary(const FunctionSummary &) override;
-  void ProcessEndOfSourceFile() override { JOS.arrayEnd(); };
+  void ProcessEndOfSourceFile() override {
+    JOS.arrayEnd();
+    JOS.flush();
+  };
 };
 } // namespace clang
 

>From 6964f2c54db47c9ab748af19a55f4af761ed786b Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 03:00:33 +0200
Subject: [PATCH 19/48] [clang][analyzer][Summary] pass summaries to the
 analyzer

---
 .../StaticAnalyzer/Core/PathSensitive/ExprEngine.h    | 11 +++++++----
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp          |  5 +++--
 .../lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp  |  7 ++++++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index b8a4dcbc727a6..5e3b338be6e68 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -21,18 +21,19 @@
 #include "clang/Analysis/DomainSpecific/ObjCNoReturn.h"
 #include "clang/Analysis/ProgramPoint.h"
 #include "clang/Basic/LLVM.h"
-#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/Sema/SummaryContext.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
 #include "llvm/ADT/ArrayRef.h"
 #include <cassert>
@@ -178,10 +179,12 @@ class ExprEngine {
   /// The flag, which specifies the mode of inlining for the engine.
   InliningModes HowToInline;
 
+  const SummaryContext *SummaryCtx;
+
 public:
   ExprEngine(cross_tu::CrossTranslationUnitContext &CTU, AnalysisManager &mgr,
-             SetOfConstDecls *VisitedCalleesIn,
-             FunctionSummariesTy *FS, InliningModes HowToInlineIn);
+             SetOfConstDecls *VisitedCalleesIn, FunctionSummariesTy *FS,
+             InliningModes HowToInlineIn, const SummaryContext *SummaryCtx);
 
   virtual ~ExprEngine() = default;
 
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 1afd4b52eb354..c4836fb9d0aac 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -223,7 +223,8 @@ static const char* TagProviderName = "ExprEngine";
 
 ExprEngine::ExprEngine(cross_tu::CrossTranslationUnitContext &CTU,
                        AnalysisManager &mgr, SetOfConstDecls *VisitedCalleesIn,
-                       FunctionSummariesTy *FS, InliningModes HowToInlineIn)
+                       FunctionSummariesTy *FS, InliningModes HowToInlineIn,
+                       const SummaryContext *SummaryCtx)
     : CTU(CTU), IsCTUEnabled(mgr.getAnalyzerOptions().IsNaiveCTUEnabled),
       AMgr(mgr), AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
       Engine(*this, FS, mgr.getAnalyzerOptions()), G(Engine.getGraph()),
@@ -232,7 +233,7 @@ ExprEngine::ExprEngine(cross_tu::CrossTranslationUnitContext &CTU,
       SymMgr(StateMgr.getSymbolManager()), MRMgr(StateMgr.getRegionManager()),
       svalBuilder(StateMgr.getSValBuilder()), ObjCNoRet(mgr.getASTContext()),
       BR(mgr, *this), VisitedCallees(VisitedCalleesIn),
-      HowToInline(HowToInlineIn) {
+      HowToInline(HowToInlineIn), SummaryCtx(SummaryCtx) {
   unsigned TrimInterval = mgr.options.GraphTrimInterval;
   if (TrimInterval != 0) {
     // Enable eager node reclamation when constructing the ExplodedGraph.
diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 491aa93c96e49..b87014e4dcd00 100644
--- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -93,6 +93,7 @@ class AnalysisConsumer : public AnalysisASTConsumer,
   ArrayRef<std::string> Plugins;
   std::unique_ptr<CodeInjector> Injector;
   cross_tu::CrossTranslationUnitContext CTU;
+  const SummaryContext *SummaryCtx = nullptr; // Null if CI has none.
 
   /// Stores the declarations from the local translation unit.
   /// Note, we pre-compute the local declarations at parse time as an
@@ -152,6 +153,9 @@ class AnalysisConsumer : public AnalysisASTConsumer,
     if (Opts.ShouldDisplayMacroExpansions)
       MacroExpansions.registerForPreprocessor(PP);
 
+    if (CI.hasSummaryContext())
+      SummaryCtx = &CI.getSummaryContext();
+
     // Visitor options.
     ShouldWalkTypesOfTypeLocs = false;
   }
@@ -761,7 +765,8 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D,
   if (!Mgr->getAnalysisDeclContext(D)->getAnalysis<RelaxedLiveVariables>())
     return;
 
-  ExprEngine Eng(CTU, *Mgr, VisitedCallees, &FunctionSummaries, IMode);
+  ExprEngine Eng(CTU, *Mgr, VisitedCallees, &FunctionSummaries, IMode,
+                 SummaryCtx);
 
   // Execute the worklist algorithm.
   llvm::TimeRecord ExprEngineStartTime;

>From 81b039f8abc76f83583a809bc9e363122062305f Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 03:47:15 +0200
Subject: [PATCH 20/48] [analyzer] don't invalidate global regions if a
 function doesn't write them

---
 clang/include/clang/Sema/SummaryContext.h     |  1 +
 .../Core/PathSensitive/ExprEngine.h           |  2 ++
 clang/lib/Sema/SummaryContext.cpp             |  5 +++++
 clang/lib/StaticAnalyzer/Core/CallEvent.cpp   | 19 +++++++++++++++----
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Sema/SummaryContext.h b/clang/include/clang/Sema/SummaryContext.h
index 67b1162d61763..2675a85583d69 100644
--- a/clang/include/clang/Sema/SummaryContext.h
+++ b/clang/include/clang/Sema/SummaryContext.h
@@ -42,6 +42,7 @@ class SummaryContext {
 public:
   SummaryContext();
 
+  const SummaryAttribute *GetAttribute(SummaryAttributeKind kind) const;
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index 5e3b338be6e68..68a8004a8ae26 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -217,6 +217,8 @@ class ExprEngine {
     return &CTU;
   }
 
+  const SummaryContext *getSummaryCtx() { return SummaryCtx; }
+
   const NodeBuilderContext &getBuilderContext() {
     assert(currBldrCtx);
     return *currBldrCtx;
diff --git a/clang/lib/Sema/SummaryContext.cpp b/clang/lib/Sema/SummaryContext.cpp
index 6b7207eedb5e5..50771d2da0963 100644
--- a/clang/lib/Sema/SummaryContext.cpp
+++ b/clang/lib/Sema/SummaryContext.cpp
@@ -65,6 +65,11 @@ void SummaryContext::CreateSummary(SmallVector<char> ID,
   IDToSummary[SummaryPtr->getID()] = SummaryPtr;
 }
 
+const SummaryAttribute *
+SummaryContext::GetAttribute(SummaryAttributeKind kind) const {
+  return KindToAttribute.at(kind);
+}
+
 const FunctionSummary *
 SummaryContext::GetSummary(const FunctionDecl *FD) const {
   auto USR = GetUSR(FD);
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 583315f4f3a90..aa9a236ead9a3 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -277,13 +277,24 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
             ValuesToInvalidate.push_back(loc::MemRegionVal(TVR));
   }
 
+  bool ShouldPreserveGlobals = false;
+  const SummaryContext *SummaryCtx =
+      State->getStateManager().getOwningEngine().getSummaryCtx();
+  if (SummaryCtx) {
+    const auto *Summary =
+        SummaryCtx->GetSummary(llvm::dyn_cast<FunctionDecl>(getDecl()));
+    ShouldPreserveGlobals =
+        Summary && Summary->getAttributes().count(
+                       SummaryCtx->GetAttribute(NO_WRITE_GLOBAL));
+  }
+
   // Invalidate designated regions using the batch invalidation API.
   // NOTE: Even if RegionsToInvalidate is empty, we may still invalidate
   //  global variables.
-  return Result->invalidateRegions(ValuesToInvalidate, getCFGElementRef(),
-                                   BlockCount, getLocationContext(),
-                                   /*CausedByPointerEscape*/ true,
-                                   /*Symbols=*/nullptr, this, &ETraits);
+  return Result->invalidateRegions(
+      ValuesToInvalidate, getCFGElementRef(), BlockCount, getLocationContext(),
+      /*CausedByPointerEscape*/ true,
+      /*Symbols=*/nullptr, ShouldPreserveGlobals ? nullptr : this, &ETraits);
 }
 
 ProgramPoint CallEvent::getProgramPoint(bool IsPreVisit,

>From 016dcdb4707a0b8369ca0ef6a0fe788d1fb2f78f Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 14:50:08 +0200
Subject: [PATCH 21/48] [clang][Summary] refactor summary attributes

---
 clang/include/clang/Sema/SummaryAttribute.h | 36 +++++++++++++--------
 clang/include/clang/Sema/SummaryContext.h   | 30 ++++++++++-------
 clang/lib/Sema/SummaryContext.cpp           | 35 +++++++++-----------
 clang/lib/StaticAnalyzer/Core/CallEvent.cpp |  9 +++---
 4 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index 0014a9878bacd..b3b8d452dfd18 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -3,37 +3,40 @@
 
 #include "clang/AST/Decl.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
-#include <string>
 
 namespace clang {
-enum SummaryAttributeKind {
+enum SummaryAttrKind {
   NO_WRITE_GLOBAL,
 };
 
 class FunctionSummary;
+class SummaryContext;
 
-class SummaryAttribute {
-  const SummaryAttributeKind Kind;
-  std::string_view Serialzed;
+class SummaryAttr {
+  const SummaryAttrKind Kind;
+  const char *Spelling;
+
+protected:
+  SummaryAttr(SummaryAttrKind Kind, const char *Spelling)
+      : Kind(Kind), Spelling(Spelling){};
 
 public:
-  SummaryAttribute(SummaryAttributeKind Attr, const char *Str)
-      : Kind(Attr), Serialzed(Str) {}
-  virtual ~SummaryAttribute() = default;
+  virtual ~SummaryAttr() = default;
 
-  SummaryAttributeKind getKind() { return Kind; }
+  SummaryAttrKind getKind() const { return Kind; }
+  const char *getSpelling() const { return Spelling; }
 
   virtual bool infer(const FunctionDecl *FD) const = 0;
   virtual bool merge(const FunctionSummary &Caller,
                      const FunctionSummary &Callee) const = 0;
 
-  virtual std::string serialize() const { return std::string(Serialzed); };
+  virtual std::string serialize() const { return std::string(Spelling); };
   virtual bool parse(std::string_view input) const {
-    return input == Serialzed;
+    return input == Spelling;
   };
 };
 
-class NoWriteGlobalAttr : public SummaryAttribute {
+class NoWriteGlobalAttr : public SummaryAttr {
   class Callback : public ast_matchers::MatchFinder::MatchCallback {
   public:
     bool WriteGlobal = false;
@@ -42,12 +45,17 @@ class NoWriteGlobalAttr : public SummaryAttribute {
     run(const ast_matchers::MatchFinder::MatchResult &Result) override final;
   };
 
-public:
-  NoWriteGlobalAttr() : SummaryAttribute(NO_WRITE_GLOBAL, "no_write_global") {}
+  NoWriteGlobalAttr() : SummaryAttr(NO_WRITE_GLOBAL, "no_write_global") {}
 
+public:
   bool infer(const FunctionDecl *FD) const override final;
   bool merge(const FunctionSummary &Caller,
              const FunctionSummary &Callee) const override final;
+
+  static bool classof(const SummaryAttr *A) {
+    return A->getKind() == NO_WRITE_GLOBAL;
+  }
+  friend class SummaryContext;
 };
 } // namespace clang
 
diff --git a/clang/include/clang/Sema/SummaryContext.h b/clang/include/clang/Sema/SummaryContext.h
index 2675a85583d69..c142484b131dc 100644
--- a/clang/include/clang/Sema/SummaryContext.h
+++ b/clang/include/clang/Sema/SummaryContext.h
@@ -8,21 +8,27 @@
 namespace clang {
 class FunctionSummary {
   SmallVector<char> ID;
-  std::set<const SummaryAttribute *> Attrs;
+  std::set<const SummaryAttr *> Attrs;
   std::set<SmallVector<char>> Calls;
 
 public:
-  FunctionSummary(SmallVector<char> ID,
-                  std::set<const SummaryAttribute *> Attrs,
+  FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttr *> Attrs,
                   std::set<SmallVector<char>> Calls);
 
   SmallVector<char> getID() const { return ID; }
-  const std::set<const SummaryAttribute *> &getAttributes() const {
-    return Attrs;
-  }
+  const std::set<const SummaryAttr *> &getAttributes() const { return Attrs; }
   const std::set<SmallVector<char>> &getCalls() const { return Calls; }
 
-  void replaceAttributes(std::set<const SummaryAttribute *> Attrs) {
+  template <typename T> bool hasAttribute() const {
+    for (auto &&attr : Attrs) {
+      if (llvm::isa<T>(attr))
+        return true;
+    }
+
+    return false;
+  }
+
+  void replaceAttributes(std::set<const SummaryAttr *> Attrs) {
     this->Attrs = std::move(Attrs);
   }
 };
@@ -31,18 +37,18 @@ class SummaryContext {
   std::map<SmallVector<char>, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
 
-  std::map<SummaryAttributeKind, const SummaryAttribute *> KindToAttribute;
-  std::vector<std::unique_ptr<SummaryAttribute>> Attributes;
+  std::map<SummaryAttrKind, const SummaryAttr *> KindToAttribute;
+  std::vector<std::unique_ptr<SummaryAttr>> Attributes;
 
-  void CreateSummary(SmallVector<char> ID,
-                     std::set<const SummaryAttribute *> Attrs,
+  void CreateSummary(SmallVector<char> ID, std::set<const SummaryAttr *> Attrs,
                      std::set<SmallVector<char>> Calls);
   bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
 
+  template <typename T> void registerAttr();
+
 public:
   SummaryContext();
 
-  const SummaryAttribute *GetAttribute(SummaryAttributeKind kind) const;
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
 
diff --git a/clang/lib/Sema/SummaryContext.cpp b/clang/lib/Sema/SummaryContext.cpp
index 50771d2da0963..b4e49451030c0 100644
--- a/clang/lib/Sema/SummaryContext.cpp
+++ b/clang/lib/Sema/SummaryContext.cpp
@@ -40,24 +40,26 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
 };
 } // namespace
 
-FunctionSummary::FunctionSummary(
-    SmallVector<char> ID, std::set<const SummaryAttribute *> FunctionAttrs,
-    std::set<SmallVector<char>> Calls)
+FunctionSummary::FunctionSummary(SmallVector<char> ID,
+                                 std::set<const SummaryAttr *> FunctionAttrs,
+                                 std::set<SmallVector<char>> Calls)
     : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
       Calls(std::move(Calls)) {}
 
-SummaryContext::SummaryContext() {
-  Attributes.emplace_back(std::make_unique<NoWriteGlobalAttr>());
+template <typename T> void SummaryContext::registerAttr() {
+  std::unique_ptr<T> attr(new T());
+  SummaryAttrKind Kind = attr->getKind();
 
-  for (auto &&Attr : Attributes) {
-    assert(KindToAttribute.count(Attr->getKind()) == 0 &&
-           "Attr already registered");
-    KindToAttribute[Attr->getKind()] = Attr.get();
-  }
+  if (KindToAttribute.count(Kind))
+    return;
+
+  KindToAttribute[Kind] = Attributes.emplace_back(std::move(attr)).get();
 }
 
+SummaryContext::SummaryContext() { registerAttr<NoWriteGlobalAttr>(); }
+
 void SummaryContext::CreateSummary(SmallVector<char> ID,
-                                   std::set<const SummaryAttribute *> Attrs,
+                                   std::set<const SummaryAttr *> Attrs,
                                    std::set<SmallVector<char>> Calls) {
   auto Summary = std::make_unique<FunctionSummary>(
       std::move(ID), std::move(Attrs), std::move(Calls));
@@ -65,11 +67,6 @@ void SummaryContext::CreateSummary(SmallVector<char> ID,
   IDToSummary[SummaryPtr->getID()] = SummaryPtr;
 }
 
-const SummaryAttribute *
-SummaryContext::GetAttribute(SummaryAttributeKind kind) const {
-  return KindToAttribute.at(kind);
-}
-
 const FunctionSummary *
 SummaryContext::GetSummary(const FunctionDecl *FD) const {
   auto USR = GetUSR(FD);
@@ -77,7 +74,7 @@ SummaryContext::GetSummary(const FunctionDecl *FD) const {
 }
 
 void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
-  std::set<const SummaryAttribute *> Attrs;
+  std::set<const SummaryAttr *> Attrs;
 
   for (auto &&Attr : Attributes) {
     if (Attr->infer(FD))
@@ -92,7 +89,7 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
     const llvm::json::Object *FunctionSummary = it->getAsObject();
 
     SmallString<128> ID(*FunctionSummary->getString("id"));
-    std::set<const SummaryAttribute *> FunctionAttrs;
+    std::set<const SummaryAttr *> FunctionAttrs;
     const llvm::json::Array *FunctionAttributes =
         FunctionSummary->getObject("attrs")->getArray("function");
     for(auto attrIt = FunctionAttributes->begin(); attrIt != FunctionAttributes->end(); ++attrIt) {
@@ -117,7 +114,7 @@ bool SummaryContext::ReduceFunctionSummary(FunctionSummary &Function) {
   bool changed = false;
 
   for (auto &&call : Function.getCalls()) {
-    std::set<const SummaryAttribute *> reducedAttrs;
+    std::set<const SummaryAttr *> reducedAttrs;
 
     // If we don't have a summary about a called function, we forget
     // everything about the current one as well.
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index aa9a236ead9a3..a4885f62be627 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -280,12 +280,11 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
   bool ShouldPreserveGlobals = false;
   const SummaryContext *SummaryCtx =
       State->getStateManager().getOwningEngine().getSummaryCtx();
-  if (SummaryCtx) {
-    const auto *Summary =
-        SummaryCtx->GetSummary(llvm::dyn_cast<FunctionDecl>(getDecl()));
+  const FunctionDecl *FD = llvm::dyn_cast<FunctionDecl>(getDecl());
+  if (SummaryCtx && FD) {
+    const auto *Summary = SummaryCtx->GetSummary(FD);
     ShouldPreserveGlobals =
-        Summary && Summary->getAttributes().count(
-                       SummaryCtx->GetAttribute(NO_WRITE_GLOBAL));
+        Summary && Summary->hasAttribute<NoWriteGlobalAttr>();
   }
 
   // Invalidate designated regions using the batch invalidation API.

>From 115584471eb25cd5211d8bda0854c17b2bf3eee4 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 14:57:16 +0200
Subject: [PATCH 22/48] [Summary] move the ast matcher callback out of the
 attribute declaration

---
 clang/include/clang/Sema/SummaryAttribute.h |  8 -------
 clang/lib/Sema/SummaryAttribute.cpp         | 25 ++++++++++++---------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Sema/SummaryAttribute.h
index b3b8d452dfd18..27ceede1e4486 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Sema/SummaryAttribute.h
@@ -37,14 +37,6 @@ class SummaryAttr {
 };
 
 class NoWriteGlobalAttr : public SummaryAttr {
-  class Callback : public ast_matchers::MatchFinder::MatchCallback {
-  public:
-    bool WriteGlobal = false;
-
-    void
-    run(const ast_matchers::MatchFinder::MatchResult &Result) override final;
-  };
-
   NoWriteGlobalAttr() : SummaryAttr(NO_WRITE_GLOBAL, "no_write_global") {}
 
 public:
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Sema/SummaryAttribute.cpp
index 86d422c16fa30..6bb6b938184de 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Sema/SummaryAttribute.cpp
@@ -2,19 +2,24 @@
 #include "clang/Sema/SummaryContext.h"
 
 namespace clang {
-void NoWriteGlobalAttr::Callback::run(
-    const ast_matchers::MatchFinder::MatchResult &Result) {
-  const auto *Assignment = Result.Nodes.getNodeAs<BinaryOperator>("assignment");
-  if (!Assignment)
-    return;
-
-  WriteGlobal = true;
-}
-
 bool NoWriteGlobalAttr::infer(const FunctionDecl *FD) const {
   using namespace ast_matchers;
   MatchFinder Finder;
-  Callback CB;
+
+  class Callback : public ast_matchers::MatchFinder::MatchCallback {
+  public:
+    bool WriteGlobal = false;
+
+    void
+    run(const ast_matchers::MatchFinder::MatchResult &Result) override final {
+      const auto *Assignment =
+          Result.Nodes.getNodeAs<BinaryOperator>("assignment");
+      if (!Assignment)
+        return;
+
+      WriteGlobal = true;
+    }
+  } CB;
 
   Finder.addMatcher(
       functionDecl(forEachDescendant(

>From 49937a6570ff49042b72a286896647dcdd926cb9 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 15:07:56 +0200
Subject: [PATCH 23/48] [clang] don't crash if there is no summary consumer

---
 clang/lib/Frontend/CompilerInstance.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 448d45b0f49db..9682a01fab226 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -764,7 +764,8 @@ void CompilerInstance::createSema(TranslationUnitKind TUKind,
                                   CodeCompleteConsumer *CompletionConsumer,
                                   SummaryConsumer *SummaryConsumer) {
   TheSema.reset(new Sema(getPreprocessor(), getASTContext(), getASTConsumer(),
-                         TUKind, CompletionConsumer, &getSummaryContext(),
+                         TUKind, CompletionConsumer,
+                         hasSummaryContext() ? &getSummaryContext() : nullptr,
                          SummaryConsumer));
 
   // Set up API notes.

>From 2499ff2e7f92537a5617f7c91a6dfb14e0c2e179 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 15:50:54 +0200
Subject: [PATCH 24/48] [Driver][Summary] implement emitting summary next to
 the object file

---
 clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 48b19615ab08f..f61bb983900f4 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5473,7 +5473,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_summaries_dir_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_summaries_dir_EQ);
 
-  // FIXME: This needs to be cleaned up and needs proper error handling as well.
   if (const Arg *A = Args.getLastArg(options::OPT_emit_summaries_EQ)) {
     llvm::SmallString<10> input;
     for (const auto &II : Inputs) {
@@ -5485,14 +5484,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
 
     if (!input.empty()) {
-      if (A->containsValue("cwd")) {
-        llvm::SmallString<10> filename = llvm::sys::path::filename(input);
-        llvm::sys::path::replace_extension(filename, "json");
+      Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
+      StringRef filename = llvm::sys::path::filename(input);
+      llvm::SmallString<10> summaryFile;
+
+      if (A->containsValue("cwd") || !FinalOutput) {
+        summaryFile = filename;
+      } else if (A->containsValue("obj") && FinalOutput) {
+        summaryFile = llvm::sys::path::parent_path(FinalOutput->getValue());
+        llvm::sys::path::append(summaryFile, filename);
+      }
 
+      if (!summaryFile.empty()) {
+        llvm::sys::path::replace_extension(summaryFile, "json");
         CmdArgs.push_back(
-            Args.MakeArgString(Twine("-summary-file=") + filename));
-      } else if (A->containsValue("obj")) {
-        // FIXME: implement
+            Args.MakeArgString(Twine("-summary-file=") + summaryFile));
       }
     }
   }

>From d44d9d051426b72ebf9f2b0de1e7b11ef2ae5635 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 16:23:29 +0200
Subject: [PATCH 25/48] [Summary] move summary related logic into a separate
 library

---
 clang/include/clang/Frontend/CompilerInstance.h    |  2 +-
 .../StaticAnalyzer/Core/PathSensitive/ExprEngine.h |  2 +-
 .../clang/{Sema => Summary}/SummaryAttribute.h     |  6 +++---
 .../clang/{Sema => Summary}/SummaryConsumer.h      |  8 ++++----
 .../clang/{Sema => Summary}/SummaryContext.h       | 10 +++++-----
 clang/lib/CMakeLists.txt                           |  1 +
 clang/lib/Frontend/CompilerInstance.cpp            |  2 +-
 clang/lib/Frontend/FrontendAction.cpp              |  2 +-
 clang/lib/Sema/CMakeLists.txt                      |  4 +---
 clang/lib/Sema/Sema.cpp                            |  2 +-
 clang/lib/Sema/SemaDecl.cpp                        |  2 +-
 clang/lib/Summary/CMakeLists.txt                   | 14 ++++++++++++++
 clang/lib/{Sema => Summary}/SummaryAttribute.cpp   |  4 ++--
 clang/lib/{Sema => Summary}/SummaryConsumer.cpp    |  7 ++++---
 clang/lib/{Sema => Summary}/SummaryContext.cpp     | 12 +++++++-----
 15 files changed, 47 insertions(+), 31 deletions(-)
 rename clang/include/clang/{Sema => Summary}/SummaryAttribute.h (90%)
 rename clang/include/clang/{Sema => Summary}/SummaryConsumer.h (87%)
 rename clang/include/clang/{Sema => Summary}/SummaryContext.h (87%)
 create mode 100644 clang/lib/Summary/CMakeLists.txt
 rename clang/lib/{Sema => Summary}/SummaryAttribute.cpp (92%)
 rename clang/lib/{Sema => Summary}/SummaryConsumer.cpp (74%)
 rename clang/lib/{Sema => Summary}/SummaryContext.cpp (92%)

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 7cb82e587cdfe..4a15fbf042cce 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -19,7 +19,7 @@
 #include "clang/Lex/DependencyDirectivesScanner.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/ModuleLoader.h"
-#include "clang/Sema/SummaryContext.h"
+#include "clang/Summary/SummaryContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index 68a8004a8ae26..e349dcbbfb9c4 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -21,7 +21,6 @@
 #include "clang/Analysis/DomainSpecific/ObjCNoReturn.h"
 #include "clang/Analysis/ProgramPoint.h"
 #include "clang/Basic/LLVM.h"
-#include "clang/Sema/SummaryContext.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
@@ -35,6 +34,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
+#include "clang/Summary/SummaryContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include <cassert>
 #include <optional>
diff --git a/clang/include/clang/Sema/SummaryAttribute.h b/clang/include/clang/Summary/SummaryAttribute.h
similarity index 90%
rename from clang/include/clang/Sema/SummaryAttribute.h
rename to clang/include/clang/Summary/SummaryAttribute.h
index 27ceede1e4486..46de04a8169eb 100644
--- a/clang/include/clang/Sema/SummaryAttribute.h
+++ b/clang/include/clang/Summary/SummaryAttribute.h
@@ -1,5 +1,5 @@
-#ifndef LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTE_H
-#define LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTE_H
+#ifndef LLVM_CLANG_SUMMARY_SUMMARYATTRIBUTE_H
+#define LLVM_CLANG_SUMMARY_SUMMARYATTRIBUTE_H
 
 #include "clang/AST/Decl.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
@@ -51,4 +51,4 @@ class NoWriteGlobalAttr : public SummaryAttr {
 };
 } // namespace clang
 
-#endif // LLVM_CLANG_SEMA_SEMASUMMARYATTRIBUTEH
+#endif // LLVM_CLANG_SUMMARY_SUMMARYATTRIBUTEH
diff --git a/clang/include/clang/Sema/SummaryConsumer.h b/clang/include/clang/Summary/SummaryConsumer.h
similarity index 87%
rename from clang/include/clang/Sema/SummaryConsumer.h
rename to clang/include/clang/Summary/SummaryConsumer.h
index 3d308b8464ef8..f0b14b3db7c79 100644
--- a/clang/include/clang/Sema/SummaryConsumer.h
+++ b/clang/include/clang/Summary/SummaryConsumer.h
@@ -1,5 +1,5 @@
-#ifndef LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
-#define LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
+#ifndef LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
+#define LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
 
 #include "clang/Basic/LLVM.h"
 #include "llvm/Support/JSON.h"
@@ -27,7 +27,7 @@ class PrintingSummaryConsumer : public SummaryConsumer {
 };
 
 class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
-    llvm::json::OStream JOS;
+  llvm::json::OStream JOS;
 
 public:
   JSONPrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
@@ -42,4 +42,4 @@ class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
 };
 } // namespace clang
 
-#endif // LLVM_CLANG_SEMA_SUMMARYCONSUMER_H
+#endif // LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
diff --git a/clang/include/clang/Sema/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
similarity index 87%
rename from clang/include/clang/Sema/SummaryContext.h
rename to clang/include/clang/Summary/SummaryContext.h
index c142484b131dc..194f89a4b4007 100644
--- a/clang/include/clang/Sema/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -1,8 +1,8 @@
-#ifndef LLVM_CLANG_SEMA_SEMASUMMARYCONTEXT_H
-#define LLVM_CLANG_SEMA_SEMASUMMARYCONTEXT_H
+#ifndef LLVM_CLANG_SUMMARY_SUMMARYCONTEXT_H
+#define LLVM_CLANG_SUMMARY_SUMMARYCONTEXT_H
 
-#include "clang/Sema/SummaryAttribute.h"
-#include "clang/Sema/SummaryConsumer.h"
+#include "clang/Summary/SummaryAttribute.h"
+#include "clang/Summary/SummaryConsumer.h"
 #include <set>
 
 namespace clang {
@@ -57,4 +57,4 @@ class SummaryContext {
 };
 } // namespace clang
 
-#endif // LLVM_CLANG_SEMA_SEMASUMMARYCONTEXTH
+#endif // LLVM_CLANG_SUMMARY_SUMMARYCONTEXTH
diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt
index 4f2218b583e41..96f0ccd7d1c88 100644
--- a/clang/lib/CMakeLists.txt
+++ b/clang/lib/CMakeLists.txt
@@ -9,6 +9,7 @@ add_subdirectory(CrossTU)
 add_subdirectory(Sema)
 add_subdirectory(CodeGen)
 add_subdirectory(Analysis)
+add_subdirectory(Summary)
 add_subdirectory(Edit)
 add_subdirectory(ExtractAPI)
 add_subdirectory(Rewrite)
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 9682a01fab226..c75a4056e2847 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -37,11 +37,11 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
-#include "clang/Sema/SummaryConsumer.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
 #include "clang/Serialization/ModuleCache.h"
+#include "clang/Summary/SummaryConsumer.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 8e66d6b8a6c27..966c514e04a96 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -35,10 +35,10 @@
 #include "clang/Parse/ParseAST.h"
 #include "clang/Sema/HLSLExternalSemaSource.h"
 #include "clang/Sema/MultiplexExternalSemaSource.h"
-#include "clang/Sema/SummaryContext.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
+#include "clang/Summary/SummaryContext.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/BuryPointer.h"
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 9d5a593813bd3..940456e15f9f8 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -98,9 +98,6 @@ add_clang_library(clangSema
   SemaType.cpp
   SemaWasm.cpp
   SemaX86.cpp
-  SummaryAttribute.cpp
-  SummaryConsumer.cpp
-  SummaryContext.cpp
   TypeLocBuilder.cpp
 
   DEPENDS
@@ -117,4 +114,5 @@ add_clang_library(clangSema
   clangEdit
   clangLex
   clangSupport
+  clangSummary
   )
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 2870871876701..5c5fb005a3172 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -68,10 +68,10 @@
 #include "clang/Sema/SemaSystemZ.h"
 #include "clang/Sema/SemaWasm.h"
 #include "clang/Sema/SemaX86.h"
-#include "clang/Sema/SummaryContext.h"
 #include "clang/Sema/TemplateDeduction.h"
 #include "clang/Sema/TemplateInstCallback.h"
 #include "clang/Sema/TypoCorrection.h"
+#include "clang/Summary/SummaryContext.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 1aa7a1cf178bd..740fccc3aad33 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -57,8 +57,8 @@
 #include "clang/Sema/SemaSYCL.h"
 #include "clang/Sema/SemaSwift.h"
 #include "clang/Sema/SemaWasm.h"
-#include "clang/Sema/SummaryContext.h"
 #include "clang/Sema/Template.h"
+#include "clang/Summary/SummaryContext.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
diff --git a/clang/lib/Summary/CMakeLists.txt b/clang/lib/Summary/CMakeLists.txt
new file mode 100644
index 0000000000000..269d09cb10d16
--- /dev/null
+++ b/clang/lib/Summary/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  Support
+  )
+
+add_clang_library(clangSummary
+  SummaryAttribute.cpp
+  SummaryConsumer.cpp
+  SummaryContext.cpp
+
+  LINK_LIBS
+  clangAST
+  clangSupport
+  )
diff --git a/clang/lib/Sema/SummaryAttribute.cpp b/clang/lib/Summary/SummaryAttribute.cpp
similarity index 92%
rename from clang/lib/Sema/SummaryAttribute.cpp
rename to clang/lib/Summary/SummaryAttribute.cpp
index 6bb6b938184de..c9a69140c5494 100644
--- a/clang/lib/Sema/SummaryAttribute.cpp
+++ b/clang/lib/Summary/SummaryAttribute.cpp
@@ -1,5 +1,5 @@
-#include "clang/Sema/SummaryAttribute.h"
-#include "clang/Sema/SummaryContext.h"
+#include "clang/Summary/SummaryAttribute.h"
+#include "clang/Summary/SummaryContext.h"
 
 namespace clang {
 bool NoWriteGlobalAttr::infer(const FunctionDecl *FD) const {
diff --git a/clang/lib/Sema/SummaryConsumer.cpp b/clang/lib/Summary/SummaryConsumer.cpp
similarity index 74%
rename from clang/lib/Sema/SummaryConsumer.cpp
rename to clang/lib/Summary/SummaryConsumer.cpp
index 043873f236b93..716248ffc9e24 100644
--- a/clang/lib/Sema/SummaryConsumer.cpp
+++ b/clang/lib/Summary/SummaryConsumer.cpp
@@ -1,8 +1,9 @@
-#include "clang/Sema/SummaryConsumer.h"
-#include "clang/Sema/SummaryContext.h"
+#include "clang/Summary/SummaryConsumer.h"
+#include "clang/Summary/SummaryContext.h"
 
 namespace clang {
-void JSONPrintingSummaryConsumer::ProcessFunctionSummary(const FunctionSummary &Summary) {
+void JSONPrintingSummaryConsumer::ProcessFunctionSummary(
+    const FunctionSummary &Summary) {
   JOS.object([&] {
     JOS.attribute("id", llvm::json::Value(Summary.getID()));
     JOS.attributeObject("attrs", [&] {
diff --git a/clang/lib/Sema/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
similarity index 92%
rename from clang/lib/Sema/SummaryContext.cpp
rename to clang/lib/Summary/SummaryContext.cpp
index b4e49451030c0..6cb4b7a716200 100644
--- a/clang/lib/Sema/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -1,8 +1,8 @@
-#include "clang/Sema/SummaryContext.h"
+#include "clang/Summary/SummaryContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Index/USRGeneration.h"
-#include "clang/Sema/SummaryAttribute.h"
-#include "clang/Sema/SummaryConsumer.h"
+#include "clang/Summary/SummaryAttribute.h"
+#include "clang/Summary/SummaryConsumer.h"
 #include <set>
 
 namespace clang {
@@ -92,7 +92,8 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
     std::set<const SummaryAttr *> FunctionAttrs;
     const llvm::json::Array *FunctionAttributes =
         FunctionSummary->getObject("attrs")->getArray("function");
-    for(auto attrIt = FunctionAttributes->begin(); attrIt != FunctionAttributes->end(); ++attrIt) {
+    for (auto attrIt = FunctionAttributes->begin();
+         attrIt != FunctionAttributes->end(); ++attrIt) {
       for (auto &&Attr : Attributes) {
         if (Attr->parse(*attrIt->getAsString()))
           FunctionAttrs.emplace(Attr.get());
@@ -101,7 +102,8 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
 
     std::set<SmallVector<char>> Calls;
     const llvm::json::Array *CallEntries = FunctionSummary->getArray("calls");
-    for(auto callIt = CallEntries->begin(); callIt != CallEntries->end(); ++callIt) {
+    for (auto callIt = CallEntries->begin(); callIt != CallEntries->end();
+         ++callIt) {
       auto *Obj = callIt->getAsObject();
       Calls.emplace(SmallString<128>(*Obj->getString("id")));
     }

>From 9e94174cf526adf00825311b5cb8f69a1baff69b Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 21:01:39 +0200
Subject: [PATCH 26/48] link clangSummary against clangIndex

---
 clang/lib/Summary/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Summary/CMakeLists.txt b/clang/lib/Summary/CMakeLists.txt
index 269d09cb10d16..a8d7d065d11c2 100644
--- a/clang/lib/Summary/CMakeLists.txt
+++ b/clang/lib/Summary/CMakeLists.txt
@@ -11,4 +11,5 @@ add_clang_library(clangSummary
   LINK_LIBS
   clangAST
   clangSupport
+  clangIndex
   )

>From 41c286bb417cd053d351d1710e3195dbac5f73a7 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 21:05:25 +0200
Subject: [PATCH 27/48] format

---
 clang/include/clang/Summary/SummaryAttribute.h | 2 +-
 clang/include/clang/Summary/SummaryConsumer.h  | 6 +++---
 clang/lib/Frontend/CompilerInstance.cpp        | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryAttribute.h b/clang/include/clang/Summary/SummaryAttribute.h
index 46de04a8169eb..a736305b25f5b 100644
--- a/clang/include/clang/Summary/SummaryAttribute.h
+++ b/clang/include/clang/Summary/SummaryAttribute.h
@@ -18,7 +18,7 @@ class SummaryAttr {
 
 protected:
   SummaryAttr(SummaryAttrKind Kind, const char *Spelling)
-      : Kind(Kind), Spelling(Spelling){};
+      : Kind(Kind), Spelling(Spelling) {};
 
 public:
   virtual ~SummaryAttr() = default;
diff --git a/clang/include/clang/Summary/SummaryConsumer.h b/clang/include/clang/Summary/SummaryConsumer.h
index f0b14b3db7c79..a9f8abb78aff9 100644
--- a/clang/include/clang/Summary/SummaryConsumer.h
+++ b/clang/include/clang/Summary/SummaryConsumer.h
@@ -15,9 +15,9 @@ class SummaryConsumer {
   SummaryConsumer(const SummaryContext &SummaryCtx) : SummaryCtx(&SummaryCtx) {}
   virtual ~SummaryConsumer() = default;
 
-  virtual void ProcessStartOfSourceFile(){};
-  virtual void ProcessFunctionSummary(const FunctionSummary &){};
-  virtual void ProcessEndOfSourceFile(){};
+  virtual void ProcessStartOfSourceFile() {};
+  virtual void ProcessFunctionSummary(const FunctionSummary &) {};
+  virtual void ProcessEndOfSourceFile() {};
 };
 
 class PrintingSummaryConsumer : public SummaryConsumer {
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index c75a4056e2847..634e5234a5912 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -743,8 +743,8 @@ CompilerInstance::createCodeCompletionConsumer(Preprocessor &PP,
 }
 
 void CompilerInstance::createSummaryConsumer() {
-  const std::string& SummaryFile = getFrontendOpts().SummaryFile;
-  if(SummaryFile.empty())
+  const std::string &SummaryFile = getFrontendOpts().SummaryFile;
+  if (SummaryFile.empty())
     return;
 
   std::error_code EC;

>From 103956b7d9caafe46b2fa45fad1173f4bb77e54a Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 21:55:29 +0200
Subject: [PATCH 28/48] make the summary context in the expression engine
 nullptr by default

---
 .../clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h       | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index e349dcbbfb9c4..ac010e424da40 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -184,7 +184,8 @@ class ExprEngine {
 public:
   ExprEngine(cross_tu::CrossTranslationUnitContext &CTU, AnalysisManager &mgr,
              SetOfConstDecls *VisitedCalleesIn, FunctionSummariesTy *FS,
-             InliningModes HowToInlineIn, const SummaryContext *SummaryCtx);
+             InliningModes HowToInlineIn,
+             const SummaryContext *SummaryCtx = nullptr);
 
   virtual ~ExprEngine() = default;
 

>From 8a5a967ba801aa81e1f4c039224da7ec4afcbb19 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 22:33:28 +0200
Subject: [PATCH 29/48] initialize SummaryCtx to nullptr in AnalysisConsumer

---
 clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index b87014e4dcd00..06b7f434187df 100644
--- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -128,7 +128,7 @@ class AnalysisConsumer : public AnalysisASTConsumer,
                    std::unique_ptr<CodeInjector> injector)
       : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr),
         PP(CI.getPreprocessor()), OutDir(outdir), Opts(opts), Plugins(plugins),
-        Injector(std::move(injector)), CTU(CI),
+        Injector(std::move(injector)), CTU(CI), SummaryCtx(nullptr),
         MacroExpansions(CI.getLangOpts()) {
     EntryPointStat::lockRegistry();
     DigestAnalyzerOptions();

>From f71dd84730578ed4850fbfe4171bbfad5df23dc3 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 14 Jun 2025 23:01:58 +0200
Subject: [PATCH 30/48] handle when call event doesn't have a decl

---
 clang/lib/StaticAnalyzer/Core/CallEvent.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index a4885f62be627..2e42a8c06c2b4 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -280,7 +280,7 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
   bool ShouldPreserveGlobals = false;
   const SummaryContext *SummaryCtx =
       State->getStateManager().getOwningEngine().getSummaryCtx();
-  const FunctionDecl *FD = llvm::dyn_cast<FunctionDecl>(getDecl());
+  const FunctionDecl *FD = llvm::dyn_cast_or_null<FunctionDecl>(getDecl());
   if (SummaryCtx && FD) {
     const auto *Summary = SummaryCtx->GetSummary(FD);
     ShouldPreserveGlobals =

>From daccc5088ee32162a452ed71362ca2dc98fbc8ee Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 19 Jun 2025 01:32:34 +0200
Subject: [PATCH 31/48] [Summary] flag functions that call virtual funtions and
 non-functions

---
 .../include/clang/Summary/SummaryAttribute.h  |  4 +-
 clang/include/clang/Summary/SummaryContext.h  |  6 ++-
 clang/lib/Summary/SummaryAttribute.cpp        |  6 +--
 clang/lib/Summary/SummaryConsumer.cpp         | 11 ++--
 clang/lib/Summary/SummaryContext.cpp          | 51 ++++++++++++-------
 5 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryAttribute.h b/clang/include/clang/Summary/SummaryAttribute.h
index a736305b25f5b..4a9ebb0a684a8 100644
--- a/clang/include/clang/Summary/SummaryAttribute.h
+++ b/clang/include/clang/Summary/SummaryAttribute.h
@@ -28,7 +28,7 @@ class SummaryAttr {
 
   virtual bool infer(const FunctionDecl *FD) const = 0;
   virtual bool merge(const FunctionSummary &Caller,
-                     const FunctionSummary &Callee) const = 0;
+                     const FunctionSummary *Callee) const = 0;
 
   virtual std::string serialize() const { return std::string(Spelling); };
   virtual bool parse(std::string_view input) const {
@@ -42,7 +42,7 @@ class NoWriteGlobalAttr : public SummaryAttr {
 public:
   bool infer(const FunctionDecl *FD) const override final;
   bool merge(const FunctionSummary &Caller,
-             const FunctionSummary &Callee) const override final;
+             const FunctionSummary *Callee) const override final;
 
   static bool classof(const SummaryAttr *A) {
     return A->getKind() == NO_WRITE_GLOBAL;
diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index 194f89a4b4007..9de88e4d81b35 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -10,14 +10,16 @@ class FunctionSummary {
   SmallVector<char> ID;
   std::set<const SummaryAttr *> Attrs;
   std::set<SmallVector<char>> Calls;
+  bool CallsOpaque;
 
 public:
   FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttr *> Attrs,
-                  std::set<SmallVector<char>> Calls);
+                  std::set<SmallVector<char>> Calls, bool CallsOpaque);
 
   SmallVector<char> getID() const { return ID; }
   const std::set<const SummaryAttr *> &getAttributes() const { return Attrs; }
   const std::set<SmallVector<char>> &getCalls() const { return Calls; }
+  bool callsOpaqueObject() const { return CallsOpaque; }
 
   template <typename T> bool hasAttribute() const {
     for (auto &&attr : Attrs) {
@@ -41,7 +43,7 @@ class SummaryContext {
   std::vector<std::unique_ptr<SummaryAttr>> Attributes;
 
   void CreateSummary(SmallVector<char> ID, std::set<const SummaryAttr *> Attrs,
-                     std::set<SmallVector<char>> Calls);
+                     std::set<SmallVector<char>> Calls, bool CallsOpaque);
   bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
 
   template <typename T> void registerAttr();
diff --git a/clang/lib/Summary/SummaryAttribute.cpp b/clang/lib/Summary/SummaryAttribute.cpp
index c9a69140c5494..d28c548611375 100644
--- a/clang/lib/Summary/SummaryAttribute.cpp
+++ b/clang/lib/Summary/SummaryAttribute.cpp
@@ -32,8 +32,8 @@ bool NoWriteGlobalAttr::infer(const FunctionDecl *FD) const {
 }
 
 bool NoWriteGlobalAttr::merge(const FunctionSummary &Caller,
-                              const FunctionSummary &Callee) const {
-  return Caller.getAttributes().count(this) &&
-         Callee.getAttributes().count(this);
+                              const FunctionSummary *Callee) const {
+  return !Caller.callsOpaqueObject() && Caller.getAttributes().count(this) &&
+         Callee && Callee->getAttributes().count(this);
 }
 } // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Summary/SummaryConsumer.cpp b/clang/lib/Summary/SummaryConsumer.cpp
index 716248ffc9e24..308f26abed0f4 100644
--- a/clang/lib/Summary/SummaryConsumer.cpp
+++ b/clang/lib/Summary/SummaryConsumer.cpp
@@ -13,10 +13,13 @@ void JSONPrintingSummaryConsumer::ProcessFunctionSummary(
         }
       });
     });
-    JOS.attributeArray("calls", [&] {
-      for (auto &&Call : Summary.getCalls()) {
-        JOS.object([&] { JOS.attribute("id", llvm::json::Value(Call)); });
-      }
+    JOS.attributeObject("calls", [&] {
+      JOS.attribute("opaque", llvm::json::Value(Summary.callsOpaqueObject()));
+      JOS.attributeArray("functions", [&] {
+        for (auto &&Call : Summary.getCalls()) {
+          JOS.object([&] { JOS.attribute("id", llvm::json::Value(Call)); });
+        }
+      });
     });
   });
 }
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 6cb4b7a716200..883ac2689a2eb 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -15,6 +15,7 @@ SmallVector<char> GetUSR(const FunctionDecl *FD) {
 
 class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
   std::set<SmallVector<char>> Calls;
+  bool callsOpaqueSymbol = false;
 
   virtual void
   run(const ast_matchers::MatchFinder::MatchResult &Result) override {
@@ -23,11 +24,22 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
       return;
 
     const auto *Callee = llvm::dyn_cast<FunctionDecl>(Call->getCalleeDecl());
+    if (!Callee) {
+      callsOpaqueSymbol = true;
+      return;
+    }
+
+    if (const auto *MD = llvm::dyn_cast<CXXMethodDecl>(Callee);
+        MD && MD->isVirtual()) {
+      callsOpaqueSymbol = true;
+      return;
+    }
+
     Calls.emplace(GetUSR(Callee));
   }
 
 public:
-  std::set<SmallVector<char>> collect(const FunctionDecl *FD) {
+  std::pair<std::set<SmallVector<char>>, bool> collect(const FunctionDecl *FD) {
     using namespace ast_matchers;
     MatchFinder Finder;
 
@@ -35,16 +47,17 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
                       this);
     Finder.match(*FD, FD->getASTContext());
 
-    return Calls;
+    return {Calls, callsOpaqueSymbol};
   }
 };
 } // namespace
 
 FunctionSummary::FunctionSummary(SmallVector<char> ID,
                                  std::set<const SummaryAttr *> FunctionAttrs,
-                                 std::set<SmallVector<char>> Calls)
+                                 std::set<SmallVector<char>> Calls,
+                                 bool CallsOpaque)
     : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
-      Calls(std::move(Calls)) {}
+      Calls(std::move(Calls)), CallsOpaque(CallsOpaque) {}
 
 template <typename T> void SummaryContext::registerAttr() {
   std::unique_ptr<T> attr(new T());
@@ -60,9 +73,10 @@ SummaryContext::SummaryContext() { registerAttr<NoWriteGlobalAttr>(); }
 
 void SummaryContext::CreateSummary(SmallVector<char> ID,
                                    std::set<const SummaryAttr *> Attrs,
-                                   std::set<SmallVector<char>> Calls) {
+                                   std::set<SmallVector<char>> Calls,
+                                   bool CallsOpaque) {
   auto Summary = std::make_unique<FunctionSummary>(
-      std::move(ID), std::move(Attrs), std::move(Calls));
+      std::move(ID), std::move(Attrs), std::move(Calls), CallsOpaque);
   auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
   IDToSummary[SummaryPtr->getID()] = SummaryPtr;
 }
@@ -81,7 +95,9 @@ void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
       Attrs.emplace(Attr.get());
   }
 
-  CreateSummary(GetUSR(FD), std::move(Attrs), CallCollector().collect(FD));
+  auto [calls, opaque] = CallCollector().collect(FD);
+
+  CreateSummary(GetUSR(FD), std::move(Attrs), std::move(calls), opaque);
 }
 
 void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
@@ -101,14 +117,18 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
     }
 
     std::set<SmallVector<char>> Calls;
-    const llvm::json::Array *CallEntries = FunctionSummary->getArray("calls");
+    const llvm::json::Object *CallsObject = FunctionSummary->getObject("calls");
+    bool callsOpaue = *CallsObject->getBoolean("opaque");
+
+    const llvm::json::Array *CallEntries = CallsObject->getArray("functions");
     for (auto callIt = CallEntries->begin(); callIt != CallEntries->end();
          ++callIt) {
       auto *Obj = callIt->getAsObject();
       Calls.emplace(SmallString<128>(*Obj->getString("id")));
     }
 
-    CreateSummary(std::move(ID), std::move(FunctionAttrs), std::move(Calls));
+    CreateSummary(std::move(ID), std::move(FunctionAttrs), std::move(Calls),
+                  callsOpaue);
   }
 }
 
@@ -118,17 +138,10 @@ bool SummaryContext::ReduceFunctionSummary(FunctionSummary &Function) {
   for (auto &&call : Function.getCalls()) {
     std::set<const SummaryAttr *> reducedAttrs;
 
-    // If we don't have a summary about a called function, we forget
-    // everything about the current one as well.
-    if (!IDToSummary.count(call)) {
-      Function.replaceAttributes(std::move(reducedAttrs));
-      return true;
-    }
-
-    const FunctionSummary *callSummary = IDToSummary[call];
-
+    const FunctionSummary *callSummary =
+        IDToSummary.count(call) ? IDToSummary[call] : nullptr;
     for (auto &&Attr : Attributes) {
-      if (Attr->merge(Function, *callSummary))
+      if (Attr->merge(Function, callSummary))
         reducedAttrs.emplace(Attr.get());
     }
 

>From 0daa24f5f2a90c242c63975743ff9c70920cc9ef Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 19 Jun 2025 02:13:51 +0200
Subject: [PATCH 32/48] fix crashes

---
 clang/lib/Sema/SemaDecl.cpp          | 2 +-
 clang/lib/Summary/SummaryContext.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 740fccc3aad33..431fd0f6651ba 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -16695,7 +16695,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   if (FD && !FD->isDeleted())
     checkTypeSupport(FD->getType(), FD->getLocation(), FD);
 
-  if (SummaryCnsmr) {
+  if (SummaryCnsmr && !LateTemplateParser) {
     SummaryCtx->SummarizeFunctionBody(FD);
     SummaryCnsmr->ProcessFunctionSummary(*SummaryCtx->GetSummary(FD));
   }
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 883ac2689a2eb..7ae04df0e90e9 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -23,7 +23,8 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
     if (!Call)
       return;
 
-    const auto *Callee = llvm::dyn_cast<FunctionDecl>(Call->getCalleeDecl());
+    const auto *Callee =
+        llvm::dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
     if (!Callee) {
       callsOpaqueSymbol = true;
       return;

>From e08fac7447be2d24a970db9c50aceca88f2feda0 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Wed, 25 Jun 2025 23:31:47 +0200
Subject: [PATCH 33/48] a few experimental changes

---
 clang/include/clang/Summary/SummaryContext.h |  2 +-
 clang/lib/Frontend/FrontendAction.cpp        | 50 ++++++++++++++++----
 clang/lib/Sema/SemaDecl.cpp                  |  4 +-
 clang/lib/Summary/SummaryContext.cpp         |  8 ++++
 4 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index 9de88e4d81b35..6e29951688683 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -36,6 +36,7 @@ class FunctionSummary {
 };
 
 class SummaryContext {
+public:
   std::map<SmallVector<char>, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
 
@@ -48,7 +49,6 @@ class SummaryContext {
 
   template <typename T> void registerAttr();
 
-public:
   SummaryContext();
 
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 966c514e04a96..09eecd0d9f152 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -972,6 +972,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
 
   // FIXME: cleanup and lookup dirs recursively
   if (!CI.getFrontendOpts().SummaryDirPath.empty()) {
+    // FIXME: this is a quick shortcut so large summaries are only evaluated
+    // once, we should think about implementing it in a reasonable way...
+    static const char *reducedCache =
+        "reduced-summary-so-that-we-do-not-have-to-evaluate-it-every-time.json";
     FileManager &FileMgr = CI.getFileManager();
 
     StringRef SummaryDirPath = CI.getFrontendOpts().SummaryDirPath;
@@ -981,23 +985,49 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
       llvm::sys::path::native(SummaryDir->getName(), DirNative);
 
       llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
-      for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
-                                         DirEnd;
-           Dir != DirEnd && !EC; Dir.increment(EC)) {
-        if (llvm::sys::path::extension(Dir->path()) == ".json") {
-          std::ifstream t(Dir->path().str());
-          std::stringstream buffer;
-          buffer << t.rdbuf();
+      std::string cacheFile = DirNative.str().str() + '/' + reducedCache;
+
+      std::vector<std::string> paths;
 
-          auto JSON = llvm::json::parse(buffer.str());
-          if (!JSON)
+      if (FS.exists(cacheFile)) {
+        paths.emplace_back(cacheFile);
+      } else {
+        for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
+                                           DirEnd;
+             Dir != DirEnd && !EC; Dir.increment(EC)) {
+          if (llvm::sys::path::extension(Dir->path()) != ".json")
             continue;
 
-          CI.getSummaryContext().ParseSummaryFromJSON(*JSON->getAsArray());
+          paths.emplace_back(Dir->path().str());
         }
       }
 
+      for (auto &&path : paths) {
+        std::ifstream t(path);
+        std::stringstream buffer;
+        buffer << t.rdbuf();
+
+        auto JSON = llvm::json::parse(buffer.str());
+        if (!!JSON)
+          CI.getSummaryContext().ParseSummaryFromJSON(*JSON->getAsArray());
+
+        llvm::handleAllErrors(
+            JSON.takeError(),
+            [](const llvm::ErrorInfoBase &EI) { std::ignore = EI.message(); });
+      }
+
       CI.getSummaryContext().ReduceSummaries();
+
+      if (!FS.exists(cacheFile)) {
+        // FIXME: very quick printing of the summary to the cache file
+        llvm::raw_fd_ostream fd(cacheFile, EC, llvm::sys::fs::CD_CreateAlways);
+
+        JSONPrintingSummaryConsumer printer(CI.getSummaryContext(), fd);
+        printer.ProcessStartOfSourceFile();
+        for (auto &&Summary : CI.getSummaryContext().FunctionSummaries)
+          printer.ProcessFunctionSummary(*Summary);
+        printer.ProcessEndOfSourceFile();
+      }
     }
   }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 431fd0f6651ba..5d427ced06159 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -16695,7 +16695,9 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   if (FD && !FD->isDeleted())
     checkTypeSupport(FD->getType(), FD->getLocation(), FD);
 
-  if (SummaryCnsmr && !LateTemplateParser) {
+  // FIXME: checking this should be done by the summary context
+  if (SummaryCnsmr && !LateTemplateParser && FD &&
+      !SourceMgr.isInSystemHeader(FD->getLocation()) && !FD->getBuiltinID()) {
     SummaryCtx->SummarizeFunctionBody(FD);
     SummaryCnsmr->ProcessFunctionSummary(*SummaryCtx->GetSummary(FD));
   }
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 7ae04df0e90e9..58e491911ef6d 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -30,6 +30,10 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
       return;
     }
 
+    if (Result.SourceManager->isInSystemHeader(Callee->getLocation()) ||
+        Callee->getBuiltinID())
+      return;
+
     if (const auto *MD = llvm::dyn_cast<CXXMethodDecl>(Callee);
         MD && MD->isVirtual()) {
       callsOpaqueSymbol = true;
@@ -76,6 +80,9 @@ void SummaryContext::CreateSummary(SmallVector<char> ID,
                                    std::set<const SummaryAttr *> Attrs,
                                    std::set<SmallVector<char>> Calls,
                                    bool CallsOpaque) {
+  if (IDToSummary.count(ID))
+    return;
+
   auto Summary = std::make_unique<FunctionSummary>(
       std::move(ID), std::move(Attrs), std::move(Calls), CallsOpaque);
   auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
@@ -101,6 +108,7 @@ void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
   CreateSummary(GetUSR(FD), std::move(Attrs), std::move(calls), opaque);
 }
 
+// FIXME: this needs proper error handling
 void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
   for (auto it = Summary.begin(); it != Summary.end(); ++it) {
     const llvm::json::Object *FunctionSummary = it->getAsObject();

>From 8739704df8b6ffd384d3dbe1ae7358a6b8b72176 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Thu, 26 Jun 2025 01:00:59 +0200
Subject: [PATCH 34/48] add new attribute that checks if a function modifies a
 pointer argument

---
 .../include/clang/Summary/SummaryAttribute.h  | 20 ++++++-
 clang/lib/Summary/SummaryAttribute.cpp        | 57 +++++++++++++++++++
 clang/lib/Summary/SummaryContext.cpp          |  5 +-
 3 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryAttribute.h b/clang/include/clang/Summary/SummaryAttribute.h
index 4a9ebb0a684a8..4b1ac06f86fb8 100644
--- a/clang/include/clang/Summary/SummaryAttribute.h
+++ b/clang/include/clang/Summary/SummaryAttribute.h
@@ -5,9 +5,7 @@
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 
 namespace clang {
-enum SummaryAttrKind {
-  NO_WRITE_GLOBAL,
-};
+enum SummaryAttrKind { NO_WRITE_GLOBAL, NO_WRITE_PTR_PARAMETER };
 
 class FunctionSummary;
 class SummaryContext;
@@ -49,6 +47,22 @@ class NoWriteGlobalAttr : public SummaryAttr {
   }
   friend class SummaryContext;
 };
+
+// FIXME: create a macro for attr declarations?
+class NoWritePtrParameterAttr : public SummaryAttr {
+  NoWritePtrParameterAttr()
+      : SummaryAttr(NO_WRITE_PTR_PARAMETER, "no_write_ptr_parameter") {}
+
+public:
+  bool infer(const FunctionDecl *FD) const override final;
+  bool merge(const FunctionSummary &Caller,
+             const FunctionSummary *Callee) const override final;
+
+  static bool classof(const SummaryAttr *A) {
+    return A->getKind() == NO_WRITE_PTR_PARAMETER;
+  }
+  friend class SummaryContext;
+};
 } // namespace clang
 
 #endif // LLVM_CLANG_SUMMARY_SUMMARYATTRIBUTEH
diff --git a/clang/lib/Summary/SummaryAttribute.cpp b/clang/lib/Summary/SummaryAttribute.cpp
index d28c548611375..e6b4a3e2633df 100644
--- a/clang/lib/Summary/SummaryAttribute.cpp
+++ b/clang/lib/Summary/SummaryAttribute.cpp
@@ -36,4 +36,61 @@ bool NoWriteGlobalAttr::merge(const FunctionSummary &Caller,
   return !Caller.callsOpaqueObject() && Caller.getAttributes().count(this) &&
          Callee && Callee->getAttributes().count(this);
 }
+
+bool NoWritePtrParameterAttr::infer(const FunctionDecl *FD) const {
+  using namespace ast_matchers;
+  MatchFinder Finder;
+
+  class Callback : public ast_matchers::MatchFinder::MatchCallback {
+  public:
+    bool MayWritePtrParam = false;
+
+    void
+    run(const ast_matchers::MatchFinder::MatchResult &Result) override final {
+      const auto *FD = Result.Nodes.getNodeAs<FunctionDecl>("fn");
+      if (!FD)
+        return;
+
+      MayWritePtrParam = true;
+    }
+  } CB;
+
+  auto ptrParmDeclRef = declRefExpr(
+      allOf(unless(hasAncestor(unaryOperator(hasOperatorName("*")))),
+            to(parmVarDecl(hasType(pointerType())))));
+  auto ptrParmDereference = unaryOperator(allOf(
+      hasOperatorName("*"),
+      hasDescendant(declRefExpr(to(parmVarDecl(hasType(pointerType())))))));
+
+  Finder.addMatcher(
+      functionDecl(
+          anyOf(
+              // The value of the pointer is used to initialize a local
+              // variable.
+              forEachDescendant(
+                  varDecl(hasInitializer(hasDescendant(ptrParmDeclRef)))),
+              // The ptr parameter appears on the RHS of an assignment.
+              forEachDescendant(
+                  binaryOperator(isAssignmentOperator(),
+                                 hasRHS(hasDescendant(ptrParmDeclRef)))),
+              // The ptr is dereferenced on the LHS of an assignment.
+              forEachDescendant(binaryOperator(
+                  isAssignmentOperator(),
+                  hasLHS(anyOf(ptrParmDereference,
+                               hasDescendant(ptrParmDereference))))),
+              // The param is const casted
+              forEachDescendant(cxxConstCastExpr(hasDescendant(ptrParmDeclRef)))
+              // FIXME: handle member access
+              ))
+          .bind("fn"),
+      &CB);
+  Finder.match(*FD, FD->getASTContext());
+  return !CB.MayWritePtrParam;
+}
+
+bool NoWritePtrParameterAttr::merge(const FunctionSummary &Caller,
+                                    const FunctionSummary *Callee) const {
+  return !Caller.callsOpaqueObject() && Caller.getAttributes().count(this) &&
+         Callee && Callee->getAttributes().count(this);
+}
 } // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 58e491911ef6d..61b461f9cd97b 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -74,7 +74,10 @@ template <typename T> void SummaryContext::registerAttr() {
   KindToAttribute[Kind] = Attributes.emplace_back(std::move(attr)).get();
 }
 
-SummaryContext::SummaryContext() { registerAttr<NoWriteGlobalAttr>(); }
+SummaryContext::SummaryContext() {
+  registerAttr<NoWriteGlobalAttr>();
+  registerAttr<NoWritePtrParameterAttr>();
+}
 
 void SummaryContext::CreateSummary(SmallVector<char> ID,
                                    std::set<const SummaryAttr *> Attrs,

>From 8dfc6093f3e64e299abb300dd79dd6e8b2272974 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sun, 29 Jun 2025 02:24:02 +0200
Subject: [PATCH 35/48] store string in function summary

---
 clang/include/clang/Summary/SummaryContext.h | 18 +++++++--------
 clang/lib/Summary/SummaryContext.cpp         | 24 ++++++++++----------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index 6e29951688683..3bf5e187fd8ec 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -7,18 +7,18 @@
 
 namespace clang {
 class FunctionSummary {
-  SmallVector<char> ID;
+  std::string ID;
   std::set<const SummaryAttr *> Attrs;
-  std::set<SmallVector<char>> Calls;
+  std::set<std::string> Calls;
   bool CallsOpaque;
 
 public:
-  FunctionSummary(SmallVector<char> ID, std::set<const SummaryAttr *> Attrs,
-                  std::set<SmallVector<char>> Calls, bool CallsOpaque);
+  FunctionSummary(std::string ID, std::set<const SummaryAttr *> Attrs,
+                  std::set<std::string> Calls, bool CallsOpaque);
 
-  SmallVector<char> getID() const { return ID; }
+  StringRef getID() const { return ID; }
   const std::set<const SummaryAttr *> &getAttributes() const { return Attrs; }
-  const std::set<SmallVector<char>> &getCalls() const { return Calls; }
+  const std::set<std::string> &getCalls() const { return Calls; }
   bool callsOpaqueObject() const { return CallsOpaque; }
 
   template <typename T> bool hasAttribute() const {
@@ -37,14 +37,14 @@ class FunctionSummary {
 
 class SummaryContext {
 public:
-  std::map<SmallVector<char>, const FunctionSummary *> IDToSummary;
+  std::map<StringRef, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
 
   std::map<SummaryAttrKind, const SummaryAttr *> KindToAttribute;
   std::vector<std::unique_ptr<SummaryAttr>> Attributes;
 
-  void CreateSummary(SmallVector<char> ID, std::set<const SummaryAttr *> Attrs,
-                     std::set<SmallVector<char>> Calls, bool CallsOpaque);
+  void CreateSummary(std::string ID, std::set<const SummaryAttr *> Attrs,
+                     std::set<std::string> Calls, bool CallsOpaque);
   bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
 
   template <typename T> void registerAttr();
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 61b461f9cd97b..faec91e02050a 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -7,14 +7,14 @@
 
 namespace clang {
 namespace {
-SmallVector<char> GetUSR(const FunctionDecl *FD) {
-  SmallVector<char> USR;
+std::string GetUSR(const FunctionDecl *FD) {
+  SmallString<32> USR;
   index::generateUSRForDecl(FD, USR);
-  return USR;
+  return USR.str().str();
 }
 
 class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
-  std::set<SmallVector<char>> Calls;
+  std::set<std::string> Calls;
   bool callsOpaqueSymbol = false;
 
   virtual void
@@ -44,7 +44,7 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
   }
 
 public:
-  std::pair<std::set<SmallVector<char>>, bool> collect(const FunctionDecl *FD) {
+  std::pair<std::set<std::string>, bool> collect(const FunctionDecl *FD) {
     using namespace ast_matchers;
     MatchFinder Finder;
 
@@ -57,9 +57,9 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
 };
 } // namespace
 
-FunctionSummary::FunctionSummary(SmallVector<char> ID,
+FunctionSummary::FunctionSummary(std::string ID,
                                  std::set<const SummaryAttr *> FunctionAttrs,
-                                 std::set<SmallVector<char>> Calls,
+                                 std::set<std::string> Calls,
                                  bool CallsOpaque)
     : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
       Calls(std::move(Calls)), CallsOpaque(CallsOpaque) {}
@@ -79,9 +79,9 @@ SummaryContext::SummaryContext() {
   registerAttr<NoWritePtrParameterAttr>();
 }
 
-void SummaryContext::CreateSummary(SmallVector<char> ID,
+void SummaryContext::CreateSummary(std::string ID,
                                    std::set<const SummaryAttr *> Attrs,
-                                   std::set<SmallVector<char>> Calls,
+                                   std::set<std::string> Calls,
                                    bool CallsOpaque) {
   if (IDToSummary.count(ID))
     return;
@@ -116,7 +116,7 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
   for (auto it = Summary.begin(); it != Summary.end(); ++it) {
     const llvm::json::Object *FunctionSummary = it->getAsObject();
 
-    SmallString<128> ID(*FunctionSummary->getString("id"));
+    std::string ID = FunctionSummary->getString("id")->str();
     std::set<const SummaryAttr *> FunctionAttrs;
     const llvm::json::Array *FunctionAttributes =
         FunctionSummary->getObject("attrs")->getArray("function");
@@ -128,7 +128,7 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
       }
     }
 
-    std::set<SmallVector<char>> Calls;
+    std::set<std::string> Calls;
     const llvm::json::Object *CallsObject = FunctionSummary->getObject("calls");
     bool callsOpaue = *CallsObject->getBoolean("opaque");
 
@@ -136,7 +136,7 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
     for (auto callIt = CallEntries->begin(); callIt != CallEntries->end();
          ++callIt) {
       auto *Obj = callIt->getAsObject();
-      Calls.emplace(SmallString<128>(*Obj->getString("id")));
+      Calls.emplace(Obj->getString("id")->str());
     }
 
     CreateSummary(std::move(ID), std::move(FunctionAttrs), std::move(Calls),

>From da96155c56c9341e4927ed03bdb1a6c3547f6cd9 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Mon, 30 Jun 2025 00:00:06 +0200
Subject: [PATCH 36/48] yaml serialization... complete mess

---
 clang/include/clang/Summary/SummaryConsumer.h | 13 +++++
 clang/include/clang/Summary/SummaryContext.h  |  4 ++
 .../clang/Summary/SummaryYamlMappings.h       | 27 +++++++++++
 clang/lib/Frontend/CompilerInstance.cpp       |  2 +-
 clang/lib/Frontend/FrontendAction.cpp         | 19 +++++---
 clang/lib/StaticAnalyzer/Core/CallEvent.cpp   | 25 +++++-----
 clang/lib/Summary/CMakeLists.txt              |  1 +
 clang/lib/Summary/SummaryConsumer.cpp         |  6 +++
 clang/lib/Summary/SummaryContext.cpp          | 11 +++++
 clang/lib/Summary/SummaryYamlMappings.cpp     | 47 +++++++++++++++++++
 10 files changed, 136 insertions(+), 19 deletions(-)
 create mode 100644 clang/include/clang/Summary/SummaryYamlMappings.h
 create mode 100644 clang/lib/Summary/SummaryYamlMappings.cpp

diff --git a/clang/include/clang/Summary/SummaryConsumer.h b/clang/include/clang/Summary/SummaryConsumer.h
index a9f8abb78aff9..580765ae8216d 100644
--- a/clang/include/clang/Summary/SummaryConsumer.h
+++ b/clang/include/clang/Summary/SummaryConsumer.h
@@ -3,6 +3,8 @@
 
 #include "clang/Basic/LLVM.h"
 #include "llvm/Support/JSON.h"
+#include "llvm/Support/YAMLTraits.h"
+
 namespace clang {
 class FunctionSummary;
 class SummaryContext;
@@ -40,6 +42,17 @@ class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
     JOS.flush();
   };
 };
+
+class YAMLPrintingSummaryConsumer : public PrintingSummaryConsumer {
+  llvm::raw_ostream &OS;
+  llvm::yaml::Output YOS;
+
+public:
+   YAMLPrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
+      : PrintingSummaryConsumer(SummaryCtx, OS), OS(OS), YOS(OS) {}
+
+  void ProcessEndOfSourceFile() override;
+};
 } // namespace clang
 
 #endif // LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index 3bf5e187fd8ec..023a651e1a801 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -33,6 +33,9 @@ class FunctionSummary {
   void replaceAttributes(std::set<const SummaryAttr *> Attrs) {
     this->Attrs = std::move(Attrs);
   }
+
+  friend struct llvm::yaml::MappingTraits<clang::FunctionSummary>;
+  friend struct llvm::yaml::MappingContextTraits<clang::FunctionSummary, clang::SummaryContext>;
 };
 
 class SummaryContext {
@@ -55,6 +58,7 @@ class SummaryContext {
   void SummarizeFunctionBody(const FunctionDecl *FD);
 
   void ParseSummaryFromJSON(const llvm::json::Array &Summary);
+  void ParseSummaryFromYAML(StringRef content);
   void ReduceSummaries();
 };
 } // namespace clang
diff --git a/clang/include/clang/Summary/SummaryYamlMappings.h b/clang/include/clang/Summary/SummaryYamlMappings.h
new file mode 100644
index 0000000000000..0a3402c009392
--- /dev/null
+++ b/clang/include/clang/Summary/SummaryYamlMappings.h
@@ -0,0 +1,27 @@
+#ifndef LLVM_CLANG_SUMMARY_SUMMARYYAMLMAPPINGS_H
+#define LLVM_CLANG_SUMMARY_SUMMARYYAMLMAPPINGS_H
+
+#include "llvm/Support/YAMLTraits.h"
+#include "clang/Summary/SummaryContext.h"
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace yaml {
+  template <> struct MappingTraits<clang::FunctionSummary> {
+    static void mapping(IO &io, clang::FunctionSummary &FS);
+  };
+
+  template <>
+  struct SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>> {
+    static size_t
+    size(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq);
+    static clang::FunctionSummary &
+    element(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq,
+            size_t index);
+  };
+} // namespace yaml
+} // namespace llvm
+
+#endif //LLVM_CLANG_SUMMARY_SUMMARYYAMLMAPPINGS_H
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 634e5234a5912..32eca563a8f44 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -757,7 +757,7 @@ void CompilerInstance::createSummaryConsumer() {
   }
 
   TheSummaryConsumer.reset(
-      new JSONPrintingSummaryConsumer(getSummaryContext(), *SummaryOS));
+      new YAMLPrintingSummaryConsumer(getSummaryContext(), *SummaryOS));
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 09eecd0d9f152..3cf695646789a 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -995,7 +995,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
                                            DirEnd;
              Dir != DirEnd && !EC; Dir.increment(EC)) {
-          if (llvm::sys::path::extension(Dir->path()) != ".json")
+          if (llvm::sys::path::extension(Dir->path()) != ".yaml")
             continue;
 
           paths.emplace_back(Dir->path().str());
@@ -1007,13 +1007,18 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         std::stringstream buffer;
         buffer << t.rdbuf();
 
-        auto JSON = llvm::json::parse(buffer.str());
-        if (!!JSON)
-          CI.getSummaryContext().ParseSummaryFromJSON(*JSON->getAsArray());
+        llvm::outs() << buffer.str() << '\n';
 
-        llvm::handleAllErrors(
-            JSON.takeError(),
-            [](const llvm::ErrorInfoBase &EI) { std::ignore = EI.message(); });
+        CI.getSummaryContext().ParseSummaryFromYAML(buffer.str());
+
+
+        // auto JSON = llvm::json::parse(buffer.str());
+        // if (!!JSON)
+        //   CI.getSummaryContext().ParseSummaryFromJSON(*JSON->getAsArray());
+
+        // llvm::handleAllErrors(
+        //     JSON.takeError(),
+        //     [](const llvm::ErrorInfoBase &EI) { std::ignore = EI.message(); });
       }
 
       CI.getSummaryContext().ReduceSummaries();
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 2e42a8c06c2b4..7b8120fb7579a 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -233,12 +233,20 @@ static void findPtrToConstParams(llvm::SmallSet<unsigned, 4> &PreserveArgs,
 
 ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
                                              ProgramStateRef Orig) const {
+  const FunctionSummary *Summary = nullptr;
   ProgramStateRef Result = (Orig ? Orig : getState());
 
-  // Don't invalidate anything if the callee is marked pure/const.
-  if (const Decl *callee = getDecl())
+  if (const Decl *callee = getDecl()) {
+    const SummaryContext *SummaryCtx =
+      State->getStateManager().getOwningEngine().getSummaryCtx();
+    const FunctionDecl *FD = llvm::dyn_cast<FunctionDecl>(callee);
+    if (SummaryCtx && FD)
+      Summary = SummaryCtx->GetSummary(FD);
+    
+    // Don't invalidate anything if the callee is marked pure/const.
     if (callee->hasAttr<PureAttr>() || callee->hasAttr<ConstAttr>())
       return Result;
+  }
 
   SmallVector<SVal, 8> ValuesToInvalidate;
   RegionAndSymbolInvalidationTraits ETraits;
@@ -251,6 +259,9 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
     findPtrToConstParams(PreserveArgs, *this);
 
   for (unsigned Idx = 0, Count = getNumArgs(); Idx != Count; ++Idx) {
+    if(Summary && Summary->hasAttribute<NoWritePtrParameterAttr>())
+      continue;
+
     // Mark this region for invalidation.  We batch invalidate regions
     // below for efficiency.
     if (PreserveArgs.count(Idx))
@@ -277,15 +288,7 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
             ValuesToInvalidate.push_back(loc::MemRegionVal(TVR));
   }
 
-  bool ShouldPreserveGlobals = false;
-  const SummaryContext *SummaryCtx =
-      State->getStateManager().getOwningEngine().getSummaryCtx();
-  const FunctionDecl *FD = llvm::dyn_cast_or_null<FunctionDecl>(getDecl());
-  if (SummaryCtx && FD) {
-    const auto *Summary = SummaryCtx->GetSummary(FD);
-    ShouldPreserveGlobals =
-        Summary && Summary->hasAttribute<NoWriteGlobalAttr>();
-  }
+  bool ShouldPreserveGlobals = Summary && Summary->hasAttribute<NoWriteGlobalAttr>();
 
   // Invalidate designated regions using the batch invalidation API.
   // NOTE: Even if RegionsToInvalidate is empty, we may still invalidate
diff --git a/clang/lib/Summary/CMakeLists.txt b/clang/lib/Summary/CMakeLists.txt
index a8d7d065d11c2..ccefccb2dd117 100644
--- a/clang/lib/Summary/CMakeLists.txt
+++ b/clang/lib/Summary/CMakeLists.txt
@@ -7,6 +7,7 @@ add_clang_library(clangSummary
   SummaryAttribute.cpp
   SummaryConsumer.cpp
   SummaryContext.cpp
+  SummaryYamlMappings.cpp
 
   LINK_LIBS
   clangAST
diff --git a/clang/lib/Summary/SummaryConsumer.cpp b/clang/lib/Summary/SummaryConsumer.cpp
index 308f26abed0f4..af6bed48a375a 100644
--- a/clang/lib/Summary/SummaryConsumer.cpp
+++ b/clang/lib/Summary/SummaryConsumer.cpp
@@ -1,5 +1,6 @@
 #include "clang/Summary/SummaryConsumer.h"
 #include "clang/Summary/SummaryContext.h"
+#include "clang/Summary/SummaryYamlMappings.h"
 
 namespace clang {
 void JSONPrintingSummaryConsumer::ProcessFunctionSummary(
@@ -23,4 +24,9 @@ void JSONPrintingSummaryConsumer::ProcessFunctionSummary(
     });
   });
 }
+
+void YAMLPrintingSummaryConsumer::ProcessEndOfSourceFile() {
+  YOS << ((SummaryContext *)SummaryCtx)->FunctionSummaries;
+  OS.flush();
+}
 } // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index faec91e02050a..07d69a9611ca0 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -3,6 +3,7 @@
 #include "clang/Index/USRGeneration.h"
 #include "clang/Summary/SummaryAttribute.h"
 #include "clang/Summary/SummaryConsumer.h"
+#include "clang/Summary/SummaryYamlMappings.h"
 #include <set>
 
 namespace clang {
@@ -144,6 +145,16 @@ void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
   }
 }
 
+void SummaryContext::ParseSummaryFromYAML(StringRef content) {
+  std::vector<std::unique_ptr<clang::FunctionSummary>> summaries;
+
+  llvm::yaml::Input YIN(content, this);
+  YIN >> summaries;
+
+  for(auto &&summary : summaries)
+    CreateSummary(summary->getID().str(), summary->getAttributes(), summary->getCalls(), summary->callsOpaqueObject());
+}
+
 bool SummaryContext::ReduceFunctionSummary(FunctionSummary &Function) {
   bool changed = false;
 
diff --git a/clang/lib/Summary/SummaryYamlMappings.cpp b/clang/lib/Summary/SummaryYamlMappings.cpp
new file mode 100644
index 0000000000000..bdd6c1f82ed68
--- /dev/null
+++ b/clang/lib/Summary/SummaryYamlMappings.cpp
@@ -0,0 +1,47 @@
+#include "clang/Summary/SummaryYamlMappings.h"
+
+namespace llvm {
+namespace yaml {
+  void MappingTraits<clang::FunctionSummary>::mapping(IO &io, clang::FunctionSummary &FS) {
+    io.mapRequired("id", FS.ID);
+
+    std::vector<std::string> Attrs;
+    for(auto &&Attr : FS.Attrs)
+      Attrs.emplace_back(Attr->serialize());
+    io.mapRequired("fn_attrs", Attrs);
+    if(!io.outputting()) {
+      std::set<const clang::SummaryAttr *> FunctionAttrs;
+      for (auto parsedAttr : Attrs) {
+        for (auto &&Attr : ((clang::SummaryContext*)io.getContext())->Attributes) {
+          if (Attr->parse(parsedAttr))
+            FunctionAttrs.emplace(Attr.get());
+        }
+      }
+
+      FS.Attrs = std::move(FunctionAttrs);
+    }
+
+    io.mapRequired("opaque_calls", FS.CallsOpaque);
+
+    std::vector<std::string> Calls(FS.Calls.begin(), FS.Calls.end());
+    io.mapRequired("calls", Calls);
+    if(!io.outputting())
+      FS.Calls = std::set(Calls.begin(), Calls.end());
+  }
+
+  size_t
+  SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>>::size(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq) {
+    return seq.size();
+  }
+
+  clang::FunctionSummary &
+  SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>>::element(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq,
+          size_t index) {
+    if (index >= seq.size()) {
+      seq.resize(index + 1);
+      seq[index].reset(new clang::FunctionSummary("", {}, {}, false));
+    }
+    return *seq[index];
+  }
+} // namespace yaml
+} // namespace llvm
\ No newline at end of file

>From 03fc645d8f86bd6c8375c3b6800b05d667113407 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Tue, 1 Jul 2025 00:44:05 +0200
Subject: [PATCH 37/48] flexible serialization

---
 .../include/clang/Frontend/CompilerInstance.h |  13 ++
 clang/include/clang/Summary/SummaryConsumer.h |  36 +---
 clang/include/clang/Summary/SummaryContext.h  |   8 +-
 .../clang/Summary/SummarySerialization.h      |  42 ++++
 .../clang/Summary/SummaryYamlMappings.h       |  27 ---
 clang/lib/Frontend/CompilerInstance.cpp       |  10 +-
 clang/lib/Frontend/FrontendAction.cpp         |  27 +--
 clang/lib/StaticAnalyzer/Core/CallEvent.cpp   |   9 +-
 clang/lib/Summary/CMakeLists.txt              |   2 +-
 clang/lib/Summary/SummaryConsumer.cpp         |  32 +---
 clang/lib/Summary/SummaryContext.cpp          |  47 +----
 clang/lib/Summary/SummarySerialization.cpp    | 180 ++++++++++++++++++
 clang/lib/Summary/SummaryYamlMappings.cpp     |  47 -----
 13 files changed, 277 insertions(+), 203 deletions(-)
 create mode 100644 clang/include/clang/Summary/SummarySerialization.h
 delete mode 100644 clang/include/clang/Summary/SummaryYamlMappings.h
 create mode 100644 clang/lib/Summary/SummarySerialization.cpp
 delete mode 100644 clang/lib/Summary/SummaryYamlMappings.cpp

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 4a15fbf042cce..43a8e65c1f5eb 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -124,6 +124,9 @@ class CompilerInstance : public ModuleLoader {
   /// The code completion consumer.
   std::unique_ptr<CodeCompleteConsumer> CompletionConsumer;
 
+  /// The summary serializer.
+  std::unique_ptr<SummarySerializer> TheSummarySerializer;
+
   /// The summary consumer.
   std::unique_ptr<SummaryConsumer> TheSummaryConsumer;
 
@@ -646,6 +649,16 @@ class CompilerInstance : public ModuleLoader {
 
   void createSummaryConsumer();
 
+  bool hasSummarySerializer() const { return (bool)TheSummarySerializer; }
+
+  SummarySerializer &getSummarySerializer() const {
+    assert(TheSummarySerializer &&
+           "Compiler instance has no code summary serializer!");
+    return *TheSummarySerializer;
+  }
+
+  void createSummarySerializer();
+
   /// @}
   /// @name Frontend timer
   /// @{
diff --git a/clang/include/clang/Summary/SummaryConsumer.h b/clang/include/clang/Summary/SummaryConsumer.h
index 580765ae8216d..b1421c960da28 100644
--- a/clang/include/clang/Summary/SummaryConsumer.h
+++ b/clang/include/clang/Summary/SummaryConsumer.h
@@ -1,13 +1,11 @@
 #ifndef LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
 #define LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
 
-#include "clang/Basic/LLVM.h"
-#include "llvm/Support/JSON.h"
-#include "llvm/Support/YAMLTraits.h"
-
+#include "llvm/Support/raw_ostream.h"
 namespace clang {
 class FunctionSummary;
 class SummaryContext;
+class SummarySerializer;
 
 class SummaryConsumer {
 protected:
@@ -22,37 +20,17 @@ class SummaryConsumer {
   virtual void ProcessEndOfSourceFile() {};
 };
 
-class PrintingSummaryConsumer : public SummaryConsumer {
-public:
-  PrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
-      : SummaryConsumer(SummaryCtx) {}
-};
-
-class JSONPrintingSummaryConsumer : public PrintingSummaryConsumer {
-  llvm::json::OStream JOS;
-
-public:
-  JSONPrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
-      : PrintingSummaryConsumer(SummaryCtx, OS), JOS(OS, 2) {}
-
-  void ProcessStartOfSourceFile() override { JOS.arrayBegin(); };
-  void ProcessFunctionSummary(const FunctionSummary &) override;
-  void ProcessEndOfSourceFile() override {
-    JOS.arrayEnd();
-    JOS.flush();
-  };
-};
-
-class YAMLPrintingSummaryConsumer : public PrintingSummaryConsumer {
+class SerializingSummaryConsumer : public SummaryConsumer {
   llvm::raw_ostream &OS;
-  llvm::yaml::Output YOS;
+  SummarySerializer *Serializer;
 
 public:
-   YAMLPrintingSummaryConsumer(const SummaryContext &SummaryCtx, raw_ostream &OS)
-      : PrintingSummaryConsumer(SummaryCtx, OS), OS(OS), YOS(OS) {}
+  SerializingSummaryConsumer(SummarySerializer &Serializer,
+                             llvm::raw_ostream &OS);
 
   void ProcessEndOfSourceFile() override;
 };
+
 } // namespace clang
 
 #endif // LLVM_CLANG_SUMMARY_SUMMARYCONSUMER_H
diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index 023a651e1a801..f90191787e227 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -3,6 +3,8 @@
 
 #include "clang/Summary/SummaryAttribute.h"
 #include "clang/Summary/SummaryConsumer.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/YAMLTraits.h"
 #include <set>
 
 namespace clang {
@@ -35,7 +37,8 @@ class FunctionSummary {
   }
 
   friend struct llvm::yaml::MappingTraits<clang::FunctionSummary>;
-  friend struct llvm::yaml::MappingContextTraits<clang::FunctionSummary, clang::SummaryContext>;
+  friend struct llvm::yaml::MappingContextTraits<clang::FunctionSummary,
+                                                 clang::SummaryContext>;
 };
 
 class SummaryContext {
@@ -56,9 +59,6 @@ class SummaryContext {
 
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
-
-  void ParseSummaryFromJSON(const llvm::json::Array &Summary);
-  void ParseSummaryFromYAML(StringRef content);
   void ReduceSummaries();
 };
 } // namespace clang
diff --git a/clang/include/clang/Summary/SummarySerialization.h b/clang/include/clang/Summary/SummarySerialization.h
new file mode 100644
index 0000000000000..ccadd49776e20
--- /dev/null
+++ b/clang/include/clang/Summary/SummarySerialization.h
@@ -0,0 +1,42 @@
+#ifndef LLVM_CLANG_SUMMARY_SUMMARYSERIALIZATION_H
+#define LLVM_CLANG_SUMMARY_SUMMARYSERIALIZATION_H
+
+#include "clang/Summary/SummaryContext.h"
+
+namespace clang {
+class SummarySerializer {
+protected:
+  SummaryContext *SummaryCtx;
+
+public:
+  SummaryContext *getSummaryCtx() const { return SummaryCtx; }
+
+  SummarySerializer(SummaryContext &SummaryCtx) : SummaryCtx(&SummaryCtx){};
+  virtual ~SummarySerializer() = default;
+
+  virtual void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
+                         raw_ostream &OS) = 0;
+  virtual void parse(StringRef) = 0;
+};
+
+class JSONSummarySerializer : public SummarySerializer {
+public:
+  JSONSummarySerializer(SummaryContext &SummaryCtx)
+      : SummarySerializer(SummaryCtx){};
+
+  void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
+                 raw_ostream &OS) override;
+  void parse(StringRef) override;
+};
+
+class YAMLSummarySerializer : public SummarySerializer {
+public: // Required: otherwise the constructor and overrides are private.
+  YAMLSummarySerializer(SummaryContext &SummaryCtx)
+      : SummarySerializer(SummaryCtx){};
+  void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
+                 raw_ostream &OS) override;
+  void parse(StringRef) override;
+};
+} // namespace clang
+
+#endif // LLVM_CLANG_SUMMARY_SUMMARYSERIALIZATION_H
diff --git a/clang/include/clang/Summary/SummaryYamlMappings.h b/clang/include/clang/Summary/SummaryYamlMappings.h
deleted file mode 100644
index 0a3402c009392..0000000000000
--- a/clang/include/clang/Summary/SummaryYamlMappings.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef LLVM_CLANG_SUMMARY_SUMMARYYAMLMAPPINGS_H
-#define LLVM_CLANG_SUMMARY_SUMMARYYAMLMAPPINGS_H
-
-#include "llvm/Support/YAMLTraits.h"
-#include "clang/Summary/SummaryContext.h"
-
-#include <memory>
-#include <vector>
-
-namespace llvm {
-namespace yaml {
-  template <> struct MappingTraits<clang::FunctionSummary> {
-    static void mapping(IO &io, clang::FunctionSummary &FS);
-  };
-
-  template <>
-  struct SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>> {
-    static size_t
-    size(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq);
-    static clang::FunctionSummary &
-    element(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq,
-            size_t index);
-  };
-} // namespace yaml
-} // namespace llvm
-
-#endif //LLVM_CLANG_SUMMARY_SUMMARYYAMLMAPPINGS_H
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 32eca563a8f44..5d8d717e15844 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -42,6 +42,7 @@
 #include "clang/Serialization/InMemoryModuleCache.h"
 #include "clang/Serialization/ModuleCache.h"
 #include "clang/Summary/SummaryConsumer.h"
+#include "clang/Summary/SummarySerialization.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
@@ -756,8 +757,15 @@ void CompilerInstance::createSummaryConsumer() {
     return;
   }
 
+  if (!hasSummarySerializer())
+    createSummarySerializer();
+
   TheSummaryConsumer.reset(
-      new YAMLPrintingSummaryConsumer(getSummaryContext(), *SummaryOS));
+      new SerializingSummaryConsumer(getSummarySerializer(), *SummaryOS));
+}
+
+void CompilerInstance::createSummarySerializer() {
+  TheSummarySerializer.reset(new JSONSummarySerializer(getSummaryContext()));
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 3cf695646789a..389b8e5728b7f 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -39,6 +39,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Summary/SummaryContext.h"
+#include "clang/Summary/SummarySerialization.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/BuryPointer.h"
@@ -970,6 +971,9 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
   if (ProcessesSummaries && !CI.hasSummaryContext())
     CI.createSummaryContext();
 
+  if (ProcessesSummaries && !CI.hasSummarySerializer())
+    CI.createSummarySerializer();
+
   // FIXME: cleanup and lookup dirs recursively
   if (!CI.getFrontendOpts().SummaryDirPath.empty()) {
     // FIXME: this is a quick shortcut so large summaries are only evaluated
@@ -995,7 +999,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
                                            DirEnd;
              Dir != DirEnd && !EC; Dir.increment(EC)) {
-          if (llvm::sys::path::extension(Dir->path()) != ".yaml")
+          if (llvm::sys::path::extension(Dir->path()) != ".json")
             continue;
 
           paths.emplace_back(Dir->path().str());
@@ -1007,18 +1011,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         std::stringstream buffer;
         buffer << t.rdbuf();
 
-        llvm::outs() << buffer.str() << '\n';
-
-        CI.getSummaryContext().ParseSummaryFromYAML(buffer.str());
-
-
-        // auto JSON = llvm::json::parse(buffer.str());
-        // if (!!JSON)
-        //   CI.getSummaryContext().ParseSummaryFromJSON(*JSON->getAsArray());
-
-        // llvm::handleAllErrors(
-        //     JSON.takeError(),
-        //     [](const llvm::ErrorInfoBase &EI) { std::ignore = EI.message(); });
+        CI.getSummarySerializer().parse(buffer.str());
       }
 
       CI.getSummaryContext().ReduceSummaries();
@@ -1026,12 +1019,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
       if (!FS.exists(cacheFile)) {
         // FIXME: very quick printing of the summary to the cache file
         llvm::raw_fd_ostream fd(cacheFile, EC, llvm::sys::fs::CD_CreateAlways);
-
-        JSONPrintingSummaryConsumer printer(CI.getSummaryContext(), fd);
-        printer.ProcessStartOfSourceFile();
-        for (auto &&Summary : CI.getSummaryContext().FunctionSummaries)
-          printer.ProcessFunctionSummary(*Summary);
-        printer.ProcessEndOfSourceFile();
+        CI.getSummarySerializer().serialize(
+            CI.getSummaryContext().FunctionSummaries, fd);
       }
     }
   }
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 7b8120fb7579a..669fd715c9ae1 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -238,11 +238,11 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
 
   if (const Decl *callee = getDecl()) {
     const SummaryContext *SummaryCtx =
-      State->getStateManager().getOwningEngine().getSummaryCtx();
+        State->getStateManager().getOwningEngine().getSummaryCtx();
     const FunctionDecl *FD = llvm::dyn_cast<FunctionDecl>(callee);
     if (SummaryCtx && FD)
       Summary = SummaryCtx->GetSummary(FD);
-    
+
     // Don't invalidate anything if the callee is marked pure/const.
     if (callee->hasAttr<PureAttr>() || callee->hasAttr<ConstAttr>())
       return Result;
@@ -259,7 +259,7 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
     findPtrToConstParams(PreserveArgs, *this);
 
   for (unsigned Idx = 0, Count = getNumArgs(); Idx != Count; ++Idx) {
-    if(Summary && Summary->hasAttribute<NoWritePtrParameterAttr>())
+    if (Summary && Summary->hasAttribute<NoWritePtrParameterAttr>())
       continue;
 
     // Mark this region for invalidation.  We batch invalidate regions
@@ -288,7 +288,8 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
             ValuesToInvalidate.push_back(loc::MemRegionVal(TVR));
   }
 
-  bool ShouldPreserveGlobals = Summary && Summary->hasAttribute<NoWriteGlobalAttr>();
+  bool ShouldPreserveGlobals =
+      Summary && Summary->hasAttribute<NoWriteGlobalAttr>();
 
   // Invalidate designated regions using the batch invalidation API.
   // NOTE: Even if RegionsToInvalidate is empty, we may still invalidate
diff --git a/clang/lib/Summary/CMakeLists.txt b/clang/lib/Summary/CMakeLists.txt
index ccefccb2dd117..a0079e3bd2b3d 100644
--- a/clang/lib/Summary/CMakeLists.txt
+++ b/clang/lib/Summary/CMakeLists.txt
@@ -7,7 +7,7 @@ add_clang_library(clangSummary
   SummaryAttribute.cpp
   SummaryConsumer.cpp
   SummaryContext.cpp
-  SummaryYamlMappings.cpp
+  SummarySerialization.cpp
 
   LINK_LIBS
   clangAST
diff --git a/clang/lib/Summary/SummaryConsumer.cpp b/clang/lib/Summary/SummaryConsumer.cpp
index af6bed48a375a..2d7a9f8a1c2fc 100644
--- a/clang/lib/Summary/SummaryConsumer.cpp
+++ b/clang/lib/Summary/SummaryConsumer.cpp
@@ -1,32 +1,14 @@
 #include "clang/Summary/SummaryConsumer.h"
 #include "clang/Summary/SummaryContext.h"
-#include "clang/Summary/SummaryYamlMappings.h"
+#include "clang/Summary/SummarySerialization.h"
 
 namespace clang {
-void JSONPrintingSummaryConsumer::ProcessFunctionSummary(
-    const FunctionSummary &Summary) {
-  JOS.object([&] {
-    JOS.attribute("id", llvm::json::Value(Summary.getID()));
-    JOS.attributeObject("attrs", [&] {
-      JOS.attributeArray("function", [&] {
-        for (auto &&Attr : Summary.getAttributes()) {
-          JOS.value(llvm::json::Value(Attr->serialize()));
-        }
-      });
-    });
-    JOS.attributeObject("calls", [&] {
-      JOS.attribute("opaque", llvm::json::Value(Summary.callsOpaqueObject()));
-      JOS.attributeArray("functions", [&] {
-        for (auto &&Call : Summary.getCalls()) {
-          JOS.object([&] { JOS.attribute("id", llvm::json::Value(Call)); });
-        }
-      });
-    });
-  });
-}
+SerializingSummaryConsumer::SerializingSummaryConsumer(
+    SummarySerializer &Serializer, llvm::raw_ostream &OS)
+    : SummaryConsumer(*Serializer.getSummaryCtx()), OS(OS),
+      Serializer(&Serializer) {}
 
-void YAMLPrintingSummaryConsumer::ProcessEndOfSourceFile() {
-  YOS << ((SummaryContext *)SummaryCtx)->FunctionSummaries;
-  OS.flush();
+void SerializingSummaryConsumer::ProcessEndOfSourceFile() {
+  Serializer->serialize(SummaryCtx->FunctionSummaries, OS);
 }
 } // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 07d69a9611ca0..62540a26e8ea1 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -3,7 +3,6 @@
 #include "clang/Index/USRGeneration.h"
 #include "clang/Summary/SummaryAttribute.h"
 #include "clang/Summary/SummaryConsumer.h"
-#include "clang/Summary/SummaryYamlMappings.h"
 #include <set>
 
 namespace clang {
@@ -60,8 +59,7 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
 
 FunctionSummary::FunctionSummary(std::string ID,
                                  std::set<const SummaryAttr *> FunctionAttrs,
-                                 std::set<std::string> Calls,
-                                 bool CallsOpaque)
+                                 std::set<std::string> Calls, bool CallsOpaque)
     : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
       Calls(std::move(Calls)), CallsOpaque(CallsOpaque) {}
 
@@ -112,49 +110,6 @@ void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
   CreateSummary(GetUSR(FD), std::move(Attrs), std::move(calls), opaque);
 }
 
-// FIXME: this needs proper error handling
-void SummaryContext::ParseSummaryFromJSON(const llvm::json::Array &Summary) {
-  for (auto it = Summary.begin(); it != Summary.end(); ++it) {
-    const llvm::json::Object *FunctionSummary = it->getAsObject();
-
-    std::string ID = FunctionSummary->getString("id")->str();
-    std::set<const SummaryAttr *> FunctionAttrs;
-    const llvm::json::Array *FunctionAttributes =
-        FunctionSummary->getObject("attrs")->getArray("function");
-    for (auto attrIt = FunctionAttributes->begin();
-         attrIt != FunctionAttributes->end(); ++attrIt) {
-      for (auto &&Attr : Attributes) {
-        if (Attr->parse(*attrIt->getAsString()))
-          FunctionAttrs.emplace(Attr.get());
-      }
-    }
-
-    std::set<std::string> Calls;
-    const llvm::json::Object *CallsObject = FunctionSummary->getObject("calls");
-    bool callsOpaue = *CallsObject->getBoolean("opaque");
-
-    const llvm::json::Array *CallEntries = CallsObject->getArray("functions");
-    for (auto callIt = CallEntries->begin(); callIt != CallEntries->end();
-         ++callIt) {
-      auto *Obj = callIt->getAsObject();
-      Calls.emplace(Obj->getString("id")->str());
-    }
-
-    CreateSummary(std::move(ID), std::move(FunctionAttrs), std::move(Calls),
-                  callsOpaue);
-  }
-}
-
-void SummaryContext::ParseSummaryFromYAML(StringRef content) {
-  std::vector<std::unique_ptr<clang::FunctionSummary>> summaries;
-
-  llvm::yaml::Input YIN(content, this);
-  YIN >> summaries;
-
-  for(auto &&summary : summaries)
-    CreateSummary(summary->getID().str(), summary->getAttributes(), summary->getCalls(), summary->callsOpaqueObject());
-}
-
 bool SummaryContext::ReduceFunctionSummary(FunctionSummary &Function) {
   bool changed = false;
 
diff --git a/clang/lib/Summary/SummarySerialization.cpp b/clang/lib/Summary/SummarySerialization.cpp
new file mode 100644
index 0000000000000..1297db37ee017
--- /dev/null
+++ b/clang/lib/Summary/SummarySerialization.cpp
@@ -0,0 +1,180 @@
+#include "clang/Summary/SummarySerialization.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<clang::FunctionSummary> {
+  static void mapping(IO &io, clang::FunctionSummary &FS) {
+    io.mapRequired("id", FS.ID);
+
+    std::vector<std::string> Attrs;
+    for (auto &&Attr : FS.Attrs)
+      Attrs.emplace_back(Attr->serialize());
+    io.mapRequired("fn_attrs", Attrs);
+    if (!io.outputting()) {
+      std::set<const clang::SummaryAttr *> FunctionAttrs;
+      for (auto parsedAttr : Attrs) {
+        for (auto &&Attr :
+             ((clang::SummaryContext *)io.getContext())->Attributes) {
+          if (Attr->parse(parsedAttr))
+            FunctionAttrs.emplace(Attr.get());
+        }
+      }
+
+      FS.Attrs = std::move(FunctionAttrs);
+    }
+
+    io.mapRequired("opaque_calls", FS.CallsOpaque);
+
+    std::vector<std::string> Calls(FS.Calls.begin(), FS.Calls.end());
+    io.mapRequired("calls", Calls);
+    if (!io.outputting())
+      FS.Calls = std::set(Calls.begin(), Calls.end());
+  }
+};
+
+template <>
+struct SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>> {
+  static size_t
+  size(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq) {
+    return seq.size();
+  }
+
+  static clang::FunctionSummary &
+  element(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq,
+          size_t index) {
+    if (index >= seq.size()) {
+      seq.resize(index + 1);
+      seq[index].reset(new clang::FunctionSummary("", {}, {}, false));
+    }
+    return *seq[index];
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+namespace clang {
+void JSONSummarySerializer::serialize(
+    const std::vector<std::unique_ptr<FunctionSummary>> &Summaries,
+    raw_ostream &OS) {
+  llvm::json::OStream JOS(OS, 2);
+  JOS.arrayBegin();
+
+  for (auto &&Summary : Summaries) {
+    JOS.object([&] {
+      JOS.attribute("id", llvm::json::Value(Summary->getID()));
+      JOS.attributeObject("attrs", [&] {
+        JOS.attributeArray("function", [&] {
+          for (auto &&Attr : Summary->getAttributes()) {
+            JOS.value(llvm::json::Value(Attr->serialize()));
+          }
+        });
+      });
+      JOS.attributeObject("calls", [&] {
+        JOS.attribute("opaque",
+                      llvm::json::Value(Summary->callsOpaqueObject()));
+        JOS.attributeArray("functions", [&] {
+          for (auto &&Call : Summary->getCalls()) {
+            JOS.object([&] { JOS.attribute("id", llvm::json::Value(Call)); });
+          }
+        });
+      });
+    });
+  }
+
+  JOS.arrayEnd();
+  JOS.flush();
+}
+
+void JSONSummarySerializer::parse(StringRef Buffer) {
+  auto JSON = llvm::json::parse(Buffer);
+  if (!JSON) {
+    llvm::handleAllErrors(JSON.takeError(), [](const llvm::ErrorInfoBase &EI) {
+      std::ignore = EI.message();
+    });
+    return;
+  }
+  // The top-level value must be an array holding one object per function.
+  auto *JSONSummaries = JSON->getAsArray();
+  if (!JSONSummaries)
+    return;
+  // An entry missing any required field is skipped; parsing then continues.
+  for (auto &&JSONSummary : *JSONSummaries) {
+    const llvm::json::Object *JSONSummaryObject = JSONSummary.getAsObject();
+    if (!JSONSummaryObject)
+      continue;
+
+    std::optional<StringRef> ID = JSONSummaryObject->getString("id");
+    if (!ID)
+      continue;
+
+    const llvm::json::Object *JSONAttributes =
+        JSONSummaryObject->getObject("attrs");
+    if (!JSONAttributes)
+      continue;
+
+    const llvm::json::Array *JSONFunctionAttributes =
+        JSONAttributes->getArray("function");
+    if (!JSONFunctionAttributes)
+      continue;
+
+    std::set<const SummaryAttr *> FunctionAttrs;
+    for (auto &&JSONAttr : *JSONFunctionAttributes)
+      for (auto &&CtxAttr : SummaryCtx->Attributes)
+        if (auto JSONAttrStr = JSONAttr.getAsString();
+            JSONAttrStr && CtxAttr->parse(*JSONAttrStr))
+          FunctionAttrs.emplace(CtxAttr.get());
+
+    const llvm::json::Object *JSONCallsObject =
+        JSONSummaryObject->getObject("calls");
+    if (!JSONCallsObject)
+      continue;
+    // Keep the optional wrapped: dereferencing it before the check is UB.
+    std::optional<bool> CallsOpaue = JSONCallsObject->getBoolean("opaque");
+    if (!CallsOpaue)
+      continue;
+
+    std::set<std::string> Calls;
+    const llvm::json::Array *JSONCallEntries =
+        JSONCallsObject->getArray("functions");
+    if (!JSONCallEntries)
+      continue;
+
+    for (auto &&JSONCall : *JSONCallEntries) {
+      auto *JSONCallObj = JSONCall.getAsObject();
+      if (!JSONCallObj)
+        continue;
+
+      std::optional<StringRef> CallID = JSONCallObj->getString("id");
+      if (!CallID)
+        continue;
+
+      Calls.emplace(CallID->str());
+    }
+
+    SummaryCtx->CreateSummary(ID->str(), std::move(FunctionAttrs),
+                              std::move(Calls), *CallsOpaue);
+  }
+}
+
+void YAMLSummarySerializer::serialize(
+    const std::vector<std::unique_ptr<FunctionSummary>> &Summaries,
+    raw_ostream &OS) {
+  llvm::yaml::Output YOUT(OS);
+  YOUT << ((SummaryContext *)SummaryCtx)->FunctionSummaries;
+  OS.flush();
+}
+
+void YAMLSummarySerializer::parse(StringRef Buffer) {
+  std::vector<std::unique_ptr<clang::FunctionSummary>> summaries;
+
+  llvm::yaml::Input YIN(Buffer, SummaryCtx);
+  YIN >> summaries;
+
+  for (auto &&summary : summaries)
+    SummaryCtx->CreateSummary(summary->getID().str(), summary->getAttributes(),
+                              summary->getCalls(),
+                              summary->callsOpaqueObject());
+}
+} // namespace clang
diff --git a/clang/lib/Summary/SummaryYamlMappings.cpp b/clang/lib/Summary/SummaryYamlMappings.cpp
deleted file mode 100644
index bdd6c1f82ed68..0000000000000
--- a/clang/lib/Summary/SummaryYamlMappings.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "clang/Summary/SummaryYamlMappings.h"
-
-namespace llvm {
-namespace yaml {
-  void MappingTraits<clang::FunctionSummary>::mapping(IO &io, clang::FunctionSummary &FS) {
-    io.mapRequired("id", FS.ID);
-
-    std::vector<std::string> Attrs;
-    for(auto &&Attr : FS.Attrs)
-      Attrs.emplace_back(Attr->serialize());
-    io.mapRequired("fn_attrs", Attrs);
-    if(!io.outputting()) {
-      std::set<const clang::SummaryAttr *> FunctionAttrs;
-      for (auto parsedAttr : Attrs) {
-        for (auto &&Attr : ((clang::SummaryContext*)io.getContext())->Attributes) {
-          if (Attr->parse(parsedAttr))
-            FunctionAttrs.emplace(Attr.get());
-        }
-      }
-
-      FS.Attrs = std::move(FunctionAttrs);
-    }
-
-    io.mapRequired("opaque_calls", FS.CallsOpaque);
-
-    std::vector<std::string> Calls(FS.Calls.begin(), FS.Calls.end());
-    io.mapRequired("calls", Calls);
-    if(!io.outputting())
-      FS.Calls = std::set(Calls.begin(), Calls.end());
-  }
-
-  size_t
-  SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>>::size(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq) {
-    return seq.size();
-  }
-
-  clang::FunctionSummary &
-  SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>>::element(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq,
-          size_t index) {
-    if (index >= seq.size()) {
-      seq.resize(index + 1);
-      seq[index].reset(new clang::FunctionSummary("", {}, {}, false));
-    }
-    return *seq[index];
-  }
-} // namespace yaml
-} // namespace llvm
\ No newline at end of file

>From b125329006b6866b216ad70e2c0ce05949acf6db Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Tue, 1 Jul 2025 02:15:18 +0200
Subject: [PATCH 38/48] add support for summary format selection

---
 clang/include/clang/Driver/Options.td              |  3 +++
 clang/include/clang/Frontend/FrontendOptions.h     |  3 +++
 clang/include/clang/Summary/SummarySerialization.h |  1 +
 clang/lib/Driver/ToolChains/Clang.cpp              | 13 ++++++++++++-
 clang/lib/Frontend/CompilerInstance.cpp            | 10 +++++++++-
 clang/lib/Frontend/CompilerInvocation.cpp          | 11 +++++++++++
 clang/lib/Frontend/FrontendAction.cpp              | 13 ++++++++-----
 7 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index c66f098c459c2..5d7736a268057 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5954,6 +5954,9 @@ def emit_summaries : Flag<["-", "--"], "emit-summaries">, Flags<[NoXarchOption]>
   Visibility<[ClangOption]>,
   Alias<emit_summaries_EQ>, AliasArgs<["cwd"]>,
   HelpText<"Alias for --emit-summaries=cwd">;
+def summary_format_EQ : Joined<["-", "--"], "summary-format=">, Flags<[NoXarchOption]>,
+  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"The format of the emitted summaries. Can be set to 'json' (default) or 'yaml'">;
 def save_stats_EQ : Joined<["-", "--"], "save-stats=">, Flags<[NoXarchOption]>,
   HelpText<"Save llvm statistics.">;
 def save_stats : Flag<["-", "--"], "save-stats">, Flags<[NoXarchOption]>,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 20c60f823d5f3..12ab9f0c44879 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -540,6 +540,9 @@ class FrontendOptions {
   /// The directory used to load summary files.
   std::string SummaryDirPath;
 
+  /// The format of the emitted summary files.
+  std::string SummaryFormat;
+
 public:
   FrontendOptions()
       : DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/include/clang/Summary/SummarySerialization.h b/clang/include/clang/Summary/SummarySerialization.h
index ccadd49776e20..d195c5ba67fb3 100644
--- a/clang/include/clang/Summary/SummarySerialization.h
+++ b/clang/include/clang/Summary/SummarySerialization.h
@@ -30,6 +30,7 @@ class JSONSummarySerializer : public SummarySerializer {
 };
 
 class YAMLSummarySerializer : public SummarySerializer {
+public:
   YAMLSummarySerializer(SummaryContext &SummaryCtx)
       : SummarySerializer(SummaryCtx){};
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index f61bb983900f4..9758ea1789f0f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5473,6 +5473,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_summaries_dir_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_summaries_dir_EQ);
 
+  std::string SummaryFormat = "json";
+  if (Arg *A = Args.getLastArg(options::OPT_summary_format_EQ)) {
+    // FIXME: This logic is duplicated, so something is clearly wrong here...
+    StringRef Format = A->getValue();
+    if (Format == "json" || Format == "yaml")
+      SummaryFormat = Format;
+
+    Args.AddLastArg(CmdArgs, options::OPT_summary_format_EQ);
+  }
+
+  // FIXME: This arg shouldn't exist...
   if (const Arg *A = Args.getLastArg(options::OPT_emit_summaries_EQ)) {
     llvm::SmallString<10> input;
     for (const auto &II : Inputs) {
@@ -5496,7 +5507,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       }
 
       if (!summaryFile.empty()) {
-        llvm::sys::path::replace_extension(summaryFile, "json");
+        llvm::sys::path::replace_extension(summaryFile, SummaryFormat);
         CmdArgs.push_back(
             Args.MakeArgString(Twine("-summary-file=") + summaryFile));
       }
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 5d8d717e15844..8fa3d490c4bc6 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -765,7 +765,15 @@ void CompilerInstance::createSummaryConsumer() {
 }
 
 void CompilerInstance::createSummarySerializer() {
-  TheSummarySerializer.reset(new JSONSummarySerializer(getSummaryContext()));
+  StringRef Format = getFrontendOpts().SummaryFormat;
+  SummarySerializer *Serializer;
+
+  if (Format == "yaml")
+    Serializer = new YAMLSummarySerializer(getSummaryContext());
+  else
+    Serializer = new JSONSummarySerializer(getSummaryContext());
+
+  TheSummarySerializer.reset(Serializer);
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 3c23073fc6a8c..38ca191c69c9b 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2985,6 +2985,8 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts,
                 Lang + HeaderUnit + Header + ModuleMap + Preprocessed);
   }
 
+  GenerateArg(Consumer, OPT_summary_format_EQ, Opts.SummaryFormat);
+
   // OPT_INPUT has a unique class, generate it directly.
   for (const auto &Input : Opts.Inputs)
     Consumer(Input.getFile());
@@ -3263,6 +3265,15 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
     Opts.Inputs.emplace_back(std::move(Inputs[i]), IK, IsSystem);
   }
 
+  Opts.SummaryFormat = "json";
+  if (const Arg *A = Args.getLastArg(OPT_summary_format_EQ)) {
+    StringRef Format = A->getValue();
+
+    // FIXME: don't hardcode these values
+    if (Format == "json" || Format == "yaml")
+      Opts.SummaryFormat = Format;
+  };
+
   Opts.DashX = DashX;
 
   return Diags.getNumErrors() == NumErrorsBefore;
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 389b8e5728b7f..767ef732146d3 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -978,10 +978,12 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
   if (!CI.getFrontendOpts().SummaryDirPath.empty()) {
     // FIXME: this is a quick shortcut so large summaries are only evaluated
     // once, we should think about implementing it in a reasonable way...
-    static const char *reducedCache =
-        "reduced-summary-so-that-we-do-not-have-to-evaluate-it-every-time.json";
-    FileManager &FileMgr = CI.getFileManager();
+    static const char *reducedCacheName =
+        "reduced-summary-so-that-we-do-not-have-to-evaluate-it-every-time";
+    const std::string summaryExtension =
+        '.' + CI.getFrontendOpts().SummaryFormat;
 
+    FileManager &FileMgr = CI.getFileManager();
     StringRef SummaryDirPath = CI.getFrontendOpts().SummaryDirPath;
     if (auto SummaryDir = FileMgr.getOptionalDirectoryRef(SummaryDirPath)) {
       std::error_code EC;
@@ -989,7 +991,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
       llvm::sys::path::native(SummaryDir->getName(), DirNative);
 
       llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
-      std::string cacheFile = DirNative.str().str() + '/' + reducedCache;
+      std::string cacheFile =
+          DirNative.str().str() + '/' + reducedCacheName + summaryExtension;
 
       std::vector<std::string> paths;
 
@@ -999,7 +1002,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
                                            DirEnd;
              Dir != DirEnd && !EC; Dir.increment(EC)) {
-          if (llvm::sys::path::extension(Dir->path()) != ".json")
+          if (llvm::sys::path::extension(Dir->path()) != summaryExtension)
             continue;
 
           paths.emplace_back(Dir->path().str());

>From cfd792cc5829a8e7a1a58b44c9f09e45e6333498 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Tue, 1 Jul 2025 02:33:03 +0200
Subject: [PATCH 39/48] format

---
 clang/include/clang/Summary/SummarySerialization.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Summary/SummarySerialization.h b/clang/include/clang/Summary/SummarySerialization.h
index d195c5ba67fb3..e2c3d66b41bdb 100644
--- a/clang/include/clang/Summary/SummarySerialization.h
+++ b/clang/include/clang/Summary/SummarySerialization.h
@@ -11,7 +11,7 @@ class SummarySerializer {
 public:
   SummaryContext *getSummaryCtx() const { return SummaryCtx; }
 
-  SummarySerializer(SummaryContext &SummaryCtx) : SummaryCtx(&SummaryCtx){};
+  SummarySerializer(SummaryContext &SummaryCtx) : SummaryCtx(&SummaryCtx) {};
   virtual ~SummarySerializer() = default;
 
   virtual void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
@@ -22,7 +22,7 @@ class SummarySerializer {
 class JSONSummarySerializer : public SummarySerializer {
 public:
   JSONSummarySerializer(SummaryContext &SummaryCtx)
-      : SummarySerializer(SummaryCtx){};
+      : SummarySerializer(SummaryCtx) {};
 
   void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
                  raw_ostream &OS) override;
@@ -32,7 +32,7 @@ class JSONSummarySerializer : public SummarySerializer {
 class YAMLSummarySerializer : public SummarySerializer {
 public:
   YAMLSummarySerializer(SummaryContext &SummaryCtx)
-      : SummarySerializer(SummaryCtx){};
+      : SummarySerializer(SummaryCtx) {};
 
   void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
                  raw_ostream &OS) override;

>From a2b8e2d856d5ac88157ab5238e42693be9e4e660 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Fri, 4 Jul 2025 16:42:18 +0200
Subject: [PATCH 40/48] some cleanup

---
 clang/include/clang/Driver/Options.td         | 11 +++--
 .../include/clang/Frontend/CompilerInstance.h | 16 +++-----
 .../include/clang/Frontend/FrontendOptions.h  | 10 ++---
 clang/include/clang/Sema/Sema.h               |  2 +-
 clang/lib/Driver/ToolChains/Clang.cpp         | 41 ++++---------------
 clang/lib/Frontend/CompilerInstance.cpp       | 26 +++++++-----
 clang/lib/Frontend/CompilerInvocation.cpp     |  2 +-
 clang/lib/Frontend/FrontendAction.cpp         | 38 ++++++++---------
 clang/lib/Sema/Sema.cpp                       | 10 ++---
 clang/lib/Sema/SemaDecl.cpp                   |  5 +--
 .../Frontend/AnalysisConsumer.cpp             |  7 +---
 clang/lib/Summary/SummaryAttribute.cpp        |  2 +-
 clang/lib/Summary/SummaryConsumer.cpp         |  2 +-
 13 files changed, 68 insertions(+), 104 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5d7736a268057..625ce2299fa70 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5944,9 +5944,9 @@ def save_temps : Flag<["-", "--"], "save-temps">, Flags<[NoXarchOption]>,
 def summaries_dir_EQ : Joined<["-", "--"], "summaries-dir=">, Flags<[NoXarchOption]>,
   Visibility<[ClangOption, CC1Option]>,
   HelpText<"Read summaries about different functions from this directory">,
-  MarshallingInfoString<FrontendOpts<"SummaryDirPath">>;
+  MarshallingInfoString<FrontendOpts<"ReadSummaryDir">>;
 def emit_summaries_EQ : Joined<["-", "--"], "emit-summaries=">, Flags<[NoXarchOption]>,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption]>,
   HelpText<"Save summaries about the different functions. <arg> can be set to 'cwd' for "
   "current working directory, or 'obj' which will save temporary files in the "
   "same directory as the final output file">;
@@ -8164,10 +8164,9 @@ defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
   NegFlag<SetFalse, [], [ClangOption], "Don't preserve">,
   BothFlags<[], [ClangOption], " order of LLVM use-lists when serializing">>;
 
-def summary_file : Joined<["-"], "summary-file=">,
-  HelpText<"Filename to write summaries about function definitions to">,
-  MarshallingInfoString<FrontendOpts<"SummaryFile">>;
-
+def emit_summary_dir : Joined<["-"], "emit-summary-dir=">,
+  HelpText<"Directory to write summaries about function definitions to">,
+  MarshallingInfoString<FrontendOpts<"EmitSummaryDir">>;
 def print_stats : Flag<["-"], "print-stats">,
   HelpText<"Print performance metrics and statistics">,
   MarshallingInfoFlag<FrontendOpts<"ShowStats">>;
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 43a8e65c1f5eb..4208fb2ae797c 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -632,22 +632,17 @@ class CompilerInstance : public ModuleLoader {
 
   bool hasSummaryContext() { return (bool)SummaryCtx; }
 
-  SummaryContext &getSummaryContext() {
-    assert(SummaryCtx && "Compiler instance has no summary context!");
-    return *SummaryCtx;
-  }
+  SummaryContext *getSummaryContext() { return SummaryCtx.get(); }
 
   void createSummaryContext() { SummaryCtx.reset(new SummaryContext()); }
 
   bool hasSummaryConsumer() const { return (bool)TheSummaryConsumer; }
 
-  SummaryConsumer &getSummaryConsumer() const {
-    assert(TheSummaryConsumer &&
-           "Compiler instance has no code summary consumer!");
-    return *TheSummaryConsumer;
+  SummaryConsumer *getSummaryConsumer() const {
+    return TheSummaryConsumer.get();
   }
 
-  void createSummaryConsumer();
+  void createSummaryConsumer(FrontendInputFile Input);
 
   bool hasSummarySerializer() const { return (bool)TheSummarySerializer; }
 
@@ -786,8 +781,7 @@ class CompilerInstance : public ModuleLoader {
 
   /// Create the Sema object to be used for parsing.
   void createSema(TranslationUnitKind TUKind,
-                  CodeCompleteConsumer *CompletionConsumer,
-                  SummaryConsumer *SummaryConsumer = nullptr);
+                  CodeCompleteConsumer *CompletionConsumer);
 
   /// Create the frontend timer and replace any existing one with it.
   void createFrontendTimer();
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 12ab9f0c44879..0e06c13000188 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -534,13 +534,13 @@ class FrontendOptions {
   /// minimization hints.
   std::string DumpMinimizationHintsPath;
 
-  /// Filename to write summaries about function definitions to.
-  std::string SummaryFile;
+  /// The directory used to write summary files to.
+  std::string EmitSummaryDir;
 
-  /// The directory used to load summary files.
-  std::string SummaryDirPath;
+  /// The directory used to load summary files from.
+  std::string ReadSummaryDir;
 
-  /// The format of the emitted summary files.
+  /// The format of the summary files.
   std::string SummaryFormat;
 
 public:
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 1a4d2e99685e0..34e3dc7778df2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -1265,7 +1265,7 @@ class Sema final : public SemaBase {
   SourceManager &SourceMgr;
   api_notes::APINotesManager APINotes;
   SummaryContext *SummaryCtx;
-  SummaryConsumer *SummaryCnsmr;
+  SummaryConsumer *TheSummaryConsumer;
 
   /// A RAII object to enter scope of a compound statement.
   class CompoundScopeRAII {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 9758ea1789f0f..bdeb749699bea 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5473,45 +5473,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_summaries_dir_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_summaries_dir_EQ);
 
-  std::string SummaryFormat = "json";
-  if (Arg *A = Args.getLastArg(options::OPT_summary_format_EQ)) {
-    // FIXME: This logic is duplicated, so something is clearly wrong here...
-    StringRef Format = A->getValue();
-    if (Format == "json" || Format == "yaml")
-      SummaryFormat = Format;
-
+  if (Arg *A = Args.getLastArg(options::OPT_summary_format_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_summary_format_EQ);
-  }
 
-  // FIXME: This arg shouldn't exist...
   if (const Arg *A = Args.getLastArg(options::OPT_emit_summaries_EQ)) {
-    llvm::SmallString<10> input;
-    for (const auto &II : Inputs) {
-      if (!II.isFilename())
-        continue;
-
-      input = II.getFilename();
-      break;
-    }
+    std::string EmitSummaryDir = ".";
 
-    if (!input.empty()) {
-      Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
-      StringRef filename = llvm::sys::path::filename(input);
-      llvm::SmallString<10> summaryFile;
+    if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
+        A->containsValue("obj") && FinalOutput)
+      EmitSummaryDir = llvm::sys::path::parent_path(FinalOutput->getValue());
 
-      if (A->containsValue("cwd") || !FinalOutput) {
-        summaryFile = filename;
-      } else if (A->containsValue("obj") && FinalOutput) {
-        summaryFile = llvm::sys::path::parent_path(FinalOutput->getValue());
-        llvm::sys::path::append(summaryFile, filename);
-      }
-
-      if (!summaryFile.empty()) {
-        llvm::sys::path::replace_extension(summaryFile, SummaryFormat);
-        CmdArgs.push_back(
-            Args.MakeArgString(Twine("-summary-file=") + summaryFile));
-      }
-    }
+    CmdArgs.push_back(
+        Args.MakeArgString(Twine("-emit-summary-dir=") + EmitSummaryDir));
   }
 
   auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 8fa3d490c4bc6..32c1c564ab48e 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -743,11 +743,16 @@ CompilerInstance::createCodeCompletionConsumer(Preprocessor &PP,
   return new PrintingCodeCompleteConsumer(Opts, OS);
 }
 
-void CompilerInstance::createSummaryConsumer() {
-  const std::string &SummaryFile = getFrontendOpts().SummaryFile;
-  if (SummaryFile.empty())
+void CompilerInstance::createSummaryConsumer(FrontendInputFile Input) {
+  StringRef EmitSummaryDir = getFrontendOpts().EmitSummaryDir;
+  if (EmitSummaryDir.empty() || !Input.isFile())
     return;
 
+  llvm::SmallString<32> SummaryFile = EmitSummaryDir;
+  llvm::sys::path::append(SummaryFile, Input.getFile());
+  llvm::sys::path::replace_extension(SummaryFile,
+                                     getFrontendOpts().SummaryFormat);
+
   std::error_code EC;
   SummaryOS.reset(new llvm::raw_fd_ostream(SummaryFile, EC,
                                            llvm::sys::fs::CD_CreateAlways));
@@ -768,21 +773,22 @@ void CompilerInstance::createSummarySerializer() {
   StringRef Format = getFrontendOpts().SummaryFormat;
   SummarySerializer *Serializer;
 
+  if (!hasSummaryContext())
+    createSummaryContext();
+
   if (Format == "yaml")
-    Serializer = new YAMLSummarySerializer(getSummaryContext());
+    Serializer = new YAMLSummarySerializer(*getSummaryContext());
   else
-    Serializer = new JSONSummarySerializer(getSummaryContext());
+    Serializer = new JSONSummarySerializer(*getSummaryContext());
 
   TheSummarySerializer.reset(Serializer);
 }
 
 void CompilerInstance::createSema(TranslationUnitKind TUKind,
-                                  CodeCompleteConsumer *CompletionConsumer,
-                                  SummaryConsumer *SummaryConsumer) {
+                                  CodeCompleteConsumer *CompletionConsumer) {
   TheSema.reset(new Sema(getPreprocessor(), getASTContext(), getASTConsumer(),
-                         TUKind, CompletionConsumer,
-                         hasSummaryContext() ? &getSummaryContext() : nullptr,
-                         SummaryConsumer));
+                         TUKind, CompletionConsumer, getSummaryContext(),
+                         getSummaryConsumer()));
 
   // Set up API notes.
   TheSema->APINotes.setSwiftVersion(getAPINotesOpts().SwiftVersion);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 38ca191c69c9b..90ef9095c95d4 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3270,7 +3270,7 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
     StringRef Format = A->getValue();
 
     // FIXME: don't hardcode these values
-    if (Format == "json" || Format == "yaml")
+    if (Format == "yaml")
       Opts.SummaryFormat = Format;
   };
 
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 767ef732146d3..4ccb6c8abd86b 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -966,16 +966,21 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     }
   }
 
-  bool ProcessesSummaries = !CI.getFrontendOpts().SummaryDirPath.empty() ||
-                            !CI.getFrontendOpts().SummaryFile.empty();
-  if (ProcessesSummaries && !CI.hasSummaryContext())
-    CI.createSummaryContext();
+  bool EmitSummaries = !CI.getFrontendOpts().EmitSummaryDir.empty();
+  bool ReadSummaries = !CI.getFrontendOpts().ReadSummaryDir.empty();
 
-  if (ProcessesSummaries && !CI.hasSummarySerializer())
-    CI.createSummarySerializer();
+  if (EmitSummaries || ReadSummaries) {
+    if (!CI.hasSummaryContext())
+      CI.createSummaryContext();
 
-  // FIXME: cleanup and lookup dirs recursively
-  if (!CI.getFrontendOpts().SummaryDirPath.empty()) {
+    if (!CI.hasSummarySerializer())
+      CI.createSummarySerializer();
+  }
+
+  if (EmitSummaries)
+    CI.createSummaryConsumer(getCurrentInput());
+
+  if (ReadSummaries) {
     // FIXME: this is a quick shortcut so large summaries are only evaluated
     // once, we should think about implementing it in a reasonable way...
     static const char *reducedCacheName =
@@ -984,7 +989,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         '.' + CI.getFrontendOpts().SummaryFormat;
 
     FileManager &FileMgr = CI.getFileManager();
-    StringRef SummaryDirPath = CI.getFrontendOpts().SummaryDirPath;
+    StringRef SummaryDirPath = CI.getFrontendOpts().ReadSummaryDir;
     if (auto SummaryDir = FileMgr.getOptionalDirectoryRef(SummaryDirPath)) {
       std::error_code EC;
       SmallString<128> DirNative;
@@ -1017,13 +1022,13 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         CI.getSummarySerializer().parse(buffer.str());
       }
 
-      CI.getSummaryContext().ReduceSummaries();
+      CI.getSummaryContext()->ReduceSummaries();
 
       if (!FS.exists(cacheFile)) {
         // FIXME: very quick printing of the summary to the cache file
         llvm::raw_fd_ostream fd(cacheFile, EC, llvm::sys::fs::CD_CreateAlways);
         CI.getSummarySerializer().serialize(
-            CI.getSummaryContext().FunctionSummaries, fd);
+            CI.getSummaryContext()->FunctionSummaries, fd);
       }
     }
   }
@@ -1399,17 +1404,8 @@ void ASTFrontendAction::ExecuteAction() {
   if (CI.hasCodeCompletionConsumer())
     CompletionConsumer = &CI.getCodeCompletionConsumer();
 
-  if (!CI.getFrontendOpts().SummaryFile.empty())
-    CI.createSummaryConsumer();
-
-  // Use a code summary consumer?
-  SummaryConsumer *SummaryConsumer = nullptr;
-  if (CI.hasSummaryConsumer())
-    SummaryConsumer = &CI.getSummaryConsumer();
-
   if (!CI.hasSema())
-    CI.createSema(getTranslationUnitKind(), CompletionConsumer,
-                  SummaryConsumer);
+    CI.createSema(getTranslationUnitKind(), CompletionConsumer);
 
   ParseAST(CI.getSema(), CI.getFrontendOpts().ShowStats,
            CI.getFrontendOpts().SkipFunctionBodies);
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 5c5fb005a3172..fd29145d4d773 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -255,7 +255,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       CurFPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp),
       Context(ctxt), Consumer(consumer), Diags(PP.getDiagnostics()),
       SourceMgr(PP.getSourceManager()), APINotes(SourceMgr, LangOpts),
-      SummaryCtx(SummaryCtx), SummaryCnsmr(SummaryConsumer),
+      SummaryCtx(SummaryCtx), TheSummaryConsumer(SummaryConsumer),
       AnalysisWarnings(*this), ThreadSafetyDeclCache(nullptr),
       LateTemplateParser(nullptr), LateTemplateParserCleanup(nullptr),
       OpaqueParser(nullptr), CurContext(nullptr), ExternalSource(nullptr),
@@ -1148,8 +1148,8 @@ void Sema::ActOnStartOfTranslationUnit() {
       getLangOpts().getCompilingModule() == LangOptions::CMK_HeaderUnit)
     HandleStartOfHeaderUnit();
 
-  if (SummaryCnsmr)
-    SummaryCnsmr->ProcessStartOfSourceFile();
+  if (TheSummaryConsumer)
+    TheSummaryConsumer->ProcessStartOfSourceFile();
 }
 
 void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) {
@@ -1225,8 +1225,8 @@ void Sema::ActOnEndOfTranslationUnit() {
   assert(DelayedDiagnostics.getCurrentPool() == nullptr
          && "reached end of translation unit with a pool attached?");
 
-  if (SummaryCnsmr)
-    SummaryCnsmr->ProcessEndOfSourceFile();
+  if (TheSummaryConsumer)
+    TheSummaryConsumer->ProcessEndOfSourceFile();
 
   // If code completion is enabled, don't perform any end-of-translation-unit
   // work.
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 5d427ced06159..86892920dd479 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -16695,11 +16695,10 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
   if (FD && !FD->isDeleted())
     checkTypeSupport(FD->getType(), FD->getLocation(), FD);
 
-  // FIXME: checking this should be done by the summary context
-  if (SummaryCnsmr && !LateTemplateParser && FD &&
+  if (TheSummaryConsumer && !LateTemplateParser && FD &&
       !SourceMgr.isInSystemHeader(FD->getLocation()) && !FD->getBuiltinID()) {
     SummaryCtx->SummarizeFunctionBody(FD);
-    SummaryCnsmr->ProcessFunctionSummary(*SummaryCtx->GetSummary(FD));
+    TheSummaryConsumer->ProcessFunctionSummary(*SummaryCtx->GetSummary(FD));
   }
 
   return dcl;
diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 06b7f434187df..b5b95c2ab7ae1 100644
--- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -128,8 +128,8 @@ class AnalysisConsumer : public AnalysisASTConsumer,
                    std::unique_ptr<CodeInjector> injector)
       : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr),
         PP(CI.getPreprocessor()), OutDir(outdir), Opts(opts), Plugins(plugins),
-        Injector(std::move(injector)), CTU(CI), SummaryCtx(nullptr),
-        MacroExpansions(CI.getLangOpts()) {
+        Injector(std::move(injector)), CTU(CI),
+        SummaryCtx(CI.getSummaryContext()), MacroExpansions(CI.getLangOpts()) {
     EntryPointStat::lockRegistry();
     DigestAnalyzerOptions();
 
@@ -153,9 +153,6 @@ class AnalysisConsumer : public AnalysisASTConsumer,
     if (Opts.ShouldDisplayMacroExpansions)
       MacroExpansions.registerForPreprocessor(PP);
 
-    if (CI.hasSummaryContext())
-      SummaryCtx = &CI.getSummaryContext();
-
     // Visitor options.
     ShouldWalkTypesOfTypeLocs = false;
   }
diff --git a/clang/lib/Summary/SummaryAttribute.cpp b/clang/lib/Summary/SummaryAttribute.cpp
index e6b4a3e2633df..e1ad0a0eb6f51 100644
--- a/clang/lib/Summary/SummaryAttribute.cpp
+++ b/clang/lib/Summary/SummaryAttribute.cpp
@@ -93,4 +93,4 @@ bool NoWritePtrParameterAttr::merge(const FunctionSummary &Caller,
   return !Caller.callsOpaqueObject() && Caller.getAttributes().count(this) &&
          Callee && Callee->getAttributes().count(this);
 }
-} // namespace clang
\ No newline at end of file
+} // namespace clang
diff --git a/clang/lib/Summary/SummaryConsumer.cpp b/clang/lib/Summary/SummaryConsumer.cpp
index 2d7a9f8a1c2fc..71a42ba0eb500 100644
--- a/clang/lib/Summary/SummaryConsumer.cpp
+++ b/clang/lib/Summary/SummaryConsumer.cpp
@@ -11,4 +11,4 @@ SerializingSummaryConsumer::SerializingSummaryConsumer(
 void SerializingSummaryConsumer::ProcessEndOfSourceFile() {
   Serializer->serialize(SummaryCtx->FunctionSummaries, OS);
 }
-} // namespace clang
\ No newline at end of file
+} // namespace clang

>From d3923cedc2f38f171217dd5410158e4174dc8c9c Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Fri, 4 Jul 2025 18:41:01 +0200
Subject: [PATCH 41/48] don't generate summary format argument if not needed

---
 clang/lib/Frontend/CompilerInvocation.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 90ef9095c95d4..4cea62ebbb0ef 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2985,7 +2985,8 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts,
                 Lang + HeaderUnit + Header + ModuleMap + Preprocessed);
   }
 
-  GenerateArg(Consumer, OPT_summary_format_EQ, Opts.SummaryFormat);
+  if (!Opts.EmitSummaryDir.empty() || !Opts.ReadSummaryDir.empty())
+    GenerateArg(Consumer, OPT_summary_format_EQ, Opts.SummaryFormat);
 
   // OPT_INPUT has a unique class, generate it directly.
   for (const auto &Input : Opts.Inputs)

>From 449adeb22ad3397304d91cd9c6767d18a7acee12 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Fri, 4 Jul 2025 19:43:30 +0200
Subject: [PATCH 42/48] emit summaries even if obj path has no base dir path

---
 clang/lib/Driver/ToolChains/Clang.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bdeb749699bea..a3af722d0ada9 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5473,15 +5473,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_summaries_dir_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_summaries_dir_EQ);
 
-  if (Arg *A = Args.getLastArg(options::OPT_summary_format_EQ))
+  if (Args.getLastArg(options::OPT_summary_format_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_summary_format_EQ);
 
   if (const Arg *A = Args.getLastArg(options::OPT_emit_summaries_EQ)) {
     std::string EmitSummaryDir = ".";
 
     if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
-        A->containsValue("obj") && FinalOutput)
-      EmitSummaryDir = llvm::sys::path::parent_path(FinalOutput->getValue());
+        A->containsValue("obj") && FinalOutput) {
+      StringRef ObjDir = llvm::sys::path::parent_path(FinalOutput->getValue());
+      if (!ObjDir.empty())
+        EmitSummaryDir = ObjDir;
+    }
 
     CmdArgs.push_back(
         Args.MakeArgString(Twine("-emit-summary-dir=") + EmitSummaryDir));

>From 8449340b5bd5a87cc8ca539400a46843108cd90b Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sun, 6 Jul 2025 03:27:53 +0200
Subject: [PATCH 43/48] binary serialization prototype

---
 clang/include/clang/Driver/Options.td         |   2 +-
 .../clang/Summary/SummarySerialization.h      |  59 ++++
 clang/lib/Frontend/CompilerInstance.cpp       |   6 +-
 clang/lib/Frontend/CompilerInvocation.cpp     |   2 +-
 clang/lib/Frontend/FrontendAction.cpp         |   4 +-
 clang/lib/Summary/SummarySerialization.cpp    | 314 ++++++++++++++++++
 6 files changed, 383 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 625ce2299fa70..052ee8cd42bf0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5956,7 +5956,7 @@ def emit_summaries : Flag<["-", "--"], "emit-summaries">, Flags<[NoXarchOption]>
   HelpText<"Alias for --emit-summaries=cwd">;
 def summary_format_EQ : Joined<["-", "--"], "summary-format=">, Flags<[NoXarchOption]>,
   Visibility<[ClangOption, CC1Option]>,
-  HelpText<"The format of the emitted summaries. Can be set to 'json' (default) or 'yaml'">;
+  HelpText<"The format of the emitted summaries. Can be set to 'json' (default), 'yaml', or 'binary'">;
 def save_stats_EQ : Joined<["-", "--"], "save-stats=">, Flags<[NoXarchOption]>,
   HelpText<"Save llvm statistics.">;
 def save_stats : Flag<["-", "--"], "save-stats">, Flags<[NoXarchOption]>,
diff --git a/clang/include/clang/Summary/SummarySerialization.h b/clang/include/clang/Summary/SummarySerialization.h
index e2c3d66b41bdb..a2140f96c4a63 100644
--- a/clang/include/clang/Summary/SummarySerialization.h
+++ b/clang/include/clang/Summary/SummarySerialization.h
@@ -2,6 +2,8 @@
 #define LLVM_CLANG_SUMMARY_SUMMARYSERIALIZATION_H
 
 #include "clang/Summary/SummaryContext.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
 
 namespace clang {
 class SummarySerializer {
@@ -38,6 +40,63 @@ class YAMLSummarySerializer : public SummarySerializer {
                  raw_ostream &OS) override;
   void parse(StringRef) override;
 };
+
+class BinarySummarySerializer : public SummarySerializer {
+  enum BlockIDs {
+    ATTRIBUTE_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID,
+    IDENTIFIER_BLOCK_ID,
+    SUMMARY_BLOCK_ID
+  };
+
+  enum AttributeRecordTypes {
+    ATTR = 1,
+  };
+
+  enum IdentifierRecordTypes {
+    IDENTIFIER = 1,
+  };
+
+  enum SummaryRecordTypes { FUNCTION = 1 };
+
+  // FIXME: get rid of this global state
+  std::map<const SummaryAttr *, uint64_t> AttrIDs;
+  std::map<std::string, uint64_t> FunctionIDs;
+
+  std::vector<const SummaryAttr *> ParsedAttrIDs;
+  std::vector<std::string> ParsedFunctionIDs;
+
+  llvm::SmallVector<char, 32> Buffer;
+  llvm::BitstreamWriter Stream;
+
+  void PopulateBlockInfo();
+  void EmitAttributeBlock();
+  void EmitIdentifierBlock();
+  void EmitSummaryBlock();
+
+  void EmitBlock(unsigned ID, const char *Name);
+  void EmitRecord(unsigned ID, const char *Name);
+
+  llvm::Error handleBlockStartCommon(unsigned ID,
+                                     llvm::BitstreamCursor &Stream);
+  llvm::Error handleBlockRecordsCommon(
+      llvm::BitstreamCursor &Stream,
+      llvm::function_ref<void(const SmallVector<uint64_t, 64> &)>);
+
+  llvm::Error parseAttributeBlock(llvm::BitstreamCursor &Stream);
+  llvm::Error parseIdentifierBlock(llvm::BitstreamCursor &Stream);
+  llvm::Error parseSummaryBlock(llvm::BitstreamCursor &Stream);
+  llvm::Error parseBlock(unsigned ID, llvm::BitstreamCursor &Stream);
+  llvm::Error parseImpl(StringRef Buffer);
+
+public:
+  BinarySummarySerializer(SummaryContext &SummaryCtx)
+      : SummarySerializer(SummaryCtx), Stream(Buffer) {};
+
+  void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
+                 raw_ostream &OS) override;
+  void parse(StringRef) override;
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_SUMMARY_SUMMARYSERIALIZATION_H
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 32c1c564ab48e..400cafa96e4d1 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -750,8 +750,10 @@ void CompilerInstance::createSummaryConsumer(FrontendInputFile Input) {
 
   llvm::SmallString<32> SummaryFile = EmitSummaryDir;
   llvm::sys::path::append(SummaryFile, Input.getFile());
+
+  StringRef Format = getFrontendOpts().SummaryFormat;
   llvm::sys::path::replace_extension(SummaryFile,
-                                     getFrontendOpts().SummaryFormat);
+                                     Format == "binary" ? "summary" : Format);
 
   std::error_code EC;
   SummaryOS.reset(new llvm::raw_fd_ostream(SummaryFile, EC,
@@ -778,6 +780,8 @@ void CompilerInstance::createSummarySerializer() {
 
   if (Format == "yaml")
     Serializer = new YAMLSummarySerializer(*getSummaryContext());
+  else if (Format == "binary")
+    Serializer = new BinarySummarySerializer(*getSummaryContext());
   else
     Serializer = new JSONSummarySerializer(*getSummaryContext());
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 4cea62ebbb0ef..1d3006505d110 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3271,7 +3271,7 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
     StringRef Format = A->getValue();
 
     // FIXME: don't hardcode these values
-    if (Format == "yaml")
+    if (Format == "yaml" || Format == "binary")
       Opts.SummaryFormat = Format;
   };
 
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 4ccb6c8abd86b..3c84450ee9281 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -986,7 +986,9 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     static const char *reducedCacheName =
         "reduced-summary-so-that-we-do-not-have-to-evaluate-it-every-time";
     const std::string summaryExtension =
-        '.' + CI.getFrontendOpts().SummaryFormat;
+        '.' + (CI.getFrontendOpts().SummaryFormat == "binary"
+                   ? "summary"
+                   : CI.getFrontendOpts().SummaryFormat);
 
     FileManager &FileMgr = CI.getFileManager();
     StringRef SummaryDirPath = CI.getFrontendOpts().ReadSummaryDir;
diff --git a/clang/lib/Summary/SummarySerialization.cpp b/clang/lib/Summary/SummarySerialization.cpp
index 1297db37ee017..95d68f80f3327 100644
--- a/clang/lib/Summary/SummarySerialization.cpp
+++ b/clang/lib/Summary/SummarySerialization.cpp
@@ -1,4 +1,5 @@
 #include "clang/Summary/SummarySerialization.h"
+#include "llvm/Bitstream/BitstreamReader.h"
 #include "llvm/Support/JSON.h"
 
 namespace llvm {
@@ -177,4 +178,317 @@ void YAMLSummarySerializer::parse(StringRef Buffer) {
                               summary->getCalls(),
                               summary->callsOpaqueObject());
 }
+
+void BinarySummarySerializer::PopulateBlockInfo() {
+  Stream.EnterBlockInfoBlock();
+  EmitBlock(ATTRIBUTE_BLOCK_ID, "ATTRIBUTES");
+  EmitRecord(ATTR, "ATTR");
+  EmitBlock(IDENTIFIER_BLOCK_ID, "IDENTIFIERS");
+  EmitRecord(IDENTIFIER, "IDENTIFIER");
+  EmitBlock(SUMMARY_BLOCK_ID, "SUMMARIES");
+  EmitRecord(FUNCTION, "FUNCTION");
+  Stream.ExitBlock();
+}
+
+void BinarySummarySerializer::EmitAttributeBlock() {
+  Stream.EnterSubblock(ATTRIBUTE_BLOCK_ID, 3);
+
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
+  Abv->Add(llvm::BitCodeAbbrevOp(ATTR));
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 8));
+  unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
+
+  uint64_t ID = 0;
+  uint64_t Record[] = {ATTR};
+
+  for (auto &&Attr : SummaryCtx->Attributes) {
+    AttrIDs[Attr.get()] = ID++;
+    Stream.EmitRecordWithArray(Abbrev, Record, Attr->serialize());
+  }
+
+  Stream.ExitBlock();
+}
+
+void BinarySummarySerializer::EmitIdentifierBlock() {
+  Stream.EnterSubblock(IDENTIFIER_BLOCK_ID, 3);
+
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
+  Abv->Add(llvm::BitCodeAbbrevOp(IDENTIFIER));
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 8));
+  unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
+
+  uint64_t ID = 0;
+  uint64_t Record[] = {IDENTIFIER};
+
+  for (auto &&Summary : SummaryCtx->FunctionSummaries) {
+    FunctionIDs[Summary->getID().str()] = ID++;
+    Stream.EmitRecordWithArray(Abbrev, Record, Summary->getID());
+
+    for (auto &&Call : Summary->getCalls()) {
+      if (FunctionIDs.count(Call))
+        continue;
+
+      FunctionIDs[Call] = ID++;
+      Stream.EmitRecordWithArray(Abbrev, Record, Call);
+    }
+  }
+
+  Stream.ExitBlock();
+}
+
+void BinarySummarySerializer::EmitSummaryBlock() {
+  Stream.EnterSubblock(SUMMARY_BLOCK_ID, 3);
+
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
+  Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION));
+  // The number of attributes.
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 7));
+  // The number of callees.
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 32));
+  // Whether there are opaque callees or not.
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 1));
+  // An array of the following form: [ID, Attr0...AttrN, Callee0...CalleeN]
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
+  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 32));
+  unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
+
+  for (auto &&Summary : SummaryCtx->FunctionSummaries) {
+    SmallVector<uint64_t, 64> Record;
+
+    Record.push_back(Summary->getAttributes().size());
+    Record.push_back(Summary->getCalls().size());
+    Record.push_back(Summary->callsOpaqueObject());
+
+    Record.push_back(1 + Summary->getAttributes().size() +
+                     Summary->getCalls().size());
+    Record.push_back(FunctionIDs[Summary->getID().str()]);
+    for (auto &&Attr : Summary->getAttributes())
+      Record.push_back(AttrIDs[Attr]);
+    for (auto &&Call : Summary->getCalls())
+      Record.push_back(FunctionIDs[Call]);
+
+    Stream.EmitRecord(FUNCTION, Record, Abbrev);
+  }
+
+  Stream.ExitBlock();
+}
+
+void BinarySummarySerializer::EmitBlock(unsigned ID, const char *Name) {
+  SmallVector<uint64_t, 64> Buffer;
+  Buffer.push_back(ID);
+  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Buffer);
+
+  Buffer.clear();
+  while (*Name)
+    Buffer.push_back(*Name++);
+  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Buffer);
+}
+
+void BinarySummarySerializer::EmitRecord(unsigned ID, const char *Name) {
+  SmallVector<uint64_t, 64> Buffer;
+  Buffer.push_back(ID);
+  while (*Name)
+    Buffer.push_back(*Name++);
+  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Buffer);
+}
+
+void BinarySummarySerializer::serialize(
+    const std::vector<std::unique_ptr<FunctionSummary>> &, raw_ostream &OS) {
+  Stream.Emit((unsigned)'C', 8);
+  Stream.Emit((unsigned)'T', 8);
+  Stream.Emit((unsigned)'U', 8);
+  Stream.Emit((unsigned)'S', 8);
+
+  PopulateBlockInfo();
+  EmitAttributeBlock();
+  EmitIdentifierBlock();
+  EmitSummaryBlock();
+
+  Stream.FlushToWord();
+  OS << Buffer;
+  OS.flush();
+}
+
+llvm::Error
+BinarySummarySerializer::handleBlockStartCommon(unsigned ID,
+                                                llvm::BitstreamCursor &Stream) {
+  unsigned NumWords = 0;
+  if (llvm::Error Err = Stream.EnterSubBlock(ID, &NumWords))
+    return Err;
+
+  llvm::BitstreamEntry Entry;
+  if (llvm::Error E =
+          Stream.advance(llvm::BitstreamCursor::AF_DontAutoprocessAbbrevs)
+              .moveInto(Entry))
+    return E;
+
+  if (Entry.Kind != llvm::BitstreamEntry::Record &&
+      Entry.ID != llvm::bitc::DEFINE_ABBREV)
+    return llvm::createStringError("expected abbrev");
+
+  if (llvm::Error Err = Stream.ReadAbbrevRecord())
+    return Err;
+
+  return llvm::Error::success();
+}
+
+llvm::Error BinarySummarySerializer::handleBlockRecordsCommon(
+    llvm::BitstreamCursor &Stream,
+    llvm::function_ref<void(const SmallVector<uint64_t, 64> &)> Callback) {
+  while (true) {
+    llvm::BitstreamEntry Entry;
+    if (llvm::Error E =
+            Stream.advance(llvm::BitstreamCursor::AF_DontAutoprocessAbbrevs)
+                .moveInto(Entry))
+      return E;
+
+    if (Entry.Kind == llvm::BitstreamEntry::EndBlock)
+      return llvm::Error::success();
+
+    if (Entry.Kind != llvm::BitstreamEntry::Record)
+      return llvm::createStringError("expected record");
+
+    SmallVector<uint64_t, 64> Record;
+    unsigned Code;
+    if (llvm::Error E = Stream.readRecord(Entry.ID, Record).moveInto(Code))
+      return E;
+
+    Callback(Record);
+  }
+}
+
+llvm::Error
+BinarySummarySerializer::parseAttributeBlock(llvm::BitstreamCursor &Stream) {
+  if (llvm::Error Err = handleBlockStartCommon(ATTRIBUTE_BLOCK_ID, Stream))
+    return Err;
+
+  ParsedAttrIDs.clear();
+  if (llvm::Error Err = handleBlockRecordsCommon(Stream, [&](auto &&Record) {
+        for (auto &&CtxAttr : SummaryCtx->Attributes) {
+          llvm::SmallString<64> AttributeStr(Record.begin(), Record.end());
+
+          if (CtxAttr->parse(AttributeStr.str())) {
+            ParsedAttrIDs.push_back(CtxAttr.get());
+            break;
+          }
+        }
+      }))
+    return Err;
+
+  return llvm::Error::success();
+}
+
+llvm::Error
+BinarySummarySerializer::parseIdentifierBlock(llvm::BitstreamCursor &Stream) {
+  if (llvm::Error Err = handleBlockStartCommon(IDENTIFIER_BLOCK_ID, Stream))
+    return Err;
+
+  ParsedFunctionIDs.clear();
+  if (llvm::Error Err = handleBlockRecordsCommon(Stream, [&](auto &&Record) {
+        llvm::SmallString<64> IdentifierStr(Record.begin(), Record.end());
+        ParsedFunctionIDs.emplace_back(IdentifierStr.str().str());
+      }))
+    return Err;
+
+  return llvm::Error::success();
+}
+
+llvm::Error
+BinarySummarySerializer::parseSummaryBlock(llvm::BitstreamCursor &Stream) {
+  if (llvm::Error Err = handleBlockStartCommon(SUMMARY_BLOCK_ID, Stream))
+    return Err;
+
+  if (llvm::Error Err = handleBlockRecordsCommon(Stream, [&](auto &&Record) {
+        int AttrCnt = Record[0];
+        int CallCnt = Record[1];
+        bool Opaque = Record[2];
+        int ID = Record[4];
+        int I = 0;
+
+        std::set<const SummaryAttr *> Attrs;
+        while (AttrCnt) {
+          Attrs.emplace(ParsedAttrIDs[Record[5 + I]]);
+          ++I;
+          --AttrCnt;
+        }
+
+        std::set<std::string> Calls;
+        while (CallCnt) {
+          Calls.emplace(ParsedFunctionIDs[Record[5 + I]]);
+          ++I;
+          --CallCnt;
+        }
+
+        SummaryCtx->CreateSummary(ParsedFunctionIDs[ID], std::move(Attrs),
+                                  std::move(Calls), Opaque);
+      }))
+    return Err;
+
+  return llvm::Error::success();
+}
+
+llvm::Error BinarySummarySerializer::parseBlock(unsigned ID,
+                                                llvm::BitstreamCursor &Stream) {
+  if (ID == llvm::bitc::BLOCKINFO_BLOCK_ID) {
+    std::optional<llvm::BitstreamBlockInfo> NewBlockInfo;
+    if (llvm::Error Err = Stream.ReadBlockInfoBlock().moveInto(NewBlockInfo))
+      return Err;
+    if (!NewBlockInfo)
+      return llvm::createStringError("expected block info");
+
+    return llvm::Error::success();
+  }
+
+  if (ID == ATTRIBUTE_BLOCK_ID)
+    return parseAttributeBlock(Stream);
+
+  if (ID == IDENTIFIER_BLOCK_ID)
+    return parseIdentifierBlock(Stream);
+
+  if (ID == SUMMARY_BLOCK_ID)
+    return parseSummaryBlock(Stream);
+
+  return llvm::createStringError("unexpected block");
+}
+
+llvm::Error BinarySummarySerializer::parseImpl(StringRef Buffer) {
+  llvm::BitstreamCursor Stream(Buffer);
+
+  llvm::SimpleBitstreamCursor::word_t Magic[4] = {0};
+  unsigned char ExpectedMagic[] = {'C', 'T', 'U', 'S'};
+  for (int i = 0; i < 4; ++i) {
+    if (llvm::Error Err = Stream.Read(8).moveInto(Magic[i]))
+      return Err;
+
+    if (Magic[i] != ExpectedMagic[i])
+      return llvm::createStringError("invalid magic number");
+  }
+
+  while (!Stream.AtEndOfStream()) {
+    Expected<unsigned> MaybeCode = Stream.ReadCode();
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    if (MaybeCode.get() != llvm::bitc::ENTER_SUBBLOCK)
+      return llvm::createStringError("expected record");
+
+    Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID();
+    if (!MaybeBlockID)
+      return MaybeBlockID.takeError();
+
+    if (llvm::Error Err = parseBlock(MaybeBlockID.get(), Stream))
+      return Err;
+  }
+
+  return llvm::Error::success();
+}
+
+void BinarySummarySerializer::parse(StringRef Buffer) {
+  if (llvm::Error Err = parseImpl(Buffer)) {
+    handleAllErrors(std::move(Err), [&](const llvm::ErrorInfoBase &EI) {
+      std::ignore = EI.message();
+    });
+  }
+}
 } // namespace clang

>From 767e3f9216769739c29058bcf9fcbc139043fa53 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Fri, 11 Jul 2025 02:21:26 +0200
Subject: [PATCH 44/48] unify summary layout across the different formats

---
 clang/include/clang/Summary/SummaryContext.h  |  49 ++-
 .../clang/Summary/SummarySerialization.h      |  24 +-
 clang/lib/Frontend/FrontendAction.cpp         |   3 +-
 clang/lib/Summary/SummaryConsumer.cpp         |   2 +-
 clang/lib/Summary/SummaryContext.cpp          |  61 ++-
 clang/lib/Summary/SummarySerialization.cpp    | 397 +++++++++++-------
 6 files changed, 320 insertions(+), 216 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index f90191787e227..e1c0cdd480720 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -3,24 +3,23 @@
 
 #include "clang/Summary/SummaryAttribute.h"
 #include "clang/Summary/SummaryConsumer.h"
-#include "llvm/Support/JSON.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <set>
 
 namespace clang {
 class FunctionSummary {
-  std::string ID;
+  size_t ID;
   std::set<const SummaryAttr *> Attrs;
-  std::set<std::string> Calls;
+  std::set<size_t> Calls;
   bool CallsOpaque;
 
 public:
-  FunctionSummary(std::string ID, std::set<const SummaryAttr *> Attrs,
-                  std::set<std::string> Calls, bool CallsOpaque);
+  FunctionSummary(size_t ID, std::set<const SummaryAttr *> Attrs,
+                  std::set<size_t> Calls, bool CallsOpaque);
 
-  StringRef getID() const { return ID; }
+  size_t getID() const { return ID; }
   const std::set<const SummaryAttr *> &getAttributes() const { return Attrs; }
-  const std::set<std::string> &getCalls() const { return Calls; }
+  const std::set<size_t> &getCalls() const { return Calls; }
   bool callsOpaqueObject() const { return CallsOpaque; }
 
   template <typename T> bool hasAttribute() const {
@@ -35,31 +34,45 @@ class FunctionSummary {
   void replaceAttributes(std::set<const SummaryAttr *> Attrs) {
     this->Attrs = std::move(Attrs);
   }
-
-  friend struct llvm::yaml::MappingTraits<clang::FunctionSummary>;
-  friend struct llvm::yaml::MappingContextTraits<clang::FunctionSummary,
-                                                 clang::SummaryContext>;
 };
 
 class SummaryContext {
-public:
-  std::map<StringRef, const FunctionSummary *> IDToSummary;
+  std::map<std::string, size_t> IdentifierToID;
+  std::vector<StringRef> Identifiers;
+
+  std::map<size_t, const FunctionSummary *> IDToSummary;
   std::vector<std::unique_ptr<FunctionSummary>> FunctionSummaries;
 
   std::map<SummaryAttrKind, const SummaryAttr *> KindToAttribute;
   std::vector<std::unique_ptr<SummaryAttr>> Attributes;
 
-  void CreateSummary(std::string ID, std::set<const SummaryAttr *> Attrs,
-                     std::set<std::string> Calls, bool CallsOpaque);
-  bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
-
-  template <typename T> void registerAttr();
+  template <typename T> void RegisterAttr();
 
+public:
   SummaryContext();
 
+  size_t GetOrInsertStoredIdentifierIdx(StringRef ID);
+  std::optional<size_t> GetStoredIdentifierIdx(StringRef ID) const;
+
+  void CreateSummary(size_t ID, std::set<const SummaryAttr *> Attrs,
+                     std::set<size_t> Calls, bool CallsOpaque);
+  bool ReduceFunctionSummary(FunctionSummary &FunctionSummary);
+
+  const std::vector<std::unique_ptr<FunctionSummary>> &GetSummaries() const {
+    return FunctionSummaries;
+  };
+  const std::vector<std::unique_ptr<SummaryAttr>> &GetAttributes() const {
+    return Attributes;
+  };
+  const std::vector<StringRef> &GetIdentifiers() const { return Identifiers; };
+
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
   void ReduceSummaries();
+
+  friend struct llvm::yaml::SequenceTraits<
+      std::vector<std::unique_ptr<clang::FunctionSummary>>>;
+  friend class YAMLSummarySerializer;
 };
 } // namespace clang
 
diff --git a/clang/include/clang/Summary/SummarySerialization.h b/clang/include/clang/Summary/SummarySerialization.h
index a2140f96c4a63..ca22be3b8da83 100644
--- a/clang/include/clang/Summary/SummarySerialization.h
+++ b/clang/include/clang/Summary/SummarySerialization.h
@@ -16,8 +16,7 @@ class SummarySerializer {
   SummarySerializer(SummaryContext &SummaryCtx) : SummaryCtx(&SummaryCtx) {};
   virtual ~SummarySerializer() = default;
 
-  virtual void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
-                         raw_ostream &OS) = 0;
+  virtual void serialize(raw_ostream &OS) = 0;
   virtual void parse(StringRef) = 0;
 };
 
@@ -26,8 +25,7 @@ class JSONSummarySerializer : public SummarySerializer {
   JSONSummarySerializer(SummaryContext &SummaryCtx)
       : SummarySerializer(SummaryCtx) {};
 
-  void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
-                 raw_ostream &OS) override;
+  void serialize(raw_ostream &OS) override;
   void parse(StringRef) override;
 };
 
@@ -36,15 +34,14 @@ class YAMLSummarySerializer : public SummarySerializer {
   YAMLSummarySerializer(SummaryContext &SummaryCtx)
       : SummarySerializer(SummaryCtx) {};
 
-  void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
-                 raw_ostream &OS) override;
+  void serialize(raw_ostream &OS) override;
   void parse(StringRef) override;
 };
 
 class BinarySummarySerializer : public SummarySerializer {
   enum BlockIDs {
-    ATTRIBUTE_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID,
-    IDENTIFIER_BLOCK_ID,
+    IDENTIFIER_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID,
+    ATTRIBUTE_BLOCK_ID,
     SUMMARY_BLOCK_ID
   };
 
@@ -58,12 +55,8 @@ class BinarySummarySerializer : public SummarySerializer {
 
   enum SummaryRecordTypes { FUNCTION = 1 };
 
-  // FIXME: get rid of this global state
-  std::map<const SummaryAttr *, uint64_t> AttrIDs;
-  std::map<std::string, uint64_t> FunctionIDs;
-
-  std::vector<const SummaryAttr *> ParsedAttrIDs;
-  std::vector<std::string> ParsedFunctionIDs;
+  std::map<size_t, size_t> LocalToContextID;
+  std::map<size_t, const clang::SummaryAttr *> AttrIDToPtr;
 
   llvm::SmallVector<char, 32> Buffer;
   llvm::BitstreamWriter Stream;
@@ -92,8 +85,7 @@ class BinarySummarySerializer : public SummarySerializer {
   BinarySummarySerializer(SummaryContext &SummaryCtx)
       : SummarySerializer(SummaryCtx), Stream(Buffer) {};
 
-  void serialize(const std::vector<std::unique_ptr<FunctionSummary>> &,
-                 raw_ostream &OS) override;
+  void serialize(raw_ostream &OS) override;
   void parse(StringRef) override;
 };
 
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 3c84450ee9281..d8e201804c067 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -1029,8 +1029,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
       if (!FS.exists(cacheFile)) {
         // FIXME: very quick printing of the summary to the cache file
         llvm::raw_fd_ostream fd(cacheFile, EC, llvm::sys::fs::CD_CreateAlways);
-        CI.getSummarySerializer().serialize(
-            CI.getSummaryContext()->FunctionSummaries, fd);
+        CI.getSummarySerializer().serialize(fd);
       }
     }
   }
diff --git a/clang/lib/Summary/SummaryConsumer.cpp b/clang/lib/Summary/SummaryConsumer.cpp
index 71a42ba0eb500..3c7a08cbcd91b 100644
--- a/clang/lib/Summary/SummaryConsumer.cpp
+++ b/clang/lib/Summary/SummaryConsumer.cpp
@@ -9,6 +9,6 @@ SerializingSummaryConsumer::SerializingSummaryConsumer(
       Serializer(&Serializer) {}
 
 void SerializingSummaryConsumer::ProcessEndOfSourceFile() {
-  Serializer->serialize(SummaryCtx->FunctionSummaries, OS);
+  Serializer->serialize(OS);
 }
 } // namespace clang
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 62540a26e8ea1..f2e72e29ab28d 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -14,7 +14,8 @@ std::string GetUSR(const FunctionDecl *FD) {
 }
 
 class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
-  std::set<std::string> Calls;
+  SummaryContext *Context;
+  std::set<size_t> Calls;
   bool callsOpaqueSymbol = false;
 
   virtual void
@@ -40,11 +41,13 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
       return;
     }
 
-    Calls.emplace(GetUSR(Callee));
+    Calls.emplace(Context->GetOrInsertStoredIdentifierIdx(GetUSR(Callee)));
   }
 
 public:
-  std::pair<std::set<std::string>, bool> collect(const FunctionDecl *FD) {
+  CallCollector(SummaryContext &Context) : Context(&Context) {}
+
+  std::pair<std::set<size_t>, bool> collect(const FunctionDecl *FD) {
     using namespace ast_matchers;
     MatchFinder Finder;
 
@@ -57,44 +60,64 @@ class CallCollector : public ast_matchers::MatchFinder::MatchCallback {
 };
 } // namespace
 
-FunctionSummary::FunctionSummary(std::string ID,
+FunctionSummary::FunctionSummary(size_t ID,
                                  std::set<const SummaryAttr *> FunctionAttrs,
-                                 std::set<std::string> Calls, bool CallsOpaque)
-    : ID(std::move(ID)), Attrs(std::move(FunctionAttrs)),
-      Calls(std::move(Calls)), CallsOpaque(CallsOpaque) {}
+                                 std::set<size_t> Calls, bool CallsOpaque)
+    : ID(ID), Attrs(std::move(FunctionAttrs)), Calls(std::move(Calls)),
+      CallsOpaque(CallsOpaque) {}
 
-template <typename T> void SummaryContext::registerAttr() {
+template <typename T> void SummaryContext::RegisterAttr() {
   std::unique_ptr<T> attr(new T());
   SummaryAttrKind Kind = attr->getKind();
 
   if (KindToAttribute.count(Kind))
     return;
 
+  if (!Attributes.empty())
+    assert(Attributes.back()->getKind() == Kind - 1 &&
+           "attributes are not stored continously");
+
   KindToAttribute[Kind] = Attributes.emplace_back(std::move(attr)).get();
 }
 
 SummaryContext::SummaryContext() {
-  registerAttr<NoWriteGlobalAttr>();
-  registerAttr<NoWritePtrParameterAttr>();
+  RegisterAttr<NoWriteGlobalAttr>();
+  RegisterAttr<NoWritePtrParameterAttr>();
+}
+
+size_t SummaryContext::GetOrInsertStoredIdentifierIdx(StringRef ID) {
+  auto &&[Element, Inserted] =
+      IdentifierToID.try_emplace(ID.str(), IdentifierToID.size());
+  if (Inserted)
+    Identifiers.emplace_back(Element->first);
+
+  return Element->second;
 }
 
-void SummaryContext::CreateSummary(std::string ID,
+std::optional<size_t>
+SummaryContext::GetStoredIdentifierIdx(StringRef ID) const {
+  if (IdentifierToID.count(ID.str()))
+    return IdentifierToID.at(ID.str());
+
+  return std::nullopt;
+}
+
+void SummaryContext::CreateSummary(size_t ID,
                                    std::set<const SummaryAttr *> Attrs,
-                                   std::set<std::string> Calls,
-                                   bool CallsOpaque) {
+                                   std::set<size_t> Calls, bool CallsOpaque) {
   if (IDToSummary.count(ID))
     return;
 
   auto Summary = std::make_unique<FunctionSummary>(
-      std::move(ID), std::move(Attrs), std::move(Calls), CallsOpaque);
+      ID, std::move(Attrs), std::move(Calls), CallsOpaque);
   auto *SummaryPtr = FunctionSummaries.emplace_back(std::move(Summary)).get();
   IDToSummary[SummaryPtr->getID()] = SummaryPtr;
 }
 
 const FunctionSummary *
 SummaryContext::GetSummary(const FunctionDecl *FD) const {
-  auto USR = GetUSR(FD);
-  return IDToSummary.count(USR) ? IDToSummary.at(USR) : nullptr;
+  std::optional<size_t> ID = GetStoredIdentifierIdx(GetUSR(FD));
+  return ID ? IDToSummary.at(*ID) : nullptr;
 }
 
 void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
@@ -105,9 +128,9 @@ void SummaryContext::SummarizeFunctionBody(const FunctionDecl *FD) {
       Attrs.emplace(Attr.get());
   }
 
-  auto [calls, opaque] = CallCollector().collect(FD);
-
-  CreateSummary(GetUSR(FD), std::move(Attrs), std::move(calls), opaque);
+  auto [CollectedCalls, Opaque] = CallCollector(*this).collect(FD);
+  CreateSummary(GetOrInsertStoredIdentifierIdx(GetUSR(FD)), std::move(Attrs),
+                std::move(CollectedCalls), Opaque);
 }
 
 bool SummaryContext::ReduceFunctionSummary(FunctionSummary &Function) {
diff --git a/clang/lib/Summary/SummarySerialization.cpp b/clang/lib/Summary/SummarySerialization.cpp
index 95d68f80f3327..bd5b1e4463096 100644
--- a/clang/lib/Summary/SummarySerialization.cpp
+++ b/clang/lib/Summary/SummarySerialization.cpp
@@ -4,87 +4,149 @@
 
 namespace llvm {
 namespace yaml {
-template <> struct MappingTraits<clang::FunctionSummary> {
-  static void mapping(IO &io, clang::FunctionSummary &FS) {
+struct FunctionSummaryProxy {
+  size_t ID;
+  std::vector<size_t> Attrs;
+  std::vector<size_t> Calls;
+  bool CallsOpaque;
+
+  FunctionSummaryProxy() = default;
+  FunctionSummaryProxy(const clang::FunctionSummary &Summary)
+      : ID(Summary.getID()), CallsOpaque(Summary.callsOpaqueObject()) {
+    for (auto &&Attr : Summary.getAttributes())
+      Attrs.emplace_back(Attr->getKind());
+
+    for (auto &&Call : Summary.getCalls())
+      Calls.emplace_back(Call);
+  }
+};
+
+template <> struct MappingTraits<FunctionSummaryProxy> {
+  static void mapping(IO &io, FunctionSummaryProxy &FS) {
     io.mapRequired("id", FS.ID);
+    io.mapRequired("fn_attrs", FS.Attrs);
+    io.mapRequired("opaque_calls", FS.CallsOpaque);
+    io.mapRequired("calls", FS.Calls);
+  }
+};
+
+template <> struct SequenceTraits<std::vector<FunctionSummaryProxy>> {
+  static size_t size(IO &io, std::vector<FunctionSummaryProxy> &seq) {
+    return seq.size();
+  }
+
+  static FunctionSummaryProxy &
+  element(IO &io, std::vector<FunctionSummaryProxy> &seq, size_t index) {
+    if (index >= seq.size())
+      seq.emplace_back();
+
+    return seq[index];
+  }
+};
+
+template <> struct MappingTraits<clang::SummaryContext> {
+  static void mapping(IO &io, clang::SummaryContext &Ctx) {
+    std::vector<StringRef> Identifiers = Ctx.GetIdentifiers();
+    io.mapRequired("identifiers", Identifiers);
 
-    std::vector<std::string> Attrs;
-    for (auto &&Attr : FS.Attrs)
-      Attrs.emplace_back(Attr->serialize());
-    io.mapRequired("fn_attrs", Attrs);
+    std::map<size_t, size_t> LocalToContextID;
     if (!io.outputting()) {
-      std::set<const clang::SummaryAttr *> FunctionAttrs;
-      for (auto parsedAttr : Attrs) {
-        for (auto &&Attr :
-             ((clang::SummaryContext *)io.getContext())->Attributes) {
-          if (Attr->parse(parsedAttr))
-            FunctionAttrs.emplace(Attr.get());
-        }
-      }
+      for (auto &&ID : Identifiers)
+        LocalToContextID[LocalToContextID.size()] =
+            Ctx.GetOrInsertStoredIdentifierIdx(ID);
+    }
+
+    std::vector<std::string> Attributes;
+    for (auto &&Attr : Ctx.GetAttributes())
+      Attributes.emplace_back(Attr->serialize());
+    io.mapRequired("attributes", Attributes);
 
-      FS.Attrs = std::move(FunctionAttrs);
+    std::map<size_t, const clang::SummaryAttr *> AttrIDToPtr;
+    std::set<const clang::SummaryAttr *> Seen;
+    if (!io.outputting()) {
+      for (auto &&Attribute : Attributes) {
+        for (auto &&Attr : Ctx.GetAttributes())
+          if (Attr->parse(Attribute)) {
+            if (!Seen.emplace(Attr.get()).second)
+              break;
+            ;
+
+            AttrIDToPtr[AttrIDToPtr.size()] = Attr.get();
+            break;
+          }
+      }
     }
 
-    io.mapRequired("opaque_calls", FS.CallsOpaque);
+    std::vector<FunctionSummaryProxy> SummaryProxies;
+    for (auto &&Summary : Ctx.GetSummaries())
+      SummaryProxies.emplace_back(*Summary);
+    io.mapRequired("summaries", SummaryProxies);
+    if (!io.outputting()) {
+      for (auto &&Proxy : SummaryProxies) {
+        if (Proxy.ID >= LocalToContextID.size())
+          continue;
 
-    std::vector<std::string> Calls(FS.Calls.begin(), FS.Calls.end());
-    io.mapRequired("calls", Calls);
-    if (!io.outputting())
-      FS.Calls = std::set(Calls.begin(), Calls.end());
-  }
-};
+        std::set<const clang::SummaryAttr *> Attrs;
+        for (auto &&Attr : Proxy.Attrs) {
+          if (Attr >= AttrIDToPtr.size())
+            continue;
 
-template <>
-struct SequenceTraits<std::vector<std::unique_ptr<clang::FunctionSummary>>> {
-  static size_t
-  size(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq) {
-    return seq.size();
-  }
+          Attrs.emplace(AttrIDToPtr[Attr]);
+        }
+
+        std::set<size_t> Calls;
+        for (auto &&Call : Proxy.Calls) {
+          if (Call >= LocalToContextID.size())
+            continue;
 
-  static clang::FunctionSummary &
-  element(IO &io, std::vector<std::unique_ptr<clang::FunctionSummary>> &seq,
-          size_t index) {
-    if (index >= seq.size()) {
-      seq.resize(index + 1);
-      seq[index].reset(new clang::FunctionSummary("", {}, {}, false));
+          Calls.emplace(LocalToContextID[Call]);
+        }
+
+        Ctx.CreateSummary(LocalToContextID[Proxy.ID], std::move(Attrs),
+                          std::move(Calls), Proxy.CallsOpaque);
+      }
     }
-    return *seq[index];
   }
 };
-
 } // namespace yaml
 } // namespace llvm
 
 namespace clang {
-void JSONSummarySerializer::serialize(
-    const std::vector<std::unique_ptr<FunctionSummary>> &Summaries,
-    raw_ostream &OS) {
+void JSONSummarySerializer::serialize(raw_ostream &OS) {
   llvm::json::OStream JOS(OS, 2);
-  JOS.arrayBegin();
-
-  for (auto &&Summary : Summaries) {
-    JOS.object([&] {
-      JOS.attribute("id", llvm::json::Value(Summary->getID()));
-      JOS.attributeObject("attrs", [&] {
-        JOS.attributeArray("function", [&] {
+  JOS.objectBegin();
+
+  JOS.attributeArray("identifiers", [&] {
+    for (auto &&Identifier : SummaryCtx->GetIdentifiers())
+      JOS.value(Identifier);
+  });
+
+  JOS.attributeArray("attributes", [&] {
+    for (auto &&Attribute : SummaryCtx->GetAttributes())
+      JOS.value(Attribute->serialize());
+  });
+
+  JOS.attributeArray("summaries", [&] {
+    for (auto &&Summary : SummaryCtx->GetSummaries()) {
+      JOS.object([&] {
+        JOS.attribute("id", llvm::json::Value(Summary->getID()));
+        JOS.attributeArray("fn_attrs", [&] {
           for (auto &&Attr : Summary->getAttributes()) {
-            JOS.value(llvm::json::Value(Attr->serialize()));
+            JOS.value(llvm::json::Value(static_cast<size_t>(Attr->getKind())));
           }
         });
-      });
-      JOS.attributeObject("calls", [&] {
-        JOS.attribute("opaque",
+        JOS.attribute("opaque_calls",
                       llvm::json::Value(Summary->callsOpaqueObject()));
-        JOS.attributeArray("functions", [&] {
+        JOS.attributeArray("calls", [&] {
           for (auto &&Call : Summary->getCalls()) {
-            JOS.object([&] { JOS.attribute("id", llvm::json::Value(Call)); });
+            JOS.value(llvm::json::Value(Call));
           }
         });
       });
-    });
-  }
+    }
+  });
 
-  JOS.arrayEnd();
+  JOS.objectEnd();
   JOS.flush();
 }
 
@@ -97,86 +159,101 @@ void JSONSummarySerializer::parse(StringRef Buffer) {
     return;
   }
 
-  auto *JSONSummaries = JSON->getAsArray();
+  auto *JSONObject = JSON->getAsObject();
+  if (!JSONObject)
+    return;
+
+  auto *JSONIdentifiers = JSONObject->getArray("identifiers");
+  if (!JSONIdentifiers)
+    return;
+
+  std::map<size_t, size_t> LocalToContextID;
+  for (auto &&Identifier : *JSONIdentifiers) {
+    auto IdentifierStr = Identifier.getAsString();
+    if (!IdentifierStr)
+      return;
+
+    LocalToContextID[LocalToContextID.size()] =
+        SummaryCtx->GetOrInsertStoredIdentifierIdx(*IdentifierStr);
+  }
+
+  auto *JSONAttributes = JSONObject->getArray("attributes");
+  if (!JSONAttributes)
+    return;
+
+  std::map<size_t, const SummaryAttr *> AttrIDToPtr;
+  std::set<const SummaryAttr *> Seen;
+  for (auto &&Attribute : *JSONAttributes) {
+    auto AttributeStr = Attribute.getAsString();
+    if (!AttributeStr)
+      return;
+    for (auto &&Attr : SummaryCtx->GetAttributes())
+      if (Attr->parse(*AttributeStr)) {
+        if (!Seen.emplace(Attr.get()).second)
+          return;
+        AttrIDToPtr[AttrIDToPtr.size()] = Attr.get();
+        break;
+      }
+  }
+
+  auto *JSONSummaries = JSONObject->getArray("summaries");
   if (!JSONSummaries)
     return;
 
   for (auto &&JSONSummary : *JSONSummaries) {
-    const llvm::json::Object *JSONSummaryObject = JSONSummary.getAsObject();
+    auto *JSONSummaryObject = JSONSummary.getAsObject();
     if (!JSONSummaryObject)
       continue;
 
-    std::optional<StringRef> ID = JSONSummaryObject->getString("id");
-    if (!ID)
+    std::optional<size_t> ID = JSONSummaryObject->getInteger("id");
+    if (!ID || *ID >= LocalToContextID.size())
       continue;
 
-    const llvm::json::Object *JSONAttributes =
-        JSONSummaryObject->getObject("attrs");
+    auto *JSONAttributes = JSONSummaryObject->getArray("fn_attrs");
     if (!JSONAttributes)
       continue;
 
-    const llvm::json::Array *JSONFunctionAttributes =
-        JSONAttributes->getArray("function");
-    if (!JSONFunctionAttributes)
-      continue;
-
     std::set<const SummaryAttr *> FunctionAttrs;
-    for (auto &&JSONAttr : *JSONFunctionAttributes)
-      for (auto &&CtxAttr : SummaryCtx->Attributes)
-        if (auto JSONAttrStr = JSONAttr.getAsString();
-            JSONAttrStr && CtxAttr->parse(*JSONAttrStr))
-          FunctionAttrs.emplace(CtxAttr.get());
-
-    const llvm::json::Object *JSONCallsObject =
-        JSONSummaryObject->getObject("calls");
-    if (!JSONCallsObject)
-      continue;
+    for (auto &&JSONAttr : *JSONAttributes) {
+      std::optional<size_t> AttrID = JSONAttr.getAsUINT64();
+      if (!AttrID || *AttrID >= AttrIDToPtr.size())
+        return;
+
+      FunctionAttrs.emplace(AttrIDToPtr[*AttrID]);
+    }
 
-    std::optional<bool> CallsOpaue = *JSONCallsObject->getBoolean("opaque");
+    std::optional<bool> CallsOpaue =
+        JSONSummaryObject->getBoolean("opaque_calls");
     if (!CallsOpaue)
       continue;
 
 
-    std::set<std::string> Calls;
-    const llvm::json::Array *JSONCallEntries =
-        JSONCallsObject->getArray("functions");
+    std::set<size_t> Calls;
+    auto *JSONCallEntries = JSONSummaryObject->getArray("calls");
     if (!JSONCallEntries)
       continue;
 
     for (auto &&JSONCall : *JSONCallEntries) {
-      auto *JSONCallObj = JSONCall.getAsObject();
-      if (!JSONCallObj)
+      std::optional<size_t> CallID = JSONCall.getAsUINT64();
+      if (!CallID || *CallID >= LocalToContextID.size())
         continue;
 
-      std::optional<StringRef> CallID = JSONCallObj->getString("id");
-      if (!CallID)
-        continue;
-
-      Calls.emplace(CallID->str());
+      Calls.emplace(LocalToContextID[*CallID]);
     }
 
-    SummaryCtx->CreateSummary(ID->str(), std::move(FunctionAttrs),
+    SummaryCtx->CreateSummary(LocalToContextID[*ID], std::move(FunctionAttrs),
                               std::move(Calls), *CallsOpaue);
   }
 }
 
-void YAMLSummarySerializer::serialize(
-    const std::vector<std::unique_ptr<FunctionSummary>> &Summaries,
-    raw_ostream &OS) {
+void YAMLSummarySerializer::serialize(raw_ostream &OS) {
   llvm::yaml::Output YOUT(OS);
-  YOUT << ((SummaryContext *)SummaryCtx)->FunctionSummaries;
+  YOUT << *SummaryCtx;
   OS.flush();
 }
 
 void YAMLSummarySerializer::parse(StringRef Buffer) {
-  std::vector<std::unique_ptr<clang::FunctionSummary>> summaries;
-
-  llvm::yaml::Input YIN(Buffer, SummaryCtx);
-  YIN >> summaries;
-
-  for (auto &&summary : summaries)
-    SummaryCtx->CreateSummary(summary->getID().str(), summary->getAttributes(),
-                              summary->getCalls(),
-                              summary->callsOpaqueObject());
+  llvm::yaml::Input YIN(Buffer);
+  YIN >> *SummaryCtx;
 }
 
 void BinarySummarySerializer::PopulateBlockInfo() {
@@ -190,50 +267,34 @@ void BinarySummarySerializer::PopulateBlockInfo() {
   Stream.ExitBlock();
 }
 
-void BinarySummarySerializer::EmitAttributeBlock() {
-  Stream.EnterSubblock(ATTRIBUTE_BLOCK_ID, 3);
+void BinarySummarySerializer::EmitIdentifierBlock() {
+  Stream.EnterSubblock(IDENTIFIER_BLOCK_ID, 3);
 
   auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
-  Abv->Add(llvm::BitCodeAbbrevOp(ATTR));
+  Abv->Add(llvm::BitCodeAbbrevOp(IDENTIFIER));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 8));
   unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
 
-  uint64_t ID = 0;
-  uint64_t Record[] = {ATTR};
-
-  for (auto &&Attr : SummaryCtx->Attributes) {
-    AttrIDs[Attr.get()] = ID++;
-    Stream.EmitRecordWithArray(Abbrev, Record, Attr->serialize());
-  }
+  uint64_t Record[] = {IDENTIFIER};
+  for (auto &&Identifier : SummaryCtx->GetIdentifiers())
+    Stream.EmitRecordWithArray(Abbrev, Record, Identifier);
 
   Stream.ExitBlock();
 }
 
-void BinarySummarySerializer::EmitIdentifierBlock() {
-  Stream.EnterSubblock(IDENTIFIER_BLOCK_ID, 3);
+void BinarySummarySerializer::EmitAttributeBlock() {
+  Stream.EnterSubblock(ATTRIBUTE_BLOCK_ID, 3);
 
   auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
-  Abv->Add(llvm::BitCodeAbbrevOp(IDENTIFIER));
+  Abv->Add(llvm::BitCodeAbbrevOp(ATTR));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 8));
   unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
 
-  uint64_t ID = 0;
-  uint64_t Record[] = {IDENTIFIER};
-
-  for (auto &&Summary : SummaryCtx->FunctionSummaries) {
-    FunctionIDs[Summary->getID().str()] = ID++;
-    Stream.EmitRecordWithArray(Abbrev, Record, Summary->getID());
-
-    for (auto &&Call : Summary->getCalls()) {
-      if (FunctionIDs.count(Call))
-        continue;
-
-      FunctionIDs[Call] = ID++;
-      Stream.EmitRecordWithArray(Abbrev, Record, Call);
-    }
-  }
+  uint64_t Record[] = {ATTR};
+  for (auto &&Attr : SummaryCtx->GetAttributes())
+    Stream.EmitRecordWithArray(Abbrev, Record, Attr->serialize());
 
   Stream.ExitBlock();
 }
@@ -254,7 +315,7 @@ void BinarySummarySerializer::EmitSummaryBlock() {
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 32));
   unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
 
-  for (auto &&Summary : SummaryCtx->FunctionSummaries) {
+  for (auto &&Summary : SummaryCtx->GetSummaries()) {
     SmallVector<uint64_t, 64> Record;
 
     Record.push_back(Summary->getAttributes().size());
@@ -263,11 +324,11 @@ void BinarySummarySerializer::EmitSummaryBlock() {
 
     Record.push_back(1 + Summary->getAttributes().size() +
                      Summary->getCalls().size());
-    Record.push_back(FunctionIDs[Summary->getID().str()]);
+    Record.push_back(Summary->getID());
     for (auto &&Attr : Summary->getAttributes())
-      Record.push_back(AttrIDs[Attr]);
+      Record.push_back(Attr->getKind());
     for (auto &&Call : Summary->getCalls())
-      Record.push_back(FunctionIDs[Call]);
+      Record.push_back(Call);
 
     Stream.EmitRecord(FUNCTION, Record, Abbrev);
   }
@@ -294,16 +355,15 @@ void BinarySummarySerializer::EmitRecord(unsigned ID, const char *Name) {
   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Buffer);
 }
 
-void BinarySummarySerializer::serialize(
-    const std::vector<std::unique_ptr<FunctionSummary>> &, raw_ostream &OS) {
+void BinarySummarySerializer::serialize(raw_ostream &OS) {
   Stream.Emit((unsigned)'C', 8);
   Stream.Emit((unsigned)'T', 8);
   Stream.Emit((unsigned)'U', 8);
   Stream.Emit((unsigned)'S', 8);
 
   PopulateBlockInfo();
-  EmitAttributeBlock();
   EmitIdentifierBlock();
+  EmitAttributeBlock();
   EmitSummaryBlock();
 
   Stream.FlushToWord();
@@ -360,20 +420,14 @@ llvm::Error BinarySummarySerializer::handleBlockRecordsCommon(
 }
 
 llvm::Error
-BinarySummarySerializer::parseAttributeBlock(llvm::BitstreamCursor &Stream) {
-  if (llvm::Error Err = handleBlockStartCommon(ATTRIBUTE_BLOCK_ID, Stream))
+BinarySummarySerializer::parseIdentifierBlock(llvm::BitstreamCursor &Stream) {
+  if (llvm::Error Err = handleBlockStartCommon(IDENTIFIER_BLOCK_ID, Stream))
     return Err;
 
-  ParsedAttrIDs.clear();
   if (llvm::Error Err = handleBlockRecordsCommon(Stream, [&](auto &&Record) {
-        for (auto &&CtxAttr : SummaryCtx->Attributes) {
-          llvm::SmallString<64> AttributeStr(Record.begin(), Record.end());
-
-          if (CtxAttr->parse(AttributeStr.str())) {
-            ParsedAttrIDs.push_back(CtxAttr.get());
-            break;
-          }
-        }
+        llvm::SmallString<64> IdentifierStr(Record.begin(), Record.end());
+        LocalToContextID[LocalToContextID.size()] =
+            SummaryCtx->GetOrInsertStoredIdentifierIdx(IdentifierStr);
       }))
     return Err;
 
@@ -381,14 +435,24 @@ BinarySummarySerializer::parseAttributeBlock(llvm::BitstreamCursor &Stream) {
 }
 
 llvm::Error
-BinarySummarySerializer::parseIdentifierBlock(llvm::BitstreamCursor &Stream) {
-  if (llvm::Error Err = handleBlockStartCommon(IDENTIFIER_BLOCK_ID, Stream))
+BinarySummarySerializer::parseAttributeBlock(llvm::BitstreamCursor &Stream) {
+  if (llvm::Error Err = handleBlockStartCommon(ATTRIBUTE_BLOCK_ID, Stream))
     return Err;
 
-  ParsedFunctionIDs.clear();
   if (llvm::Error Err = handleBlockRecordsCommon(Stream, [&](auto &&Record) {
-        llvm::SmallString<64> IdentifierStr(Record.begin(), Record.end());
-        ParsedFunctionIDs.emplace_back(IdentifierStr.str().str());
+        std::set<const clang::SummaryAttr *> Seen;
+
+        for (auto &&Attr : SummaryCtx->GetAttributes()) {
+          llvm::SmallString<64> AttributeStr(Record.begin(), Record.end());
+
+          if (Attr->parse(AttributeStr.str())) {
+            if (!Seen.emplace(Attr.get()).second)
+              break;
+
+            AttrIDToPtr[AttrIDToPtr.size()] = Attr.get();
+            break;
+          }
+        }
       }))
     return Err;
 
@@ -404,24 +468,35 @@ BinarySummarySerializer::parseSummaryBlock(llvm::BitstreamCursor &Stream) {
         int AttrCnt = Record[0];
         int CallCnt = Record[1];
         bool Opaque = Record[2];
-        int ID = Record[4];
+        size_t ID = Record[4];
         int I = 0;
 
+        if (ID >= LocalToContextID.size())
+          return;
+
         std::set<const SummaryAttr *> Attrs;
         while (AttrCnt) {
-          Attrs.emplace(ParsedAttrIDs[Record[5 + I]]);
+          size_t AttrID = Record[5 + I];
+          if (AttrID >= AttrIDToPtr.size())
+            return;
+
+          Attrs.emplace(AttrIDToPtr[AttrID]);
           ++I;
           --AttrCnt;
         }
 
-        std::set<std::string> Calls;
+        std::set<size_t> Calls;
         while (CallCnt) {
-          Calls.emplace(ParsedFunctionIDs[Record[5 + I]]);
+          size_t CallID = Record[5 + I];
+          if (CallID >= LocalToContextID.size())
+            return;
+
+          Calls.emplace(LocalToContextID[CallID]);
           ++I;
           --CallCnt;
         }
 
-        SummaryCtx->CreateSummary(ParsedFunctionIDs[ID], std::move(Attrs),
+        SummaryCtx->CreateSummary(LocalToContextID[ID], std::move(Attrs),
                                   std::move(Calls), Opaque);
       }))
     return Err;
@@ -441,12 +516,12 @@ llvm::Error BinarySummarySerializer::parseBlock(unsigned ID,
     return llvm::Error::success();
   }
 
-  if (ID == ATTRIBUTE_BLOCK_ID)
-    return parseAttributeBlock(Stream);
-
   if (ID == IDENTIFIER_BLOCK_ID)
     return parseIdentifierBlock(Stream);
 
+  if (ID == ATTRIBUTE_BLOCK_ID)
+    return parseAttributeBlock(Stream);
+
   if (ID == SUMMARY_BLOCK_ID)
     return parseSummaryBlock(Stream);
 
@@ -455,6 +530,8 @@ llvm::Error BinarySummarySerializer::parseBlock(unsigned ID,
 
 llvm::Error BinarySummarySerializer::parseImpl(StringRef Buffer) {
   llvm::BitstreamCursor Stream(Buffer);
+  LocalToContextID.clear();
+  AttrIDToPtr.clear();
 
   llvm::SimpleBitstreamCursor::word_t Magic[4] = {0};
   unsigned char ExpectedMagic[] = {'C', 'T', 'U', 'S'};

>From 0e88fa89c1ed1cf1d9979d9bd8a1e392d91a01cf Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 12 Jul 2025 14:06:39 +0200
Subject: [PATCH 45/48] remove nowriteptrparameter

---
 .../include/clang/Summary/SummaryAttribute.h  | 16 ------
 clang/lib/StaticAnalyzer/Core/CallEvent.cpp   |  3 -
 clang/lib/Summary/SummaryAttribute.cpp        | 57 -------------------
 clang/lib/Summary/SummaryContext.cpp          |  1 -
 4 files changed, 77 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryAttribute.h b/clang/include/clang/Summary/SummaryAttribute.h
index 4b1ac06f86fb8..5ac889e6b0d4d 100644
--- a/clang/include/clang/Summary/SummaryAttribute.h
+++ b/clang/include/clang/Summary/SummaryAttribute.h
@@ -47,22 +47,6 @@ class NoWriteGlobalAttr : public SummaryAttr {
   }
   friend class SummaryContext;
 };
-
-// FIXME: create a macro for attr declarations?
-class NoWritePtrParameterAttr : public SummaryAttr {
-  NoWritePtrParameterAttr()
-      : SummaryAttr(NO_WRITE_PTR_PARAMETER, "no_write_ptr_parameter") {}
-
-public:
-  bool infer(const FunctionDecl *FD) const override final;
-  bool merge(const FunctionSummary &Caller,
-             const FunctionSummary *Callee) const override final;
-
-  static bool classof(const SummaryAttr *A) {
-    return A->getKind() == NO_WRITE_PTR_PARAMETER;
-  }
-  friend class SummaryContext;
-};
 } // namespace clang
 
 #endif // LLVM_CLANG_SUMMARY_SUMMARYATTRIBUTEH
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 669fd715c9ae1..4b9e852983f48 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -259,9 +259,6 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
     findPtrToConstParams(PreserveArgs, *this);
 
   for (unsigned Idx = 0, Count = getNumArgs(); Idx != Count; ++Idx) {
-    if (Summary && Summary->hasAttribute<NoWritePtrParameterAttr>())
-      continue;
-
     // Mark this region for invalidation.  We batch invalidate regions
     // below for efficiency.
     if (PreserveArgs.count(Idx))
diff --git a/clang/lib/Summary/SummaryAttribute.cpp b/clang/lib/Summary/SummaryAttribute.cpp
index e1ad0a0eb6f51..92a83d5342d05 100644
--- a/clang/lib/Summary/SummaryAttribute.cpp
+++ b/clang/lib/Summary/SummaryAttribute.cpp
@@ -36,61 +36,4 @@ bool NoWriteGlobalAttr::merge(const FunctionSummary &Caller,
   return !Caller.callsOpaqueObject() && Caller.getAttributes().count(this) &&
          Callee && Callee->getAttributes().count(this);
 }
-
-bool NoWritePtrParameterAttr::infer(const FunctionDecl *FD) const {
-  using namespace ast_matchers;
-  MatchFinder Finder;
-
-  class Callback : public ast_matchers::MatchFinder::MatchCallback {
-  public:
-    bool MayWritePtrParam = false;
-
-    void
-    run(const ast_matchers::MatchFinder::MatchResult &Result) override final {
-      const auto *FD = Result.Nodes.getNodeAs<FunctionDecl>("fn");
-      if (!FD)
-        return;
-
-      MayWritePtrParam = true;
-    }
-  } CB;
-
-  auto ptrParmDeclRef = declRefExpr(
-      allOf(unless(hasAncestor(unaryOperator(hasOperatorName("*")))),
-            to(parmVarDecl(hasType(pointerType())))));
-  auto ptrParmDereference = unaryOperator(allOf(
-      hasOperatorName("*"),
-      hasDescendant(declRefExpr(to(parmVarDecl(hasType(pointerType())))))));
-
-  Finder.addMatcher(
-      functionDecl(
-          anyOf(
-              // The value of the pointer is used to initialize a local
-              // variable.
-              forEachDescendant(
-                  varDecl(hasInitializer(hasDescendant(ptrParmDeclRef)))),
-              // The ptr parameter appears on the RHS of an assignment.
-              forEachDescendant(
-                  binaryOperator(isAssignmentOperator(),
-                                 hasRHS(hasDescendant(ptrParmDeclRef)))),
-              // The ptr is dereferenced on the LHS of an assignment.
-              forEachDescendant(binaryOperator(
-                  isAssignmentOperator(),
-                  hasLHS(anyOf(ptrParmDereference,
-                               hasDescendant(ptrParmDereference))))),
-              // The param is const casted
-              forEachDescendant(cxxConstCastExpr(hasDescendant(ptrParmDeclRef)))
-              // FIXME: handle member access
-              ))
-          .bind("fn"),
-      &CB);
-  Finder.match(*FD, FD->getASTContext());
-  return !CB.MayWritePtrParam;
-}
-
-bool NoWritePtrParameterAttr::merge(const FunctionSummary &Caller,
-                                    const FunctionSummary *Callee) const {
-  return !Caller.callsOpaqueObject() && Caller.getAttributes().count(this) &&
-         Callee && Callee->getAttributes().count(this);
-}
 } // namespace clang
diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index f2e72e29ab28d..8db3075d5aaa9 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -82,7 +82,6 @@ template <typename T> void SummaryContext::RegisterAttr() {
 
 SummaryContext::SummaryContext() {
   RegisterAttr<NoWriteGlobalAttr>();
-  RegisterAttr<NoWritePtrParameterAttr>();
 }
 
 size_t SummaryContext::GetOrInsertStoredIdentifierIdx(StringRef ID) {

>From 5d729376b9043321ee715fc4a0514c4e775dc902 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 12 Jul 2025 17:22:36 +0200
Subject: [PATCH 46/48] make file processing consistent

---
 clang/lib/Frontend/FrontendAction.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index d8e201804c067..44adc71b7a0df 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -1016,6 +1016,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         }
       }
 
+      llvm::sort(paths);
+
       for (auto &&path : paths) {
         std::ifstream t(path);
         std::stringstream buffer;

>From bbfa385084280114069496ec98c49f81f1f85477 Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 12 Jul 2025 17:22:48 +0200
Subject: [PATCH 47/48] fix YAML parsing performance issues

---
 clang/include/clang/Summary/SummaryContext.h |  4 -
 clang/lib/Summary/SummarySerialization.cpp   | 93 +++++++++++---------
 2 files changed, 51 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/Summary/SummaryContext.h b/clang/include/clang/Summary/SummaryContext.h
index e1c0cdd480720..a54b4fb8a0d09 100644
--- a/clang/include/clang/Summary/SummaryContext.h
+++ b/clang/include/clang/Summary/SummaryContext.h
@@ -69,10 +69,6 @@ class SummaryContext {
   const FunctionSummary *GetSummary(const FunctionDecl *FD) const;
   void SummarizeFunctionBody(const FunctionDecl *FD);
   void ReduceSummaries();
-
-  friend struct llvm::yaml::SequenceTraits<
-      std::vector<std::unique_ptr<clang::FunctionSummary>>>;
-  friend class YAMLSummarySerializer;
 };
 } // namespace clang
 
diff --git a/clang/lib/Summary/SummarySerialization.cpp b/clang/lib/Summary/SummarySerialization.cpp
index bd5b1e4463096..3afad28def498 100644
--- a/clang/lib/Summary/SummarySerialization.cpp
+++ b/clang/lib/Summary/SummarySerialization.cpp
@@ -13,9 +13,11 @@ struct FunctionSummaryProxy {
   FunctionSummaryProxy() = default;
   FunctionSummaryProxy(const clang::FunctionSummary &Summary)
       : ID(Summary.getID()), CallsOpaque(Summary.callsOpaqueObject()) {
+    Attrs.reserve(Summary.getAttributes().size());
     for (auto &&Attr : Summary.getAttributes())
       Attrs.emplace_back(Attr->getKind());
 
+    Calls.reserve(Summary.getCalls().size());
     for (auto &&Call : Summary.getCalls())
       Calls.emplace_back(Call);
   }
@@ -46,65 +48,72 @@ template <> struct SequenceTraits<std::vector<FunctionSummaryProxy>> {
 
 template <> struct MappingTraits<clang::SummaryContext> {
   static void mapping(IO &io, clang::SummaryContext &Ctx) {
-    std::vector<StringRef> Identifiers = Ctx.GetIdentifiers();
-    io.mapRequired("identifiers", Identifiers);
+    if (io.outputting()) {
+      std::vector<StringRef> Identifiers = Ctx.GetIdentifiers();
+      io.mapRequired("identifiers", Identifiers);
+
+      std::vector<std::string> Attributes;
+      Attributes.reserve(Ctx.GetAttributes().size());
+      for (auto &&Attr : Ctx.GetAttributes())
+        Attributes.emplace_back(Attr->serialize());
+      io.mapRequired("attributes", Attributes);
+
+      std::vector<FunctionSummaryProxy> SummaryProxies;
+      SummaryProxies.reserve(Ctx.GetSummaries().size());
+      for (auto &&Summary : Ctx.GetSummaries())
+        SummaryProxies.emplace_back(*Summary);
+      io.mapRequired("summaries", SummaryProxies);
+
+      return;
+    }
 
+    std::vector<StringRef> Identifiers;
+    io.mapRequired("identifiers", Identifiers);
     std::map<size_t, size_t> LocalToContextID;
-    if (!io.outputting()) {
-      for (auto &&ID : Identifiers)
-        LocalToContextID[LocalToContextID.size()] =
-            Ctx.GetOrInsertStoredIdentifierIdx(ID);
+    for (auto &&ID : Identifiers) {
+      LocalToContextID[LocalToContextID.size()] =
+          Ctx.GetOrInsertStoredIdentifierIdx(ID);
     }
 
-    std::vector<std::string> Attributes;
-    for (auto &&Attr : Ctx.GetAttributes())
-      Attributes.emplace_back(Attr->serialize());
+    std::vector<StringRef> Attributes;
     io.mapRequired("attributes", Attributes);
-
     std::map<size_t, const clang::SummaryAttr *> AttrIDToPtr;
     std::set<const clang::SummaryAttr *> Seen;
-    if (!io.outputting()) {
-      for (auto &&Attribute : Attributes) {
-        for (auto &&Attr : Ctx.GetAttributes())
-          if (Attr->parse(Attribute)) {
-            if (!Seen.emplace(Attr.get()).second)
-              break;
-            ;
-
-            AttrIDToPtr[AttrIDToPtr.size()] = Attr.get();
+    for (auto &&Attribute : Attributes) {
+      for (auto &&Attr : Ctx.GetAttributes())
+        if (Attr->parse(Attribute)) {
+          if (!Seen.emplace(Attr.get()).second)
             break;
-          }
-      }
+
+          AttrIDToPtr[AttrIDToPtr.size()] = Attr.get();
+          break;
+        }
     }
 
     std::vector<FunctionSummaryProxy> SummaryProxies;
-    for (auto &&Summary : Ctx.GetSummaries())
-      SummaryProxies.emplace_back(*Summary);
     io.mapRequired("summaries", SummaryProxies);
-    if (!io.outputting()) {
-      for (auto &&Proxy : SummaryProxies) {
-        if (Proxy.ID >= LocalToContextID.size())
-          continue;
-
-        std::set<const clang::SummaryAttr *> Attrs;
-        for (auto &&Attr : Proxy.Attrs) {
-          if (Attr >= AttrIDToPtr.size())
-            continue;
+    for (auto &&Proxy : SummaryProxies) {
+      if (Proxy.ID >= LocalToContextID.size())
+        continue;
 
-          Attrs.emplace(AttrIDToPtr[Attr]);
-        }
+      std::set<const clang::SummaryAttr *> Attrs;
+      for (auto &&Attr : Proxy.Attrs) {
+        if (Attr >= AttrIDToPtr.size())
+          continue;
 
-        std::set<size_t> Calls;
-        for (auto &&Call : Proxy.Calls) {
-          if (Call >= LocalToContextID.size())
-            continue;
+        Attrs.emplace(AttrIDToPtr[Attr]);
+      }
 
-          Calls.emplace(LocalToContextID[Call]);
-        }
+      std::set<size_t> Calls;
+      for (auto &&Call : Proxy.Calls) {
+        if (Call >= LocalToContextID.size())
+          continue;
 
-        Ctx.CreateSummary(LocalToContextID[Proxy.ID], std::move(Attrs),
-                          std::move(Calls), Proxy.CallsOpaque);
+        Calls.emplace(LocalToContextID[Call]);
       }
+
+      Ctx.CreateSummary(LocalToContextID[Proxy.ID], std::move(Attrs),
+                        std::move(Calls), Proxy.CallsOpaque);
     }
   }
 };

>From cc3e507cb56936d4e4ee9e3480d117b792cd9eff Mon Sep 17 00:00:00 2001
From: isuckatcs <65320245+isuckatcs at users.noreply.github.com>
Date: Sat, 12 Jul 2025 17:23:14 +0200
Subject: [PATCH 48/48] format

---
 clang/lib/Summary/SummaryContext.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Summary/SummaryContext.cpp b/clang/lib/Summary/SummaryContext.cpp
index 8db3075d5aaa9..9a25261df3fa2 100644
--- a/clang/lib/Summary/SummaryContext.cpp
+++ b/clang/lib/Summary/SummaryContext.cpp
@@ -80,9 +80,7 @@ template <typename T> void SummaryContext::RegisterAttr() {
   KindToAttribute[Kind] = Attributes.emplace_back(std::move(attr)).get();
 }
 
-SummaryContext::SummaryContext() {
-  RegisterAttr<NoWriteGlobalAttr>();
-}
+SummaryContext::SummaryContext() { RegisterAttr<NoWriteGlobalAttr>(); }
 
 size_t SummaryContext::GetOrInsertStoredIdentifierIdx(StringRef ID) {
   auto &&[Element, Inserted] =



More information about the cfe-commits mailing list