[clang-tools-extra] [clangd] Avoid libFormat's objective-c guessing heuristic where possible (PR #84133)

Nathan Ridge via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 6 00:08:56 PST 2024


https://github.com/HighCommander4 created https://github.com/llvm/llvm-project/pull/84133

This avoids a known libFormat bug where the heuristic can OOM on certain large files (particularly single-header libraries such as miniaudio.h).

The OOM will still happen on affected files if you actually try to format them (this is harder to avoid since the underlyting issue affects the actual formatting logic, not just the language-guessing heuristic), but at least it's avoided during non-modifying operations like hover, and modifying operations that do local formatting like code completion.

Fixes https://github.com/clangd/clangd/issues/719
Fixes https://github.com/clangd/clangd/issues/1384
Fixes https://github.com/llvm/llvm-project/issues/70945

>From a92ab04df9c667fb2a427381c13a23d2b4bcf82d Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976 at hotmail.com>
Date: Sun, 25 Feb 2024 19:55:10 -0500
Subject: [PATCH] [clangd] Avoid libFormat's objective-c guessing heuristic
 where possible

This avoids a known libFormat bug where the heuristic can OOM on certain
large files (particularly single-header libraries such as miniaudio.h).

The OOM will still happen on affected files if you actually try to
format them (this is harder to avoid since the underlyting issue affects
the actual formatting logic, not just the language-guessing heuristic),
but at least it's avoided during non-modifying operations like hover, and
modifying operations that do local formatting like code completion.

Fixes https://github.com/clangd/clangd/issues/719
Fixes https://github.com/clangd/clangd/issues/1384
Fixes https://github.com/llvm/llvm-project/issues/70945
---
 clang-tools-extra/clangd/ClangdServer.cpp   | 20 ++++++++++++--------
 clang-tools-extra/clangd/CodeComplete.cpp   |  9 +++++----
 clang-tools-extra/clangd/IncludeCleaner.cpp |  3 ++-
 clang-tools-extra/clangd/ParsedAST.cpp      |  4 ++--
 clang-tools-extra/clangd/SourceCode.cpp     | 19 +++++++++++++++++--
 clang-tools-extra/clangd/SourceCode.h       | 17 ++++++++++++++++-
 clang-tools-extra/clangd/tool/Check.cpp     |  3 ++-
 7 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 2907e3ba3c303c..19f0651aab4c29 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -523,7 +523,8 @@ void ClangdServer::formatFile(PathRef File, std::optional<Range> Rng,
   auto Action = [File = File.str(), Code = std::move(*Code),
                  Ranges = std::vector<tooling::Range>{RequestedRange},
                  CB = std::move(CB), this]() mutable {
-    format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS);
+    format::FormatStyle Style =
+        getFormatStyleForFile(File, Code, TFS, FormatKind::EntireFileOrRange);
     tooling::Replacements IncludeReplaces =
         format::sortIncludes(Style, Code, Ranges, File);
     auto Changed = tooling::applyAllReplacements(Code, IncludeReplaces);
@@ -551,7 +552,8 @@ void ClangdServer::formatOnType(PathRef File, Position Pos,
   auto Action = [File = File.str(), Code = std::move(*Code),
                  TriggerText = TriggerText.str(), CursorPos = *CursorPos,
                  CB = std::move(CB), this]() mutable {
-    auto Style = getFormatStyleForFile(File, Code, TFS);
+    auto Style =
+        getFormatStyleForFile(File, Code, TFS, FormatKind::Replacements);
     std::vector<TextEdit> Result;
     for (const tooling::Replacement &R :
          formatIncremental(Code, CursorPos, TriggerText, Style))
@@ -604,8 +606,9 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName,
       return CB(R.takeError());
 
     if (Opts.WantFormat) {
-      auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents,
-                                         *InpAST->Inputs.TFS);
+      auto Style =
+          getFormatStyleForFile(File, InpAST->Inputs.Contents,
+                                *InpAST->Inputs.TFS, FormatKind::Replacements);
       llvm::Error Err = llvm::Error::success();
       for (auto &E : R->GlobalChanges)
         Err =
@@ -761,8 +764,8 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
       // Format tweaks that require it centrally here.
       for (auto &It : (*Effect)->ApplyEdits) {
         Edit &E = It.second;
-        format::FormatStyle Style =
-            getFormatStyleForFile(File, E.InitialCode, TFS);
+        format::FormatStyle Style = getFormatStyleForFile(
+            File, E.InitialCode, TFS, FormatKind::Replacements);
         if (llvm::Error Err = reformatEdit(E, Style))
           elog("Failed to format {0}: {1}", It.first(), std::move(Err));
       }
@@ -824,8 +827,9 @@ void ClangdServer::findHover(PathRef File, Position Pos,
                  this](llvm::Expected<InputsAndAST> InpAST) mutable {
     if (!InpAST)
       return CB(InpAST.takeError());
-    format::FormatStyle Style = getFormatStyleForFile(
-        File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS);
+    format::FormatStyle Style =
+        getFormatStyleForFile(File, InpAST->Inputs.Contents,
+                              *InpAST->Inputs.TFS, FormatKind::Snippet);
     CB(clangd::getHover(InpAST->AST, Pos, std::move(Style), Index));
   };
 
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 0e5f08cec440ce..b54cfe4c424bce 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -1626,9 +1626,9 @@ class CodeCompleteFlow {
       assert(Recorder && "Recorder is not set");
       CCContextKind = Recorder->CCContext.getKind();
       IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
-      auto Style = getFormatStyleForFile(SemaCCInput.FileName,
-                                         SemaCCInput.ParseInput.Contents,
-                                         *SemaCCInput.ParseInput.TFS);
+      auto Style = getFormatStyleForFile(
+          SemaCCInput.FileName, SemaCCInput.ParseInput.Contents,
+          *SemaCCInput.ParseInput.TFS, FormatKind::Replacements);
       const auto NextToken = findTokenAfterCompletionPoint(
           Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
           Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
@@ -1719,7 +1719,8 @@ class CodeCompleteFlow {
     ProxSources[FileName].Cost = 0;
     FileProximity.emplace(ProxSources);
 
-    auto Style = getFormatStyleForFile(FileName, Content, TFS);
+    auto Style =
+        getFormatStyleForFile(FileName, Content, TFS, FormatKind::Replacements);
     // This will only insert verbatim headers.
     Inserter.emplace(FileName, Content, Style,
                      /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index 7375b7b0860917..e450936fd05612 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -116,7 +116,8 @@ std::vector<Diag> generateMissingIncludeDiagnostics(
   const SourceManager &SM = AST.getSourceManager();
   const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
 
-  auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS);
+  auto FileStyle =
+      getFormatStyleForFile(AST.tuPath(), Code, TFS, FormatKind::Replacements);
 
   tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
                                          FileStyle.IncludeStyle);
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 862f06196a7100..98585c478b2b2c 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -625,8 +625,8 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
     // Add IncludeFixer which can recover diagnostics caused by missing includes
     // (e.g. incomplete type) and attach include insertion fixes to diagnostics.
     if (Inputs.Index && !BuildDir.getError()) {
-      auto Style =
-          getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS);
+      auto Style = getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS,
+                                         FormatKind::Replacements);
       auto Inserter = std::make_shared<IncludeInserter>(
           Filename, Inputs.Contents, Style, BuildDir.get(),
           &Clang->getPreprocessor().getHeaderSearchInfo());
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 3e741f6e0b536b..08f9f638bd18ac 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -582,10 +582,25 @@ std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
 
 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                           llvm::StringRef Content,
-                                          const ThreadsafeFS &TFS) {
+                                          const ThreadsafeFS &TFS,
+                                          FormatKind Kind) {
+  // Unless we're formatting a substantial amount of code (the entire file
+  // or an arbitrarily large range), skip libFormat's heuristic check for
+  // .h files that tries to determine whether the file contains objective-c
+  // code. (This is accomplished by passing empty code contents to getStyle().
+  // The heuristic is the only thing that looks at the contents.)
+  // This is a workaround for PR60151, a known issue in libFormat where this
+  // heuristic can OOM on large files. If we *are* formatting the entire file,
+  // there's no point in doing this because the actual format::reformat() call
+  // will run into the same OOM; we'd just be risking inconsistencies between
+  // clangd and clang-format on smaller .h files where they disagree on what
+  // language is detected.
+  if (Kind != FormatKind::EntireFileOrRange)
+    Content = {};
   auto Style = format::getStyle(format::DefaultFormatStyle, File,
                                 format::DefaultFallbackStyle, Content,
-                                TFS.view(/*CWD=*/std::nullopt).get());
+                                TFS.view(/*CWD=*/std::nullopt).get(),
+                                /*AllowUnknownOptions=*/false);
   if (!Style) {
     log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
         Style.takeError());
diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h
index a1bb44c1761202..a76216768d8753 100644
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@@ -166,6 +166,20 @@ TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
 std::optional<std::string> getCanonicalPath(const FileEntryRef F,
                                             FileManager &FileMgr);
 
+/// A flag passed to getFormatStyleForFile() that specifies what kind of
+/// formatting operation the returned FormatStyle will be used for.
+enum class FormatKind {
+  // Formatting a snippet of synthesized code (e.g. a code snippet
+  // shown in a hover) that's not part of the main file.
+  Snippet,
+  // Formatting edits ade by an editor action such as code completion
+  // or rename.
+  Replacements,
+  // Formatting the entire main file (or a range selected by the user,
+  // which can be arbitrarily long).
+  EntireFileOrRange
+};
+
 /// Choose the clang-format style we should apply to a certain file.
 /// This will usually use FS to look for .clang-format directories.
 /// FIXME: should we be caching the .clang-format file search?
@@ -173,7 +187,8 @@ std::optional<std::string> getCanonicalPath(const FileEntryRef F,
 /// though the latter may have been overridden in main()!
 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                           llvm::StringRef Content,
-                                          const ThreadsafeFS &TFS);
+                                          const ThreadsafeFS &TFS,
+                                          FormatKind Kind);
 
 /// Cleanup and format the given replacements.
 llvm::Expected<tooling::Replacements>
diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp
index b5c4d145619df3..54dbe7ec5b32b1 100644
--- a/clang-tools-extra/clangd/tool/Check.cpp
+++ b/clang-tools-extra/clangd/tool/Check.cpp
@@ -226,7 +226,8 @@ class Checker {
 
     // FIXME: Check that resource-dir/built-in-headers exist?
 
-    Style = getFormatStyleForFile(File, Inputs.Contents, TFS);
+    Style =
+        getFormatStyleForFile(File, Inputs.Contents, TFS, FormatKind::Snippet);
 
     return true;
   }



More information about the cfe-commits mailing list