[clang-tools-extra] 3093d73 - [clangd] Avoid libFormat's objective-c guessing heuristic where possible (#84133)

via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 11 01:16:49 PDT 2024


Author: Nathan Ridge
Date: 2024-03-11T04:16:45-04:00
New Revision: 3093d731dff93df02899dcc62f5e7ba02461ff2a

URL: https://github.com/llvm/llvm-project/commit/3093d731dff93df02899dcc62f5e7ba02461ff2a
DIFF: https://github.com/llvm/llvm-project/commit/3093d731dff93df02899dcc62f5e7ba02461ff2a.diff

LOG: [clangd] Avoid libFormat's objective-c guessing heuristic where possible (#84133)

This avoids a known libFormat bug where the heuristic can OOM on certain
large files (particularly single-header libraries such as miniaudio.h).

The OOM will still happen on affected files if you actually try to
format them (this is harder to avoid since the underlyting issue affects
the actual formatting logic, not just the language-guessing heuristic),
but at least it's avoided during non-modifying operations like hover,
and modifying operations that do local formatting like code completion.

Fixes https://github.com/clangd/clangd/issues/719
Fixes https://github.com/clangd/clangd/issues/1384
Fixes https://github.com/llvm/llvm-project/issues/70945

Added: 
    

Modified: 
    clang-tools-extra/clangd/ClangdServer.cpp
    clang-tools-extra/clangd/CodeComplete.cpp
    clang-tools-extra/clangd/IncludeCleaner.cpp
    clang-tools-extra/clangd/ParsedAST.cpp
    clang-tools-extra/clangd/SourceCode.cpp
    clang-tools-extra/clangd/SourceCode.h
    clang-tools-extra/clangd/tool/Check.cpp
    clang-tools-extra/clangd/unittests/SourceCodeTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 2907e3ba3c303c..5790273d625ef1 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -523,7 +523,7 @@ void ClangdServer::formatFile(PathRef File, std::optional<Range> Rng,
   auto Action = [File = File.str(), Code = std::move(*Code),
                  Ranges = std::vector<tooling::Range>{RequestedRange},
                  CB = std::move(CB), this]() mutable {
-    format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS);
+    format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS, true);
     tooling::Replacements IncludeReplaces =
         format::sortIncludes(Style, Code, Ranges, File);
     auto Changed = tooling::applyAllReplacements(Code, IncludeReplaces);
@@ -551,7 +551,7 @@ void ClangdServer::formatOnType(PathRef File, Position Pos,
   auto Action = [File = File.str(), Code = std::move(*Code),
                  TriggerText = TriggerText.str(), CursorPos = *CursorPos,
                  CB = std::move(CB), this]() mutable {
-    auto Style = getFormatStyleForFile(File, Code, TFS);
+    auto Style = getFormatStyleForFile(File, Code, TFS, false);
     std::vector<TextEdit> Result;
     for (const tooling::Replacement &R :
          formatIncremental(Code, CursorPos, TriggerText, Style))
@@ -605,7 +605,7 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName,
 
     if (Opts.WantFormat) {
       auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents,
-                                         *InpAST->Inputs.TFS);
+                                         *InpAST->Inputs.TFS, false);
       llvm::Error Err = llvm::Error::success();
       for (auto &E : R->GlobalChanges)
         Err =
@@ -762,7 +762,7 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
       for (auto &It : (*Effect)->ApplyEdits) {
         Edit &E = It.second;
         format::FormatStyle Style =
-            getFormatStyleForFile(File, E.InitialCode, TFS);
+            getFormatStyleForFile(File, E.InitialCode, TFS, false);
         if (llvm::Error Err = reformatEdit(E, Style))
           elog("Failed to format {0}: {1}", It.first(), std::move(Err));
       }
@@ -825,7 +825,7 @@ void ClangdServer::findHover(PathRef File, Position Pos,
     if (!InpAST)
       return CB(InpAST.takeError());
     format::FormatStyle Style = getFormatStyleForFile(
-        File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS);
+        File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS, false);
     CB(clangd::getHover(InpAST->AST, Pos, std::move(Style), Index));
   };
 

diff  --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 0e5f08cec440ce..036eb9808ea082 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -1628,7 +1628,7 @@ class CodeCompleteFlow {
       IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
       auto Style = getFormatStyleForFile(SemaCCInput.FileName,
                                          SemaCCInput.ParseInput.Contents,
-                                         *SemaCCInput.ParseInput.TFS);
+                                         *SemaCCInput.ParseInput.TFS, false);
       const auto NextToken = findTokenAfterCompletionPoint(
           Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
           Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
@@ -1719,7 +1719,7 @@ class CodeCompleteFlow {
     ProxSources[FileName].Cost = 0;
     FileProximity.emplace(ProxSources);
 
-    auto Style = getFormatStyleForFile(FileName, Content, TFS);
+    auto Style = getFormatStyleForFile(FileName, Content, TFS, false);
     // This will only insert verbatim headers.
     Inserter.emplace(FileName, Content, Style,
                      /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index 7375b7b0860917..8e48f546d94e77 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -116,7 +116,7 @@ std::vector<Diag> generateMissingIncludeDiagnostics(
   const SourceManager &SM = AST.getSourceManager();
   const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
 
-  auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS);
+  auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false);
 
   tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
                                          FileStyle.IncludeStyle);

diff  --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 862f06196a7100..3ff759415f7c8b 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -626,7 +626,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
     // (e.g. incomplete type) and attach include insertion fixes to diagnostics.
     if (Inputs.Index && !BuildDir.getError()) {
       auto Style =
-          getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS);
+          getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS, false);
       auto Inserter = std::make_shared<IncludeInserter>(
           Filename, Inputs.Contents, Style, BuildDir.get(),
           &Clang->getPreprocessor().getHeaderSearchInfo());

diff  --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 3e741f6e0b536b..3af99b9db056da 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -582,7 +582,21 @@ std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
 
 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                           llvm::StringRef Content,
-                                          const ThreadsafeFS &TFS) {
+                                          const ThreadsafeFS &TFS,
+                                          bool FormatFile) {
+  // Unless we're formatting a substantial amount of code (the entire file
+  // or an arbitrarily large range), skip libFormat's heuristic check for
+  // .h files that tries to determine whether the file contains objective-c
+  // code. (This is accomplished by passing empty code contents to getStyle().
+  // The heuristic is the only thing that looks at the contents.)
+  // This is a workaround for PR60151, a known issue in libFormat where this
+  // heuristic can OOM on large files. If we *are* formatting the entire file,
+  // there's no point in doing this because the actual format::reformat() call
+  // will run into the same OOM; we'd just be risking inconsistencies between
+  // clangd and clang-format on smaller .h files where they disagree on what
+  // language is detected.
+  if (!FormatFile)
+    Content = {};
   auto Style = format::getStyle(format::DefaultFormatStyle, File,
                                 format::DefaultFallbackStyle, Content,
                                 TFS.view(/*CWD=*/std::nullopt).get());

diff  --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h
index a1bb44c1761202..028549f659d60a 100644
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@@ -171,9 +171,13 @@ std::optional<std::string> getCanonicalPath(const FileEntryRef F,
 /// FIXME: should we be caching the .clang-format file search?
 /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
 /// though the latter may have been overridden in main()!
+/// \p FormatFile indicates whether the returned FormatStyle is used
+/// to format the entire main file (or a range selected by the user
+/// which can be arbitrarily long).
 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                           llvm::StringRef Content,
-                                          const ThreadsafeFS &TFS);
+                                          const ThreadsafeFS &TFS,
+                                          bool FormatFile);
 
 /// Cleanup and format the given replacements.
 llvm::Expected<tooling::Replacements>

diff  --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp
index b5c4d145619df3..45e2e1e278deaf 100644
--- a/clang-tools-extra/clangd/tool/Check.cpp
+++ b/clang-tools-extra/clangd/tool/Check.cpp
@@ -226,7 +226,7 @@ class Checker {
 
     // FIXME: Check that resource-dir/built-in-headers exist?
 
-    Style = getFormatStyleForFile(File, Inputs.Contents, TFS);
+    Style = getFormatStyleForFile(File, Inputs.Contents, TFS, false);
 
     return true;
   }

diff  --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
index 1be5b7f6a8dbba..801d535c1b9d0d 100644
--- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
@@ -1090,6 +1090,44 @@ TEST(ApplyEditsTest, EndLineOutOfRange) {
                     FailedWithMessage("Line value is out of range (100)"));
 }
 
+TEST(FormatStyleForFile, LanguageGuessingHeuristic) {
+  StringRef ObjCContent = "@interface Foo\n at end\n";
+  StringRef CppContent = "class Foo {};\n";
+  using LK = format::FormatStyle::LanguageKind;
+  struct TestCase {
+    llvm::StringRef Filename;
+    llvm::StringRef Contents;
+    bool FormatFile;
+    LK ExpectedLanguage;
+  } TestCases[] = {
+      // If the file extension identifies the file as ObjC, the guessed
+      // language should be ObjC regardless of content or FormatFile flag.
+      {"foo.mm", ObjCContent, true, LK::LK_ObjC},
+      {"foo.mm", ObjCContent, false, LK::LK_ObjC},
+      {"foo.mm", CppContent, true, LK::LK_ObjC},
+      {"foo.mm", CppContent, false, LK::LK_ObjC},
+
+      // If the file extension is ambiguous like .h, FormatFile=true should
+      // result in using libFormat's heuristic to guess the language based
+      // on the file contents.
+      {"foo.h", ObjCContent, true, LK::LK_ObjC},
+      {"foo.h", CppContent, true, LK::LK_Cpp},
+
+      // With FomatFile=false, the language guessing heuristic should be
+      // bypassed
+      {"foo.h", ObjCContent, false, LK::LK_Cpp},
+      {"foo.h", CppContent, false, LK::LK_Cpp},
+  };
+
+  MockFS FS;
+  for (const auto &[Filename, Contents, FormatFile, ExpectedLanguage] :
+       TestCases) {
+    EXPECT_EQ(
+        getFormatStyleForFile(Filename, Contents, FS, FormatFile).Language,
+        ExpectedLanguage);
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang


        


More information about the cfe-commits mailing list