[llvm] [Support] Optimize signal handling file removal code (PR #173586)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 25 16:58:10 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
Author: Reid Kleckner (rnk)
<details>
<summary>Changes</summary>
clangd/clang#<!-- -->1787 describes how LLVM's code for removing partially
written files when taking a fatal signal is slower than it should be.
This code was substantially rewritten by JF Bastien in
aa1333a91f8d8a060bcf5 to make it (more) signal-safe.
As written in 2018, the logic always allocates a new node for every file
we attempt to protect, and those nodes are added to a global singly
linked list and never removed. If you open a lot of output files,
suddenly output file opening becomes O(n^2), which is what happened
during clangd indexing.
Removing files on signals in a multi-threaded environment is really
complicated! We can use locks to synchronize between threads that are
writing to the list, but we cannot use locks to synchronize against
re-entrant signals, and we don't have any great tools for masking or
delaying things like SIGTERM. This makes it difficult to assert that we
have the one and only reference to a node, even after removing it from
the linked structure, so we can safely deallocate it.
This implementation sidesteps that problem by moving removed nodes onto
a recycling list. This is safe, insofar as that if a signal handler
manages to race and traverse a node that has been recycled, it will walk
over the list of recycled nodes and not delete any files. This is better
than crashing.
I added a synthetic load test in the form of a disabled gtest and this
change makes it 16x faster, and the profile shows that the time is no
longer mostly spent in this signal handling code.
---
Full diff: https://github.com/llvm/llvm-project/pull/173586.diff
2 Files Affected:
- (modified) llvm/lib/Support/Unix/Signals.inc (+64-11)
- (modified) llvm/unittests/Support/raw_ostream_test.cpp (+40)
``````````diff
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 56ad4fc504153..314ba8f0bc4ab 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -104,6 +104,8 @@ class FileToRemoveList {
std::atomic<char *> Filename = nullptr;
std::atomic<FileToRemoveList *> Next = nullptr;
+ static std::atomic<FileToRemoveList *> NodesToRecycle;
+
FileToRemoveList() = default;
// Not signal-safe.
FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
@@ -117,11 +119,34 @@ public:
free(F);
}
+ /// Allocate a new node or pull one off the free list.
+ static FileToRemoveList *allocateNode(const std::string &Filename) {
+ // Try to get a node from the recycle list first.
+ FileToRemoveList *NewHead = nullptr;
+ FileToRemoveList *RecycleHead = NodesToRecycle.load();
+ while (RecycleHead) {
+ FileToRemoveList *Next = RecycleHead->Next.load();
+ if (NodesToRecycle.compare_exchange_weak(RecycleHead, Next)) {
+ // Successfully took a node from the recycle list.
+ NewHead = RecycleHead;
+ NewHead->Filename.store(strdup(Filename.c_str()));
+ NewHead->Next.store(nullptr);
+ break;
+ }
+ // CAS failed, RecycleHead was updated by compare_exchange_weak, retry.
+ }
+
+ // If no recycled node was available, allocate a new one.
+ if (!NewHead)
+ NewHead = new FileToRemoveList(Filename);
+ return NewHead;
+ }
+
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Insert the new file at the end of the list.
- FileToRemoveList *NewHead = new FileToRemoveList(Filename);
+ FileToRemoveList *NewHead = allocateNode(Filename);
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
FileToRemoveList *OldHead = nullptr;
while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
@@ -138,19 +163,44 @@ public:
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
- for (FileToRemoveList *Current = Head.load(); Current;
- Current = Current->Next.load()) {
+ FileToRemoveList *Current = nullptr;
+ std::atomic<FileToRemoveList *> *PrevNext = &Head;
+ for (Current = Head.load(); Current; Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
- if (OldFilename != Filename)
- continue;
- // Leave an empty filename.
- OldFilename = Current->Filename.exchange(nullptr);
- // The filename might have become null between the time we
- // compared it and we exchanged it.
- if (OldFilename)
- free(OldFilename);
+ if (OldFilename == Filename) {
+ break;
+ }
}
+ PrevNext = &Current->Next;
}
+ if (Current == nullptr)
+ return; // No matching file.
+
+ // Found a match. First, unlink the node from the list by swapping
+ // the previous next pointer with the current node's next pointer.
+ FileToRemoveList *CurrentNext = Current->Next.load();
+ PrevNext->store(CurrentNext);
+
+ // Leave an empty filename.
+ char *OldFilename = Current->Filename.exchange(nullptr);
+ // The filename might have become null between the time we
+ // compared it and we exchanged it.
+ if (OldFilename)
+ free(OldFilename);
+
+ // Ideally, we'd deallocate the node at this point, but we have no way to
+ // synchronize with pending signals. Any other thread in the process
+ // could have taken a signal and captured the address of the node in a
+ // register and be in the process of removing files, and we have no way
+ // to safely wait until that is done. To avoid the problem altogether,
+ // recycle the nodes. In the rare case that a signal handler races with
+ // us and chases these links, it proceed down the recycled nodes list and
+ // will leak some temporary files, which is better than crashing.
+ // Add the node to the recycle list using a CAS loop.
+ FileToRemoveList *RecycleHead = NodesToRecycle.load();
+ do {
+ Current->Next.store(RecycleHead);
+ } while (!NodesToRecycle.compare_exchange_weak(RecycleHead, Current));
}
static void removeFile(char *path) {
@@ -195,8 +245,11 @@ public:
Head.exchange(OldHead);
}
};
+
static std::atomic<FileToRemoveList *> FilesToRemove = nullptr;
+std::atomic<FileToRemoveList *> FileToRemoveList::NodesToRecycle = nullptr;
+
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index 8f9ed4143a873..cd90f4251a62b 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -17,6 +17,7 @@
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
#include <optional>
+#include <thread>
using namespace llvm;
@@ -647,4 +648,43 @@ TEST(raw_ostreamTest, writeToStdOut) {
EXPECT_EQ(CapturedStdOut, "HelloWorld");
}
+TEST(raw_ostreamTest, DISABLED_writeToOutputLoadTest) {
+ // Create 10 temporary directories
+ std::vector<std::unique_ptr<llvm::unittest::TempDir>> TempDirs;
+ for (int i = 0; i < 10; ++i) {
+ TempDirs.push_back(
+ std::make_unique<llvm::unittest::TempDir>("loadtest", /*Unique*/ true));
+ }
+
+ // Launch 10 threads, each writing 10,000 files (100,000 total)
+ std::vector<std::thread> Threads;
+ for (int threadIdx = 0; threadIdx < 10; ++threadIdx) {
+ Threads.emplace_back([&, threadIdx]() {
+ const auto &TempDir = TempDirs[threadIdx];
+ for (int fileIdx = 0; fileIdx < 10000; ++fileIdx) {
+ SmallString<128> Path(TempDir->path());
+ sys::path::append(Path, "file_" + std::to_string(fileIdx) + ".bin");
+
+ ASSERT_THAT_ERROR(
+ writeToOutput(Path,
+ [](raw_ostream &Out) -> Error {
+ // Write 10,000 32-bit integers (0-9999) = ~40KB per
+ // file
+ for (int32_t i = 0; i < 10000; ++i) {
+ Out.write(reinterpret_cast<const char *>(&i),
+ sizeof(i));
+ }
+ return Error::success();
+ }),
+ Succeeded());
+ }
+ });
+ }
+
+ // Wait for all threads to complete
+ for (auto &Thread : Threads) {
+ Thread.join();
+ }
+}
+
} // namespace
``````````
</details>
https://github.com/llvm/llvm-project/pull/173586
More information about the llvm-commits
mailing list