[llvm] [Support] Optimize signal handling file removal code (PR #173586)
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 18 15:01:40 PST 2026
https://github.com/rnk updated https://github.com/llvm/llvm-project/pull/173586
>From 30a20b982f78de130220b4ebcf43984d18d02a93 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at llvm.org>
Date: Thu, 25 Dec 2025 16:12:15 -0800
Subject: [PATCH 01/11] Add load test so we can test our fix
---
llvm/unittests/Support/raw_ostream_test.cpp | 40 +++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index 8f9ed4143a873..cd90f4251a62b 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -17,6 +17,7 @@
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
#include <optional>
+#include <thread>
using namespace llvm;
@@ -647,4 +648,43 @@ TEST(raw_ostreamTest, writeToStdOut) {
EXPECT_EQ(CapturedStdOut, "HelloWorld");
}
+TEST(raw_ostreamTest, DISABLED_writeToOutputLoadTest) {
+ // Create 10 temporary directories
+ std::vector<std::unique_ptr<llvm::unittest::TempDir>> TempDirs;
+ for (int i = 0; i < 10; ++i) {
+ TempDirs.push_back(
+ std::make_unique<llvm::unittest::TempDir>("loadtest", /*Unique*/ true));
+ }
+
+ // Launch 10 threads, each writing 10,000 files (100,000 total)
+ std::vector<std::thread> Threads;
+ for (int threadIdx = 0; threadIdx < 10; ++threadIdx) {
+ Threads.emplace_back([&, threadIdx]() {
+ const auto &TempDir = TempDirs[threadIdx];
+ for (int fileIdx = 0; fileIdx < 10000; ++fileIdx) {
+ SmallString<128> Path(TempDir->path());
+ sys::path::append(Path, "file_" + std::to_string(fileIdx) + ".bin");
+
+ ASSERT_THAT_ERROR(
+ writeToOutput(Path,
+ [](raw_ostream &Out) -> Error {
+ // Write 10,000 32-bit integers (0-9999) = ~40KB per
+ // file
+ for (int32_t i = 0; i < 10000; ++i) {
+ Out.write(reinterpret_cast<const char *>(&i),
+ sizeof(i));
+ }
+ return Error::success();
+ }),
+ Succeeded());
+ }
+ });
+ }
+
+ // Wait for all threads to complete
+ for (auto &Thread : Threads) {
+ Thread.join();
+ }
+}
+
} // namespace
>From edfefe7ceb9dc0bf6bfdcb508cd1c0fd05967039 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at llvm.org>
Date: Thu, 25 Dec 2025 16:30:55 -0800
Subject: [PATCH 02/11] [Support] Optimize signal handling file removal code
clangd/clang#1787 describes how LLVM's code for removing partially
written files when taking a fatal signal is slower than it should be.
This code was substantially rewritten by JF Bastien in
aa1333a91f8d8a060bcf5 to make it (more) signal-safe.
As written in 2018, the logic always allocates a new node for every file
we attempt to protect, and those nodes are added to a global singly
linked list and never removed. If you open a lot of output files,
suddenly output file opening becomes O(n^2), which is what happened
during clangd indexing.
Removing files on signals in a multi-threaded environment is really
complicated! We can use locks to synchronize between threads that are
writing to the list, but we cannot use locks to synchronize against
re-entrant signals, and we don't have any great tools for masking or
delaying things like SIGTERM. This makes it difficult to assert that we
have the one and only reference to a node, even after removing it from
the linked structure, so we can safely deallocate it.
This implementation sidesteps that problem by moving removed nodes onto
a recycling list. This is safe, insofar as that if a signal handler
manages to race and traverse a node that has been recycled, it will walk
over the list of recycled nodes and not delete any files. This is better
than crashing.
I added a synthetic load test in the form of a disabled gtest and this
change makes it 16x faster, and the profile shows that the time is no
longer mostly spent in this signal handling code.
---
llvm/lib/Support/Unix/Signals.inc | 75 ++++++++++++++++++++++++++-----
1 file changed, 64 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 56ad4fc504153..314ba8f0bc4ab 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -104,6 +104,8 @@ class FileToRemoveList {
std::atomic<char *> Filename = nullptr;
std::atomic<FileToRemoveList *> Next = nullptr;
+ static std::atomic<FileToRemoveList *> NodesToRecycle;
+
FileToRemoveList() = default;
// Not signal-safe.
FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
@@ -117,11 +119,34 @@ public:
free(F);
}
+ /// Allocate a new node or pull one off the free list.
+ static FileToRemoveList *allocateNode(const std::string &Filename) {
+ // Try to get a node from the recycle list first.
+ FileToRemoveList *NewHead = nullptr;
+ FileToRemoveList *RecycleHead = NodesToRecycle.load();
+ while (RecycleHead) {
+ FileToRemoveList *Next = RecycleHead->Next.load();
+ if (NodesToRecycle.compare_exchange_weak(RecycleHead, Next)) {
+ // Successfully took a node from the recycle list.
+ NewHead = RecycleHead;
+ NewHead->Filename.store(strdup(Filename.c_str()));
+ NewHead->Next.store(nullptr);
+ break;
+ }
+ // CAS failed, RecycleHead was updated by compare_exchange_weak, retry.
+ }
+
+ // If no recycled node was available, allocate a new one.
+ if (!NewHead)
+ NewHead = new FileToRemoveList(Filename);
+ return NewHead;
+ }
+
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Insert the new file at the end of the list.
- FileToRemoveList *NewHead = new FileToRemoveList(Filename);
+ FileToRemoveList *NewHead = allocateNode(Filename);
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
FileToRemoveList *OldHead = nullptr;
while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
@@ -138,19 +163,44 @@ public:
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
- for (FileToRemoveList *Current = Head.load(); Current;
- Current = Current->Next.load()) {
+ FileToRemoveList *Current = nullptr;
+ std::atomic<FileToRemoveList *> *PrevNext = &Head;
+ for (Current = Head.load(); Current; Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
- if (OldFilename != Filename)
- continue;
- // Leave an empty filename.
- OldFilename = Current->Filename.exchange(nullptr);
- // The filename might have become null between the time we
- // compared it and we exchanged it.
- if (OldFilename)
- free(OldFilename);
+ if (OldFilename == Filename) {
+ break;
+ }
}
+ PrevNext = &Current->Next;
}
+ if (Current == nullptr)
+ return; // No matching file.
+
+ // Found a match. First, unlink the node from the list by swapping
+ // the previous next pointer with the current node's next pointer.
+ FileToRemoveList *CurrentNext = Current->Next.load();
+ PrevNext->store(CurrentNext);
+
+ // Leave an empty filename.
+ char *OldFilename = Current->Filename.exchange(nullptr);
+ // The filename might have become null between the time we
+ // compared it and we exchanged it.
+ if (OldFilename)
+ free(OldFilename);
+
+ // Ideally, we'd deallocate the node at this point, but we have no way to
+ // synchronize with pending signals. Any other thread in the process
+ // could have taken a signal and captured the address of the node in a
+ // register and be in the process of removing files, and we have no way
+ // to safely wait until that is done. To avoid the problem altogether,
+ // recycle the nodes. In the rare case that a signal handler races with
+ // us and chases these links, it proceed down the recycled nodes list and
+ // will leak some temporary files, which is better than crashing.
+ // Add the node to the recycle list using a CAS loop.
+ FileToRemoveList *RecycleHead = NodesToRecycle.load();
+ do {
+ Current->Next.store(RecycleHead);
+ } while (!NodesToRecycle.compare_exchange_weak(RecycleHead, Current));
}
static void removeFile(char *path) {
@@ -195,8 +245,11 @@ public:
Head.exchange(OldHead);
}
};
+
static std::atomic<FileToRemoveList *> FilesToRemove = nullptr;
+std::atomic<FileToRemoveList *> FileToRemoveList::NodesToRecycle = nullptr;
+
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
>From be293b3e708a8cb6ae55ec92af00d2b7488ec8d0 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Wed, 21 Jan 2026 23:38:32 +0000
Subject: [PATCH 03/11] Tweak comment
---
llvm/lib/Support/Unix/Signals.inc | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 314ba8f0bc4ab..6c0e20728e587 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -119,7 +119,9 @@ public:
free(F);
}
- /// Allocate a new node or pull one off the free list.
+ /// Allocate a new node or pull one off the free list. We recycle nodes to
+ /// avoid deallocating them, which can race with signals delivered to other
+ /// threads.
static FileToRemoveList *allocateNode(const std::string &Filename) {
// Try to get a node from the recycle list first.
FileToRemoveList *NewHead = nullptr;
>From 61d89cd795fc30c6b157a2359f509a5b221167d6 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Wed, 21 Jan 2026 23:48:21 +0000
Subject: [PATCH 04/11] Move the load test into a benchmark
---
llvm/benchmarks/CMakeLists.txt | 2 +-
llvm/benchmarks/writeToOutputInParallel.cpp | 101 ++++++++++++++++++++
llvm/unittests/Support/raw_ostream_test.cpp | 39 --------
3 files changed, 102 insertions(+), 40 deletions(-)
create mode 100644 llvm/benchmarks/writeToOutputInParallel.cpp
diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt
index 6581f47012552..00266e20ddf50 100644
--- a/llvm/benchmarks/CMakeLists.txt
+++ b/llvm/benchmarks/CMakeLists.txt
@@ -12,7 +12,7 @@ add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp
add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(MustacheBench Mustache.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(SpecialCaseListBM SpecialCaseListBM.cpp PARTIAL_SOURCES_INTENDED)
-
+add_benchmark(writeToOutputInParallelBench writeToOutputInParallel.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED)
if(NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
diff --git a/llvm/benchmarks/writeToOutputInParallel.cpp b/llvm/benchmarks/writeToOutputInParallel.cpp
new file mode 100644
index 0000000000000..4ad3061beb359
--- /dev/null
+++ b/llvm/benchmarks/writeToOutputInParallel.cpp
@@ -0,0 +1,101 @@
+//===- writeToOutputInParallel.cpp - Parallel file writing benchmark ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+
+using namespace llvm;
+
+// Benchmark parallel file writing using writeToOutput.
+// This simulates scenarios where multiple threads are writing files
+// concurrently, which is common in parallel compilation scenarios.
+static void BM_WriteToOutputInParallel(benchmark::State &State) {
+ const int NumThreads = State.range(0);
+ const int FilesPerThread = State.range(1);
+ const int BytesPerFile = 40 * 1024; // ~40KB per file
+
+ for (auto _ : State) {
+ // Pause timing while we set up temporary directories
+ State.PauseTiming();
+
+ // Create temporary directories for each thread
+ std::vector<std::unique_ptr<llvm::unittest::TempDir>> TempDirs;
+ for (int i = 0; i < NumThreads; ++i) {
+ TempDirs.push_back(std::make_unique<llvm::unittest::TempDir>(
+ "writeToOutputBM", /*Unique*/ true));
+ }
+
+ State.ResumeTiming();
+
+ // Launch threads, each writing multiple files
+ std::vector<std::thread> Threads;
+ for (int threadIdx = 0; threadIdx < NumThreads; ++threadIdx) {
+ Threads.emplace_back([&, threadIdx]() {
+ const auto &TempDir = TempDirs[threadIdx];
+ for (int fileIdx = 0; fileIdx < FilesPerThread; ++fileIdx) {
+ SmallString<128> Path(TempDir->path());
+ sys::path::append(Path, "file_" + std::to_string(fileIdx) + ".bin");
+
+ // Ignore errors in benchmark - we're measuring performance, not
+ // correctness
+ (void)writeToOutput(Path, [BytesPerFile](raw_ostream &Out) -> Error {
+ // Write 32-bit integers up to BytesPerFile
+ const int NumInts = BytesPerFile / sizeof(int32_t);
+ for (int32_t i = 0; i < NumInts; ++i) {
+ Out.write(reinterpret_cast<const char *>(&i), sizeof(i));
+ }
+ return Error::success();
+ });
+ }
+ });
+ }
+
+ // Wait for all threads to complete
+ for (auto &Thread : Threads) {
+ Thread.join();
+ }
+
+ // Cleanup happens automatically when TempDirs goes out of scope
+ }
+
+ // Report throughput metrics
+ const int64_t TotalFiles = NumThreads * FilesPerThread;
+ const int64_t TotalBytes = TotalFiles * BytesPerFile;
+
+ State.SetItemsProcessed(State.iterations() * TotalFiles);
+ State.SetBytesProcessed(State.iterations() * TotalBytes);
+ State.counters["threads"] = NumThreads;
+ State.counters["files_per_thread"] = FilesPerThread;
+ State.counters["total_files"] = TotalFiles;
+}
+
+// Test various combinations of thread counts and files per thread
+// These represent different parallelism scenarios:
+// - Low parallelism, many files per thread (serial-like workload)
+// - High parallelism, few files per thread (highly parallel workload)
+// - Balanced scenarios
+
+BENCHMARK(BM_WriteToOutputInParallel)
+ ->Args({1, 1000}) // 1 thread, 1000 files
+ ->Args({2, 500}) // 2 threads, 500 files each
+ ->Args({4, 250}) // 4 threads, 250 files each
+ ->Args({8, 125}) // 8 threads, 125 files each
+ ->Args({10, 100}) // 10 threads, 100 files each
+ ->Args({10, 1000}) // 10 threads, 1000 files each (stress test)
+ ->Unit(benchmark::kMillisecond);
+
+BENCHMARK_MAIN();
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index efe6a5a799faa..e177f63bbd223 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -645,43 +645,4 @@ TEST(raw_ostreamTest, writeToStdOut) {
EXPECT_EQ(CapturedStdOut, "HelloWorld");
}
-TEST(raw_ostreamTest, DISABLED_writeToOutputLoadTest) {
- // Create 10 temporary directories
- std::vector<std::unique_ptr<llvm::unittest::TempDir>> TempDirs;
- for (int i = 0; i < 10; ++i) {
- TempDirs.push_back(
- std::make_unique<llvm::unittest::TempDir>("loadtest", /*Unique*/ true));
- }
-
- // Launch 10 threads, each writing 10,000 files (100,000 total)
- std::vector<std::thread> Threads;
- for (int threadIdx = 0; threadIdx < 10; ++threadIdx) {
- Threads.emplace_back([&, threadIdx]() {
- const auto &TempDir = TempDirs[threadIdx];
- for (int fileIdx = 0; fileIdx < 10000; ++fileIdx) {
- SmallString<128> Path(TempDir->path());
- sys::path::append(Path, "file_" + std::to_string(fileIdx) + ".bin");
-
- ASSERT_THAT_ERROR(
- writeToOutput(Path,
- [](raw_ostream &Out) -> Error {
- // Write 10,000 32-bit integers (0-9999) = ~40KB per
- // file
- for (int32_t i = 0; i < 10000; ++i) {
- Out.write(reinterpret_cast<const char *>(&i),
- sizeof(i));
- }
- return Error::success();
- }),
- Succeeded());
- }
- });
- }
-
- // Wait for all threads to complete
- for (auto &Thread : Threads) {
- Thread.join();
- }
-}
-
} // namespace
>From 8404784fd4e6e65a4dda4f878f2620bb15d34c70 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Thu, 22 Jan 2026 00:14:50 +0000
Subject: [PATCH 05/11] Simplify benchmark
---
llvm/benchmarks/writeToOutputInParallel.cpp | 67 ++++++++++--------
llvm/lib/Support/Unix/Signals.inc | 77 +++------------------
2 files changed, 51 insertions(+), 93 deletions(-)
diff --git a/llvm/benchmarks/writeToOutputInParallel.cpp b/llvm/benchmarks/writeToOutputInParallel.cpp
index 4ad3061beb359..ee9011826f6ad 100644
--- a/llvm/benchmarks/writeToOutputInParallel.cpp
+++ b/llvm/benchmarks/writeToOutputInParallel.cpp
@@ -11,9 +11,8 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Testing/Support/SupportHelpers.h"
-#include <memory>
#include <string>
#include <thread>
#include <vector>
@@ -29,37 +28,51 @@ static void BM_WriteToOutputInParallel(benchmark::State &State) {
const int BytesPerFile = 40 * 1024; // ~40KB per file
for (auto _ : State) {
- // Pause timing while we set up temporary directories
- State.PauseTiming();
-
- // Create temporary directories for each thread
- std::vector<std::unique_ptr<llvm::unittest::TempDir>> TempDirs;
- for (int i = 0; i < NumThreads; ++i) {
- TempDirs.push_back(std::make_unique<llvm::unittest::TempDir>(
- "writeToOutputBM", /*Unique*/ true));
+ // Create one top-level unique directory
+ SmallString<128> TopLevelDir;
+ if (sys::fs::createUniqueDirectory("writeToOutputBM", TopLevelDir)) {
+ State.SkipWithError("Failed to create temporary directory");
+ return;
+ }
+ auto Cleanup =
+ llvm::scope_exit([&]() { sys::fs::remove_directories(TopLevelDir); });
+
+ // Create subdirectories for each thread within the top-level directory
+ std::vector<SmallString<128>> ThreadDirs;
+ for (int I = 0; I < NumThreads; ++I) {
+ SmallString<128> ThreadDir(TopLevelDir);
+ sys::path::append(ThreadDir, "thread_" + std::to_string(I));
+ if (sys::fs::create_directory(ThreadDir)) {
+ State.SkipWithError("Failed to create thread directory");
+ return;
+ }
+ ThreadDirs.push_back(ThreadDir);
}
-
- State.ResumeTiming();
// Launch threads, each writing multiple files
std::vector<std::thread> Threads;
- for (int threadIdx = 0; threadIdx < NumThreads; ++threadIdx) {
- Threads.emplace_back([&, threadIdx]() {
- const auto &TempDir = TempDirs[threadIdx];
- for (int fileIdx = 0; fileIdx < FilesPerThread; ++fileIdx) {
- SmallString<128> Path(TempDir->path());
- sys::path::append(Path, "file_" + std::to_string(fileIdx) + ".bin");
+ for (int ThreadIdx = 0; ThreadIdx < NumThreads; ++ThreadIdx) {
+ Threads.emplace_back([&, ThreadIdx]() {
+ const auto &ThreadDir = ThreadDirs[ThreadIdx];
+ for (int FileIdx = 0; FileIdx < FilesPerThread; ++FileIdx) {
+ SmallString<128> Path(ThreadDir);
+ sys::path::append(Path, "file_" + std::to_string(FileIdx) + ".bin");
// Ignore errors in benchmark - we're measuring performance, not
// correctness
- (void)writeToOutput(Path, [BytesPerFile](raw_ostream &Out) -> Error {
- // Write 32-bit integers up to BytesPerFile
- const int NumInts = BytesPerFile / sizeof(int32_t);
- for (int32_t i = 0; i < NumInts; ++i) {
- Out.write(reinterpret_cast<const char *>(&i), sizeof(i));
- }
- return Error::success();
- });
+ Error E = writeToOutput(Path, [=](raw_ostream &Out) -> Error {
+ // Write 32-bit integers up to BytesPerFile
+ const int NumInts = BytesPerFile / sizeof(int32_t);
+ for (int32_t I = 0; I < NumInts; ++I) {
+ Out.write(reinterpret_cast<const char *>(&I), sizeof(I));
+ }
+ return Error::success();
+ });
+ if (E) {
+ State.SkipWithError("Failed to create outputfile " +
+ Path.str().str());
+ return;
+ }
}
});
}
@@ -69,7 +82,7 @@ static void BM_WriteToOutputInParallel(benchmark::State &State) {
Thread.join();
}
- // Cleanup happens automatically when TempDirs goes out of scope
+ // Cleanup happens automatically via scope_exit
}
// Report throughput metrics
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 6c0e20728e587..56ad4fc504153 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -104,8 +104,6 @@ class FileToRemoveList {
std::atomic<char *> Filename = nullptr;
std::atomic<FileToRemoveList *> Next = nullptr;
- static std::atomic<FileToRemoveList *> NodesToRecycle;
-
FileToRemoveList() = default;
// Not signal-safe.
FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
@@ -119,36 +117,11 @@ public:
free(F);
}
- /// Allocate a new node or pull one off the free list. We recycle nodes to
- /// avoid deallocating them, which can race with signals delivered to other
- /// threads.
- static FileToRemoveList *allocateNode(const std::string &Filename) {
- // Try to get a node from the recycle list first.
- FileToRemoveList *NewHead = nullptr;
- FileToRemoveList *RecycleHead = NodesToRecycle.load();
- while (RecycleHead) {
- FileToRemoveList *Next = RecycleHead->Next.load();
- if (NodesToRecycle.compare_exchange_weak(RecycleHead, Next)) {
- // Successfully took a node from the recycle list.
- NewHead = RecycleHead;
- NewHead->Filename.store(strdup(Filename.c_str()));
- NewHead->Next.store(nullptr);
- break;
- }
- // CAS failed, RecycleHead was updated by compare_exchange_weak, retry.
- }
-
- // If no recycled node was available, allocate a new one.
- if (!NewHead)
- NewHead = new FileToRemoveList(Filename);
- return NewHead;
- }
-
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Insert the new file at the end of the list.
- FileToRemoveList *NewHead = allocateNode(Filename);
+ FileToRemoveList *NewHead = new FileToRemoveList(Filename);
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
FileToRemoveList *OldHead = nullptr;
while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
@@ -165,44 +138,19 @@ public:
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
- FileToRemoveList *Current = nullptr;
- std::atomic<FileToRemoveList *> *PrevNext = &Head;
- for (Current = Head.load(); Current; Current = Current->Next.load()) {
+ for (FileToRemoveList *Current = Head.load(); Current;
+ Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
- if (OldFilename == Filename) {
- break;
- }
+ if (OldFilename != Filename)
+ continue;
+ // Leave an empty filename.
+ OldFilename = Current->Filename.exchange(nullptr);
+ // The filename might have become null between the time we
+ // compared it and we exchanged it.
+ if (OldFilename)
+ free(OldFilename);
}
- PrevNext = &Current->Next;
}
- if (Current == nullptr)
- return; // No matching file.
-
- // Found a match. First, unlink the node from the list by swapping
- // the previous next pointer with the current node's next pointer.
- FileToRemoveList *CurrentNext = Current->Next.load();
- PrevNext->store(CurrentNext);
-
- // Leave an empty filename.
- char *OldFilename = Current->Filename.exchange(nullptr);
- // The filename might have become null between the time we
- // compared it and we exchanged it.
- if (OldFilename)
- free(OldFilename);
-
- // Ideally, we'd deallocate the node at this point, but we have no way to
- // synchronize with pending signals. Any other thread in the process
- // could have taken a signal and captured the address of the node in a
- // register and be in the process of removing files, and we have no way
- // to safely wait until that is done. To avoid the problem altogether,
- // recycle the nodes. In the rare case that a signal handler races with
- // us and chases these links, it proceed down the recycled nodes list and
- // will leak some temporary files, which is better than crashing.
- // Add the node to the recycle list using a CAS loop.
- FileToRemoveList *RecycleHead = NodesToRecycle.load();
- do {
- Current->Next.store(RecycleHead);
- } while (!NodesToRecycle.compare_exchange_weak(RecycleHead, Current));
}
static void removeFile(char *path) {
@@ -247,11 +195,8 @@ public:
Head.exchange(OldHead);
}
};
-
static std::atomic<FileToRemoveList *> FilesToRemove = nullptr;
-std::atomic<FileToRemoveList *> FileToRemoveList::NodesToRecycle = nullptr;
-
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
>From 90e3c2f03aaad8b955a32357127236de4d10e8bd Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Thu, 22 Jan 2026 00:21:04 +0000
Subject: [PATCH 06/11] sort includes and adjust comments
---
llvm/benchmarks/writeToOutputInParallel.cpp | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/benchmarks/writeToOutputInParallel.cpp b/llvm/benchmarks/writeToOutputInParallel.cpp
index ee9011826f6ad..3d84bf947eebd 100644
--- a/llvm/benchmarks/writeToOutputInParallel.cpp
+++ b/llvm/benchmarks/writeToOutputInParallel.cpp
@@ -7,11 +7,11 @@
//===----------------------------------------------------------------------===//
#include "benchmark/benchmark.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
-#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <thread>
@@ -19,9 +19,10 @@
using namespace llvm;
-// Benchmark parallel file writing using writeToOutput.
-// This simulates scenarios where multiple threads are writing files
-// concurrently, which is common in parallel compilation scenarios.
+// Benchmark parallel file writing using writeToOutput. This simulates scenarios
+// where multiple threads are writing files concurrently, which is common in
+// parallel compilation scenarios. The goal of this benchmark is to ensure that
+// LLVM's global signal handling state updates aren't too expensive.
static void BM_WriteToOutputInParallel(benchmark::State &State) {
const int NumThreads = State.range(0);
const int FilesPerThread = State.range(1);
@@ -58,8 +59,6 @@ static void BM_WriteToOutputInParallel(benchmark::State &State) {
SmallString<128> Path(ThreadDir);
sys::path::append(Path, "file_" + std::to_string(FileIdx) + ".bin");
- // Ignore errors in benchmark - we're measuring performance, not
- // correctness
Error E = writeToOutput(Path, [=](raw_ostream &Out) -> Error {
// Write 32-bit integers up to BytesPerFile
const int NumInts = BytesPerFile / sizeof(int32_t);
>From efbc631c7587bcf5740884c06f47758aeb92ed7e Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Thu, 22 Jan 2026 00:21:35 +0000
Subject: [PATCH 07/11] Remove stray include
---
llvm/unittests/Support/raw_ostream_test.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index e177f63bbd223..ed04721816476 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -17,7 +17,6 @@
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
#include <optional>
-#include <thread>
using namespace llvm;
>From d76d467a5aba1b859f13d058070a54328b4b7319 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Thu, 22 Jan 2026 00:22:55 +0000
Subject: [PATCH 08/11] Bring back the actual code change that got lost when
moving the benchmark
---
llvm/lib/Support/Unix/Signals.inc | 77 ++++++++++++++++++++++++++-----
1 file changed, 66 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 56ad4fc504153..6c0e20728e587 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -104,6 +104,8 @@ class FileToRemoveList {
std::atomic<char *> Filename = nullptr;
std::atomic<FileToRemoveList *> Next = nullptr;
+ static std::atomic<FileToRemoveList *> NodesToRecycle;
+
FileToRemoveList() = default;
// Not signal-safe.
FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
@@ -117,11 +119,36 @@ public:
free(F);
}
+ /// Allocate a new node or pull one off the free list. We recycle nodes to
+ /// avoid deallocating them, which can race with signals delivered to other
+ /// threads.
+ static FileToRemoveList *allocateNode(const std::string &Filename) {
+ // Try to get a node from the recycle list first.
+ FileToRemoveList *NewHead = nullptr;
+ FileToRemoveList *RecycleHead = NodesToRecycle.load();
+ while (RecycleHead) {
+ FileToRemoveList *Next = RecycleHead->Next.load();
+ if (NodesToRecycle.compare_exchange_weak(RecycleHead, Next)) {
+ // Successfully took a node from the recycle list.
+ NewHead = RecycleHead;
+ NewHead->Filename.store(strdup(Filename.c_str()));
+ NewHead->Next.store(nullptr);
+ break;
+ }
+ // CAS failed, RecycleHead was updated by compare_exchange_weak, retry.
+ }
+
+ // If no recycled node was available, allocate a new one.
+ if (!NewHead)
+ NewHead = new FileToRemoveList(Filename);
+ return NewHead;
+ }
+
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Insert the new file at the end of the list.
- FileToRemoveList *NewHead = new FileToRemoveList(Filename);
+ FileToRemoveList *NewHead = allocateNode(Filename);
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
FileToRemoveList *OldHead = nullptr;
while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
@@ -138,19 +165,44 @@ public:
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
- for (FileToRemoveList *Current = Head.load(); Current;
- Current = Current->Next.load()) {
+ FileToRemoveList *Current = nullptr;
+ std::atomic<FileToRemoveList *> *PrevNext = &Head;
+ for (Current = Head.load(); Current; Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
- if (OldFilename != Filename)
- continue;
- // Leave an empty filename.
- OldFilename = Current->Filename.exchange(nullptr);
- // The filename might have become null between the time we
- // compared it and we exchanged it.
- if (OldFilename)
- free(OldFilename);
+ if (OldFilename == Filename) {
+ break;
+ }
}
+ PrevNext = &Current->Next;
}
+ if (Current == nullptr)
+ return; // No matching file.
+
+ // Found a match. First, unlink the node from the list by swapping
+ // the previous next pointer with the current node's next pointer.
+ FileToRemoveList *CurrentNext = Current->Next.load();
+ PrevNext->store(CurrentNext);
+
+ // Leave an empty filename.
+ char *OldFilename = Current->Filename.exchange(nullptr);
+ // The filename might have become null between the time we
+ // compared it and we exchanged it.
+ if (OldFilename)
+ free(OldFilename);
+
+ // Ideally, we'd deallocate the node at this point, but we have no way to
+ // synchronize with pending signals. Any other thread in the process
+ // could have taken a signal and captured the address of the node in a
+ // register and be in the process of removing files, and we have no way
+ // to safely wait until that is done. To avoid the problem altogether,
+ // recycle the nodes. In the rare case that a signal handler races with
+ // us and chases these links, it proceed down the recycled nodes list and
+ // will leak some temporary files, which is better than crashing.
+ // Add the node to the recycle list using a CAS loop.
+ FileToRemoveList *RecycleHead = NodesToRecycle.load();
+ do {
+ Current->Next.store(RecycleHead);
+ } while (!NodesToRecycle.compare_exchange_weak(RecycleHead, Current));
}
static void removeFile(char *path) {
@@ -195,8 +247,11 @@ public:
Head.exchange(OldHead);
}
};
+
static std::atomic<FileToRemoveList *> FilesToRemove = nullptr;
+std::atomic<FileToRemoveList *> FileToRemoveList::NodesToRecycle = nullptr;
+
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
>From 2afa5504ddcb4322ef70f8869c38258bb3818492 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Thu, 22 Jan 2026 00:45:36 +0000
Subject: [PATCH 09/11] fix formatting
---
llvm/benchmarks/writeToOutputInParallel.cpp | 28 ++++++++++-----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/benchmarks/writeToOutputInParallel.cpp b/llvm/benchmarks/writeToOutputInParallel.cpp
index 3d84bf947eebd..c2b29f07a3eee 100644
--- a/llvm/benchmarks/writeToOutputInParallel.cpp
+++ b/llvm/benchmarks/writeToOutputInParallel.cpp
@@ -60,13 +60,13 @@ static void BM_WriteToOutputInParallel(benchmark::State &State) {
sys::path::append(Path, "file_" + std::to_string(FileIdx) + ".bin");
Error E = writeToOutput(Path, [=](raw_ostream &Out) -> Error {
- // Write 32-bit integers up to BytesPerFile
- const int NumInts = BytesPerFile / sizeof(int32_t);
- for (int32_t I = 0; I < NumInts; ++I) {
- Out.write(reinterpret_cast<const char *>(&I), sizeof(I));
- }
- return Error::success();
- });
+ // Write 32-bit integers up to BytesPerFile
+ const int NumInts = BytesPerFile / sizeof(int32_t);
+ for (int32_t I = 0; I < NumInts; ++I) {
+ Out.write(reinterpret_cast<const char *>(&I), sizeof(I));
+ }
+ return Error::success();
+ });
if (E) {
State.SkipWithError("Failed to create outputfile " +
Path.str().str());
@@ -87,7 +87,7 @@ static void BM_WriteToOutputInParallel(benchmark::State &State) {
// Report throughput metrics
const int64_t TotalFiles = NumThreads * FilesPerThread;
const int64_t TotalBytes = TotalFiles * BytesPerFile;
-
+
State.SetItemsProcessed(State.iterations() * TotalFiles);
State.SetBytesProcessed(State.iterations() * TotalBytes);
State.counters["threads"] = NumThreads;
@@ -102,12 +102,12 @@ static void BM_WriteToOutputInParallel(benchmark::State &State) {
// - Balanced scenarios
BENCHMARK(BM_WriteToOutputInParallel)
- ->Args({1, 1000}) // 1 thread, 1000 files
- ->Args({2, 500}) // 2 threads, 500 files each
- ->Args({4, 250}) // 4 threads, 250 files each
- ->Args({8, 125}) // 8 threads, 125 files each
- ->Args({10, 100}) // 10 threads, 100 files each
- ->Args({10, 1000}) // 10 threads, 1000 files each (stress test)
+ ->Args({1, 1000}) // 1 thread, 1000 files
+ ->Args({2, 500}) // 2 threads, 500 files each
+ ->Args({4, 250}) // 4 threads, 250 files each
+ ->Args({8, 125}) // 8 threads, 125 files each
+ ->Args({10, 100}) // 10 threads, 100 files each
+ ->Args({10, 1000}) // 10 threads, 1000 files each (stress test)
->Unit(benchmark::kMillisecond);
BENCHMARK_MAIN();
>From 834bb37e7ad60fcc71b6f0426dc3f452ceee5790 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Tue, 17 Feb 2026 17:18:35 -0800
Subject: [PATCH 10/11] Eliminate the recycle list. I believe we can use the
null filenames to track which nodes are in use
---
llvm/lib/Support/Unix/Signals.inc | 105 +++++++++---------------------
1 file changed, 32 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 6c0e20728e587..176bf491092a6 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -104,11 +104,9 @@ class FileToRemoveList {
std::atomic<char *> Filename = nullptr;
std::atomic<FileToRemoveList *> Next = nullptr;
- static std::atomic<FileToRemoveList *> NodesToRecycle;
-
FileToRemoveList() = default;
- // Not signal-safe.
- FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
+ // Takes ownership of \p filename.
+ FileToRemoveList(char *filename) : Filename(filename) {}
public:
// Not signal-safe.
@@ -119,41 +117,30 @@ public:
free(F);
}
- /// Allocate a new node or pull one off the free list. We recycle nodes to
- /// avoid deallocating them, which can race with signals delivered to other
- /// threads.
- static FileToRemoveList *allocateNode(const std::string &Filename) {
- // Try to get a node from the recycle list first.
- FileToRemoveList *NewHead = nullptr;
- FileToRemoveList *RecycleHead = NodesToRecycle.load();
- while (RecycleHead) {
- FileToRemoveList *Next = RecycleHead->Next.load();
- if (NodesToRecycle.compare_exchange_weak(RecycleHead, Next)) {
- // Successfully took a node from the recycle list.
- NewHead = RecycleHead;
- NewHead->Filename.store(strdup(Filename.c_str()));
- NewHead->Next.store(nullptr);
- break;
- }
- // CAS failed, RecycleHead was updated by compare_exchange_weak, retry.
- }
-
- // If no recycled node was available, allocate a new one.
- if (!NewHead)
- NewHead = new FileToRemoveList(Filename);
- return NewHead;
- }
-
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
- // Insert the new file at the end of the list.
- FileToRemoveList *NewHead = allocateNode(Filename);
+ // Reuse a node with a null filename (left behind by erase) if one exists.
+ // There are two cases where Filename can be null:
+ // - A node left behind by a previous file that we had to remove
+ // - A node whose file is actively being removed by a signal handler right
+ // now.
+ char *NewFilename = strdup(Filename.c_str());
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
- FileToRemoveList *OldHead = nullptr;
- while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
- InsertionPoint = &OldHead->Next;
- OldHead = nullptr;
+ FileToRemoveList *Current = Head.load();
+ for (; Current; Current = Current->Next.load()) {
+ char *Expected = nullptr;
+ if (Current->Filename.compare_exchange_strong(Expected, NewFilename))
+ return; // Reused a slot.
+ InsertionPoint = &Current->Next;
+ }
+
+ // Append the new node at the end; on CAS failure, advance to the new tail.
+ FileToRemoveList *NewNode = new FileToRemoveList(NewFilename);
+ FileToRemoveList *OldNext = nullptr;
+ while (!InsertionPoint->compare_exchange_strong(OldNext, NewNode)) {
+ InsertionPoint = &OldNext->Next;
+ OldNext = nullptr;
}
}
@@ -165,44 +152,19 @@ public:
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
- FileToRemoveList *Current = nullptr;
- std::atomic<FileToRemoveList *> *PrevNext = &Head;
- for (Current = Head.load(); Current; Current = Current->Next.load()) {
+ for (FileToRemoveList *Current = Head.load(); Current;
+ Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
- if (OldFilename == Filename) {
- break;
- }
+ if (OldFilename != Filename)
+ continue;
+ // Leave an empty filename.
+ OldFilename = Current->Filename.exchange(nullptr);
+ // The filename might have become null between the time we
+ // compared it and we exchanged it.
+ if (OldFilename)
+ free(OldFilename);
}
- PrevNext = &Current->Next;
}
- if (Current == nullptr)
- return; // No matching file.
-
- // Found a match. First, unlink the node from the list by swapping
- // the previous next pointer with the current node's next pointer.
- FileToRemoveList *CurrentNext = Current->Next.load();
- PrevNext->store(CurrentNext);
-
- // Leave an empty filename.
- char *OldFilename = Current->Filename.exchange(nullptr);
- // The filename might have become null between the time we
- // compared it and we exchanged it.
- if (OldFilename)
- free(OldFilename);
-
- // Ideally, we'd deallocate the node at this point, but we have no way to
- // synchronize with pending signals. Any other thread in the process
- // could have taken a signal and captured the address of the node in a
- // register and be in the process of removing files, and we have no way
- // to safely wait until that is done. To avoid the problem altogether,
- // recycle the nodes. In the rare case that a signal handler races with
- // us and chases these links, it proceed down the recycled nodes list and
- // will leak some temporary files, which is better than crashing.
- // Add the node to the recycle list using a CAS loop.
- FileToRemoveList *RecycleHead = NodesToRecycle.load();
- do {
- Current->Next.store(RecycleHead);
- } while (!NodesToRecycle.compare_exchange_weak(RecycleHead, Current));
}
static void removeFile(char *path) {
@@ -247,11 +209,8 @@ public:
Head.exchange(OldHead);
}
};
-
static std::atomic<FileToRemoveList *> FilesToRemove = nullptr;
-std::atomic<FileToRemoveList *> FileToRemoveList::NodesToRecycle = nullptr;
-
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
>From 0244b34c06a6b63efacaea69b254fdc5c88df0bd Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rkleckner at nvidia.com>
Date: Wed, 18 Feb 2026 12:37:34 -0800
Subject: [PATCH 11/11] Tweak some names, comments, and simplify
---
llvm/lib/Support/Unix/Signals.inc | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 176bf491092a6..4d1e9be384e07 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -124,13 +124,13 @@ public:
// There are two cases where Filename can be null:
// - A node left behind by a previous file that we had to remove
// - A node whose file is actively being removed by a signal handler right
- // now.
+ // now, in which case, it's OK if this file doesn't get removed.
char *NewFilename = strdup(Filename.c_str());
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
- FileToRemoveList *Current = Head.load();
- for (; Current; Current = Current->Next.load()) {
- char *Expected = nullptr;
- if (Current->Filename.compare_exchange_strong(Expected, NewFilename))
+ for (FileToRemoveList *Current = Head.load(); Current;
+ Current = Current->Next.load()) {
+ char *NullFilename = nullptr;
+ if (Current->Filename.compare_exchange_strong(NullFilename, NewFilename))
return; // Reused a slot.
InsertionPoint = &Current->Next;
}
More information about the llvm-commits
mailing list