[llvm] [Support] mmap when possible in getSTDIN. (PR #162013)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 6 01:32:48 PDT 2025


https://github.com/aokblast updated https://github.com/llvm/llvm-project/pull/162013

>From a9d7f6c71d7dd766444b0c3b9ec20c1f64d7468e Mon Sep 17 00:00:00 2001
From: ShengYi Hung <aokblast at FreeBSD.org>
Date: Sun, 5 Oct 2025 21:20:07 +0800
Subject: [PATCH] [Support] mmap when possible in getSTDIN.

When stdin is redirected from a file (e.g. ./prog < file), it is possible
to mmap the input instead of reading and copying it, which reduces total
memory usage when the file is large.

For example, programs like llvm-strings only iterate over the buffer up to
its given size, so they do not require a null terminator.
---
 llvm/include/llvm/Support/MemoryBuffer.h |  4 +--
 llvm/lib/Support/MemoryBuffer.cpp        | 31 ++++++++++++++++++++----
 llvm/tools/llvm-strings/llvm-strings.cpp |  3 ++-
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Support/MemoryBuffer.h b/llvm/include/llvm/Support/MemoryBuffer.h
index f092c67265a31..365aad6dd4243 100644
--- a/llvm/include/llvm/Support/MemoryBuffer.h
+++ b/llvm/include/llvm/Support/MemoryBuffer.h
@@ -75,7 +75,6 @@ class LLVM_ABI MemoryBuffer {
   /// Return an identifier for this buffer, typically the filename it was read
   /// from.
   virtual StringRef getBufferIdentifier() const { return "Unknown buffer"; }
-
   /// For read-only MemoryBuffer_MMap, mark the buffer as unused in the near
   /// future and the kernel can free resources associated with it. Further
   /// access is supported but may be expensive. This calls
@@ -143,7 +142,8 @@ class LLVM_ABI MemoryBuffer {
   getMemBufferCopy(StringRef InputData, const Twine &BufferName = "");
 
   /// Read all of stdin into a file buffer, and return it.
-  static ErrorOr<std::unique_ptr<MemoryBuffer>> getSTDIN();
+  static ErrorOr<std::unique_ptr<MemoryBuffer>>
+  getSTDIN(bool RequiresNullTerminatorRequires = true);
 
   /// Open the specified file as a MemoryBuffer, or open stdin if the Filename
   /// is "-".
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 1c4645ad83641..f65d58fffdd6c 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -165,7 +165,7 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
   StringRef NameRef = Filename.toStringRef(NameBuf);
 
   if (NameRef == "-")
-    return getSTDIN();
+    return getSTDIN(RequiresNullTerminator);
   return getFile(Filename, IsText, RequiresNullTerminator,
                  /*IsVolatile=*/false, Alignment);
 }
@@ -567,12 +567,33 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
                                        IsVolatile, Alignment);
 }
 
-ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getSTDIN(bool RequiresNullTerminator) {
   // Read in all of the data from stdin, we cannot mmap stdin.
-  //
-  // FIXME: That isn't necessarily true, we should try to mmap stdin and
-  // fallback if it fails.
   sys::ChangeStdinMode(sys::fs::OF_Text);
+  std::error_code EC;
+  sys::fs::file_type Type;
+  sys::fs::file_status Status;
+  EC = sys::fs::status(sys::fs::getStdinHandle(), Status);
+  if (EC)
+    return EC;
+
+  Type = Status.type();
+  // If the FD is regular file or block file,
+  // we try to create a mmap buffer first.
+  // If failed, rollback to read and copy.
+  if ((Type == sys::fs::file_type::regular_file ||
+       Type == sys::fs::file_type::block_file) &&
+      shouldUseMmap(sys::fs::getStdinHandle(), Status.getSize(),
+                    Status.getSize(), 0, RequiresNullTerminator,
+                    sys::Process::getPageSizeEstimate(), false)) {
+    std::unique_ptr<MemoryBuffer> Result(
+        new (NamedBufferAlloc("<stdin>")) MemoryBufferMMapFile<MemoryBuffer>(
+            RequiresNullTerminator, sys::fs::getStdinHandle(), Status.getSize(),
+            0, EC));
+    if (!EC && (!RequiresNullTerminator || *Result->getBufferEnd() == '\0'))
+      return std::move(Result);
+  }
 
   return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
 }
diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp
index 9979b93de8427..bfa5849da4740 100644
--- a/llvm/tools/llvm-strings/llvm-strings.cpp
+++ b/llvm/tools/llvm-strings/llvm-strings.cpp
@@ -175,7 +175,8 @@ int main(int argc, char **argv) {
 
   for (const auto &File : InputFileNames) {
     ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
-        MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true);
+        MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true,
+                                     /*RequiresNullTerminator*/ false);
     if (std::error_code EC = Buffer.getError())
       errs() << File << ": " << EC.message() << '\n';
     else



More information about the llvm-commits mailing list