[libc-commits] [libc] [libc] Fix partial multi-byte write detection in File (PR #196402)

Jeff Bailey via libc-commits libc-commits at lists.llvm.org
Fri May 8 05:05:10 PDT 2026


https://github.com/kaladron updated https://github.com/llvm/llvm-project/pull/196402

>From c6db22718f0feb34ef5f316b67d703d31c1c2820 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Thu, 7 May 2026 20:19:42 +0100
Subject: [PATCH 1/3] [libc] Fix partial multi-byte write detection in File

File::write_unlocked(const wchar_t*, size_t) checked
'write_res.value < 1' after writing a converted UTF-8 sequence. For
multi-byte characters, a short platform write (e.g. 2 of 3 bytes for
a 3-byte character) was not caught by this check and was counted as a
successful write. The output stream would then contain an incomplete
UTF-8 sequence with no error reported to the caller.

Changed the check to 'write_res.value < char_size' and set the stream's
error indicator whenever that check detects a short write.

Added a regression test using a mock File subclass that limits
platform_write to 2 bytes per call, simulating short writes on
pipes and sockets.

Assisted-by: Automated tooling, human reviewed.
---
 libc/src/__support/File/file.cpp           |  4 +-
 libc/test/src/__support/File/file_test.cpp | 85 ++++++++++++++++++++++
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 31e6b7dab95a2..2e1d1eb458300 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -590,8 +590,10 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
     auto write_res = write_unlocked_impl(buffer, char_size);
     if (write_res.has_error())
       return {written, write_res.error};
-    if (write_res.value < 1)
+    if (write_res.value < char_size) {
+      err = true;
       return {written, 0};
+    }
     ++written;
   }
   return {written, 0};
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index f3eb8634ce0be..62a0b63cb3f7a 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -772,3 +772,88 @@ TEST(LlvmLibcFileTest, UngetwcWEOF) {
 
   ASSERT_EQ(f->close(), 0);
 }
+
+// A File subclass with a platform_write that simulates short writes.
+// This models the behavior of write(2) on pipes, sockets, or FIFOs where
+// the kernel may write fewer bytes than requested.
+class ShortWriteFile : public File {
+  static constexpr size_t SIZE = 512;
+  size_t pos;
+  char str[SIZE] = {0};
+  size_t max_write;
+
+  static FileIOResult short_write(LIBC_NAMESPACE::File *f, const void *data,
+                                  size_t len) {
+    ShortWriteFile *sf = static_cast<ShortWriteFile *>(f);
+    // Simulate a short write: write at most max_write bytes per call.
+    size_t to_write = len < sf->max_write ? len : sf->max_write;
+    for (size_t i = 0; i < to_write && sf->pos < SIZE; ++i, ++sf->pos)
+      sf->str[sf->pos] = reinterpret_cast<const char *>(data)[i];
+    return to_write;
+  }
+
+  static FileIOResult short_read(LIBC_NAMESPACE::File *f, void *data,
+                                 size_t len) {
+    ShortWriteFile *sf = static_cast<ShortWriteFile *>(f);
+    size_t i = 0;
+    for (i = 0; i < len && sf->pos < SIZE; ++i)
+      reinterpret_cast<char *>(data)[i] = sf->str[sf->pos + i];
+    sf->pos += i;
+    return i;
+  }
+
+  static ErrorOr<off_t> short_seek(LIBC_NAMESPACE::File *f, off_t offset,
+                                   int whence) {
+    ShortWriteFile *sf = static_cast<ShortWriteFile *>(f);
+    if (whence == SEEK_SET)
+      sf->pos = offset;
+    if (whence == SEEK_CUR)
+      sf->pos += offset;
+    if (whence == SEEK_END)
+      sf->pos = SIZE + offset;
+    return sf->pos;
+  }
+
+  static int short_close(LIBC_NAMESPACE::File *f) {
+    delete reinterpret_cast<ShortWriteFile *>(f);
+    return 0;
+  }
+
+public:
+  explicit ShortWriteFile(char *buffer, size_t buflen, int bufmode, bool owned,
+                          ModeFlags modeflags, size_t max_write_bytes)
+      : LIBC_NAMESPACE::File(&short_write, &short_read, &short_seek,
+                             &short_close, reinterpret_cast<uint8_t *>(buffer),
+                             buflen, bufmode, owned, modeflags),
+        pos(0), max_write(max_write_bytes) {}
+
+  void reset() { pos = 0; }
+  size_t get_pos() const { return pos; }
+  char *get_str() { return str; }
+};
+
+// Verify that a short platform_write of a multi-byte UTF-8 character is
+// detected and reported as a failure. POSIX write(2) may perform short
+// writes on pipes, sockets, and FIFOs, so a 3-byte character could have
+// only 2 bytes accepted by the kernel.
+TEST(LlvmLibcFileTest, PartialWideCharWriteDetected) {
+  LIBC_NAMESPACE::AllocChecker ac;
+  // Unbuffered so writes go directly to platform_write, limited to 2 bytes.
+  ShortWriteFile *f = new (ac) ShortWriteFile(
+      nullptr, 0, _IONBF, true, LIBC_NAMESPACE::File::mode_flags("w"),
+      /*max_write_bytes=*/2);
+  ASSERT_FALSE(f == nullptr);
+
+  // € (U+20AC) encodes to 3 UTF-8 bytes: 0xE2 0x82 0xAC.
+  // With max_write=2, only 2 of the 3 bytes will be accepted.
+  const wchar_t euro = L'€';
+  auto result = f->write(&euro, 1);
+
+  // The incomplete character must not be counted as written.
+  EXPECT_TRUE(result.has_error() || result.value < 1);
+
+  // The error indicator on the stream should be set.
+  EXPECT_TRUE(f->error());
+
+  ASSERT_EQ(f->close(), 0);
+}

>From 8e586f66e9cb762bb157731a8b34d8f8592bdeda Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Fri, 8 May 2026 13:04:25 +0100
Subject: [PATCH 2/3] Update libc/src/__support/File/file.cpp

Co-authored-by: Michael Jones <michaelrj at google.com>
---
 libc/src/__support/File/file.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 2e1d1eb458300..7da0d9b1c8697 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -592,7 +592,7 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
       return {written, write_res.error};
     if (write_res.value < char_size) {
       err = true;
-      return {written, 0};
+      return {written, EIO};
     }
     ++written;
   }

>From 5902b63dda033b5bfadd5a2234500270d0c51321 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Fri, 8 May 2026 13:05:00 +0100
Subject: [PATCH 3/3] Update libc/test/src/__support/File/file_test.cpp

Co-authored-by: Michael Jones <michaelrj at google.com>
---
 libc/test/src/__support/File/file_test.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 62a0b63cb3f7a..aceda13c7b0ba 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -850,7 +850,8 @@ TEST(LlvmLibcFileTest, PartialWideCharWriteDetected) {
   auto result = f->write(&euro, 1);
 
   // The incomplete character must not be counted as written.
-  EXPECT_TRUE(result.has_error() || result.value < 1);
+  EXPECT_TRUE(result.has_error());
+  EXPECT_LT(result.value, 1);
 
   // The error indicator on the stream should be set.
   EXPECT_TRUE(f->error());



More information about the libc-commits mailing list