[libc-commits] [libc] 4ef4f90 - [libc] Fix partial multi-byte write detection in File (#196402)
via libc-commits
libc-commits at lists.llvm.org
Sun May 10 23:34:34 PDT 2026
Author: Jeff Bailey
Date: 2026-05-11T07:34:30+01:00
New Revision: 4ef4f900f0f42678f24bbcba79549217ace966ca
URL: https://github.com/llvm/llvm-project/commit/4ef4f900f0f42678f24bbcba79549217ace966ca
DIFF: https://github.com/llvm/llvm-project/commit/4ef4f900f0f42678f24bbcba79549217ace966ca.diff
LOG: [libc] Fix partial multi-byte write detection in File (#196402)
File::write_unlocked(const wchar_t*, size_t) checked 'write_res.value <
1' after writing a converted UTF-8 sequence. For multi-byte characters,
a short platform write (e.g. 2 of 3 bytes for a 3-byte character) passed
this check and was counted as a successful write. The output stream
would then contain an incomplete UTF-8 sequence with no error reported
to the caller.
Changed the check to 'write_res.value < char_size' and set the error
indicator on the stream when it triggers.
Added a regression test using a mock File subclass that limits
platform_write to 2 bytes per call, simulating short writes on pipes and
sockets.
Assisted-by: Automated tooling, human reviewed.
---------
Co-authored-by: Michael Jones <michaelrj at google.com>
Added:
Modified:
libc/src/__support/File/file.cpp
libc/test/src/__support/File/file_test.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 1499ab56578a9..7c1bc103ca7aa 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -590,8 +590,10 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
auto write_res = write_unlocked_impl(buffer, char_size);
if (write_res.has_error())
return {written, write_res.error};
- if (write_res.value < 1)
- return {written, 0};
+ if (write_res.value < char_size) {
+ err = true;
+ return {written, EIO};
+ }
++written;
}
return {written, 0};
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index f3eb8634ce0be..6c7855225a52a 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -772,3 +772,89 @@ TEST(LlvmLibcFileTest, UngetwcWEOF) {
ASSERT_EQ(f->close(), 0);
}
+
+// A File subclass with a platform_write that simulates short writes.
+// This models the behavior of write(2) on pipes, sockets, or FIFOs where
+// the kernel may write fewer bytes than requested.
+class ShortWriteFile : public File {
+ static constexpr size_t SIZE = 512;
+ size_t pos;
+ char str[SIZE] = {0};
+ size_t max_write;
+
+ static FileIOResult short_write(LIBC_NAMESPACE::File *f, const void *data,
+ size_t len) {
+ ShortWriteFile *sf = static_cast<ShortWriteFile *>(f);
+ // Simulate a short write: write at most max_write bytes per call.
+ size_t to_write = len < sf->max_write ? len : sf->max_write;
+ for (size_t i = 0; i < to_write && sf->pos < SIZE; ++i, ++sf->pos)
+ sf->str[sf->pos] = reinterpret_cast<const char *>(data)[i];
+ return to_write;
+ }
+
+ static FileIOResult short_read(LIBC_NAMESPACE::File *f, void *data,
+ size_t len) {
+ ShortWriteFile *sf = static_cast<ShortWriteFile *>(f);
+ size_t i = 0;
+ for (i = 0; i < len && sf->pos < SIZE; ++i)
+ reinterpret_cast<char *>(data)[i] = sf->str[sf->pos + i];
+ sf->pos += i;
+ return i;
+ }
+
+ static ErrorOr<off_t> short_seek(LIBC_NAMESPACE::File *f, off_t offset,
+ int whence) {
+ ShortWriteFile *sf = static_cast<ShortWriteFile *>(f);
+ if (whence == SEEK_SET)
+ sf->pos = offset;
+ if (whence == SEEK_CUR)
+ sf->pos += offset;
+ if (whence == SEEK_END)
+ sf->pos = SIZE + offset;
+ return sf->pos;
+ }
+
+ static int short_close(LIBC_NAMESPACE::File *f) {
+ delete reinterpret_cast<ShortWriteFile *>(f);
+ return 0;
+ }
+
+public:
+ explicit ShortWriteFile(char *buffer, size_t buflen, int bufmode, bool owned,
+ ModeFlags modeflags, size_t max_write_bytes)
+ : LIBC_NAMESPACE::File(&short_write, &short_read, &short_seek,
+ &short_close, reinterpret_cast<uint8_t *>(buffer),
+ buflen, bufmode, owned, modeflags),
+ pos(0), max_write(max_write_bytes) {}
+
+ void reset() { pos = 0; }
+ size_t get_pos() const { return pos; }
+ char *get_str() { return str; }
+};
+
+// Verify that a short platform_write of a multi-byte UTF-8 character is
+// detected and reported as a failure. POSIX write(2) may perform short
+// writes on pipes, sockets, and FIFOs, so a 3-byte character could have
+// only 2 bytes accepted by the kernel.
+TEST(LlvmLibcFileTest, PartialWideCharWriteDetected) {
+ LIBC_NAMESPACE::AllocChecker ac;
+ // Unbuffered so writes go directly to platform_write, limited to 2 bytes.
+ ShortWriteFile *f = new (ac) ShortWriteFile(
+ nullptr, 0, _IONBF, true, LIBC_NAMESPACE::File::mode_flags("w"),
+ /*max_write_bytes=*/2);
+ ASSERT_FALSE(f == nullptr);
+
+ // € (U+20AC) encodes to 3 UTF-8 bytes: 0xE2 0x82 0xAC.
+ // With max_write=2, only 2 of the 3 bytes will be accepted.
+ const wchar_t euro = L'€';
+ auto result = f->write(&euro, 1);
+
+ // The incomplete character must not be counted as written.
+ EXPECT_TRUE(result.has_error());
+ EXPECT_EQ(result.value, size_t(0));
+
+ // The error indicator on the stream should be set.
+ EXPECT_TRUE(f->error());
+
+ ASSERT_EQ(f->close(), 0);
+}
More information about the libc-commits
mailing list