[libc-commits] [libc] [libc] Add wchar support to File (PR #189504)

Michael Jones via libc-commits libc-commits at lists.llvm.org
Wed Apr 8 10:31:59 PDT 2026


https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/189504

>From 93923fc69129f7b900651050d3e11e14d67f2d80 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 26 Mar 2026 23:27:06 +0000
Subject: [PATCH 1/9] [libc] Add wchar support to File struct

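Add stream orientation tracking and wide-character I/O to the File class.

A File now starts unoriented and becomes byte- or wide-oriented on its
first read, write, or unget. Byte operations on a wide-oriented file, and
wide operations on a byte-oriented file, set the error flag and fail
(EINVAL for read/write, EOF/WEOF for ungetc/ungetwc). Wide characters are
converted with the internal wcrtomb/mbrtowc helpers using a per-file
mbstate.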
---
 libc/src/__support/File/CMakeLists.txt     |   3 +
 libc/src/__support/File/file.cpp           | 149 +++++++++++++++++++++
 libc/src/__support/File/file.h             |  33 ++++-
 libc/test/src/__support/File/file_test.cpp |  88 ++++++++++++
 4 files changed, 272 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index f5388ed8e5f34..3495bf3463d65 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -20,6 +20,9 @@ add_object_library(
     libc.src.__support.CPP.span
     libc.src.__support.threads.mutex
     libc.src.__support.error_or
+    libc.src.__support.wchar.mbrtowc
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcrtomb
 )
 
 add_object_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 7b38c543d748d..f24f478ce52ea 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -11,11 +11,14 @@
 #include "hdr/func/realloc.h"
 #include "hdr/stdio_macros.h"
 #include "hdr/types/off_t.h"
+#include "hdr/wchar_macros.h"
 #include "src/__support/CPP/new.h"
 #include "src/__support/CPP/span.h"
 #include "src/__support/alloc-checker.h"
 #include "src/__support/libc_errno.h" // For error macros
 #include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/wcrtomb.h"
 #include "src/string/memory_utils/inline_memcpy.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -53,6 +56,13 @@ void File::lock_list() { File::list_lock.lock(); }
 void File::unlock_list() { File::list_lock.unlock(); }
 
 FileIOResult File::write_unlocked(const void *data, size_t len) {
+  if (orientation == Orientation::WIDE) {
+    err = true;
+    return {0, EINVAL};
+  }
+  if (orientation == Orientation::UNORIENTED)
+    orientation = Orientation::BYTE;
+
   if (!write_allowed()) {
     err = true;
     return {0, EBADF};
@@ -214,6 +224,13 @@ FileIOResult File::write_unlocked_lbf(const uint8_t *data, size_t len) {
 }
 
 FileIOResult File::read_unlocked(void *data, size_t len) {
+  if (orientation == Orientation::WIDE) {
+    err = true;
+    return {0, EINVAL};
+  }
+  if (orientation == Orientation::UNORIENTED)
+    orientation = Orientation::BYTE;
+
   if (!read_allowed()) {
     err = true;
     return {0, EBADF};
@@ -315,6 +332,13 @@ FileIOResult File::read_unlocked_nbf(uint8_t *data, size_t len) {
 }
 
 int File::ungetc_unlocked(int c) {
+  if (orientation == Orientation::WIDE) {
+    err = true;
+    return EOF;
+  }
+  if (orientation == Orientation::UNORIENTED)
+    orientation = Orientation::BYTE;
+
   // There is no meaning to unget if:
   // 1. You are trying to push back EOF.
   // 2. Read operations are not allowed on this file.
@@ -509,4 +533,129 @@ File::ModeFlags File::mode_flags(const char *mode) {
   return flags;
 }
 
+FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
+  if (orientation == Orientation::UNORIENTED)
+    orientation = Orientation::WIDE;
+  if (orientation != Orientation::WIDE) {
+    err = true;
+    return {0, EINVAL};
+  }
+
+  if (!write_allowed()) {
+    err = true;
+    return {0, EBADF};
+  }
+
+  prev_op = FileOp::WRITE;
+
+  char buf[4];
+  auto result = internal::wcrtomb(buf, wc, &shift_state);
+  if (!result.has_value()) {
+    err = true;
+    return {0, result.error()};
+  }
+
+  size_t n = result.value();
+  if (bufmode == _IONBF) {
+    size_t ret_val =
+        write_unlocked_nbf(reinterpret_cast<const uint8_t *>(buf), n);
+    flush_unlocked();
+    return ret_val;
+  } else if (bufmode == _IOFBF) {
+    return write_unlocked_fbf(reinterpret_cast<const uint8_t *>(buf), n);
+  } else {
+    return write_unlocked_lbf(reinterpret_cast<const uint8_t *>(buf), n);
+  }
+}
+
+ErrorOr<wchar_t> File::read_wide_character_unlocked() {
+  if (orientation == Orientation::UNORIENTED)
+    orientation = Orientation::WIDE;
+  if (orientation != Orientation::WIDE) {
+    err = true;
+    return Error(EINVAL);
+  }
+
+  if (!read_allowed()) {
+    err = true;
+    return Error(EBADF);
+  }
+
+  prev_op = FileOp::READ;
+
+  wchar_t wc;
+  bool first_byte = true;
+  while (true) {
+    uint8_t byte;
+    FileIOResult read_result{0};
+    if (bufmode == _IONBF) {
+      read_result = read_unlocked_nbf(&byte, 1);
+    } else {
+      read_result = read_unlocked_fbf(&byte, 1);
+    }
+    if (read_result.has_error()) {
+      err = true;
+      return Error(read_result.error);
+    }
+    if (read_result.value == 0) { // EOF
+      if (first_byte) {
+        return Error(0); // EOF
+      } else {
+        err = true;
+        return Error(EILSEQ); // Incomplete character at EOF
+      }
+    }
+    char c = static_cast<char>(byte);
+    auto res = internal::mbrtowc(&wc, &c, 1, &shift_state);
+    if (!res.has_value()) {
+      err = true;
+      return Error(res.error());
+    }
+    if (res.value() == 0) { // null terminator
+      return L'\0';
+    }
+    if (res.value() != static_cast<size_t>(-2)) { // Complete character
+      return wc;
+    }
+    first_byte = false;
+  }
+}
+
+wint_t File::ungetwc_unlocked(wchar_t wc) {
+  if (orientation == Orientation::UNORIENTED)
+    orientation = Orientation::WIDE;
+  if (orientation != Orientation::WIDE) {
+    err = true;
+    return WEOF;
+  }
+
+  char buf[4];
+  auto result = internal::wcrtomb(buf, wc, &shift_state);
+  if (!result.has_value()) {
+    err = true;
+    return WEOF;
+  }
+  size_t n = result.value();
+
+  if (read_limit == 0) {
+    for (size_t i = 0; i < n; ++i) {
+      this->buf[i] = static_cast<uint8_t>(buf[i]);
+    }
+    read_limit = n;
+    pos = 0;
+  } else {
+    if (pos < n) {
+      err = true;
+      return WEOF;
+    }
+    pos -= n;
+    for (size_t i = 0; i < n; ++i) {
+      this->buf[pos + i] = static_cast<uint8_t>(buf[i]);
+    }
+  }
+  eof = false;
+  err = false;
+  return wc;
+}
+
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index c569f24995a9b..c8bb4ca3ca90e 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -12,11 +12,14 @@
 #include "hdr/stdint_proxy.h"
 #include "hdr/stdio_macros.h"
 #include "hdr/types/off_t.h"
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
 #include "src/__support/CPP/new.h"
 #include "src/__support/error_or.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/properties/architectures.h"
 #include "src/__support/threads/mutex.h"
+#include "src/__support/wchar/mbstate.h"
 
 #include <stddef.h>
 
@@ -52,6 +55,8 @@ class File {
 
   static constexpr size_t DEFAULT_BUFFER_SIZE = 1024;
 
+  enum class Orientation { UNORIENTED, BYTE, WIDE };
+
   using LockFunc = void(File *);
   using UnlockFunc = void(File *);
 
@@ -131,6 +136,9 @@ class File {
   bool eof;
   bool err;
 
+  Orientation orientation;
+  internal::mbstate shift_state;
+
   // This is a convenience RAII class to lock and unlock file objects.
   class FileLock {
     File *file;
@@ -172,7 +180,9 @@ class File {
                                   /*robust=*/false, /*pshared=*/false),
         ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
         own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
-        read_limit(0), eof(false), err(false), prev(nullptr), next(nullptr) {
+        read_limit(0), eof(false), err(false),
+        orientation(Orientation::UNORIENTED), shift_state(), prev(nullptr),
+        next(nullptr), {
     adjust_buf();
   }
 
@@ -215,6 +225,27 @@ class File {
     return ungetc_unlocked(c);
   }
 
+  FileIOResult write_wide_character_unlocked(wchar_t wc);
+
+  FileIOResult write_wide_character(wchar_t wc) {
+    FileLock l(this);
+    return write_wide_character_unlocked(wc);
+  }
+
+  ErrorOr<wchar_t> read_wide_character_unlocked();
+
+  ErrorOr<wchar_t> read_wide_character() {
+    FileLock l(this);
+    return read_wide_character_unlocked();
+  }
+
+  wint_t ungetwc_unlocked(wchar_t wc);
+
+  wint_t ungetwc(wchar_t wc) {
+    FileLock lock(this);
+    return ungetwc_unlocked(wc);
+  }
+
   // Does the following:
   // 1. If in write mode, Write out any data present in the buffer.
   // 2. Call platform_close.
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index ed2200b879048..fbfa9ed411ab2 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -512,3 +512,91 @@ TEST(LlvmLibcFileTest, WriteSplit) {
   EXPECT_TRUE(f->error());
   ASSERT_EQ(f->close(), 0);
 }
+
+TEST(LlvmLibcFileTest, WideCharIO) {
+  constexpr size_t FILE_BUFFER_SIZE = 512;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+  wchar_t wc = L'A';
+  auto write_res = f->write_wide_character(wc);
+  ASSERT_EQ(write_res.value, size_t(1));
+
+  wchar_t wc2 = L'€';
+  write_res = f->write_wide_character(wc2);
+  ASSERT_EQ(write_res.value, size_t(3));
+
+  ASSERT_EQ(f->flush(), 0);
+
+  ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+  auto read_res = f->read_wide_character();
+  ASSERT_TRUE(read_res.has_value());
+  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+            static_cast<unsigned int>(L'A'));
+
+  read_res = f->read_wide_character();
+  ASSERT_TRUE(read_res.has_value());
+  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+            static_cast<unsigned int>(L'€'));
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, WideCharOrientation) {
+  constexpr size_t FILE_BUFFER_SIZE = 512;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+  f->write_wide_character(L'A');
+
+  auto write_res = f->write("B", 1);
+  EXPECT_EQ(write_res.value, size_t(0));
+  EXPECT_TRUE(f->error());
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, ByteCharOrientation) {
+  constexpr size_t FILE_BUFFER_SIZE = 512;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+  f->write("A", 1);
+
+  auto write_res = f->write_wide_character(L'B');
+  EXPECT_EQ(write_res.value, size_t(0));
+  EXPECT_TRUE(f->error());
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, Ungetwc) {
+  constexpr size_t FILE_BUFFER_SIZE = 512;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+  f->write_wide_character(L'A');
+  f->flush();
+  f->seek(0, SEEK_SET);
+
+  auto read_res = f->read_wide_character();
+  ASSERT_TRUE(read_res.has_value());
+  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+            static_cast<unsigned int>(L'A'));
+
+  auto unget_res = f->ungetwc(L'B');
+  EXPECT_EQ(static_cast<unsigned int>(unget_res),
+            static_cast<unsigned int>(L'B'));
+
+  read_res = f->read_wide_character();
+  ASSERT_TRUE(read_res.has_value());
+  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+            static_cast<unsigned int>(L'B'));
+
+  ASSERT_EQ(f->close(), 0);
+}

>From ec64d7e1032ee8fcc0d29cf6a9f8a6422159bf41 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 26 Mar 2026 23:57:40 +0000
Subject: [PATCH 2/9] add string functions

---
 libc/src/__support/File/CMakeLists.txt      |   6 +
 libc/src/__support/File/file.cpp            | 158 ++++++++++++++------
 libc/src/__support/File/file.h              |  21 ++-
 libc/test/src/__support/File/CMakeLists.txt |   1 +
 libc/test/src/__support/File/file_test.cpp  |  31 ++++
 5 files changed, 166 insertions(+), 51 deletions(-)

diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index 3495bf3463d65..bd5ea5257a0aa 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -16,13 +16,19 @@ add_object_library(
     libc.hdr.stdint_proxy
     libc.hdr.func.realloc
     libc.hdr.types.off_t
+    libc.hdr.types.wchar_t
+    libc.hdr.types.wint_t
     libc.src.__support.CPP.new
     libc.src.__support.CPP.span
     libc.src.__support.threads.mutex
     libc.src.__support.error_or
+    libc.src.__support.macros.config
+    libc.src.__support.macros.properties.architectures
     libc.src.__support.wchar.mbrtowc
     libc.src.__support.wchar.mbstate
     libc.src.__support.wchar.wcrtomb
+    libc.src.string.memory_utils.inline_memcpy
+    libc.src.__support.libc_errno
 )
 
 add_object_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index f24f478ce52ea..8051b835cb8f5 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -56,13 +56,20 @@ void File::lock_list() { File::list_lock.lock(); }
 void File::unlock_list() { File::list_lock.unlock(); }
 
 FileIOResult File::write_unlocked(const void *data, size_t len) {
-  if (orientation == Orientation::WIDE) {
+  switch (orientation) {
+  case Orientation::WIDE:
     err = true;
     return {0, EINVAL};
-  }
-  if (orientation == Orientation::UNORIENTED)
+  case Orientation::UNORIENTED:
     orientation = Orientation::BYTE;
+    break;
+  case Orientation::BYTE:
+    break;
+  }
+  return write_unlocked_impl(data, len);
+}
 
+FileIOResult File::write_unlocked_impl(const void *data, size_t len) {
   if (!write_allowed()) {
     err = true;
     return {0, EBADF};
@@ -75,9 +82,11 @@ FileIOResult File::write_unlocked(const void *data, size_t len) {
         write_unlocked_nbf(static_cast<const uint8_t *>(data), len);
     flush_unlocked();
     return ret_val;
-  } else if (bufmode == _IOFBF) { // fully buffered
+  }
+  if (bufmode == _IOFBF) { // fully buffered
     return write_unlocked_fbf(static_cast<const uint8_t *>(data), len);
-  } else /*if (bufmode == _IOLBF) */ { // line buffered
+  }
+  /*if (bufmode == _IOLBF) */ { // line buffered
     return write_unlocked_lbf(static_cast<const uint8_t *>(data), len);
   }
 }
@@ -224,13 +233,20 @@ FileIOResult File::write_unlocked_lbf(const uint8_t *data, size_t len) {
 }
 
 FileIOResult File::read_unlocked(void *data, size_t len) {
-  if (orientation == Orientation::WIDE) {
+  switch (orientation) {
+  case Orientation::WIDE:
     err = true;
     return {0, EINVAL};
-  }
-  if (orientation == Orientation::UNORIENTED)
+  case Orientation::UNORIENTED:
     orientation = Orientation::BYTE;
+    break;
+  case Orientation::BYTE:
+    break;
+  }
+  return read_unlocked_impl(data, len);
+}
 
+FileIOResult File::read_unlocked_impl(void *data, size_t len) {
   if (!read_allowed()) {
     err = true;
     return {0, EBADF};
@@ -240,9 +256,11 @@ FileIOResult File::read_unlocked(void *data, size_t len) {
 
   if (bufmode == _IONBF) { // unbuffered.
     return read_unlocked_nbf(static_cast<uint8_t *>(data), len);
-  } else if (bufmode == _IOFBF) { // fully buffered
+  }
+  if (bufmode == _IOFBF) { // fully buffered
     return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
-  } else /*if (bufmode == _IOLBF) */ { // line buffered
+  }
+  /*if (bufmode == _IOLBF) */ { // line buffered
     // There is no line buffered mode for read. Use fully buffered instead.
     return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
   }
@@ -533,66 +551,109 @@ File::ModeFlags File::mode_flags(const char *mode) {
   return flags;
 }
 
-FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
-  if (orientation == Orientation::UNORIENTED)
-    orientation = Orientation::WIDE;
-  if (orientation != Orientation::WIDE) {
+FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
+  switch (orientation) {
+  case Orientation::BYTE:
     err = true;
     return {0, EINVAL};
+  case Orientation::UNORIENTED:
+    orientation = Orientation::WIDE;
+    break;
+  case Orientation::WIDE:
+    break;
   }
 
-  if (!write_allowed()) {
-    err = true;
-    return {0, EBADF};
+  size_t written = 0;
+  for (size_t i = 0; i < len; ++i) {
+    char buf[4];
+    auto result = internal::wcrtomb(buf, ws[i], &mbstate);
+    if (!result.has_value()) {
+      err = true;
+      return {written, result.error()};
+    }
+    size_t n = result.value();
+    auto write_res = write_unlocked_impl(buf, n);
+    if (write_res.has_error()) {
+      err = true;
+      return {written, write_res.error};
+    }
+    if (write_res.value < n) {
+      // Partial write of bytes.
+      return {written, 0};
+    }
+    ++written;
   }
+  return {written, 0};
+}
 
-  prev_op = FileOp::WRITE;
+FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
+  switch (orientation) {
+  case Orientation::BYTE:
+    err = true;
+    return {0, EINVAL};
+  case Orientation::UNORIENTED:
+    orientation = Orientation::WIDE;
+    break;
+  case Orientation::WIDE:
+    break;
+  }
 
   char buf[4];
-  auto result = internal::wcrtomb(buf, wc, &shift_state);
+  auto result = internal::wcrtomb(buf, wc, &mbstate);
   if (!result.has_value()) {
     err = true;
     return {0, result.error()};
   }
 
   size_t n = result.value();
-  if (bufmode == _IONBF) {
-    size_t ret_val =
-        write_unlocked_nbf(reinterpret_cast<const uint8_t *>(buf), n);
-    flush_unlocked();
-    return ret_val;
-  } else if (bufmode == _IOFBF) {
-    return write_unlocked_fbf(reinterpret_cast<const uint8_t *>(buf), n);
-  } else {
-    return write_unlocked_lbf(reinterpret_cast<const uint8_t *>(buf), n);
-  }
+  return write_unlocked_impl(buf, n);
 }
 
-ErrorOr<wchar_t> File::read_wide_character_unlocked() {
-  if (orientation == Orientation::UNORIENTED)
-    orientation = Orientation::WIDE;
-  if (orientation != Orientation::WIDE) {
+FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
+  switch (orientation) {
+  case Orientation::BYTE:
     err = true;
-    return Error(EINVAL);
+    return {0, EINVAL};
+  case Orientation::UNORIENTED:
+    orientation = Orientation::WIDE;
+    break;
+  case Orientation::WIDE:
+    break;
   }
 
-  if (!read_allowed()) {
-    err = true;
-    return Error(EBADF);
+  size_t read_count = 0;
+  for (size_t i = 0; i < len; ++i) {
+    auto res = read_wide_character_unlocked();
+    if (!res.has_value()) {
+      if (res.error() == 0) { // EOF
+        break;
+      }
+      err = true;
+      return {read_count, res.error()};
+    }
+    ws[i] = res.value();
+    ++read_count;
   }
+  return {read_count, 0};
+}
 
-  prev_op = FileOp::READ;
+ErrorOr<wchar_t> File::read_wide_character_unlocked() {
+  switch (orientation) {
+  case Orientation::BYTE:
+    err = true;
+    return Error(EINVAL);
+  case Orientation::UNORIENTED:
+    orientation = Orientation::WIDE;
+    break;
+  case Orientation::WIDE:
+    break;
+  }
 
   wchar_t wc;
   bool first_byte = true;
   while (true) {
     uint8_t byte;
-    FileIOResult read_result{0};
-    if (bufmode == _IONBF) {
-      read_result = read_unlocked_nbf(&byte, 1);
-    } else {
-      read_result = read_unlocked_fbf(&byte, 1);
-    }
+    FileIOResult read_result = read_unlocked_impl(&byte, 1);
     if (read_result.has_error()) {
       err = true;
       return Error(read_result.error);
@@ -600,13 +661,12 @@ ErrorOr<wchar_t> File::read_wide_character_unlocked() {
     if (read_result.value == 0) { // EOF
       if (first_byte) {
         return Error(0); // EOF
-      } else {
-        err = true;
-        return Error(EILSEQ); // Incomplete character at EOF
       }
+      err = true;
+      return Error(EILSEQ); // Incomplete character at EOF
     }
     char c = static_cast<char>(byte);
-    auto res = internal::mbrtowc(&wc, &c, 1, &shift_state);
+    auto res = internal::mbrtowc(&wc, &c, 1, &mbstate);
     if (!res.has_value()) {
       err = true;
       return Error(res.error());
@@ -630,7 +690,7 @@ wint_t File::ungetwc_unlocked(wchar_t wc) {
   }
 
   char buf[4];
-  auto result = internal::wcrtomb(buf, wc, &shift_state);
+  auto result = internal::wcrtomb(buf, wc, &mbstate);
   if (!result.has_value()) {
     err = true;
     return WEOF;
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index c8bb4ca3ca90e..de6697a441574 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -137,7 +137,7 @@ class File {
   bool err;
 
   Orientation orientation;
-  internal::mbstate shift_state;
+  internal::mbstate mbstate;
 
   // This is a convenience RAII class to lock and unlock file objects.
   class FileLock {
@@ -181,7 +181,7 @@ class File {
         ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
         own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
         read_limit(0), eof(false), err(false),
-        orientation(Orientation::UNORIENTED), shift_state(), prev(nullptr),
+        orientation(Orientation::UNORIENTED), mbstate(), prev(nullptr),
         next(nullptr), {
     adjust_buf();
   }
@@ -225,6 +225,20 @@ class File {
     return ungetc_unlocked(c);
   }
 
+  FileIOResult write_unlocked(const wchar_t *ws, size_t len);
+
+  FileIOResult write(const wchar_t *ws, size_t len) {
+    FileLock l(this);
+    return write_unlocked(ws, len);
+  }
+
+  FileIOResult read_unlocked(wchar_t *ws, size_t len);
+
+  FileIOResult read(wchar_t *ws, size_t len) {
+    FileLock l(this);
+    return read_unlocked(ws, len);
+  }
+
   FileIOResult write_wide_character_unlocked(wchar_t wc);
 
   FileIOResult write_wide_character(wchar_t wc) {
@@ -326,6 +340,9 @@ class File {
   static ModeFlags mode_flags(const char *mode);
 
 private:
+  FileIOResult write_unlocked_impl(const void *data, size_t len);
+  FileIOResult read_unlocked_impl(void *data, size_t len);
+
   FileIOResult write_unlocked_lbf(const uint8_t *data, size_t len);
   FileIOResult write_unlocked_fbf(const uint8_t *data, size_t len);
   FileIOResult write_unlocked_nbf(const uint8_t *data, size_t len);
diff --git a/libc/test/src/__support/File/CMakeLists.txt b/libc/test/src/__support/File/CMakeLists.txt
index a11f52978f35f..76f6d7145efc9 100644
--- a/libc/test/src/__support/File/CMakeLists.txt
+++ b/libc/test/src/__support/File/CMakeLists.txt
@@ -19,6 +19,7 @@ add_libc_test(
     libc.src.errno.errno
     libc.src.__support.CPP.new
     libc.src.__support.File.file
+    libc.src.__support.error_or
 )
 
 add_libc_test(
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index fbfa9ed411ab2..9b578d4cdb5d8 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -600,3 +600,34 @@ TEST(LlvmLibcFileTest, Ungetwc) {
 
   ASSERT_EQ(f->close(), 0);
 }
+
+TEST(LlvmLibcFileTest, WideStringIO) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+  ASSERT_FALSE(f == nullptr);
+
+  const wchar_t *ws = L"Hello, World!";
+  size_t len = 13;
+
+  auto write_res = f->write(ws, len);
+  ASSERT_FALSE(write_res.has_error());
+  EXPECT_EQ(write_res.value, len);
+
+  ASSERT_EQ(f->flush(), 0); // Ensure everything is written to StringFile
+
+  ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+  wchar_t read_buf[20];
+  auto read_res = f->read(read_buf, len);
+  ASSERT_FALSE(read_res.has_error());
+  EXPECT_EQ(read_res.value, len);
+
+  for (size_t i = 0; i < len; ++i) {
+    EXPECT_EQ(static_cast<unsigned int>(read_buf[i]),
+              static_cast<unsigned int>(ws[i]));
+  }
+
+  ASSERT_EQ(f->close(), 0);
+}

>From c2593781c4def04be16affeb870e78a0e65fad8b Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Fri, 27 Mar 2026 22:56:50 +0000
Subject: [PATCH 3/9] add orientation handling

---
 libc/src/__support/File/CMakeLists.txt     |  1 +
 libc/src/__support/File/file.cpp           | 22 +++++++++---------
 libc/src/__support/File/file.h             | 18 +++++++++++++++
 libc/test/src/__support/File/file_test.cpp | 26 ++++++++++++++++++++++
 4 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index bd5ea5257a0aa..da8ae6716020b 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -18,6 +18,7 @@ add_object_library(
     libc.hdr.types.off_t
     libc.hdr.types.wchar_t
     libc.hdr.types.wint_t
+    libc.hdr.wchar_macros
     libc.src.__support.CPP.new
     libc.src.__support.CPP.span
     libc.src.__support.threads.mutex
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 8051b835cb8f5..ef155174869e9 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -86,9 +86,8 @@ FileIOResult File::write_unlocked_impl(const void *data, size_t len) {
   if (bufmode == _IOFBF) { // fully buffered
     return write_unlocked_fbf(static_cast<const uint8_t *>(data), len);
   }
-  /*if (bufmode == _IOLBF) */ { // line buffered
-    return write_unlocked_lbf(static_cast<const uint8_t *>(data), len);
-  }
+  return write_unlocked_lbf(static_cast<const uint8_t *>(data),
+                            len); // line buffered
 }
 
 FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) {
@@ -280,9 +279,7 @@ size_t File::copy_data_from_buf(uint8_t *data, size_t len) {
   }
 
   // Copy all of the available data.
-  // TODO: Replace the for loop with a call to internal memcpy.
-  for (size_t i = 0; i < available_data; ++i)
-    dataref[i] = bufref[i + pos];
+  inline_memcpy(dataref.data(), bufref.data() + pos, available_data);
   read_limit = pos = 0; // Reset the pointers.
 
   return available_data;
@@ -409,14 +406,19 @@ ErrorOr<int> File::seek(off_t offset, int whence) {
     // function. Note that read_limit >= pos is always true.
     offset -= (read_limit - pos);
   }
+  auto result = platform_seek(this, offset, whence);
+  if (!result.has_value())
+    return Error(result.error());
+
   pos = read_limit = 0;
   prev_op = FileOp::SEEK;
-  // Reset the eof flag as a seek might move the file positon to some place
+  // Reset the eof flag as a seek might move the file position to some place
   // readable.
   eof = false;
-  auto result = platform_seek(this, offset, whence);
-  if (!result.has_value())
-    return Error(result.error());
+  if (orientation == Orientation::WIDE ||
+      orientation == Orientation::UNORIENTED)
+    mbstate = internal::mbstate();
+
   return 0;
 }
 
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index de6697a441574..852a6ed499d17 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -335,6 +335,24 @@ class File {
     return iseof_unlocked();
   }
 
+  Orientation get_orientation_unlocked() const { return orientation; }
+
+  Orientation get_orientation() {
+    FileLock l(this);
+    return get_orientation_unlocked();
+  }
+
+  Orientation try_set_orientation_unlocked(Orientation o) {
+    if (orientation == Orientation::UNORIENTED)
+      orientation = o;
+    return orientation;
+  }
+
+  Orientation try_set_orientation(Orientation o) {
+    FileLock l(this);
+    return try_set_orientation_unlocked(o);
+  }
+
   // Returns an bit map of flags corresponding to enumerations of
   // OpenMode, ContentType and CreateType.
   static ModeFlags mode_flags(const char *mode);
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 9b578d4cdb5d8..1b17a4c1cce0c 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -631,3 +631,29 @@ TEST(LlvmLibcFileTest, WideStringIO) {
 
   ASSERT_EQ(f->close(), 0);
 }
+
+TEST(LlvmLibcFileTest, TrySetOrientation) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "r+");
+  ASSERT_FALSE(f == nullptr);
+
+  EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+            static_cast<unsigned int>(File::Orientation::UNORIENTED));
+
+  EXPECT_EQ(static_cast<unsigned int>(
+                f->try_set_orientation(File::Orientation::WIDE)),
+            static_cast<unsigned int>(File::Orientation::WIDE));
+  EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+            static_cast<unsigned int>(File::Orientation::WIDE));
+
+  EXPECT_EQ(
+      static_cast<unsigned int>(
+          f->try_set_orientation(File::Orientation::BYTE)),
+      static_cast<unsigned int>(File::Orientation::WIDE)); // Cannot change
+  EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+            static_cast<unsigned int>(File::Orientation::WIDE));
+
+  ASSERT_EQ(f->close(), 0);
+}

>From 922e8bd63b64146738b713fac47850ff03d7707a Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Mon, 30 Mar 2026 22:34:02 +0000
Subject: [PATCH 4/9] remove characterwise functions

---
 libc/src/__support/File/file.cpp           | 130 +++++++--------------
 libc/src/__support/File/file.h             |  14 ---
 libc/test/src/__support/File/file_test.cpp |  50 ++------
 3 files changed, 53 insertions(+), 141 deletions(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index ef155174869e9..06183ee22dfe6 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -17,7 +17,7 @@
 #include "src/__support/alloc-checker.h"
 #include "src/__support/libc_errno.h" // For error macros
 #include "src/__support/macros/config.h"
-#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/character_converter.h"
 #include "src/__support/wchar/wcrtomb.h"
 #include "src/string/memory_utils/inline_memcpy.h"
 
@@ -567,50 +567,33 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
 
   size_t written = 0;
   for (size_t i = 0; i < len; ++i) {
-    char buf[4];
-    auto result = internal::wcrtomb(buf, ws[i], &mbstate);
-    if (!result.has_value()) {
+    internal::CharacterConverter cr(&mbstate);
+    int push_err = cr.push(static_cast<char32_t>(ws[i]));
+    if (push_err != 0) {
       err = true;
-      return {written, result.error()};
+      return {written, push_err};
     }
-    size_t n = result.value();
-    auto write_res = write_unlocked_impl(buf, n);
-    if (write_res.has_error()) {
-      err = true;
-      return {written, write_res.error};
-    }
-    if (write_res.value < n) {
-      // Partial write of bytes.
-      return {written, 0};
+    while (!cr.isEmpty()) {
+      auto pop_res = cr.pop<char8_t>();
+      if (!pop_res.has_value()) {
+        err = true;
+        return {written, pop_res.error()};
+      }
+      char8_t byte = pop_res.value();
+      auto write_res = write_unlocked_impl(&byte, 1);
+      if (write_res.has_error()) {
+        err = true;
+        return {written, write_res.error};
+      }
+      if (write_res.value < 1) {
+        return {written, 0};
+      }
     }
     ++written;
   }
   return {written, 0};
 }
 
-FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
-  switch (orientation) {
-  case Orientation::BYTE:
-    err = true;
-    return {0, EINVAL};
-  case Orientation::UNORIENTED:
-    orientation = Orientation::WIDE;
-    break;
-  case Orientation::WIDE:
-    break;
-  }
-
-  char buf[4];
-  auto result = internal::wcrtomb(buf, wc, &mbstate);
-  if (!result.has_value()) {
-    err = true;
-    return {0, result.error()};
-  }
-
-  size_t n = result.value();
-  return write_unlocked_impl(buf, n);
-}
-
 FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
   switch (orientation) {
   case Orientation::BYTE:
@@ -625,62 +608,35 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
 
   size_t read_count = 0;
   for (size_t i = 0; i < len; ++i) {
-    auto res = read_wide_character_unlocked();
-    if (!res.has_value()) {
-      if (res.error() == 0) { // EOF
-        break;
+    internal::CharacterConverter cr(&mbstate);
+    while (!cr.isFull()) {
+      uint8_t byte;
+      auto read_res = read_unlocked_impl(&byte, 1);
+      if (read_res.has_error()) {
+        err = true;
+        return {read_count, read_res.error};
       }
-      err = true;
-      return {read_count, res.error()};
-    }
-    ws[i] = res.value();
-    ++read_count;
-  }
-  return {read_count, 0};
-}
-
-ErrorOr<wchar_t> File::read_wide_character_unlocked() {
-  switch (orientation) {
-  case Orientation::BYTE:
-    err = true;
-    return Error(EINVAL);
-  case Orientation::UNORIENTED:
-    orientation = Orientation::WIDE;
-    break;
-  case Orientation::WIDE:
-    break;
-  }
-
-  wchar_t wc;
-  bool first_byte = true;
-  while (true) {
-    uint8_t byte;
-    FileIOResult read_result = read_unlocked_impl(&byte, 1);
-    if (read_result.has_error()) {
-      err = true;
-      return Error(read_result.error);
-    }
-    if (read_result.value == 0) { // EOF
-      if (first_byte) {
-        return Error(0); // EOF
+      if (read_res.value == 0) { // EOF
+        if (cr.isEmpty())
+          return {read_count, 0};
+        err = true;
+        return {read_count, EILSEQ}; // Incomplete character at EOF
+      }
+      int push_err = cr.push(static_cast<char8_t>(byte));
+      if (push_err != 0) {
+        err = true;
+        return {read_count, push_err};
       }
-      err = true;
-      return Error(EILSEQ); // Incomplete character at EOF
     }
-    char c = static_cast<char>(byte);
-    auto res = internal::mbrtowc(&wc, &c, 1, &mbstate);
-    if (!res.has_value()) {
+    auto pop_res = cr.pop<char32_t>();
+    if (!pop_res.has_value()) {
       err = true;
-      return Error(res.error());
-    }
-    if (res.value() == 0) { // null terminator
-      return L'\0';
+      return {read_count, pop_res.error()};
     }
-    if (res.value() != static_cast<size_t>(-2)) { // Complete character
-      return wc;
-    }
-    first_byte = false;
+    ws[i] = static_cast<wchar_t>(pop_res.value());
+    ++read_count;
   }
+  return {read_count, 0};
 }
 
 wint_t File::ungetwc_unlocked(wchar_t wc) {
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 852a6ed499d17..f89e78cee4500 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -239,20 +239,6 @@ class File {
     return read_unlocked(ws, len);
   }
 
-  FileIOResult write_wide_character_unlocked(wchar_t wc);
-
-  FileIOResult write_wide_character(wchar_t wc) {
-    FileLock l(this);
-    return write_wide_character_unlocked(wc);
-  }
-
-  ErrorOr<wchar_t> read_wide_character_unlocked();
-
-  ErrorOr<wchar_t> read_wide_character() {
-    FileLock l(this);
-    return read_wide_character_unlocked();
-  }
-
   wint_t ungetwc_unlocked(wchar_t wc);
 
   wint_t ungetwc(wchar_t wc) {
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 1b17a4c1cce0c..aa9844bf41094 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -513,44 +513,13 @@ TEST(LlvmLibcFileTest, WriteSplit) {
   ASSERT_EQ(f->close(), 0);
 }
 
-TEST(LlvmLibcFileTest, WideCharIO) {
-  constexpr size_t FILE_BUFFER_SIZE = 512;
-  char file_buffer[FILE_BUFFER_SIZE];
-  StringFile *f =
-      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
-
-  wchar_t wc = L'A';
-  auto write_res = f->write_wide_character(wc);
-  ASSERT_EQ(write_res.value, size_t(1));
-
-  wchar_t wc2 = L'€';
-  write_res = f->write_wide_character(wc2);
-  ASSERT_EQ(write_res.value, size_t(3));
-
-  ASSERT_EQ(f->flush(), 0);
-
-  ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
-
-  auto read_res = f->read_wide_character();
-  ASSERT_TRUE(read_res.has_value());
-  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
-            static_cast<unsigned int>(L'A'));
-
-  read_res = f->read_wide_character();
-  ASSERT_TRUE(read_res.has_value());
-  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
-            static_cast<unsigned int>(L'€'));
-
-  ASSERT_EQ(f->close(), 0);
-}
-
 TEST(LlvmLibcFileTest, WideCharOrientation) {
   constexpr size_t FILE_BUFFER_SIZE = 512;
   char file_buffer[FILE_BUFFER_SIZE];
   StringFile *f =
       new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
 
-  f->write_wide_character(L'A');
+  f->write(L"A", 1);
 
   auto write_res = f->write("B", 1);
   EXPECT_EQ(write_res.value, size_t(0));
@@ -567,7 +536,7 @@ TEST(LlvmLibcFileTest, ByteCharOrientation) {
 
   f->write("A", 1);
 
-  auto write_res = f->write_wide_character(L'B');
+  auto write_res = f->write(L"B", 1);
   EXPECT_EQ(write_res.value, size_t(0));
   EXPECT_TRUE(f->error());
 
@@ -580,22 +549,23 @@ TEST(LlvmLibcFileTest, Ungetwc) {
   StringFile *f =
       new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
 
-  f->write_wide_character(L'A');
+  f->write(L"A", 1);
   f->flush();
   f->seek(0, SEEK_SET);
 
-  auto read_res = f->read_wide_character();
-  ASSERT_TRUE(read_res.has_value());
-  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+  wchar_t ws_out[2];
+  auto read_res = f->read(ws_out, 1);
+  ASSERT_EQ(read_res.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
             static_cast<unsigned int>(L'A'));
 
   auto unget_res = f->ungetwc(L'B');
   EXPECT_EQ(static_cast<unsigned int>(unget_res),
             static_cast<unsigned int>(L'B'));
 
-  read_res = f->read_wide_character();
-  ASSERT_TRUE(read_res.has_value());
-  EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+  auto read_res2 = f->read(ws_out, 1);
+  ASSERT_EQ(read_res2.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
             static_cast<unsigned int>(L'B'));
 
   ASSERT_EQ(f->close(), 0);

>From 7d55c70d24754c303d5a1fb2e8f05f99ac8c5d70 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Mon, 30 Mar 2026 23:34:09 +0000
Subject: [PATCH 5/9] review and expand tests

---
 libc/src/__support/File/file.cpp           |  1 -
 libc/test/src/__support/File/file_test.cpp | 85 ++++++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 06183ee22dfe6..4aaba1c909330 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -613,7 +613,6 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
       uint8_t byte;
       auto read_res = read_unlocked_impl(&byte, 1);
       if (read_res.has_error()) {
-        err = true;
         return {read_count, read_res.error};
       }
       if (read_res.value == 0) { // EOF
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index aa9844bf41094..c82badad49d89 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -627,3 +627,88 @@ TEST(LlvmLibcFileTest, TrySetOrientation) {
 
   ASSERT_EQ(f->close(), 0);
 }
+
+TEST(LlvmLibcFileTest, UngetwcMultiByte) {
+  constexpr size_t FILE_BUFFER_SIZE = 512;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+  f->write(L"€", 1);
+  f->flush();
+  f->seek(0, SEEK_SET);
+
+  wchar_t ws_out[2];
+  auto read_res = f->read(ws_out, 1);
+  ASSERT_EQ(read_res.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+            static_cast<unsigned int>(L'€'));
+
+  auto unget_res = f->ungetwc(L'¢');
+  EXPECT_EQ(static_cast<unsigned int>(unget_res),
+            static_cast<unsigned int>(L'¢'));
+
+  auto read_res2 = f->read(ws_out, 1);
+  ASSERT_EQ(read_res2.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+            static_cast<unsigned int>(L'¢'));
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, WideStringIO_Multibyte) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+  ASSERT_FALSE(f == nullptr);
+
+  const wchar_t *ws = L"Hello € World!";
+  size_t len = 14;
+
+  auto write_res = f->write(ws, len);
+  ASSERT_FALSE(write_res.has_error());
+  EXPECT_EQ(write_res.value, len);
+
+  ASSERT_EQ(f->flush(), 0);
+
+  ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+  wchar_t read_buf[20];
+  auto read_res = f->read(read_buf, len);
+  ASSERT_FALSE(read_res.has_error());
+  EXPECT_EQ(read_res.value, len);
+
+  for (size_t i = 0; i < len; ++i) {
+    EXPECT_EQ(static_cast<unsigned int>(read_buf[i]),
+              static_cast<unsigned int>(ws[i]));
+  }
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, SeekResetsMbstate) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "r+");
+  ASSERT_FALSE(f == nullptr);
+
+  f->reset_and_fill("\xE2\x82", 2);
+
+  wchar_t ws_out[1];
+  auto read_res = f->read(ws_out, 1);
+  EXPECT_EQ(read_res.value, size_t(0));
+  EXPECT_TRUE(f->error());
+
+  f->reset_and_fill("A", 1);
+  f->seek(0, SEEK_SET);
+  f->clearerr();
+
+  auto read_res2 = f->read(ws_out, 1);
+  EXPECT_EQ(read_res2.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+            static_cast<unsigned int>(L'A'));
+
+  ASSERT_EQ(f->close(), 0);
+}

>From d3b8a8847c28b2a8d2a9ca61b9d440307f5f25d7 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 7 Apr 2026 21:37:32 +0000
Subject: [PATCH 6/9] fix deps

---
 libc/src/__support/File/CMakeLists.txt | 4 ++--
 libc/src/__support/File/file.cpp       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index da8ae6716020b..68bbfe31ace31 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -25,11 +25,11 @@ add_object_library(
     libc.src.__support.error_or
     libc.src.__support.macros.config
     libc.src.__support.macros.properties.architectures
-    libc.src.__support.wchar.mbrtowc
     libc.src.__support.wchar.mbstate
     libc.src.__support.wchar.wcrtomb
+    libc.src.__support.wchar.character_converter
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.__support.libc_errno
+    libc.hdr.errno_macros
 )
 
 add_object_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 4aaba1c909330..f28c7fc53116d 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -8,6 +8,7 @@
 
 #include "file.h"
 
+#include "hdr/errno_macros.h"
 #include "hdr/func/realloc.h"
 #include "hdr/stdio_macros.h"
 #include "hdr/types/off_t.h"
@@ -15,7 +16,6 @@
 #include "src/__support/CPP/new.h"
 #include "src/__support/CPP/span.h"
 #include "src/__support/alloc-checker.h"
-#include "src/__support/libc_errno.h" // For error macros
 #include "src/__support/macros/config.h"
 #include "src/__support/wchar/character_converter.h"
 #include "src/__support/wchar/wcrtomb.h"

>From 873f7ed1edfc7e32bb3a74beadf4c56247f0a43e Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 7 Apr 2026 21:49:25 +0000
Subject: [PATCH 7/9] fix ungetwc

---
 libc/src/__support/File/file.cpp           |  4 ++++
 libc/src/__support/File/file.h             | 12 ++++++-----
 libc/test/src/__support/File/file_test.cpp | 25 ++++++++++++++++++++++
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index f28c7fc53116d..b88ef9f575632 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -655,6 +655,10 @@ wint_t File::ungetwc_unlocked(wchar_t wc) {
   size_t n = result.value();
 
   if (read_limit == 0) {
+    if (n > bufsize) {
+      err = true;
+      return WEOF;
+    }
     for (size_t i = 0; i < n; ++i) {
       this->buf[i] = static_cast<uint8_t>(buf[i]);
     }
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index f89e78cee4500..6f7ba3f48c76c 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -107,8 +107,9 @@ class File {
 
   // For files which are readable, we should be able to support one ungetc
   // operation even if |buf| is nullptr. So, in the constructor of File, we
-  // set |buf| to point to this buffer character.
-  uint8_t ungetc_buf;
+  // set |buf| to point to this buffer. It needs to be at least 4 bytes so
+  // that ungetwc can store one multibyte-encoded wide character in it.
+  uint8_t ungetc_buf[4];
 
   uint8_t *buf;   // Pointer to the stream buffer for buffered streams
   size_t bufsize; // Size of the buffer pointed to by |buf|.
@@ -178,7 +179,7 @@ class File {
       : platform_write(wf), platform_read(rf), platform_seek(sf),
         platform_close(cf), mutex(/*timed=*/false, /*recursive=*/false,
                                   /*robust=*/false, /*pshared=*/false),
-        ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
+        ungetc_buf{}, buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
         own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
         read_limit(0), eof(false), err(false),
         orientation(Orientation::UNORIENTED), mbstate(), prev(nullptr),
@@ -371,8 +372,9 @@ class File {
       // 3. If user wants _IONBF, then the buffer is ignored for writing.
       // So, all of the above cases, having a single ungetc buffer does not
       // affect the behavior experienced by the user.
-      buf = &ungetc_buf;
-      bufsize = 1;
+      buf = ungetc_buf;
+      bufsize = sizeof(ungetc_buf);
+      own_buf = false; // We shouldn't call free on |buf| when closing the file.
     }
   }
 
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index c82badad49d89..694621218c452 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -656,6 +656,31 @@ TEST(LlvmLibcFileTest, UngetwcMultiByte) {
   ASSERT_EQ(f->close(), 0);
 }
 
+TEST(LlvmLibcFileTest, UngetwcUnbufferedMultiByte) {
+  StringFile *f = new_string_file(nullptr, 0, _IONBF, true, "w+");
+  ASSERT_FALSE(f == nullptr);
+
+  f->write(L"€", 1);
+  f->seek(0, SEEK_SET);
+
+  wchar_t ws_out[2];
+  auto read_res = f->read(ws_out, 1);
+  ASSERT_EQ(read_res.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+            static_cast<unsigned int>(L'€'));
+
+  auto unget_res = f->ungetwc(L'¢');
+  EXPECT_EQ(static_cast<unsigned int>(unget_res),
+            static_cast<unsigned int>(L'¢'));
+
+  auto read_res2 = f->read(ws_out, 1);
+  ASSERT_EQ(read_res2.value, size_t(1));
+  EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+            static_cast<unsigned int>(L'¢'));
+
+  ASSERT_EQ(f->close(), 0);
+}
+
 TEST(LlvmLibcFileTest, WideStringIO_Multibyte) {
   constexpr size_t FILE_BUFFER_SIZE = 100;
   char file_buffer[FILE_BUFFER_SIZE];

>From 90c2fc93098249c452fc1b25d2fffeb324f6925c Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Wed, 8 Apr 2026 17:07:40 +0000
Subject: [PATCH 8/9] ungetwc fix and cleanup

---
 libc/src/__support/File/file.cpp           | 47 +++++++--------
 libc/src/__support/File/file.h             |  4 +-
 libc/test/src/__support/File/file_test.cpp | 68 ++++++++++++++++++++++
 3 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index b88ef9f575632..be3a4521ae249 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -581,13 +581,10 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
       }
       char8_t byte = pop_res.value();
       auto write_res = write_unlocked_impl(&byte, 1);
-      if (write_res.has_error()) {
-        err = true;
+      if (write_res.has_error())
         return {written, write_res.error};
-      }
-      if (write_res.value < 1) {
+      if (write_res.value < 1)
         return {written, 0};
-      }
     }
     ++written;
   }
@@ -612,9 +609,8 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
     while (!cr.isFull()) {
       uint8_t byte;
       auto read_res = read_unlocked_impl(&byte, 1);
-      if (read_res.has_error()) {
+      if (read_res.has_error())
         return {read_count, read_res.error};
-      }
       if (read_res.value == 0) { // EOF
         if (cr.isEmpty())
           return {read_count, 0};
@@ -638,41 +634,42 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
   return {read_count, 0};
 }
 
-wint_t File::ungetwc_unlocked(wchar_t wc) {
-  if (orientation == Orientation::UNORIENTED)
-    orientation = Orientation::WIDE;
-  if (orientation != Orientation::WIDE) {
+wint_t File::ungetwc_unlocked(wint_t wc) {
+  if (wc == WEOF)
+    return WEOF;
+  switch (orientation) {
+  case Orientation::BYTE:
     err = true;
     return WEOF;
+  case Orientation::UNORIENTED:
+    orientation = Orientation::WIDE;
+    break;
+  case Orientation::WIDE:
+    break;
   }
 
   char buf[4];
-  auto result = internal::wcrtomb(buf, wc, &mbstate);
-  if (!result.has_value()) {
-    err = true;
+  auto result = internal::wcrtomb(buf, static_cast<wchar_t>(wc), &mbstate);
+  if (!result.has_value())
     return WEOF;
-  }
+
   size_t n = result.value();
 
   if (read_limit == 0) {
-    if (n > bufsize) {
-      err = true;
+    if (n > bufsize)
       return WEOF;
-    }
-    for (size_t i = 0; i < n; ++i) {
+
+    for (size_t i = 0; i < n; ++i)
       this->buf[i] = static_cast<uint8_t>(buf[i]);
-    }
+
     read_limit = n;
     pos = 0;
   } else {
-    if (pos < n) {
-      err = true;
+    if (pos < n)
       return WEOF;
-    }
     pos -= n;
-    for (size_t i = 0; i < n; ++i) {
+    for (size_t i = 0; i < n; ++i)
       this->buf[pos + i] = static_cast<uint8_t>(buf[i]);
-    }
   }
   eof = false;
   err = false;
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 6f7ba3f48c76c..9896d5198dd3e 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -240,9 +240,9 @@ class File {
     return read_unlocked(ws, len);
   }
 
-  wint_t ungetwc_unlocked(wchar_t wc);
+  wint_t ungetwc_unlocked(wint_t wc);
 
-  wint_t ungetwc(wchar_t wc) {
+  wint_t ungetwc(wint_t wc) {
     FileLock lock(this);
     return ungetwc_unlocked(wc);
   }
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 694621218c452..53559a11df087 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "hdr/types/size_t.h"
+#include "hdr/wchar_macros.h"
 #include "src/__support/CPP/new.h"
 #include "src/__support/File/file.h"
 #include "src/__support/alloc-checker.h"
@@ -737,3 +738,70 @@ TEST(LlvmLibcFileTest, SeekResetsMbstate) {
 
   ASSERT_EQ(f->close(), 0);
 }
+
+TEST(LlvmLibcFileTest, ReadWideNotStopAtNewline) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+  ASSERT_FALSE(f == nullptr);
+
+  const wchar_t *ws = L"Hello\nWorld!";
+  size_t len = 12;
+
+  auto write_res = f->write(ws, len);
+  ASSERT_FALSE(write_res.has_error());
+  EXPECT_EQ(write_res.value, len);
+
+  ASSERT_EQ(f->flush(), 0);
+  ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+  wchar_t read_buf[20];
+  auto read_res = f->read(read_buf, len);
+  ASSERT_FALSE(read_res.has_error());
+  // Should NOT stop at newline, so should read all 12 characters.
+  EXPECT_EQ(read_res.value, len);
+  EXPECT_EQ(static_cast<unsigned int>(read_buf[5]),
+            static_cast<unsigned int>(L'\n'));
+  EXPECT_EQ(static_cast<unsigned int>(read_buf[11]),
+            static_cast<unsigned int>(L'!'));
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, UngetwcWEOF) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "r+");
+  ASSERT_FALSE(f == nullptr);
+
+  EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+            static_cast<unsigned int>(File::Orientation::UNORIENTED));
+
+  auto unget_res = f->ungetwc(WEOF);
+  EXPECT_EQ(static_cast<unsigned int>(unget_res),
+            static_cast<unsigned int>(WEOF));
+
+  EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+            static_cast<unsigned int>(File::Orientation::UNORIENTED));
+
+  ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, UngetwcErrorIndicator) {
+  constexpr size_t FILE_BUFFER_SIZE = 100;
+  char file_buffer[FILE_BUFFER_SIZE];
+  StringFile *f =
+      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+  ASSERT_FALSE(f == nullptr);
+
+  f->write("A", 1);
+
+  auto unget_res = f->ungetwc(L'B');
+  EXPECT_EQ(static_cast<unsigned int>(unget_res),
+            static_cast<unsigned int>(WEOF));
+  EXPECT_FALSE(f->error());
+
+  ASSERT_EQ(f->close(), 0);
+}

>From b3f3fa37b986df0f6ab78859f747925878af7ce1 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Wed, 8 Apr 2026 17:31:12 +0000
Subject: [PATCH 9/9] cleanup after rebase, remove incorrect test.

---
 libc/src/__support/File/file.h             |  2 +-
 libc/test/src/__support/File/file_test.cpp | 19 +------------------
 2 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 9896d5198dd3e..2c8ec26d5e7f0 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -183,7 +183,7 @@ class File {
         own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
         read_limit(0), eof(false), err(false),
         orientation(Orientation::UNORIENTED), mbstate(), prev(nullptr),
-        next(nullptr), {
+        next(nullptr) {
     adjust_buf();
   }
 
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 53559a11df087..4dbb75eb66d86 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -563,8 +563,8 @@ TEST(LlvmLibcFileTest, Ungetwc) {
   auto unget_res = f->ungetwc(L'B');
   EXPECT_EQ(static_cast<unsigned int>(unget_res),
             static_cast<unsigned int>(L'B'));
-
   auto read_res2 = f->read(ws_out, 1);
+
   ASSERT_EQ(read_res2.value, size_t(1));
   EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
             static_cast<unsigned int>(L'B'));
@@ -788,20 +788,3 @@ TEST(LlvmLibcFileTest, UngetwcWEOF) {
 
   ASSERT_EQ(f->close(), 0);
 }
-
-TEST(LlvmLibcFileTest, UngetwcErrorIndicator) {
-  constexpr size_t FILE_BUFFER_SIZE = 100;
-  char file_buffer[FILE_BUFFER_SIZE];
-  StringFile *f =
-      new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
-  ASSERT_FALSE(f == nullptr);
-
-  f->write("A", 1);
-
-  auto unget_res = f->ungetwc(L'B');
-  EXPECT_EQ(static_cast<unsigned int>(unget_res),
-            static_cast<unsigned int>(WEOF));
-  EXPECT_FALSE(f->error());
-
-  ASSERT_EQ(f->close(), 0);
-}



More information about the libc-commits mailing list