[libc-commits] [libc] [libc] Add wchar support to File (PR #189504)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Wed Apr 8 10:31:59 PDT 2026
https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/189504
>From 93923fc69129f7b900651050d3e11e14d67f2d80 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 26 Mar 2026 23:27:06 +0000
Subject: [PATCH 1/9] [libc] Add wchar support to File struct
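Add stream orientation tracking (UNORIENTED, BYTE, WIDE) to the File struct,
along with wide-character write, read, and ungetwc operations that convert
through a per-file multibyte conversion state. The first byte or wide
operation on an unoriented file fixes its orientation; afterwards, byte
operations on a wide-oriented file (and wide operations on a byte-oriented
file) set the error indicator, with read and write reporting EINVAL.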
---
libc/src/__support/File/CMakeLists.txt | 3 +
libc/src/__support/File/file.cpp | 149 +++++++++++++++++++++
libc/src/__support/File/file.h | 33 ++++-
libc/test/src/__support/File/file_test.cpp | 88 ++++++++++++
4 files changed, 272 insertions(+), 1 deletion(-)
diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index f5388ed8e5f34..3495bf3463d65 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -20,6 +20,9 @@ add_object_library(
libc.src.__support.CPP.span
libc.src.__support.threads.mutex
libc.src.__support.error_or
+ libc.src.__support.wchar.mbrtowc
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.wcrtomb
)
add_object_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 7b38c543d748d..f24f478ce52ea 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -11,11 +11,14 @@
#include "hdr/func/realloc.h"
#include "hdr/stdio_macros.h"
#include "hdr/types/off_t.h"
+#include "hdr/wchar_macros.h"
#include "src/__support/CPP/new.h"
#include "src/__support/CPP/span.h"
#include "src/__support/alloc-checker.h"
#include "src/__support/libc_errno.h" // For error macros
#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/wcrtomb.h"
#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
@@ -53,6 +56,13 @@ void File::lock_list() { File::list_lock.lock(); }
void File::unlock_list() { File::list_lock.unlock(); }
FileIOResult File::write_unlocked(const void *data, size_t len) {
+ if (orientation == Orientation::WIDE) {
+ err = true;
+ return {0, EINVAL};
+ }
+ if (orientation == Orientation::UNORIENTED)
+ orientation = Orientation::BYTE;
+
if (!write_allowed()) {
err = true;
return {0, EBADF};
@@ -214,6 +224,13 @@ FileIOResult File::write_unlocked_lbf(const uint8_t *data, size_t len) {
}
FileIOResult File::read_unlocked(void *data, size_t len) {
+ if (orientation == Orientation::WIDE) {
+ err = true;
+ return {0, EINVAL};
+ }
+ if (orientation == Orientation::UNORIENTED)
+ orientation = Orientation::BYTE;
+
if (!read_allowed()) {
err = true;
return {0, EBADF};
@@ -315,6 +332,13 @@ FileIOResult File::read_unlocked_nbf(uint8_t *data, size_t len) {
}
int File::ungetc_unlocked(int c) {
+ if (orientation == Orientation::WIDE) {
+ err = true;
+ return EOF;
+ }
+ if (orientation == Orientation::UNORIENTED)
+ orientation = Orientation::BYTE;
+
// There is no meaning to unget if:
// 1. You are trying to push back EOF.
// 2. Read operations are not allowed on this file.
@@ -509,4 +533,129 @@ File::ModeFlags File::mode_flags(const char *mode) {
return flags;
}
+FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
+ if (orientation == Orientation::UNORIENTED)
+ orientation = Orientation::WIDE;
+ if (orientation != Orientation::WIDE) {
+ err = true;
+ return {0, EINVAL};
+ }
+
+ if (!write_allowed()) {
+ err = true;
+ return {0, EBADF};
+ }
+
+ prev_op = FileOp::WRITE;
+
+ char buf[4];
+ auto result = internal::wcrtomb(buf, wc, &shift_state);
+ if (!result.has_value()) {
+ err = true;
+ return {0, result.error()};
+ }
+
+ size_t n = result.value();
+ if (bufmode == _IONBF) {
+ size_t ret_val =
+ write_unlocked_nbf(reinterpret_cast<const uint8_t *>(buf), n);
+ flush_unlocked();
+ return ret_val;
+ } else if (bufmode == _IOFBF) {
+ return write_unlocked_fbf(reinterpret_cast<const uint8_t *>(buf), n);
+ } else {
+ return write_unlocked_lbf(reinterpret_cast<const uint8_t *>(buf), n);
+ }
+}
+
+ErrorOr<wchar_t> File::read_wide_character_unlocked() {
+ if (orientation == Orientation::UNORIENTED)
+ orientation = Orientation::WIDE;
+ if (orientation != Orientation::WIDE) {
+ err = true;
+ return Error(EINVAL);
+ }
+
+ if (!read_allowed()) {
+ err = true;
+ return Error(EBADF);
+ }
+
+ prev_op = FileOp::READ;
+
+ wchar_t wc;
+ bool first_byte = true;
+ while (true) {
+ uint8_t byte;
+ FileIOResult read_result{0};
+ if (bufmode == _IONBF) {
+ read_result = read_unlocked_nbf(&byte, 1);
+ } else {
+ read_result = read_unlocked_fbf(&byte, 1);
+ }
+ if (read_result.has_error()) {
+ err = true;
+ return Error(read_result.error);
+ }
+ if (read_result.value == 0) { // EOF
+ if (first_byte) {
+ return Error(0); // EOF
+ } else {
+ err = true;
+ return Error(EILSEQ); // Incomplete character at EOF
+ }
+ }
+ char c = static_cast<char>(byte);
+ auto res = internal::mbrtowc(&wc, &c, 1, &shift_state);
+ if (!res.has_value()) {
+ err = true;
+ return Error(res.error());
+ }
+ if (res.value() == 0) { // null terminator
+ return L'\0';
+ }
+ if (res.value() != static_cast<size_t>(-2)) { // Complete character
+ return wc;
+ }
+ first_byte = false;
+ }
+}
+
+wint_t File::ungetwc_unlocked(wchar_t wc) {
+ if (orientation == Orientation::UNORIENTED)
+ orientation = Orientation::WIDE;
+ if (orientation != Orientation::WIDE) {
+ err = true;
+ return WEOF;
+ }
+
+ char buf[4];
+ auto result = internal::wcrtomb(buf, wc, &shift_state);
+ if (!result.has_value()) {
+ err = true;
+ return WEOF;
+ }
+ size_t n = result.value();
+
+ if (read_limit == 0) {
+ for (size_t i = 0; i < n; ++i) {
+ this->buf[i] = static_cast<uint8_t>(buf[i]);
+ }
+ read_limit = n;
+ pos = 0;
+ } else {
+ if (pos < n) {
+ err = true;
+ return WEOF;
+ }
+ pos -= n;
+ for (size_t i = 0; i < n; ++i) {
+ this->buf[pos + i] = static_cast<uint8_t>(buf[i]);
+ }
+ }
+ eof = false;
+ err = false;
+ return wc;
+}
+
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index c569f24995a9b..c8bb4ca3ca90e 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -12,11 +12,14 @@
#include "hdr/stdint_proxy.h"
#include "hdr/stdio_macros.h"
#include "hdr/types/off_t.h"
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
#include "src/__support/CPP/new.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/threads/mutex.h"
+#include "src/__support/wchar/mbstate.h"
#include <stddef.h>
@@ -52,6 +55,8 @@ class File {
static constexpr size_t DEFAULT_BUFFER_SIZE = 1024;
+ enum class Orientation { UNORIENTED, BYTE, WIDE };
+
using LockFunc = void(File *);
using UnlockFunc = void(File *);
@@ -131,6 +136,9 @@ class File {
bool eof;
bool err;
+ Orientation orientation;
+ internal::mbstate shift_state;
+
// This is a convenience RAII class to lock and unlock file objects.
class FileLock {
File *file;
@@ -172,7 +180,9 @@ class File {
/*robust=*/false, /*pshared=*/false),
ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
- read_limit(0), eof(false), err(false), prev(nullptr), next(nullptr) {
+ read_limit(0), eof(false), err(false),
+ orientation(Orientation::UNORIENTED), shift_state(), prev(nullptr),
+ next(nullptr), {
adjust_buf();
}
@@ -215,6 +225,27 @@ class File {
return ungetc_unlocked(c);
}
+ FileIOResult write_wide_character_unlocked(wchar_t wc);
+
+ FileIOResult write_wide_character(wchar_t wc) {
+ FileLock l(this);
+ return write_wide_character_unlocked(wc);
+ }
+
+ ErrorOr<wchar_t> read_wide_character_unlocked();
+
+ ErrorOr<wchar_t> read_wide_character() {
+ FileLock l(this);
+ return read_wide_character_unlocked();
+ }
+
+ wint_t ungetwc_unlocked(wchar_t wc);
+
+ wint_t ungetwc(wchar_t wc) {
+ FileLock lock(this);
+ return ungetwc_unlocked(wc);
+ }
+
// Does the following:
// 1. If in write mode, Write out any data present in the buffer.
// 2. Call platform_close.
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index ed2200b879048..fbfa9ed411ab2 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -512,3 +512,91 @@ TEST(LlvmLibcFileTest, WriteSplit) {
EXPECT_TRUE(f->error());
ASSERT_EQ(f->close(), 0);
}
+
+TEST(LlvmLibcFileTest, WideCharIO) {
+ constexpr size_t FILE_BUFFER_SIZE = 512;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+ wchar_t wc = L'A';
+ auto write_res = f->write_wide_character(wc);
+ ASSERT_EQ(write_res.value, size_t(1));
+
+ wchar_t wc2 = L'€';
+ write_res = f->write_wide_character(wc2);
+ ASSERT_EQ(write_res.value, size_t(3));
+
+ ASSERT_EQ(f->flush(), 0);
+
+ ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+ auto read_res = f->read_wide_character();
+ ASSERT_TRUE(read_res.has_value());
+ EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+ static_cast<unsigned int>(L'A'));
+
+ read_res = f->read_wide_character();
+ ASSERT_TRUE(read_res.has_value());
+ EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+ static_cast<unsigned int>(L'€'));
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, WideCharOrientation) {
+ constexpr size_t FILE_BUFFER_SIZE = 512;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+ f->write_wide_character(L'A');
+
+ auto write_res = f->write("B", 1);
+ EXPECT_EQ(write_res.value, size_t(0));
+ EXPECT_TRUE(f->error());
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, ByteCharOrientation) {
+ constexpr size_t FILE_BUFFER_SIZE = 512;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+ f->write("A", 1);
+
+ auto write_res = f->write_wide_character(L'B');
+ EXPECT_EQ(write_res.value, size_t(0));
+ EXPECT_TRUE(f->error());
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, Ungetwc) {
+ constexpr size_t FILE_BUFFER_SIZE = 512;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+ f->write_wide_character(L'A');
+ f->flush();
+ f->seek(0, SEEK_SET);
+
+ auto read_res = f->read_wide_character();
+ ASSERT_TRUE(read_res.has_value());
+ EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+ static_cast<unsigned int>(L'A'));
+
+ auto unget_res = f->ungetwc(L'B');
+ EXPECT_EQ(static_cast<unsigned int>(unget_res),
+ static_cast<unsigned int>(L'B'));
+
+ read_res = f->read_wide_character();
+ ASSERT_TRUE(read_res.has_value());
+ EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+ static_cast<unsigned int>(L'B'));
+
+ ASSERT_EQ(f->close(), 0);
+}
>From ec64d7e1032ee8fcc0d29cf6a9f8a6422159bf41 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 26 Mar 2026 23:57:40 +0000
Subject: [PATCH 2/9] add string functions
---
libc/src/__support/File/CMakeLists.txt | 6 +
libc/src/__support/File/file.cpp | 158 ++++++++++++++------
libc/src/__support/File/file.h | 21 ++-
libc/test/src/__support/File/CMakeLists.txt | 1 +
libc/test/src/__support/File/file_test.cpp | 31 ++++
5 files changed, 166 insertions(+), 51 deletions(-)
diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index 3495bf3463d65..bd5ea5257a0aa 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -16,13 +16,19 @@ add_object_library(
libc.hdr.stdint_proxy
libc.hdr.func.realloc
libc.hdr.types.off_t
+ libc.hdr.types.wchar_t
+ libc.hdr.types.wint_t
libc.src.__support.CPP.new
libc.src.__support.CPP.span
libc.src.__support.threads.mutex
libc.src.__support.error_or
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.architectures
libc.src.__support.wchar.mbrtowc
libc.src.__support.wchar.mbstate
libc.src.__support.wchar.wcrtomb
+ libc.src.string.memory_utils.inline_memcpy
+ libc.src.__support.libc_errno
)
add_object_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index f24f478ce52ea..8051b835cb8f5 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -56,13 +56,20 @@ void File::lock_list() { File::list_lock.lock(); }
void File::unlock_list() { File::list_lock.unlock(); }
FileIOResult File::write_unlocked(const void *data, size_t len) {
- if (orientation == Orientation::WIDE) {
+ switch (orientation) {
+ case Orientation::WIDE:
err = true;
return {0, EINVAL};
- }
- if (orientation == Orientation::UNORIENTED)
+ case Orientation::UNORIENTED:
orientation = Orientation::BYTE;
+ break;
+ case Orientation::BYTE:
+ break;
+ }
+ return write_unlocked_impl(data, len);
+}
+FileIOResult File::write_unlocked_impl(const void *data, size_t len) {
if (!write_allowed()) {
err = true;
return {0, EBADF};
@@ -75,9 +82,11 @@ FileIOResult File::write_unlocked(const void *data, size_t len) {
write_unlocked_nbf(static_cast<const uint8_t *>(data), len);
flush_unlocked();
return ret_val;
- } else if (bufmode == _IOFBF) { // fully buffered
+ }
+ if (bufmode == _IOFBF) { // fully buffered
return write_unlocked_fbf(static_cast<const uint8_t *>(data), len);
- } else /*if (bufmode == _IOLBF) */ { // line buffered
+ }
+ /*if (bufmode == _IOLBF) */ { // line buffered
return write_unlocked_lbf(static_cast<const uint8_t *>(data), len);
}
}
@@ -224,13 +233,20 @@ FileIOResult File::write_unlocked_lbf(const uint8_t *data, size_t len) {
}
FileIOResult File::read_unlocked(void *data, size_t len) {
- if (orientation == Orientation::WIDE) {
+ switch (orientation) {
+ case Orientation::WIDE:
err = true;
return {0, EINVAL};
- }
- if (orientation == Orientation::UNORIENTED)
+ case Orientation::UNORIENTED:
orientation = Orientation::BYTE;
+ break;
+ case Orientation::BYTE:
+ break;
+ }
+ return read_unlocked_impl(data, len);
+}
+FileIOResult File::read_unlocked_impl(void *data, size_t len) {
if (!read_allowed()) {
err = true;
return {0, EBADF};
@@ -240,9 +256,11 @@ FileIOResult File::read_unlocked(void *data, size_t len) {
if (bufmode == _IONBF) { // unbuffered.
return read_unlocked_nbf(static_cast<uint8_t *>(data), len);
- } else if (bufmode == _IOFBF) { // fully buffered
+ }
+ if (bufmode == _IOFBF) { // fully buffered
return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
- } else /*if (bufmode == _IOLBF) */ { // line buffered
+ }
+ /*if (bufmode == _IOLBF) */ { // line buffered
// There is no line buffered mode for read. Use fully buffered instead.
return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
}
@@ -533,66 +551,109 @@ File::ModeFlags File::mode_flags(const char *mode) {
return flags;
}
-FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
- if (orientation == Orientation::UNORIENTED)
- orientation = Orientation::WIDE;
- if (orientation != Orientation::WIDE) {
+FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
+ switch (orientation) {
+ case Orientation::BYTE:
err = true;
return {0, EINVAL};
+ case Orientation::UNORIENTED:
+ orientation = Orientation::WIDE;
+ break;
+ case Orientation::WIDE:
+ break;
}
- if (!write_allowed()) {
- err = true;
- return {0, EBADF};
+ size_t written = 0;
+ for (size_t i = 0; i < len; ++i) {
+ char buf[4];
+ auto result = internal::wcrtomb(buf, ws[i], &mbstate);
+ if (!result.has_value()) {
+ err = true;
+ return {written, result.error()};
+ }
+ size_t n = result.value();
+ auto write_res = write_unlocked_impl(buf, n);
+ if (write_res.has_error()) {
+ err = true;
+ return {written, write_res.error};
+ }
+ if (write_res.value < n) {
+ // Partial write of bytes.
+ return {written, 0};
+ }
+ ++written;
}
+ return {written, 0};
+}
- prev_op = FileOp::WRITE;
+FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
+ switch (orientation) {
+ case Orientation::BYTE:
+ err = true;
+ return {0, EINVAL};
+ case Orientation::UNORIENTED:
+ orientation = Orientation::WIDE;
+ break;
+ case Orientation::WIDE:
+ break;
+ }
char buf[4];
- auto result = internal::wcrtomb(buf, wc, &shift_state);
+ auto result = internal::wcrtomb(buf, wc, &mbstate);
if (!result.has_value()) {
err = true;
return {0, result.error()};
}
size_t n = result.value();
- if (bufmode == _IONBF) {
- size_t ret_val =
- write_unlocked_nbf(reinterpret_cast<const uint8_t *>(buf), n);
- flush_unlocked();
- return ret_val;
- } else if (bufmode == _IOFBF) {
- return write_unlocked_fbf(reinterpret_cast<const uint8_t *>(buf), n);
- } else {
- return write_unlocked_lbf(reinterpret_cast<const uint8_t *>(buf), n);
- }
+ return write_unlocked_impl(buf, n);
}
-ErrorOr<wchar_t> File::read_wide_character_unlocked() {
- if (orientation == Orientation::UNORIENTED)
- orientation = Orientation::WIDE;
- if (orientation != Orientation::WIDE) {
+FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
+ switch (orientation) {
+ case Orientation::BYTE:
err = true;
- return Error(EINVAL);
+ return {0, EINVAL};
+ case Orientation::UNORIENTED:
+ orientation = Orientation::WIDE;
+ break;
+ case Orientation::WIDE:
+ break;
}
- if (!read_allowed()) {
- err = true;
- return Error(EBADF);
+ size_t read_count = 0;
+ for (size_t i = 0; i < len; ++i) {
+ auto res = read_wide_character_unlocked();
+ if (!res.has_value()) {
+ if (res.error() == 0) { // EOF
+ break;
+ }
+ err = true;
+ return {read_count, res.error()};
+ }
+ ws[i] = res.value();
+ ++read_count;
}
+ return {read_count, 0};
+}
- prev_op = FileOp::READ;
+ErrorOr<wchar_t> File::read_wide_character_unlocked() {
+ switch (orientation) {
+ case Orientation::BYTE:
+ err = true;
+ return Error(EINVAL);
+ case Orientation::UNORIENTED:
+ orientation = Orientation::WIDE;
+ break;
+ case Orientation::WIDE:
+ break;
+ }
wchar_t wc;
bool first_byte = true;
while (true) {
uint8_t byte;
- FileIOResult read_result{0};
- if (bufmode == _IONBF) {
- read_result = read_unlocked_nbf(&byte, 1);
- } else {
- read_result = read_unlocked_fbf(&byte, 1);
- }
+ FileIOResult read_result = read_unlocked_impl(&byte, 1);
if (read_result.has_error()) {
err = true;
return Error(read_result.error);
@@ -600,13 +661,12 @@ ErrorOr<wchar_t> File::read_wide_character_unlocked() {
if (read_result.value == 0) { // EOF
if (first_byte) {
return Error(0); // EOF
- } else {
- err = true;
- return Error(EILSEQ); // Incomplete character at EOF
}
+ err = true;
+ return Error(EILSEQ); // Incomplete character at EOF
}
char c = static_cast<char>(byte);
- auto res = internal::mbrtowc(&wc, &c, 1, &shift_state);
+ auto res = internal::mbrtowc(&wc, &c, 1, &mbstate);
if (!res.has_value()) {
err = true;
return Error(res.error());
@@ -630,7 +690,7 @@ wint_t File::ungetwc_unlocked(wchar_t wc) {
}
char buf[4];
- auto result = internal::wcrtomb(buf, wc, &shift_state);
+ auto result = internal::wcrtomb(buf, wc, &mbstate);
if (!result.has_value()) {
err = true;
return WEOF;
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index c8bb4ca3ca90e..de6697a441574 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -137,7 +137,7 @@ class File {
bool err;
Orientation orientation;
- internal::mbstate shift_state;
+ internal::mbstate mbstate;
// This is a convenience RAII class to lock and unlock file objects.
class FileLock {
@@ -181,7 +181,7 @@ class File {
ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
read_limit(0), eof(false), err(false),
- orientation(Orientation::UNORIENTED), shift_state(), prev(nullptr),
+ orientation(Orientation::UNORIENTED), mbstate(), prev(nullptr),
next(nullptr), {
adjust_buf();
}
@@ -225,6 +225,20 @@ class File {
return ungetc_unlocked(c);
}
+ FileIOResult write_unlocked(const wchar_t *ws, size_t len);
+
+ FileIOResult write(const wchar_t *ws, size_t len) {
+ FileLock l(this);
+ return write_unlocked(ws, len);
+ }
+
+ FileIOResult read_unlocked(wchar_t *ws, size_t len);
+
+ FileIOResult read(wchar_t *ws, size_t len) {
+ FileLock l(this);
+ return read_unlocked(ws, len);
+ }
+
FileIOResult write_wide_character_unlocked(wchar_t wc);
FileIOResult write_wide_character(wchar_t wc) {
@@ -326,6 +340,9 @@ class File {
static ModeFlags mode_flags(const char *mode);
private:
+ FileIOResult write_unlocked_impl(const void *data, size_t len);
+ FileIOResult read_unlocked_impl(void *data, size_t len);
+
FileIOResult write_unlocked_lbf(const uint8_t *data, size_t len);
FileIOResult write_unlocked_fbf(const uint8_t *data, size_t len);
FileIOResult write_unlocked_nbf(const uint8_t *data, size_t len);
diff --git a/libc/test/src/__support/File/CMakeLists.txt b/libc/test/src/__support/File/CMakeLists.txt
index a11f52978f35f..76f6d7145efc9 100644
--- a/libc/test/src/__support/File/CMakeLists.txt
+++ b/libc/test/src/__support/File/CMakeLists.txt
@@ -19,6 +19,7 @@ add_libc_test(
libc.src.errno.errno
libc.src.__support.CPP.new
libc.src.__support.File.file
+ libc.src.__support.error_or
)
add_libc_test(
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index fbfa9ed411ab2..9b578d4cdb5d8 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -600,3 +600,34 @@ TEST(LlvmLibcFileTest, Ungetwc) {
ASSERT_EQ(f->close(), 0);
}
+
+TEST(LlvmLibcFileTest, WideStringIO) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+ ASSERT_FALSE(f == nullptr);
+
+ const wchar_t *ws = L"Hello, World!";
+ size_t len = 13;
+
+ auto write_res = f->write(ws, len);
+ ASSERT_FALSE(write_res.has_error());
+ EXPECT_EQ(write_res.value, len);
+
+ ASSERT_EQ(f->flush(), 0); // Ensure everything is written to StringFile
+
+ ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+ wchar_t read_buf[20];
+ auto read_res = f->read(read_buf, len);
+ ASSERT_FALSE(read_res.has_error());
+ EXPECT_EQ(read_res.value, len);
+
+ for (size_t i = 0; i < len; ++i) {
+ EXPECT_EQ(static_cast<unsigned int>(read_buf[i]),
+ static_cast<unsigned int>(ws[i]));
+ }
+
+ ASSERT_EQ(f->close(), 0);
+}
>From c2593781c4def04be16affeb870e78a0e65fad8b Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Fri, 27 Mar 2026 22:56:50 +0000
Subject: [PATCH 3/9] add orientation handling
---
libc/src/__support/File/CMakeLists.txt | 1 +
libc/src/__support/File/file.cpp | 22 +++++++++---------
libc/src/__support/File/file.h | 18 +++++++++++++++
libc/test/src/__support/File/file_test.cpp | 26 ++++++++++++++++++++++
4 files changed, 57 insertions(+), 10 deletions(-)
diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index bd5ea5257a0aa..da8ae6716020b 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -18,6 +18,7 @@ add_object_library(
libc.hdr.types.off_t
libc.hdr.types.wchar_t
libc.hdr.types.wint_t
+ libc.hdr.wchar_macros
libc.src.__support.CPP.new
libc.src.__support.CPP.span
libc.src.__support.threads.mutex
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 8051b835cb8f5..ef155174869e9 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -86,9 +86,8 @@ FileIOResult File::write_unlocked_impl(const void *data, size_t len) {
if (bufmode == _IOFBF) { // fully buffered
return write_unlocked_fbf(static_cast<const uint8_t *>(data), len);
}
- /*if (bufmode == _IOLBF) */ { // line buffered
- return write_unlocked_lbf(static_cast<const uint8_t *>(data), len);
- }
+ return write_unlocked_lbf(static_cast<const uint8_t *>(data),
+ len); // line buffered
}
FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) {
@@ -280,9 +279,7 @@ size_t File::copy_data_from_buf(uint8_t *data, size_t len) {
}
// Copy all of the available data.
- // TODO: Replace the for loop with a call to internal memcpy.
- for (size_t i = 0; i < available_data; ++i)
- dataref[i] = bufref[i + pos];
+ inline_memcpy(dataref.data(), bufref.data() + pos, available_data);
read_limit = pos = 0; // Reset the pointers.
return available_data;
@@ -409,14 +406,19 @@ ErrorOr<int> File::seek(off_t offset, int whence) {
// function. Note that read_limit >= pos is always true.
offset -= (read_limit - pos);
}
+ auto result = platform_seek(this, offset, whence);
+ if (!result.has_value())
+ return Error(result.error());
+
pos = read_limit = 0;
prev_op = FileOp::SEEK;
- // Reset the eof flag as a seek might move the file positon to some place
+ // Reset the eof flag as a seek might move the file position to some place
// readable.
eof = false;
- auto result = platform_seek(this, offset, whence);
- if (!result.has_value())
- return Error(result.error());
+ if (orientation == Orientation::WIDE ||
+ orientation == Orientation::UNORIENTED)
+ mbstate = internal::mbstate();
+
return 0;
}
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index de6697a441574..852a6ed499d17 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -335,6 +335,24 @@ class File {
return iseof_unlocked();
}
+ Orientation get_orientation_unlocked() const { return orientation; }
+
+ Orientation get_orientation() {
+ FileLock l(this);
+ return get_orientation_unlocked();
+ }
+
+ Orientation try_set_orientation_unlocked(Orientation o) {
+ if (orientation == Orientation::UNORIENTED)
+ orientation = o;
+ return orientation;
+ }
+
+ Orientation try_set_orientation(Orientation o) {
+ FileLock l(this);
+ return try_set_orientation_unlocked(o);
+ }
+
// Returns an bit map of flags corresponding to enumerations of
// OpenMode, ContentType and CreateType.
static ModeFlags mode_flags(const char *mode);
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 9b578d4cdb5d8..1b17a4c1cce0c 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -631,3 +631,29 @@ TEST(LlvmLibcFileTest, WideStringIO) {
ASSERT_EQ(f->close(), 0);
}
+
+TEST(LlvmLibcFileTest, TrySetOrientation) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "r+");
+ ASSERT_FALSE(f == nullptr);
+
+ EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+ static_cast<unsigned int>(File::Orientation::UNORIENTED));
+
+ EXPECT_EQ(static_cast<unsigned int>(
+ f->try_set_orientation(File::Orientation::WIDE)),
+ static_cast<unsigned int>(File::Orientation::WIDE));
+ EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+ static_cast<unsigned int>(File::Orientation::WIDE));
+
+ EXPECT_EQ(
+ static_cast<unsigned int>(
+ f->try_set_orientation(File::Orientation::BYTE)),
+ static_cast<unsigned int>(File::Orientation::WIDE)); // Cannot change
+ EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+ static_cast<unsigned int>(File::Orientation::WIDE));
+
+ ASSERT_EQ(f->close(), 0);
+}
>From 922e8bd63b64146738b713fac47850ff03d7707a Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Mon, 30 Mar 2026 22:34:02 +0000
Subject: [PATCH 4/9] remove characterwise functions
---
libc/src/__support/File/file.cpp | 130 +++++++--------------
libc/src/__support/File/file.h | 14 ---
libc/test/src/__support/File/file_test.cpp | 50 ++------
3 files changed, 53 insertions(+), 141 deletions(-)
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index ef155174869e9..06183ee22dfe6 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -17,7 +17,7 @@
#include "src/__support/alloc-checker.h"
#include "src/__support/libc_errno.h" // For error macros
#include "src/__support/macros/config.h"
-#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/wcrtomb.h"
#include "src/string/memory_utils/inline_memcpy.h"
@@ -567,50 +567,33 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
size_t written = 0;
for (size_t i = 0; i < len; ++i) {
- char buf[4];
- auto result = internal::wcrtomb(buf, ws[i], &mbstate);
- if (!result.has_value()) {
+ internal::CharacterConverter cr(&mbstate);
+ int push_err = cr.push(static_cast<char32_t>(ws[i]));
+ if (push_err != 0) {
err = true;
- return {written, result.error()};
+ return {written, push_err};
}
- size_t n = result.value();
- auto write_res = write_unlocked_impl(buf, n);
- if (write_res.has_error()) {
- err = true;
- return {written, write_res.error};
- }
- if (write_res.value < n) {
- // Partial write of bytes.
- return {written, 0};
+ while (!cr.isEmpty()) {
+ auto pop_res = cr.pop<char8_t>();
+ if (!pop_res.has_value()) {
+ err = true;
+ return {written, pop_res.error()};
+ }
+ char8_t byte = pop_res.value();
+ auto write_res = write_unlocked_impl(&byte, 1);
+ if (write_res.has_error()) {
+ err = true;
+ return {written, write_res.error};
+ }
+ if (write_res.value < 1) {
+ return {written, 0};
+ }
}
++written;
}
return {written, 0};
}
-FileIOResult File::write_wide_character_unlocked(wchar_t wc) {
- switch (orientation) {
- case Orientation::BYTE:
- err = true;
- return {0, EINVAL};
- case Orientation::UNORIENTED:
- orientation = Orientation::WIDE;
- break;
- case Orientation::WIDE:
- break;
- }
-
- char buf[4];
- auto result = internal::wcrtomb(buf, wc, &mbstate);
- if (!result.has_value()) {
- err = true;
- return {0, result.error()};
- }
-
- size_t n = result.value();
- return write_unlocked_impl(buf, n);
-}
-
FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
switch (orientation) {
case Orientation::BYTE:
@@ -625,62 +608,35 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
size_t read_count = 0;
for (size_t i = 0; i < len; ++i) {
- auto res = read_wide_character_unlocked();
- if (!res.has_value()) {
- if (res.error() == 0) { // EOF
- break;
+ internal::CharacterConverter cr(&mbstate);
+ while (!cr.isFull()) {
+ uint8_t byte;
+ auto read_res = read_unlocked_impl(&byte, 1);
+ if (read_res.has_error()) {
+ err = true;
+ return {read_count, read_res.error};
}
- err = true;
- return {read_count, res.error()};
- }
- ws[i] = res.value();
- ++read_count;
- }
- return {read_count, 0};
-}
-
-ErrorOr<wchar_t> File::read_wide_character_unlocked() {
- switch (orientation) {
- case Orientation::BYTE:
- err = true;
- return Error(EINVAL);
- case Orientation::UNORIENTED:
- orientation = Orientation::WIDE;
- break;
- case Orientation::WIDE:
- break;
- }
-
- wchar_t wc;
- bool first_byte = true;
- while (true) {
- uint8_t byte;
- FileIOResult read_result = read_unlocked_impl(&byte, 1);
- if (read_result.has_error()) {
- err = true;
- return Error(read_result.error);
- }
- if (read_result.value == 0) { // EOF
- if (first_byte) {
- return Error(0); // EOF
+ if (read_res.value == 0) { // EOF
+ if (cr.isEmpty())
+ return {read_count, 0};
+ err = true;
+ return {read_count, EILSEQ}; // Incomplete character at EOF
+ }
+ int push_err = cr.push(static_cast<char8_t>(byte));
+ if (push_err != 0) {
+ err = true;
+ return {read_count, push_err};
}
- err = true;
- return Error(EILSEQ); // Incomplete character at EOF
}
- char c = static_cast<char>(byte);
- auto res = internal::mbrtowc(&wc, &c, 1, &mbstate);
- if (!res.has_value()) {
+ auto pop_res = cr.pop<char32_t>();
+ if (!pop_res.has_value()) {
err = true;
- return Error(res.error());
- }
- if (res.value() == 0) { // null terminator
- return L'\0';
+ return {read_count, pop_res.error()};
}
- if (res.value() != static_cast<size_t>(-2)) { // Complete character
- return wc;
- }
- first_byte = false;
+ ws[i] = static_cast<wchar_t>(pop_res.value());
+ ++read_count;
}
+ return {read_count, 0};
}
wint_t File::ungetwc_unlocked(wchar_t wc) {
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 852a6ed499d17..f89e78cee4500 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -239,20 +239,6 @@ class File {
return read_unlocked(ws, len);
}
- FileIOResult write_wide_character_unlocked(wchar_t wc);
-
- FileIOResult write_wide_character(wchar_t wc) {
- FileLock l(this);
- return write_wide_character_unlocked(wc);
- }
-
- ErrorOr<wchar_t> read_wide_character_unlocked();
-
- ErrorOr<wchar_t> read_wide_character() {
- FileLock l(this);
- return read_wide_character_unlocked();
- }
-
wint_t ungetwc_unlocked(wchar_t wc);
wint_t ungetwc(wchar_t wc) {
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 1b17a4c1cce0c..aa9844bf41094 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -513,44 +513,13 @@ TEST(LlvmLibcFileTest, WriteSplit) {
ASSERT_EQ(f->close(), 0);
}
-TEST(LlvmLibcFileTest, WideCharIO) {
- constexpr size_t FILE_BUFFER_SIZE = 512;
- char file_buffer[FILE_BUFFER_SIZE];
- StringFile *f =
- new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
-
- wchar_t wc = L'A';
- auto write_res = f->write_wide_character(wc);
- ASSERT_EQ(write_res.value, size_t(1));
-
- wchar_t wc2 = L'€';
- write_res = f->write_wide_character(wc2);
- ASSERT_EQ(write_res.value, size_t(3));
-
- ASSERT_EQ(f->flush(), 0);
-
- ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
-
- auto read_res = f->read_wide_character();
- ASSERT_TRUE(read_res.has_value());
- EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
- static_cast<unsigned int>(L'A'));
-
- read_res = f->read_wide_character();
- ASSERT_TRUE(read_res.has_value());
- EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
- static_cast<unsigned int>(L'€'));
-
- ASSERT_EQ(f->close(), 0);
-}
-
TEST(LlvmLibcFileTest, WideCharOrientation) {
constexpr size_t FILE_BUFFER_SIZE = 512;
char file_buffer[FILE_BUFFER_SIZE];
StringFile *f =
new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
- f->write_wide_character(L'A');
+ f->write(L"A", 1);
auto write_res = f->write("B", 1);
EXPECT_EQ(write_res.value, size_t(0));
@@ -567,7 +536,7 @@ TEST(LlvmLibcFileTest, ByteCharOrientation) {
f->write("A", 1);
- auto write_res = f->write_wide_character(L'B');
+ auto write_res = f->write(L"B", 1);
EXPECT_EQ(write_res.value, size_t(0));
EXPECT_TRUE(f->error());
@@ -580,22 +549,23 @@ TEST(LlvmLibcFileTest, Ungetwc) {
StringFile *f =
new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
- f->write_wide_character(L'A');
+ f->write(L"A", 1);
f->flush();
f->seek(0, SEEK_SET);
- auto read_res = f->read_wide_character();
- ASSERT_TRUE(read_res.has_value());
- EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+ wchar_t ws_out[2];
+ auto read_res = f->read(ws_out, 1);
+ ASSERT_EQ(read_res.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
static_cast<unsigned int>(L'A'));
auto unget_res = f->ungetwc(L'B');
EXPECT_EQ(static_cast<unsigned int>(unget_res),
static_cast<unsigned int>(L'B'));
- read_res = f->read_wide_character();
- ASSERT_TRUE(read_res.has_value());
- EXPECT_EQ(static_cast<unsigned int>(read_res.value()),
+ auto read_res2 = f->read(ws_out, 1);
+ ASSERT_EQ(read_res2.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
static_cast<unsigned int>(L'B'));
ASSERT_EQ(f->close(), 0);
>From 7d55c70d24754c303d5a1fb2e8f05f99ac8c5d70 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Mon, 30 Mar 2026 23:34:09 +0000
Subject: [PATCH 5/9] review and expand tests
---
libc/src/__support/File/file.cpp | 1 -
libc/test/src/__support/File/file_test.cpp | 85 ++++++++++++++++++++++
2 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 06183ee22dfe6..4aaba1c909330 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -613,7 +613,6 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
uint8_t byte;
auto read_res = read_unlocked_impl(&byte, 1);
if (read_res.has_error()) {
- err = true;
return {read_count, read_res.error};
}
if (read_res.value == 0) { // EOF
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index aa9844bf41094..c82badad49d89 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -627,3 +627,88 @@ TEST(LlvmLibcFileTest, TrySetOrientation) {
ASSERT_EQ(f->close(), 0);
}
+
+TEST(LlvmLibcFileTest, UngetwcMultiByte) {
+ constexpr size_t FILE_BUFFER_SIZE = 512;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+
+ f->write(L"€", 1);
+ f->flush();
+ f->seek(0, SEEK_SET);
+
+ wchar_t ws_out[2];
+ auto read_res = f->read(ws_out, 1);
+ ASSERT_EQ(read_res.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+ static_cast<unsigned int>(L'€'));
+
+ auto unget_res = f->ungetwc(L'¢');
+ EXPECT_EQ(static_cast<unsigned int>(unget_res),
+ static_cast<unsigned int>(L'¢'));
+
+ auto read_res2 = f->read(ws_out, 1);
+ ASSERT_EQ(read_res2.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+ static_cast<unsigned int>(L'¢'));
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, WideStringIO_Multibyte) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+ ASSERT_FALSE(f == nullptr);
+
+ const wchar_t *ws = L"Hello € World!";
+ size_t len = 14;
+
+ auto write_res = f->write(ws, len);
+ ASSERT_FALSE(write_res.has_error());
+ EXPECT_EQ(write_res.value, len);
+
+ ASSERT_EQ(f->flush(), 0);
+
+ ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+ wchar_t read_buf[20];
+ auto read_res = f->read(read_buf, len);
+ ASSERT_FALSE(read_res.has_error());
+ EXPECT_EQ(read_res.value, len);
+
+ for (size_t i = 0; i < len; ++i) {
+ EXPECT_EQ(static_cast<unsigned int>(read_buf[i]),
+ static_cast<unsigned int>(ws[i]));
+ }
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, SeekResetsMbstate) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "r+");
+ ASSERT_FALSE(f == nullptr);
+
+ f->reset_and_fill("\xE2\x82", 2);
+
+ wchar_t ws_out[1];
+ auto read_res = f->read(ws_out, 1);
+ EXPECT_EQ(read_res.value, size_t(0));
+ EXPECT_TRUE(f->error());
+
+ f->reset_and_fill("A", 1);
+ f->seek(0, SEEK_SET);
+ f->clearerr();
+
+ auto read_res2 = f->read(ws_out, 1);
+ EXPECT_EQ(read_res2.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+ static_cast<unsigned int>(L'A'));
+
+ ASSERT_EQ(f->close(), 0);
+}
>From d3b8a8847c28b2a8d2a9ca61b9d440307f5f25d7 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 7 Apr 2026 21:37:32 +0000
Subject: [PATCH 6/9] fix deps
---
libc/src/__support/File/CMakeLists.txt | 4 ++--
libc/src/__support/File/file.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt
index da8ae6716020b..68bbfe31ace31 100644
--- a/libc/src/__support/File/CMakeLists.txt
+++ b/libc/src/__support/File/CMakeLists.txt
@@ -25,11 +25,11 @@ add_object_library(
libc.src.__support.error_or
libc.src.__support.macros.config
libc.src.__support.macros.properties.architectures
- libc.src.__support.wchar.mbrtowc
libc.src.__support.wchar.mbstate
libc.src.__support.wchar.wcrtomb
+ libc.src.__support.wchar.character_converter
libc.src.string.memory_utils.inline_memcpy
- libc.src.__support.libc_errno
+ libc.hdr.errno_macros
)
add_object_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 4aaba1c909330..f28c7fc53116d 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -8,6 +8,7 @@
#include "file.h"
+#include "hdr/errno_macros.h"
#include "hdr/func/realloc.h"
#include "hdr/stdio_macros.h"
#include "hdr/types/off_t.h"
@@ -15,7 +16,6 @@
#include "src/__support/CPP/new.h"
#include "src/__support/CPP/span.h"
#include "src/__support/alloc-checker.h"
-#include "src/__support/libc_errno.h" // For error macros
#include "src/__support/macros/config.h"
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/wcrtomb.h"
>From 873f7ed1edfc7e32bb3a74beadf4c56247f0a43e Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 7 Apr 2026 21:49:25 +0000
Subject: [PATCH 7/9] fix ungetwc
---
libc/src/__support/File/file.cpp | 4 ++++
libc/src/__support/File/file.h | 12 ++++++-----
libc/test/src/__support/File/file_test.cpp | 25 ++++++++++++++++++++++
3 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index f28c7fc53116d..b88ef9f575632 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -655,6 +655,10 @@ wint_t File::ungetwc_unlocked(wchar_t wc) {
size_t n = result.value();
if (read_limit == 0) {
+ if (n > bufsize) {
+ err = true;
+ return WEOF;
+ }
for (size_t i = 0; i < n; ++i) {
this->buf[i] = static_cast<uint8_t>(buf[i]);
}
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index f89e78cee4500..6f7ba3f48c76c 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -107,8 +107,9 @@ class File {
// For files which are readable, we should be able to support one ungetc
// operation even if |buf| is nullptr. So, in the constructor of File, we
- // set |buf| to point to this buffer character.
- uint8_t ungetc_buf;
+ // set |buf| to point to this buffer. It needs to be at least 4 bytes so that
+ // it can hold the multibyte encoding of a single wide character.
+ uint8_t ungetc_buf[4];
uint8_t *buf; // Pointer to the stream buffer for buffered streams
size_t bufsize; // Size of the buffer pointed to by |buf|.
@@ -178,7 +179,7 @@ class File {
: platform_write(wf), platform_read(rf), platform_seek(sf),
platform_close(cf), mutex(/*timed=*/false, /*recursive=*/false,
/*robust=*/false, /*pshared=*/false),
- ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
+ ungetc_buf{}, buf(buffer), bufsize(buffer_size), bufmode(buffer_mode),
own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
read_limit(0), eof(false), err(false),
orientation(Orientation::UNORIENTED), mbstate(), prev(nullptr),
@@ -371,8 +372,9 @@ class File {
// 3. If user wants _IONBF, then the buffer is ignored for writing.
// So, all of the above cases, having a single ungetc buffer does not
// affect the behavior experienced by the user.
- buf = &ungetc_buf;
- bufsize = 1;
+ buf = ungetc_buf;
+ bufsize = sizeof(ungetc_buf);
+ own_buf = false; // We shouldn't call free on |buf| when closing the file.
}
}
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index c82badad49d89..694621218c452 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -656,6 +656,31 @@ TEST(LlvmLibcFileTest, UngetwcMultiByte) {
ASSERT_EQ(f->close(), 0);
}
+TEST(LlvmLibcFileTest, UngetwcUnbufferedMultiByte) {
+ StringFile *f = new_string_file(nullptr, 0, _IONBF, true, "w+");
+ ASSERT_FALSE(f == nullptr);
+
+ f->write(L"€", 1);
+ f->seek(0, SEEK_SET);
+
+ wchar_t ws_out[2];
+ auto read_res = f->read(ws_out, 1);
+ ASSERT_EQ(read_res.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+ static_cast<unsigned int>(L'€'));
+
+ auto unget_res = f->ungetwc(L'¢');
+ EXPECT_EQ(static_cast<unsigned int>(unget_res),
+ static_cast<unsigned int>(L'¢'));
+
+ auto read_res2 = f->read(ws_out, 1);
+ ASSERT_EQ(read_res2.value, size_t(1));
+ EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
+ static_cast<unsigned int>(L'¢'));
+
+ ASSERT_EQ(f->close(), 0);
+}
+
TEST(LlvmLibcFileTest, WideStringIO_Multibyte) {
constexpr size_t FILE_BUFFER_SIZE = 100;
char file_buffer[FILE_BUFFER_SIZE];
>From 90c2fc93098249c452fc1b25d2fffeb324f6925c Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Wed, 8 Apr 2026 17:07:40 +0000
Subject: [PATCH 8/9] ungetwc fix and cleanup
---
libc/src/__support/File/file.cpp | 47 +++++++--------
libc/src/__support/File/file.h | 4 +-
libc/test/src/__support/File/file_test.cpp | 68 ++++++++++++++++++++++
3 files changed, 92 insertions(+), 27 deletions(-)
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index b88ef9f575632..be3a4521ae249 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -581,13 +581,10 @@ FileIOResult File::write_unlocked(const wchar_t *ws, size_t len) {
}
char8_t byte = pop_res.value();
auto write_res = write_unlocked_impl(&byte, 1);
- if (write_res.has_error()) {
- err = true;
+ if (write_res.has_error())
return {written, write_res.error};
- }
- if (write_res.value < 1) {
+ if (write_res.value < 1)
return {written, 0};
- }
}
++written;
}
@@ -612,9 +609,8 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
while (!cr.isFull()) {
uint8_t byte;
auto read_res = read_unlocked_impl(&byte, 1);
- if (read_res.has_error()) {
+ if (read_res.has_error())
return {read_count, read_res.error};
- }
if (read_res.value == 0) { // EOF
if (cr.isEmpty())
return {read_count, 0};
@@ -638,41 +634,42 @@ FileIOResult File::read_unlocked(wchar_t *ws, size_t len) {
return {read_count, 0};
}
-wint_t File::ungetwc_unlocked(wchar_t wc) {
- if (orientation == Orientation::UNORIENTED)
- orientation = Orientation::WIDE;
- if (orientation != Orientation::WIDE) {
+wint_t File::ungetwc_unlocked(wint_t wc) {
+ if (wc == WEOF)
+ return WEOF;
+ switch (orientation) {
+ case Orientation::BYTE:
err = true;
return WEOF;
+ case Orientation::UNORIENTED:
+ orientation = Orientation::WIDE;
+ break;
+ case Orientation::WIDE:
+ break;
}
char buf[4];
- auto result = internal::wcrtomb(buf, wc, &mbstate);
- if (!result.has_value()) {
- err = true;
+ auto result = internal::wcrtomb(buf, static_cast<wchar_t>(wc), &mbstate);
+ if (!result.has_value())
return WEOF;
- }
+
size_t n = result.value();
if (read_limit == 0) {
- if (n > bufsize) {
- err = true;
+ if (n > bufsize)
return WEOF;
- }
- for (size_t i = 0; i < n; ++i) {
+
+ for (size_t i = 0; i < n; ++i)
this->buf[i] = static_cast<uint8_t>(buf[i]);
- }
+
read_limit = n;
pos = 0;
} else {
- if (pos < n) {
- err = true;
+ if (pos < n)
return WEOF;
- }
pos -= n;
- for (size_t i = 0; i < n; ++i) {
+ for (size_t i = 0; i < n; ++i)
this->buf[pos + i] = static_cast<uint8_t>(buf[i]);
- }
}
eof = false;
err = false;
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 6f7ba3f48c76c..9896d5198dd3e 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -240,9 +240,9 @@ class File {
return read_unlocked(ws, len);
}
- wint_t ungetwc_unlocked(wchar_t wc);
+ wint_t ungetwc_unlocked(wint_t wc);
- wint_t ungetwc(wchar_t wc) {
+ wint_t ungetwc(wint_t wc) {
FileLock lock(this);
return ungetwc_unlocked(wc);
}
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 694621218c452..53559a11df087 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "hdr/types/size_t.h"
+#include "hdr/wchar_macros.h"
#include "src/__support/CPP/new.h"
#include "src/__support/File/file.h"
#include "src/__support/alloc-checker.h"
@@ -737,3 +738,70 @@ TEST(LlvmLibcFileTest, SeekResetsMbstate) {
ASSERT_EQ(f->close(), 0);
}
+
+TEST(LlvmLibcFileTest, ReadWideNotStopAtNewline) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+ ASSERT_FALSE(f == nullptr);
+
+ const wchar_t *ws = L"Hello\nWorld!";
+ size_t len = 12;
+
+ auto write_res = f->write(ws, len);
+ ASSERT_FALSE(write_res.has_error());
+ EXPECT_EQ(write_res.value, len);
+
+ ASSERT_EQ(f->flush(), 0);
+ ASSERT_EQ(f->seek(0, SEEK_SET).value(), 0);
+
+ wchar_t read_buf[20];
+ auto read_res = f->read(read_buf, len);
+ ASSERT_FALSE(read_res.has_error());
+ // Should NOT stop at newline, so should read all 12 characters.
+ EXPECT_EQ(read_res.value, len);
+ EXPECT_EQ(static_cast<unsigned int>(read_buf[5]),
+ static_cast<unsigned int>(L'\n'));
+ EXPECT_EQ(static_cast<unsigned int>(read_buf[11]),
+ static_cast<unsigned int>(L'!'));
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, UngetwcWEOF) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "r+");
+ ASSERT_FALSE(f == nullptr);
+
+ EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+ static_cast<unsigned int>(File::Orientation::UNORIENTED));
+
+ auto unget_res = f->ungetwc(WEOF);
+ EXPECT_EQ(static_cast<unsigned int>(unget_res),
+ static_cast<unsigned int>(WEOF));
+
+ EXPECT_EQ(static_cast<unsigned int>(f->get_orientation()),
+ static_cast<unsigned int>(File::Orientation::UNORIENTED));
+
+ ASSERT_EQ(f->close(), 0);
+}
+
+TEST(LlvmLibcFileTest, UngetwcErrorIndicator) {
+ constexpr size_t FILE_BUFFER_SIZE = 100;
+ char file_buffer[FILE_BUFFER_SIZE];
+ StringFile *f =
+ new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
+ ASSERT_FALSE(f == nullptr);
+
+ f->write("A", 1);
+
+ auto unget_res = f->ungetwc(L'B');
+ EXPECT_EQ(static_cast<unsigned int>(unget_res),
+ static_cast<unsigned int>(WEOF));
+ EXPECT_FALSE(f->error());
+
+ ASSERT_EQ(f->close(), 0);
+}
>From b3f3fa37b986df0f6ab78859f747925878af7ce1 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Wed, 8 Apr 2026 17:31:12 +0000
Subject: [PATCH 9/9] cleanup after rebase, remove incorrect test.
---
libc/src/__support/File/file.h | 2 +-
libc/test/src/__support/File/file_test.cpp | 19 +------------------
2 files changed, 2 insertions(+), 19 deletions(-)
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 9896d5198dd3e..2c8ec26d5e7f0 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -183,7 +183,7 @@ class File {
own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE),
read_limit(0), eof(false), err(false),
orientation(Orientation::UNORIENTED), mbstate(), prev(nullptr),
- next(nullptr), {
+ next(nullptr) {
adjust_buf();
}
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index 53559a11df087..4dbb75eb66d86 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -563,8 +563,8 @@ TEST(LlvmLibcFileTest, Ungetwc) {
auto unget_res = f->ungetwc(L'B');
EXPECT_EQ(static_cast<unsigned int>(unget_res),
static_cast<unsigned int>(L'B'));
-
auto read_res2 = f->read(ws_out, 1);
+
ASSERT_EQ(read_res2.value, size_t(1));
EXPECT_EQ(static_cast<unsigned int>(ws_out[0]),
static_cast<unsigned int>(L'B'));
@@ -788,20 +788,3 @@ TEST(LlvmLibcFileTest, UngetwcWEOF) {
ASSERT_EQ(f->close(), 0);
}
-
-TEST(LlvmLibcFileTest, UngetwcErrorIndicator) {
- constexpr size_t FILE_BUFFER_SIZE = 100;
- char file_buffer[FILE_BUFFER_SIZE];
- StringFile *f =
- new_string_file(file_buffer, FILE_BUFFER_SIZE, _IOFBF, false, "w+");
- ASSERT_FALSE(f == nullptr);
-
- f->write("A", 1);
-
- auto unget_res = f->ungetwc(L'B');
- EXPECT_EQ(static_cast<unsigned int>(unget_res),
- static_cast<unsigned int>(WEOF));
- EXPECT_FALSE(f->error());
-
- ASSERT_EQ(f->close(), 0);
-}
More information about the libc-commits
mailing list