[libc-commits] [libc] [libc] Added a helper to StringConverter to check if a conversion has been completed (reached null terminator) (PR #149364)
Uzair Nawaz via libc-commits
libc-commits at lists.llvm.org
Thu Jul 17 10:31:55 PDT 2025
https://github.com/uzairnawaz created https://github.com/llvm/llvm-project/pull/149364
Adds a helper method to StringConverter that checks whether the source string has been completely converted (i.e., the null terminator has been reached). This is useful for certain public conversion functions.
>From 9fe88da74a4bf7916f7c815e11169aec57ecfba6 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 17:28:48 +0000
Subject: [PATCH] added a helper to check if a string conversion has completed
---
libc/src/__support/wchar/string_converter.h | 11 +++++++++--
.../__support/wchar/string_converter_test.cpp | 16 ++++++++++++++++
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h
index 0635bc57bf3e2..6e52e2c82ba1d 100644
--- a/libc/src/__support/wchar/string_converter.h
+++ b/libc/src/__support/wchar/string_converter.h
@@ -26,6 +26,7 @@ template <typename T> class StringConverter {
const T *src;
size_t src_len;
size_t src_idx;
+ bool completed = false;
// # of pops we are allowed to perform (essentially size of the dest buffer)
size_t num_to_write;
@@ -70,8 +71,10 @@ template <typename T> class StringConverter {
}
auto out = cr.pop_utf32();
- if (out.has_value() && out.value() == L'\0')
+ if (out.has_value() && out.value() == L'\0') {
+ completed = true;
src_len = src_idx;
+ }
num_to_write--;
@@ -93,8 +96,10 @@ template <typename T> class StringConverter {
}
auto out = cr.pop_utf8();
- if (out.has_value() && out.value() == '\0')
+ if (out.has_value() && out.value() == '\0') {
+ completed = true;
src_len = src_idx;
+ }
num_to_write--;
@@ -102,6 +107,8 @@ template <typename T> class StringConverter {
}
size_t getSourceIndex() { return src_idx; }
+
+ bool isConversionComplete() { return completed; }
};
} // namespace internal
diff --git a/libc/test/src/__support/wchar/string_converter_test.cpp b/libc/test/src/__support/wchar/string_converter_test.cpp
index 14d074156d033..548baaf4bf655 100644
--- a/libc/test/src/__support/wchar/string_converter_test.cpp
+++ b/libc/test/src/__support/wchar/string_converter_test.cpp
@@ -38,26 +38,31 @@ TEST(LlvmLibcStringConverterTest, UTF8To32) {
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x1f921);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF32();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x2211);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 7);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF32();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xff);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 9);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF32();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x41);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 10);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF32();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 11);
+ ASSERT_TRUE(sc.isConversionComplete());
res = sc.popUTF32();
ASSERT_FALSE(res.has_value());
@@ -79,60 +84,71 @@ TEST(LlvmLibcStringConverterTest, UTF32To8) {
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xF0);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x9F);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xA4);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xA1);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+ ASSERT_FALSE(sc.isConversionComplete());
// end of clown emoji, sigma symbol begins
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xE2);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x88);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x91);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
+ ASSERT_FALSE(sc.isConversionComplete());
// end of sigma symbol, y with diaeresis begins
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xC3);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 3);
+ ASSERT_FALSE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0xBF);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 3);
+ ASSERT_FALSE(sc.isConversionComplete());
// end of y with diaeresis, letter A begins
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0x41);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
+ ASSERT_FALSE(sc.isConversionComplete());
// null byte
res = sc.popUTF8();
ASSERT_TRUE(res.has_value());
ASSERT_EQ(static_cast<int>(res.value()), 0);
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 5);
+ ASSERT_TRUE(sc.isConversionComplete());
res = sc.popUTF8();
ASSERT_FALSE(res.has_value());
More information about the libc-commits
mailing list