[libc-commits] [libc] [libc] Added a helper to StringConverter to check if a conversion has been completed (reached null terminator) (PR #149364)

Uzair Nawaz via libc-commits libc-commits at lists.llvm.org
Thu Jul 17 10:31:55 PDT 2025


https://github.com/uzairnawaz created https://github.com/llvm/llvm-project/pull/149364

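Adds a helper method, `isConversionComplete()`, to `StringConverter` that reports whether the source string has been completely converted, i.e. whether the null terminator has been reached and popped. This is useful for certain public conversion functions.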


>From 9fe88da74a4bf7916f7c815e11169aec57ecfba6 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 17:28:48 +0000
Subject: [PATCH] added a helper to check if a string conversion has completed

---
 libc/src/__support/wchar/string_converter.h      | 11 +++++++++--
 .../__support/wchar/string_converter_test.cpp    | 16 ++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h
index 0635bc57bf3e2..6e52e2c82ba1d 100644
--- a/libc/src/__support/wchar/string_converter.h
+++ b/libc/src/__support/wchar/string_converter.h
@@ -26,6 +26,7 @@ template <typename T> class StringConverter {
   const T *src;
   size_t src_len;
   size_t src_idx;
+  bool completed = false;
 
   // # of pops we are allowed to perform (essentially size of the dest buffer)
   size_t num_to_write;
@@ -70,8 +71,10 @@ template <typename T> class StringConverter {
     }
 
     auto out = cr.pop_utf32();
-    if (out.has_value() && out.value() == L'\0')
+    if (out.has_value() && out.value() == L'\0') {
+      completed = true;
       src_len = src_idx;
+    }
 
     num_to_write--;
 
@@ -93,8 +96,10 @@ template <typename T> class StringConverter {
     }
 
     auto out = cr.pop_utf8();
-    if (out.has_value() && out.value() == '\0')
+    if (out.has_value() && out.value() == '\0') {
+      completed = true;
       src_len = src_idx;
+    }
 
     num_to_write--;
 
@@ -102,6 +107,8 @@ template <typename T> class StringConverter {
   }
 
   size_t getSourceIndex() { return src_idx; }
+
+  bool isConversionComplete() { return completed; }
 };
 
 } // namespace internal
diff --git a/libc/test/src/__support/wchar/string_converter_test.cpp b/libc/test/src/__support/wchar/string_converter_test.cpp
index 14d074156d033..548baaf4bf655 100644
--- a/libc/test/src/__support/wchar/string_converter_test.cpp
+++ b/libc/test/src/__support/wchar/string_converter_test.cpp
@@ -38,26 +38,31 @@ TEST(LlvmLibcStringConverterTest, UTF8To32) {
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x1f921);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF32();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x2211);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 7);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF32();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xff);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 9);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF32();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x41);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 10);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF32();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 11);
+  ASSERT_TRUE(sc.isConversionComplete());
 
   res = sc.popUTF32();
   ASSERT_FALSE(res.has_value());
@@ -79,60 +84,71 @@ TEST(LlvmLibcStringConverterTest, UTF32To8) {
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xF0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x9F);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA4);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   // end of clown emoji, sigma symbol begins
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xE2);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x88);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x91);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   // end of sigma symbol, y with diaeresis begins
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xC3);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 3);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xBF);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 3);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   // end of y with diaeresis, letter A begins
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x41);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
+  ASSERT_FALSE(sc.isConversionComplete());
 
   // null byte
   res = sc.popUTF8();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 5);
+  ASSERT_TRUE(sc.isConversionComplete());
 
   res = sc.popUTF8();
   ASSERT_FALSE(res.has_value());



More information about the libc-commits mailing list