[libc-commits] [libc] [libc] Wchar Stringconverter (PR #146388)

Michael Jones via libc-commits libc-commits at lists.llvm.org
Mon Jun 30 11:01:28 PDT 2025


================
@@ -0,0 +1,87 @@
+//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC___SUPPORT_STRING_CONVERTER_H
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+template <typename T> class StringConverter {
+private:
+  CharacterConverter cr;
+  const T *src;
+  size_t src_len;
+  size_t src_idx;
+
+  int pushFullCharacter() {
+    if (!cr.isEmpty())
+      return 0;
+
+    int original_idx = src_idx;
+    while (!cr.isFull() && src_idx < src_len) {
+      int err = cr.push(src[src_idx++]);
+      if (err != 0) {
+        // point to the beginning of the invalid sequence
+        src_idx = original_idx;
----------------
michaelrj-google wrote:

Here you reset the source index, but not the mbstate. This might result in the same char ending up in the mbstate twice if a string conversion ends partway through. The same issue applies below.

https://github.com/llvm/llvm-project/pull/146388


More information about the libc-commits mailing list