[Lldb-commits] [lldb] [lldb] Use Locale to convert between std::wstring and std::string (NFC) (PR #112582)

Jonas Devlieghere via lldb-commits lldb-commits at lists.llvm.org
Wed Oct 16 11:34:22 PDT 2024


https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/112582

>From de1a1c4cfb5a8c3fc47194275fcc8fc48b7cbbf9 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 16 Oct 2024 10:06:31 -0700
Subject: [PATCH 1/2] [lldb] Use Locale to convert between std::wstring and
 std::string (NFC)

The codecvt header has been deprecated in C++17. Use locale to convert
between std::string and std::wstring in Editline.
---
 lldb/include/lldb/Host/Editline.h    |  6 ----
 lldb/source/Host/common/Editline.cpp | 53 ++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h
index 9049b106f02a34..57e2c831e3499d 100644
--- a/lldb/include/lldb/Host/Editline.h
+++ b/lldb/include/lldb/Host/Editline.h
@@ -30,9 +30,6 @@
 
 #include "lldb/Host/Config.h"
 
-#if LLDB_EDITLINE_USE_WCHAR
-#include <codecvt>
-#endif
 #include <locale>
 #include <sstream>
 #include <vector>
@@ -366,9 +363,6 @@ class Editline {
   void SetEditLinePromptCallback(EditlinePromptCallbackType callbackFn);
   void SetGetCharacterFunction(EditlineGetCharCallbackType callbackFn);
 
-#if LLDB_EDITLINE_USE_WCHAR
-  std::wstring_convert<std::codecvt_utf8<wchar_t>> m_utf8conv;
-#endif
   ::EditLine *m_editline = nullptr;
   EditlineHistorySP m_history_sp;
   bool m_in_history = false;
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 561ec228cdb23f..99a5003e7a83db 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -76,6 +76,46 @@ using namespace lldb_private::line_editor;
 
 #endif // #if LLDB_EDITLINE_USE_WCHAR
 
+#if LLDB_EDITLINE_USE_WCHAR
+/// Converts wide string \p in to UTF-8; returns an empty string on failure.
+std::string ToBytes(const std::wstring &in) {
+  static std::locale locale("C.UTF-8");
+  static const auto &cvt =
+      std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
+  // One wide character can expand to as many as max_length() bytes.
+  std::string output(in.length() * cvt.max_length(), 0x0);
+  std::mbstate_t mbs{};
+  const wchar_t *in_next;
+  char *out_next;
+  // Convert only the characters of `in`, not its terminating NUL.
+  if (cvt.out(mbs, in.data(), in.data() + in.length(), in_next, output.data(),
+              output.data() + output.length(),
+              out_next) != std::codecvt_base::ok)
+    return {};
+  output.resize(out_next - output.data()); // drop the unused tail
+  return output;
+}
+
+/// Decodes UTF-8 string \p in to wide characters; returns empty on failure.
+std::wstring FromBytes(const std::string &in) {
+  static std::locale locale("C.UTF-8");
+  static const auto &cvt =
+      std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
+  // Decoding never yields more wide characters than there are input bytes.
+  std::wstring output(in.length(), 0x0);
+  std::mbstate_t mbs{};
+  const char *in_next;
+  wchar_t *out_next;
+  // Convert only the bytes of `in`, not its terminating NUL.
+  if (cvt.in(mbs, in.data(), in.data() + in.length(), in_next, output.data(),
+             output.data() + output.length(),
+             out_next) != std::codecvt_base::ok)
+    return {};
+  output.resize(out_next - output.data()); // drop the unused tail
+  return output;
+}
+#endif
+
 bool IsOnlySpaces(const EditLineStringType &content) {
   for (wchar_t ch : content) {
     if (ch != EditLineCharType(' '))
@@ -444,7 +484,7 @@ StringList Editline::GetInputAsStringList(int line_count) {
     if (line_count == 0)
       break;
 #if LLDB_EDITLINE_USE_WCHAR
-    lines.AppendString(m_utf8conv.to_bytes(line));
+    lines.AppendString(ToBytes(line));
 #else
     lines.AppendString(line);
 #endif
@@ -636,7 +676,7 @@ unsigned char Editline::BreakLineCommand(int ch) {
     if (m_fix_indentation_callback) {
       StringList lines = GetInputAsStringList(m_current_line_index + 1);
 #if LLDB_EDITLINE_USE_WCHAR
-      lines.AppendString(m_utf8conv.to_bytes(new_line_fragment));
+      lines.AppendString(ToBytes(new_line_fragment));
 #else
       lines.AppendString(new_line_fragment);
 #endif
@@ -685,7 +725,7 @@ unsigned char Editline::EndOrAddLineCommand(int ch) {
       for (unsigned index = 0; index < lines.GetSize(); index++) {
 #if LLDB_EDITLINE_USE_WCHAR
         m_input_lines.insert(m_input_lines.end(),
-                             m_utf8conv.from_bytes(lines[index]));
+                             FromBytes(lines[index]));
 #else
         m_input_lines.insert(m_input_lines.end(), lines[index]);
 #endif
@@ -869,7 +909,7 @@ unsigned char Editline::FixIndentationCommand(int ch) {
     currentLine = currentLine.erase(0, -indent_correction);
   }
 #if LLDB_EDITLINE_USE_WCHAR
-  m_input_lines[m_current_line_index] = m_utf8conv.from_bytes(currentLine);
+  m_input_lines[m_current_line_index] = FromBytes(currentLine);
 #else
   m_input_lines[m_current_line_index] = currentLine;
 #endif
@@ -1502,7 +1542,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) {
     } else {
       m_history_sp->Enter(input);
 #if LLDB_EDITLINE_USE_WCHAR
-      line = m_utf8conv.to_bytes(SplitLines(input)[0]);
+      line = ToBytes(SplitLines(input)[0]);
 #else
       line = SplitLines(input)[0];
 #endif
@@ -1574,7 +1614,8 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) {
   out = (unsigned char)ch;
   return true;
 #else
-  std::codecvt_utf8<wchar_t> cvt;
+  std::locale locale("C.UTF-8");
+  const auto &cvt = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
   llvm::SmallString<4> input;
   for (;;) {
     const char *from_next;

>From 1b391213aa74d7d1e75f6928bbb96f71fe7640dd Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 16 Oct 2024 11:34:08 -0700
Subject: [PATCH 2/2] Fix formatting

---
 lldb/source/Host/common/Editline.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 99a5003e7a83db..aec8098c5ff71e 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -724,8 +724,7 @@ unsigned char Editline::EndOrAddLineCommand(int ch) {
       m_input_lines.clear();
       for (unsigned index = 0; index < lines.GetSize(); index++) {
 #if LLDB_EDITLINE_USE_WCHAR
-        m_input_lines.insert(m_input_lines.end(),
-                             FromBytes(lines[index]));
+        m_input_lines.insert(m_input_lines.end(), FromBytes(lines[index]));
 #else
         m_input_lines.insert(m_input_lines.end(), lines[index]);
 #endif
@@ -1615,7 +1614,8 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) {
   return true;
 #else
   std::locale locale("C.UTF-8");
-  const auto &cvt = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
+  const auto &cvt =
+      std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
   llvm::SmallString<4> input;
   for (;;) {
     const char *from_next;



More information about the lldb-commits mailing list