[Lldb-commits] [lldb] [lldb] Use LLVM's helper for Unicode conversion (NFC) (PR #112582)

Jonas Devlieghere via lldb-commits lldb-commits at lists.llvm.org
Wed Oct 30 09:09:50 PDT 2024


https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/112582

>From de19f43fbee9aff2a99106a23b7f264012ab5485 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Mon, 21 Oct 2024 13:40:02 -0700
Subject: [PATCH 1/2] [lldb] Use LLVM's helper for Unicode conversion (NFC)

The codecvt header has been deprecated in C++17. Use LLVM's unicode
helpers to convert between UTF-8 and UTF-16.
---
 lldb/include/lldb/Host/Editline.h    | 25 ---------------
 lldb/source/Host/common/Editline.cpp | 48 ++++++++++++++++------------
 2 files changed, 28 insertions(+), 45 deletions(-)

diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h
index a02f90891599ad..57e2c831e3499d 100644
--- a/lldb/include/lldb/Host/Editline.h
+++ b/lldb/include/lldb/Host/Editline.h
@@ -30,9 +30,6 @@
 
 #include "lldb/Host/Config.h"
 
-#if LLDB_EDITLINE_USE_WCHAR
-#include <codecvt>
-#endif
 #include <locale>
 #include <sstream>
 #include <vector>
@@ -57,23 +54,6 @@
 
 #include "llvm/ADT/FunctionExtras.h"
 
-#if defined(__clang__) && defined(__has_warning)
-#if __has_warning("-Wdeprecated-declarations")
-#define LLDB_DEPRECATED_WARNING_DISABLE                                        \
-  _Pragma("clang diagnostic push")                                             \
-      _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"")
-#define LLDB_DEPRECATED_WARNING_RESTORE _Pragma("clang diagnostic pop")
-#endif
-#elif defined(__GNUC__) && __GNUC__ > 6
-#define LLDB_DEPRECATED_WARNING_DISABLE                                        \
-  _Pragma("GCC diagnostic push")                                               \
-      _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#define LLDB_DEPRECATED_WARNING_RESTORE _Pragma("GCC diagnostic pop")
-#else
-#define LLDB_DEPRECATED_WARNING_DISABLE
-#define LLDB_DEPRECATED_WARNING_RESTORE
-#endif
-
 namespace lldb_private {
 namespace line_editor {
 
@@ -383,11 +363,6 @@ class Editline {
   void SetEditLinePromptCallback(EditlinePromptCallbackType callbackFn);
   void SetGetCharacterFunction(EditlineGetCharCallbackType callbackFn);
 
-#if LLDB_EDITLINE_USE_WCHAR
-  LLDB_DEPRECATED_WARNING_DISABLE
-  std::wstring_convert<std::codecvt_utf8<wchar_t>> m_utf8conv;
-  LLDB_DEPRECATED_WARNING_RESTORE
-#endif
   ::EditLine *m_editline = nullptr;
   EditlineHistorySP m_history_sp;
   bool m_in_history = false;
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 60117cb5f0e615..b568ede4fa0322 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -12,6 +12,8 @@
 
 #include "lldb/Host/Editline.h"
 
+#include <codecvt>
+
 #include "lldb/Host/ConnectionFileDescriptor.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/Host.h"
@@ -23,6 +25,7 @@
 #include "lldb/Utility/StreamString.h"
 #include "lldb/Utility/StringList.h"
 #include "lldb/Utility/Timeout.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Locale.h"
@@ -444,7 +447,9 @@ StringList Editline::GetInputAsStringList(int line_count) {
     if (line_count == 0)
       break;
 #if LLDB_EDITLINE_USE_WCHAR
-    lines.AppendString(m_utf8conv.to_bytes(line));
+    std::string buffer;
+    llvm::convertWideToUTF8(line, buffer);
+    lines.AppendString(buffer);
 #else
     lines.AppendString(line);
 #endif
@@ -636,7 +641,9 @@ unsigned char Editline::BreakLineCommand(int ch) {
     if (m_fix_indentation_callback) {
       StringList lines = GetInputAsStringList(m_current_line_index + 1);
 #if LLDB_EDITLINE_USE_WCHAR
-      lines.AppendString(m_utf8conv.to_bytes(new_line_fragment));
+      std::string buffer;
+      llvm::convertWideToUTF8(new_line_fragment, buffer);
+      lines.AppendString(buffer);
 #else
       lines.AppendString(new_line_fragment);
 #endif
@@ -684,8 +691,9 @@ unsigned char Editline::EndOrAddLineCommand(int ch) {
       m_input_lines.clear();
       for (unsigned index = 0; index < lines.GetSize(); index++) {
 #if LLDB_EDITLINE_USE_WCHAR
-        m_input_lines.insert(m_input_lines.end(),
-                             m_utf8conv.from_bytes(lines[index]));
+        std::wstring wbuffer;
+        llvm::ConvertUTF8toWide(lines[index], wbuffer);
+        m_input_lines.insert(m_input_lines.end(), wbuffer);
 #else
         m_input_lines.insert(m_input_lines.end(), lines[index]);
 #endif
@@ -869,7 +877,9 @@ unsigned char Editline::FixIndentationCommand(int ch) {
     currentLine = currentLine.erase(0, -indent_correction);
   }
 #if LLDB_EDITLINE_USE_WCHAR
-  m_input_lines[m_current_line_index] = m_utf8conv.from_bytes(currentLine);
+  std::wstring wbuffer;
+  llvm::ConvertUTF8toWide(currentLine, wbuffer);
+  m_input_lines[m_current_line_index] = wbuffer;
 #else
   m_input_lines[m_current_line_index] = currentLine;
 #endif
@@ -1502,7 +1512,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) {
     } else {
       m_history_sp->Enter(input);
 #if LLDB_EDITLINE_USE_WCHAR
-      line = m_utf8conv.to_bytes(SplitLines(input)[0]);
+      llvm::convertWideToUTF8(SplitLines(input)[0], line);
 #else
       line = SplitLines(input)[0];
 #endif
@@ -1574,25 +1584,23 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) {
   out = (unsigned char)ch;
   return true;
 #else
-  LLDB_DEPRECATED_WARNING_DISABLE
-  std::codecvt_utf8<wchar_t> cvt;
-  LLDB_DEPRECATED_WARNING_RESTORE
   llvm::SmallString<4> input;
   for (;;) {
-    const char *from_next;
-    wchar_t *to_next;
-    std::mbstate_t state = std::mbstate_t();
     input.push_back(ch);
-    switch (cvt.in(state, input.begin(), input.end(), from_next, &out, &out + 1,
-                   to_next)) {
-    case std::codecvt_base::ok:
+    const char *cur_ptr = input.begin();
+    const char *end_ptr = input.end();
+    llvm::UTF32 code_point = 0;
+    llvm::ConversionResult cr = llvm::convertUTF8Sequence(
+        (const llvm::UTF8 **)&cur_ptr, (const llvm::UTF8 *)end_ptr, &code_point,
+        llvm::lenientConversion);
+    switch (cr) {
+    case llvm::conversionOK:
+      out = code_point;
       return out != (EditLineGetCharType)WEOF;
-
-    case std::codecvt_base::error:
-    case std::codecvt_base::noconv:
+    case llvm::targetExhausted:
+    case llvm::sourceIllegal:
       return false;
-
-    case std::codecvt_base::partial:
+    case llvm::sourceExhausted:
       lldb::ConnectionStatus status;
       size_t read_count = m_input_connection.Read(
           &ch, 1, std::chrono::seconds(0), status, nullptr);

>From dac7b1cb9f3418354f6d5efb3fbd1034a5e3d607 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 30 Oct 2024 09:09:34 -0700
Subject: [PATCH 2/2] Address Pavel's feedback

---
 lldb/source/Host/common/Editline.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index b568ede4fa0322..f95f854c5f220c 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -10,11 +10,8 @@
 #include <iomanip>
 #include <optional>
 
-#include "lldb/Host/Editline.h"
-
-#include <codecvt>
-
 #include "lldb/Host/ConnectionFileDescriptor.h"
+#include "lldb/Host/Editline.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/Host.h"
 #include "lldb/Utility/CompletionRequest.h"
@@ -1587,12 +1584,11 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) {
   llvm::SmallString<4> input;
   for (;;) {
     input.push_back(ch);
-    const char *cur_ptr = input.begin();
-    const char *end_ptr = input.end();
+    auto *cur_ptr = reinterpret_cast<const llvm::UTF8 *>(input.begin());
+    auto *end_ptr = reinterpret_cast<const llvm::UTF8 *>(input.end());
     llvm::UTF32 code_point = 0;
     llvm::ConversionResult cr = llvm::convertUTF8Sequence(
-        (const llvm::UTF8 **)&cur_ptr, (const llvm::UTF8 *)end_ptr, &code_point,
-        llvm::lenientConversion);
+        &cur_ptr, end_ptr, &code_point, llvm::lenientConversion);
     switch (cr) {
     case llvm::conversionOK:
       out = code_point;



More information about the lldb-commits mailing list