[Lldb-commits] [lldb] [lldb] improve the heuristics for checking if a terminal supports Unicode (PR #171832)

Charles Zablit via lldb-commits lldb-commits at lists.llvm.org
Fri Dec 12 03:36:41 PST 2025


https://github.com/charles-zablit updated https://github.com/llvm/llvm-project/pull/171832

>From e46383656970d88e4a73b5406c4a04eb7bea48a5 Mon Sep 17 00:00:00 2001
From: Charles Zablit <c_zablit at apple.com>
Date: Thu, 11 Dec 2025 14:02:09 +0000
Subject: [PATCH 1/4] [lldb] improve the heuristics for checking if a terminal
 supports Unicode

---
 lldb/include/lldb/Host/Terminal.h             | 12 ++++++++++++
 .../lldb/Host/common/DiagnosticsRendering.h   | 19 ++++++++++++++++++-
 .../Host/common/DiagnosticsRendering.cpp      | 11 +++++------
 lldb/source/Host/common/Terminal.cpp          | 16 ++++++++++++++++
 .../Shell/Commands/command-dwim-print.test    |  8 ++++----
 .../Commands/command-expr-diagnostics.test    | 15 +++++++--------
 lldb/test/Shell/Commands/command-options.test | 12 ++++++------
 .../Host/common/DiagnosticsRenderingTest.cpp  |  2 +-
 8 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/lldb/include/lldb/Host/Terminal.h b/lldb/include/lldb/Host/Terminal.h
index da0d05e8bd265..3d66515c18812 100644
--- a/lldb/include/lldb/Host/Terminal.h
+++ b/lldb/include/lldb/Host/Terminal.h
@@ -68,6 +68,18 @@ class Terminal {
 
   llvm::Error SetHardwareFlowControl(bool enabled);
 
+  /// Returns whether or not the current terminal supports Unicode rendering.
+  ///
+  /// The value is cached after the first computation.
+  ///
+  /// On POSIX systems, we check if the LANG environment variable is set and
+  /// contains the substring "UTF-8" (case-insensitive).
+  ///
+  /// On Windows, we always return true since we use the `WriteConsoleW` API
+  /// internally. Note that the default Windows codepage (437) does not support
+  /// all Unicode characters. This function does not check the codepage.
+  static bool SupportsUnicode();
+
 protected:
   struct Data;
 
diff --git a/lldb/include/lldb/Host/common/DiagnosticsRendering.h b/lldb/include/lldb/Host/common/DiagnosticsRendering.h
index dd33d671c24a5..3eea0647da37e 100644
--- a/lldb/include/lldb/Host/common/DiagnosticsRendering.h
+++ b/lldb/include/lldb/Host/common/DiagnosticsRendering.h
@@ -59,10 +59,27 @@ struct DiagnosticDetail {
 
 StructuredData::ObjectSP Serialize(llvm::ArrayRef<DiagnosticDetail> details);
 
+/// Renders an array of DiagnosticDetail instances.
+///
+/// \param[in] stream
+///     The stream to render the diagnostics to.
+/// \param[in] offset_in_command
+///     An optional offset to the column position of the diagnostic in the
+///     source.
+/// \param[in] show_inline
+///     Whether to show the diagnostics inline.
+/// \param[in] details
+///     The array of DiagnosticDetail instances to render.
+/// \param[in] force_ascii
+///     Whether to force ASCII rendering. If false, Unicode characters will be
+///     used if the terminal supports them.
+///
+/// \see lldb_private::Terminal::SupportsUnicode
 void RenderDiagnosticDetails(Stream &stream,
                              std::optional<uint16_t> offset_in_command,
                              bool show_inline,
-                             llvm::ArrayRef<DiagnosticDetail> details);
+                             llvm::ArrayRef<DiagnosticDetail> details,
+                             bool force_ascii = false);
 
 class DiagnosticError
     : public llvm::ErrorInfo<DiagnosticError, CloneableECError> {
diff --git a/lldb/source/Host/common/DiagnosticsRendering.cpp b/lldb/source/Host/common/DiagnosticsRendering.cpp
index f2cd3968967fb..2c9d33a6c325c 100644
--- a/lldb/source/Host/common/DiagnosticsRendering.cpp
+++ b/lldb/source/Host/common/DiagnosticsRendering.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/Host/common/DiagnosticsRendering.h"
+#include "lldb/Host/Terminal.h"
+
 #include <cstdint>
 
 using namespace lldb_private;
@@ -85,7 +87,8 @@ static llvm::raw_ostream &PrintSeverity(Stream &stream,
 void RenderDiagnosticDetails(Stream &stream,
                              std::optional<uint16_t> offset_in_command,
                              bool show_inline,
-                             llvm::ArrayRef<DiagnosticDetail> details) {
+                             llvm::ArrayRef<DiagnosticDetail> details,
+                             bool force_ascii) {
   if (details.empty())
     return;
 
@@ -97,12 +100,8 @@ void RenderDiagnosticDetails(Stream &stream,
     return;
   }
 
-  // Since there is no other way to find this out, use the color
-  // attribute as a proxy for whether the terminal supports Unicode
-  // characters.  In the future it might make sense to move this into
-  // Host so it can be customized for a specific platform.
   llvm::StringRef cursor, underline, vbar, joint, hbar, spacer;
-  if (stream.AsRawOstream().colors_enabled()) {
+  if (Terminal::SupportsUnicode() && !force_ascii) {
     cursor = "˄";
     underline = "˜";
     vbar = "│";
diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index 436dfd8130d9b..d3647835e3937 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -400,6 +400,22 @@ llvm::Error Terminal::SetHardwareFlowControl(bool enabled) {
 #endif // LLDB_ENABLE_TERMIOS
 }
 
+bool Terminal::SupportsUnicode() {
+  static std::optional<bool> g_result;
+  if (g_result)
+    return g_result.value();
+#ifdef _WIN32
+  return true;
+#else
+  const char *lang_var = std::getenv("LANG");
+  if (!lang_var)
+    return false;
+  g_result =
+      llvm::StringRef(lang_var).lower().find("utf-8") != std::string::npos;
+#endif
+  return g_result.value();
+}
+
 TerminalState::TerminalState(Terminal term, bool save_process_group)
     : m_tty(term) {
   Save(term, save_process_group);
diff --git a/lldb/test/Shell/Commands/command-dwim-print.test b/lldb/test/Shell/Commands/command-dwim-print.test
index 9153edbd21791..c30d9c7fc3f85 100644
--- a/lldb/test/Shell/Commands/command-dwim-print.test
+++ b/lldb/test/Shell/Commands/command-dwim-print.test
@@ -1,16 +1,16 @@
 # RUN: echo quit | %lldb -o "dwim-print a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) dwim-print a 
-# CHECK1:{{^                  \^}}
-# CHECK1: {{^                  error: use of undeclared identifier 'a'}}
+# CHECK1:{{^                  (\^|˄)}}
+# CHECK1: {{^                  (╰─)? error: use of undeclared identifier 'a'}}
 # RUN: echo quit | %lldb -o "p a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb) p a 
-# CHECK2:{{^         \^}}
+# CHECK2:{{^         (\^|˄)}}
 # RUN: echo quit | %lldb -o "dwim-print -- a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) dwim-print -- a 
-# CHECK3:{{^                     \^}}
+# CHECK3:{{^                     (\^|˄)}}
 # RUN: echo quit | %lldb -o "settings set show-inline-diagnostics false" \
 # RUN:   -o "dwim-print a" 2>&1 | FileCheck %s --check-prefix=CHECK4
 # CHECK4: error: <user expression 0>:1:1: use of undeclared identifier
diff --git a/lldb/test/Shell/Commands/command-expr-diagnostics.test b/lldb/test/Shell/Commands/command-expr-diagnostics.test
index 3c827fb4516ec..b791c8d0eece9 100644
--- a/lldb/test/Shell/Commands/command-expr-diagnostics.test
+++ b/lldb/test/Shell/Commands/command-expr-diagnostics.test
@@ -1,21 +1,20 @@
-# XFAIL: target-windows
 # RUN: echo quit | %lldb -o "expression a+b" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) expression a+b
-# CHECK1:{{^                  \^ \^}}
-# CHECK1: {{^                  | error: use of undeclared identifier 'b'}}
-# CHECK1: {{^                  error: use of undeclared identifier 'a'}}
+# CHECK1:{{^                  (\^|˄) (\^|˄)}}
+# CHECK1: {{^                  (\||│) (╰─)? error: use of undeclared identifier 'b'}}
+# CHECK1: {{^                  (╰─)? error: use of undeclared identifier 'a'}}
 
 # RUN: echo quit | %lldb -o "expr a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb) expr a 
-# CHECK2:{{^            \^}}
+# CHECK2:{{^            (\^|˄)}}
 
 # RUN: echo quit | %lldb -o "expr -i 0 -o 0 -- a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) expr -i 0 -o 0 -- a
-# CHECK3:{{^                         \^}}
-# CHECK3: {{^                         error: use of undeclared identifier 'a'}}
+# CHECK3:{{^                         (\^|˄)}}
+# CHECK3: {{^                         (╰─)? error: use of undeclared identifier 'a'}}
 
 # RUN: echo "int main(){return 0;}">%t.c
 # RUN: %clang_host %t.c -o %t.exe
@@ -23,7 +22,7 @@
 # RUN: "expr --top-level -- template<typename T> T FOO(T x) { return x/2;}" -o \
 # RUN: "expression -- FOO(\"\")" 2>&1 | FileCheck %s --check-prefix=CHECK4
 #            (lldb) expression -- FOO("")
-# CHECK4:{{^                     \^}}
+# CHECK4:{{^                     (\^|˄)}}
 # CHECK4: {{^                     note: in instantiation of function template}}
 # CHECK4: error: <user expression
 
diff --git a/lldb/test/Shell/Commands/command-options.test b/lldb/test/Shell/Commands/command-options.test
index 73aa374bde297..85cc2b881b856 100644
--- a/lldb/test/Shell/Commands/command-options.test
+++ b/lldb/test/Shell/Commands/command-options.test
@@ -1,16 +1,16 @@
 # RUN: echo quit | %lldb -O "log enable -x" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) log enable -x
-# CHECK1:{{^                  \^~}}
-# CHECK1: {{^                  error: unknown or ambiguous option}}
+# CHECK1:{{^                  (\^|˄)(~|˜)}}
+# CHECK1: {{^                  (╰─)? error: unknown or ambiguous option}}
 
 # RUN: echo quit | %lldb -O "    log enable -xxxxxxx" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb)     log enable -xxxxxxx
-# CHECK2:{{^                      \^~~~~~~~}}
-# CHECK2: {{^                      error: unknown or ambiguous option}}
+# CHECK2:{{^                      (\^|˄)(~|˜)+}}
+# CHECK2: {{^                      (╰─)? error: unknown or ambiguous option}}
 # RUN: echo quit | %lldb -O "log enable dwarf all -f dwarf.log -x" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) log enable dwarf all -f dwarf.log -x
-# CHECK3:{{^                                         \^~}}
-# CHECK3: {{^                                         error: unknown or ambiguous option}}
+# CHECK3:{{^                                         (\^|˄)(~|˜)}}
+# CHECK3: {{^                                         (╰─)? error: unknown or ambiguous option}}
diff --git a/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp b/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
index 851b478def32e..896ce1995fe1c 100644
--- a/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
+++ b/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
@@ -10,7 +10,7 @@ class ErrorDisplayTest : public ::testing::Test {};
 
 std::string Render(std::vector<DiagnosticDetail> details) {
   StreamString stream;
-  RenderDiagnosticDetails(stream, 0, true, details);
+  RenderDiagnosticDetails(stream, 0, true, details, /*force_ascii=*/true);
   return stream.GetData();
 }
 } // namespace

>From 71fcf5d2b03854898a0b2f14d365d162cd4130bc Mon Sep 17 00:00:00 2001
From: Charles Zablit <c_zablit at apple.com>
Date: Thu, 11 Dec 2025 14:36:12 +0000
Subject: [PATCH 2/4] fixup! [lldb] improve the heuristics for checking if a
 terminal supports Unicode

---
 lldb/test/Shell/Commands/command-dwim-print.test       | 2 +-
 lldb/test/Shell/Commands/command-expr-diagnostics.test | 6 +++---
 lldb/test/Shell/Commands/command-options.test          | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lldb/test/Shell/Commands/command-dwim-print.test b/lldb/test/Shell/Commands/command-dwim-print.test
index c30d9c7fc3f85..8c2697d8ebf8c 100644
--- a/lldb/test/Shell/Commands/command-dwim-print.test
+++ b/lldb/test/Shell/Commands/command-dwim-print.test
@@ -2,7 +2,7 @@
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) dwim-print a 
 # CHECK1:{{^                  (\^|˄)}}
-# CHECK1: {{^                  (╰─)? error: use of undeclared identifier 'a'}}
+# CHECK1: {{^                  (╰─ )?error: use of undeclared identifier 'a'}}
 # RUN: echo quit | %lldb -o "p a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb) p a 
diff --git a/lldb/test/Shell/Commands/command-expr-diagnostics.test b/lldb/test/Shell/Commands/command-expr-diagnostics.test
index b791c8d0eece9..4888f344c93df 100644
--- a/lldb/test/Shell/Commands/command-expr-diagnostics.test
+++ b/lldb/test/Shell/Commands/command-expr-diagnostics.test
@@ -2,8 +2,8 @@
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) expression a+b
 # CHECK1:{{^                  (\^|˄) (\^|˄)}}
-# CHECK1: {{^                  (\||│) (╰─)? error: use of undeclared identifier 'b'}}
-# CHECK1: {{^                  (╰─)? error: use of undeclared identifier 'a'}}
+# CHECK1: {{^                  (\||│) (╰─ )?error: use of undeclared identifier 'b'}}
+# CHECK1: {{^                  (╰─ )?error: use of undeclared identifier 'a'}}
 
 # RUN: echo quit | %lldb -o "expr a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
@@ -14,7 +14,7 @@
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) expr -i 0 -o 0 -- a
 # CHECK3:{{^                         (\^|˄)}}
-# CHECK3: {{^                         (╰─)? error: use of undeclared identifier 'a'}}
+# CHECK3: {{^                         (╰─ )?error: use of undeclared identifier 'a'}}
 
 # RUN: echo "int main(){return 0;}">%t.c
 # RUN: %clang_host %t.c -o %t.exe
diff --git a/lldb/test/Shell/Commands/command-options.test b/lldb/test/Shell/Commands/command-options.test
index 85cc2b881b856..f75c0993ed78e 100644
--- a/lldb/test/Shell/Commands/command-options.test
+++ b/lldb/test/Shell/Commands/command-options.test
@@ -2,15 +2,15 @@
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) log enable -x
 # CHECK1:{{^                  (\^|˄)(~|˜)}}
-# CHECK1: {{^                  (╰─)? error: unknown or ambiguous option}}
+# CHECK1: {{^                  (╰─ )?error: unknown or ambiguous option}}
 
 # RUN: echo quit | %lldb -O "    log enable -xxxxxxx" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb)     log enable -xxxxxxx
 # CHECK2:{{^                      (\^|˄)(~|˜)+}}
-# CHECK2: {{^                      (╰─)? error: unknown or ambiguous option}}
+# CHECK2: {{^                      (╰─ )?error: unknown or ambiguous option}}
 # RUN: echo quit | %lldb -O "log enable dwarf all -f dwarf.log -x" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) log enable dwarf all -f dwarf.log -x
 # CHECK3:{{^                                         (\^|˄)(~|˜)}}
-# CHECK3: {{^                                         (╰─)? error: unknown or ambiguous option}}
+# CHECK3: {{^                                         (╰─ )?error: unknown or ambiguous option}}

>From 502409a6d2143ce48e0b40fcc96a4542ef801a28 Mon Sep 17 00:00:00 2001
From: Charles Zablit <c_zablit at apple.com>
Date: Thu, 11 Dec 2025 14:38:34 +0000
Subject: [PATCH 3/4] fixup! [lldb] improve the heuristics for checking if a
 terminal supports Unicode

---
 lldb/test/Shell/Commands/command-expr-diagnostics.test | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/test/Shell/Commands/command-expr-diagnostics.test b/lldb/test/Shell/Commands/command-expr-diagnostics.test
index 4888f344c93df..0cb3cd381f3e2 100644
--- a/lldb/test/Shell/Commands/command-expr-diagnostics.test
+++ b/lldb/test/Shell/Commands/command-expr-diagnostics.test
@@ -1,3 +1,4 @@
+# XFAIL: target-windows
 # RUN: echo quit | %lldb -o "expression a+b" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) expression a+b

>From fcb3652b047e846dff293b3d693c1dcf7fd679bd Mon Sep 17 00:00:00 2001
From: Charles Zablit <c_zablit at apple.com>
Date: Fri, 12 Dec 2025 11:36:28 +0000
Subject: [PATCH 4/4] fixup! [lldb] improve the heuristics for checking if a
 terminal supports Unicode

---
 lldb/source/Host/common/Terminal.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index d3647835e3937..b6d09425e956e 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -401,19 +401,20 @@ llvm::Error Terminal::SetHardwareFlowControl(bool enabled) {
 }
 
 bool Terminal::SupportsUnicode() {
-  static std::optional<bool> g_result;
-  if (g_result)
-    return g_result.value();
 #ifdef _WIN32
   return true;
 #else
+  static std::optional<bool> g_result;
+  if (g_result)
+    return g_result.value();
+
   const char *lang_var = std::getenv("LANG");
   if (!lang_var)
     return false;
   g_result =
       llvm::StringRef(lang_var).lower().find("utf-8") != std::string::npos;
-#endif
   return g_result.value();
+#endif
 }
 
 TerminalState::TerminalState(Terminal term, bool save_process_group)



More information about the lldb-commits mailing list