[Lldb-commits] [lldb] 7345233 - [lldb] improve the heuristics for checking if a terminal supports Unicode (#171832)

via lldb-commits lldb-commits at lists.llvm.org
Fri Dec 12 07:07:12 PST 2025


Author: Charles Zablit
Date: 2025-12-12T15:07:08Z
New Revision: 7345233fb60198ec170791f418d5f1d5fc1a0e53

URL: https://github.com/llvm/llvm-project/commit/7345233fb60198ec170791f418d5f1d5fc1a0e53
DIFF: https://github.com/llvm/llvm-project/commit/7345233fb60198ec170791f418d5f1d5fc1a0e53.diff

LOG: [lldb] improve the heuristics for checking if a terminal supports Unicode (#171832)

This patch improves the way lldb checks if the terminal it's opened in
(if any) supports Unicode or not.

On POSIX systems, we check if `LANG` contains `UTF-8`.

On Windows, we always return `true` since we use the `WriteConsoleW`
API.

This is a relanding of https://github.com/llvm/llvm-project/pull/168603.

The tests failed because the bots support Unicode but the tests expect
ASCII. To avoid different outputs depending on the environment the tests
are running in, this patch always forces ASCII in the tests.

Added: 
    

Modified: 
    lldb/include/lldb/Host/Terminal.h
    lldb/include/lldb/Host/common/DiagnosticsRendering.h
    lldb/source/Host/common/DiagnosticsRendering.cpp
    lldb/source/Host/common/Terminal.cpp
    lldb/test/Shell/Commands/command-dwim-print.test
    lldb/test/Shell/Commands/command-expr-diagnostics.test
    lldb/test/Shell/Commands/command-options.test
    lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Host/Terminal.h b/lldb/include/lldb/Host/Terminal.h
index da0d05e8bd265..3d66515c18812 100644
--- a/lldb/include/lldb/Host/Terminal.h
+++ b/lldb/include/lldb/Host/Terminal.h
@@ -68,6 +68,18 @@ class Terminal {
 
   llvm::Error SetHardwareFlowControl(bool enabled);
 
+  /// Returns whether or not the current terminal supports Unicode rendering.
+  ///
+  /// The value is cached after the first computation.
+  ///
+  /// On POSIX systems, we check if the LANG environment variable contains the
+  /// substring "UTF-8", case insensitive.
+  ///
+  /// On Windows, we always return true since we use the `WriteConsoleW` API
+  /// internally. Note that the default Windows codepage (437) does not support
+  /// all Unicode characters. This function does not check the codepage.
+  static bool SupportsUnicode();
+
 protected:
   struct Data;
 

diff  --git a/lldb/include/lldb/Host/common/DiagnosticsRendering.h b/lldb/include/lldb/Host/common/DiagnosticsRendering.h
index dd33d671c24a5..3eea0647da37e 100644
--- a/lldb/include/lldb/Host/common/DiagnosticsRendering.h
+++ b/lldb/include/lldb/Host/common/DiagnosticsRendering.h
@@ -59,10 +59,27 @@ struct DiagnosticDetail {
 
 StructuredData::ObjectSP Serialize(llvm::ArrayRef<DiagnosticDetail> details);
 
+/// Renders an array of DiagnosticDetail instances.
+///
+/// \param[in] stream
+///     The stream to render the diagnostics to.
+/// \param offset_in_command
+///     An optional offset to the column position of the diagnostic in the
+///     source.
+/// \param show_inline
+///     Whether to show the diagnostics inline.
+/// \param details
+///     The array of DiagnosticDetail to render.
+/// \param force_ascii
+///     Whether to force ASCII rendering. If false, Unicode characters will be
+///     used if the terminal supports them.
+///
+/// \see lldb_private::Terminal::SupportsUnicode
 void RenderDiagnosticDetails(Stream &stream,
                              std::optional<uint16_t> offset_in_command,
                              bool show_inline,
-                             llvm::ArrayRef<DiagnosticDetail> details);
+                             llvm::ArrayRef<DiagnosticDetail> details,
+                             bool force_ascii = false);
 
 class DiagnosticError
     : public llvm::ErrorInfo<DiagnosticError, CloneableECError> {

diff  --git a/lldb/source/Host/common/DiagnosticsRendering.cpp b/lldb/source/Host/common/DiagnosticsRendering.cpp
index f2cd3968967fb..2c9d33a6c325c 100644
--- a/lldb/source/Host/common/DiagnosticsRendering.cpp
+++ b/lldb/source/Host/common/DiagnosticsRendering.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/Host/common/DiagnosticsRendering.h"
+#include "lldb/Host/Terminal.h"
+
 #include <cstdint>
 
 using namespace lldb_private;
@@ -85,7 +87,8 @@ static llvm::raw_ostream &PrintSeverity(Stream &stream,
 void RenderDiagnosticDetails(Stream &stream,
                              std::optional<uint16_t> offset_in_command,
                              bool show_inline,
-                             llvm::ArrayRef<DiagnosticDetail> details) {
+                             llvm::ArrayRef<DiagnosticDetail> details,
+                             bool force_ascii) {
   if (details.empty())
     return;
 
@@ -97,12 +100,8 @@ void RenderDiagnosticDetails(Stream &stream,
     return;
   }
 
-  // Since there is no other way to find this out, use the color
-  // attribute as a proxy for whether the terminal supports Unicode
-  // characters.  In the future it might make sense to move this into
-  // Host so it can be customized for a specific platform.
   llvm::StringRef cursor, underline, vbar, joint, hbar, spacer;
-  if (stream.AsRawOstream().colors_enabled()) {
+  if (Terminal::SupportsUnicode() && !force_ascii) {
     cursor = "˄";
     underline = "˜";
     vbar = "│";

diff  --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index 436dfd8130d9b..b6d09425e956e 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -400,6 +400,23 @@ llvm::Error Terminal::SetHardwareFlowControl(bool enabled) {
 #endif // LLDB_ENABLE_TERMIOS
 }
 
+bool Terminal::SupportsUnicode() {
+#ifdef _WIN32
+  // Console output goes through WriteConsoleW, so Unicode is always available.
+  return true;
+#else
+  // Computed once; a missing LANG is cached as "no Unicode" too, so the
+  // environment is not re-read on every call.
+  static std::optional<bool> g_result;
+  if (!g_result) {
+    const char *lang_var = std::getenv("LANG");
+    g_result =
+        lang_var && llvm::StringRef(lang_var).contains_insensitive("utf-8");
+  }
+  return *g_result;
+#endif
+}
+
 TerminalState::TerminalState(Terminal term, bool save_process_group)
     : m_tty(term) {
   Save(term, save_process_group);

diff  --git a/lldb/test/Shell/Commands/command-dwim-print.test b/lldb/test/Shell/Commands/command-dwim-print.test
index 9153edbd21791..8c2697d8ebf8c 100644
--- a/lldb/test/Shell/Commands/command-dwim-print.test
+++ b/lldb/test/Shell/Commands/command-dwim-print.test
@@ -1,16 +1,16 @@
 # RUN: echo quit | %lldb -o "dwim-print a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) dwim-print a 
-# CHECK1:{{^                  \^}}
-# CHECK1: {{^                  error: use of undeclared identifier 'a'}}
+# CHECK1:{{^                  (\^|˄)}}
+# CHECK1: {{^                  (╰─ )?error: use of undeclared identifier 'a'}}
 # RUN: echo quit | %lldb -o "p a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb) p a 
-# CHECK2:{{^         \^}}
+# CHECK2:{{^         (\^|˄)}}
 # RUN: echo quit | %lldb -o "dwim-print -- a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) dwim-print -- a 
-# CHECK3:{{^                     \^}}
+# CHECK3:{{^                     (\^|˄)}}
 # RUN: echo quit | %lldb -o "settings set show-inline-diagnostics false" \
 # RUN:   -o "dwim-print a" 2>&1 | FileCheck %s --check-prefix=CHECK4
 # CHECK4: error: <user expression 0>:1:1: use of undeclared identifier

diff  --git a/lldb/test/Shell/Commands/command-expr-diagnostics.test b/lldb/test/Shell/Commands/command-expr-diagnostics.test
index 3c827fb4516ec..0cb3cd381f3e2 100644
--- a/lldb/test/Shell/Commands/command-expr-diagnostics.test
+++ b/lldb/test/Shell/Commands/command-expr-diagnostics.test
@@ -2,20 +2,20 @@
 # RUN: echo quit | %lldb -o "expression a+b" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) expression a+b
-# CHECK1:{{^                  \^ \^}}
-# CHECK1: {{^                  | error: use of undeclared identifier 'b'}}
-# CHECK1: {{^                  error: use of undeclared identifier 'a'}}
+# CHECK1:{{^                  (\^|˄) (\^|˄)}}
+# CHECK1: {{^                  (\||│) (╰─ )?error: use of undeclared identifier 'b'}}
+# CHECK1: {{^                  (╰─ )?error: use of undeclared identifier 'a'}}
 
 # RUN: echo quit | %lldb -o "expr a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb) expr a 
-# CHECK2:{{^            \^}}
+# CHECK2:{{^            (\^|˄)}}
 
 # RUN: echo quit | %lldb -o "expr -i 0 -o 0 -- a" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) expr -i 0 -o 0 -- a
-# CHECK3:{{^                         \^}}
-# CHECK3: {{^                         error: use of undeclared identifier 'a'}}
+# CHECK3:{{^                         (\^|˄)}}
+# CHECK3: {{^                         (╰─ )?error: use of undeclared identifier 'a'}}
 
 # RUN: echo "int main(){return 0;}">%t.c
 # RUN: %clang_host %t.c -o %t.exe
@@ -23,7 +23,7 @@
 # RUN: "expr --top-level -- template<typename T> T FOO(T x) { return x/2;}" -o \
 # RUN: "expression -- FOO(\"\")" 2>&1 | FileCheck %s --check-prefix=CHECK4
 #            (lldb) expression -- FOO("")
-# CHECK4:{{^                     \^}}
+# CHECK4:{{^                     (\^|˄)}}
 # CHECK4: {{^                     note: in instantiation of function template}}
 # CHECK4: error: <user expression
 

diff  --git a/lldb/test/Shell/Commands/command-options.test b/lldb/test/Shell/Commands/command-options.test
index 73aa374bde297..f75c0993ed78e 100644
--- a/lldb/test/Shell/Commands/command-options.test
+++ b/lldb/test/Shell/Commands/command-options.test
@@ -1,16 +1,16 @@
 # RUN: echo quit | %lldb -O "log enable -x" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK1
 #            (lldb) log enable -x
-# CHECK1:{{^                  \^~}}
-# CHECK1: {{^                  error: unknown or ambiguous option}}
+# CHECK1:{{^                  (\^|˄)(~|˜)}}
+# CHECK1: {{^                  (╰─ )?error: unknown or ambiguous option}}
 
 # RUN: echo quit | %lldb -O "    log enable -xxxxxxx" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK2
 #            (lldb)     log enable -xxxxxxx
-# CHECK2:{{^                      \^~~~~~~~}}
-# CHECK2: {{^                      error: unknown or ambiguous option}}
+# CHECK2:{{^                      (\^|˄)(~|˜)+}}
+# CHECK2: {{^                      (╰─ )?error: unknown or ambiguous option}}
 # RUN: echo quit | %lldb -O "log enable dwarf all -f dwarf.log -x" \
 # RUN:   | FileCheck %s --strict-whitespace --check-prefix=CHECK3
 #            (lldb) log enable dwarf all -f dwarf.log -x
-# CHECK3:{{^                                         \^~}}
-# CHECK3: {{^                                         error: unknown or ambiguous option}}
+# CHECK3:{{^                                         (\^|˄)(~|˜)}}
+# CHECK3: {{^                                         (╰─ )?error: unknown or ambiguous option}}

diff  --git a/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp b/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
index 851b478def32e..896ce1995fe1c 100644
--- a/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
+++ b/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
@@ -10,7 +10,7 @@ class ErrorDisplayTest : public ::testing::Test {};
 
 std::string Render(std::vector<DiagnosticDetail> details) {
   StreamString stream;
-  RenderDiagnosticDetails(stream, 0, true, details);
+  RenderDiagnosticDetails(stream, 0, true, details, /*force_ascii=*/true);
   return stream.GetData();
 }
 } // namespace


        


More information about the lldb-commits mailing list