[Lldb-commits] [lldb] [lldb] improve the heuristics for checking if a terminal supports Unicode (PR #171832)
Charles Zablit via lldb-commits
lldb-commits at lists.llvm.org
Thu Dec 11 06:08:30 PST 2025
https://github.com/charles-zablit created https://github.com/llvm/llvm-project/pull/171832
This patch improves the way lldb checks if the terminal it's opened in (if any) supports Unicode or not.
On POSIX systems, we check if `LANG` contains `UTF-8`.
On Windows, we always return `true` since we use the `WriteToConsoleW` api.
This is a relanding of https://github.com/llvm/llvm-project/pull/168603.
The tests failed because the bots support Unicode but the tests expect ASCII. To avoid different outputs depending on the environment the tests are running in, this patch always force ASCII in the tests.
>From e46383656970d88e4a73b5406c4a04eb7bea48a5 Mon Sep 17 00:00:00 2001
From: Charles Zablit <c_zablit at apple.com>
Date: Thu, 11 Dec 2025 14:02:09 +0000
Subject: [PATCH] [lldb] improve the heuristics for checking if a terminal
supports Unicode
---
lldb/include/lldb/Host/Terminal.h | 12 ++++++++++++
.../lldb/Host/common/DiagnosticsRendering.h | 19 ++++++++++++++++++-
.../Host/common/DiagnosticsRendering.cpp | 11 +++++------
lldb/source/Host/common/Terminal.cpp | 16 ++++++++++++++++
.../Shell/Commands/command-dwim-print.test | 8 ++++----
.../Commands/command-expr-diagnostics.test | 15 +++++++--------
lldb/test/Shell/Commands/command-options.test | 12 ++++++------
.../Host/common/DiagnosticsRenderingTest.cpp | 2 +-
8 files changed, 69 insertions(+), 26 deletions(-)
diff --git a/lldb/include/lldb/Host/Terminal.h b/lldb/include/lldb/Host/Terminal.h
index da0d05e8bd265..3d66515c18812 100644
--- a/lldb/include/lldb/Host/Terminal.h
+++ b/lldb/include/lldb/Host/Terminal.h
@@ -68,6 +68,18 @@ class Terminal {
llvm::Error SetHardwareFlowControl(bool enabled);
+ /// Returns whether or not the current terminal supports Unicode rendering.
+ ///
+ /// The value is cached after the first computation.
+ ///
+ /// On POSIX systems, we check if the LANG environment variable contains the
+ /// substring "UTF-8", case insensitive.
+ ///
+ /// On Windows, we always return true since we use the `WriteConsoleW` API
+ /// internally. Note that the default Windows codepage (437) does not support
+ /// all Unicode characters. This function does not check the codepage.
+ static bool SupportsUnicode();
+
protected:
struct Data;
diff --git a/lldb/include/lldb/Host/common/DiagnosticsRendering.h b/lldb/include/lldb/Host/common/DiagnosticsRendering.h
index dd33d671c24a5..3eea0647da37e 100644
--- a/lldb/include/lldb/Host/common/DiagnosticsRendering.h
+++ b/lldb/include/lldb/Host/common/DiagnosticsRendering.h
@@ -59,10 +59,27 @@ struct DiagnosticDetail {
StructuredData::ObjectSP Serialize(llvm::ArrayRef<DiagnosticDetail> details);
+/// Renders an array of DiagnosticDetail instances.
+///
+/// \param[in] stream
+/// The stream to render the diagnostics to.
+/// \param offset_in_command
+/// An optional offset to the column position of the diagnostic in the
+/// source.
+/// \param show_inline
+/// Whether to show the diagnostics inline.
+/// \param details
+/// The array of DiagnosticsDetail to render.
+/// \param force_ascii
+/// Whether to force ascii rendering. If false, Unicode characters will be
+/// used if the output file supports them.
+///
+/// \see lldb_private::Terminal::SupportsUnicode
void RenderDiagnosticDetails(Stream &stream,
std::optional<uint16_t> offset_in_command,
bool show_inline,
- llvm::ArrayRef<DiagnosticDetail> details);
+ llvm::ArrayRef<DiagnosticDetail> details,
+ bool force_ascii = false);
class DiagnosticError
: public llvm::ErrorInfo<DiagnosticError, CloneableECError> {
diff --git a/lldb/source/Host/common/DiagnosticsRendering.cpp b/lldb/source/Host/common/DiagnosticsRendering.cpp
index f2cd3968967fb..2c9d33a6c325c 100644
--- a/lldb/source/Host/common/DiagnosticsRendering.cpp
+++ b/lldb/source/Host/common/DiagnosticsRendering.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "lldb/Host/common/DiagnosticsRendering.h"
+#include "lldb/Host/Terminal.h"
+
#include <cstdint>
using namespace lldb_private;
@@ -85,7 +87,8 @@ static llvm::raw_ostream &PrintSeverity(Stream &stream,
void RenderDiagnosticDetails(Stream &stream,
std::optional<uint16_t> offset_in_command,
bool show_inline,
- llvm::ArrayRef<DiagnosticDetail> details) {
+ llvm::ArrayRef<DiagnosticDetail> details,
+ bool force_ascii) {
if (details.empty())
return;
@@ -97,12 +100,8 @@ void RenderDiagnosticDetails(Stream &stream,
return;
}
- // Since there is no other way to find this out, use the color
- // attribute as a proxy for whether the terminal supports Unicode
- // characters. In the future it might make sense to move this into
- // Host so it can be customized for a specific platform.
llvm::StringRef cursor, underline, vbar, joint, hbar, spacer;
- if (stream.AsRawOstream().colors_enabled()) {
+ if (Terminal::SupportsUnicode() && !force_ascii) {
cursor = "˄";
underline = "˜";
vbar = "│";
diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index 436dfd8130d9b..d3647835e3937 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -400,6 +400,22 @@ llvm::Error Terminal::SetHardwareFlowControl(bool enabled) {
#endif // LLDB_ENABLE_TERMIOS
}
+bool Terminal::SupportsUnicode() {
+ static std::optional<bool> g_result;
+ if (g_result)
+ return g_result.value();
+#ifdef _WIN32
+ return true;
+#else
+ const char *lang_var = std::getenv("LANG");
+ if (!lang_var)
+ return false;
+ g_result =
+ llvm::StringRef(lang_var).lower().find("utf-8") != std::string::npos;
+#endif
+ return g_result.value();
+}
+
TerminalState::TerminalState(Terminal term, bool save_process_group)
: m_tty(term) {
Save(term, save_process_group);
diff --git a/lldb/test/Shell/Commands/command-dwim-print.test b/lldb/test/Shell/Commands/command-dwim-print.test
index 9153edbd21791..c30d9c7fc3f85 100644
--- a/lldb/test/Shell/Commands/command-dwim-print.test
+++ b/lldb/test/Shell/Commands/command-dwim-print.test
@@ -1,16 +1,16 @@
# RUN: echo quit | %lldb -o "dwim-print a" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK1
# (lldb) dwim-print a
-# CHECK1:{{^ \^}}
-# CHECK1: {{^ error: use of undeclared identifier 'a'}}
+# CHECK1:{{^ (\^|˄)}}
+# CHECK1: {{^ (╰─)? error: use of undeclared identifier 'a'}}
# RUN: echo quit | %lldb -o "p a" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK2
# (lldb) p a
-# CHECK2:{{^ \^}}
+# CHECK2:{{^ (\^|˄)}}
# RUN: echo quit | %lldb -o "dwim-print -- a" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK3
# (lldb) dwim-print -- a
-# CHECK3:{{^ \^}}
+# CHECK3:{{^ (\^|˄)}}
# RUN: echo quit | %lldb -o "settings set show-inline-diagnostics false" \
# RUN: -o "dwim-print a" 2>&1 | FileCheck %s --check-prefix=CHECK4
# CHECK4: error: <user expression 0>:1:1: use of undeclared identifier
diff --git a/lldb/test/Shell/Commands/command-expr-diagnostics.test b/lldb/test/Shell/Commands/command-expr-diagnostics.test
index 3c827fb4516ec..b791c8d0eece9 100644
--- a/lldb/test/Shell/Commands/command-expr-diagnostics.test
+++ b/lldb/test/Shell/Commands/command-expr-diagnostics.test
@@ -1,21 +1,20 @@
-# XFAIL: target-windows
# RUN: echo quit | %lldb -o "expression a+b" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK1
# (lldb) expression a+b
-# CHECK1:{{^ \^ \^}}
-# CHECK1: {{^ | error: use of undeclared identifier 'b'}}
-# CHECK1: {{^ error: use of undeclared identifier 'a'}}
+# CHECK1:{{^ (\^|˄) (\^|˄)}}
+# CHECK1: {{^ (\||│) (╰─)? error: use of undeclared identifier 'b'}}
+# CHECK1: {{^ (╰─)? error: use of undeclared identifier 'a'}}
# RUN: echo quit | %lldb -o "expr a" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK2
# (lldb) expr a
-# CHECK2:{{^ \^}}
+# CHECK2:{{^ (\^|˄)}}
# RUN: echo quit | %lldb -o "expr -i 0 -o 0 -- a" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK3
# (lldb) expr -i 0 -o 0 -- a
-# CHECK3:{{^ \^}}
-# CHECK3: {{^ error: use of undeclared identifier 'a'}}
+# CHECK3:{{^ (\^|˄)}}
+# CHECK3: {{^ (╰─)? error: use of undeclared identifier 'a'}}
# RUN: echo "int main(){return 0;}">%t.c
# RUN: %clang_host %t.c -o %t.exe
@@ -23,7 +22,7 @@
# RUN: "expr --top-level -- template<typename T> T FOO(T x) { return x/2;}" -o \
# RUN: "expression -- FOO(\"\")" 2>&1 | FileCheck %s --check-prefix=CHECK4
# (lldb) expression -- FOO("")
-# CHECK4:{{^ \^}}
+# CHECK4:{{^ (\^|˄)}}
# CHECK4: {{^ note: in instantiation of function template}}
# CHECK4: error: <user expression
diff --git a/lldb/test/Shell/Commands/command-options.test b/lldb/test/Shell/Commands/command-options.test
index 73aa374bde297..85cc2b881b856 100644
--- a/lldb/test/Shell/Commands/command-options.test
+++ b/lldb/test/Shell/Commands/command-options.test
@@ -1,16 +1,16 @@
# RUN: echo quit | %lldb -O "log enable -x" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK1
# (lldb) log enable -x
-# CHECK1:{{^ \^~}}
-# CHECK1: {{^ error: unknown or ambiguous option}}
+# CHECK1:{{^ (\^|˄)(~|˜)}}
+# CHECK1: {{^ (╰─)? error: unknown or ambiguous option}}
# RUN: echo quit | %lldb -O " log enable -xxxxxxx" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK2
# (lldb) log enable -xxxxxxx
-# CHECK2:{{^ \^~~~~~~~}}
-# CHECK2: {{^ error: unknown or ambiguous option}}
+# CHECK2:{{^ (\^|˄)(~|˜)+}}
+# CHECK2: {{^ (╰─)? error: unknown or ambiguous option}}
# RUN: echo quit | %lldb -O "log enable dwarf all -f dwarf.log -x" \
# RUN: | FileCheck %s --strict-whitespace --check-prefix=CHECK3
# (lldb) log enable dwarf all -f dwarf.log -x
-# CHECK3:{{^ \^~}}
-# CHECK3: {{^ error: unknown or ambiguous option}}
+# CHECK3:{{^ (\^|˄)(~|˜)}}
+# CHECK3: {{^ (╰─)? error: unknown or ambiguous option}}
diff --git a/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp b/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
index 851b478def32e..896ce1995fe1c 100644
--- a/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
+++ b/lldb/unittests/Host/common/DiagnosticsRenderingTest.cpp
@@ -10,7 +10,7 @@ class ErrorDisplayTest : public ::testing::Test {};
std::string Render(std::vector<DiagnosticDetail> details) {
StreamString stream;
- RenderDiagnosticDetails(stream, 0, true, details);
+ RenderDiagnosticDetails(stream, 0, true, details, /*force_ascii=*/true);
return stream.GetData();
}
} // namespace
More information about the lldb-commits
mailing list