[Lldb-commits] [lldb] [lldb] Implement ANSI & Unicode aware string stripping & padding (PR #130878)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Wed Mar 12 08:44:24 PDT 2025
================
@@ -172,28 +175,99 @@ inline std::string FormatAnsiTerminalCodes(llvm::StringRef format,
return fmt;
}
+inline std::tuple<llvm::StringRef, llvm::StringRef, llvm::StringRef>
+FindNextAnsiSequence(llvm::StringRef str) {
+ llvm::StringRef left;
+ llvm::StringRef right = str;
+
+ while (!right.empty()) {
+ const size_t start = right.find(ANSI_ESC_START);
+
+ // ANSI_ESC_START not found.
+ if (start == llvm::StringRef::npos)
+ return {str, {}, {}};
+
+ // Split the string around the current ANSI_ESC_START.
+ left = str.take_front(left.size() + start);
+ llvm::StringRef escape = right.substr(start);
+ right = right.substr(start + ANSI_ESC_START_LEN + 1);
+
+ const size_t end = right.find_first_not_of("0123456789;");
+
+ // ANSI_ESC_END found.
+ if (end < right.size() && (right[end] == 'm' || right[end] == 'G'))
+ return {left, escape.take_front(ANSI_ESC_START_LEN + 1 + end + 1),
+ right.substr(end + 1)};
+
+ // Maintain the invariant that str == left + right at the start of the loop.
+ left = str.take_front(left.size() + ANSI_ESC_START_LEN + 1);
+ }
+
+ return {str, {}, {}};
+}
+
inline std::string StripAnsiTerminalCodes(llvm::StringRef str) {
std::string stripped;
while (!str.empty()) {
- llvm::StringRef left, right;
-
- std::tie(left, right) = str.split(ANSI_ESC_START);
+ auto [left, escape, right] = FindNextAnsiSequence(str);
stripped += left;
+ str = right;
+ }
+ return stripped;
+}
- // ANSI_ESC_START not found.
- if (left == str && right.empty())
- break;
+inline std::string TrimAndPad(llvm::StringRef str, size_t visible_length,
+ char padding = ' ') {
+ std::string result;
+ result.reserve(visible_length);
+ size_t result_visibile_length = 0;
+
+ // Trim the string to the given visible length.
+ while (!str.empty()) {
+ auto [left, escape, right] = FindNextAnsiSequence(str);
+ str = right;
- size_t end = right.find_first_not_of("0123456789;");
- if (end < right.size() && (right[end] == 'm' || right[end] == 'G')) {
- str = right.substr(end + 1);
- } else {
- // ANSI_ESC_END not found.
- stripped += ANSI_ESC_START;
- str = right;
+ // Compute the length of the string without escape codes. If it fits, append
+ // it together with the invisible escape code.
+ size_t column_width = llvm::sys::locale::columnWidth(left);
+ if (result_visibile_length + column_width <= visible_length) {
+ result.append(left).append(escape);
+ result_visibile_length += column_width;
+ continue;
+ }
+
+ // The string doesn't fit but doesn't contain unicode.
+ // Append the substring that fits.
+ if (column_width == left.size()) {
+ llvm::StringRef trimmed =
+ left.take_front(visible_length - result_visibile_length);
+ result.append(trimmed);
+ result_visibile_length += visible_length - result_visibile_length;
+ continue;
+ }
----------------
JDevlieghere wrote:
Good point, I didn't consider that.
https://github.com/llvm/llvm-project/pull/130878
More information about the lldb-commits
mailing list