[Lldb-commits] [lldb] d438115 - [lldb/libc++] Simplify the libc++ string formatter

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Tue Jul 12 00:59:20 PDT 2022


Author: Pavel Labath
Date: 2022-07-12T09:57:13+02:00
New Revision: d4381153ea63e9458ffb9dd20ea92fb35d4e3042

URL: https://github.com/llvm/llvm-project/commit/d4381153ea63e9458ffb9dd20ea92fb35d4e3042
DIFF: https://github.com/llvm/llvm-project/commit/d4381153ea63e9458ffb9dd20ea92fb35d4e3042.diff

LOG: [lldb/libc++] Simplify the libc++ string formatter

Precise string layout has changed a lot recently, but a long of these
changes did not have any effect on the usages of its fields -- e.g.
introduction/removal of an anonymous struct or union does not change the
way one can access the field in C++. Our name-based variable lookup
rules (deliberately) copy the C++ semantics, which means these changes
would have been invisible to the them, if only we were using name-based
lookup.

This patch replaces the opaque child index accesses with name-based
lookups, which allows us to greatly simplify the data formatter code.
The formatter continues to support all the string layouts that it
previously supported.

It is unclear why the formatter was not using this approach from the
beginning. I would speculate that the original version was working
around some (now fixed) issue with anonymous members or base classes,
and the subsequent revisions stuck with that approach out of inertia.

Differential Revision: https://reviews.llvm.org/D129490

Added: 
    

Modified: 
    lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
index eaaa16413b1e7..6c651c6f1557b 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -547,101 +547,77 @@ bool lldb_private::formatters::LibcxxContainerSummaryProvider(
 }
 
 /// The field layout in a libc++ string (cap, side, data or data, size, cap).
-enum LibcxxStringLayoutMode {
-  eLibcxxStringLayoutModeCSD = 0,
-  eLibcxxStringLayoutModeDSC = 1,
-  eLibcxxStringLayoutModeInvalid = 0xffff
-};
+namespace {
+enum class StringLayout { CSD, DSC };
+}
 
 /// Determine the size in bytes of \p valobj (a libc++ std::string object) and
 /// extract its data payload. Return the size + payload pair.
 // TODO: Support big-endian architectures.
 static llvm::Optional<std::pair<uint64_t, ValueObjectSP>>
 ExtractLibcxxStringInfo(ValueObject &valobj) {
-  ValueObjectSP dataval_sp(valobj.GetChildAtIndexPath({0, 0, 0, 0}));
-  if (!dataval_sp)
+  ValueObjectSP valobj_r_sp =
+      valobj.GetChildMemberWithName(ConstString("__r_"), /*can_create=*/true);
+  if (!valobj_r_sp)
     return {};
-  if (!dataval_sp->GetError().Success())
+
+  // __r_ is a compressed_pair of the actual data and the allocator. The data we
+  // want is in the first base class.
+  ValueObjectSP valobj_r_base_sp =
+      valobj_r_sp->GetChildAtIndex(0, /*can_create=*/true);
+  if (!valobj_r_base_sp)
     return {};
 
-  ValueObjectSP layout_decider(
-      dataval_sp->GetChildAtIndexPath(llvm::ArrayRef<size_t>({0, 0})));
+  ValueObjectSP valobj_rep_sp = valobj_r_base_sp->GetChildMemberWithName(
+      ConstString("__value_"), /*can_create=*/true);
+  if (!valobj_rep_sp)
+    return {};
 
-  // this child should exist
-  if (!layout_decider)
+  ValueObjectSP l = valobj_rep_sp->GetChildMemberWithName(ConstString("__l"),
+                                                          /*can_create=*/true);
+  if (!l)
     return {};
 
-  ConstString g_data_name("__data_");
-  ConstString g_size_name("__size_");
+  StringLayout layout = l->GetIndexOfChildWithName(ConstString("__data_")) == 0
+                            ? StringLayout::DSC
+                            : StringLayout::CSD;
+
   bool short_mode = false; // this means the string is in short-mode and the
                            // data is stored inline
   bool using_bitmasks = true; // Whether the class uses bitmasks for the mode
                               // flag (pre-D123580).
   uint64_t size;
-  LibcxxStringLayoutMode layout = (layout_decider->GetName() == g_data_name)
-                                      ? eLibcxxStringLayoutModeDSC
-                                      : eLibcxxStringLayoutModeCSD;
   uint64_t size_mode_value = 0;
 
-  ValueObjectSP short_sp(dataval_sp->GetChildAtIndex(1, true));
+  ValueObjectSP short_sp = valobj_rep_sp->GetChildMemberWithName(
+      ConstString("__s"), /*can_create=*/true);
   if (!short_sp)
     return {};
 
-  ValueObjectSP short_fields_sp;
   ValueObjectSP is_long =
       short_sp->GetChildMemberWithName(ConstString("__is_long_"), true);
-  if (is_long) {
-    short_fields_sp = short_sp;
-  } else {
-    // After D128285, we need to access the `__is_long_` and `__size_` fields
-    // from a packed anonymous struct
-    short_fields_sp = short_sp->GetChildAtIndex(0, true);
-    is_long = short_sp->GetChildMemberWithName(ConstString("__is_long_"), true);
-  }
+  ValueObjectSP size_sp =
+      short_sp->GetChildAtNamePath({ConstString("__size_")});
+  if (!size_sp)
+    return {};
 
   if (is_long) {
     using_bitmasks = false;
     short_mode = !is_long->GetValueAsUnsigned(/*fail_value=*/0);
-    if (ValueObjectSP size_member =
-            dataval_sp->GetChildAtNamePath({ConstString("__s"), ConstString("__size_")}))
-      size = size_member->GetValueAsUnsigned(/*fail_value=*/0);
-    else
-      return {};
-  } else if (layout == eLibcxxStringLayoutModeDSC) {
-    llvm::SmallVector<size_t, 3> size_mode_locations[] = {
-        {1, 2}, // Post-c3d0205ee771 layout. This was in use for only a brief
-                // period, so we can delete it if it becomes a burden.
-        {1, 1, 0},
-        {1, 1, 1},
-    };
-    ValueObjectSP size_mode;
-    for (llvm::ArrayRef<size_t> loc : size_mode_locations) {
-      size_mode = dataval_sp->GetChildAtIndexPath(loc);
-      if (size_mode && size_mode->GetName() == g_size_name)
-        break;
-    }
-
-    if (!size_mode)
-      return {};
-
-    size_mode_value = (size_mode->GetValueAsUnsigned(0));
-    short_mode = ((size_mode_value & 0x80) == 0);
+    size = size_sp->GetValueAsUnsigned(/*fail_value=*/0);
   } else {
-    ValueObjectSP size_mode(dataval_sp->GetChildAtIndexPath({1, 0, 0}));
-    if (!size_mode)
-      return {};
-
-    size_mode_value = (size_mode->GetValueAsUnsigned(0));
-    short_mode = ((size_mode_value & 1) == 0);
+    // The string mode is encoded in the size field.
+    size_mode_value = size_sp->GetValueAsUnsigned(0);
+    uint8_t mode_mask = layout == StringLayout::DSC ? 0x80 : 1;
+    short_mode = (size_mode_value & mode_mask) == 0;
   }
 
   if (short_mode) {
     ValueObjectSP location_sp =
-        short_sp->GetChildMemberWithName(g_data_name, true);
+        short_sp->GetChildMemberWithName(ConstString("__data_"), true);
     if (using_bitmasks)
-      size = (layout == eLibcxxStringLayoutModeDSC)
-                 ? size_mode_value
-                 : ((size_mode_value >> 1) % 256);
+      size = (layout == StringLayout::DSC) ? size_mode_value
+                                           : ((size_mode_value >> 1) % 256);
 
     // When the small-string optimization takes place, the data must fit in the
     // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
@@ -656,10 +632,6 @@ ExtractLibcxxStringInfo(ValueObject &valobj) {
     return std::make_pair(size, location_sp);
   }
 
-  ValueObjectSP l(dataval_sp->GetChildAtIndex(0, true));
-  if (!l)
-    return {};
-
   // we can use the layout_decider object as the data pointer
   ValueObjectSP location_sp =
       l->GetChildMemberWithName(ConstString("__data_"), /*can_create=*/true);
@@ -667,19 +639,11 @@ ExtractLibcxxStringInfo(ValueObject &valobj) {
       l->GetChildMemberWithName(ConstString("__size_"), /*can_create=*/true);
   ValueObjectSP capacity_vo =
       l->GetChildMemberWithName(ConstString("__cap_"), /*can_create=*/true);
-  if (!capacity_vo) {
-    // After D128285, we need to access the `__cap_` field from a packed
-    // anonymous struct
-    if (ValueObjectSP packed_fields_sp = l->GetChildAtIndex(0, true)) {
-      ValueObjectSP capacity_vo = packed_fields_sp->GetChildMemberWithName(
-          ConstString("__cap_"), /*can_create=*/true);
-    }
-  }
   if (!size_vo || !location_sp || !capacity_vo)
     return {};
   size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
   uint64_t capacity = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
-  if (!using_bitmasks && layout == eLibcxxStringLayoutModeCSD)
+  if (!using_bitmasks && layout == StringLayout::CSD)
     capacity *= 2;
   if (size == LLDB_INVALID_OFFSET || capacity == LLDB_INVALID_OFFSET ||
       capacity < size)


        


More information about the lldb-commits mailing list